mirror of https://github.com/ceph/ceph-ansible.git
Common: Fix handlers that are not properly triggered.
Until now, only the first task was executed. The idea here is to use the `listen` statement so that a single notification can trigger multiple handlers, and to regroup all of them in `./handlers/main.yml`, since notifying an included handler task is not possible.

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
parent 3c59ed58a5
commit 5bec62ba7c
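For context on the mechanism this commit relies on: with `listen`, several handler tasks subscribe to the same topic, and one `notify` of that topic runs all of them in the order they are defined. A minimal sketch of the pattern (task and handler names here are illustrative, not taken from this repository):

# handlers/main.yml -- both handlers subscribe to the same topic
- name: copy restart script            # illustrative handler
  template:
    src: restart_daemon.sh.j2
    dest: /tmp/restart_daemon.sh
    mode: 0750
  listen: "restart daemons"

- name: run restart script             # runs on the same notification
  command: /tmp/restart_daemon.sh
  listen: "restart daemons"

# tasks/example.yml -- a single notify triggers every listening handler
- name: push a config file             # illustrative task
  template:
    src: example.conf.j2
    dest: /etc/example.conf
  notify: "restart daemons"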
handlers/main.yml
@@ -4,17 +4,67 @@
     update-cache: yes
   when: ansible_os_family == 'Debian'
 
-- name: restart ceph mons
-  include: "./restart-mon.yml"
-
-- name: restart ceph osds
-  include: "./restart-osd.yml"
-
-- name: restart ceph mdss
-  include: "./restart-mds.yml"
-
-- name: restart ceph rgws
-  include: "./restart-rgw.yml"
+- block:
+    - name: copy mon restart script
+      template:
+        src: restart_mon_daemon.sh.j2
+        dest: /tmp/restart_mon_daemon.sh
+        owner: root
+        group: root
+        mode: 0750
+      listen: "restart ceph mons"
+
+    - name: restart ceph mon daemon(s)
+      command: /tmp/restart_mon_daemon.sh
+      listen: "restart ceph mons"
+
+  when:
+    - mon_group_name in group_names
+
+# This does not just restart OSDs but everything else too. Unfortunately
+# at this time the ansible role does not have an OSD id list to use
+# for restarting them specifically.
+- block:
+    - name: copy osd restart script
+      template:
+        src: restart_osd_daemon.sh.j2
+        dest: /tmp/restart_osd_daemon.sh
+        owner: root
+        group: root
+        mode: 0750
+      listen: "restart ceph osds"
+
+    - name: restart ceph osds daemon(s)
+      command: /tmp/restart_osd_daemon.sh
+      listen: "restart ceph osds"
+      when:
+        - handler_health_osd_check
+  when:
+    - osd_group_name in group_names
+
+- name: restart ceph mdss
+  service:
+    name: ceph-mds@{{ mds_name }}
+    state: restarted
+  # serial: 1 would be the proper solution here, but that can only be set on play level
+  # upstream issue: https://github.com/ansible/ansible/issues/12170
+  run_once: true
+  with_items: "{{ groups.get(mds_group_name, []) }}"
+  delegate_to: "{{ item }}"
+  when:
+    - mds_group_name in group_names
+
+- name: restart ceph rgws
+  service:
+    name: ceph-radosgw@rgw.{{ ansible_hostname }}
+    state: restarted
+  # serial: 1 would be the proper solution here, but that can only be set on play level
+  # upstream issue: https://github.com/ansible/ansible/issues/12170
+  run_once: true
+  with_items: "{{ groups.get(rgw_group_name, []) }}"
+  delegate_to: "{{ item }}"
+  when:
+    - rgw_group_name in group_names
 
 - name: restart ceph nfss
   service:
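With this layout, any task in the role can fire the whole copy-then-restart chain through one notification. A hypothetical sketch of a notifying task (not part of this diff; the file name is illustrative):

- name: write cluster configuration    # illustrative task
  template:
    src: ceph.conf.j2
    dest: /etc/ceph/{{ cluster }}.conf
  # one notify fires both "copy mon restart script" and
  # "restart ceph mon daemon(s)", in definition order
  notify: "restart ceph mons"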
handlers/restart-mds.yml (deleted)
@@ -1,13 +0,0 @@
----
-- name: restart ceph mdss
-  service:
-    name: ceph-mds@{{ mds_name }}
-    state: restarted
-  # serial: 1 would be the proper solution here, but that can only be set on play level
-  # upstream issue: https://github.com/ansible/ansible/issues/12170
-  run_once: true
-  with_items: "{{ groups.get(mds_group_name, []) }}"
-  delegate_to: "{{ item }}"
-  when:
-    - socket.rc == 0
-    - mds_group_name in group_names
handlers/restart-mon.yml (deleted)
@@ -1,17 +0,0 @@
----
-- name: restart ceph mons
-  service:
-    name: ceph-mon@{{ monitor_name }}
-    state: restarted
-  # serial: 1 would be the proper solution here, but that can only be set on play level
-  # upstream issue: https://github.com/ansible/ansible/issues/12170
-  run_once: true
-  with_items: "{{ groups.get(mon_group_name, []) }}"
-  delegate_to: "{{ item }}"
-  when:
-    - socket.rc == 0
-    - mon_group_name in group_names
-
-- name: validate monitors
-  include: validate-mon.yml
-  when: mon_group_name in group_names
handlers/restart-osd.yml (deleted)
@@ -1,22 +0,0 @@
----
-# This does not just restart OSDs but everything else too. Unfortunately
-# at this time the ansible role does not have an OSD id list to use
-# for restarting them specifically.
-- name: restart ceph osds
-  shell: |
-    for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do
-      systemctl restart ceph-osd@$id
-      sleep 5
-    done
-  # serial: 1 would be the proper solution here, but that can only be set on play level
-  # upstream issue: https://github.com/ansible/ansible/issues/12170
-  run_once: true
-  with_items: "{{ groups.get(osd_group_name, []) }}"
-  delegate_to: "{{ item }}"
-  when:
-    - socket.rc == 0
-    - osd_group_name in group_names
-
-- name: validate osds
-  include: validate-osd.yml
-  when: osd_group_name in group_names
handlers/restart-rgw.yml (deleted)
@@ -1,13 +0,0 @@
----
-- name: restart ceph rgws
-  service:
-    name: ceph-rgw@{{ ansible_hostname }}
-    state: restarted
-  # serial: 1 would be the proper solution here, but that can only be set on play level
-  # upstream issue: https://github.com/ansible/ansible/issues/12170
-  run_once: true
-  with_items: "{{ groups.get(rgw_group_name, []) }}"
-  delegate_to: "{{ item }}"
-  when:
-    - socketrgw.rc == 0
-    - rgw_group_name in group_names
handlers/validate-mon.yml (deleted)
@@ -1,28 +0,0 @@
----
-- name: wait for ceph monitor socket
-  wait_for:
-    path: "/var/run/ceph/{{ cluster }}-mon.{{ monitor_name }}.asok"
-
-- name: set mon_host_count
-  set_fact: mon_host_count={{ groups[mon_group_name] | length }}
-
-- name: select a running monitor
-  set_fact: mon_host={{ item }}
-  with_items: "{{ groups[mon_group_name] }}"
-  when:
-    - item != inventory_hostname
-    - mon_host_count | int > 1
-
-- name: select first monitor if only one monitor
-  set_fact: mon_host={{ item }}
-  with_items: "{{ groups[mon_group_name][0] }}"
-  when: mon_host_count | int == 1
-
-- name: waiting for the monitor to join the quorum...
-  shell: |
-    ceph -s --cluster {{ cluster }} | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
-  register: result
-  until: result.rc == 0
-  retries: "{{ handler_health_mon_check_retries }}"
-  delay: "{{ handler_health_mon_check_delay }}"
-  delegate_to: "{{ mon_host }}"
handlers/validate-osd.yml (deleted)
@@ -1,20 +0,0 @@
----
-- name: collect osds
-  shell: |
-    ls /var/lib/ceph/osd/ | sed 's/.*-//'
-  register: osd_ids
-
-- name: wait for ceph osd socket(s)
-  wait_for:
-    path: "/var/run/ceph/{{ cluster }}-osd.{{ item }}.asok"
-  with_items: "{{ osd_ids.stdout_lines }}"
-
-- name: waiting for clean pgs...
-  shell: |
-    test "$(ceph --cluster {{ cluster }} pg stat | sed 's/^.*pgs://;s/active+clean.*//;s/ //')" -eq "$(ceph --cluster {{ cluster }} pg stat | sed 's/pgs.*//;s/^.*://;s/ //')" && ceph --cluster {{ cluster }} health | egrep -sq "HEALTH_OK|HEALTH_WARN"
-  register: result
-  until: result.rc == 0
-  retries: "{{ handler_health_osd_check_retries }}"
-  delay: "{{ handler_health_osd_check_delay }}"
-  delegate_to: "{{ groups[mon_group_name][0] }}"
-  when: handler_health_osd_check
checks/check_socket.yml (deleted)
@@ -1,14 +0,0 @@
----
-- name: check for a ceph socket
-  shell: "stat /var/run/ceph/*.asok > /dev/null 2>&1"
-  changed_when: false
-  failed_when: false
-  always_run: true
-  register: socket
-
-- name: check for a rados gateway socket
-  shell: "stat {{ rbd_client_admin_socket_path }}*.asok > /dev/null 2>&1"
-  changed_when: false
-  failed_when: false
-  always_run: true
-  register: socketrgw
tasks/main.yml
@@ -87,7 +87,6 @@
     static: False
 
 - include: facts.yml
-- include: ./checks/check_socket.yml
 - include: create_ceph_initial_dirs.yml
 - include: generate_cluster_fsid.yml
 - include: generate_ceph_conf.yml
templates/restart_mon_daemon.sh.j2 (new)
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+RETRIES="{{ handler_health_mon_check_retries }}"
+DELAY="{{ handler_health_mon_check_delay }}"
+MONITOR_NAME="{{ monitor_name }}"
+CLUSTER="{{ cluster }}"
+SOCKET=/var/run/ceph/${CLUSTER}-mon.${MONITOR_NAME}.asok
+
+
+check_quorum() {
+while [ $RETRIES -ne 0 ]; do
+  MEMBERS=$(ceph --cluster ${CLUSTER} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
+  test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && exit 0
+  sleep $DELAY
+  let RETRIES=RETRIES-1
+done
+# If we reach this point, it means there is a problem with the quorum
+exit 1
+}
+
+# First, restart the daemon
+systemctl restart ceph-mon@${MONITOR_NAME}
+
+COUNT=10
+# Wait and ensure the socket exists after restarting the daemon
+while [ $COUNT -ne 0 ]; do
+  test -S $SOCKET && check_quorum
+  sleep 1
+  let COUNT=COUNT-1
+done
+# If we reach this point, it means the socket is not present.
+echo "Error while restarting mon daemon"
+exit 1
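The retry and delay knobs above are rendered from role variables. A hedged sketch of what the corresponding entries in the role's defaults might look like (the variable names come from the template; the values are illustrative):

# defaults/main.yml (illustrative values, not part of this diff)
handler_health_mon_check_retries: 5
handler_health_mon_check_delay: 10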
templates/restart_osd_daemon.sh.j2 (new)
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+RETRIES="{{ handler_health_osd_check_retries }}"
+DELAY="{{ handler_health_osd_check_delay }}"
+CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"
+
+check_pgs() {
+while [ $RETRIES -ne 0 ]; do
+  ceph $CEPH_CLI -s | grep -sq 'active+clean'
+  RET=$?
+  test $RET -eq 0 && exit 0
+  sleep $DELAY
+  let RETRIES=RETRIES-1
+done
+# PGs not clean, exiting with return code 1
+echo "Error with PGs, check config"
+exit 1
+}
+
+
+for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do
+  # First, restart daemon(s)
+  systemctl restart ceph-osd@${id}
+  # We need to wait because it may take some time for the socket to actually exist
+  COUNT=10
+  # Wait and ensure the socket exists after restarting the daemon
+  SOCKET=/var/run/ceph/{{ cluster }}-osd.${id}.asok
+  while [ $COUNT -ne 0 ]; do
+    test -S $SOCKET && check_pgs
+    sleep 1
+    let COUNT=COUNT-1
+  done
+  # If we reach this point, it means the socket is not present.
+  echo "Error while restarting osd daemon"
+  exit 1
+done
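As with the mon script, the OSD health-check knobs are rendered from role variables, including the `handler_health_osd_check` toggle that guards the restart task. A hedged sketch of the matching defaults (variable names come from the diff; values are illustrative):

# defaults/main.yml (illustrative values, not part of this diff)
handler_health_osd_check: true
handler_health_osd_check_retries: 40
handler_health_osd_check_delay: 30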