diff --git a/group_vars/all.yml.sample b/group_vars/all.yml.sample
index e2d54f78b..22110f879 100644
--- a/group_vars/all.yml.sample
+++ b/group_vars/all.yml.sample
@@ -398,6 +398,14 @@ dummy:
 #handler_health_nfs_check_retries: 5
 #handler_health_nfs_check_delay: 10

+# RBD MIRROR handler checks
+#handler_health_rbd_mirror_check_retries: 5
+#handler_health_rbd_mirror_check_delay: 10
+
+# MGR handler checks
+#handler_health_mgr_check_retries: 5
+#handler_health_mgr_check_delay: 10
+
 ###############
 # NFS-GANESHA #
 ###############
diff --git a/group_vars/rhcs.yml.sample b/group_vars/rhcs.yml.sample
index 112efa41e..5ba40b7fc 100644
--- a/group_vars/rhcs.yml.sample
+++ b/group_vars/rhcs.yml.sample
@@ -398,6 +398,14 @@ ceph_repository: rhcs
 #handler_health_nfs_check_retries: 5
 #handler_health_nfs_check_delay: 10

+# RBD MIRROR handler checks
+#handler_health_rbd_mirror_check_retries: 5
+#handler_health_rbd_mirror_check_delay: 10
+
+# MGR handler checks
+#handler_health_mgr_check_retries: 5
+#handler_health_mgr_check_delay: 10
+
 ###############
 # NFS-GANESHA #
 ###############
diff --git a/roles/ceph-config/tasks/main.yml b/roles/ceph-config/tasks/main.yml
index 3ecf5d9e7..4a5df3350 100644
--- a/roles/ceph-config/tasks/main.yml
+++ b/roles/ceph-config/tasks/main.yml
@@ -117,6 +117,8 @@
     - restart ceph osds
     - restart ceph mdss
     - restart ceph rgws
+    - restart ceph rbdmirrors
+    - restart ceph mgrs

 - name: set fsid fact when generate_fsid = true
   set_fact:
diff --git a/roles/ceph-defaults/defaults/main.yml b/roles/ceph-defaults/defaults/main.yml
index cfdbbbdca..20a31d3e7 100644
--- a/roles/ceph-defaults/defaults/main.yml
+++ b/roles/ceph-defaults/defaults/main.yml
@@ -390,6 +390,14 @@ handler_health_rgw_check_delay: 10
 handler_health_nfs_check_retries: 5
 handler_health_nfs_check_delay: 10

+# RBD MIRROR handler checks
+handler_health_rbd_mirror_check_retries: 5
+handler_health_rbd_mirror_check_delay: 10
+
+# MGR handler checks
+handler_health_mgr_check_retries: 5
+handler_health_mgr_check_delay: 10
+
 ###############
 # NFS-GANESHA #
 ###############
diff --git a/roles/ceph-defaults/handlers/main.yml b/roles/ceph-defaults/handlers/main.yml
index 885a42abe..98799bd9a 100644
--- a/roles/ceph-defaults/handlers/main.yml
+++ b/roles/ceph-defaults/handlers/main.yml
@@ -126,3 +126,43 @@
     # We do not want to run these checks on initial deployment (`socket.rc == 0`)
     - nfs_group_name in group_names
     - nfs_socket_stat.rc == 0
+
+- name: copy rbd mirror restart script
+  template:
+    src: restart_rbd_mirror_daemon.sh.j2
+    dest: /tmp/restart_rbd_mirror_daemon.sh
+    owner: root
+    group: root
+    mode: "0750"
+  listen: "restart ceph rbdmirrors"
+  when:
+    - rbdmirror_group_name in group_names
+    - inventory_hostname in play_hosts
+
+- name: restart ceph rbd mirror daemon(s)
+  command: /tmp/restart_rbd_mirror_daemon.sh
+  listen: "restart ceph rbdmirrors"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - rbdmirror_group_name in group_names
+    - rbd_mirror_socket_stat.rc == 0
+
+- name: copy mgr restart script
+  template:
+    src: restart_mgr_daemon.sh.j2
+    dest: /tmp/restart_mgr_daemon.sh
+    owner: root
+    group: root
+    mode: "0750"
+  listen: "restart ceph mgrs"
+  when:
+    - mgr_group_name in group_names
+    - inventory_hostname in play_hosts
+
+- name: restart ceph mgr daemon(s)
+  command: /tmp/restart_mgr_daemon.sh
+  listen: "restart ceph mgrs"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - mgr_group_name in group_names
+    - mgr_socket_stat.rc == 0
diff --git a/roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2
new file mode 100644
index 000000000..bfc85ba40
--- /dev/null
+++ b/roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Restart the ceph-mgr unit for this host, then poll for its admin socket.
+# Exits 0 as soon as the socket exists, 1 once every retry has been used.
+
+RETRIES="{{ handler_health_mgr_check_retries }}"
+DELAY="{{ handler_health_mgr_check_delay }}"
+MGR_NAME="{{ ansible_hostname }}"
+SOCKET="/var/run/ceph/{{ cluster }}-mgr.${MGR_NAME}.asok"
+
+# First, restart the daemon
+systemctl restart "ceph-mgr@${MGR_NAME}"
+
+# Wait and ensure the socket exists after restarting the daemon
+while [ "$RETRIES" -ne 0 ]; do
+  # docker_exec_cmd is a templated command prefix; it stays unquoted because it may render to several words
+  {{ docker_exec_cmd }} test -S "$SOCKET" && exit 0
+  sleep "$DELAY"
+  RETRIES=$((RETRIES - 1))
+done
+# If we reach this point, it means the socket is not present.
+echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running."
+exit 1
diff --git a/roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2
new file mode 100644
index 000000000..a9e9f6e29
--- /dev/null
+++ b/roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Restart the ceph-rbd-mirror unit for this host, then poll for its admin socket.
+# Exits 0 as soon as the socket exists, 1 once every retry has been used.
+
+RETRIES="{{ handler_health_rbd_mirror_check_retries }}"
+DELAY="{{ handler_health_rbd_mirror_check_delay }}"
+RBD_MIRROR_NAME="{{ ansible_hostname }}"
+SOCKET="/var/run/ceph/{{ cluster }}-client.rbd-mirror.${RBD_MIRROR_NAME}.asok"
+
+# First, restart the daemon
+systemctl restart "ceph-rbd-mirror@rbd-mirror.${RBD_MIRROR_NAME}"
+
+# Wait and ensure the socket exists after restarting the daemon
+while [ "$RETRIES" -ne 0 ]; do
+  # docker_exec_cmd is a templated command prefix; it stays unquoted because it may render to several words
+  {{ docker_exec_cmd }} test -S "$SOCKET" && exit 0
+  sleep "$DELAY"
+  RETRIES=$((RETRIES - 1))
+done
+# If we reach this point, it means the socket is not present.
+echo "Socket file ${SOCKET} could not be found, which means rbd mirror is not running."
+exit 1