#!/bin/bash

# Settings for the monitor restart handler. The `{{ ... }}` placeholders are
# template variables (Jinja2-style); presumably they are substituted when the
# template is rendered, before this script is executed.

# Number of quorum checks attempted before giving up.
RETRIES="{{ handler_health_mon_check_retries }}"
# Seconds to sleep between two quorum checks.
DELAY="{{ handler_health_mon_check_delay }}"
# Name of the monitor handled by this script (used for the systemd unit,
# the admin socket path and the quorum membership check).
MONITOR_NAME="{{ monitor_name }}"
# Admin socket of the monitor; its presence is used as the "daemon is up" signal.
SOCKET="/var/run/ceph/{{ cluster }}-mon.${MONITOR_NAME}.asok"

#######################################
# Poll the cluster status until this monitor is listed in the quorum.
# Globals:
#   RETRIES       (read) number of attempts
#   DELAY         (read) seconds to sleep between attempts
#   MONITOR_NAME  (read) monitor name looked up in "quorum_names"
# Outputs:
#   On failure, writes an error message and the cluster status to stdout.
# Exits:
#   Never returns to the caller: exits the script with 0 as soon as the
#   monitor is found in the quorum, or with 1 once all attempts are used up.
#######################################
check_quorum() {
  local attempts_left=$RETRIES
  local members

  while [ "$attempts_left" -ne 0 ]; do
    # -n ... p: print only when "quorum_names" was actually found. Without it
    # sed would echo the whole JSON document on a non-match and the monitor
    # name could be "found" in an unrelated field.
    members=$({{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s --format json | sed -rn 's/.*"quorum_names":(\[[^]]+\]).*/\1/p')
    # Match the JSON-quoted name so that e.g. "mon1" is not satisfied by "mon10".
    if [[ "$members" == *"\"${MONITOR_NAME}\""* ]]; then
      exit 0
    fi
    sleep "$DELAY"
    attempts_left=$((attempts_left - 1))
  done

  # If we reach this point, it means there is a problem with the quorum
  echo "Error with quorum."
  echo "cluster status:"
  {{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s
  exit 1
}

# First, restart the daemon
systemctl restart "ceph-mon@${MONITOR_NAME}"

# Wait and ensure the socket exists after restarting the daemon.
# Up to 10 probes, one second apart. As soon as the socket is present,
# check_quorum takes over; it ends the script itself (exit 0 or exit 1),
# so the loop only keeps going while the socket is still missing.
COUNT=10
while [ "$COUNT" -ne 0 ]; do
  {{ docker_exec_cmd }} test -S "$SOCKET" && check_quorum
  sleep 1
  COUNT=$((COUNT - 1))
done

# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means the monitor is not running."
exit 1