ceph-ansible/roles/ceph-defaults/templates/restart_mon_daemon.sh.j2

39 lines
1.1 KiB
Django/Jinja

#!/bin/bash
RETRIES="{{ handler_health_mon_check_retries }}"
DELAY="{{ handler_health_mon_check_delay }}"
MONITOR_NAME="{{ monitor_name }}"
SOCKET=/var/run/ceph/{{ cluster }}-mon.${MONITOR_NAME}.asok
{% if containerized_deployment %}
DOCKER_EXEC="docker exec ceph-mon-{{ ansible_hostname }}"
{% endif %}
check_quorum() {
while [ $RETRIES -ne 0 ]; do
MEMBERS=$($DOCKER_EXEC ceph --cluster {{ cluster }} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && exit 0
sleep $DELAY
let RETRIES=RETRIES-1
done
# If we reach this point, it means there is a problem with the quorum
echo "Error with quorum."
echo "cluster status:"
$DOCKER_EXEC ceph --cluster {{ cluster }} -s
exit 1
}
# First, restart the daemon
systemctl restart ceph-mon@${MONITOR_NAME}
COUNT=10
# Wait and ensure the socket exists after restarting the daemon
while [ $COUNT -ne 0 ]; do
$DOCKER_EXEC test -S $SOCKET && check_quorum
sleep $DELAY
let COUNT=COUNT-1
done
# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means the monitor is not running."
exit 1