#!/bin/bash
#
# Restart the local Ceph monitor daemon and wait until it is back in quorum.
#
# This file is a template: the double-curly-brace placeholders and the
# percent-brace conditional below are substituted by the template engine
# before the script runs (the syntax looks like Jinja2 -- presumably rendered
# by Ansible; confirm against the calling role).
#
# Flow:
#   1. Restart the ceph-mon systemd unit for this monitor.
#   2. Poll (10 attempts, DELAY seconds apart) for the monitor admin socket.
#   3. Once the socket exists, poll the cluster status (RETRIES attempts,
#      DELAY seconds apart) until this monitor is listed in "quorum_names".
#
# Exit status:
#   0 - this monitor is listed in the quorum
#   1 - the admin socket never appeared, or the monitor did not join the
#       quorum before the retries ran out

RETRIES="{{ handler_health_mon_check_retries }}"
DELAY="{{ handler_health_mon_check_delay }}"
MONITOR_NAME="{{ monitor_name }}"
SOCKET="/var/run/ceph/{{ cluster }}-mon.${MONITOR_NAME}.asok"

# Command prefix used to run ceph/test inside the monitor container.
# Kept as an array so it expands to zero words when the deployment is not
# containerized, and to correctly separated words when it is.
DOCKER_EXEC=()
{% if containerized_deployment %}
DOCKER_EXEC=(docker exec "ceph-mon-{{ ansible_hostname }}")
{% endif %}

#######################################
# Wait for this monitor to show up in the cluster quorum.
# Terminates the whole script: exit 0 on success, exit 1 (after printing
# cluster and quorum diagnostics) once RETRIES is exhausted.
# Globals:   RETRIES (read, decremented), DELAY, MONITOR_NAME, DOCKER_EXEC (read)
# Arguments: none
# Outputs:   diagnostics on stdout when the quorum check fails
#######################################
check_quorum() {
  local members
  while (( RETRIES > 0 )); do
    # 'sed -n ... p' prints only when the quorum_names list is actually found.
    # Without -n, a non-matching status would be passed through unchanged and
    # the monitor name could match some unrelated part of the JSON.
    members=$("${DOCKER_EXEC[@]}" ceph --cluster "{{ cluster }}" -s --format json \
      | sed -rn 's/.*"quorum_names":[[:space:]]*(\[[^]]*\]).*/\1/p')
    # Match the name together with its JSON quotes so that e.g. "mon1" is not
    # considered present just because "mon10" is in the quorum.
    if [[ "$members" == *"\"${MONITOR_NAME}\""* ]]; then
      exit 0
    fi
    sleep "$DELAY"
    RETRIES=$(( RETRIES - 1 ))
  done
  # If we reach this point, it means there is a problem with the quorum
  echo "Error with quorum."
  echo "cluster status:"
  "${DOCKER_EXEC[@]}" ceph --cluster "{{ cluster }}" -s
  echo "quorum status:"
  "${DOCKER_EXEC[@]}" ceph --cluster "{{ cluster }}" daemon "mon.${MONITOR_NAME}" mon_status
  "${DOCKER_EXEC[@]}" ceph --cluster "{{ cluster }}" daemon "mon.${MONITOR_NAME}" quorum_status
  exit 1
}

# First, restart the daemon
systemctl restart "ceph-mon@${MONITOR_NAME}"

COUNT=10
# Wait and ensure the socket exists after restarting the daemon.
# check_quorum never returns (it exits 0 or 1), so the loop only keeps
# iterating while the socket is still missing.
while (( COUNT > 0 )); do
  if "${DOCKER_EXEC[@]}" test -S "$SOCKET"; then
    check_quorum
  fi
  sleep "$DELAY"
  COUNT=$(( COUNT - 1 ))
done
# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means the monitor is not running."
exit 1