#!/bin/bash
#
# Restart the ceph-mgr systemd unit for this host and wait until the
# manager's admin socket shows up again.
#
# This file is a template: the double-brace placeholders and the
# conditional block below are substituted before the script is run.
#
# Exit status:
#   0 - an admin socket was found after the restart
#   1 - no socket appeared within RETRIES attempts (DELAY seconds apart);
#       the unit's journal is printed to help diagnose the failure

RETRIES="{{ handler_health_mgr_check_retries }}"
DELAY="{{ handler_health_mgr_check_delay }}"
MGR_NAME="{{ ansible_hostname }}"

# Command prefix used to run the socket test. It stays empty on
# non-containerized deployments so that `test` runs directly on the host.
DOCKER_EXEC=()
{% if containerized_deployment | bool %}
DOCKER_EXEC=({{ container_binary }} exec "ceph-mgr-{{ ansible_hostname }}")
{% endif %}

# The socket may be named after either the FQDN or the short hostname.
SOCKET_FQDN="/var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok"
SOCKET_HOSTNAME="/var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok"

# Succeeds when $1 is an existing socket file (checked inside the
# container when DOCKER_EXEC is set).
socket_exists() {
  "${DOCKER_EXEC[@]}" test -S "$1"
}

# Backward compatibility: remember which naming scheme is currently in use.
# When both sockets exist, the short-hostname one wins (it is checked last).
SOCKET=""
for candidate in "${SOCKET_FQDN}" "${SOCKET_HOSTNAME}"; do
  if socket_exists "${candidate}"; then
    SOCKET="${candidate}"
  fi
done

# If no socket existed before the restart (e.g. the daemon was already
# down), watch both candidate paths. Running `test -S` with an empty path
# would otherwise degrade to a one-argument test, which is always true and
# would report success without checking anything.
if [[ -n "${SOCKET}" ]]; then
  CANDIDATES=("${SOCKET}")
else
  CANDIDATES=("${SOCKET_FQDN}" "${SOCKET_HOSTNAME}")
fi

# Clear any previous failed state so the restart is not refused by systemd.
systemctl reset-failed "ceph-mgr@${MGR_NAME}"
# First, restart the daemon
systemctl restart "ceph-mgr@${MGR_NAME}"

# Wait and ensure the socket exists after restarting the daemon.
# A "greater than zero" comparison is used so that a negative retry count
# cannot turn this into an endless loop.
while (( RETRIES > 0 )); do
  for candidate in "${CANDIDATES[@]}"; do
    if socket_exists "${candidate}"; then
      exit 0
    fi
  done
  sleep "${DELAY}"
  RETRIES=$(( RETRIES - 1 ))
done

# If we reach this point, it means the socket is not present.
echo "Socket file ${CANDIDATES[*]} could not be found, which means ceph manager is not running. Showing ceph-mgr unit logs now:"
journalctl -u "ceph-mgr@${MGR_NAME}"
exit 1