ceph-ansible/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2

#!/bin/bash
#
# Restart the ceph-mgr systemd unit for this host and wait until its admin
# socket shows up. This file is a Jinja2 template: the double-brace
# placeholders are substituted before the script is executed.
#
# Exit status:
#   0  an admin socket for the manager was found after the restart
#   1  no socket appeared after RETRIES checks (sleeping DELAY between them);
#      the ceph-mgr unit logs are printed to help debugging

RETRIES="{{ handler_health_mgr_check_retries }}"
DELAY="{{ handler_health_mgr_check_delay }}"
MGR_NAME="{{ ansible_hostname }}"
# Command prefix used to run the socket checks inside the mgr container.
# Initialised to empty so that non-containerized deployments run the checks
# directly on the host and never pick up a stray value from the environment.
DOCKER_EXEC=""
{% if containerized_deployment | bool %}
DOCKER_EXEC="{{ container_binary }} exec ceph-mgr-{{ ansible_hostname }}"
{% endif %}

SOCKET_FQDN="/var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok"
SOCKET_SHORT="/var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok"

# Backward compatibility: the socket may be named after the fqdn or after the
# short hostname. Detect which one currently exists; when both exist the short
# hostname wins because it is checked last.
# NOTE: $DOCKER_EXEC is intentionally left unquoted everywhere: it is either
# empty or a multi-word command prefix and must be word-split.
SOCKET=""
$DOCKER_EXEC test -S "$SOCKET_FQDN" && SOCKET="$SOCKET_FQDN"
$DOCKER_EXEC test -S "$SOCKET_SHORT" && SOCKET="$SOCKET_SHORT"

# If no socket was present before the restart (daemon down, container not
# running), SOCKET is empty. An unquoted `test -S $SOCKET` would then collapse
# to `test -S`, which is always true, and the script would report success
# without checking anything. In that case accept either naming instead.
if [ -n "$SOCKET" ]; then
  CANDIDATES=("$SOCKET")
else
  CANDIDATES=("$SOCKET_SHORT" "$SOCKET_FQDN")
fi

# Clear the unit's failure counter before restarting so that systemd start
# limits hit by earlier restarts do not block this one.
systemctl reset-failed "ceph-mgr@${MGR_NAME}"
# First, restart the daemon
systemctl restart "ceph-mgr@${MGR_NAME}"

# Wait and ensure the socket exists after restarting the daemons
while [ "$RETRIES" -gt 0 ]; do
  for candidate in "${CANDIDATES[@]}"; do
    $DOCKER_EXEC test -S "$candidate" && exit 0
  done
  sleep "$DELAY"
  RETRIES=$((RETRIES - 1))
done
# If we reach this point, it means the socket is not present.
echo "Socket file ${CANDIDATES[*]} could not be found, which means ceph manager is not running. Showing ceph-mgr unit logs now:"
journalctl -u "ceph-mgr@${MGR_NAME}"
exit 1
defaults: add missing handlers for rbd mirorr and mgr Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-27 08:08:40 +08:00			`#!/bin/bash`

			`RETRIES="{{ handler_health_mgr_check_retries }}"`
			`DELAY="{{ handler_health_mgr_check_delay }}"`
			`MGR_NAME="{{ ansible_hostname }}"`
add missing boolean filter Otherwise this will generate an ansible warning about the missing filter. [DEPRECATION WARNING]: evaluating xxx as a bare variable, this behaviour will go away and you might need to add \|bool to the expression in the future. Also see CONDITIONAL_BARE_VARS configuration toggle.. This feature will be removed in version 2.12. Signed-off-by: Dimitri Savineau <dsavinea@redhat.com> 2020-09-26 00:15:02 +08:00			`{% if containerized_deployment \| bool %}`
Add new container scenario Test with podman instead of docker and also support for python 3 only. Signed-off-by: Sébastien Han <seb@redhat.com> 2018-11-08 17:02:37 +08:00			`DOCKER_EXEC="{{ container_binary }} exec ceph-mgr-{{ ansible_hostname }}"`
ci: add collocation scenario Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-28 00:22:15 +08:00			`{% endif %}`
defaults: add missing handlers for rbd mirorr and mgr Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-27 08:08:40 +08:00
defaults: backward compatibility with fqdn deployments This commit ensures we are backward compatible with fqdn deployments. Since ceph-container enforces deployment to be done with shortname, we must keep backward compatibility with clusters already deployed with fqdn configuration Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> 2018-07-31 21:18:28 +08:00			`# Backward compatibility`
			`$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok`
			`$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok`

reset failed count of ceph-mgr Depending on your setup, ceph-mgr might get restarted multiple times. When this is done too fast, systemd will prevent further restarts because of configured limits in the ceph-mgr systemd unit file. Resetting the failure count will prevent this problem. The reset is done before the restart so in case of a real problem during the restart it still fails. Fixes: #2768 Signed-off-by: Christian Zunker <christian.zunker@codecentric.cloud> 2018-06-20 13:01:06 +08:00			`systemctl reset-failed ceph-mgr@${MGR_NAME}`
defaults: add missing handlers for rbd mirorr and mgr Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-27 08:08:40 +08:00			`# First, restart the daemon`
			`systemctl restart ceph-mgr@${MGR_NAME}`

			`# Wait and ensure the socket exists after restarting the daemons`
			`while [ $RETRIES -ne 0 ]; do`
ci: add collocation scenario Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-28 00:22:15 +08:00			`$DOCKER_EXEC test -S $SOCKET && exit 0`
defaults: add missing handlers for rbd mirorr and mgr Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-27 08:08:40 +08:00			`sleep $DELAY`
			`let RETRIES=RETRIES-1`
			`done`
			`# If we reach this point, it means the socket is not present.`
handler: show unit logs on error This will tremendously help debugging daemons that fail on restart by showing the systemd unit logs. Signed-off-by: Sébastien Han <seb@redhat.com> 2018-11-27 17:45:05 +08:00			`echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running. Showing ceph-mgr unit logs now:"`
			`journalctl -u ceph-mgr@${MGR_NAME}`
defaults: add missing handlers for rbd mirorr and mgr Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-27 08:08:40 +08:00			`exit 1`