mirror of https://github.com/ceph/ceph-ansible.git
39 lines
1.3 KiB
Django/Jinja
39 lines
1.3 KiB
Django/Jinja
#!/bin/bash
#
# Template for a script that restarts the ceph-osd daemons found under
# /var/lib/ceph/osd and checks that placement groups report active+clean.
# The double-brace placeholders are substituted when the template is rendered.

# Number of times the cluster status is polled before giving up.
RETRIES="{{ handler_health_osd_check_retries }}"

# Value handed to `sleep` between two polls of the cluster status.
DELAY="{{ handler_health_osd_check_delay }}"

# Options passed to every `ceph` invocation. This is a single string holding
# several space-separated options, so it has to be expanded unquoted.
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"

#######################################
# Poll the cluster status until it mentions 'active+clean'.
# Globals:   RETRIES  (read) - maximum number of polls
#            DELAY    (read) - argument given to sleep between polls
#            CEPH_CLI (read) - space-separated options for the ceph command
# Arguments: none
# Outputs:   when the polls are exhausted, an explanation followed by the
#            output of `ceph -s`, on stdout
# Returns:   0 as soon as 'active+clean' is seen, 1 when the polls run out.
#            The function returns instead of exiting so that the caller can
#            go on with the next OSD after a successful check.
#######################################
check_pgs() {
  # Count down on a private copy: every call gets the full retry budget.
  local remaining=$RETRIES

  while [ "$remaining" -gt 0 ]; do
    # CEPH_CLI is deliberately unquoted so it splits into separate options.
    # shellcheck disable=SC2086
    if ceph $CEPH_CLI -s | grep -sq 'active+clean'; then
      return 0
    fi
    sleep "$DELAY"
    remaining=$((remaining - 1))
  done

  # PGs not clean, report and hand status 1 back to the caller
  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
  echo "It is possible that the cluster has less OSDs than the replica configuration"
  echo "Will refuse to continue"
  # shellcheck disable=SC2086
  ceph $CEPH_CLI -s
  return 1
}

# Restart the OSDs one after the other. The next OSD is only touched once the
# previous one has its admin socket back and check_pgs has succeeded.
for osd_path in /var/lib/ceph/osd/*; do
  # When the directory is missing or empty the pattern stays literal: skip it.
  [ -e "$osd_path" ] || continue

  # Entry name with everything up to its last '-' removed, e.g. ceph-3 -> 3
  osd_name=${osd_path##*/}
  id=${osd_name##*-}

  # First, restart daemon(s)
  systemctl restart "ceph-osd@${id}"

  # Wait and ensure the socket exists after restarting the daemon; it may take
  # some time to show up, so look for it once per second, at most 10 times.
  socket="/var/run/ceph/{{ cluster }}-osd.${id}.asok"
  attempts=10
  while [ "$attempts" -ne 0 ]; do
    if [ -S "$socket" ]; then
      # Daemon is up: abort the whole run if the PGs do not settle,
      # otherwise move on to the next OSD.
      check_pgs || exit 1
      continue 2
    fi
    sleep 1
    attempts=$((attempts - 1))
  done

  # If we reach this point, it means the socket is not present.
  echo "Socket file ${socket} could not be found, which means the osd daemon is not running."
  exit 1
done