From bf8347e1491926418923bba9870109dfc3fbe89a Mon Sep 17 00:00:00 2001 From: Christian Zunker Date: Mon, 12 Jun 2017 08:30:22 +0000 Subject: [PATCH] Restart all OSDs and do not stop after the first one. The current handler only restarts one OSD on each OSD server. After the first one the handler stops, no matter what results the checks had. Co-Authored-By: Gaudenz Steinlin (@gaudenz) --- roles/ceph-common/templates/restart_osd_daemon.sh.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/ceph-common/templates/restart_osd_daemon.sh.j2 b/roles/ceph-common/templates/restart_osd_daemon.sh.j2 index 5d1d04597..f027574cf 100644 --- a/roles/ceph-common/templates/restart_osd_daemon.sh.j2 +++ b/roles/ceph-common/templates/restart_osd_daemon.sh.j2 @@ -8,7 +8,7 @@ check_pgs() { while [ $RETRIES -ne 0 ]; do test "[""$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" == "$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')" RET=$? - test $RET -eq 0 && exit 0 + test $RET -eq 0 && return 0 sleep $DELAY let RETRIES=RETRIES-1 done @@ -28,7 +28,7 @@ for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do # Wait and ensure the socket exists after restarting the daemon SOCKET=/var/run/ceph/{{ cluster }}-osd.${id}.asok while [ $COUNT -ne 0 ]; do - test -S $SOCKET && check_pgs + test -S $SOCKET && check_pgs && continue 2 sleep 1 let COUNT=COUNT-1 done