Restart all OSDs and do not stop after the first one.

The current handler only restarts one OSD on each OSD server. After
the first one the handler stops, no matter what results the checks had.

Co-Authored-By: Gaudenz Steinlin (@gaudenz)
pull/1595/head
Christian Zunker 2017-06-12 08:30:22 +00:00
parent f5c2d3de9c
commit bf8347e149
1 changed files with 2 additions and 2 deletions

View File

@@ -8,7 +8,7 @@ check_pgs() {
while [ $RETRIES -ne 0 ]; do
test "[""$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" == "$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
RET=$?
test $RET -eq 0 && exit 0
test $RET -eq 0 && return 0
sleep $DELAY
let RETRIES=RETRIES-1
done
@@ -28,7 +28,7 @@ for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do
# Wait and ensure the socket exists after restarting the daemon
SOCKET=/var/run/ceph/{{ cluster }}-osd.${id}.asok
while [ $COUNT -ne 0 ]; do
test -S $SOCKET && check_pgs
test -S $SOCKET && check_pgs && continue 2
sleep 1
let COUNT=COUNT-1
done