restart_osd_daemon.sh.j2 - Reset RETRIES between calls of check_pgs

Previously RETRIES was set (by default to 40) once at the start of the
script; this meant that it would only ever wait for up to 40 retries of
30s each across *all* the OSDs on a host before bailing out. In fact, we
want to be prepared to wait for the same amount of time after each OSD
restart for the cluster's PGs to be healthy again before continuing.

Closes: #3154
Signed-off-by: Matthew Vernon <mv3@sanger.ac.uk>
pull/3152/head
Matthew Vernon 2018-09-21 17:55:01 +01:00 committed by mergify[bot]
parent bd82c380c4
commit aa97ecf048
1 changed file with 1 addition and 1 deletion

View File

@ -1,6 +1,5 @@
#!/bin/bash #!/bin/bash
RETRIES="{{ handler_health_osd_check_retries }}"
DELAY="{{ handler_health_osd_check_delay }}" DELAY="{{ handler_health_osd_check_delay }}"
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}" CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"
@ -78,6 +77,7 @@ for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-
{% endif %} {% endif %}
SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
while [ $COUNT -ne 0 ]; do while [ $COUNT -ne 0 ]; do
RETRIES="{{ handler_health_osd_check_retries }}"
$docker_exec test -S "$SOCKET" && check_pgs && continue 2 $docker_exec test -S "$SOCKET" && check_pgs && continue 2
sleep $DELAY sleep $DELAY
let COUNT=COUNT-1 let COUNT=COUNT-1