ceph-ansible/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2

#!/bin/bash

# Number of cluster status polls check_pgs performs before giving up
# (value injected when the template is rendered).
RETRIES="{{ handler_health_osd_check_retries }}"
# Argument handed to sleep between two polls; used by check_pgs, by the
# container lookup loop and by the socket wait loop at the bottom of the file.
DELAY="{{ handler_health_osd_check_delay }}"
# Extra arguments appended to every ceph invocation in this script: client
# name, the bootstrap-osd keyring path and the cluster name.
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"

#######################################
# Wait until every placement group is reported as active+clean.
# Globals:
#   RETRIES     (read) maximum number of status polls for this call
#   DELAY       (read) argument given to sleep between two polls
#   CEPH_CLI    (read) extra arguments passed to the ceph command
#   docker_exec (read) optional command prefix; empty when the caller did
#               not set it
# Outputs:
#   On failure, an explanation followed by the plain status output (stdout).
# Returns:
#   0 as soon as the number of active+clean PGs equals num_pgs; a cluster
#   that has no PG at all therefore counts as clean.
#   Exits the whole script with status 1 once all polls are used up.
#######################################
check_pgs() {
  # Work on a private copy so that one call does not eat into the retry
  # budget of the next one.
  local polls_left=$RETRIES
  local python_bin status_json
  # The checker below is valid on Python 2 and Python 3, so use whichever
  # interpreter is installed; fall back to the historical name.
  python_bin=$(command -v python || command -v python3 || echo python)
  while [ "$polls_left" -gt 0 ]; do
    # Take ONE status snapshot per poll and evaluate both figures from it;
    # comparing two separate snapshots can mix two different cluster states.
    # $docker_exec and $CEPH_CLI are unquoted on purpose: each holds several
    # words that must be split into separate arguments.
    if status_json=$($docker_exec ceph $CEPH_CLI -s -f json) &&
      printf '%s' "$status_json" | "$python_bin" -c 'import sys, json; pgmap = json.load(sys.stdin)["pgmap"]; clean = sum(s["count"] for s in pgmap.get("pgs_by_state", []) if s["state_name"] == "active+clean"); sys.exit(0 if clean == pgmap["num_pgs"] else 1)'; then
      return 0
    fi
    sleep "$DELAY"
    polls_left=$((polls_left - 1))
  done
  # PGs not clean, exiting with return code 1
  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
  echo "It is possible that the cluster has less OSDs than the replica configuration"
  echo "Will refuse to continue"
  $docker_exec ceph $CEPH_CLI -s
  exit 1
}

#######################################
# Block until an admin socket (*.asok) shows up in /var/run/ceph inside a
# container, polling once per second under `timeout 10`.
# Arguments:
#   $1 - container to run the wait loop in (passed to `docker exec`)
# Outputs:
#   Two diagnostic lines on stdout when the wait does not succeed.
# Returns:
#   0 when the in-container loop ends successfully; otherwise the whole
#   script exits with status 1.
#######################################
wait_for_socket_in_docker() {
  # Loop executed by bash inside the container; kept in a variable so the
  # docker command line stays readable.
  local poll_cmd="while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"
  docker exec "$1" timeout 10 bash -c "$poll_cmd" && return 0
  echo "Timed out while trying to look for a Ceph OSD socket."
  echo "Abort mission!"
  exit 1
}

#######################################
# Extract the device name from a containerized OSD unit name, e.g.
# ceph-osd@sda.service -> sda.
# Arguments:
#   $1 - systemd unit name
# Outputs:
#   The device name on stdout; input that does not match the
#   ceph-osd@<lowercase letters>.service form is printed unchanged.
#######################################
get_dev_name() {
  # Accept any number of lowercase letters, like the unit discovery regex
  # used by the restart loop, so long device names are not passed through
  # as a full unit name. printf with a quoted argument avoids word splitting
  # and globbing of the unit name.
  printf '%s\n' "$1" | sed -r 's/ceph-osd@([a-z]+)\.service/\1/'
}

#######################################
# Look up the id of the running container whose name matches a device name,
# trying up to ten times.
# Globals:
#   DELAY (read) argument given to sleep after each unsuccessful lookup
# Arguments:
#   $1 - value used in the `name=` filter of `docker ps`
# Outputs:
#   Whatever `docker ps -q` printed on the last attempt (empty when nothing
#   was found).
#######################################
get_docker_id_from_dev_name() {
  local container_id
  local attempts_left
  for ((attempts_left = 10; attempts_left != 0; attempts_left--)); do
    container_id=$(docker ps -q -f "name=$1")
    if [ "$container_id" != "" ]; then
      break
    fi
    sleep $DELAY
  done
  echo "$container_id"
}

#######################################
# Print the OSD id found in the socket file name of a container.
# Waits for a socket first, then lists /var/run/ceph inside the container
# and keeps the second dot-separated field of each listed name.
# Arguments:
#   $1 - container to inspect (passed to `docker exec`)
# Outputs:
#   The extracted field(s) on stdout, one per listed entry.
#######################################
get_docker_osd_id() {
  local container=$1
  wait_for_socket_in_docker $container
  docker exec "$container" ls /var/run/ceph |
    cut -d . -f 2
}

# Restart every loaded and active ceph-osd unit, one at a time. The next unit
# is only restarted once the current OSD's admin socket exists and check_pgs
# has succeeded; otherwise the script exits with status 1.
# For containerized deployments, the unit file looks like: ceph-osd@sda.service
# For non-containerized deployments, the unit file looks like: ceph-osd@0.service
for unit in $(systemctl list-units | grep "loaded active" | grep -oE "ceph-osd@([0-9]{1,2}|[a-z]+).service"); do
  # First, restart daemon(s)
  systemctl restart "${unit}"
  # We need to wait because it may take some time for the socket to actually exist
  COUNT=10
  # Wait and ensure the socket exists after restarting the daemon
  # The template conditional below picks, at render time, how osd_id is found:
  # containerized: unit name -> device name -> container id -> id read from
  # the socket file name inside the container, and docker_exec is set so the
  # later commands run inside that container;
  # otherwise: the numeric id is taken straight from the unit name and
  # docker_exec is not assigned here, so the later commands run without a prefix.
  {% if containerized_deployment -%}
  id=$(get_dev_name "$unit")
  container_id=$(get_docker_id_from_dev_name "$id")
  osd_id=$(get_docker_osd_id "$container_id")
  docker_exec="docker exec $container_id"
  {% else %}
  osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]{1,2}')
  {% endif %}
  SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
  # Up to COUNT attempts, sleeping DELAY between them.
  while [ $COUNT -ne 0 ]; do
    # "continue 2" leaves this wait loop and resumes the outer for loop, i.e.
    # moves on to the next unit. check_pgs either returns 0 or exits the script.
    $docker_exec test -S "$SOCKET" && check_pgs && continue 2
    sleep $DELAY
    let COUNT=COUNT-1
  done
  # If we reach this point, it means the socket is not present.
  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
  exit 1
done
Add handlers for containerized deployment Until now, there is no handlers for containerized deployments. Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> 2017-07-29 07:00:06 +08:00			`#!/bin/bash`

			`RETRIES="{{ handler_health_osd_check_retries }}"`
			`DELAY="{{ handler_health_osd_check_delay }}"`
			`CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"`

			`check_pgs() {`
			`while [ $RETRIES -ne 0 ]; do`
			`test "[""$($docker_exec ceph $CEPH_CLI -s -f json \| python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$($docker_exec ceph $CEPH_CLI -s -f json \| python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"`
			`RET=$?`
			`test $RET -eq 0 && return 0`
			`sleep $DELAY`
			`let RETRIES=RETRIES-1`
			`done`
			`# PGs not clean, exiting with return code 1`
			`echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"`
			`echo "It is possible that the cluster has less OSDs than the replica configuration"`
			`echo "Will refuse to continue"`
handler: display ceph status properly Fix bash error, doing ceph "$CEPH_CLI" -s gives us ceph '--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/test.keyring --cluster test' -s which results in a wrongly formatted command. Removing the double quotes expands the array properly. Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-22 23:45:35 +08:00			`$docker_exec ceph $CEPH_CLI -s`
Add handlers for containerized deployment Until now, there is no handlers for containerized deployments. Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> 2017-07-29 07:00:06 +08:00			`exit 1`
			`}`

			`wait_for_socket_in_docker() {`
			`if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"; then`
switch-from-non-containerized-to-containerized: simplify This commit eases the use of the infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml playbook. We basically run it with a couple of pre-tasks and then we let the playbook run the docker roles. It obviously expect to have proper variables configured in order to work. Signed-off-by: Sébastien Han <seb@redhat.com> 2017-08-22 22:43:01 +08:00			`echo "Timed out while trying to look for a Ceph OSD socket."`
			`echo "Abort mission!"`
Add handlers for containerized deployment Until now, there is no handlers for containerized deployments. Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> 2017-07-29 07:00:06 +08:00			`exit 1`
			`fi`
			`}`

			`get_dev_name() {`
			`echo $1 \| sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/'`
			`}`

			`get_docker_id_from_dev_name() {`
			`local id`
			`local count`
			`count=10`
			`while [ $count -ne 0 ]; do`
			`id=$(docker ps -q -f "name=$1")`
			`test "$id" != "" && break`
defaults: restart docker daemon higher delay Use default delay since the mon (in particular) can take more time to restart. Solves error with: STDERR: Error response from daemon: No such container: ceph-mon-mon0 Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-15 03:38:11 +08:00			`sleep $DELAY`
Add handlers for containerized deployment Until now, there is no handlers for containerized deployments. Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> 2017-07-29 07:00:06 +08:00			`let count=count-1`
			`done`
			`echo "$id"`
			`}`

			`get_docker_osd_id() {`
			`wait_for_socket_in_docker $1`
			`docker exec "$1" ls /var/run/ceph \| cut -d'.' -f2`
			`}`

			`# For containerized deployments, the unit file looks like: ceph-osd@sda.service`
			`# For non-containerized deployments, the unit file looks like: ceph-osd@0.service`
switch-from-non-containerized-to-containerized: simplify This commit eases the use of the infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml playbook. We basically run it with a couple of pre-tasks and then we let the playbook run the docker roles. It obviously expect to have proper variables configured in order to work. Signed-off-by: Sébastien Han <seb@redhat.com> 2017-08-22 22:43:01 +08:00			`for unit in $(systemctl list-units \| grep "loaded active" \| grep -oE "ceph-osd@([0-9]{1,2}\|[a-z]+).service"); do`
Add handlers for containerized deployment Until now, there is no handlers for containerized deployments. Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> 2017-07-29 07:00:06 +08:00			`# First, restart daemon(s)`
			`systemctl restart "${unit}"`
			`# We need to wait because it may take some time for the socket to actually exists`
			`COUNT=10`
			`# Wait and ensure the socket exists after restarting the daemon`
			`{% if containerized_deployment -%}`
			`id=$(get_dev_name "$unit")`
			`container_id=$(get_docker_id_from_dev_name "$id")`
			`osd_id=$(get_docker_osd_id "$container_id")`
			`docker_exec="docker exec $container_id"`
			`{% else %}`
			`osd_id=$(echo ${unit#ceph-osd@} \| grep -oE '[0-9]{1,2}')`
			`{% endif %}`
switch-from-non-containerized-to-containerized: simplify This commit eases the use of the infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml playbook. We basically run it with a couple of pre-tasks and then we let the playbook run the docker roles. It obviously expect to have proper variables configured in order to work. Signed-off-by: Sébastien Han <seb@redhat.com> 2017-08-22 22:43:01 +08:00			`SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok`
Add handlers for containerized deployment Until now, there is no handlers for containerized deployments. Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> 2017-07-29 07:00:06 +08:00			`while [ $COUNT -ne 0 ]; do`
			`$docker_exec test -S "$SOCKET" && check_pgs && continue 2`
defaults: restart docker daemon higher delay Use default delay since the mon (in particular) can take more time to restart. Solves error with: STDERR: Error response from daemon: No such container: ceph-mon-mon0 Signed-off-by: Sébastien Han <seb@redhat.com> 2017-09-15 03:38:11 +08:00			`sleep $DELAY`
Add handlers for containerized deployment Until now, there is no handlers for containerized deployments. Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> 2017-07-29 07:00:06 +08:00			`let COUNT=COUNT-1`
			`done`
			`# If we reach this point, it means the socket is not present.`
			`echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."`
			`exit 1`
			`done`