#!/bin/bash
#
# Template for a Bash script (the double-brace and brace-percent markers are
# substituted by the template engine before the script runs).
#
# For every configured Rados Gateway instance on this host the script:
#   1. restarts its ceph-radosgw systemd unit (skipping units that do not
#      exist yet),
#   2. waits for the daemon's unix socket to show up in /proc/net/unix,
#   3. probes the HTTP(S) frontend with wget or curl until it answers
#      200, 404 or 405.
# The script exits with status 1 as soon as one instance fails step 2 or 3.

RETRIES="{{ handler_health_rgw_check_retries }}"
DELAY="{{ handler_health_rgw_check_delay }}"
HOST_NAME="{{ ansible_facts['hostname'] }}"
RGW_NUMS={{ rgw_instances | length | int }}
# Quoted so that an empty value or a path containing whitespace stays a
# single, well-formed assignment.
RGW_FRONTEND_SSL_CERT="{{ radosgw_frontend_ssl_certificate }}"

# A non-empty certificate setting selects https for the REST probe.
if [[ -n "$RGW_FRONTEND_SSL_CERT" ]]; then
  RGW_PROTOCOL=https
else
  RGW_PROTOCOL=http
fi

# Parallel arrays: index N describes the N-th rgw instance.
INSTANCES_NAME=({% for i in rgw_instances %}{{ i.instance_name }} {% endfor %})
RGW_IPS=({% for i in rgw_instances %}{{ i.radosgw_address }} {% endfor %})
RGW_PORTS=({% for i in rgw_instances %}{{ i.radosgw_frontend_port }} {% endfor %})

declare -a DOCKER_EXECS
declare -a SOCKET_PREFIX

# Per instance: the socket path prefix to look for and the command prefix
# used to run checks (empty on bare metal, "<binary> exec <container>" when
# the deployment is containerized).
for ((i = 0; i < RGW_NUMS; i++)); do
  SOCKET_PREFIX[i]="/var/run/ceph/{{ cluster }}-client.rgw.${HOST_NAME}.${INSTANCES_NAME[i]}"
  DOCKER_EXECS[i]=""
{% if containerized_deployment | bool %}
  DOCKER_EXECS[i]="{{ container_binary }} exec ceph-rgw-${HOST_NAME}-${INSTANCES_NAME[i]}"
{% endif %}
done

#######################################
# Wait for the unix socket of one rgw instance to exist.
# Polls up to 10 times, sleeping DELAY between attempts.
# Globals:   SOCKET_PREFIX, DOCKER_EXECS, DELAY, HOST_NAME, INSTANCES_NAME
#            (read); SOCKET (written)
# Arguments: $1 - index of the instance in the arrays above
# Outputs:   on failure, an error message and the unit's journal on stdout
# Returns:   0 when the socket exists; exits the script with 1 otherwise
#######################################
check_socket() {
  local i=$1
  local succ=0
  local count=10
  local missing
  # Wait and ensure the socket exists after restarting the daemon
  while ((count != 0)); do
    # -F: the prefix is a literal path, not a regular expression.
    # Only the first match is kept so SOCKET is always a single path.
    SOCKET=$(grep -F -- "${SOCKET_PREFIX[i]}" /proc/net/unix | awk '{ print $8; exit }')
    if [[ -n "${SOCKET}" ]]; then
      # DOCKER_EXECS[i] is intentionally unquoted: it is either empty or a
      # multi-word command prefix that must be word-split.
      # shellcheck disable=SC2086
      if ${DOCKER_EXECS[i]} test -S "${SOCKET}"; then
        succ=1
        break
      fi
    fi
    sleep "$DELAY"
    count=$((count - 1))
  done
  if ((succ != 1)); then
    # When nothing matched, SOCKET is empty; report the prefix that was
    # searched for instead of printing an empty name.
    missing=${SOCKET:-${SOCKET_PREFIX[i]}}
    printf 'Socket file %s could not be found, which means Rados Gateway is not running. \nShowing ceph-rgw unit logs now:\n' "${missing}"
    journalctl -u "ceph-radosgw@rgw.${HOST_NAME}.${INSTANCES_NAME[i]}"
    exit 1
  fi
}

#######################################
# Probe the frontend of one rgw instance once, using wget if available,
# otherwise curl.
# Globals:   RGW_PROTOCOL, RGW_IPS, RGW_PORTS, DOCKER_EXECS (read);
#            rgw_test_result (written: HTTP status reported by the tool,
#            or 0 when neither tool is available)
# Arguments: $1 - index of the instance in the arrays above
# Outputs:   a notice on stdout when neither wget nor curl is found
#######################################
check_for_curl_or_wget() {
  local i=$1
  local url="$RGW_PROTOCOL://${RGW_IPS[i]}:${RGW_PORTS[i]}"
  # "$url" is quoted below: a bracketed IPv6 literal would otherwise be
  # treated as a glob pattern by the shell.
  # shellcheck disable=SC2086
  if ${DOCKER_EXECS[i]} command -v wget &>/dev/null; then
    # wget prints the server response on stderr; the status code is the
    # second field of its first line.
    rgw_test_result=$(${DOCKER_EXECS[i]} wget --no-check-certificate --tries 1 --quiet --server-response --spider -O /dev/null 2>&1 "$url" | awk 'NR==1{print $2}')
  elif ${DOCKER_EXECS[i]} command -v curl &>/dev/null; then
    rgw_test_result=$(${DOCKER_EXECS[i]} curl {{ '-g' if ip_version == 'ipv6' else '' }} -k -w "%{http_code}" --silent --output /dev/null "$url")
  else
    echo "It seems that neither curl nor wget are available on your system."
    echo "Cannot test rgw connection."
    rgw_test_result=0
  fi
}

#######################################
# Retry the REST probe for one rgw instance until it answers 200, 404 or
# 405, sleeping DELAY between attempts.
# Each call gets the full RETRIES budget: the counter is local, so attempts
# spent on one instance do not reduce the attempts left for the next one.
# Globals:   RETRIES, DELAY, RGW_PROTOCOL, RGW_IPS, RGW_PORTS (read);
#            rgw_test_result (read, set by check_for_curl_or_wget)
# Arguments: $1 - index of the instance in the arrays above
# Outputs:   an error message on stdout when all attempts fail
# Returns:   0 on success; exits the script with 1 otherwise
#######################################
check_rest() {
  local i=$1
  local succ=0
  local retries=$RETRIES
  while ((retries != 0)); do
    check_for_curl_or_wget "${i}"
    # String match instead of a numeric test: the probe can legitimately
    # return an empty string, which would be a syntax error for "[ -eq ]".
    case "${rgw_test_result}" in
      200 | 404 | 405)
        succ=1
        break
        ;;
    esac
    sleep "$DELAY"
    retries=$((retries - 1))
  done
  if ((succ != 1)); then
    # If we reach this point, it means there is a problem with the connection to rgw
    echo "Error connecting locally to Rados Gateway service: $RGW_PROTOCOL://${RGW_IPS[i]}:${RGW_PORTS[i]}"
    exit 1
  fi
}

for ((i = 0; i < RGW_NUMS; i++)); do
  unit="ceph-radosgw@rgw.${HOST_NAME}.${INSTANCES_NAME[i]}"
  # First, restart the daemon

  # Check if systemd unit exists
  # This is needed for new instances as the restart might trigger before the deployment
  # -F: the unit name is a literal string, not a regular expression.
  if systemctl list-units --full --all | grep -qF -- "${unit}"; then
    systemctl restart "${unit}"
  else
    echo "Systemd unit ${unit} does not exist."
    continue
  fi

  # Check socket files
  check_socket "${i}"
  # Check rest
  check_rest "${i}"
done