mirror of https://github.com/ceph/ceph-ansible.git
ceph-handler: Fix rgw socket in restart script
Since Mimic, the radosgw socket has two extra fields in the socket name (before the .asok suffix): <pid>.<ctid> Before: /var/run/ceph/ceph-client.rgw.cephaio-1.asok After: /var/run/ceph/ceph-client.rgw.cephaio-1.16913.23928832.asok The radosgw restart script doesn't handle this and could fail during an upgrade. If the SOCKETS variable isn't defined in the script, the test command won't fail because its return code is 0: $ test -S $ echo $? 0 There are multiple issues in that script: - The default SOCKETS value isn't defined due to a typo (SOCKET vs SOCKETS). - Because the socket name uses the pid, we need to check the socket name after the service restart. - After restarting the radosgw service we need to wait a few seconds; otherwise the socket won't be created yet. - Update the wget parameters because the command retries in a loop. We now use the same options as curl. - The check_rest function doesn't test the radosgw at all due to a wrong test command (test against a string) and always returns 0. This needs to use the DOCKER_EXECS variable in order to execute the command. $ test 'wget http://192.168.100.11:8080' $ echo $? 0 Also remove the test based on the ansible_fqdn because we only use the ansible_hostname + rgw instance name. Finally, group all the for loops into a single one. Resolves: #3926 Signed-off-by: Dimitri Savineau <dsavinea@redhat.com> pull/4188/head
parent
d526803c6c
commit
c90f605b51
|
@ -14,19 +14,14 @@ fi
|
|||
declare -a DOCKER_EXECS
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
DOCKER_EXECS[i]=""
|
||||
{% if containerized_deployment %}
|
||||
{% if containerized_deployment | bool %}
|
||||
CONTAINER_NAME="ceph-rgw-${HOST_NAME}-rgw${i}"
|
||||
DOCKER_EXECS[i]="{{ container_binary }} exec ${CONTAINER_NAME}"
|
||||
{% endif %}
|
||||
done
|
||||
declare -a SOCKETS
|
||||
# Backward compatibility
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
SOCKET[i]="EMPTY_SOCKET"
|
||||
${DOCKER_EXECS[i]} test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok && SOCKETS[i]=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok
|
||||
${DOCKER_EXECS[i]} test -S /var/run/ceph/{{ cluster }}-client.rgw.${HOST_NAME}.rgw${i}.asok && SOCKETS[i]=/var/run/ceph/{{ cluster }}-client.rgw.${HOST_NAME}.rgw${i}.asok
|
||||
done
|
||||
RGW_IP={{ hostvars[inventory_hostname]['_radosgw_address'] }}
|
||||
SOCKET_PREFIX="/var/run/ceph/ceph-client.rgw.${HOST_NAME}.rgw"
|
||||
|
||||
check_socket() {
|
||||
local i=$1
|
||||
|
@ -34,6 +29,7 @@ check_socket() {
|
|||
local count=10
|
||||
# Wait and ensure the socket exists after restarting the daemon
|
||||
while [ $count -ne 0 ]; do
|
||||
${DOCKER_EXECS[i]} test -S ${SOCKET_PREFIX}${i}.*.*.asok && SOCKETS[i]=$(stat --printf=%n ${SOCKET_PREFIX}${i}.*.*.asok)
|
||||
${DOCKER_EXECS[i]} test -S ${SOCKETS[i]} && succ=$((succ+1)) && break
|
||||
sleep $DELAY
|
||||
let count=count-1
|
||||
|
@ -48,7 +44,7 @@ check_socket() {
|
|||
check_for_curl_or_wget() {
|
||||
local i=$1
|
||||
if ${DOCKER_EXECS[i]} command -v wget &>/dev/null; then
|
||||
rgw_test_command="wget --quiet"
|
||||
rgw_test_command="wget --tries 1 --quiet -O /dev/null"
|
||||
elif ${DOCKER_EXECS[i]} command -v curl &>/dev/null; then
|
||||
rgw_test_command="curl --fail --silent --output /dev/null"
|
||||
else
|
||||
|
@ -63,28 +59,23 @@ check_rest() {
|
|||
check_for_curl_or_wget ${i}
|
||||
local succ=0
|
||||
while [ $RETRIES -ne 0 ]; do
|
||||
test "$rgw_test_command $RGW_PROTOCOL://$RGW_IP:$((RGW_BASE_PORT+i))" && succ=$((succ+1)) && break
|
||||
${DOCKER_EXECS[i]} $rgw_test_command $RGW_PROTOCOL://$RGW_IP:$((RGW_BASE_PORT+i)) && succ=$((succ+1)) && break
|
||||
sleep $DELAY
|
||||
let RETRIES=RETRIES-1
|
||||
done
|
||||
if [ $succ -ne 1 ]; then
|
||||
# If we reach this point, it means there is a problem with the connection to rgw
|
||||
echo "Error connecting locally to Rados Gateway service: $RGW_PROTOCOL://$rgw_listen"
|
||||
echo "Error connecting locally to Rados Gateway service: $RGW_PROTOCOL://$RGW_IP:$((RGW_BASE_PORT+i))"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# First, restart the daemon
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
SOCKETS[i]="EMPTY_SOCKET"
|
||||
# First, restart the daemon
|
||||
systemctl restart ceph-radosgw@rgw.${HOST_NAME}.rgw${i}
|
||||
done
|
||||
|
||||
# Check socket files
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
# Check socket files
|
||||
check_socket ${i}
|
||||
done
|
||||
|
||||
# Check rest
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
# Check rest
|
||||
check_rest ${i}
|
||||
done
|
||||
|
|
Loading…
Reference in New Issue