mirror of https://github.com/ceph/ceph-ansible.git
ceph-handler: Fix rgw socket in restart script
Since Mimic the radosgw socket has two extra fields in the socket
name (before the .asok suffix): <pid>.<ctid>
Before:
/var/run/ceph/ceph-client.rgw.cephaio-1.asok
After:
/var/run/ceph/ceph-client.rgw.cephaio-1.16913.23928832.asok
The radosgw restart script doesn't handle this and could fail during
an upgrade.
If the SOCKETS variable isn't defined in the script then the test
command won't fail because the return code is 0
$ test -S
$ echo $?
0
There are multiple issues in that script:
- The default SOCKETS value isn't defined due to a typo
SOCKET vs SOCKETS.
- Because the socket name includes the pid, we need to check the
socket name after the service restart.
- After restarting the radosgw service we need to wait a few seconds,
otherwise the socket won't be created yet.
- Update the wget parameters because the command is doing a loop.
We now use the same options as curl.
- The check_rest function doesn't test the radosgw at all due to
a wrong test command (test against a string) and always returns 0.
This needs to use the DOCKER_EXECS variable in order to execute the
command.
$ test 'wget http://192.168.100.11:8080'
$ echo $?
0
Also remove the test based on the ansible_fqdn because we only use
the ansible_hostname + rgw instance name.
Finally, group all the for loops into a single one.
Resolves: #3926
Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
(cherry picked from commit c90f605b51
)
pull/4192/head
parent
72e0ac1f44
commit
d4a3e26534
|
@ -14,19 +14,14 @@ fi
|
|||
declare -a DOCKER_EXECS
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
DOCKER_EXECS[i]=""
|
||||
{% if containerized_deployment %}
|
||||
{% if containerized_deployment | bool %}
|
||||
CONTAINER_NAME="ceph-rgw-${HOST_NAME}-rgw${i}"
|
||||
DOCKER_EXECS[i]="{{ container_binary }} exec ${CONTAINER_NAME}"
|
||||
{% endif %}
|
||||
done
|
||||
declare -a SOCKETS
|
||||
# Backward compatibility
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
SOCKET[i]="EMPTY_SOCKET"
|
||||
${DOCKER_EXECS[i]} test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok && SOCKETS[i]=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok
|
||||
${DOCKER_EXECS[i]} test -S /var/run/ceph/{{ cluster }}-client.rgw.${HOST_NAME}.rgw${i}.asok && SOCKETS[i]=/var/run/ceph/{{ cluster }}-client.rgw.${HOST_NAME}.rgw${i}.asok
|
||||
done
|
||||
RGW_IP={{ hostvars[inventory_hostname]['_radosgw_address'] }}
|
||||
SOCKET_PREFIX="/var/run/ceph/ceph-client.rgw.${HOST_NAME}.rgw"
|
||||
|
||||
check_socket() {
|
||||
local i=$1
|
||||
|
@ -34,6 +29,7 @@ check_socket() {
|
|||
local count=10
|
||||
# Wait and ensure the socket exists after restarting the daemon
|
||||
while [ $count -ne 0 ]; do
|
||||
${DOCKER_EXECS[i]} test -S ${SOCKET_PREFIX}${i}.*.*.asok && SOCKETS[i]=$(stat --printf=%n ${SOCKET_PREFIX}${i}.*.*.asok)
|
||||
${DOCKER_EXECS[i]} test -S ${SOCKETS[i]} && succ=$((succ+1)) && break
|
||||
sleep $DELAY
|
||||
let count=count-1
|
||||
|
@ -48,7 +44,7 @@ check_socket() {
|
|||
check_for_curl_or_wget() {
|
||||
local i=$1
|
||||
if ${DOCKER_EXECS[i]} command -v wget &>/dev/null; then
|
||||
rgw_test_command="wget --quiet"
|
||||
rgw_test_command="wget --tries 1 --quiet -O /dev/null"
|
||||
elif ${DOCKER_EXECS[i]} command -v curl &>/dev/null; then
|
||||
rgw_test_command="curl --fail --silent --output /dev/null"
|
||||
else
|
||||
|
@ -63,28 +59,23 @@ check_rest() {
|
|||
check_for_curl_or_wget ${i}
|
||||
local succ=0
|
||||
while [ $RETRIES -ne 0 ]; do
|
||||
test "$rgw_test_command $RGW_PROTOCOL://$RGW_IP:$((RGW_BASE_PORT+i))" && succ=$((succ+1)) && break
|
||||
${DOCKER_EXECS[i]} $rgw_test_command $RGW_PROTOCOL://$RGW_IP:$((RGW_BASE_PORT+i)) && succ=$((succ+1)) && break
|
||||
sleep $DELAY
|
||||
let RETRIES=RETRIES-1
|
||||
done
|
||||
if [ $succ -ne 1 ]; then
|
||||
# If we reach this point, it means there is a problem with the connection to rgw
|
||||
echo "Error connecting locally to Rados Gateway service: $RGW_PROTOCOL://$rgw_listen"
|
||||
echo "Error connecting locally to Rados Gateway service: $RGW_PROTOCOL://$RGW_IP:$((RGW_BASE_PORT+i))"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
SOCKETS[i]="EMPTY_SOCKET"
|
||||
# First, restart the daemon
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
systemctl restart ceph-radosgw@rgw.${HOST_NAME}.rgw${i}
|
||||
done
|
||||
|
||||
# Check socket files
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
check_socket ${i}
|
||||
done
|
||||
|
||||
# Check rest
|
||||
for ((i=0; i<${RGW_NUMS}; i++)); do
|
||||
check_rest ${i}
|
||||
done
|
||||
|
|
Loading…
Reference in New Issue