From 4fa9057a3ebb4c8929df23c1a2b12554eb3957e4 Mon Sep 17 00:00:00 2001 From: Seena Fallah Date: Mon, 10 Jun 2024 12:11:55 +0200 Subject: [PATCH] ceph-handler: use haproxy maintenance for rgw restarts RGW currently restarts without waiting for existing connections to close. By adjusting the HAProxy weight before the restart, we can ensure that no active connections are disrupted during the restart process. Signed-off-by: Seena Fallah --- group_vars/all.yml.sample | 1 + roles/ceph-defaults/defaults/main.yml | 1 + .../templates/restart_rgw_daemon.sh.j2 | 30 +++++++++++++++---- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/group_vars/all.yml.sample b/group_vars/all.yml.sample index 205397288..ca6e48004 100644 --- a/group_vars/all.yml.sample +++ b/group_vars/all.yml.sample @@ -420,6 +420,7 @@ dummy: # RGW handler checks #handler_health_rgw_check_retries: 5 #handler_health_rgw_check_delay: 10 +#handler_rgw_use_haproxy_maintenance: false # NFS handler checks #handler_health_nfs_check_retries: 5 diff --git a/roles/ceph-defaults/defaults/main.yml b/roles/ceph-defaults/defaults/main.yml index d8b06cdd1..a61280100 100644 --- a/roles/ceph-defaults/defaults/main.yml +++ b/roles/ceph-defaults/defaults/main.yml @@ -412,6 +412,7 @@ handler_health_mds_check_delay: 10 # RGW handler checks handler_health_rgw_check_retries: 5 handler_health_rgw_check_delay: 10 +handler_rgw_use_haproxy_maintenance: false # NFS handler checks handler_health_nfs_check_retries: 5 diff --git a/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 index 5ea0f3c7d..d7eb36a72 100644 --- a/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 +++ b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 @@ -11,6 +11,7 @@ else RGW_PROTOCOL=http fi INSTANCES_NAME=({% for i in rgw_instances %}{{ i.instance_name }} {% endfor %}) +HAPROXY_BACKEND=({% for i in rgw_instances %}{{ i.haproxy_backend | default('rgw-backend') }} {% endfor %}) 
RGW_IPS=({% for i in rgw_instances %}{{ i.radosgw_address }} {% endfor %}) RGW_PORTS=({% for i in rgw_instances %}{{ i.radosgw_frontend_port }} {% endfor %}) RGW_ZONE="{{ rgw_zone }}"
@@ -78,19 +79,38 @@ check_rest() {
 }
 
 for ((i=0; i<${RGW_NUMS}; i++)); do
-  # First, restart the daemon
-
   # Check if systemd unit exists
   # This is needed for new instances as the restart might trigger before the deployment
-  if systemctl list-units --full --all | grep -q "ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"; then
-    systemctl restart ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}
-  else
+  if ! systemctl list-units --full --all | grep -q "ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"; then
     echo "Systemd unit ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]} does not exist."
     continue
   fi
 
+{% if handler_rgw_use_haproxy_maintenance %}
+  # Drain: set the server weight to 0 on haproxy so no new requests are routed to it
+  echo "set weight ${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]} 0" | socat stdio {{ haproxy_socket_path }}
+
+  # Wait (1s per retry) until the server reports 0 in field 7 of 'show servers conn'; restart anyway once retries run out
+  retries={{ handler_rgw_haproxy_maintenance_retries | default(60) }}
+  while [ "${retries}" -gt 0 ]; do
+    if echo "show servers conn ${HAPROXY_BACKEND[i]}" | socat stdio {{ haproxy_socket_path }} | awk -v srv="${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]}" '$1 == srv { seen = 1; if ($7 > 0) busy = 1 } END { exit (busy || !seen) }'; then
+      break
+    fi
+    sleep 1
+    retries=$((retries - 1))
+  done
+{% endif %}
+
+  # Restart the daemon
+  systemctl restart ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}
+
   # Check socket files
   check_socket ${i}
   # Check rest
   check_rest ${i}
+
+{% if handler_rgw_use_haproxy_maintenance %}
+  # Restore the configured weight on haproxy: "100%" is relative to the initial weight, a bare 100 would override it
+  echo "set weight ${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]} 100%" | socat stdio {{ haproxy_socket_path }}
+{% endif %}
 done