Merge pull request #1838 from ceph/rgw-units

Rgw units
Sébastien Han 2017-08-31 19:38:23 +02:00 committed by GitHub
commit ea9b6395cb
8 changed files with 161 additions and 36 deletions

View File

@@ -327,7 +327,6 @@ dummy:
#radosgw_dns_name: your.subdomain.tld # subdomains used by radosgw. See http://ceph.com/docs/master/radosgw/config/#enabling-subdomain-s3-calls
#radosgw_resolve_cname: false # enable for radosgw to resolve DNS CNAME based bucket names
#radosgw_civetweb_port: 8080
#radosgw_civetweb_bind_ip: "{{ ansible_default_ipv4.address }}" # when using ipv6 enclose with brackets: "[{{ ansible_default_ipv6.address }}]"
#radosgw_civetweb_num_threads: 100
# For additional civetweb configuration options available such as SSL, logging,
# keepalive, and timeout settings, please see the civetweb docs at
@@ -366,11 +365,23 @@ dummy:
# Obviously between the checks (for monitors to be in quorum and for osd's pgs
# to be clean) we have to wait. These retries and delays can be configurable
# for both monitors and osds.
#
# Monitor handler checks
#handler_health_mon_check_retries: 5
#handler_health_mon_check_delay: 10
#
# OSD handler checks
#handler_health_osd_check_retries: 40
#handler_health_osd_check_delay: 30
#handler_health_osd_check: true
#
# MDS handler checks
#handler_health_mds_check_retries: 5
#handler_health_mds_check_delay: 10
#
# RGW handler checks
#handler_health_rgw_check_retries: 5
#handler_health_rgw_check_delay: 10
# Configure the type of NFS gateway access. At least one must be enabled for an
# NFS role to be useful
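The new MDS and RGW check knobs above follow the same pattern as the existing monitor and OSD ones. As a rough sketch (values below are illustrative, not project defaults), a deployment that wants the restart handlers to wait longer could uncomment and override them in its own group_vars:

    # user override in group_vars/all.yml -- example values only
    handler_health_mds_check_retries: 10
    handler_health_mds_check_delay: 15
    handler_health_rgw_check_retries: 10
    handler_health_rgw_check_delay: 15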

View File

@ -327,7 +327,6 @@ ceph_repository: rhcs
#radosgw_dns_name: your.subdomain.tld # subdomains used by radosgw. See http://ceph.com/docs/master/radosgw/config/#enabling-subdomain-s3-calls #radosgw_dns_name: your.subdomain.tld # subdomains used by radosgw. See http://ceph.com/docs/master/radosgw/config/#enabling-subdomain-s3-calls
#radosgw_resolve_cname: false # enable for radosgw to resolve DNS CNAME based bucket names #radosgw_resolve_cname: false # enable for radosgw to resolve DNS CNAME based bucket names
#radosgw_civetweb_port: 8080 #radosgw_civetweb_port: 8080
#radosgw_civetweb_bind_ip: "{{ ansible_default_ipv4.address }}" # when using ipv6 enclose with brackets: "[{{ ansible_default_ipv6.address }}]"
#radosgw_civetweb_num_threads: 100 #radosgw_civetweb_num_threads: 100
# For additional civetweb configuration options available such as SSL, logging, # For additional civetweb configuration options available such as SSL, logging,
# keepalive, and timeout settings, please see the civetweb docs at # keepalive, and timeout settings, please see the civetweb docs at
@@ -366,11 +365,23 @@ ceph_repository: rhcs
# Obviously between the checks (for monitors to be in quorum and for osd's pgs
# to be clean) we have to wait. These retries and delays can be configurable
# for both monitors and osds.
#
# Monitor handler checks
#handler_health_mon_check_retries: 5
#handler_health_mon_check_delay: 10
#
# OSD handler checks
#handler_health_osd_check_retries: 40
#handler_health_osd_check_delay: 30
#handler_health_osd_check: true
#
# MDS handler checks
#handler_health_mds_check_retries: 5
#handler_health_mds_check_delay: 10
#
# RGW handler checks
#handler_health_rgw_check_retries: 5
#handler_health_rgw_check_delay: 10
# Configure the type of NFS gateway access. At least one must be enabled for an
# NFS role to be useful

View File

@@ -319,7 +319,6 @@ mds_max_mds: 3
#radosgw_dns_name: your.subdomain.tld # subdomains used by radosgw. See http://ceph.com/docs/master/radosgw/config/#enabling-subdomain-s3-calls
radosgw_resolve_cname: false # enable for radosgw to resolve DNS CNAME based bucket names
radosgw_civetweb_port: 8080
radosgw_civetweb_bind_ip: "{{ ansible_default_ipv4.address }}" # when using ipv6 enclose with brackets: "[{{ ansible_default_ipv6.address }}]"
radosgw_civetweb_num_threads: 100
# For additional civetweb configuration options available such as SSL, logging,
# keepalive, and timeout settings, please see the civetweb docs at
@@ -358,11 +357,23 @@ restapi_port: 5000
# Obviously between the checks (for monitors to be in quorum and for osd's pgs
# to be clean) we have to wait. These retries and delays can be configurable
# for both monitors and osds.
#
# Monitor handler checks
handler_health_mon_check_retries: 5
handler_health_mon_check_delay: 10
#
# OSD handler checks
handler_health_osd_check_retries: 40
handler_health_osd_check_delay: 30
handler_health_osd_check: true
#
# MDS handler checks
handler_health_mds_check_retries: 5
handler_health_mds_check_delay: 10
#
# RGW handler checks
handler_health_rgw_check_retries: 5
handler_health_rgw_check_delay: 10
# Configure the type of NFS gateway access. At least one must be enabled for an
# NFS role to be useful

View File

@@ -18,7 +18,7 @@
  command: /tmp/restart_mon_daemon.sh
  listen: "restart ceph mons"
  when:
    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
    - socket.rc == 0
    - mon_group_name in group_names
@ -42,8 +42,8 @@
listen: "restart ceph osds" listen: "restart ceph osds"
with_items: "{{ socket_osd_container.results | default([]) }}" with_items: "{{ socket_osd_container.results | default([]) }}"
when: when:
# We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`) # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
# except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
- containerized_deployment - containerized_deployment
- ((crush_location is defined and crush_location) or item.get('rc') == 0) - ((crush_location is defined and crush_location) or item.get('rc') == 0)
- handler_health_osd_check - handler_health_osd_check
@@ -55,8 +55,8 @@
  command: /tmp/restart_osd_daemon.sh
  listen: "restart ceph osds"
  when:
    # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
    # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
    - ((crush_location is defined and crush_location) or socket.rc == 0)
    - ceph_current_fsid.rc == 0
    - handler_health_osd_check
@@ -64,28 +64,44 @@
    - inventory_hostname in play_hosts
    - osd_group_name in group_names

- name: restart ceph mdss
  service:
    name: ceph-mds@{{ mds_name }}
    state: restarted
  # serial: 1 would be the proper solution here, but that can only be set on play level
  # upstream issue: https://github.com/ansible/ansible/issues/12170
  run_once: true
  with_items: "{{ groups.get(mds_group_name, []) }}"
  delegate_to: "{{ item }}"
  when:
    - mds_group_name in group_names

- name: restart ceph rgws
  service:
    name: ceph-radosgw@rgw.{{ ansible_hostname }}
    state: restarted
  # serial: 1 would be the proper solution here, but that can only be set on play level
  # upstream issue: https://github.com/ansible/ansible/issues/12170
  run_once: true
  with_items: "{{ groups.get(rgw_group_name, []) }}"
  delegate_to: "{{ item }}"
  when:
    - rgw_group_name in group_names

- name: copy mds restart script
  template:
    src: restart_mds_daemon.sh.j2
    dest: /tmp/restart_mds_daemon.sh
    owner: root
    group: root
    mode: 0750
  listen: "restart ceph mdss"
  when:
    - inventory_hostname in play_hosts
    - mds_group_name in group_names

- name: restart ceph mds daemon(s)
  command: /tmp/restart_mds_daemon.sh
  listen: "restart ceph mdss"
  when:
    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
    - socket.rc == 0
    - mds_group_name in group_names

- name: copy rgw restart script
  template:
    src: restart_rgw_daemon.sh.j2
    dest: /tmp/restart_rgw_daemon.sh
    owner: root
    group: root
    mode: 0750
  listen: "restart ceph rgws"
  when:
    - inventory_hostname in play_hosts
    - rgw_group_name in group_names

- name: restart ceph rgw daemon(s)
  command: /tmp/restart_rgw_daemon.sh
  listen: "restart ceph rgws"
  when:
    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
    - socket.rc == 0
    - rgw_group_name in group_names

- name: restart ceph nfss
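For reference, these handlers are keyed on their listen topics, so a role task only has to notify the topic and Ansible runs both the copy-script and run-script handlers in order. A minimal sketch (the task name and template paths below are hypothetical; only the notify topics come from this change):

    - name: push ceph.conf   # hypothetical task, for illustration only
      template:
        src: ceph.conf.j2
        dest: /etc/ceph/{{ cluster }}.conf
      notify:
        - restart ceph mdss
        - restart ceph rgws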

View File

@@ -0,0 +1,20 @@
#!/bin/bash

RETRIES="{{ handler_health_mds_check_retries }}"
DELAY="{{ handler_health_mds_check_delay }}"
MDS_NAME="{{ ansible_hostname }}"
SOCKET=/var/run/ceph/{{ cluster }}-mds.${MDS_NAME}.asok

# First, restart the daemon
systemctl restart ceph-mds@${MDS_NAME}

# Wait and ensure the socket exists after restarting the daemon
while [ $RETRIES -ne 0 ]; do
  {{ docker_exec_cmd }} test -S $SOCKET && exit 0
  sleep $DELAY
  let RETRIES=RETRIES-1
done

# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means the Metadata Server is not running."
exit 1
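Once the socket is back, the daemon can also be queried through it by hand; a quick check along the same lines, assuming the default cluster name ceph, a non-containerized host, and an MDS id equal to the short hostname (as in the template), might be:

    # ask the restarted MDS for its version over the admin socket the script waits for
    ceph --cluster ceph daemon mds.$(hostname -s) version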

View File

@@ -8,7 +8,7 @@ SOCKET=/var/run/ceph/{{ cluster }}-mon.${MONITOR_NAME}.asok
check_quorum() {
  while [ $RETRIES -ne 0 ]; do
    MEMBERS=$({{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
    test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && exit 0
    sleep $DELAY
    let RETRIES=RETRIES-1
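The sed scrape of quorum_names above keeps the script free of extra dependencies; where jq happens to be installed, an equivalent membership test could look like the sketch below (an alternative, not what the template ships):

    # alternative quorum check using jq instead of sed; assumes jq is available on the host
    {{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s --format json \
      | jq -e --arg m "$MONITOR_NAME" '.quorum_names | index($m) != null' >/dev/null && exit 0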

View File

@@ -0,0 +1,65 @@
#!/bin/bash

RETRIES="{{ handler_health_rgw_check_retries }}"
DELAY="{{ handler_health_rgw_check_delay }}"
RGW_NAME="{{ ansible_hostname }}"
RGW_PORT="{{ radosgw_civetweb_port }}"
SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.${RGW_NAME}.asok

{% if radosgw_address_block | length > 0 %}
{% if ip_version == 'ipv4' -%}
RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}
{%- elif ip_version == 'ipv6' -%}
RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}]
{%- endif %}
{% elif hostvars[inventory_hostname]['radosgw_address'] is defined and hostvars[inventory_hostname]['radosgw_address'] != '0.0.0.0' -%}
{% if ip_version == 'ipv4' -%}
RGW_IP={{ hostvars[inventory_hostname]['radosgw_address'] }}
{%- elif ip_version == 'ipv6' -%}
RGW_IP=[{{ hostvars[inventory_hostname]['radosgw_address'] }}]
{% endif %}
{%- else -%}
{% set interface = ["ansible_",radosgw_interface]|join %}
{% if ip_version == 'ipv6' -%}
RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}]
{%- elif ip_version == 'ipv4' -%}
RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }}
{% endif %}
{%- endif %}

check_for_curl_or_wget() {
  if {{ docker_exec_cmd }} command -v wget &>/dev/null; then
    rgw_test_command="wget --quiet"
  elif {{ docker_exec_cmd }} command -v curl &>/dev/null; then
    rgw_test_command="curl --fail --silent --output /dev/null"
  else
    echo "It seems that neither curl nor wget is available on your system."
    echo "Cannot test rgw connection."
    exit 0
  fi
}

check_rest() {
  check_for_curl_or_wget
  while [ $RETRIES -ne 0 ]; do
    $rgw_test_command http://$RGW_IP:$RGW_PORT && exit 0
    sleep $DELAY
    let RETRIES=RETRIES-1
  done
  # If we reach this point, it means there is a problem with the connection to rgw
  echo "Error connecting locally to Rados Gateway service: http://$RGW_IP:$RGW_PORT"
  exit 1
}

# First, restart the daemon
systemctl restart ceph-radosgw@rgw.${RGW_NAME}

COUNT=10
# Wait and ensure the socket exists after restarting the daemon
while [ $COUNT -ne 0 ]; do
  {{ docker_exec_cmd }} test -S $SOCKET && check_rest
  sleep 1
  let COUNT=COUNT-1
done

echo "Socket file ${SOCKET} could not be found, which means Rados Gateway is not running."
exit 1
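For a concrete sense of the address-selection block: on a host where radosgw_address_block is empty, radosgw_address is unset, radosgw_interface is eth0 and ip_version is ipv4 (assumptions for illustration, with a made-up address), the template would render roughly to:

    # illustrative rendering under the assumptions above
    RGW_IP=192.168.1.21
    # ...and the probe in check_rest, with curl available, becomes:
    curl --fail --silent --output /dev/null http://192.168.1.21:8080 && exit 0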

View File

@@ -1,18 +1,10 @@
---
- name: check if rgw is started
  command: /etc/init.d/radosgw status
  register: rgwstatus
  changed_when: false
  failed_when: false
  always_run: true

- name: ensure systemd service override directory exists
  file:
    state: directory
    path: "/etc/systemd/system/ceph-rgw@.service.d/"
  when:
    - ceph_rgw_systemd_overrides is defined
    - ansible_service_mgr == 'systemd'

- name: add ceph-rgw systemd service overrides
  config_template:
@@ -22,7 +14,6 @@
    config_type: "ini"
  when:
    - ceph_rgw_systemd_overrides is defined
    - ansible_service_mgr == 'systemd'

- name: start rgw
  service:
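For context, ceph_rgw_systemd_overrides feeds the config_template task above, which renders an INI-style systemd drop-in under /etc/systemd/system/ceph-rgw@.service.d/. A user-supplied value might look like the sketch below (section and values are only an example, not a default):

    # example group_vars entry; rendered as an INI drop-in for the ceph-rgw@ unit
    ceph_rgw_systemd_overrides:
      Service:
        Restart: always
        RestartSec: 15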