Merge pull request #1838 from ceph/rgw-units

Rgw units
Sébastien Han 2017-08-31 19:38:23 +02:00 committed by GitHub
commit ea9b6395cb
8 changed files with 161 additions and 36 deletions

View File

@@ -327,7 +327,6 @@ dummy:
#radosgw_dns_name: your.subdomain.tld # subdomains used by radosgw. See http://ceph.com/docs/master/radosgw/config/#enabling-subdomain-s3-calls
#radosgw_resolve_cname: false # enable for radosgw to resolve DNS CNAME based bucket names
#radosgw_civetweb_port: 8080
#radosgw_civetweb_bind_ip: "{{ ansible_default_ipv4.address }}" # when using ipv6 enclose with brackets: "[{{ ansible_default_ipv6.address }}]"
#radosgw_civetweb_num_threads: 100
# For additional civetweb configuration options available such as SSL, logging,
# keepalive, and timeout settings, please see the civetweb docs at
@@ -366,11 +365,23 @@ dummy:
# Obviously between the checks (for monitors to be in quorum and for osd's pgs
# to be clean) we have to wait. These retries and delays can be configurable
# for both monitors and osds.
#
# Monitor handler checks
#handler_health_mon_check_retries: 5
#handler_health_mon_check_delay: 10
#
# OSD handler checks
#handler_health_osd_check_retries: 40
#handler_health_osd_check_delay: 30
#handler_health_osd_check: true
#
# MDS handler checks
#handler_health_mds_check_retries: 5
#handler_health_mds_check_delay: 10
#
# RGW handler checks
#handler_health_rgw_check_retries: 5
#handler_health_rgw_check_delay: 10
# Configure the type of NFS gateway access. At least one must be enabled for an
# NFS role to be useful
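The new MDS and RGW check knobs above follow the same pattern as the existing monitor and OSD ones. As a rough sketch (values below are illustrative, not project defaults), a deployment that wants the restart handlers to wait longer could uncomment and override them in its own group_vars:

    # user override in group_vars/all.yml -- example values only
    handler_health_mds_check_retries: 10
    handler_health_mds_check_delay: 15
    handler_health_rgw_check_retries: 10
    handler_health_rgw_check_delay: 15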

View File

@ -327,7 +327,6 @@ ceph_repository: rhcs
#radosgw_dns_name: your.subdomain.tld # subdomains used by radosgw. See http://ceph.com/docs/master/radosgw/config/#enabling-subdomain-s3-calls #radosgw_dns_name: your.subdomain.tld # subdomains used by radosgw. See http://ceph.com/docs/master/radosgw/config/#enabling-subdomain-s3-calls
#radosgw_resolve_cname: false # enable for radosgw to resolve DNS CNAME based bucket names #radosgw_resolve_cname: false # enable for radosgw to resolve DNS CNAME based bucket names
#radosgw_civetweb_port: 8080 #radosgw_civetweb_port: 8080
#radosgw_civetweb_bind_ip: "{{ ansible_default_ipv4.address }}" # when using ipv6 enclose with brackets: "[{{ ansible_default_ipv6.address }}]"
#radosgw_civetweb_num_threads: 100 #radosgw_civetweb_num_threads: 100
# For additional civetweb configuration options available such as SSL, logging, # For additional civetweb configuration options available such as SSL, logging,
# keepalive, and timeout settings, please see the civetweb docs at # keepalive, and timeout settings, please see the civetweb docs at
@@ -366,11 +365,23 @@ ceph_repository: rhcs
# Obviously between the checks (for monitors to be in quorum and for osd's pgs
# to be clean) we have to wait. These retries and delays can be configurable
# for both monitors and osds.
#
# Monitor handler checks
#handler_health_mon_check_retries: 5
#handler_health_mon_check_delay: 10
#
# OSD handler checks
#handler_health_osd_check_retries: 40
#handler_health_osd_check_delay: 30
#handler_health_osd_check: true
#
# MDS handler checks
#handler_health_mds_check_retries: 5
#handler_health_mds_check_delay: 10
#
# RGW handler checks
#handler_health_rgw_check_retries: 5
#handler_health_rgw_check_delay: 10
# Configure the type of NFS gateway access. At least one must be enabled for an
# NFS role to be useful

View File

@@ -319,7 +319,6 @@ mds_max_mds: 3
#radosgw_dns_name: your.subdomain.tld # subdomains used by radosgw. See http://ceph.com/docs/master/radosgw/config/#enabling-subdomain-s3-calls
radosgw_resolve_cname: false # enable for radosgw to resolve DNS CNAME based bucket names
radosgw_civetweb_port: 8080
radosgw_civetweb_bind_ip: "{{ ansible_default_ipv4.address }}" # when using ipv6 enclose with brackets: "[{{ ansible_default_ipv6.address }}]"
radosgw_civetweb_num_threads: 100
# For additional civetweb configuration options available such as SSL, logging,
# keepalive, and timeout settings, please see the civetweb docs at
@@ -358,11 +357,23 @@ restapi_port: 5000
# Obviously between the checks (for monitors to be in quorum and for osd's pgs
# to be clean) we have to wait. These retries and delays can be configurable
# for both monitors and osds.
#
# Monitor handler checks
handler_health_mon_check_retries: 5
handler_health_mon_check_delay: 10
#
# OSD handler checks
handler_health_osd_check_retries: 40
handler_health_osd_check_delay: 30
handler_health_osd_check: true
#
# MDS handler checks
handler_health_mds_check_retries: 5
handler_health_mds_check_delay: 10
#
# RGW handler checks
handler_health_rgw_check_retries: 5
handler_health_rgw_check_delay: 10
# Configure the type of NFS gateway access. At least one must be enabled for an
# NFS role to be useful

View File

@@ -18,7 +18,7 @@
  command: /tmp/restart_mon_daemon.sh
  listen: "restart ceph mons"
  when:
    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
    - socket.rc == 0
    - mon_group_name in group_names
@ -42,8 +42,8 @@
listen: "restart ceph osds" listen: "restart ceph osds"
with_items: "{{ socket_osd_container.results | default([]) }}" with_items: "{{ socket_osd_container.results | default([]) }}"
when: when:
# We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`) # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
# except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
- containerized_deployment - containerized_deployment
- ((crush_location is defined and crush_location) or item.get('rc') == 0) - ((crush_location is defined and crush_location) or item.get('rc') == 0)
- handler_health_osd_check - handler_health_osd_check
@@ -55,8 +55,8 @@
  command: /tmp/restart_osd_daemon.sh
  listen: "restart ceph osds"
  when:
    # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
    # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
    - ((crush_location is defined and crush_location) or socket.rc == 0)
    - ceph_current_fsid.rc == 0
    - handler_health_osd_check
@@ -64,28 +64,44 @@
    - inventory_hostname in play_hosts
    - osd_group_name in group_names

- name: restart ceph mdss
  service:
    name: ceph-mds@{{ mds_name }}
    state: restarted
  # serial: 1 would be the proper solution here, but that can only be set on play level
  # upstream issue: https://github.com/ansible/ansible/issues/12170
  run_once: true
  with_items: "{{ groups.get(mds_group_name, []) }}"
  delegate_to: "{{ item }}"
  when:
    - mds_group_name in group_names

- name: restart ceph rgws
  service:
    name: ceph-radosgw@rgw.{{ ansible_hostname }}
    state: restarted
  # serial: 1 would be the proper solution here, but that can only be set on play level
  # upstream issue: https://github.com/ansible/ansible/issues/12170
  run_once: true
  with_items: "{{ groups.get(rgw_group_name, []) }}"
  delegate_to: "{{ item }}"
  when:
    - rgw_group_name in group_names

- name: copy mds restart script
  template:
    src: restart_mds_daemon.sh.j2
    dest: /tmp/restart_mds_daemon.sh
    owner: root
    group: root
    mode: 0750
  listen: "restart ceph mdss"
  when:
    - inventory_hostname in play_hosts
    - mds_group_name in group_names

- name: restart ceph mds daemon(s)
  command: /tmp/restart_mds_daemon.sh
  listen: "restart ceph mdss"
  when:
    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
    - socket.rc == 0
    - mds_group_name in group_names

- name: copy rgw restart script
  template:
    src: restart_rgw_daemon.sh.j2
    dest: /tmp/restart_rgw_daemon.sh
    owner: root
    group: root
    mode: 0750
  listen: "restart ceph rgws"
  when:
    - inventory_hostname in play_hosts
    - rgw_group_name in group_names

- name: restart ceph rgw daemon(s)
  command: /tmp/restart_rgw_daemon.sh
  listen: "restart ceph rgws"
  when:
    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
    - socket.rc == 0
    - rgw_group_name in group_names

- name: restart ceph nfss
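For reference, these handlers are keyed on their listen topics, so a role task only has to notify the topic and Ansible runs both the copy-script and run-script handlers in order. A minimal sketch (the task name and template paths below are hypothetical; only the notify topics come from this change):

    - name: push ceph.conf   # hypothetical task, for illustration only
      template:
        src: ceph.conf.j2
        dest: /etc/ceph/{{ cluster }}.conf
      notify:
        - restart ceph mdss
        - restart ceph rgws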

View File

@@ -0,0 +1,20 @@
#!/bin/bash

RETRIES="{{ handler_health_mds_check_retries }}"
DELAY="{{ handler_health_mds_check_delay }}"
MDS_NAME="{{ ansible_hostname }}"
SOCKET=/var/run/ceph/{{ cluster }}-mds.${MDS_NAME}.asok

# First, restart the daemon
systemctl restart ceph-mds@${MDS_NAME}

# Wait and ensure the socket exists after restarting the daemon
while [ $RETRIES -ne 0 ]; do
  {{ docker_exec_cmd }} test -S $SOCKET && exit 0
  sleep $DELAY
  let RETRIES=RETRIES-1
done

# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means the Metadata Server is not running."
exit 1
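Once the socket is back, the daemon can also be queried through it by hand; a quick check along the same lines, assuming the default cluster name ceph, a non-containerized host, and an MDS id equal to the short hostname (as in the template), might be:

    # ask the restarted MDS for its version over the admin socket the script waits for
    ceph --cluster ceph daemon mds.$(hostname -s) version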

View File

@@ -8,7 +8,7 @@ SOCKET=/var/run/ceph/{{ cluster }}-mon.${MONITOR_NAME}.asok
check_quorum() {
  while [ $RETRIES -ne 0 ]; do
    MEMBERS=$({{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
    test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && exit 0
    sleep $DELAY
    let RETRIES=RETRIES-1
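The sed scrape of quorum_names above keeps the script free of extra dependencies; where jq happens to be installed, an equivalent membership test could look like the sketch below (an alternative, not what the template ships):

    # alternative quorum check using jq instead of sed; assumes jq is available on the host
    {{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s --format json \
      | jq -e --arg m "$MONITOR_NAME" '.quorum_names | index($m) != null' >/dev/null && exit 0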

View File

@@ -0,0 +1,65 @@
#!/bin/bash

RETRIES="{{ handler_health_rgw_check_retries }}"
DELAY="{{ handler_health_rgw_check_delay }}"
RGW_NAME="{{ ansible_hostname }}"
RGW_PORT="{{ radosgw_civetweb_port }}"
SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.${RGW_NAME}.asok

{% if radosgw_address_block | length > 0 %}
{% if ip_version == 'ipv4' -%}
RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}
{%- elif ip_version == 'ipv6' -%}
RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}]
{%- endif %}
{% elif hostvars[inventory_hostname]['radosgw_address'] is defined and hostvars[inventory_hostname]['radosgw_address'] != '0.0.0.0' -%}
{% if ip_version == 'ipv4' -%}
RGW_IP={{ hostvars[inventory_hostname]['radosgw_address'] }}
{%- elif ip_version == 'ipv6' -%}
RGW_IP=[{{ hostvars[inventory_hostname]['radosgw_address'] }}]
{% endif %}
{%- else -%}
{% set interface = ["ansible_",radosgw_interface]|join %}
{% if ip_version == 'ipv6' -%}
RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}]
{%- elif ip_version == 'ipv4' -%}
RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }}
{% endif %}
{%- endif %}

check_for_curl_or_wget() {
  if {{ docker_exec_cmd }} command -v wget &>/dev/null; then
    rgw_test_command="wget --quiet"
  elif {{ docker_exec_cmd }} command -v curl &>/dev/null; then
    rgw_test_command="curl --fail --silent --output /dev/null"
  else
    echo "It seems that neither curl nor wget is available on your system."
    echo "Cannot test rgw connection."
    exit 0
  fi
}

check_rest() {
  check_for_curl_or_wget
  while [ $RETRIES -ne 0 ]; do
    $rgw_test_command http://$RGW_IP:$RGW_PORT && exit 0
    sleep $DELAY
    let RETRIES=RETRIES-1
  done
  # If we reach this point, it means there is a problem with the connection to rgw
  echo "Error connecting locally to Rados Gateway service: http://$RGW_IP:$RGW_PORT"
  exit 1
}

# First, restart the daemon
systemctl restart ceph-radosgw@rgw.${RGW_NAME}

COUNT=10
# Wait and ensure the socket exists after restarting the daemon
while [ $COUNT -ne 0 ]; do
  {{ docker_exec_cmd }} test -S $SOCKET && check_rest
  sleep 1
  let COUNT=COUNT-1
done

echo "Socket file ${SOCKET} could not be found, which means Rados Gateway is not running."
exit 1
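For a concrete sense of the address-selection block: on a host where radosgw_address_block is empty, radosgw_address is unset, radosgw_interface is eth0 and ip_version is ipv4 (assumptions for illustration, with a made-up address), the template would render roughly to:

    # illustrative rendering under the assumptions above
    RGW_IP=192.168.1.21
    # ...and the probe in check_rest, with curl available, becomes:
    curl --fail --silent --output /dev/null http://192.168.1.21:8080 && exit 0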

View File

@@ -1,18 +1,10 @@
---
- name: check if rgw is started
  command: /etc/init.d/radosgw status
  register: rgwstatus
  changed_when: false
  failed_when: false
  always_run: true

- name: ensure systemd service override directory exists
  file:
    state: directory
    path: "/etc/systemd/system/ceph-rgw@.service.d/"
  when:
    - ceph_rgw_systemd_overrides is defined
    - ansible_service_mgr == 'systemd'

- name: add ceph-rgw systemd service overrides
  config_template:
@@ -22,7 +14,6 @@
    config_type: "ini"
  when:
    - ceph_rgw_systemd_overrides is defined
    - ansible_service_mgr == 'systemd'

- name: start rgw
  service:
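For context, ceph_rgw_systemd_overrides feeds the config_template task above, which renders an INI-style systemd drop-in under /etc/systemd/system/ceph-rgw@.service.d/. A user-supplied value might look like the sketch below (section and values are only an example, not a default):

    # example group_vars entry; rendered as an INI drop-in for the ceph-rgw@ unit
    ceph_rgw_systemd_overrides:
      Service:
        Restart: always
        RestartSec: 15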