Merge pull request #1939 from ceph/ci-reboot

tests: implement reboot after a deployment
Sébastien Han 2017-09-29 16:34:46 +02:00 committed by GitHub
commit de44f125bb
24 changed files with 410 additions and 114 deletions


@ -394,6 +394,18 @@ dummy:
#handler_health_rgw_check_retries: 5
#handler_health_rgw_check_delay: 10
# NFS handler checks
#handler_health_nfs_check_retries: 5
#handler_health_nfs_check_delay: 10
# RBD MIRROR handler checks
#handler_health_rbd_mirror_check_retries: 5
#handler_health_rbd_mirror_check_delay: 10
# MGR handler checks
#handler_health_mgr_check_retries: 5
#handler_health_mgr_check_delay: 10
###############
# NFS-GANESHA #
###############


@ -394,6 +394,18 @@ ceph_repository: rhcs
#handler_health_rgw_check_retries: 5
#handler_health_rgw_check_delay: 10
# NFS handler checks
#handler_health_nfs_check_retries: 5
#handler_health_nfs_check_delay: 10
# RBD MIRROR handler checks
#handler_health_rbd_mirror_check_retries: 5
#handler_health_rbd_mirror_check_delay: 10
# MGR handler checks
#handler_health_mgr_check_retries: 5
#handler_health_mgr_check_delay: 10
###############
# NFS-GANESHA #
###############


@ -312,37 +312,24 @@
     - ceph-defaults
   tasks:
+    - name: set_fact docker_exec_cmd_osd
+      set_fact:
+        docker_exec_cmd_update_osd: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
+      when:
+        - containerized_deployment
+
     - name: unset osd flags
-      command: ceph osd unset {{ item }} --cluster {{ cluster }}
+      command: "{{ docker_exec_cmd_update_osd|default('') }} ceph osd unset {{ item }} --cluster {{ cluster }}"
       with_items:
         - noout
         - noscrub
         - nodeep-scrub
       delegate_to: "{{ groups[mon_group_name][0] }}"
-      when: not containerized_deployment
-
-    - name: unset containerized osd flags
-      command: |
-        docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph osd unset {{ item }} --cluster {{ cluster }}
-      with_items:
-        - noout
-        - noscrub
-        - nodeep-scrub
-      delegate_to: "{{ groups[mon_group_name][0] }}"
-      when: containerized_deployment

     - name: get osd versions
-      command: ceph --cluster {{ cluster }} versions
+      command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} versions"
       register: ceph_versions
       delegate_to: "{{ groups[mon_group_name][0] }}"
-      when: not containerized_deployment
-
-    - name: containers - get osd versions
-      command: |
-        docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} versions
-      register: ceph_versions
-      delegate_to: "{{ groups[mon_group_name][0] }}"
-      when: containerized_deployment

     - name: set_fact ceph_versions_osd
       set_fact:
@ -352,21 +339,12 @@
     # length == 1 means there is a single osds versions entry
     # thus all the osds are running the same version
     - name: complete osds upgrade
-      command: ceph --cluster {{ cluster }} osd require-osd-release luminous
+      command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} osd require-osd-release luminous"
       delegate_to: "{{ groups[mon_group_name][0] }}"
       when:
-        - not containerized_deployment
         - (ceph_versions.stdout|from_json).osd | length == 1
         - ceph_versions_osd | string | search("ceph version 12")
-
-    - name: containers - complete osds upgrade
-      command: |
-        docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} osd require-osd-release luminous
-      delegate_to: "{{ groups[mon_group_name][0] }}"
-      when:
-        - containerized_deployment
-        - (ceph_versions.stdout|from_json).osd | length == 1
-        - ceph_versions_osd | string | search("ceph version 12")

     - name: upgrade ceph mdss cluster
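The refactor above collapses the separate containerized and non-containerized tasks into one: the command is prefixed with a fact that is only set on containerized deployments, and default('') makes the prefix disappear otherwise. A minimal sketch of how the templated command resolves, illustrative only and assuming the default cluster name "ceph":

# containerized:     docker exec ceph-mon-<first-mon-hostname> ceph --cluster ceph versions
# non-containerized: ceph --cluster ceph versions
- name: get osd versions
  command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} versions"
  register: ceph_versions
  delegate_to: "{{ groups[mon_group_name][0] }}"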


@ -117,6 +117,8 @@
- restart ceph osds
- restart ceph mdss
- restart ceph rgws
- restart ceph rbdmirrors
- restart ceph mgrs
- name: set fsid fact when generate_fsid = true
set_fact:


@ -386,6 +386,18 @@ handler_health_mds_check_delay: 10
handler_health_rgw_check_retries: 5
handler_health_rgw_check_delay: 10
# NFS handler checks
handler_health_nfs_check_retries: 5
handler_health_nfs_check_delay: 10
# RBD MIRROR handler checks
handler_health_rbd_mirror_check_retries: 5
handler_health_rbd_mirror_check_delay: 10
# MGR handler checks
handler_health_mgr_check_retries: 5
handler_health_mgr_check_delay: 10
###############
# NFS-GANESHA #
###############
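These retry/delay pairs control how long the new restart handlers wait for a daemon's admin socket (or, for NFS Ganesha, its PID file) to reappear after a restart. They can be overridden per deployment in group_vars; a minimal sketch with purely illustrative values, not recommendations from this change:

# group_vars/all.yml -- illustrative overrides only
handler_health_nfs_check_retries: 10
handler_health_nfs_check_delay: 15
handler_health_rbd_mirror_check_retries: 10
handler_health_rbd_mirror_check_delay: 15
handler_health_mgr_check_retries: 10
handler_health_mgr_check_delay: 15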


@ -20,8 +20,8 @@
listen: "restart ceph mons" listen: "restart ceph mons"
when: when:
# We do not want to run these checks on initial deployment (`socket.rc == 0`) # We do not want to run these checks on initial deployment (`socket.rc == 0`)
- mon_socket_stat.rc == 0
- mon_group_name in group_names - mon_group_name in group_names
- mon_socket_stat.rc == 0
# This does not just restart OSDs but everything else too. Unfortunately # This does not just restart OSDs but everything else too. Unfortunately
# at this time the ansible role does not have an OSD id list to use # at this time the ansible role does not have an OSD id list to use
@ -35,8 +35,8 @@
     mode: 0750
   listen: "restart ceph osds"
   when:
-    - inventory_hostname in play_hosts
     - osd_group_name in group_names
+    - inventory_hostname in play_hosts

 - name: restart containerized ceph osds daemon(s)
   command: /tmp/restart_osd_daemon.sh
@ -45,17 +45,18 @@
   when:
     # We do not want to run these checks on initial deployment (`socket_osd_container_stat.results[n].rc == 0`)
     # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
+    - osd_group_name in group_names
     - containerized_deployment
     - ((crush_location is defined and crush_location) or item.get('rc') == 0)
     - handler_health_osd_check
     # See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
     - inventory_hostname in play_hosts
-    - osd_group_name in group_names

 - name: restart non-containerized ceph osds daemon(s)
   command: /tmp/restart_osd_daemon.sh
   listen: "restart ceph osds"
   when:
+    - osd_group_name in group_names
     - not containerized_deployment
     # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
     # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
@ -64,8 +65,8 @@
- handler_health_osd_check
# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
- inventory_hostname in play_hosts
- osd_group_name in group_names
- block:
- name: copy mds restart script
template:
src: restart_mds_daemon.sh.j2
@ -75,16 +76,16 @@
     mode: 0750
   listen: "restart ceph mdss"
   when:
-    - inventory_hostname in play_hosts
     - mds_group_name in group_names
+    - inventory_hostname in play_hosts

 - name: restart ceph mds daemon(s)
   command: /tmp/restart_mds_daemon.sh
   listen: "restart ceph mdss"
   when:
     # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - mds_socket_stat.rc == 0
     - mds_group_name in group_names
+    - mds_socket_stat.rc == 0

 - name: copy rgw restart script
   template:
@ -95,20 +96,73 @@
     mode: 0750
   listen: "restart ceph rgws"
   when:
-    - inventory_hostname in play_hosts
     - rgw_group_name in group_names
+    - inventory_hostname in play_hosts

 - name: restart ceph rgw daemon(s)
   command: /tmp/restart_rgw_daemon.sh
   listen: "restart ceph rgws"
   when:
     # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - rgw_socket_stat.rc == 0
     - rgw_group_name in group_names
+    - rgw_socket_stat.rc == 0
-- name: restart ceph nfss
-  service:
-    name: nfs-ganesha
-    state: restarted
+- name: copy nfs restart script
+  template:
+    src: restart_nfs_daemon.sh.j2
+    dest: /tmp/restart_nfs_daemon.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph nfss"
   when:
     - nfs_group_name in group_names
+    - inventory_hostname in play_hosts
+
+- name: restart ceph nfs daemon(s)
+  command: /tmp/restart_nfs_daemon.sh
+  listen: "restart ceph nfss"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - nfs_group_name in group_names
+    - nfs_socket_stat.rc == 0
- name: copy rbd mirror restart script
template:
src: restart_rbd_mirror_daemon.sh.j2
dest: /tmp/restart_rbd_mirror_daemon.sh
owner: root
group: root
mode: 0750
listen: "restart ceph rbdmirrors"
when:
- rbdmirror_group_name in group_names
- inventory_hostname in play_hosts
- name: restart ceph rbd mirror daemon(s)
command: /tmp/restart_rbd_mirror_daemon.sh
listen: "restart ceph rbdmirrors"
when:
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
- rbdmirror_group_name in group_names
- rbd_mirror_socket_stat.rc == 0
- name: copy mgr restart script
template:
src: restart_mgr_daemon.sh.j2
dest: /tmp/restart_mgr_daemon.sh
owner: root
group: root
mode: 0750
listen: "restart ceph mgrs"
when:
- mgr_group_name in group_names
- inventory_hostname in play_hosts
- name: restart ceph mgr daemon(s)
command: /tmp/restart_mgr_daemon.sh
listen: "restart ceph mgrs"
when:
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
- mgr_group_name in group_names
- mgr_socket_stat.rc == 0
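All of these handlers are addressed through listen topics rather than their task names, so any task can trigger them simply by notifying the topic. A hedged sketch of a configuration task wired to the new topics (illustrative only; the template name and destination path are assumptions, not part of this change):

# Illustrative only -- any task that changes ceph.conf can notify the new topics.
- name: generate ceph configuration file
  template:
    src: ceph.conf.j2
    dest: /etc/ceph/{{ cluster }}.conf
  notify:
    - restart ceph nfss
    - restart ceph rbdmirrors
    - restart ceph mgrs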


@ -4,6 +4,7 @@
set_fact:
docker_exec_cmd: "docker exec ceph-mon-{{ ansible_hostname }}"
when:
- inventory_hostname in groups.get(mon_group_name, [])
- containerized_deployment
- name: check for a ceph mon socket
@ -13,6 +14,8 @@
failed_when: false
always_run: true
register: mon_socket_stat
when:
- inventory_hostname in groups.get(mon_group_name, [])
- name: check if the ceph mon socket is in-use
shell: |
@ -22,6 +25,7 @@
always_run: true
register: mon_socket
when:
- inventory_hostname in groups.get(mon_group_name, [])
- mon_socket_stat.rc == 0
- name: remove ceph mon socket if exists and not used by a process
@ -29,6 +33,7 @@
name: "{{ mon_socket_stat.stdout }}" name: "{{ mon_socket_stat.stdout }}"
state: absent state: absent
when: when:
- inventory_hostname in groups.get(mon_group_name, [])
- not containerized_deployment - not containerized_deployment
- mon_socket_stat.rc == 0 - mon_socket_stat.rc == 0
- mon_socket.rc != 0 - mon_socket.rc != 0
@ -41,6 +46,7 @@
always_run: true
register: osd_socket_stat
when:
- inventory_hostname in groups.get(osd_group_name, [])
- not containerized_deployment
- name: check if the ceph osd socket is in-use
@ -51,6 +57,7 @@
always_run: true
register: osd_socket
when:
- inventory_hostname in groups.get(osd_group_name, [])
- not containerized_deployment
- osd_socket_stat.rc == 0
@ -59,6 +66,7 @@
name: "{{ osd_socket_stat.stdout }}" name: "{{ osd_socket_stat.stdout }}"
state: absent state: absent
when: when:
- inventory_hostname in groups.get(osd_group_name, [])
- not containerized_deployment - not containerized_deployment
- osd_socket_stat.rc == 0 - osd_socket_stat.rc == 0
- osd_socket.rc != 0 - osd_socket.rc != 0
@ -67,6 +75,7 @@
set_fact:
docker_exec_cmd: "docker exec ceph-mds-{{ ansible_hostname }}"
when:
- inventory_hostname in groups.get(mds_group_name, [])
- containerized_deployment
- name: check for a ceph mds socket
@ -76,6 +85,8 @@
failed_when: false
always_run: true
register: mds_socket_stat
when:
- inventory_hostname in groups.get(mds_group_name, [])
- name: check if the ceph mds socket is in-use
shell: |
@ -85,6 +96,7 @@
always_run: true
register: mds_socket
when:
- inventory_hostname in groups.get(mds_group_name, [])
- mds_socket_stat.rc == 0
- name: remove ceph mds socket if exists and not used by a process
@ -92,6 +104,7 @@
name: "{{ mds_socket_stat.stdout }}" name: "{{ mds_socket_stat.stdout }}"
state: absent state: absent
when: when:
- inventory_hostname in groups.get(mds_group_name, [])
- not containerized_deployment - not containerized_deployment
- mds_socket_stat.rc == 0 - mds_socket_stat.rc == 0
- mds_socket.rc != 0 - mds_socket.rc != 0
@ -100,6 +113,7 @@
set_fact:
docker_exec_cmd: "docker exec ceph-rgw-{{ ansible_hostname }}"
when:
- inventory_hostname in groups.get(rgw_group_name, [])
- containerized_deployment
- name: check for a ceph rgw socket
@ -109,6 +123,8 @@
failed_when: false
always_run: true
register: rgw_socket_stat
when:
- inventory_hostname in groups.get(rgw_group_name, [])
- name: check if the ceph rgw socket is in-use
shell: |
@ -118,6 +134,7 @@
always_run: true
register: rgw_socket
when:
- inventory_hostname in groups.get(rgw_group_name, [])
- rgw_socket_stat.rc == 0
- name: remove ceph rgw socket if exists and not used by a process
@ -125,6 +142,7 @@
name: "{{ rgw_socket_stat.stdout }}" name: "{{ rgw_socket_stat.stdout }}"
state: absent state: absent
when: when:
- inventory_hostname in groups.get(rgw_group_name, [])
- not containerized_deployment - not containerized_deployment
- rgw_socket_stat.rc == 0 - rgw_socket_stat.rc == 0
- rgw_socket.rc != 0 - rgw_socket.rc != 0
@ -133,6 +151,7 @@
set_fact:
docker_exec_cmd: "docker exec ceph-mgr-{{ ansible_hostname }}"
when:
- inventory_hostname in groups.get(mgr_group_name, [])
- containerized_deployment
- name: check for a ceph mgr socket
@ -142,6 +161,8 @@
failed_when: false
always_run: true
register: mgr_socket_stat
when:
- inventory_hostname in groups.get(mgr_group_name, [])
- name: check if the ceph mgr socket is in-use
shell: |
@ -151,6 +172,7 @@
always_run: true
register: mgr_socket
when:
- inventory_hostname in groups.get(mgr_group_name, [])
- mgr_socket_stat.rc == 0
- name: remove ceph mgr socket if exists and not used by a process
@ -158,6 +180,7 @@
name: "{{ mgr_socket_stat.stdout }}" name: "{{ mgr_socket_stat.stdout }}"
state: absent state: absent
when: when:
- inventory_hostname in groups.get(mgr_group_name, [])
- not containerized_deployment - not containerized_deployment
- mgr_socket_stat.rc == 0 - mgr_socket_stat.rc == 0
- mgr_socket.rc != 0 - mgr_socket.rc != 0
@ -166,6 +189,7 @@
set_fact:
docker_exec_cmd: "docker exec ceph-rbd-mirror-{{ ansible_hostname }}"
when:
- inventory_hostname in groups.get(rbdmirror_group_name, [])
- containerized_deployment
- name: check for a ceph rbd mirror socket
@ -175,6 +199,8 @@
failed_when: false
always_run: true
register: rbd_mirror_socket_stat
when:
- inventory_hostname in groups.get(rbdmirror_group_name, [])
- name: check if the ceph rbd mirror socket is in-use
shell: |
@ -184,6 +210,7 @@
always_run: true
register: rbd_mirror_socket
when:
- inventory_hostname in groups.get(rbdmirror_group_name, [])
- rbd_mirror_socket_stat.rc == 0
- name: remove ceph rbd mirror socket if exists and not used by a process
@ -191,10 +218,49 @@
name: "{{ rbd_mirror_socket_stat.stdout }}" name: "{{ rbd_mirror_socket_stat.stdout }}"
state: absent state: absent
when: when:
- inventory_hostname in groups.get(rbdmirror_group_name, [])
- not containerized_deployment - not containerized_deployment
- rbd_mirror_socket_stat.rc == 0 - rbd_mirror_socket_stat.rc == 0
- rbd_mirror_socket.rc != 0 - rbd_mirror_socket.rc != 0
- name: set_fact docker_exec_cmd nfs ganesha
set_fact:
docker_exec_cmd: "docker exec ceph-rbd-mirror-{{ ansible_hostname }}"
when:
- inventory_hostname in groups.get(nfs_group_name, [])
- containerized_deployment
- name: check for a ceph nfs ganesha socket
shell: |
{{ docker_exec_cmd | default('') }} bash -c 'stat --printf=%n /var/run/ganesha.pid'
changed_when: false
failed_when: false
always_run: true
register: nfs_socket_stat
when:
- inventory_hostname in groups.get(nfs_group_name, [])
- name: check if the ceph nfs ganesha socket is in-use
shell: |
{{ docker_exec_cmd | default('') }} bash -c 'fuser --silent {{ nfs_socket_stat.stdout }}'
changed_when: false
failed_when: false
always_run: true
register: nfs_socket
when:
- inventory_hostname in groups.get(nfs_group_name, [])
- nfs_socket_stat.rc == 0
- name: remove ceph nfs ganesha socket if exists and not used by a process
file:
name: "{{ nfs_socket_stat.stdout }}"
state: absent
when:
- inventory_hostname in groups.get(nfs_group_name, [])
- not containerized_deployment
- nfs_socket_stat.rc == 0
- nfs_socket.rc != 0
- name: check for a ceph socket in containerized deployment (osds)
shell: |
docker exec ceph-osd-"{{ ansible_hostname }}"-"{{ item | replace('/', '') }}" bash -c 'stat --printf=%n /var/run/ceph/*.asok'
@ -206,3 +272,4 @@
when:
- containerized_deployment
- inventory_hostname in groups.get(osd_group_name, [])


@ -0,0 +1,20 @@
#!/bin/bash
RETRIES="{{ handler_health_mgr_check_retries }}"
DELAY="{{ handler_health_mgr_check_delay }}"
MGR_NAME="{{ ansible_hostname }}"
SOCKET=/var/run/ceph/{{ cluster }}-mgr.${MGR_NAME}.asok
# First, restart the daemon
systemctl restart ceph-mgr@${MGR_NAME}
COUNT=10
# Wait and ensure the socket exists after restarting the daemon
while [ $RETRIES -ne 0 ]; do
{{ docker_exec_cmd }} test -S $SOCKET && exit 0
sleep $DELAY
let RETRIES=RETRIES-1
done
# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running."
exit 1


@ -0,0 +1,23 @@
#!/bin/bash
RETRIES="{{ handler_health_nfs_check_retries }}"
DELAY="{{ handler_health_nfs_check_delay }}"
NFS_NAME="{{ ansible_hostname }}"
PID=/var/run/ganesha.pid
# First, restart the daemon
{% if containerized_deployment -%}
systemctl restart ceph-nfs@${NFS_NAME}
COUNT=10
# Wait and ensure the pid exists after restarting the daemon
while [ $RETRIES -ne 0 ]; do
{{ docker_exec_cmd }} test -f $PID && exit 0
sleep $DELAY
let RETRIES=RETRIES-1
done
# If we reach this point, it means the pid is not present.
echo "PID file ${PID} could not be found, which means Ganesha is not running."
exit 1
{% else %}
systemctl restart nfs-ganesha
{% endif %}


@ -0,0 +1,20 @@
#!/bin/bash
RETRIES="{{ handler_health_rbd_mirror_check_retries }}"
DELAY="{{ handler_health_rbd_mirror_check_delay }}"
RBD_MIRROR_NAME="{{ ansible_hostname }}"
SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.${RBD_MIRROR_NAME}.asok
# First, restart the daemon
systemctl restart ceph-rbd-mirror@rbd-mirror.${RBD_MIRROR_NAME}
COUNT=10
# Wait and ensure the socket exists after restarting the daemon
while [ $RETRIES -ne 0 ]; do
{{ docker_exec_cmd }} test -S $SOCKET && exit 0
sleep $DELAY
let RETRIES=RETRIES-1
done
# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means rbd mirror is not running."
exit 1


@ -8,6 +8,7 @@
- name: create rgw nfs user
command: "{{ docker_exec_cmd_nfs | default('') }} radosgw-admin --cluster {{ cluster }} user create --uid={{ ceph_nfs_rgw_user }} --display-name='RGW NFS User'"
register: rgwuser
changed_when: false
delegate_to: "{{ groups[mon_group_name][0] }}"
when:
- nfs_obj_gw


@ -1,3 +0,0 @@
---
- name: include start_docker_nfs.yml
include: start_docker_nfs.yml


@ -1,18 +0,0 @@
---
- name: generate systemd unit file
become: true
template:
src: "{{ role_path }}/templates/ceph-nfs.service.j2"
dest: /etc/systemd/system/ceph-nfs@.service
owner: "root"
group: "root"
mode: "0644"
- name: systemd start nfs container
systemd:
name: "ceph-nfs@{{ ansible_hostname }}.service"
state: started
enabled: yes
daemon_reload: yes
when:
- ceph_nfs_enable_service


@ -0,0 +1,28 @@
---
- name: check if selinux is enabled
command: getenforce
register: selinuxstatus
changed_when: false
failed_when: false
always_run: true
- name: install policycoreutils-python to get semanage
package:
name: policycoreutils-python
state: present
when:
- selinuxstatus.stdout != 'Disabled'
- name: test if ganesha_t is already permissive
shell: |
semanage permissive -l | grep -soq ganesha_t
changed_when: false
failed_when: false
register: ganesha_t_permissive
- name: run semanage permissive -a ganesha_t
command: semanage permissive -a ganesha_t
changed_when: false
when:
- selinuxstatus.stdout != 'Disabled'
- ganesha_t_permissive.rc != 0


@ -1,18 +1,24 @@
 ---
-- name: include pre_requisite.yml
-  include: pre_requisite.yml
+- name: include pre_requisite_non_container.yml
+  include: pre_requisite_non_container.yml
   when:
     - not containerized_deployment

+- name: include pre_requisite_container.yml
+  include: pre_requisite_container.yml
+  when:
+    - containerized_deployment
+
 - name: include create_rgw_nfs_user.yml
   include: create_rgw_nfs_user.yml

-- name: include start_nfs.yml
-  include: start_nfs.yml
+# NOTE (leseb): workaround for issues with ganesha and librgw
+- name: include ganesha_selinux_fix.yml
+  include: ganesha_selinux_fix.yml
   when:
     - not containerized_deployment
+    - ansible_os_family == 'RedHat'
+    - ansible_distribution_version >= '7.4'

-- name: include docker/main.yml
-  include: docker/main.yml
-  when:
-    - containerized_deployment
+- name: include start_nfs.yml
+  include: start_nfs.yml


@ -1,4 +1,6 @@
 ---
+# NOTE (leseb): we use root:ceph for permissions since ganesha
+# does not have the right selinux context to read ceph directories.
 - name: create rados gateway and ganesha directories
   file:
     path: "{{ item }}"
@ -11,8 +13,8 @@
     - /var/lib/ceph/radosgw
     - /var/lib/ceph/radosgw/{{ cluster }}-rgw.{{ ansible_hostname }}
     - "{{ rbd_client_admin_socket_path }}"
-    - /var/lib/nfs/ganesha
-    - /var/run/ganesha
+    - /var/log/ceph
+    - /var/run/ceph/
   when:
     - nfs_obj_gw
@ -51,7 +53,7 @@
 - name: change ownership on /var/log/ganesha
   file:
-    path: '/var/log/ganesha'
-    owner: 'root'
-    group: 'root'
-    mode: '0755'
+    path: /var/log/ganesha
+    owner: "root"
+    group: "root"
+    mode: "0755"


@ -1,4 +1,12 @@
---
- name: create /etc/ganesha
file:
path: /etc/ganesha
state: directory
owner: root
group: root
mode: "0755"
- name: generate ganesha configuration file
action: config_template
args:
@ -11,6 +19,27 @@
notify:
- restart ceph nfss
- name: generate systemd unit file
become: true
template:
src: "{{ role_path }}/templates/ceph-nfs.service.j2"
dest: /etc/systemd/system/ceph-nfs@.service
owner: "root"
group: "root"
mode: "0644"
when:
- containerized_deployment
- name: systemd start nfs container
systemd:
name: "ceph-nfs@{{ ansible_hostname }}.service"
state: started
enabled: yes
daemon_reload: yes
when:
- ceph_nfs_enable_service
- containerized_deployment
- name: start nfs gateway service
service:
name: nfs-ganesha
@ -18,3 +47,4 @@
enabled: yes
when:
- ceph_nfs_enable_service
- not containerized_deployment


@ -9,7 +9,9 @@ ExecStartPre=-/usr/bin/docker rm ceph-nfs-%i
 ExecStartPre=/usr/bin/mkdir -p /etc/ceph /etc/ganesha /var/lib/nfs/ganesha
 ExecStart=/usr/bin/docker run --rm --net=host \
 {% if not containerized_deployment_with_kv -%}
+  -v /var/lib/ceph:/var/lib/ceph \
   -v /etc/ceph:/etc/ceph \
+  -v /var/lib/ganesha:/var/lib/ganesha \
   -v /etc/ganesha:/etc/ganesha \
 {% else -%}
   -e KV_TYPE={{kv_type}} \
@ -17,7 +19,7 @@ ExecStart=/usr/bin/docker run --rm --net=host \
   -e KV_PORT={{kv_port}} \
 {% endif -%}
   -v /etc/localtime:/etc/localtime:ro \
-  --privileged \
+  -e CLUSTER={{ cluster }} \
   -e CEPH_DAEMON=NFS \
   {{ ceph_nfs_docker_extra_env }} \
   --name=ceph-nfs-{{ ansible_hostname }} \


@ -35,6 +35,17 @@
when:
- ceph_release_num.{{ ceph_release }} >= ceph_release_num.luminous
# This task is a workaround for rbd-mirror not starting after reboot
# The upstream fix is: https://github.com/ceph/ceph/pull/17969
# It affects ceph version 12.2.0 (32ce2a3ae5239ee33d6150705cdb24d43bab910c) luminous (rc) and before
- name: enable ceph-rbd-mirror.target
service:
name: "ceph-rbd-mirror.target"
enabled: yes
changed_when: false
when:
- ceph_release_num.{{ ceph_release }} >= ceph_release_num.luminous
- name: start and add the rbd-mirror service instance
service:
name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}"


@ -73,10 +73,10 @@
   become: True
   gather_facts: false
   roles:
-    - ceph-defaults
-    - ceph-docker-common
-    - ceph-config
-    - ceph-nfs
+    - { role: ceph-defaults, when: "ceph_release_num.{{ ceph_stable_release }} >= ceph_release_num.luminous" }
+    - { role: ceph-docker-common, when: "ceph_release_num.{{ ceph_stable_release }} >= ceph_release_num.luminous" }
+    - { role: ceph-config, when: "ceph_release_num.{{ ceph_stable_release }} >= ceph_release_num.luminous" }
+    - { role: ceph-nfs, when: "ceph_release_num.{{ ceph_stable_release }} >= ceph_release_num.luminous" }

 - hosts: rbdmirrors
   become: True


@ -110,10 +110,10 @@
   gather_facts: false
   become: True
   roles:
-    - ceph-defaults
-    - ceph-common
-    - ceph-config
-    - ceph-nfs
+    - { role: ceph-defaults, when: "ceph_release_num.{{ ceph_stable_release }} >= ceph_release_num.luminous" }
+    - { role: ceph-common, when: "ceph_release_num.{{ ceph_stable_release }} >= ceph_release_num.luminous" }
+    - { role: ceph-config, when: "ceph_release_num.{{ ceph_stable_release }} >= ceph_release_num.luminous" }
+    - { role: ceph-nfs, when: "ceph_release_num.{{ ceph_stable_release }} >= ceph_release_num.luminous" }

 - hosts: restapis
   gather_facts: false


@ -1,3 +1,4 @@
import json
import pytest
class TestNFSs(object):
@ -22,6 +23,30 @@ class TestNFSs(object):
def test_nfs_config_override(self, node, host):
assert host.file("/etc/ganesha/ganesha.conf").contains("Entries_HWMark")
@pytest.mark.no_docker
def test_nfs_is_up(self, node, host):
hostname = node["vars"]["inventory_hostname"]
cluster = node['cluster_name']
cmd = "sudo ceph --name client.rgw.{hostname} --keyring /var/lib/ceph/radosgw/{cluster}-rgw.{hostname}/keyring --cluster={cluster} --connect-timeout 5 -f json -s".format(
hostname=hostname,
cluster=cluster
)
output = host.check_output(cmd)
daemons = [i for i in json.loads(output)["servicemap"]["services"]["rgw-nfs"]["daemons"]]
assert hostname in daemons
@pytest.mark.docker
def test_docker_nfs_is_up(self, node, host):
hostname = node["vars"]["inventory_hostname"]
cluster = node['cluster_name']
cmd = "sudo docker exec ceph-nfs-{hostname} ceph --name client.rgw.{hostname} --keyring /var/lib/ceph/radosgw/{cluster}-rgw.{hostname}/keyring --cluster={cluster} --connect-timeout 5 -f json -s".format(
hostname=hostname,
cluster=cluster
)
output = host.check_output(cmd)
daemons = [i for i in json.loads(output)["servicemap"]["services"]["rgw-nfs"]["daemons"]]
assert hostname in daemons
#NOTE (guits): This check must be fixed. (Permission denied error)
# @pytest.mark.no_docker
# def test_nfs_rgw_fsal_export(self, node, host):

tox.ini

@ -117,6 +117,7 @@ whitelist_externals =
bash
pip
cp
sleep
passenv=*
sitepackages=True
setenv=
@ -176,7 +177,7 @@ changedir=
 purge_docker_cluster: {toxinidir}/tests/functional/centos/7/docker
 docker_dedicated_journal: {toxinidir}/tests/functional/centos/7/docker-ded-jrn
 docker_dmcrypt_journal_collocation: {toxinidir}/tests/functional/centos/7/docker-crypt-jrn-col
-purge_cluster: {toxinidir}/tests/functional/ubuntu/16.04/cluster
+purge_cluster: {toxinidir}/tests/functional/centos/7/cluster
 purge_dmcrypt: {toxinidir}/tests/functional/centos/7/crypt-ded-jrn
 update_dmcrypt: {toxinidir}/tests/functional/centos/7/crypt-ded-jrn
 update_cluster: {toxinidir}/tests/functional/centos/7/cluster
@ -214,6 +215,17 @@ commands=
ansible-playbook -vv -i {changedir}/hosts {toxinidir}/tests/functional/setup.yml
# wait 5 minutes for services to be ready
sleep 300
# test cluster state using ceph-ansible tests
testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/tests/functional/tests
# reboot all vms
vagrant reload --no-provision
# wait 5 minutes for services to be ready
sleep 300
# retest to ensure cluster came back up correctly after rebooting
testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/tests/functional/tests
purge_cluster: {[purge]commands}