mirror of https://github.com/ceph/ceph-ansible.git
shrink-mds: refactor post tasks
This commit refactors the way we check that the "mds_to_kill" node is
properly stopped.
Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
Co-authored-by: Rishabh Dave <ridave@redhat.com>
(cherry picked from commit 7df62fde34
)
pull/4208/head
parent
e213163b63
commit
85a448429d
|
@ -59,15 +59,11 @@
|
||||||
|
|
||||||
- name: set_fact container_exec_cmd for mon0
|
- name: set_fact container_exec_cmd for mon0
|
||||||
set_fact:
|
set_fact:
|
||||||
container_exec_cmd: >
|
container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
|
||||||
{{ container_binary }} exec ceph-mon-{{ hostvars[groups
|
|
||||||
[mon_group_name][0]]['ansible_hostname'] }}
|
|
||||||
when: containerized_deployment | bool
|
when: containerized_deployment | bool
|
||||||
|
|
||||||
- name: exit playbook, if can not connect to the cluster
|
- name: exit playbook, if can not connect to the cluster
|
||||||
command: >
|
command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} health"
|
||||||
{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster
|
|
||||||
{{ cluster }} health
|
|
||||||
register: ceph_health
|
register: ceph_health
|
||||||
until: ceph_health is succeeded
|
until: ceph_health is succeeded
|
||||||
delegate_to: "{{ groups[mon_group_name][0] }}"
|
delegate_to: "{{ groups[mon_group_name][0] }}"
|
||||||
|
@ -79,13 +75,62 @@
|
||||||
mds_to_kill_hostname: "{{ hostvars[mds_to_kill]['ansible_hostname'] }}"
|
mds_to_kill_hostname: "{{ hostvars[mds_to_kill]['ansible_hostname'] }}"
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
- name: stop mds service(s)
|
# get rid of this as soon as "systemctl stop ceph-mds@$HOSTNAME" also
|
||||||
service:
|
# removes the MDS from the FS map.
|
||||||
name: ceph-mds@{{ mds_to_kill_hostname }}
|
- name: exit mds if it the deployment is containerized
|
||||||
state: stopped
|
when: containerized_deployment | bool
|
||||||
enabled: no
|
command: "{{ container_exec_cmd | default('') }} ceph tell mds.{{ mds_to_kill }} exit"
|
||||||
delegate_to: "{{ mds_to_kill }}"
|
delegate_to: "{{ groups[mon_group_name][0] }}"
|
||||||
failed_when: false
|
|
||||||
|
- name: stop mds service and verify it
|
||||||
|
block:
|
||||||
|
- name: stop mds service
|
||||||
|
service:
|
||||||
|
name: ceph-mds@{{ mds_to_kill_hostname }}
|
||||||
|
state: stopped
|
||||||
|
enabled: no
|
||||||
|
delegate_to: "{{ mds_to_kill }}"
|
||||||
|
failed_when: false
|
||||||
|
|
||||||
|
- name: ensure that the mds is stopped
  # Query the same unit that the "stop mds service" task stops
  # (ceph-mds@<hostname>, with a hyphen). The previous "ceph_mds@" spelling
  # names a different unit, so the check could not observe the real service.
  command: "systemctl is-active ceph-mds@{{ mds_to_kill_hostname }}"
  register: mds_to_kill_status
  # "systemctl is-active" exits 0 only while the unit is active, so rc == 0
  # means the mds is still running.
  failed_when: mds_to_kill_status.rc == 0
  # retries/delay only take effect together with "until": poll until the
  # unit is no longer reported as active, giving up after 5 attempts.
  until: mds_to_kill_status.rc != 0
  delegate_to: "{{ mds_to_kill }}"
  retries: 5
  delay: 2
|
||||||
|
|
||||||
|
- name: fail if the mds is reported as active or standby
|
||||||
|
block:
|
||||||
|
- name: get ceph status
|
||||||
|
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
|
||||||
|
register: ceph_status
|
||||||
|
delegate_to: "{{ groups[mon_group_name][0] }}"
|
||||||
|
|
||||||
|
- name: get active mds nodes list
|
||||||
|
set_fact:
|
||||||
|
active_mdss: "{{ active_mdss | default([]) + [item.name] }}"
|
||||||
|
with_items: "{{ (ceph_status.stdout | from_json)['fsmap']['by_rank'] }}"
|
||||||
|
|
||||||
|
- name: get ceph fs dump status
|
||||||
|
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
|
||||||
|
register: ceph_fs_status
|
||||||
|
delegate_to: "{{ groups[mon_group_name][0] }}"
|
||||||
|
|
||||||
|
- name: create a list of standby mdss
  # Build the list of standby MDS names from the registered "fs dump" JSON.
  # The expression must be wrapped in "{{ }}" to be templated; without it
  # standby_mdss is set to the raw expression text and the later
  # "mds_to_kill in standby_mdss" test becomes a substring match on that text.
  set_fact:
    standby_mdss: "{{ (ceph_fs_status.stdout | from_json)['standbys'] | map(attribute='name') | list }}"
|
||||||
|
|
||||||
|
- name: fail if mds just killed is being reported as active or standby
|
||||||
|
fail:
|
||||||
|
msg: "mds node {{ mds_to_kill }} still up and running."
|
||||||
|
when:
|
||||||
|
- (mds_to_kill in active_mdss | default([])) or
|
||||||
|
(mds_to_kill in standby_mdss | default([]))
|
||||||
|
|
||||||
|
- name: delete the filesystem too if deleted the last mds too
  # Removes the CephFS filesystem, running the command from the first monitor.
  command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs rm --yes-i-really-mean-it {{ cephfs }}"
  delegate_to: "{{ groups[mon_group_name][0] }}"
  # Guard the destructive removal: only run it when the status registered by
  # "get ceph status" reports no MDS left, neither up nor standby. Without a
  # condition the filesystem was removed on every shrink.
  # NOTE(review): assumes fsmap exposes 'up' and 'up:standby' counters in
  # "ceph -s -f json" -- confirm against the targeted Ceph release.
  when:
    - ((ceph_status.stdout | from_json)['fsmap']['up'] | default(0)) | int == 0
    - ((ceph_status.stdout | from_json)['fsmap']['up:standby'] | default(0)) | int == 0
|
||||||
|
|
||||||
- name: purge mds store
|
- name: purge mds store
|
||||||
file:
|
file:
|
||||||
|
@ -94,16 +139,6 @@
|
||||||
delegate_to: "{{ mds_to_kill }}"
|
delegate_to: "{{ mds_to_kill }}"
|
||||||
|
|
||||||
post_tasks:
|
post_tasks:
|
||||||
- name: verify that the mds has stopped
|
|
||||||
shell: >
|
|
||||||
{{ container_exec_cmd | default('') }} ceph --cluster ceph --conf
|
|
||||||
/etc/ceph/ceph.conf fs dump | grep mds0
|
|
||||||
register: result
|
|
||||||
failed_when: result.rc == 0
|
|
||||||
delegate_to: "{{ mds_to_kill }}"
|
|
||||||
|
|
||||||
- name: show ceph health
|
- name: show ceph health
|
||||||
command: >
|
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s"
|
||||||
{{ container_exec_cmd | default('') }} ceph --cluster
|
|
||||||
{{ cluster }} -s
|
|
||||||
delegate_to: "{{ groups[mon_group_name][0] }}"
|
delegate_to: "{{ groups[mon_group_name][0] }}"
|
||||||
|
|
Loading…
Reference in New Issue