rolling_update: unmask monitor service after a failure

if for some reason the playbook fails after the service was
stopped, disabled and masked and before it got restarted, enabled and
unmasked, the playbook leaves the service masked and which can make users
confused and forces them to unmask the unit manually.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1917680

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
(cherry picked from commit 07029e1bf1)
pull/6420/head
Guillaume Abrioux 2021-03-18 09:08:51 +01:00
parent 653d180ec0
commit 82b934cfc1
1 changed files with 151 additions and 131 deletions

View File

@ -118,150 +118,170 @@
serial: 1
become: True
tasks:
- name: remove ceph aliases
file:
path: /etc/profile.d/ceph-aliases.sh
state: absent
when: containerized_deployment | bool
- name: upgrade ceph mon cluster
block:
- name: upgrade ceph mon cluster
block:
- name: remove ceph aliases
file:
path: /etc/profile.d/ceph-aliases.sh
state: absent
when: containerized_deployment | bool
- name: set mon_host_count
set_fact:
mon_host_count: "{{ groups[mon_group_name] | length }}"
- name: set mon_host_count
set_fact:
mon_host_count: "{{ groups[mon_group_name] | length }}"
- name: fail when less than three monitors
fail:
msg: "Upgrade of cluster with less than three monitors is not supported."
when: mon_host_count | int < 3
- name: select a running monitor
set_fact:
mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
- block:
- name: get ceph cluster status
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
register: check_cluster_health
delegate_to: "{{ mon_host }}"
- block:
- name: display ceph health detail
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
delegate_to: "{{ mon_host }}"
- name: fail if cluster isn't in an acceptable state
- name: fail when less than three monitors
fail:
msg: "cluster is not in an acceptable state!"
when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
when: inventory_hostname == groups[mon_group_name] | first
msg: "Upgrade of cluster with less than three monitors is not supported."
when: mon_host_count | int < 3
- name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
file:
path: /var/lib/ceph/bootstrap-rbd-mirror
owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
mode: '755'
state: directory
delegate_to: "{{ item }}"
with_items: "{{ groups[mon_group_name] }}"
when:
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
- name: select a running monitor
set_fact:
mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"
- name: create potentially missing keys (rbd and rbd-mirror)
ceph_key:
name: "client.{{ item.0 }}"
dest: "/var/lib/ceph/{{ item.0 }}/"
caps:
mon: "allow profile {{ item.0 }}"
cluster: "{{ cluster }}"
delegate_to: "{{ item.1 }}"
with_nested:
- ['bootstrap-rbd', 'bootstrap-rbd-mirror']
- "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
environment:
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
when:
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
# NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded
- name: stop ceph mon - shortname
systemd:
name: ceph-mon@{{ ansible_facts['hostname'] }}
state: stopped
enabled: no
masked: yes
ignore_errors: True
- block:
- name: get ceph cluster status
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
register: check_cluster_health
delegate_to: "{{ mon_host }}"
# NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded
- name: stop ceph mon - fqdn
systemd:
name: ceph-mon@{{ ansible_facts['fqdn'] }}
state: stopped
enabled: no
masked: yes
ignore_errors: True
- block:
- name: display ceph health detail
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
delegate_to: "{{ mon_host }}"
# only mask the service for mgr because it must be upgraded
# after ALL monitors, even when collocated
- name: mask the mgr service
systemd:
name: ceph-mgr@{{ ansible_facts['hostname'] }}
masked: yes
when: inventory_hostname in groups[mgr_group_name] | default([])
or groups[mgr_group_name] | default([]) | length == 0
- name: fail if cluster isn't in an acceptable state
fail:
msg: "cluster is not in an acceptable state!"
when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
when: inventory_hostname == groups[mon_group_name] | first
- import_role:
name: ceph-handler
- import_role:
name: ceph-common
when: not containerized_deployment | bool
- import_role:
name: ceph-container-common
when: containerized_deployment | bool
- import_role:
name: ceph-config
- import_role:
name: ceph-mon
- name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
file:
path: /var/lib/ceph/bootstrap-rbd-mirror
owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
mode: '755'
state: directory
delegate_to: "{{ item }}"
with_items: "{{ groups[mon_group_name] }}"
when:
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
- name: start ceph mgr
systemd:
name: ceph-mgr@{{ ansible_facts['hostname'] }}
state: started
enabled: yes
ignore_errors: True # if no mgr collocated with mons
- name: create potentially missing keys (rbd and rbd-mirror)
ceph_key:
name: "client.{{ item.0 }}"
dest: "/var/lib/ceph/{{ item.0 }}/"
caps:
mon: "allow profile {{ item.0 }}"
cluster: "{{ cluster }}"
delegate_to: "{{ item.1 }}"
with_nested:
- ['bootstrap-rbd', 'bootstrap-rbd-mirror']
- "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
environment:
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
when:
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
- name: non container | waiting for the monitor to join the quorum...
command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
register: ceph_health_raw
until:
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: not containerized_deployment | bool
# NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded
- name: stop ceph mon - shortname
systemd:
name: ceph-mon@{{ ansible_facts['hostname'] }}
state: stopped
enabled: no
masked: yes
ignore_errors: True
- name: container | waiting for the containerized monitor to join the quorum...
command: >
{{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
register: ceph_health_raw
until:
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: containerized_deployment | bool
# NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded
- name: stop ceph mon - fqdn
systemd:
name: ceph-mon@{{ ansible_facts['fqdn'] }}
state: stopped
enabled: no
masked: yes
ignore_errors: True
# only mask the service for mgr because it must be upgraded
# after ALL monitors, even when collocated
- name: mask the mgr service
systemd:
name: ceph-mgr@{{ ansible_facts['hostname'] }}
masked: yes
when: inventory_hostname in groups[mgr_group_name] | default([])
or groups[mgr_group_name] | default([]) | length == 0
- import_role:
name: ceph-handler
- import_role:
name: ceph-common
when: not containerized_deployment | bool
- import_role:
name: ceph-container-common
when: containerized_deployment | bool
- import_role:
name: ceph-config
- import_role:
name: ceph-mon
- name: start ceph mgr
systemd:
name: ceph-mgr@{{ ansible_facts['hostname'] }}
state: started
enabled: yes
ignore_errors: True # if no mgr collocated with mons
- name: non container | waiting for the monitor to join the quorum...
command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
register: ceph_health_raw
until:
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: not containerized_deployment | bool
- name: container | waiting for the containerized monitor to join the quorum...
command: >
{{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
register: ceph_health_raw
until:
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: containerized_deployment | bool
rescue:
- name: unmask the mon service
systemd:
name: ceph-mon@{{ item }}
enabled: yes
masked: no
with_items:
- "{{ ansible_facts['hostname'] }}"
- "{{ ansible_facts['fqdn'] }}"
- name: unmask the mgr service
systemd:
name: ceph-mgr@{{ ansible_facts['hostname'] }}
masked: no
when: inventory_hostname in groups[mgr_group_name] | default([])
or groups[mgr_group_name] | default([]) | length == 0
- name: reset mon_host
hosts: "{{ mon_group_name|default('mons') }}"