rolling_update: unmask monitor service after a failure

If the playbook fails for some reason after the service has been
stopped, disabled and masked, and before it gets restarted, enabled
and unmasked, it leaves the service masked. This can confuse users
and forces them to unmask the unit manually.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1917680

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
(cherry picked from commit 07029e1bf1)
pull/6397/head
Guillaume Abrioux 2021-03-18 09:08:51 +01:00
parent fcd9544048
commit 1fd0661d3e
1 changed file with 137 additions and 119 deletions

@@ -128,144 +128,162 @@
   serial: 1
   become: True
   tasks:
-    - name: remove ceph aliases
-      file:
-        path: /etc/profile.d/ceph-aliases.sh
-        state: absent
-      when: containerized_deployment | bool
+    - name: upgrade ceph mon cluster
+      block:
+        - name: remove ceph aliases
+          file:
+            path: /etc/profile.d/ceph-aliases.sh
+            state: absent
+          when: containerized_deployment | bool

-    - name: set mon_host_count
-      set_fact:
-        mon_host_count: "{{ groups[mon_group_name] | length }}"
+        - name: set mon_host_count
+          set_fact:
+            mon_host_count: "{{ groups[mon_group_name] | length }}"

-    - name: fail when less than three monitors
-      fail:
-        msg: "Upgrade of cluster with less than three monitors is not supported."
-      when: mon_host_count | int < 3
+        - name: fail when less than three monitors
+          fail:
+            msg: "Upgrade of cluster with less than three monitors is not supported."
+          when: mon_host_count | int < 3

-    - name: select a running monitor
-      set_fact:
-        mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"
+        - name: select a running monitor
+          set_fact:
+            mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"

-    - import_role:
-        name: ceph-defaults
-    - import_role:
-        name: ceph-facts
+        - import_role:
+            name: ceph-defaults
+        - import_role:
+            name: ceph-facts

-    - block:
-        - name: get ceph cluster status
-          command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
-          register: check_cluster_health
-          delegate_to: "{{ mon_host }}"
+        - block:
+            - name: get ceph cluster status
+              command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
+              register: check_cluster_health
+              delegate_to: "{{ mon_host }}"

-        - block:
-            - name: display ceph health detail
-              command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
-              delegate_to: "{{ mon_host }}"
+            - block:
+                - name: display ceph health detail
+                  command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
+                  delegate_to: "{{ mon_host }}"

-            - name: fail if cluster isn't in an acceptable state
-              fail:
-                msg: "cluster is not in an acceptable state!"
-          when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
-      when: inventory_hostname == groups[mon_group_name] | first
+                - name: fail if cluster isn't in an acceptable state
+                  fail:
+                    msg: "cluster is not in an acceptable state!"
+              when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
+          when: inventory_hostname == groups[mon_group_name] | first

-    - name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
-      file:
-        path: /var/lib/ceph/bootstrap-rbd-mirror
-        owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
-        group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
-        mode: '755'
-        state: directory
-      delegate_to: "{{ item }}"
-      with_items: "{{ groups[mon_group_name] }}"
-      when:
-        - cephx | bool
-        - inventory_hostname == groups[mon_group_name][0]
+        - name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
+          file:
+            path: /var/lib/ceph/bootstrap-rbd-mirror
+            owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
+            group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
+            mode: '755'
+            state: directory
+          delegate_to: "{{ item }}"
+          with_items: "{{ groups[mon_group_name] }}"
+          when:
+            - cephx | bool
+            - inventory_hostname == groups[mon_group_name][0]

-    - name: create potentially missing keys (rbd and rbd-mirror)
-      ceph_key:
-        name: "client.{{ item.0 }}"
-        dest: "/var/lib/ceph/{{ item.0 }}/"
-        caps:
-          mon: "allow profile {{ item.0 }}"
-        cluster: "{{ cluster }}"
-      delegate_to: "{{ item.1 }}"
-      with_nested:
-        - ['bootstrap-rbd', 'bootstrap-rbd-mirror']
-        - "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
-      environment:
-        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
-        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
-      when:
-        - cephx | bool
-        - inventory_hostname == groups[mon_group_name][0]
+        - name: create potentially missing keys (rbd and rbd-mirror)
+          ceph_key:
+            name: "client.{{ item.0 }}"
+            dest: "/var/lib/ceph/{{ item.0 }}/"
+            caps:
+              mon: "allow profile {{ item.0 }}"
+            cluster: "{{ cluster }}"
+          delegate_to: "{{ item.1 }}"
+          with_nested:
+            - ['bootstrap-rbd', 'bootstrap-rbd-mirror']
+            - "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
+          environment:
+            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+            CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+          when:
+            - cephx | bool
+            - inventory_hostname == groups[mon_group_name][0]

-    # NOTE: we mask the service so the RPM can't restart it
-    # after the package gets upgraded
-    - name: stop ceph mon
-      systemd:
-        name: ceph-mon@{{ item }}
-        state: stopped
-        enabled: no
-        masked: yes
-      with_items:
-        - "{{ ansible_facts['hostname'] }}"
-        - "{{ ansible_facts['fqdn'] }}"
+        # NOTE: we mask the service so the RPM can't restart it
+        # after the package gets upgraded
+        - name: stop ceph mon
+          systemd:
+            name: ceph-mon@{{ item }}
+            state: stopped
+            enabled: no
+            masked: yes
+          with_items:
+            - "{{ ansible_facts['hostname'] }}"
+            - "{{ ansible_facts['fqdn'] }}"

-    # only mask the service for mgr because it must be upgraded
-    # after ALL monitors, even when collocated
-    - name: mask the mgr service
-      systemd:
-        name: ceph-mgr@{{ ansible_facts['hostname'] }}
-        masked: yes
-      when: inventory_hostname in groups[mgr_group_name] | default([])
-            or groups[mgr_group_name] | default([]) | length == 0
+        # only mask the service for mgr because it must be upgraded
+        # after ALL monitors, even when collocated
+        - name: mask the mgr service
+          systemd:
+            name: ceph-mgr@{{ ansible_facts['hostname'] }}
+            masked: yes
+          when: inventory_hostname in groups[mgr_group_name] | default([])
+                or groups[mgr_group_name] | default([]) | length == 0

-    - import_role:
-        name: ceph-handler
-    - import_role:
-        name: ceph-common
-      when: not containerized_deployment | bool
-    - import_role:
-        name: ceph-container-common
-      when: containerized_deployment | bool
-    - import_role:
-        name: ceph-config
-    - import_role:
-        name: ceph-mon
+        - import_role:
+            name: ceph-handler
+        - import_role:
+            name: ceph-common
+          when: not containerized_deployment | bool
+        - import_role:
+            name: ceph-container-common
+          when: containerized_deployment | bool
+        - import_role:
+            name: ceph-config
+        - import_role:
+            name: ceph-mon

-    - name: start ceph mgr
-      systemd:
-        name: ceph-mgr@{{ ansible_facts['hostname'] }}
-        state: started
-        enabled: yes
-        masked: no
-      when: inventory_hostname in groups[mgr_group_name] | default([])
-            or groups[mgr_group_name] | default([]) | length == 0
+        - name: start ceph mgr
+          systemd:
+            name: ceph-mgr@{{ ansible_facts['hostname'] }}
+            state: started
+            enabled: yes
+            masked: no
+          when: inventory_hostname in groups[mgr_group_name] | default([])
+                or groups[mgr_group_name] | default([]) | length == 0

-    - name: non container | waiting for the monitor to join the quorum...
-      command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
-      register: ceph_health_raw
-      until:
-        - ceph_health_raw.rc == 0
-        - (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
-           hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
-      retries: "{{ health_mon_check_retries }}"
-      delay: "{{ health_mon_check_delay }}"
-      when: not containerized_deployment | bool
+        - name: non container | waiting for the monitor to join the quorum...
+          command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
+          register: ceph_health_raw
+          until:
+            - ceph_health_raw.rc == 0
+            - (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
+               hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
+          retries: "{{ health_mon_check_retries }}"
+          delay: "{{ health_mon_check_delay }}"
+          when: not containerized_deployment | bool

-    - name: container | waiting for the containerized monitor to join the quorum...
-      command: >
-        {{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
-      register: ceph_health_raw
-      until:
-        - ceph_health_raw.rc == 0
-        - (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
-           hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
-      retries: "{{ health_mon_check_retries }}"
-      delay: "{{ health_mon_check_delay }}"
-      when: containerized_deployment | bool
+        - name: container | waiting for the containerized monitor to join the quorum...
+          command: >
+            {{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
+          register: ceph_health_raw
+          until:
+            - ceph_health_raw.rc == 0
+            - (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
+               hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
+          retries: "{{ health_mon_check_retries }}"
+          delay: "{{ health_mon_check_delay }}"
+          when: containerized_deployment | bool
+      rescue:
+        - name: unmask the mon service
+          systemd:
+            name: ceph-mon@{{ item }}
+            enabled: yes
+            masked: no
+          with_items:
+            - "{{ ansible_facts['hostname'] }}"
+            - "{{ ansible_facts['fqdn'] }}"

+        - name: unmask the mgr service
+          systemd:
+            name: ceph-mgr@{{ ansible_facts['hostname'] }}
+            masked: no
+          when: inventory_hostname in groups[mgr_group_name] | default([])
+                or groups[mgr_group_name] | default([]) | length == 0

 - name: reset mon_host
   hosts: "{{ mon_group_name|default('mons') }}"