mirror of https://github.com/ceph/ceph-ansible.git
rolling_update: unmask monitor service after a failure
If for some reason the playbook fails after the service was
stopped, disabled and masked, but before it got restarted, enabled and
unmasked, the playbook leaves the service masked, which can confuse
users and forces them to unmask the unit manually.
Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1917680
Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
(cherry picked from commit 07029e1bf1)
pull/6397/head
parent
fcd9544048
commit
1fd0661d3e
|
@ -128,144 +128,162 @@
|
||||||
serial: 1
|
serial: 1
|
||||||
become: True
|
become: True
|
||||||
tasks:
|
tasks:
|
||||||
- name: remove ceph aliases
|
- name: upgrade ceph mon cluster
|
||||||
file:
|
block:
|
||||||
path: /etc/profile.d/ceph-aliases.sh
|
- name: remove ceph aliases
|
||||||
state: absent
|
file:
|
||||||
when: containerized_deployment | bool
|
path: /etc/profile.d/ceph-aliases.sh
|
||||||
|
state: absent
|
||||||
|
when: containerized_deployment | bool
|
||||||
|
|
||||||
- name: set mon_host_count
|
- name: set mon_host_count
|
||||||
set_fact:
|
set_fact:
|
||||||
mon_host_count: "{{ groups[mon_group_name] | length }}"
|
mon_host_count: "{{ groups[mon_group_name] | length }}"
|
||||||
|
|
||||||
- name: fail when less than three monitors
|
- name: fail when less than three monitors
|
||||||
fail:
|
fail:
|
||||||
msg: "Upgrade of cluster with less than three monitors is not supported."
|
msg: "Upgrade of cluster with less than three monitors is not supported."
|
||||||
when: mon_host_count | int < 3
|
when: mon_host_count | int < 3
|
||||||
|
|
||||||
- name: select a running monitor
|
- name: select a running monitor
|
||||||
set_fact:
|
set_fact:
|
||||||
mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"
|
mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"
|
||||||
|
|
||||||
- import_role:
|
- import_role:
|
||||||
name: ceph-defaults
|
name: ceph-defaults
|
||||||
- import_role:
|
- import_role:
|
||||||
name: ceph-facts
|
name: ceph-facts
|
||||||
|
|
||||||
- block:
|
|
||||||
- name: get ceph cluster status
|
|
||||||
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
|
|
||||||
register: check_cluster_health
|
|
||||||
delegate_to: "{{ mon_host }}"
|
|
||||||
|
|
||||||
- block:
|
- block:
|
||||||
- name: display ceph health detail
|
- name: get ceph cluster status
|
||||||
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
|
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
|
||||||
|
register: check_cluster_health
|
||||||
delegate_to: "{{ mon_host }}"
|
delegate_to: "{{ mon_host }}"
|
||||||
|
|
||||||
- name: fail if cluster isn't in an acceptable state
|
- block:
|
||||||
fail:
|
- name: display ceph health detail
|
||||||
msg: "cluster is not in an acceptable state!"
|
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
|
||||||
when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
|
delegate_to: "{{ mon_host }}"
|
||||||
when: inventory_hostname == groups[mon_group_name] | first
|
|
||||||
|
|
||||||
- name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
|
- name: fail if cluster isn't in an acceptable state
|
||||||
file:
|
fail:
|
||||||
path: /var/lib/ceph/bootstrap-rbd-mirror
|
msg: "cluster is not in an acceptable state!"
|
||||||
owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
|
when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
|
||||||
group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
|
when: inventory_hostname == groups[mon_group_name] | first
|
||||||
mode: '755'
|
|
||||||
state: directory
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items: "{{ groups[mon_group_name] }}"
|
|
||||||
when:
|
|
||||||
- cephx | bool
|
|
||||||
- inventory_hostname == groups[mon_group_name][0]
|
|
||||||
|
|
||||||
- name: create potentially missing keys (rbd and rbd-mirror)
|
- name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
|
||||||
ceph_key:
|
file:
|
||||||
name: "client.{{ item.0 }}"
|
path: /var/lib/ceph/bootstrap-rbd-mirror
|
||||||
dest: "/var/lib/ceph/{{ item.0 }}/"
|
owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
|
||||||
caps:
|
group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
|
||||||
mon: "allow profile {{ item.0 }}"
|
mode: '755'
|
||||||
cluster: "{{ cluster }}"
|
state: directory
|
||||||
delegate_to: "{{ item.1 }}"
|
delegate_to: "{{ item }}"
|
||||||
with_nested:
|
with_items: "{{ groups[mon_group_name] }}"
|
||||||
- ['bootstrap-rbd', 'bootstrap-rbd-mirror']
|
when:
|
||||||
- "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
|
- cephx | bool
|
||||||
environment:
|
- inventory_hostname == groups[mon_group_name][0]
|
||||||
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
|
|
||||||
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
|
- name: create potentially missing keys (rbd and rbd-mirror)
|
||||||
when:
|
ceph_key:
|
||||||
- cephx | bool
|
name: "client.{{ item.0 }}"
|
||||||
- inventory_hostname == groups[mon_group_name][0]
|
dest: "/var/lib/ceph/{{ item.0 }}/"
|
||||||
|
caps:
|
||||||
|
mon: "allow profile {{ item.0 }}"
|
||||||
|
cluster: "{{ cluster }}"
|
||||||
|
delegate_to: "{{ item.1 }}"
|
||||||
|
with_nested:
|
||||||
|
- ['bootstrap-rbd', 'bootstrap-rbd-mirror']
|
||||||
|
- "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
|
||||||
|
environment:
|
||||||
|
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
|
||||||
|
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
|
||||||
|
when:
|
||||||
|
- cephx | bool
|
||||||
|
- inventory_hostname == groups[mon_group_name][0]
|
||||||
|
|
||||||
# NOTE: we mask the service so the RPM can't restart it
|
# NOTE: we mask the service so the RPM can't restart it
|
||||||
# after the package gets upgraded
|
# after the package gets upgraded
|
||||||
- name: stop ceph mon
|
- name: stop ceph mon
|
||||||
systemd:
|
systemd:
|
||||||
name: ceph-mon@{{ item }}
|
name: ceph-mon@{{ item }}
|
||||||
state: stopped
|
state: stopped
|
||||||
enabled: no
|
enabled: no
|
||||||
masked: yes
|
masked: yes
|
||||||
with_items:
|
with_items:
|
||||||
- "{{ ansible_facts['hostname'] }}"
|
- "{{ ansible_facts['hostname'] }}"
|
||||||
- "{{ ansible_facts['fqdn'] }}"
|
- "{{ ansible_facts['fqdn'] }}"
|
||||||
|
|
||||||
# only mask the service for mgr because it must be upgraded
|
# only mask the service for mgr because it must be upgraded
|
||||||
# after ALL monitors, even when collocated
|
# after ALL monitors, even when collocated
|
||||||
- name: mask the mgr service
|
- name: mask the mgr service
|
||||||
systemd:
|
systemd:
|
||||||
name: ceph-mgr@{{ ansible_facts['hostname'] }}
|
name: ceph-mgr@{{ ansible_facts['hostname'] }}
|
||||||
masked: yes
|
masked: yes
|
||||||
when: inventory_hostname in groups[mgr_group_name] | default([])
|
when: inventory_hostname in groups[mgr_group_name] | default([])
|
||||||
or groups[mgr_group_name] | default([]) | length == 0
|
or groups[mgr_group_name] | default([]) | length == 0
|
||||||
|
|
||||||
- import_role:
|
- import_role:
|
||||||
name: ceph-handler
|
name: ceph-handler
|
||||||
- import_role:
|
- import_role:
|
||||||
name: ceph-common
|
name: ceph-common
|
||||||
when: not containerized_deployment | bool
|
when: not containerized_deployment | bool
|
||||||
- import_role:
|
- import_role:
|
||||||
name: ceph-container-common
|
name: ceph-container-common
|
||||||
when: containerized_deployment | bool
|
when: containerized_deployment | bool
|
||||||
- import_role:
|
- import_role:
|
||||||
name: ceph-config
|
name: ceph-config
|
||||||
- import_role:
|
- import_role:
|
||||||
name: ceph-mon
|
name: ceph-mon
|
||||||
|
|
||||||
- name: start ceph mgr
|
- name: start ceph mgr
|
||||||
systemd:
|
systemd:
|
||||||
name: ceph-mgr@{{ ansible_facts['hostname'] }}
|
name: ceph-mgr@{{ ansible_facts['hostname'] }}
|
||||||
state: started
|
state: started
|
||||||
enabled: yes
|
enabled: yes
|
||||||
masked: no
|
masked: no
|
||||||
when: inventory_hostname in groups[mgr_group_name] | default([])
|
when: inventory_hostname in groups[mgr_group_name] | default([])
|
||||||
or groups[mgr_group_name] | default([]) | length == 0
|
or groups[mgr_group_name] | default([]) | length == 0
|
||||||
|
|
||||||
- name: non container | waiting for the monitor to join the quorum...
|
- name: non container | waiting for the monitor to join the quorum...
|
||||||
command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
|
command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
|
||||||
register: ceph_health_raw
|
register: ceph_health_raw
|
||||||
until:
|
until:
|
||||||
- ceph_health_raw.rc == 0
|
- ceph_health_raw.rc == 0
|
||||||
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
|
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
|
||||||
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
|
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
|
||||||
retries: "{{ health_mon_check_retries }}"
|
retries: "{{ health_mon_check_retries }}"
|
||||||
delay: "{{ health_mon_check_delay }}"
|
delay: "{{ health_mon_check_delay }}"
|
||||||
when: not containerized_deployment | bool
|
when: not containerized_deployment | bool
|
||||||
|
|
||||||
- name: container | waiting for the containerized monitor to join the quorum...
|
- name: container | waiting for the containerized monitor to join the quorum...
|
||||||
command: >
|
command: >
|
||||||
{{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
|
{{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
|
||||||
register: ceph_health_raw
|
register: ceph_health_raw
|
||||||
until:
|
until:
|
||||||
- ceph_health_raw.rc == 0
|
- ceph_health_raw.rc == 0
|
||||||
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
|
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
|
||||||
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
|
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
|
||||||
retries: "{{ health_mon_check_retries }}"
|
retries: "{{ health_mon_check_retries }}"
|
||||||
delay: "{{ health_mon_check_delay }}"
|
delay: "{{ health_mon_check_delay }}"
|
||||||
when: containerized_deployment | bool
|
when: containerized_deployment | bool
|
||||||
|
|
||||||
|
rescue:
|
||||||
|
- name: unmask the mon service
|
||||||
|
systemd:
|
||||||
|
name: ceph-mon@{{ item }}
|
||||||
|
enabled: yes
|
||||||
|
masked: no
|
||||||
|
with_items:
|
||||||
|
- "{{ ansible_facts['hostname'] }}"
|
||||||
|
- "{{ ansible_facts['fqdn'] }}"
|
||||||
|
|
||||||
|
- name: unmask the mgr service
|
||||||
|
systemd:
|
||||||
|
name: ceph-mgr@{{ ansible_facts['hostname'] }}
|
||||||
|
masked: no
|
||||||
|
when: inventory_hostname in groups[mgr_group_name] | default([])
|
||||||
|
or groups[mgr_group_name] | default([]) | length == 0
|
||||||
|
|
||||||
- name: reset mon_host
|
- name: reset mon_host
|
||||||
hosts: "{{ mon_group_name|default('mons') }}"
|
hosts: "{{ mon_group_name|default('mons') }}"
|
||||||
|
|
Loading…
Reference in New Issue