ceph-ansible/infrastructure-playbooks/rolling_update.yml

671 lines
19 KiB
YAML
Raw Blame History

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

---
# This playbook does a rolling update for all the Ceph services
#
# The value of 'serial:' adjusts the number of servers to be updated simultaneously.
# We recommend a value of 1, which means hosts of a group (e.g: monitor) will be
# upgraded one by one. It is really crucial for the update process to happen
# in a serialized fashion. DO NOT CHANGE THIS VALUE.
#
#
# If you run a Ceph community version, you have to change the variable: ceph_stable_release to the new release
#
# If you run Red Hat Ceph Storage and are doing a **major** update (e.g: from 2 to 3), you have two options:
# - if you use a CDN, you have to change the ceph_rhcs_version to a newer one
# - if you use an ISO, you have to change the ceph_rhcs_iso_path to the directory containing the new Ceph version
#
- name: confirm whether user really meant to upgrade the cluster
hosts: localhost
become: false
vars:
- mgr_group_name: mgrs
- jewel_minor_update: False
vars_prompt:
- name: ireallymeanit
prompt: Are you sure you want to upgrade the cluster?
default: 'no'
private: no
tasks:
- name: exit playbook, if user did not mean to upgrade cluster
fail:
msg: >
"Exiting rolling_update.yml playbook, cluster was NOT upgraded.
To upgrade the cluster, either say 'yes' on the prompt or
use `-e ireallymeanit=yes` on the command line when
invoking the playbook"
when: ireallymeanit != 'yes'
- name: fail if no mgr host is present in the inventory
fail:
msg: "Please add a mgr host to your inventory."
when:
- not jewel_minor_update
- groups.get(mgr_group_name, []) | length == 0
- name: gather facts and check the init system
hosts:
- "{{ mon_group_name|default('mons') }}"
- "{{ osd_group_name|default('osds') }}"
- "{{ mds_group_name|default('mdss') }}"
- "{{ rgw_group_name|default('rgws') }}"
- "{{ mgr_group_name|default('mgrs') }}"
- "{{ rbd_mirror_group_name|default('rbdmirrors') }}"
- "{{ nfs_group_name|default('nfss') }}"
- "{{ client_group_name|default('clients') }}"
become: True
gather_facts: False
vars:
delegate_facts_host: True
tasks:
- debug: msg="gather facts on all Ceph hosts for following reference"
- name: gather facts
setup:
when:
- not delegate_facts_host | bool
- name: gather and delegate facts
setup:
delegate_to: "{{ item }}"
delegate_facts: True
with_items: "{{ groups['all'] }}"
when:
- delegate_facts_host | bool
- set_fact: rolling_update=true
- name: upgrade ceph mon cluster
vars:
health_mon_check_retries: 5
health_mon_check_delay: 15
upgrade_ceph_packages: True
hosts:
- "{{ mon_group_name|default('mons') }}"
serial: 1
become: True
pre_tasks:
- name: set mon_host_count
set_fact: mon_host_count={{ groups[mon_group_name] | length }}
- debug: msg="WARNING - upgrading a ceph cluster with only one monitor node ({{ inventory_hostname }})"
when: mon_host_count | int == 1
- name: fail when single containerized monitor
fail:
msg: "Upgrades of a single monitor are not supported, also running 1 monitor is not recommended always use 3."
when:
- containerized_deployment
- mon_host_count | int == 1
- name: stop ceph mon
systemd:
name: ceph-mon@{{ ansible_hostname }}
state: stopped
enabled: yes
when:
- not containerized_deployment
roles:
- ceph-defaults
- { role: ceph-common, when: not containerized_deployment }
- { role: ceph-docker-common, when: containerized_deployment }
- ceph-config
- ceph-mon
post_tasks:
- name: start ceph mon
systemd:
name: ceph-mon@{{ ansible_hostname }}
state: started
enabled: yes
when:
- not containerized_deployment
- name: restart containerized ceph mon
systemd:
name: ceph-mon@{{ ansible_hostname }}
state: restarted
enabled: yes
daemon_reload: yes
when:
- containerized_deployment
- name: set mon_host_count
set_fact: mon_host_count={{ groups[mon_group_name] | length }}
- name: select a running monitor if multiple monitors
set_fact: mon_host={{ item }}
with_items: "{{ groups[mon_group_name] }}"
when:
- mon_host_count | int > 1
- item != inventory_hostname
- name: select first monitor if only one monitor
set_fact: mon_host={{ item }}
with_items: "{{ groups[mon_group_name][0] }}"
when:
- mon_host_count | int == 1
- name: non container | waiting for the monitor to join the quorum...
command: ceph --cluster "{{ cluster }}" -s --format json
register: ceph_health_raw
until: >
hostvars[mon_host]['ansible_hostname'] in (ceph_health_raw.stdout | from_json)["quorum_names"] or
hostvars[mon_host]['ansible_fqdn'] in (ceph_health_raw.stdout | from_json)["quorum_names"]
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
delegate_to: "{{ mon_host }}"
when:
- not containerized_deployment
- name: container | waiting for the containerized monitor to join the quorum...
command: docker exec ceph-mon-{{ hostvars[mon_host]['ansible_hostname'] }} ceph --cluster "{{ cluster }}" -s --format json
register: ceph_health_raw
until: >
hostvars[mon_host]['ansible_hostname'] in (ceph_health_raw.stdout | from_json)["quorum_names"] or
hostvars[mon_host]['ansible_fqdn'] in (ceph_health_raw.stdout | from_json)["quorum_names"]
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
delegate_to: "{{ mon_host }}"
when:
- containerized_deployment
- name: set osd flags
command: ceph --cluster {{ cluster }} osd set {{ item }}
with_items:
- noout
- noscrub
- nodeep-scrub
delegate_to: "{{ mon_host }}"
when: not containerized_deployment
- name: set containerized osd flags
command: |
docker exec ceph-mon-{{ hostvars[mon_host]['ansible_hostname'] }} ceph --cluster {{ cluster }} osd set {{ item }}
with_items:
- noout
- noscrub
- nodeep-scrub
delegate_to: "{{ mon_host }}"
when: containerized_deployment
- name: upgrade ceph mgr node
vars:
upgrade_ceph_packages: True
hosts:
- "{{ mgr_group_name|default('mgrs') }}"
serial: 1
become: True
pre_tasks:
- name: get current fsid
command: "ceph --cluster {{ cluster }} fsid"
register: cluster_uuid
delegate_to: "{{ groups[mon_group_name][0] }}"
- name: non container | create ceph mgr keyring(s)
command: "ceph --cluster {{ cluster }} auth get-or-create mgr.{{ hostvars[item]['ansible_hostname'] }} mon 'allow profile mgr' osd 'allow *' mds 'allow *' -o /etc/ceph/{{ cluster }}.mgr.{{ hostvars[item]['ansible_hostname'] }}.keyring"
args:
creates: "{{ ceph_conf_key_directory }}/{{ cluster }}.mgr.{{ hostvars[item]['ansible_hostname'] }}.keyring"
changed_when: false
delegate_to: "{{ groups[mon_group_name][0] }}"
with_items:
- "{{ groups.get(mgr_group_name, []) }}"
when:
- not containerized_deployment
- "{{ groups.get(mgr_group_name, []) | length > 0 }}"
- name: container | create ceph mgr keyring(s)
command: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} auth get-or-create mgr.{{ hostvars[item]['ansible_hostname'] }} mon 'allow profile mgr' osd 'allow *' mds 'allow *' -o /etc/ceph/{{ cluster }}.mgr.{{ hostvars[item]['ansible_hostname'] }}.keyring"
args:
creates: "{{ ceph_conf_key_directory }}/{{ cluster }}.mgr.{{ hostvars[item]['ansible_hostname'] }}.keyring"
changed_when: false
delegate_to: "{{ groups[mon_group_name][0] }}"
with_items:
- "{{ groups.get(mgr_group_name, []) }}"
when:
- containerized_deployment
- "{{ groups.get(mgr_group_name, []) | length > 0 }}"
- name: fetch ceph mgr key(s)
fetch:
src: "{{ ceph_conf_key_directory }}/{{ cluster }}.mgr.{{ hostvars[item]['ansible_hostname'] }}.keyring"
dest: "{{ fetch_directory }}/{{ fsid }}/{{ ceph_conf_key_directory }}/"
flat: yes
fail_on_missing: no
delegate_to: "{{ groups[mon_group_name][0] }}"
with_items:
- "{{ groups.get(mgr_group_name, []) }}"
# The following task has a failed_when: false
# to handle the scenario where no mgr existed before the upgrade
# or if we run a Ceph cluster before Luminous
- name: stop ceph mgr
systemd:
name: ceph-mgr@{{ ansible_hostname }}
state: stopped
enabled: yes
failed_when: false
when:
- not containerized_deployment
roles:
- ceph-defaults
- { role: ceph-common, when: not containerized_deployment }
- { role: ceph-docker-common, when: containerized_deployment }
- ceph-config
- { role: ceph-mgr,
when: "(ceph_release_num[ceph_release] >= ceph_release_num.luminous) or
(ceph_release_num[ceph_release] < ceph_release_num.luminous and rolling_update)" }
post_tasks:
- name: start ceph mgr
systemd:
name: ceph-mgr@{{ ansible_hostname }}
state: started
enabled: yes
when:
- not containerized_deployment
- name: restart containerized ceph mgr
systemd:
name: ceph-mgr@{{ ansible_hostname }}
state: restarted
enabled: yes
daemon_reload: yes
when:
- containerized_deployment
- name: upgrade ceph osds cluster
vars:
health_osd_check_retries: 40
health_osd_check_delay: 30
upgrade_ceph_packages: True
hosts:
- "{{ osd_group_name|default('osds') }}"
serial: 1
become: True
pre_tasks:
- name: get osd numbers
shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi"
register: osd_ids
changed_when: false
when: not containerized_deployment
- name: stop ceph osd
systemd:
name: ceph-osd@{{ item }}
state: stopped
enabled: yes
with_items: "{{ osd_ids.stdout_lines }}"
when:
- not containerized_deployment
roles:
- ceph-defaults
- { role: ceph-common, when: not containerized_deployment }
- { role: ceph-docker-common, when: containerized_deployment }
- ceph-config
- ceph-osd
post_tasks:
- name: get osd numbers
shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi"
register: osd_ids
changed_when: false
when: not containerized_deployment
- name: start ceph osd
systemd:
name: ceph-osd@{{ item }}
state: started
enabled: yes
with_items: "{{ osd_ids.stdout_lines }}"
when:
- not containerized_deployment
- name: restart containerized ceph osd
systemd:
name: ceph-osd@{{ item | basename }}
state: restarted
enabled: yes
daemon_reload: yes
with_items: "{{ devices }}"
when:
- containerized_deployment
- name: set_fact docker_exec_cmd_osd
set_fact:
docker_exec_cmd_update_osd: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
when:
- containerized_deployment
- name: get num_pgs - non container
command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} -s --format json"
register: ceph_pgs
delegate_to: "{{ groups[mon_group_name][0] }}"
- name: waiting for clean pgs...
command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} -s --format json"
register: ceph_health_post
until: >
((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) == 1
and
(ceph_health_post.stdout | from_json).pgmap.pgs_by_state.0.state_name == "active+clean"
delegate_to: "{{ groups[mon_group_name][0] }}"
retries: "{{ health_osd_check_retries }}"
delay: "{{ health_osd_check_delay }}"
when:
- (ceph_pgs.stdout | from_json).pgmap.num_pgs != 0
- name: unset osd flags
vars:
- jewel_minor_update: False
hosts:
- "{{ mon_group_name|default('mons') }}"
become: True
roles:
- ceph-defaults
tasks:
- name: set_fact docker_exec_cmd_osd
set_fact:
docker_exec_cmd_update_osd: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
when:
- containerized_deployment
- name: unset osd flags
command: "{{ docker_exec_cmd_update_osd|default('') }} ceph osd unset {{ item }} --cluster {{ cluster }}"
with_items:
- noout
- noscrub
- nodeep-scrub
delegate_to: "{{ groups[mon_group_name][0] }}"
- name: get osd versions
command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} versions"
register: ceph_versions
delegate_to: "{{ groups[mon_group_name][0] }}"
when:
- not jewel_minor_update
- name: set_fact ceph_versions_osd
set_fact:
ceph_versions_osd: "{{ (ceph_versions.stdout|from_json).osd }}"
delegate_to: "{{ groups[mon_group_name][0] }}"
when:
- not jewel_minor_update
# length == 1 means there is a single osds versions entry
# thus all the osds are running the same version
- name: complete osds upgrade
command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} osd require-osd-release luminous"
delegate_to: "{{ groups[mon_group_name][0] }}"
when:
- (ceph_versions.get('stdout', '{}')|from_json).get('osd', {}) | length == 1
- ceph_versions_osd | string | search("ceph version 12")
- not jewel_minor_update
- name: upgrade ceph mdss cluster
vars:
upgrade_ceph_packages: True
hosts:
- "{{ mds_group_name|default('mdss') }}"
serial: 1
become: True
pre_tasks:
- name: stop ceph mds
systemd:
name: ceph-mds@{{ ansible_hostname }}
state: stopped
enabled: yes
when:
- not containerized_deployment
roles:
- ceph-defaults
- { role: ceph-common, when: not containerized_deployment }
- { role: ceph-docker-common, when: containerized_deployment }
- ceph-config
- ceph-mds
post_tasks:
- name: start ceph mds
systemd:
name: ceph-mds@{{ ansible_hostname }}
state: started
enabled: yes
when:
- not containerized_deployment
- name: restart ceph mds
systemd:
name: ceph-mds@{{ ansible_hostname }}
state: restarted
enabled: yes
daemon_reload: yes
when:
- containerized_deployment
- name: upgrade ceph rgws cluster
vars:
upgrade_ceph_packages: True
hosts:
- "{{ rgw_group_name|default('rgws') }}"
serial: 1
become: True
pre_tasks:
- name: stop ceph rgw
systemd:
name: ceph-radosgw@rgw.{{ ansible_hostname }}
state: stopped
enabled: yes
when:
- not containerized_deployment
roles:
- ceph-defaults
- { role: ceph-common, when: not containerized_deployment }
- { role: ceph-docker-common, when: containerized_deployment }
- ceph-config
- ceph-rgw
post_tasks:
- name: start ceph rgw
systemd:
name: ceph-radosgw@rgw.{{ ansible_hostname }}
state: started
enabled: yes
when:
- not containerized_deployment
- name: restart containerized ceph rgw
systemd:
name: ceph-radosgw@rgw.{{ ansible_hostname }}
state: restarted
enabled: yes
daemon_reload: yes
when:
- containerized_deployment
- name: upgrade ceph rbd mirror node
vars:
upgrade_ceph_packages: True
hosts:
- "{{ rbd_mirror_group_name|default('rbdmirrors') }}"
serial: 1
become: True
pre_tasks:
# NOTE(leseb): these tasks have a 'failed_when: false'
# in case we run before luminous or after
- name: stop ceph rbd mirror before luminous
systemd:
name: "ceph-rbd-mirror@{{ ceph_rbd_mirror_local_user }}"
state: stopped
enabled: no
failed_when: false
- name: stop ceph rbd mirror for and after luminous
systemd:
name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}"
state: stopped
enabled: yes
failed_when: false
roles:
- ceph-defaults
- { role: ceph-common, when: not containerized_deployment }
- { role: ceph-docker-common, when: containerized_deployment }
- ceph-config
- ceph-rbd-mirror
post_tasks:
- name: start ceph rbd mirror
systemd:
name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}"
state: started
enabled: yes
when:
- not containerized_deployment
- name: restart containerized ceph rbd mirror
systemd:
name: ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}
state: restarted
enabled: yes
daemon_reload: yes
when:
- containerized_deployment
- name: upgrade ceph nfs node
vars:
upgrade_ceph_packages: True
hosts:
- "{{ nfs_group_name|default('nfss') }}"
serial: 1
become: True
pre_tasks:
# failed_when: false is here so that if we upgrade
# from a version of ceph that does not have nfs-ganesha
# then this task will not fail
- name: stop ceph nfs
systemd:
name: nfs-ganesha
state: stopped
enabled: yes
failed_when: false
when:
- not containerized_deployment
roles:
- ceph-defaults
- { role: ceph-common, when: not containerized_deployment }
- { role: ceph-docker-common, when: containerized_deployment }
- ceph-config
- { role: ceph-nfs,
when: "(ceph_release_num[ceph_release] >= ceph_release_num.luminous) or
(ceph_release_num[ceph_release] < ceph_release_num.luminous and rolling_update)" }
post_tasks:
- name: start nfs gateway
systemd:
name: nfs-ganesha
state: started
enabled: yes
when:
- not containerized_deployment
- ceph_nfs_enable_service
- name: systemd restart nfs container
systemd:
name: ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_hostname) }}
state: restarted
enabled: yes
daemon_reload: yes
when:
- ceph_nfs_enable_service
- containerized_deployment
- name: upgrade ceph client node
vars:
upgrade_ceph_packages: True
hosts:
- "{{ client_group_name|default('clients') }}"
serial: 1
become: True
roles:
- ceph-defaults
- { role: ceph-common, when: not containerized_deployment }
- { role: ceph-docker-common, when: containerized_deployment }
- ceph-config
- ceph-client
- name: show ceph status
hosts:
- "{{ mon_group_name|default('mons') }}"
become: True
roles:
- ceph-defaults
tasks:
- name: set_fact docker_exec_cmd_status
set_fact:
docker_exec_cmd_status: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
when:
- containerized_deployment
- name: show ceph status
command: "{{ docker_exec_cmd_status|default('') }} ceph --cluster {{ cluster }} -s"
delegate_to: "{{ groups[mon_group_name][0] }}"