# Source: ceph-ansible/infrastructure-playbooks/rolling_update.yml
# (1401 lines, 48 KiB, YAML)

---
# This playbook does a rolling update for all the Ceph services
#
# The value of 'serial:' adjusts the number of servers to be updated simultaneously.
# We recommend a value of 1, which means hosts of a group (e.g: monitor) will be
# upgraded one by one. It is really crucial for the update process to happen
# in a serialized fashion. DO NOT CHANGE THIS VALUE.
#
#
# If you run a Ceph community version, set the variable `ceph_stable_release` to the new release.
# Safety gate, executed on the control node only: the run is aborted unless
# the operator answers 'yes' at the prompt (or passes -e ireallymeanit=yes).
- name: Confirm whether user really meant to upgrade the cluster
  hosts: localhost
  become: false
  gather_facts: false
  tags: always
  vars:
    mgr_group_name: mgrs
  vars_prompt:
    - name: ireallymeanit  # noqa: name[casing]
      prompt: Are you sure you want to upgrade the cluster?
      default: 'no'
      private: false
  tasks:
    # Anything other than the literal string 'yes' stops the playbook here.
    - name: Exit playbook, if user did not mean to upgrade cluster
      when: ireallymeanit != 'yes'
      ansible.builtin.fail:
        msg: >
          "Exiting rolling_update.yml playbook, cluster was NOT upgraded.
          To upgrade the cluster, either say 'yes' on the prompt or
          use `-e ireallymeanit=yes` on the command line when
          invoking the playbook"

    - name: Import_role ceph-defaults
      ansible.builtin.import_role:
        name: ceph-defaults

    # Only runs when the inventory contains a non-empty group named by
    # grafana_server_group_name (falling back to 'grafana-server').
    - name: Check if a legacy grafana-server group exists
      when: groups.get((grafana_server_group_name | default('grafana-server')), []) | length > 0
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: convert_grafana_server_group_name.yml
# Pre-flight play: gathers facts for every Ceph host, marks the run as a
# rolling update, then imports the facts/infra/validate/container roles.
# Any failure on any host stops the whole run (any_errors_fatal).
#
# The client group is always referenced through client_group_name (falling
# back to 'clients') so that an inventory using a custom client group name is
# handled the same way as by the "Gather facts" task and the play's host list.
- name: Gather facts and check the init system
  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ client_group_name|default('clients') }}"
    - "{{ monitoring_group_name|default('monitoring') }}"
  tags: always
  any_errors_fatal: true
  become: true
  gather_facts: false
  vars:
    delegate_facts_host: true
  tasks:
    - name: Gather facts on all Ceph hosts for following reference
      ansible.builtin.debug:
        msg: "gather facts on all Ceph hosts for following reference"

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    # Direct gathering: used when fact delegation is disabled, and always for
    # hosts of the client group (they are excluded from the delegated run).
    - name: Gather facts
      ansible.builtin.setup:
        gather_subset:
          - 'all'
          - '!facter'
          - '!ohai'
      when: not delegate_facts_host | bool or inventory_hostname in groups.get(client_group_name, [])

    # Delegated gathering: a single host loops over every non-client host of
    # the inventory and stores the facts on the delegated host.
    - name: Gather and delegate facts
      ansible.builtin.setup:
        gather_subset:
          - 'all'
          - '!facter'
          - '!ohai'
      delegate_to: "{{ item }}"
      delegate_facts: true
      with_items: "{{ groups['all'] | difference(groups.get(client_group_name | default('clients'), [])) }}"
      run_once: true
      when: delegate_facts_host | bool

    - name: Set_fact rolling_update
      ansible.builtin.set_fact:
        rolling_update: true

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Import ceph-infra role
      ansible.builtin.import_role:
        name: ceph-infra
      tags: ceph_infra

    - name: Import ceph-validate role
      ansible.builtin.import_role:
        name: ceph-validate

    # Skipped on hosts that are members of the client group only, except for
    # the first client of that group.
    - name: Import ceph-container-engine role
      ansible.builtin.import_role:
        name: ceph-container-engine
      when:
        - (group_names != [client_group_name | default('clients')]) or (inventory_hostname == groups.get(client_group_name | default('clients'), ['']) | first)
        - (containerized_deployment | bool) or (dashboard_enabled | bool)

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
        tasks_from: registry
      when:
        - (group_names != [client_group_name | default('clients')]) or (inventory_hostname == groups.get(client_group_name | default('clients'), ['']) | first)
        - (containerized_deployment | bool) or (dashboard_enabled | bool)
        - ceph_docker_registry_auth | bool

    # Containerized deployments only: query the ceph version once, on the
    # first monitor, and refuse to continue unless it reports 'squid'.
    - name: Check ceph release in container image
      when:
        - groups.get(mon_group_name, []) | length > 0
        - containerized_deployment | bool
      delegate_to: "{{ groups[mon_group_name][0] }}"
      run_once: true
      block:
        - name: Get the ceph release being deployed
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} --version"
          register: ceph_version
          changed_when: false

        - name: Check ceph release being deployed
          ansible.builtin.fail:
            msg: "This version of ceph-ansible is intended for upgrading to Ceph Squid only."
          when: "'squid' not in ceph_version.stdout.split()"
# Upgrades the monitors one host at a time (serial: 1). All the work happens
# inside a block; if anything in it fails, the rescue section unmasks the
# mon (and mgr) units again and then fails the play explicitly.
- name: Upgrade ceph mon cluster
  hosts: "{{ mon_group_name|default('mons') }}"
  tags: mons
  serial: 1
  become: true
  gather_facts: false
  vars:
    health_mon_check_retries: 5
    health_mon_check_delay: 15
    upgrade_ceph_packages: true
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Upgrade ceph mon cluster
      block:
        - name: Remove ceph aliases
          when: containerized_deployment | bool
          ansible.builtin.file:
            path: /etc/profile.d/ceph-aliases.sh
            state: absent

        - name: Set mon_host_count
          ansible.builtin.set_fact:
            mon_host_count: "{{ groups[mon_group_name] | length }}"

        - name: Fail when less than three monitors
          when: mon_host_count | int < 3
          ansible.builtin.fail:
            msg: "Upgrade of cluster with less than three monitors is not supported."

        # Commands are delegated to another monitor than the one currently
        # being processed: the last one of the group minus the current host.
        - name: Select a running monitor
          ansible.builtin.set_fact:
            mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"

        - name: Import ceph-facts role
          ansible.builtin.import_role:
            name: ceph-facts

        # Health and quorum are verified only while handling the first
        # monitor of the group.
        - name: Check Ceph monitors quorum status
          when: inventory_hostname == groups[mon_group_name] | first
          block:
            - name: Get ceph cluster status
              ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
              register: check_cluster_health
              delegate_to: "{{ mon_host }}"
              changed_when: false

            # On HEALTH_ERR, print the detailed health and then abort.
            - name: Display health status before failing
              when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
              block:
                - name: Display ceph health detail
                  ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
                  delegate_to: "{{ mon_host }}"
                  changed_when: false

                - name: Fail if cluster isn't in an acceptable state
                  ansible.builtin.fail:
                    msg: "cluster is not in an acceptable state!"

            - name: Get the ceph quorum status
              ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} quorum_status -f json"
              register: check_quorum_status
              delegate_to: "{{ mon_host }}"
              changed_when: false

            # Every monitor of the group must currently be part of the quorum.
            - name: Fail if the cluster quorum isn't in an acceptable state
              when: (check_quorum_status.stdout | from_json).quorum | length != groups[mon_group_name] | length
              ansible.builtin.fail:
                msg: "cluster quorum is not in an acceptable state!"

        # Driven from the first monitor and delegated to each monitor in turn.
        - name: Ensure /var/lib/ceph/bootstrap-rbd-mirror is present
          when:
            - cephx | bool
            - inventory_hostname == groups[mon_group_name][0]
          ansible.builtin.file:
            path: /var/lib/ceph/bootstrap-rbd-mirror
            owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
            group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
            mode: '755'
            state: directory
          delegate_to: "{{ item }}"
          with_items: "{{ groups[mon_group_name] }}"

        - name: Create potentially missing keys (rbd and rbd-mirror)
          when:
            - cephx | bool
            - inventory_hostname == groups[mon_group_name][0]
          ceph_key:
            name: "client.{{ item.0 }}"
            dest: "/var/lib/ceph/{{ item.0 }}/"
            caps:
              mon: "allow profile {{ item.0 }}"
            cluster: "{{ cluster }}"
          delegate_to: "{{ item.1 }}"
          with_nested:
            - ['bootstrap-rbd', 'bootstrap-rbd-mirror']
            - "{{ groups[mon_group_name] }}"  # so the key goes on all the nodes
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"

        # NOTE: we mask the service so the RPM can't restart it
        # after the package gets upgraded
        - name: Stop ceph mon
          ansible.builtin.systemd:
            name: ceph-mon@{{ item }}
            state: stopped
            enabled: false
            masked: true
          loop:
            - "{{ ansible_facts['hostname'] }}"
            - "{{ ansible_facts['fqdn'] }}"

        # only mask the service for mgr because it must be upgraded
        # after ALL monitors, even when collocated
        - name: Mask the mgr service
          when: inventory_hostname in groups[mgr_group_name] | default([])
                or groups[mgr_group_name] | default([]) | length == 0
          ansible.builtin.systemd:
            name: ceph-mgr@{{ ansible_facts['hostname'] }}
            masked: true

        - name: Import ceph-handler role
          ansible.builtin.import_role:
            name: ceph-handler

        - name: Import ceph-common role
          when: not containerized_deployment | bool
          ansible.builtin.import_role:
            name: ceph-common

        - name: Import ceph-container-common role
          when: containerized_deployment | bool
          ansible.builtin.import_role:
            name: ceph-container-common

        - name: Import ceph-config role
          ansible.builtin.import_role:
            name: ceph-config

        - name: Import ceph-mon role
          ansible.builtin.import_role:
            name: ceph-mon

        - name: Start ceph mgr
          when: inventory_hostname in groups[mgr_group_name] | default([])
                or groups[mgr_group_name] | default([]) | length == 0
          ansible.builtin.systemd:
            name: ceph-mgr@{{ ansible_facts['hostname'] }}
            state: started
            enabled: true
            masked: false

        # The resulting facts are stored on the first monitor; the quorum
        # checks below read its _current_monitor_address from hostvars.
        - name: Import_role ceph-facts
          ansible.builtin.import_role:
            name: ceph-facts
            tasks_from: set_monitor_address.yml
          delegate_to: "{{ groups[mon_group_name][0] }}"
          delegate_facts: true

        # Poll quorum_status until this host (short name or fqdn) is listed
        # in quorum_names.
        - name: Non container | waiting for the monitor to join the quorum...
          when: not containerized_deployment | bool
          ansible.builtin.command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
          register: ceph_health_raw
          until:
            - ceph_health_raw.rc == 0
            - (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
              hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
          retries: "{{ health_mon_check_retries }}"
          delay: "{{ health_mon_check_delay }}"
          changed_when: false

        # Same check, executed inside the monitor container.
        - name: Container | waiting for the containerized monitor to join the quorum...
          when: containerized_deployment | bool
          ansible.builtin.command: >
            {{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
          register: ceph_health_raw
          until:
            - ceph_health_raw.rc == 0
            - (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
              hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
          retries: "{{ health_mon_check_retries }}"
          delay: "{{ health_mon_check_delay }}"
          changed_when: false

      # Failure path: undo the masking done above, then stop the playbook.
      rescue:
        - name: Import ceph-defaults role
          ansible.builtin.import_role:
            name: ceph-defaults

        - name: Unmask the mon service
          ansible.builtin.systemd:
            name: ceph-mon@{{ ansible_facts['hostname'] }}
            enabled: true
            masked: false

        - name: Unmask the mgr service
          when: inventory_hostname in groups[mgr_group_name] | default([])
                or groups[mgr_group_name] | default([]) | length == 0
          ansible.builtin.systemd:
            name: ceph-mgr@{{ ansible_facts['hostname'] }}
            masked: false

        - name: Stop the playbook execution
          ansible.builtin.fail:
            msg: "There was an error during monitor upgrade. Please, check the previous task results."
# The monitor upgrade play pointed mon_host at "another" monitor for each
# host; set it back to the first monitor of the group on every mon.
- name: Reset mon_host
  hosts: "{{ mon_group_name|default('mons') }}"
  become: true
  gather_facts: false
  tags: always
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Reset mon_host fact
      ansible.builtin.set_fact:
        mon_host: "{{ groups[mon_group_name][0] }}"
# Runs against the monitor hosts, one at a time. The whole block is skipped
# as soon as the inventory defines a non-empty mgr group; in that case the
# dedicated mgr play takes care of the managers.
- name: Upgrade ceph mgr nodes when implicitly collocated on monitors
  hosts: "{{ mon_group_name|default('mons') }}"
  tags: mgrs
  serial: 1
  become: true
  gather_facts: false
  vars:
    health_mon_check_retries: 5
    health_mon_check_delay: 15
    upgrade_ceph_packages: true
  tasks:
    - name: Upgrade mgrs when no mgr group explicitly defined in inventory
      when: groups.get(mgr_group_name, []) | length == 0
      block:
        # Stop and mask the unit before the roles below are applied.
        - name: Stop ceph mgr
          ansible.builtin.systemd:
            name: ceph-mgr@{{ ansible_facts['hostname'] }}
            state: stopped
            masked: true

        - name: Import ceph-defaults role
          ansible.builtin.import_role:
            name: ceph-defaults

        - name: Import ceph-facts role
          ansible.builtin.import_role:
            name: ceph-facts

        - name: Import ceph-handler role
          ansible.builtin.import_role:
            name: ceph-handler

        - name: Import ceph-common role
          when: not containerized_deployment | bool
          ansible.builtin.import_role:
            name: ceph-common

        - name: Import ceph-container-common role
          when: containerized_deployment | bool
          ansible.builtin.import_role:
            name: ceph-container-common

        - name: Import ceph-config role
          ansible.builtin.import_role:
            name: ceph-config

        - name: Import ceph-mgr role
          ansible.builtin.import_role:
            name: ceph-mgr
# Upgrades the hosts of the dedicated mgr group, one at a time.
- name: Upgrade ceph mgr nodes
  hosts: "{{ mgr_group_name|default('mgrs') }}"
  tags: mgrs
  serial: 1
  become: true
  gather_facts: false
  vars:
    upgrade_ceph_packages: true
    ceph_release: "{{ ceph_stable_release }}"
  tasks:
    # Both systemd tasks below use failed_when: false so that they never
    # fail, which covers the scenario where no mgr existed before the
    # upgrade or where the cluster runs a release older than Luminous.
    - name: Stop ceph mgr
      failed_when: false
      ansible.builtin.systemd:
        name: ceph-mgr@{{ ansible_facts['hostname'] }}
        state: stopped
        enabled: false
        masked: false

    - name: Mask ceph mgr systemd unit
      failed_when: false
      ansible.builtin.systemd:
        name: ceph-mgr@{{ ansible_facts['hostname'] }}
        masked: true

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      when: not containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-common

    - name: Import ceph-container-common role
      when: containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-container-common

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-mgr role
      ansible.builtin.import_role:
        name: ceph-mgr
# Prepares the cluster for the OSD upgrade. Everything in the block runs once
# and is delegated to the first monitor: the balancer and the per-pool pg
# autoscaler are switched off (their previous state is recorded first), then
# the noout and nodeep-scrub flags are set.
- name: Set osd flags
  hosts: "{{ osd_group_name | default('osds') }}"
  become: true
  gather_facts: false
  tags: osds
  tasks:
    - name: Import ceph-defaults
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - name: Set osd flags, disable autoscaler and balancer
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"
      block:
        - name: Get pool list
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} osd pool ls detail -f json"
          register: pool_list
          changed_when: false
          check_mode: false

        # Registered so that the "Complete osd upgrade" play can decide
        # whether the balancer has to be switched back on.
        - name: Get balancer module status
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
          register: balancer_status_update
          run_once: true
          changed_when: false
          check_mode: false

        # Accumulates one {'name', 'mode'} entry per pool.
        - name: Set_fact pools_pgautoscaler_mode
          ansible.builtin.set_fact:
            pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
          with_items: "{{ pool_list.stdout | default('{}') | from_json }}"

        - name: Disable balancer
          when: (balancer_status_update.stdout | from_json)['active'] | bool
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
          changed_when: false

        # Only pools whose recorded mode is 'on' are touched.
        - name: Disable pg autoscale on pools
          when:
            - pools_pgautoscaler_mode is defined
            - item.mode == 'on'
          ceph_pool:
            name: "{{ item.name }}"
            cluster: "{{ cluster }}"
            pg_autoscale_mode: false
          with_items: "{{ pools_pgautoscaler_mode }}"
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"

        - name: Set osd flags
          ceph_osd_flag:
            name: "{{ item }}"
            cluster: "{{ cluster }}"
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"
          loop:
            - noout
            - nodeep-scrub
# Upgrades the OSD hosts one at a time (serial: 1). For each host: the local
# OSD ids are listed, the ceph-osd units are stopped and masked, the roles are
# re-applied, and the play then waits until every placement group reports
# active+clean before moving on to the next host.
#
# The play targets osd_group_name (falling back to 'osds'), like the
# "Set osd flags" and "Complete osd upgrade" plays, so that an inventory with
# a custom OSD group name is upgraded instead of being silently skipped.
- name: Upgrade ceph osds cluster
  vars:
    health_osd_check_retries: 600
    health_osd_check_delay: 2
    upgrade_ceph_packages: true
  hosts: "{{ osd_group_name | default('osds') }}"
  tags: osds
  serial: 1
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    # Lists the entries of /var/lib/ceph/osd and keeps what follows the last
    # '-' of each name; prints nothing when the directory does not exist.
    - name: Get osd numbers - non container
      ansible.builtin.shell: if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi  # noqa: risky-shell-pipe
      register: osd_ids
      changed_when: false

    - name: Set num_osds
      ansible.builtin.set_fact:
        num_osds: "{{ osd_ids.stdout_lines | default([]) | length }}"

    # Prefix used by the pg check below to run ceph inside the container of
    # the first monitor.
    - name: Set_fact container_exec_cmd_osd
      ansible.builtin.set_fact:
        container_exec_cmd_update_osd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }}"
      when: containerized_deployment | bool

    - name: Stop ceph osd
      ansible.builtin.systemd:
        name: ceph-osd@{{ item }}
        state: stopped
        enabled: false
        masked: true
      with_items: "{{ osd_ids.stdout_lines }}"

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-osd role
      ansible.builtin.import_role:
        name: ceph-osd

    - name: Scan ceph-disk osds with ceph-volume if deploying nautilus
      ceph_volume_simple_scan:
        cluster: "{{ cluster }}"
        force: true
      environment:
        CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
      when: not containerized_deployment | bool

    - name: Activate scanned ceph-disk osds and migrate to ceph-volume if deploying nautilus
      ceph_volume_simple_activate:
        cluster: "{{ cluster }}"
        osd_all: true
      environment:
        CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
      when: not containerized_deployment | bool

    # Retried until at least one pg state is reported and the pg counts of
    # all states starting with active+clean add up to the total number of pgs.
    - name: Waiting for clean pgs...
      ansible.builtin.command: "{{ container_exec_cmd_update_osd | default('') }} ceph --cluster {{ cluster }} pg stat --format json"
      register: ceph_health_post
      until: >
        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | length) > 0)
        and
        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | selectattr('name', 'search', '^active\\+clean') | map(attribute='num') | list | sum) == (ceph_health_post.stdout | from_json).pg_summary.num_pgs)
      delegate_to: "{{ groups[mon_group_name][0] }}"
      changed_when: false
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"
# Reverts what the "Set osd flags" play did. Runs once, delegated to the
# first monitor: pg autoscaling is re-enabled on the pools recorded as 'on',
# the noout/nodeep-scrub flags are removed and the balancer is switched back
# on if it was reported active before the upgrade.
- name: Complete osd upgrade
  hosts: "{{ osd_group_name | default('osds') }}"
  become: true
  gather_facts: false
  tags: osds
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - name: Unset osd flags, re-enable pg autoscaler and balancer
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"
      block:
        - name: Re-enable pg autoscale on pools
          when:
            - pools_pgautoscaler_mode is defined
            - item.mode == 'on'
          ceph_pool:
            name: "{{ item.name }}"
            cluster: "{{ cluster }}"
            pg_autoscale_mode: true
          with_items: "{{ pools_pgautoscaler_mode }}"
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"

        - name: Unset osd flags
          ceph_osd_flag:
            name: "{{ item }}"
            cluster: "{{ cluster }}"
            state: absent
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"
          loop:
            - noout
            - nodeep-scrub

        # balancer_status_update is registered by the "Set osd flags" play.
        - name: Re-enable balancer
          when: (balancer_status_update.stdout | from_json)['active'] | bool
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
          changed_when: false
# Runs on the first monitor and does nothing unless the mds group has hosts.
# With more than one MDS: max_mds is reduced to 1, the play waits for rank 0
# to be the only one left, works out which host carries the active MDS, and
# stops/masks all the others (registered in the in-memory group
# 'standby_mdss'). In every case the host to upgrade first is registered in
# the in-memory group 'active_mdss'.
- name: Upgrade ceph mdss cluster, deactivate all rank > 0
  hosts: "{{ mon_group_name | default('mons') }}[0]"
  become: true
  gather_facts: false
  tags: mdss
  tasks:
    - name: Deactivate all mds rank > 0
      when: groups.get(mds_group_name, []) | length > 0
      block:
        - name: Import ceph-defaults role
          ansible.builtin.import_role:
            name: ceph-defaults

        - name: Import ceph-facts role
          ansible.builtin.import_role:
            name: ceph-facts

        - name: Deactivate all mds rank > 0 if any
          when: groups.get(mds_group_name, []) | length > 1
          block:
            - name: Set max_mds 1 on ceph fs
              ceph_fs:
                name: "{{ cephfs }}"
                cluster: "{{ cluster }}"
                data: "{{ cephfs_data_pool.name }}"
                metadata: "{{ cephfs_metadata_pool.name }}"
                max_mds: 1
              environment:
                CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
                CEPH_CONTAINER_BINARY: "{{ container_binary }}"

            # Polls the filesystem info (up to 720 tries, 5 s apart) until
            # mdsmap.in contains exactly one entry and that entry is 0.
            - name: Wait until only rank 0 is up
              ceph_fs:
                name: "{{ cephfs }}"
                cluster: "{{ cluster }}"
                state: info
              register: wait_rank_zero
              retries: 720
              delay: 5
              until: (wait_rank_zero.stdout | from_json).mdsmap.in | length == 1 and (wait_rank_zero.stdout | from_json).mdsmap.in[0] == 0
              environment:
                CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
                CEPH_CONTAINER_BINARY: "{{ container_binary }}"

            - name: Get name of remaining active mds
              ansible.builtin.command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
              changed_when: false
              register: _mds_active_name

            # Loops over the mdsmap 'info' entries of the first filesystem
            # and keeps the name of the entry visited last.
            - name: Set_fact mds_active_name
              ansible.builtin.set_fact:
                mds_active_name: "{{ (_mds_active_name.stdout | from_json)['filesystems'][0]['mdsmap']['info'][item.key]['name'] }}"
              with_dict: "{{ (_mds_active_name.stdout | default('{}') | from_json).filesystems[0]['mdsmap']['info'] | default({}) }}"

            # Maps that daemon name back to an inventory host by comparing it
            # with each MDS host's short hostname.
            - name: Set_fact mds_active_host
              when: hostvars[item]['ansible_facts']['hostname'] == mds_active_name
              ansible.builtin.set_fact:
                mds_active_host: "{{ [hostvars[item]['inventory_hostname']] }}"
              with_items: "{{ groups[mds_group_name] }}"

            - name: Create standby_mdss group
              ansible.builtin.add_host:
                name: "{{ item }}"
                groups: standby_mdss
                ansible_host: "{{ hostvars[item]['ansible_host'] | default(omit) }}"
                ansible_port: "{{ hostvars[item]['ansible_port'] | default(omit) }}"
              with_items: "{{ groups[mds_group_name] | difference(mds_active_host) }}"

            - name: Stop standby ceph mds
              when: groups['standby_mdss'] | default([]) | length > 0
              ansible.builtin.systemd:
                name: "ceph-mds@{{ hostvars[item]['ansible_facts']['hostname'] }}"
                state: stopped
                enabled: false
              delegate_to: "{{ item }}"
              with_items: "{{ groups['standby_mdss'] }}"

            # dedicated task for masking systemd unit
            # somehow, having a single task doesn't work in containerized context
            - name: Mask systemd units for standby ceph mds
              when: groups['standby_mdss'] | default([]) | length > 0
              ansible.builtin.systemd:
                name: "ceph-mds@{{ hostvars[item]['ansible_facts']['hostname'] }}"
                masked: true
              delegate_to: "{{ item }}"
              with_items: "{{ groups['standby_mdss'] }}"

            # Polls 'fs dump' (up to 300 tries, 5 s apart) until the standbys
            # list is empty.
            - name: Wait until all standbys mds are stopped
              ansible.builtin.command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
              changed_when: false
              register: wait_standbys_down
              retries: 300
              delay: 5
              until: (wait_standbys_down.stdout | from_json).standbys | length == 0

        # Falls back to the first host of the mds group when mds_active_host
        # was never set (the block above is skipped with a single MDS).
        - name: Create active_mdss group
          ansible.builtin.add_host:
            name: "{{ mds_active_host[0] if mds_active_host is defined else groups.get(mds_group_name)[0] }}"
            groups: active_mdss
            ansible_host: "{{ hostvars[mds_active_host[0] if mds_active_host is defined else groups.get(mds_group_name)[0]]['ansible_host'] | default(omit) }}"
            ansible_port: "{{ hostvars[mds_active_host[0] if mds_active_host is defined else groups.get(mds_group_name)[0]]['ansible_port'] | default(omit) }}"
# Upgrades the host(s) of the in-memory 'active_mdss' group built by the
# previous play.
- name: Upgrade active mds
  hosts: active_mdss
  tags: mdss
  become: true
  gather_facts: false
  vars:
    upgrade_ceph_packages: true
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    # Package-based deployments only: disable and mask the unit before the
    # packages are upgraded.
    - name: Prevent restart from the packaging
      when: not containerized_deployment | bool
      ansible.builtin.systemd:
        name: ceph-mds@{{ ansible_facts['hostname'] }}
        enabled: false
        masked: true

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      when: not containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-common

    - name: Import ceph-container-common role
      when: containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-container-common

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-mds role
      ansible.builtin.import_role:
        name: ceph-mds

    # Package-based deployments: unmask, enable and restart the unit.
    - name: Restart ceph mds
      when: not containerized_deployment | bool
      ansible.builtin.systemd:
        name: ceph-mds@{{ ansible_facts['hostname'] }}
        state: restarted
        enabled: true
        masked: false

    # Containerized deployments: only a container stop is issued here.
    # NOTE(review): presumably the service manager starts the container
    # again afterwards - confirm against the ceph-mds unit definition.
    - name: Restart active mds
      when: containerized_deployment | bool
      ansible.builtin.command: "{{ container_binary }} stop ceph-mds-{{ ansible_facts['hostname'] }}"
      changed_when: false
# Upgrades the hosts of the in-memory 'standby_mdss' group, then restores
# max_mds to mds_max_mds once, while handling the last standby of the group.
- name: Upgrade standbys ceph mdss cluster
  hosts: standby_mdss
  tags: mdss
  become: true
  gather_facts: false
  vars:
    upgrade_ceph_packages: true
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    # Package-based deployments only: disable and mask the unit before the
    # packages are upgraded.
    - name: Prevent restarts from the packaging
      when: not containerized_deployment | bool
      ansible.builtin.systemd:
        name: ceph-mds@{{ ansible_facts['hostname'] }}
        enabled: false
        masked: true

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      when: not containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-common

    - name: Import ceph-container-common role
      when: containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-container-common

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-mds role
      ansible.builtin.import_role:
        name: ceph-mds

    # Delegated to the first monitor; executed only for the last host of the
    # standby_mdss group.
    - name: Set max_mds
      when: inventory_hostname == groups['standby_mdss'] | last
      ceph_fs:
        name: "{{ cephfs }}"
        cluster: "{{ cluster }}"
        max_mds: "{{ mds_max_mds }}"
        data: "{{ cephfs_data_pool.name }}"
        metadata: "{{ cephfs_metadata_pool.name }}"
      delegate_to: "{{ groups[mon_group_name][0] }}"
      environment:
        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
# Upgrades the RADOS gateway hosts one at a time: the radosgw units are
# stopped, disabled and masked, then the roles are re-applied.
- name: Upgrade ceph rgws cluster
  hosts: "{{ rgw_group_name|default('rgws') }}"
  tags: rgws
  serial: 1
  become: true
  gather_facts: false
  vars:
    upgrade_ceph_packages: true
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    # Unit name without an instance suffix; errors are ignored on purpose.
    - name: Stop ceph rgw when upgrading from stable-3.2  # noqa: ignore-errors
      ignore_errors: true
      ansible.builtin.systemd:
        name: ceph-radosgw@rgw.{{ ansible_facts['hostname'] }}
        state: stopped
        enabled: false
        masked: true

    # One unit per entry of rgw_instances, suffixed with its instance_name.
    - name: Stop ceph rgw
      ansible.builtin.systemd:
        name: ceph-radosgw@rgw.{{ ansible_facts['hostname'] }}.{{ item.instance_name }}
        state: stopped
        enabled: false
        masked: true
      with_items: "{{ rgw_instances }}"

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      when: not containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-common

    - name: Import ceph-container-common role
      when: containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-container-common

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-rgw role
      ansible.builtin.import_role:
        name: ceph-rgw
# Upgrades the rbd-mirror hosts one at a time: the ceph-rbd-mirror@* units
# reported by systemctl are stopped, disabled and masked, then the roles are
# re-applied.
- name: Upgrade ceph rbd mirror node
  vars:
    upgrade_ceph_packages: true
  hosts: "{{ rbdmirror_group_name|default('rbdmirrors') }}"
  tags: rbdmirrors
  serial: 1
  become: true
  gather_facts: false
  tasks:
    # Prints one "Id=<unit>" line per matching unit.
    - name: Check for ceph rbd mirror services
      ansible.builtin.command: systemctl show --no-pager --property=Id --state=enabled ceph-rbd-mirror@*  # noqa command-instead-of-module
      changed_when: false
      register: rbdmirror_services

    # systemctl separates the output of several units with an empty line, so
    # only the "Id=..." lines are kept; splitting an empty line on '=' would
    # otherwise make the task fail as soon as more than one unit matches.
    - name: Stop ceph rbd mirror
      ansible.builtin.service:
        name: "{{ item.split('=')[1] }}"
        state: stopped
        enabled: false
        masked: true
      loop: "{{ rbdmirror_services.stdout_lines | select('match', '^Id=') | list }}"

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-rbd-mirror role
      ansible.builtin.import_role:
        name: ceph-rbd-mirror
# Upgrades the NFS gateway hosts one at a time: the nfs service is stopped,
# disabled and masked (package-based or containerized variant), then the
# roles are re-applied.
- name: Upgrade ceph nfs node
  hosts: "{{ nfs_group_name|default('nfss') }}"
  tags: nfss
  serial: 1
  become: true
  gather_facts: false
  vars:
    upgrade_ceph_packages: true
  tasks:
    # failed_when: false is here so that if we upgrade
    # from a version of ceph that does not have nfs-ganesha
    # then this task will not fail
    - name: Stop ceph nfs
      when: not containerized_deployment | bool
      failed_when: false
      ansible.builtin.systemd:
        name: nfs-ganesha
        state: stopped
        enabled: false
        masked: true

    # Containerized variant; the unit suffix is ceph_nfs_service_suffix when
    # defined, the short hostname otherwise. Never fails either.
    - name: Systemd stop nfs container
      when:
        - ceph_nfs_enable_service | bool
        - containerized_deployment | bool
      failed_when: false
      ansible.builtin.systemd:
        name: ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_facts['hostname']) }}
        state: stopped
        enabled: false
        masked: true

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      when: not containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-common

    - name: Import ceph-container-common role
      when: containerized_deployment | bool
      ansible.builtin.import_role:
        name: ceph-container-common

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-nfs role
      ansible.builtin.import_role:
        name: ceph-nfs
# Upgrades the client hosts in batches of client_update_batch (20 by
# default).
#
# The client group is referenced through client_group_name (falling back to
# 'clients'), matching the play's host list, so that an inventory using a
# custom client group name gets the same "first client only" behaviour.
- name: Upgrade ceph client node
  vars:
    upgrade_ceph_packages: true
  hosts: "{{ client_group_name|default('clients') }}"
  tags: clients
  serial: "{{ client_update_batch | default(20) }}"
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml
      when: containerized_deployment | bool

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    # Skipped on hosts that are members of the client group only, except for
    # the first client of that group.
    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when:
        - (group_names != [client_group_name | default('clients')]) or (inventory_hostname == groups.get(client_group_name | default('clients'), ['']) | first)
        - containerized_deployment | bool
# Post-upgrade play for the ceph-crash daemon on mon, osd, mds, rgw,
# rbd-mirror and mgr hosts: the service is stopped, then masked and disabled,
# and finally the ceph-crash role is applied. The unit is
# 'ceph-crash@<hostname>' for containerized deployments and
# 'ceph-crash.service' otherwise.
- name: Upgrade ceph-crash daemons
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  become: true
  gather_facts: false
  tags:
    - post_upgrade
    - crash
  tasks:
    - name: Stop the ceph-crash service
      ansible.builtin.systemd:
        name: "{{ 'ceph-crash@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-crash.service' }}"
        state: stopped

    # it needs to be done in a separate task otherwise the stop just before doesn't work.
    - name: Mask and disable the ceph-crash service
      ansible.builtin.systemd:
        name: "{{ 'ceph-crash@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-crash.service' }}"
        enabled: false
        masked: true

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-crash role
      ansible.builtin.import_role:
        name: ceph-crash
# Containerized deployments only: stop, mask and disable the existing
# ceph-exporter unit on the mon, osd, mds, rgw, rbd-mirror and mgr hosts, then
# apply the ceph-exporter role.
- name: Upgrade ceph-exporter daemons
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  become: true
  gather_facts: false
  tags:
    - post_upgrade
    - ceph-exporter
  tasks:
    # Nothing below applies to a non containerized deployment, so the play
    # ends here in that case.
    - name: Exit ceph-exporter upgrade if non containerized deployment
      when: not containerized_deployment | bool
      ansible.builtin.meta: end_play

    - name: Stop the ceph-exporter service
      ansible.builtin.systemd:
        state: stopped
        name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}"

    # Deliberately a separate task: combining it with the stop above makes
    # the stop ineffective.
    - name: Mask and disable the ceph-exporter service
      ansible.builtin.systemd:
        masked: true
        enabled: false
        name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}"

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        tasks_from: container_binary.yml
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-exporter role
      ansible.builtin.import_role:
        name: ceph-exporter
# Finalise the upgrade: run 'ceph osd require-osd-release squid' exactly once,
# delegated to the first monitor. Two variants exist because the containerized
# one has to go through '<container_binary> exec ceph-mon-<hostname>'.
- name: Complete upgrade
  hosts: "{{ mon_group_name | default('mons') }}"
  become: true
  gather_facts: false
  tags: post_upgrade
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    # Provides 'container_binary' used by the containerized command below.
    - name: Import ceph-facts role
      ansible.builtin.import_role:
        tasks_from: container_binary.yml
        name: ceph-facts

    - name: Container | disallow pre-squid OSDs and enable all new squid-only functionality
      when:
        - containerized_deployment | bool
        - groups.get(mon_group_name, []) | length > 0
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"
      ansible.builtin.command: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }} ceph --cluster {{ cluster }} osd require-osd-release squid"
      changed_when: false

    - name: Non container | disallow pre-squid OSDs and enable all new squid-only functionality
      when:
        - not containerized_deployment | bool
        - groups.get(mon_group_name, []) | length > 0
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"
      ansible.builtin.command: "ceph --cluster {{ cluster }} osd require-osd-release squid"
      changed_when: false
# Re-deploy node-exporter on every Ceph and monitoring host. All the work is
# skipped unless the dashboard is enabled.
- name: Upgrade node-exporter
  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ monitoring_group_name|default('monitoring') }}"
  become: true
  gather_facts: false
  tags: monitoring
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: With dashboard configuration
      when: dashboard_enabled | bool
      block:
        # Best effort: a failure to stop the service is ignored.
        - name: Stop node-exporter
          ansible.builtin.service:
            state: stopped
            name: node_exporter
          failed_when: false

        - name: Import ceph-facts role
          ansible.builtin.import_role:
            name: ceph-facts

        - name: Import ceph-container-engine role
          ansible.builtin.import_role:
            name: ceph-container-engine

        # Registry login only, and only for non containerized deployments
        # with registry authentication enabled.
        # NOTE(review): presumably containerized hosts already logged in
        # during their own upgrade play - confirm.
        - name: Import ceph-container-common role
          ansible.builtin.import_role:
            tasks_from: registry
            name: ceph-container-common
          when:
            - not containerized_deployment | bool
            - ceph_docker_registry_auth | bool

        - name: Import ceph-node-exporter role
          ansible.builtin.import_role:
            name: ceph-node-exporter
# Stop the monitoring stack on the monitoring hosts, then apply the
# ceph-prometheus and ceph-grafana roles. All the work is skipped unless the
# dashboard is enabled.
- name: Upgrade monitoring node
  hosts: "{{ monitoring_group_name|default('monitoring') }}"
  become: true
  gather_facts: false
  tags: monitoring
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: With dashboard configuration
      when: dashboard_enabled | bool
      block:
        # Best effort: a service that cannot be stopped does not fail the play.
        - name: Stop monitoring services
          ansible.builtin.service:
            state: stopped
            name: '{{ item }}'
          failed_when: false
          loop:
            - alertmanager
            - prometheus
            - grafana-server

        # Only the 'grafana' task file of ceph-facts is imported here; the
        # import of the whole role is intentionally left disabled.
        - name: Import ceph-facts role
          ansible.builtin.import_role:
            tasks_from: grafana
            name: ceph-facts

        - name: Import ceph-prometheus role
          ansible.builtin.import_role:
            name: ceph-prometheus

        - name: Import ceph-grafana role
          ansible.builtin.import_role:
            name: ceph-grafana
# Apply the ceph-dashboard role. Target hosts are the mgr group, falling back
# to the mon group when the mgr group is not defined. All the work is skipped
# unless the dashboard is enabled.
- name: Upgrade ceph dashboard
  hosts: "{{ groups[mgr_group_name|default('mgrs')] | default(groups[mon_group_name|default('mons')]) | default(omit) }}"
  become: true
  gather_facts: false
  tags: monitoring
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: With dashboard configuration
      when: dashboard_enabled | bool
      block:
        # First the role's default entry point ...
        - name: Import ceph-facts role
          ansible.builtin.import_role:
            name: ceph-facts

        # ... then its 'grafana' task file.
        - name: Import ceph-facts role
          ansible.builtin.import_role:
            tasks_from: grafana
            name: ceph-facts

        - name: Import ceph-dashboard role
          ansible.builtin.import_role:
            name: ceph-dashboard
# On the first monitor only: back up the crushmap, convert every straw bucket
# to straw2, and restore the backup (then fail) if the conversion errors out.
- name: Switch any existing crush buckets to straw2
  hosts: "{{ mon_group_name | default('mons') }}[0]"
  become: true
  gather_facts: false
  any_errors_fatal: true
  tags: post_upgrade
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        tasks_from: container_binary.yml
        name: ceph-facts

    # 'ceph_cmd' is a throw-away container running the ceph CLI (with
    # /etc/ceph mounted) when containerized, the plain 'ceph' binary otherwise.
    - name: Set_fact ceph_cmd
      ansible.builtin.set_fact:
        ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }}"

    - name: Backup the crushmap
      changed_when: false
      ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} osd getcrushmap -o /etc/ceph/{{ cluster }}-crushmap"

    - name: Migrate crush buckets to straw2
      block:
        - name: Switch crush buckets to straw2
          changed_when: false
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} osd crush set-all-straw-buckets-to-straw2"
      rescue:
        # Roll back to the backup taken above, then abort explicitly.
        - name: Restore the crushmap
          changed_when: false
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} osd setcrushmap -i /etc/ceph/{{ cluster }}-crushmap"

        - name: Inform that the switch to straw2 buckets failed
          ansible.builtin.fail:
            msg: >
              "An attempt to switch to straw2 bucket was made but failed.
              Check the cluster status."

    # Only reached when the migration succeeded; the rescue path ends in a
    # failure, which leaves the backup file in place.
    - name: Remove crushmap backup
      ansible.builtin.file:
        state: absent
        path: /etc/ceph/{{ cluster }}-crushmap
# Final report: print 'ceph -s' and 'ceph versions' once, from the first
# monitor. Tagged 'always' so it runs whatever tag selection was used.
- name: Show ceph status
  hosts: "{{ mon_group_name|default('mons') }}"
  tags: always
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    # 'container_binary' is needed just below. Import it here, as the other
    # post-upgrade plays do, rather than relying on an earlier play having
    # already set it for these hosts.
    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    # Containerized only: prefix used to run the ceph CLI inside the first
    # monitor's container. Left undefined otherwise, hence the default('')
    # in the commands below.
    - name: Set_fact container_exec_cmd_status
      ansible.builtin.set_fact:
        container_exec_cmd_status: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }}"
      when: containerized_deployment | bool

    - name: Show ceph status
      ansible.builtin.command: "{{ container_exec_cmd_status | default('') }} ceph --cluster {{ cluster }} -s"
      changed_when: false
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"

    - name: Show all daemons version
      ansible.builtin.command: "{{ container_exec_cmd_status | default('') }} ceph --cluster {{ cluster }} versions"
      changed_when: false
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"