--- # This playbook does a rolling update for all the Ceph services # # The value of 'serial:' adjusts the number of servers to be updated simultaneously. # We recommend a value of 1, which means hosts of a group (e.g: monitor) will be # upgraded one by one. It is really crucial for the update process to happen # in a serialized fashion. DO NOT CHANGE THIS VALUE. # # # If you run a Ceph community version, you have to change the variable: ceph_stable_release to the new release # # If you run Red Hat Ceph Storage and are doing a **major** update (e.g: from 2 to 3), you have two options: # - if you use a CDN, you have to change the ceph_rhcs_version to a newer one # - if you use an ISO, you have to change the ceph_rhcs_iso_path to the directory containing the new Ceph version # - name: confirm whether user really meant to upgrade the cluster hosts: localhost become: false vars: - mgr_group_name: mgrs vars_prompt: - name: ireallymeanit prompt: Are you sure you want to upgrade the cluster? default: 'no' private: no tasks: - name: exit playbook, if user did not mean to upgrade cluster fail: msg: > "Exiting rolling_update.yml playbook, cluster was NOT upgraded. To upgrade the cluster, either say 'yes' on the prompt or use `-e ireallymeanit=yes` on the command line when invoking the playbook" when: ireallymeanit != 'yes' - name: fail if no mgr host is present in the inventory fail: msg: "Please add a mgr host to your inventory." when: - groups.get(mgr_group_name, []) | length == 0 - name: gather facts and check the init system hosts: - "{{ mon_group_name|default('mons') }}" - "{{ osd_group_name|default('osds') }}" - "{{ mds_group_name|default('mdss') }}" - "{{ rgw_group_name|default('rgws') }}" - "{{ mgr_group_name|default('mgrs') }}" - "{{ rbd_mirror_group_name|default('rbdmirrors') }}" - "{{ nfs_group_name|default('nfss') }}" - "{{ client_group_name|default('clients') }}" become: True gather_facts: False vars: delegate_facts_host: True tasks: - debug: msg="gather facts on all Ceph hosts for following reference" - name: gather facts setup: when: - not delegate_facts_host | bool - name: gather and delegate facts setup: delegate_to: "{{ item }}" delegate_facts: True with_items: "{{ groups['all'] }}" run_once: true when: - delegate_facts_host | bool - set_fact: rolling_update=true - name: upgrade ceph mon cluster vars: health_mon_check_retries: 5 health_mon_check_delay: 15 upgrade_ceph_packages: True hosts: - "{{ mon_group_name|default('mons') }}" serial: 1 become: True pre_tasks: - name: set mon_host_count set_fact: mon_host_count={{ groups[mon_group_name] | length }} - name: fail when less than three monitors fail: msg: "Upgrade of cluster with less than three monitors is not supported." when: - mon_host_count | int < 3 - name: stop ceph mon - shortname systemd: name: ceph-mon@{{ ansible_hostname }} state: stopped enabled: no ignore_errors: True when: - not containerized_deployment - name: stop ceph mon - fqdn systemd: name: ceph-mon@{{ ansible_fqdn }} state: stopped enabled: no ignore_errors: True when: - not containerized_deployment roles: - ceph-defaults - ceph-facts - ceph-handler - { role: ceph-common, when: not containerized_deployment } - { role: ceph-docker-common, when: containerized_deployment } - ceph-config - ceph-mon post_tasks: - name: start ceph mon systemd: name: ceph-mon@{{ monitor_name }} state: started enabled: yes when: - not containerized_deployment - name: restart containerized ceph mon systemd: name: ceph-mon@{{ monitor_name }} state: restarted enabled: yes daemon_reload: yes when: - containerized_deployment - name: set mon_host_count set_fact: mon_host_count={{ groups[mon_group_name] | length }} - name: select a running monitor set_fact: mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}" - name: non container | waiting for the monitor to join the quorum... command: ceph --cluster "{{ cluster }}" -s --format json register: ceph_health_raw until: > hostvars[mon_host]['ansible_hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or hostvars[mon_host]['ansible_fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] retries: "{{ health_mon_check_retries }}" delay: "{{ health_mon_check_delay }}" delegate_to: "{{ mon_host }}" when: - not containerized_deployment - name: container | waiting for the containerized monitor to join the quorum... command: docker exec ceph-mon-{{ hostvars[mon_host]['ansible_hostname'] }} ceph --cluster "{{ cluster }}" -s --format json register: ceph_health_raw until: > hostvars[mon_host]['ansible_hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or hostvars[mon_host]['ansible_fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] retries: "{{ health_mon_check_retries }}" delay: "{{ health_mon_check_delay }}" delegate_to: "{{ mon_host }}" when: - containerized_deployment - name: create potentially missing keys (rbd and rbd-mirror) ceph_key: name: "client.{{ item.0 }}" state: present dest: "/var/lib/ceph/{{ item.0 }}/" caps: mon: "allow profile {{ item.0 }}" cluster: "{{ cluster }}" containerized: "{{ 'docker exec ceph-mon-' + hostvars[mon_host]['ansible_hostname'] if containerized_deployment else None }}" when: - cephx delegate_to: "{{ mon_host }}" ignore_errors: True # this might fail for upgrade from J to L on rbd-mirror and also on partially updated clusters with_nested: - ['bootstrap-rbd', 'bootstrap-rbd-mirror'] - "{{ groups[mon_group_name] }}" # so the key goes on all the nodes - name: upgrade ceph mgr node vars: upgrade_ceph_packages: True ceph_release: "{{ ceph_stable_release }}" hosts: - "{{ mgr_group_name|default('mgrs') }}" serial: 1 become: True pre_tasks: - name: non container - get current fsid command: "ceph --cluster {{ cluster }} fsid" register: cluster_uuid_non_container delegate_to: "{{ groups[mon_group_name][0] }}" when: - not containerized_deployment - name: container - get current fsid command: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} fsid" register: cluster_uuid_container delegate_to: "{{ groups[mon_group_name][0] }}" when: - containerized_deployment - name: set_fact ceph_cluster_fsid set_fact: ceph_cluster_fsid: "{{ cluster_uuid_container.stdout if containerized_deployment else cluster_uuid_non_container.stdout }}" - name: create ceph mgr keyring(s) when mon is not containerized ceph_key: name: "mgr.{{ hostvars[item]['ansible_hostname'] }}" state: present caps: mon: allow profile mgr osd: allow * mds: allow * cluster: "{{ cluster }}" when: - not containerized_deployment - cephx - groups.get(mgr_group_name, []) | length > 0 delegate_to: "{{ groups[mon_group_name][0] }}" with_items: "{{ groups.get(mgr_group_name, []) }}" - name: create ceph mgr keyring(s) when mon is containerized ceph_key: name: "mgr.{{ hostvars[item]['ansible_hostname'] }}" state: present caps: mon: allow profile mgr osd: allow * mds: allow * cluster: "{{ cluster }}" containerized: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}" when: - containerized_deployment - cephx - groups.get(mgr_group_name, []) | length > 0 delegate_to: "{{ groups[mon_group_name][0] }}" with_items: "{{ groups.get(mgr_group_name, []) }}" - name: fetch ceph mgr key(s) fetch: src: "{{ ceph_conf_key_directory }}/{{ cluster }}.mgr.{{ hostvars[item]['ansible_hostname'] }}.keyring" dest: "{{ fetch_directory }}/{{ ceph_cluster_fsid }}/{{ ceph_conf_key_directory }}/" flat: yes fail_on_missing: no delegate_to: "{{ groups[mon_group_name][0] }}" with_items: - "{{ groups.get(mgr_group_name, []) }}" # The following task has a failed_when: false # to handle the scenario where no mgr existed before the upgrade # or if we run a Ceph cluster before Luminous - name: stop ceph mgr systemd: name: ceph-mgr@{{ ansible_hostname }} state: stopped enabled: yes failed_when: false when: - not containerized_deployment roles: - ceph-defaults - ceph-facts - ceph-handler - { role: ceph-common, when: not containerized_deployment } - { role: ceph-docker-common, when: containerized_deployment } - ceph-config - ceph-mgr post_tasks: - name: start ceph mgr systemd: name: ceph-mgr@{{ ansible_hostname }} state: started enabled: yes when: - not containerized_deployment - name: restart containerized ceph mgr systemd: name: ceph-mgr@{{ ansible_hostname }} state: restarted enabled: yes daemon_reload: yes when: - containerized_deployment - name: set osd flags hosts: "{{ mon_group_name | default('mons') }}[0]" become: True roles: - ceph-defaults - ceph-facts post_tasks: - name: set osd flags command: "{{ docker_exec_cmd | default('') }} ceph --cluster {{ cluster }} osd set {{ item }}" with_items: - noout - nodeep-scrub - name: upgrade ceph osds cluster vars: health_osd_check_retries: 40 health_osd_check_delay: 30 upgrade_ceph_packages: True hosts: - "{{ osd_group_name|default('osds') }}" serial: 1 become: True pre_tasks: - name: get osd numbers shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi" register: osd_ids changed_when: false - name: stop ceph osd systemd: name: ceph-osd@{{ item }} state: stopped enabled: yes with_items: "{{ osd_ids.stdout_lines }}" when: - not containerized_deployment roles: - ceph-defaults - ceph-facts - ceph-handler - { role: ceph-common, when: not containerized_deployment } - { role: ceph-docker-common, when: containerized_deployment } - ceph-config - ceph-osd post_tasks: - name: restart ceph osd systemd: name: "ceph-osd@{{ item }}" state: restarted enabled: yes daemon_reload: yes with_items: "{{ osd_ids.stdout_lines }}" - name: set_fact docker_exec_cmd_osd set_fact: docker_exec_cmd_update_osd: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}" when: - containerized_deployment - name: get osd versions command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} versions" register: ceph_versions delegate_to: "{{ groups[mon_group_name][0] }}" - name: set_fact ceph_versions_osd set_fact: ceph_versions_osd: "{{ (ceph_versions.stdout|from_json).osd }}" delegate_to: "{{ groups[mon_group_name][0] }}" # length == 1 means there is a single osds versions entry # thus all the osds are running the same version - name: osd set sortbitwise command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} osd set sortbitwise" delegate_to: "{{ groups[mon_group_name][0] }}" when: - (ceph_versions.get('stdout', '{}')|from_json).get('osd', {}) | length == 1 - ceph_versions_osd | string is search("ceph version 10") - name: get num_pgs - non container command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} -s --format json" register: ceph_pgs delegate_to: "{{ groups[mon_group_name][0] }}" - name: waiting for clean pgs... command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} -s --format json" register: ceph_health_post until: > (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0) and (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_pgs.stdout | from_json).pgmap.num_pgs) delegate_to: "{{ groups[mon_group_name][0] }}" retries: "{{ health_osd_check_retries }}" delay: "{{ health_osd_check_delay }}" when: - (ceph_pgs.stdout | from_json).pgmap.num_pgs != 0 - name: unset osd flags hosts: - "{{ mon_group_name | default('mons') }}[0]" become: True roles: - ceph-defaults - ceph-facts tasks: - name: set_fact docker_exec_cmd_osd set_fact: docker_exec_cmd_update_osd: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}" when: - containerized_deployment - name: unset osd flags command: "{{ docker_exec_cmd_update_osd | default('') }} ceph osd unset {{ item }} --cluster {{ cluster }}" with_items: - noout - nodeep-scrub - name: get osd versions command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} versions" register: ceph_versions - name: set_fact ceph_versions_osd set_fact: ceph_versions_osd: "{{ (ceph_versions.stdout|from_json).osd }}" # length == 1 means there is a single osds versions entry # thus all the osds are running the same version - name: complete osds upgrade command: "{{ docker_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} osd require-osd-release luminous" delegate_to: "{{ groups[mon_group_name][0] }}" when: - (ceph_versions.get('stdout', '{}')|from_json).get('osd', {}) | length == 1 - ceph_versions_osd | string is search("ceph version 12") - name: run crush rules on osd nodes hosts: "{{ osd_group_name|default('osds') }}" become: True tasks: roles: - ceph-defaults - ceph-facts post_tasks: - import_role: name: ceph-osd tasks_from: crush_rules - name: upgrade ceph mdss cluster, deactivate all rank > 0 hosts: "{{ groups[mon_group_name|default('mons')][0] }}" become: true roles: - role: ceph-defaults - role: ceph-facts when: groups.get(mds_group_name, []) | length > 1 post_tasks: - name: deactivate all mds rank > 0 when: groups.get(mds_group_name, []) | length > 1 block: - name: get mds cluster status command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} fs get {{ cephfs }} -f json" changed_when: false register: _cephfs_status - name: get all mds names command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} mds dump -f json" changed_when: false register: _all_mds_name - name: set_fact all_mds_name set_fact: all_mds_name: "{{ all_mds_name | default([]) + [(_all_mds_name.stdout | from_json)['info'][item.key]['name'] ] }}" with_dict: "{{ ((_all_mds_name.stdout | default('{}') | from_json).info | default({})) }}" - name: set max_mds 1 on ceph fs command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} fs set {{ cephfs }} max_mds 1" changed_when: false register: _max_mds_result - name: deactivate all non-zero ranks shell: | #!/bin/bash {{ docker_exec_cmd }} ceph --cluster {{ cluster }} mds deactivate {{ cephfs }}:{{ item }} {{ docker_exec_cmd }} ceph --cluster {{ cluster }} fs get {{ cephfs }} -f json register: deactivate_status retries: 10 delay: 1 failed_when: false until: item not in (deactivate_status.stdout | from_json).mdsmap.in with_items: "{{ (_cephfs_status.stdout | default('{}') | from_json).mdsmap.in | default([]) | difference([0]) | sort(reverse=True) }}" - name: get name of remaining active mds command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} mds dump -f json" changed_when: false register: _mds_active_name - name: set_fact mds_active_name set_fact: mds_active_name: "{{ (_mds_active_name.stdout | from_json)['info'][item.key]['name'] }}" with_dict: "{{ (_mds_active_name.stdout | default('{}') | from_json).info | default({}) }}" - name: set_fact mds_active_host set_fact: mds_active_host: "{{ hostvars[item]['inventory_hostname'] }}" with_items: "{{ groups[mds_group_name] }}" when: hostvars[item]['ansible_hostname'] == mds_active_name - name: create standby_mdss group add_host: name: "{{ item }}" groups: standby_mdss ansible_host: "{{ hostvars[item]['ansible_host'] | default(omit) }}" ansible_port: "{{ hostvars[item]['ansible_port'] | default(omit) }}" with_items: "{{ groups[mds_group_name] | difference(mds_active_host) }}" - name: stop standby ceph mds systemd: name: "ceph-mds@{{ hostvars[item]['ansible_hostname'] }}" state: stopped enabled: no delegate_to: "{{ item }}" with_items: "{{ groups['standby_mdss'] | default([]) }}" when: groups['standby_mdss'] | default([]) | length > 0 # dedicated task for masking systemd unit # somehow, having a single task doesn't work in containerized context - name: mask stop standby ceph mds systemd: name: "ceph-mds@{{ hostvars[item]['ansible_hostname'] }}" masked: yes delegate_to: "{{ item }}" with_items: "{{ groups['standby_mdss'] | default([]) }}" when: groups['standby_mdss'] | default([]) | length > 0 - name: wait until all standbys mds are stopped command: "{{ docker_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json" changed_when: false register: wait_standbys_down retries: 300 delay: 5 until: (wait_standbys_down.stdout | from_json).standbys | length == 0 - name: create active_mdss group add_host: name: "{{ mds_active_host if mds_active_host is defined else groups.get(mds_group_name)[0] }}" groups: active_mdss ansible_host: "{{ hostvars[mds_active_host if mds_active_host is defined else groups.get(mds_group_name)[0]]['ansible_host'] | default(omit) }}" ansible_port: "{{ hostvars[mds_active_host if mds_active_host is defined else groups.get(mds_group_name)[0]]['ansible_port'] | default(omit) }}" when: groups.get(mds_group_name, []) | length > 0 - name: upgrade active mds vars: upgrade_ceph_packages: True hosts: active_mdss become: true pre_tasks: - name: prevent restart from the packaging systemd: name: ceph-mds@{{ ansible_hostname }} enabled: no masked: yes when: not containerized_deployment | bool roles: - role: ceph-defaults - role: ceph-facts - role: ceph-handler - role: ceph-common when: not containerized_deployment | bool - role: ceph-docker-common when: containerized_deployment | bool - role: ceph-config - role: ceph-mds post_tasks: - name: restart ceph mds systemd: name: ceph-mds@{{ ansible_hostname }} state: restarted enabled: yes masked: no daemon_reload: yes - name: upgrade standbys ceph mdss cluster vars: upgrade_ceph_packages: True hosts: standby_mdss become: True pre_tasks: - name: prevent restarts from the packaging systemd: name: ceph-mds@{{ ansible_hostname }} enabled: no masked: yes when: not containerized_deployment | bool roles: - role: ceph-defaults - role: ceph-facts - role: ceph-handler - role: ceph-common when: not containerized_deployment | bool - role: ceph-docker-common when: containerized_deployment | bool - role: ceph-config - role: ceph-mds post_tasks: - name: restart ceph mds systemd: name: ceph-mds@{{ ansible_hostname }} state: restarted enabled: yes daemon_reload: yes - name: set max_mds command: "{{ hostvars[groups[mon_group_name][0]]['docker_exec_cmd'] | default('') }} ceph --cluster {{ cluster }} fs set {{ cephfs }} max_mds {{ mds_max_mds }}" changed_when: false delegate_to: "{{ groups[mon_group_name][0] }}" when: inventory_hostname == groups['standby_mdss'] | last - name: upgrade ceph rgws cluster vars: upgrade_ceph_packages: True hosts: - "{{ rgw_group_name|default('rgws') }}" serial: 1 become: True pre_tasks: - name: stop ceph rgw systemd: name: ceph-radosgw@rgw.{{ ansible_hostname }} state: stopped enabled: yes when: - not containerized_deployment roles: - ceph-defaults - ceph-facts - ceph-handler - { role: ceph-common, when: not containerized_deployment } - { role: ceph-docker-common, when: containerized_deployment } - ceph-config - ceph-rgw post_tasks: - name: start ceph rgw systemd: name: ceph-radosgw@rgw.{{ ansible_hostname }} state: started enabled: yes when: - not containerized_deployment - name: restart containerized ceph rgw systemd: name: ceph-radosgw@rgw.{{ ansible_hostname }} state: restarted enabled: yes daemon_reload: yes when: - containerized_deployment - name: upgrade ceph rbd mirror node vars: upgrade_ceph_packages: True hosts: - "{{ rbd_mirror_group_name|default('rbdmirrors') }}" serial: 1 become: True pre_tasks: # NOTE(leseb): these tasks have a 'failed_when: false' # in case we run before luminous or after - name: stop ceph rbd mirror before luminous systemd: name: "ceph-rbd-mirror@{{ ceph_rbd_mirror_local_user }}" state: stopped enabled: no failed_when: false - name: stop ceph rbd mirror for and after luminous systemd: name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}" state: stopped enabled: yes failed_when: false roles: - ceph-defaults - ceph-facts - ceph-handler - { role: ceph-common, when: not containerized_deployment } - { role: ceph-docker-common, when: containerized_deployment } - ceph-config - ceph-rbd-mirror post_tasks: - name: start ceph rbd mirror systemd: name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}" state: started enabled: yes when: - not containerized_deployment - name: restart containerized ceph rbd mirror systemd: name: ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }} state: restarted enabled: yes daemon_reload: yes when: - containerized_deployment - name: upgrade ceph nfs node vars: upgrade_ceph_packages: True hosts: - "{{ nfs_group_name|default('nfss') }}" serial: 1 become: True pre_tasks: # failed_when: false is here so that if we upgrade # from a version of ceph that does not have nfs-ganesha # then this task will not fail - name: stop ceph nfs systemd: name: nfs-ganesha state: stopped enabled: yes failed_when: false when: - not containerized_deployment roles: - ceph-defaults - ceph-facts - ceph-handler - { role: ceph-common, when: not containerized_deployment } - { role: ceph-docker-common, when: containerized_deployment } - ceph-config - ceph-nfs post_tasks: - name: start nfs gateway systemd: name: nfs-ganesha state: started enabled: yes when: - not containerized_deployment - ceph_nfs_enable_service - name: systemd restart nfs container systemd: name: ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_hostname) }} state: restarted enabled: yes daemon_reload: yes when: - ceph_nfs_enable_service - containerized_deployment - name: upgrade ceph iscsi gateway node vars: upgrade_ceph_packages: True hosts: - "{{ iscsi_gw_group_name|default('iscsigws') }}" - iscsi-gws # for backward compatibility only! serial: 1 become: True pre_tasks: # failed_when: false is here so that if we upgrade # from a version of ceph that does not have iscsi gws # then this task will not fail - name: stop ceph iscsi services systemd: name: '{{ item }}' state: stopped enabled: yes failed_when: false with_items: - rbd-target-api - rbd-target-gw - tcmu-runner when: not containerized_deployment roles: - ceph-defaults - ceph-facts - ceph-handler - { role: ceph-common, when: not containerized_deployment } - { role: ceph-docker-common, when: containerized_deployment } - ceph-config - ceph-iscsi-gw post_tasks: - name: start ceph iscsi services systemd: name: '{{ item }}' state: started enabled: yes with_items: - tcmu-runner - rbd-target-api - rbd-target-gw when: not containerized_deployment - name: upgrade ceph client node vars: upgrade_ceph_packages: True hosts: - "{{ client_group_name|default('clients') }}" serial: "{{ client_update_batch | default(20) }}" become: True roles: - ceph-defaults - ceph-facts - ceph-handler - { role: ceph-common, when: not containerized_deployment } - { role: ceph-docker-common, when: containerized_deployment } - ceph-config - ceph-client - name: show ceph status hosts: - "{{ mon_group_name|default('mons') }}" become: True roles: - ceph-defaults tasks: - name: set_fact docker_exec_cmd_status set_fact: docker_exec_cmd_status: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}" when: - containerized_deployment - name: show ceph status command: "{{ docker_exec_cmd_status|default('') }} ceph --cluster {{ cluster }} -s" delegate_to: "{{ groups[mon_group_name][0] }}"