---
# This playbook does a rolling update for all the Ceph services
#
# The value of 'serial:' adjusts the number of servers to be updated simultaneously.
# We recommend a value of 1, which means hosts of a group (e.g: monitor) will be
# upgraded one by one. It is really crucial for the update process to happen
# in a serialized fashion. DO NOT CHANGE THIS VALUE.
#
#
# If you run a Ceph community version, you have to change the variable: ceph_stable_release to the new release

# Safety gate: the play fails unless 'ireallymeanit' is exactly 'yes'
# (answered at the prompt or passed with -e).
- name: Confirm whether user really meant to upgrade the cluster
  hosts: localhost
  tags: always
  become: false
  gather_facts: false
  vars:
    mgr_group_name: mgrs
  vars_prompt:
    - name: ireallymeanit  # noqa: name[casing]
      prompt: Are you sure you want to upgrade the cluster?
      default: 'no'
      private: false
  tasks:
    - name: Exit playbook, if user did not mean to upgrade cluster
      ansible.builtin.fail:
        msg: >
          "Exiting rolling_update.yml playbook, cluster was NOT upgraded.
          To upgrade the cluster, either say 'yes' on the prompt or
          use `-e ireallymeanit=yes` on the command line when
          invoking the playbook"
      when: ireallymeanit != 'yes'

    - name: Import_role ceph-defaults
      ansible.builtin.import_role:
        name: ceph-defaults

# Facts are collected once here; the plays below run with
# 'gather_facts: false' and read ansible_facts gathered by this play.
- name: Gather facts and check the init system
  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ client_group_name|default('clients') }}"
    - "{{ monitoring_group_name|default('monitoring') }}"
  tags: always
  any_errors_fatal: true
  become: true
  gather_facts: false
  vars:
    delegate_facts_host: true
  tasks:
    - name: Gather facts on all Ceph hosts for following reference
      ansible.builtin.debug:
        msg: "gather facts on all Ceph hosts for following reference"

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Gather facts
      ansible.builtin.setup:
        gather_subset:
          - 'all'
          - '!facter'
          - '!ohai'
      when: not delegate_facts_host | bool or inventory_hostname in groups.get(client_group_name, [])

    # The client group is looked up through client_group_name (as in the
    # task above) instead of the literal 'clients', so a renamed client
    # group is excluded from delegation as intended.
    - name: Gather and delegate facts
      ansible.builtin.setup:
        gather_subset:
          - 'all'
          - '!facter'
          - '!ohai'
      delegate_to: "{{ item }}"
      delegate_facts: true
      with_items: "{{ groups['all'] | difference(groups.get(client_group_name, [])) }}"
      run_once: true
      when: delegate_facts_host | bool

    - name: Set_fact rolling_update
      ansible.builtin.set_fact:
        rolling_update: true

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Import ceph-infra role
      ansible.builtin.import_role:
        name: ceph-infra
      tags: ceph_infra

    - name: Import ceph-validate role
      ansible.builtin.import_role:
        name: ceph-validate

    # For hosts that are only clients, the role runs on the first client only.
    - name: Import ceph-container-engine role
      ansible.builtin.import_role:
        name: ceph-container-engine
      when:
        - (group_names != [client_group_name]) or (inventory_hostname == groups.get(client_group_name, [''])|first)
        - (containerized_deployment | bool) or (dashboard_enabled | bool)

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
        tasks_from: registry
      when:
        - (group_names != [client_group_name]) or (inventory_hostname == groups.get(client_group_name, [''])|first)
        - (containerized_deployment | bool) or (dashboard_enabled | bool)
        - ceph_docker_registry_auth | bool

    # Runs once, on the first monitor, and aborts unless the reported
    # version string contains the word 'reef'.
    - name: Check ceph release in container image
      when:
        - groups.get(mon_group_name, []) | length > 0
        - containerized_deployment | bool
      delegate_to: "{{ groups[mon_group_name][0] }}"
      run_once: true
      block:
        - name: Get the ceph release being deployed
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} --version"
          register: ceph_version
          changed_when: false

        - name: Check ceph release being deployed
          ansible.builtin.fail:
            msg: "This version of ceph-ansible is intended for upgrading to Ceph Reef only."
          when: "'reef' not in ceph_version.stdout.split()"

# Targets the first monitor through mon_group_name (same pattern the mds
# play uses) rather than the literal 'mons[0]'.
- name: Ensure cluster config is applied
  hosts: "{{ mon_group_name | default('mons') }}[0]"
  become: true
  gather_facts: false
  any_errors_fatal: true
  tasks:
    - name: Import default role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    # One 'set' per (who, option) pair; pairs whose value is 'omit' are skipped.
    - name: Set cluster configs
      ceph_config:
        action: set
        who: "{{ item.0.key }}"
        option: "{{ item.1.key }}"
        value: "{{ item.1.value }}"
      when: item.1.value != omit
      loop: "{{ ceph_cluster_conf | dict2dict }}"
      environment:
        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
        CEPH_CONTAINER_BINARY: "{{ container_binary }}"

# Monitors are upgraded one host at a time (serial: 1). Any failure inside
# the block triggers the rescue section, which unmasks the services again
# before stopping the playbook.
- name: Upgrade ceph mon cluster
  tags: mons
  vars:
    health_mon_check_retries: 5
    health_mon_check_delay: 15
    upgrade_ceph_packages: true
  hosts: "{{ mon_group_name|default('mons') }}"
  serial: 1
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Upgrade ceph mon cluster
      block:
        - name: Remove ceph aliases
          ansible.builtin.file:
            path: /etc/profile.d/ceph-aliases.sh
            state: absent
          when: containerized_deployment | bool

        - name: Set mon_host_count
          ansible.builtin.set_fact:
            mon_host_count: "{{ groups[mon_group_name] | length }}"

        - name: Fail when less than three monitors
          ansible.builtin.fail:
            msg: "Upgrade of cluster with less than three monitors is not supported."
          when: mon_host_count | int < 3

        # Pick a monitor other than the one being upgraded to run ceph commands on.
        - name: Select a running monitor
          ansible.builtin.set_fact:
            mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"

        - name: Import ceph-facts role
          ansible.builtin.import_role:
            name: ceph-facts

        # Pre-flight health and quorum checks, done only while handling the first monitor.
        - name: Check Ceph monitors quorum status
          when: inventory_hostname == groups[mon_group_name] | first
          block:
            - name: Get ceph cluster status
              ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
              register: check_cluster_health
              delegate_to: "{{ mon_host }}"
              changed_when: false

            - name: Display health status before failing
              when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
              block:
                - name: Display ceph health detail
                  ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
                  delegate_to: "{{ mon_host }}"
                  changed_when: false

                - name: Fail if cluster isn't in an acceptable state
                  ansible.builtin.fail:
                    msg: "cluster is not in an acceptable state!"

            - name: Get the ceph quorum status
              ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} quorum_status -f json"
              register: check_quorum_status
              delegate_to: "{{ mon_host }}"
              changed_when: false

            - name: Fail if the cluster quorum isn't in an acceptable state
              ansible.builtin.fail:
                msg: "cluster quorum is not in an acceptable state!"
              when: (check_quorum_status.stdout | from_json).quorum | length != groups[mon_group_name] | length

        - name: Ensure /var/lib/ceph/bootstrap-rbd-mirror is present
          ansible.builtin.file:
            path: /var/lib/ceph/bootstrap-rbd-mirror
            owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
            group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
            mode: '755'
            state: directory
          delegate_to: "{{ item }}"
          with_items: "{{ groups[mon_group_name] }}"
          when:
            - cephx | bool
            - inventory_hostname == groups[mon_group_name][0]

        - name: Create potentially missing keys (rbd and rbd-mirror)
          ceph_key:
            name: "client.{{ item.0 }}"
            dest: "/var/lib/ceph/{{ item.0 }}/"
            caps:
              mon: "allow profile {{ item.0 }}"
            cluster: "{{ cluster }}"
          delegate_to: "{{ item.1 }}"
          with_nested:
            - ['bootstrap-rbd', 'bootstrap-rbd-mirror']
            - "{{ groups[mon_group_name] }}"  # so the key goes on all the nodes
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"
          when:
            - cephx | bool
            - inventory_hostname == groups[mon_group_name][0]

        # NOTE: we mask the service so the RPM can't restart it
        # after the package gets upgraded
        - name: Stop ceph mon
          ansible.builtin.systemd:
            name: ceph-mon@{{ item }}
            state: stopped
            enabled: false
            masked: true
          with_items:
            - "{{ ansible_facts['hostname'] }}"
            - "{{ ansible_facts['fqdn'] }}"

        # only mask the service for mgr because it must be upgraded
        # after ALL monitors, even when collocated
        - name: Mask the mgr service
          ansible.builtin.systemd:
            name: ceph-mgr@{{ ansible_facts['hostname'] }}
            masked: true
          when: inventory_hostname in groups[mgr_group_name] | default([])
                or groups[mgr_group_name] | default([]) | length == 0

        - name: Import ceph-handler role
          ansible.builtin.import_role:
            name: ceph-handler

        - name: Import ceph-common role
          ansible.builtin.import_role:
            name: ceph-common
          when: not containerized_deployment | bool

        - name: Import ceph-container-common role
          ansible.builtin.import_role:
            name: ceph-container-common
          when: containerized_deployment | bool

        - name: Import ceph-config role
          ansible.builtin.import_role:
            name: ceph-config

        - name: Import ceph-mon role
          ansible.builtin.import_role:
            name: ceph-mon

        - name: Start ceph mgr
          ansible.builtin.systemd:
            name: ceph-mgr@{{ ansible_facts['hostname'] }}
            state: started
            enabled: true
            masked: false
          when: inventory_hostname in groups[mgr_group_name] | default([])
                or groups[mgr_group_name] | default([]) | length == 0

        - name: Import_role ceph-facts
          ansible.builtin.import_role:
            name: ceph-facts
            tasks_from: set_monitor_address.yml
          delegate_to: "{{ groups[mon_group_name][0] }}"
          delegate_facts: true

        # The address of the first monitor is looked up through
        # mon_group_name, matching every other group lookup in this play.
        - name: Non container | waiting for the monitor to join the quorum...
          ansible.builtin.command: ceph --cluster "{{ cluster }}" -m "{{ _monitor_addresses[groups[mon_group_name][0]] }}" quorum_status --format json
          register: ceph_health_raw
          until:
            - ceph_health_raw.rc == 0
            - (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
              hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
          retries: "{{ health_mon_check_retries }}"
          delay: "{{ health_mon_check_delay }}"
          changed_when: false
          when: not containerized_deployment | bool

        - name: Container | waiting for the containerized monitor to join the quorum...
          ansible.builtin.command: >
            {{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}"
            -m "{{ _monitor_addresses[groups[mon_group_name][0]] }}" quorum_status --format json
          register: ceph_health_raw
          until:
            - ceph_health_raw.rc == 0
            - (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
              hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
          retries: "{{ health_mon_check_retries }}"
          delay: "{{ health_mon_check_delay }}"
          changed_when: false
          when: containerized_deployment | bool

      # Undo the masking done above so the node is not left with masked
      # units, then stop the run.
      rescue:
        - name: Import ceph-defaults role
          ansible.builtin.import_role:
            name: ceph-defaults

        - name: Unmask the mon service
          ansible.builtin.systemd:
            name: ceph-mon@{{ ansible_facts['hostname'] }}
            enabled: true
            masked: false

        - name: Unmask the mgr service
          ansible.builtin.systemd:
            name: ceph-mgr@{{ ansible_facts['hostname'] }}
            masked: false
          when: inventory_hostname in groups[mgr_group_name] | default([])
                or groups[mgr_group_name] | default([]) | length == 0

        - name: Stop the playbook execution
          ansible.builtin.fail:
            msg: "There was an error during monitor upgrade. Please, check the previous task results."

# After the monitor upgrade, point mon_host back at the first monitor.
- name: Reset mon_host
  hosts: "{{ mon_group_name|default('mons') }}"
  tags: always
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Reset mon_host fact
      ansible.builtin.set_fact:
        mon_host: "{{ groups[mon_group_name][0] }}"

# Only does work when the inventory has no (or an empty) mgr group, i.e.
# the mgr daemons live on the monitor hosts.
- name: Upgrade ceph mgr nodes when implicitly collocated on monitors
  vars:
    health_mon_check_retries: 5
    health_mon_check_delay: 15
    upgrade_ceph_packages: true
  hosts: "{{ mon_group_name|default('mons') }}"
  tags: mgrs
  serial: 1
  become: true
  gather_facts: false
  tasks:
    - name: Upgrade mgrs when no mgr group explicitly defined in inventory
      when: groups.get(mgr_group_name, []) | length == 0
      block:
        - name: Stop ceph mgr
          ansible.builtin.systemd:
            name: ceph-mgr@{{ ansible_facts['hostname'] }}
            state: stopped
            masked: true

        - name: Import ceph-defaults role
          ansible.builtin.import_role:
            name: ceph-defaults

        - name: Import ceph-facts role
          ansible.builtin.import_role:
            name: ceph-facts

        - name: Import ceph-handler role
          ansible.builtin.import_role:
            name: ceph-handler

        - name: Import ceph-common role
          ansible.builtin.import_role:
            name: ceph-common
          when: not containerized_deployment | bool

        - name: Import ceph-container-common role
          ansible.builtin.import_role:
            name: ceph-container-common
          when: containerized_deployment | bool

        - name: Import ceph-config role
          ansible.builtin.import_role:
            name: ceph-config

        - name: Import ceph-mgr role
          ansible.builtin.import_role:
            name: ceph-mgr

- name: Upgrade ceph mgr nodes
  vars:
    upgrade_ceph_packages: true
    ceph_release: "{{ ceph_stable_release }}"
  hosts: "{{ mgr_group_name|default('mgrs') }}"
  tags: mgrs
  serial: 1
  become: true
  gather_facts: false
  tasks:
    # The following task has a failed_when: false
    # to handle the scenario where no mgr existed before the upgrade
    # or if we run a Ceph cluster before Luminous
    - name: Stop ceph mgr
      ansible.builtin.systemd:
        name: ceph-mgr@{{ ansible_facts['hostname'] }}
        state: stopped
        enabled: false
        masked: false
      failed_when: false

    - name: Mask ceph mgr systemd unit
      ansible.builtin.systemd:
        name: ceph-mgr@{{ ansible_facts['hostname'] }}
        masked: true
      failed_when: false

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-mgr role
      ansible.builtin.import_role:
        name: ceph-mgr

# Records the balancer state and each pool's autoscale mode, then turns
# both off and sets noout/nodeep-scrub. "Complete osd upgrade" below
# reads the same registered values to restore them.
- name: Set osd flags
  hosts: "{{ osd_group_name | default('osds') }}"
  tags: osds
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - name: Set osd flags, disable autoscaler and balancer
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"
      block:
        - name: Get pool list
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} osd pool ls detail -f json"
          register: pool_list
          changed_when: false
          check_mode: false

        - name: Get balancer module status
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
          register: balancer_status_update
          run_once: true
          changed_when: false
          check_mode: false

        - name: Set_fact pools_pgautoscaler_mode
          ansible.builtin.set_fact:
            pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
          with_items: "{{ pool_list.stdout | default('{}') | from_json }}"

        - name: Disable balancer
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
          changed_when: false
          when: (balancer_status_update.stdout | from_json)['active'] | bool

        - name: Disable pg autoscale on pools
          ceph_pool:
            name: "{{ item.name }}"
            cluster: "{{ cluster }}"
            pg_autoscale_mode: false
          with_items: "{{ pools_pgautoscaler_mode }}"
          when:
            - pools_pgautoscaler_mode is defined
            - item.mode == 'on'
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"

        - name: Set osd flags
          ceph_osd_flag:
            name: "{{ item }}"
            cluster: "{{ cluster }}"
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"
          with_items:
            - noout
            - nodeep-scrub

# Targets the same group expression as "Set osd flags" and
# "Complete osd upgrade" instead of the literal 'osds', so the three
# OSD plays always act on the same hosts.
- name: Upgrade ceph osds cluster
  vars:
    health_osd_check_retries: 600
    health_osd_check_delay: 2
    upgrade_ceph_packages: true
  hosts: "{{ osd_group_name | default('osds') }}"
  tags: osds
  serial: 1
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    # Lists /var/lib/ceph/osd and keeps what follows the last '-' of each entry.
    - name: Get osd numbers - non container
      ansible.builtin.shell: if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi  # noqa: risky-shell-pipe
      register: osd_ids
      changed_when: false

    - name: Set num_osds
      ansible.builtin.set_fact:
        num_osds: "{{ osd_ids.stdout_lines | default([]) | length }}"

    - name: Set_fact container_exec_cmd_osd
      ansible.builtin.set_fact:
        container_exec_cmd_update_osd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }}"
      when: containerized_deployment | bool

    - name: Stop ceph osd
      ansible.builtin.systemd:
        name: ceph-osd@{{ item }}
        state: stopped
        enabled: false
        masked: true
      with_items: "{{ osd_ids.stdout_lines }}"

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-osd role
      ansible.builtin.import_role:
        name: ceph-osd

    - name: Scan ceph-disk osds with ceph-volume if deploying nautilus
      ceph_volume_simple_scan:
        cluster: "{{ cluster }}"
        force: true
      environment:
        CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
      when: not containerized_deployment | bool

    - name: Activate scanned ceph-disk osds and migrate to ceph-volume if deploying nautilus
      ceph_volume_simple_activate:
        cluster: "{{ cluster }}"
        osd_all: true
      environment:
        CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
      when: not containerized_deployment | bool

    # Retries until at least one PG state is reported and the PGs whose
    # state starts with 'active+clean' add up to num_pgs.
    - name: Waiting for clean pgs...
      ansible.builtin.command: "{{ container_exec_cmd_update_osd | default('') }} ceph --cluster {{ cluster }} pg stat --format json"
      register: ceph_health_post
      until: >
        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | length) > 0)
        and
        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | selectattr('name', 'search', '^active\\+clean') | map(attribute='num') | list | sum) == (ceph_health_post.stdout | from_json).pg_summary.num_pgs)
      delegate_to: "{{ groups[mon_group_name][0] }}"
      changed_when: false
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"

# Restores what "Set osd flags" changed, using the values registered there.
- name: Complete osd upgrade
  hosts: "{{ osd_group_name | default('osds') }}"
  tags: osds
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - name: Unset osd flags, re-enable pg autoscaler and balancer
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"
      block:
        - name: Re-enable pg autoscale on pools
          ceph_pool:
            name: "{{ item.name }}"
            cluster: "{{ cluster }}"
            pg_autoscale_mode: true
          with_items: "{{ pools_pgautoscaler_mode }}"
          when:
            - pools_pgautoscaler_mode is defined
            - item.mode == 'on'
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"

        - name: Unset osd flags
          ceph_osd_flag:
            name: "{{ item }}"
            cluster: "{{ cluster }}"
            state: absent
          environment:
            CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
            CEPH_CONTAINER_BINARY: "{{ container_binary }}"
          with_items:
            - noout
            - nodeep-scrub

        - name: Re-enable balancer
          ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
          changed_when: false
          when: (balancer_status_update.stdout | from_json)['active'] | bool

# Runs on the first monitor. With more than one mds host it reduces the
# filesystem to a single active rank, stops and masks the standbys, and
# builds the in-memory groups 'standby_mdss' and 'active_mdss' that the
# next two plays target.
- name: Upgrade ceph mdss cluster, deactivate all rank > 0
  hosts: "{{ mon_group_name | default('mons') }}[0]"
  tags: mdss
  become: true
  gather_facts: false
  tasks:
    - name: Deactivate all mds rank > 0
      when: groups.get(mds_group_name, []) | length > 0
      block:
        - name: Import ceph-defaults role
          ansible.builtin.import_role:
            name: ceph-defaults

        - name: Import ceph-facts role
          ansible.builtin.import_role:
            name: ceph-facts

        - name: Deactivate all mds rank > 0 if any
          when: groups.get(mds_group_name, []) | length > 1
          block:
            - name: Set max_mds 1 on ceph fs
              ceph_fs:
                name: "{{ cephfs }}"
                cluster: "{{ cluster }}"
                data: "{{ cephfs_data_pool.name }}"
                metadata: "{{ cephfs_metadata_pool.name }}"
                max_mds: 1
              environment:
                CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
                CEPH_CONTAINER_BINARY: "{{ container_binary }}"

            - name: Wait until only rank 0 is up
              ceph_fs:
                name: "{{ cephfs }}"
                cluster: "{{ cluster }}"
                state: info
              register: wait_rank_zero
              retries: 720
              delay: 5
              until: (wait_rank_zero.stdout | from_json).mdsmap.in | length == 1 and (wait_rank_zero.stdout | from_json).mdsmap.in[0] == 0
              environment:
                CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
                CEPH_CONTAINER_BINARY: "{{ container_binary }}"

            - name: Get name of remaining active mds
              ansible.builtin.command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
              changed_when: false
              register: _mds_active_name

            - name: Set_fact mds_active_name
              ansible.builtin.set_fact:
                mds_active_name: "{{ (_mds_active_name.stdout | from_json)['filesystems'][0]['mdsmap']['info'][item.key]['name'] }}"
              with_dict: "{{ (_mds_active_name.stdout | default('{}') | from_json).filesystems[0]['mdsmap']['info'] | default({}) }}"

            # Map the active daemon name back to its inventory host.
            - name: Set_fact mds_active_host
              ansible.builtin.set_fact:
                mds_active_host: "{{ [hostvars[item]['inventory_hostname']] }}"
              with_items: "{{ groups[mds_group_name] }}"
              when: hostvars[item]['ansible_facts']['hostname'] == mds_active_name

            - name: Create standby_mdss group
              ansible.builtin.add_host:
                name: "{{ item }}"
                groups: standby_mdss
                ansible_host: "{{ hostvars[item]['ansible_host'] | default(omit) }}"
                ansible_port: "{{ hostvars[item]['ansible_port'] | default(omit) }}"
              with_items: "{{ groups[mds_group_name] | difference(mds_active_host) }}"

            - name: Stop standby ceph mds
              ansible.builtin.systemd:
                name: "ceph-mds@{{ hostvars[item]['ansible_facts']['hostname'] }}"
                state: stopped
                enabled: false
              delegate_to: "{{ item }}"
              with_items: "{{ groups['standby_mdss'] }}"
              when: groups['standby_mdss'] | default([]) | length > 0

            # dedicated task for masking systemd unit
            # somehow, having a single task doesn't work in containerized context
            - name: Mask systemd units for standby ceph mds
              ansible.builtin.systemd:
                name: "ceph-mds@{{ hostvars[item]['ansible_facts']['hostname'] }}"
                masked: true
              delegate_to: "{{ item }}"
              with_items: "{{ groups['standby_mdss'] }}"
              when: groups['standby_mdss'] | default([]) | length > 0

            - name: Wait until all standbys mds are stopped
              ansible.builtin.command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
              changed_when: false
              register: wait_standbys_down
              retries: 300
              delay: 5
              until: (wait_standbys_down.stdout | from_json).standbys | length == 0

        # Falls back to the first mds host when mds_active_host was never set.
        - name: Create active_mdss group
          ansible.builtin.add_host:
            name: "{{ mds_active_host[0] if mds_active_host is defined else groups.get(mds_group_name)[0] }}"
            groups: active_mdss
            ansible_host: "{{ hostvars[mds_active_host[0] if mds_active_host is defined else groups.get(mds_group_name)[0]]['ansible_host'] | default(omit) }}"
            ansible_port: "{{ hostvars[mds_active_host[0] if mds_active_host is defined else groups.get(mds_group_name)[0]]['ansible_port'] | default(omit) }}"

- name: Upgrade active mds
  vars:
    upgrade_ceph_packages: true
  hosts: active_mdss
  tags: mdss
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Prevent restart from the packaging
      ansible.builtin.systemd:
        name: ceph-mds@{{ ansible_facts['hostname'] }}
        enabled: false
        masked: true
      when: not containerized_deployment | bool

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-mds role
      ansible.builtin.import_role:
        name: ceph-mds

    - name: Restart ceph mds
      ansible.builtin.systemd:
        name: ceph-mds@{{ ansible_facts['hostname'] }}
        state: restarted
        enabled: true
        masked: false
      when: not containerized_deployment | bool

    - name: Restart active mds
      ansible.builtin.command: "{{ container_binary }} stop ceph-mds-{{ ansible_facts['hostname'] }}"
      changed_when: false
      when: containerized_deployment | bool

- name: Upgrade standbys ceph mdss cluster
  vars:
    upgrade_ceph_packages: true
  hosts: standby_mdss
  tags: mdss
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Prevent restarts from the packaging
      ansible.builtin.systemd:
        name: ceph-mds@{{ ansible_facts['hostname'] }}
        enabled: false
        masked: true
      when: not containerized_deployment | bool

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-mds role
      ansible.builtin.import_role:
        name: ceph-mds

    # Restore the configured max_mds; executed only for the last standby host.
    - name: Set max_mds
      ceph_fs:
        name: "{{ cephfs }}"
        cluster: "{{ cluster }}"
        max_mds: "{{ mds_max_mds }}"
        data: "{{ cephfs_data_pool.name }}"
        metadata: "{{ cephfs_metadata_pool.name }}"
      delegate_to: "{{ groups[mon_group_name][0] }}"
      environment:
        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
      when: inventory_hostname == groups['standby_mdss'] | last

- name: Upgrade ceph rgws cluster
  vars:
    upgrade_ceph_packages: true
  hosts: "{{ rgw_group_name|default('rgws') }}"
  tags: rgws
  serial: 1
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Stop ceph rgw when upgrading from stable-3.2  # noqa: ignore-errors
      ansible.builtin.systemd:
        name: ceph-radosgw@rgw.{{ rgw_zone }}.{{ ansible_facts['hostname'] }}
        state: stopped
        enabled: false
        masked: true
      ignore_errors: true

    - name: Stop ceph rgw
      ansible.builtin.systemd:
        name: ceph-radosgw@rgw.{{ rgw_zone }}.{{ ansible_facts['hostname'] }}.{{ item.instance_name }}
        state: stopped
        enabled: false
        masked: true
      with_items: "{{ rgw_instances }}"

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-rgw role
      ansible.builtin.import_role:
        name: ceph-rgw

- name: Upgrade ceph rbd mirror node
  vars:
    upgrade_ceph_packages: true
  hosts: "{{ rbdmirror_group_name|default('rbdmirrors') }}"
  tags: rbdmirrors
  serial: 1
  become: true
  gather_facts: false
  tasks:
    - name: Check for ceph rbd mirror services
      ansible.builtin.command: systemctl show --no-pager --property=Id --state=enabled ceph-rbd-mirror@*  # noqa command-instead-of-module
      changed_when: false
      register: rbdmirror_services

    # Uses the systemd module like every other stop/mask task in this
    # playbook: 'masked' is a systemd option, not a documented option of
    # the generic service module.
    # Empty output lines are dropped before splitting on '='.
    # NOTE(review): systemctl appears to separate multiple units with a
    # blank line, which would otherwise break the split - confirm.
    - name: Stop ceph rbd mirror
      ansible.builtin.systemd:
        name: "{{ item.split('=')[1] }}"
        state: stopped
        enabled: false
        masked: true
      loop: "{{ rbdmirror_services.stdout_lines | select | list }}"

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-rbd-mirror role
      ansible.builtin.import_role:
        name: ceph-rbd-mirror

- name: Upgrade ceph nfs node
  vars:
    upgrade_ceph_packages: true
  hosts: "{{ nfs_group_name|default('nfss') }}"
  tags: nfss
  serial: 1
  become: true
  gather_facts: false
  tasks:
    # failed_when: false is here so that if we upgrade
    # from a version of ceph that does not have nfs-ganesha
    # then this task will not fail
    - name: Stop ceph nfs
      ansible.builtin.systemd:
        name: nfs-ganesha
        state: stopped
        enabled: false
        masked: true
      failed_when: false
      when: not containerized_deployment | bool

    - name: Systemd stop nfs container
      ansible.builtin.systemd:
        name: ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_facts['hostname']) }}
        state: stopped
        enabled: false
        masked: true
      failed_when: false
      when:
        - ceph_nfs_enable_service | bool
        - containerized_deployment | bool

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when: containerized_deployment | bool

    - name: Import ceph-config role
      ansible.builtin.import_role:
        name: ceph-config

    - name: Import ceph-nfs role
      ansible.builtin.import_role:
        name: ceph-nfs

# Clients are upgraded in batches of client_update_batch hosts (20 if unset).
- name: Upgrade ceph client node
  vars:
    upgrade_ceph_packages: true
  hosts: "{{ client_group_name|default('clients') }}"
  tags: clients
  serial: "{{ client_update_batch | default(20) }}"
  become: true
  gather_facts: false
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml
      when: containerized_deployment | bool

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-common role
      ansible.builtin.import_role:
        name: ceph-common
      when: not containerized_deployment | bool

    # The client group is resolved through client_group_name, matching the
    # 'hosts:' expression of this play, instead of the literal 'clients'.
    - name: Import ceph-container-common role
      ansible.builtin.import_role:
        name: ceph-container-common
      when:
        - (group_names != [client_group_name]) or (inventory_hostname == groups.get(client_group_name, [''])|first)
        - containerized_deployment | bool

- name: Upgrade ceph-crash daemons
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  tags:
    - post_upgrade
    - crash
  gather_facts: false
  become: true
  tasks:
    - name: Stop the ceph-crash service
      ansible.builtin.systemd:
        name: "{{ 'ceph-crash@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-crash.service' }}"
        state: stopped

    # it needs to be done in a separate task otherwise the stop just before doesn't work.
    - name: Mask and disable the ceph-crash service
      ansible.builtin.systemd:
        name: "{{ 'ceph-crash@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-crash.service' }}"
        enabled: false
        masked: true

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-crash role
      ansible.builtin.import_role:
        name: ceph-crash

# The play ends immediately on non-containerized deployments.
- name: Upgrade ceph-exporter daemons
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  tags:
    - post_upgrade
    - ceph-exporter
  gather_facts: false
  become: true
  tasks:
    - name: Exit ceph-exporter upgrade if non containerized deployment
      ansible.builtin.meta: end_play
      when: not containerized_deployment | bool

    - name: Stop the ceph-exporter service  # noqa: ignore-errors
      ansible.builtin.systemd:
        name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}"
        state: stopped
      ignore_errors: true

    # it needs to be done in a separate task otherwise the stop just before doesn't work.
- name: Mask and disable the ceph-exporter service # noqa: ignore-errors ansible.builtin.systemd: name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}" enabled: false masked: true ignore_errors: true - name: Import ceph-defaults role ansible.builtin.import_role: name: ceph-defaults - name: Import ceph-facts role ansible.builtin.import_role: name: ceph-facts tasks_from: container_binary.yml - name: Import ceph-handler role ansible.builtin.import_role: name: ceph-handler - name: Import ceph-exporter role ansible.builtin.import_role: name: ceph-exporter - name: Complete upgrade hosts: "{{ mon_group_name | default('mons') }}" tags: post_upgrade become: true gather_facts: false tasks: - name: Import ceph-defaults role ansible.builtin.import_role: name: ceph-defaults - name: Import ceph-facts role ansible.builtin.import_role: name: ceph-facts tasks_from: container_binary.yml - name: Container | disallow pre-reef OSDs and enable all new reef-only functionality ansible.builtin.command: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }} ceph --cluster {{ cluster }} osd require-osd-release reef" delegate_to: "{{ groups[mon_group_name][0] }}" run_once: true changed_when: false when: - containerized_deployment | bool - groups.get(mon_group_name, []) | length > 0 - name: Non container | disallow pre-reef OSDs and enable all new reef-only functionality ansible.builtin.command: "ceph --cluster {{ cluster }} osd require-osd-release reef" delegate_to: "{{ groups[mon_group_name][0] }}" run_once: true changed_when: false when: - not containerized_deployment | bool - groups.get(mon_group_name, []) | length > 0 - name: Upgrade node-exporter hosts: - "{{ mon_group_name|default('mons') }}" - "{{ osd_group_name|default('osds') }}" - "{{ mds_group_name|default('mdss') }}" - "{{ rgw_group_name|default('rgws') }}" - "{{ mgr_group_name|default('mgrs') }}" - "{{ 
rbdmirror_group_name|default('rbdmirrors') }}" - "{{ nfs_group_name|default('nfss') }}" - "{{ monitoring_group_name|default('monitoring') }}" tags: monitoring gather_facts: false become: true tasks: - name: Import ceph-defaults role ansible.builtin.import_role: name: ceph-defaults - name: With dashboard configuration when: dashboard_enabled | bool block: - name: Stop node-exporter ansible.builtin.service: name: node_exporter state: stopped failed_when: false - name: Import ceph-facts role ansible.builtin.import_role: name: ceph-facts - name: Import ceph-container-engine role ansible.builtin.import_role: name: ceph-container-engine - name: Import ceph-container-common role ansible.builtin.import_role: name: ceph-container-common tasks_from: registry when: - not containerized_deployment | bool - ceph_docker_registry_auth | bool - name: Import ceph-node-exporter role ansible.builtin.import_role: name: ceph-node-exporter - name: Upgrade monitoring node hosts: "{{ monitoring_group_name|default('monitoring') }}" tags: monitoring gather_facts: false become: true tasks: - name: Import ceph-defaults role ansible.builtin.import_role: name: ceph-defaults - name: With dashboard configuration when: dashboard_enabled | bool block: - name: Stop monitoring services ansible.builtin.service: name: '{{ item }}' state: stopped failed_when: false with_items: - alertmanager - prometheus - grafana-server # - name: Import ceph-facts role # ansible.builtin.import_role: # name: ceph-facts - name: Import ceph-facts role ansible.builtin.import_role: name: ceph-facts tasks_from: grafana - name: Import ceph-prometheus role ansible.builtin.import_role: name: ceph-prometheus - name: Import ceph-grafana role ansible.builtin.import_role: name: ceph-grafana - name: Upgrade ceph dashboard hosts: "{{ groups[mgr_group_name|default('mgrs')] | default(groups[mon_group_name|default('mons')]) | default(omit) }}" tags: monitoring gather_facts: false become: true tasks: - name: Import ceph-defaults role 
ansible.builtin.import_role: name: ceph-defaults - name: With dashboard configuration when: dashboard_enabled | bool block: - name: Import ceph-facts role ansible.builtin.import_role: name: ceph-facts - name: Import ceph-facts role ansible.builtin.import_role: name: ceph-facts tasks_from: grafana - name: Import ceph-dashboard role ansible.builtin.import_role: name: ceph-dashboard - name: Switch any existing crush buckets to straw2 hosts: "{{ mon_group_name | default('mons') }}[0]" tags: post_upgrade become: true any_errors_fatal: true gather_facts: false tasks: - name: Import ceph-defaults role ansible.builtin.import_role: name: ceph-defaults - name: Import ceph-facts role ansible.builtin.import_role: name: ceph-facts tasks_from: container_binary.yml - name: Set_fact ceph_cmd ansible.builtin.set_fact: ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }}" - name: Backup the crushmap ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} osd getcrushmap -o /etc/ceph/{{ cluster }}-crushmap" changed_when: false - name: Migrate crush buckets to straw2 block: - name: Switch crush buckets to straw2 ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} osd crush set-all-straw-buckets-to-straw2" changed_when: false rescue: - name: Restore the crushmap ansible.builtin.command: "{{ ceph_cmd }} --cluster {{ cluster }} osd setcrushmap -i /etc/ceph/{{ cluster }}-crushmap" changed_when: false - name: Inform that the switch to straw2 buckets failed ansible.builtin.fail: msg: > "An attempt to switch to straw2 bucket was made but failed. Check the cluster status." 
- name: Remove crushmap backup ansible.builtin.file: path: /etc/ceph/{{ cluster }}-crushmap state: absent - name: Show ceph status hosts: "{{ mon_group_name|default('mons') }}" tags: always become: true gather_facts: false tasks: - name: Import ceph-defaults role ansible.builtin.import_role: name: ceph-defaults - name: Set_fact container_exec_cmd_status ansible.builtin.set_fact: container_exec_cmd_status: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }}" when: containerized_deployment | bool - name: Show ceph status ansible.builtin.command: "{{ container_exec_cmd_status | default('') }} ceph --cluster {{ cluster }} -s" changed_when: false run_once: true delegate_to: "{{ groups[mon_group_name][0] }}" - name: Show all daemons version ansible.builtin.command: "{{ container_exec_cmd_status | default('') }} ceph --cluster {{ cluster }} versions" run_once: true delegate_to: "{{ groups[mon_group_name][0] }}" changed_when: false