---
# This playbook purges Ceph
# It removes: packages, configuration files and ALL THE DATA
#
# Use it like this:
# ansible-playbook purge-cluster.yml
#     Prompts for confirmation to purge, defaults to no and
#     doesn't purge the cluster. yes purges the cluster.
#
# ansible-playbook -e ireallymeanit=yes|no purge-cluster.yml
#     Overrides the prompt using -e option. Can be used in
#     automation scripts to avoid interactive prompt.

# Safety gate: abort unless the operator explicitly answered 'yes'
# (interactively or through `-e ireallymeanit=yes`).
- name: confirm whether user really meant to purge the cluster
  hosts: localhost
  gather_facts: false

  vars_prompt:
    - name: ireallymeanit
      prompt: Are you sure you want to purge the cluster?
      default: 'no'
      private: false

  tasks:
    - name: exit playbook, if user did not mean to purge cluster
      fail:
        msg: >
          "Exiting purge-cluster playbook, cluster was NOT purged.
          To purge the cluster, either say 'yes' on the prompt or
          use `-e ireallymeanit=yes` on the command line when
          invoking the playbook"
      when: ireallymeanit != 'yes'

# Facts are gathered once here (implicit gather_facts); the later plays set
# `gather_facts: false` and reuse them.
- name: gather facts on all hosts
  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ client_group_name|default('clients') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - grafana-server

  become: true

  tasks:
    - name: announce fact gathering
      debug:
        msg: "gather facts on all Ceph hosts for following reference"

# On client nodes: unmount nfs-ganesha and cephfs mounts, unmap rbd devices
# and unload the ceph kernel modules.
- name: check there's no ceph kernel threads present
  hosts: "{{ client_group_name|default('clients') }}"
  become: true
  any_errors_fatal: true

  tasks:
    - import_role:
        name: ceph-defaults

    # Only relevant when the inventory actually contains nfs nodes.
    - block:
        - name: get nfs nodes ansible facts
          setup:
            gather_subset:
              - 'all'
              - '!facter'
              - '!ohai'
          delegate_to: "{{ item }}"
          delegate_facts: true
          with_items: "{{ groups[nfs_group_name] }}"
          run_once: true

        # grep exits non-zero when nothing matches, hence failed_when: false.
        - name: get all nfs-ganesha mount points
          command: grep "{{ hostvars[item]['ansible_all_ipv4_addresses'] | ips_in_ranges(public_network.split(',')) | first }}" /proc/mounts
          register: nfs_ganesha_mount_points
          changed_when: false
          failed_when: false
          with_items: "{{ groups[nfs_group_name] }}"

        # Second whitespace-separated field of a /proc/mounts line is the
        # mount point.
        - name: ensure nfs-ganesha mountpoint(s) are unmounted
          mount:
            path: "{{ item.split(' ')[1] }}"
            state: unmounted
          with_items:
            - "{{ nfs_ganesha_mount_points.results | map(attribute='stdout_lines') | list }}"
          when: item | length > 0
      when: groups[nfs_group_name] | default([]) | length > 0

    - name: ensure cephfs mountpoint(s) are unmounted
      command: umount -a -t ceph
      changed_when: false

    - name: find mapped rbd ids
      find:
        paths: /sys/bus/rbd/devices
        file_type: any
      register: rbd_mapped_ids

    - name: use sysfs to unmap rbd devices
      shell: "echo {{ item.path | basename }} > /sys/bus/rbd/remove_single_major"
      changed_when: false
      with_items: "{{ rbd_mapped_ids.files }}"

    - name: unload ceph kernel modules
      modprobe:
        name: "{{ item }}"
        state: absent
      with_items:
        - rbd
        - ceph
        - libceph

# Stop the nfs-ganesha service on nfs nodes.
- name: purge ceph nfs cluster

  vars:
    nfs_group_name: nfss

  hosts: "{{ nfs_group_name|default('nfss') }}"

  gather_facts: false  # Already gathered previously

  become: true

  tasks:
    - name: stop ceph nfss with systemd
      service:
        name: nfs-ganesha
        state: stopped
      failed_when: false
      when: ansible_service_mgr == 'systemd'

# Remove the node_exporter service, unit file and container image
# (only when the dashboard is enabled).
- name: purge node-exporter
  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ client_group_name|default('clients') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - grafana-server
    - clients
    - iscsigws

  become: true

  tasks:
    - import_role:
        name: ceph-defaults

    - block:
        - import_role:
            name: ceph-facts
            tasks_from: container_binary

        - name: disable node_exporter service
          service:
            name: node_exporter
            state: stopped
            enabled: false
          failed_when: false

        - name: remove node_exporter service file
          file:
            name: /etc/systemd/system/node_exporter.service
            state: absent

        - name: remove node-exporter image
          command: "{{ container_binary }} rmi {{ node_exporter_container_image }}"
          failed_when: false
          tags:
            - remove_img
      when: dashboard_enabled | bool

# Remove grafana / prometheus / alertmanager services, unit files, images
# and data (only when the dashboard is enabled).
- name: purge ceph grafana-server
  hosts: grafana-server
  become: true
  vars:
    grafana_services:
      - grafana-server
      - prometheus
      - alertmanager

  tasks:
    - import_role:
        name: ceph-defaults

    - block:
        - import_role:
            name: ceph-facts
            tasks_from: container_binary

        - name: stop services
          service:
            name: "{{ item }}"
            state: stopped
            enabled: false
          with_items: "{{ grafana_services }}"
          failed_when: false

        - name: remove service files
          file:
            name: "/etc/systemd/system/{{ item }}.service"
            state: absent
          with_items: "{{ grafana_services }}"
          failed_when: false

        - name: remove ceph dashboard container images
          command: "{{ container_binary }} rmi {{ item }}"
          with_items:
            - "{{ prometheus_container_image }}"
            - "{{ grafana_container_image }}"
            - "{{ alertmanager_container_image }}"
          failed_when: false
          tags:
            - remove_img

        - name: remove data
          file:
            name: "{{ item }}"
            state: absent
          with_items:
            - /etc/grafana/dashboards
            - /etc/grafana/grafana.ini
            - /etc/grafana/provisioning
            - /var/lib/grafana
            - /etc/alertmanager
            - /var/lib/alertmanager
            - /var/lib/prometheus
            - /etc/prometheus
          failed_when: false
      when: dashboard_enabled | bool

# Stop and disable the mds daemon on mds nodes.
- name: purge ceph mds cluster

  vars:
    mds_group_name: mdss

  hosts: "{{ mds_group_name|default('mdss') }}"

  gather_facts: false  # Already gathered previously

  become: true

  tasks:
    - name: stop ceph mdss with systemd
      service:
        name: "ceph-mds@{{ ansible_hostname }}"
        state: stopped
        enabled: false
      failed_when: false

# Stop and disable the mgr daemon on mgr nodes.
- name: purge ceph mgr cluster

  vars:
    mgr_group_name: mgrs

  hosts: "{{ mgr_group_name|default('mgrs') }}"

  gather_facts: false  # Already gathered previously

  become: true

  tasks:
    - name: stop ceph mgrs with systemd
      service:
        name: "ceph-mgr@{{ ansible_hostname }}"
        state: stopped
        enabled: false
      failed_when: false
      when: ansible_service_mgr == 'systemd'

# Stop and disable keepalived and haproxy on rgw load balancer nodes.
- name: purge rgwloadbalancer cluster

  vars:
    rgwloadbalancer_group_name: rgwloadbalancers

  hosts:
    - "{{ rgwloadbalancer_group_name|default('rgwloadbalancers') }}"

  gather_facts: false  # Already gathered previously

  become: true

  tasks:
    # The service module's `name` takes a single service name, so iterate
    # instead of passing a list (which, combined with failed_when: false,
    # silently stopped nothing).
    - name: stop rgwloadbalancer services
      service:
        name: "{{ item }}"
        state: stopped
        enabled: false
      failed_when: false
      with_items:
        - keepalived
        - haproxy

# Stop and disable every radosgw instance on rgw nodes.
- name: purge ceph rgw cluster

  vars:
    rgw_group_name: rgws

  hosts: "{{ rgw_group_name|default('rgws') }}"

  gather_facts: false  # Already gathered previously

  become: true

  tasks:
    - import_role:
        name: ceph-defaults

    - import_role:
        name: ceph-facts
        tasks_from: set_radosgw_address

    - name: stop ceph rgws with systemd
      service:
        name: "ceph-radosgw@rgw.{{ ansible_hostname }}.{{ item.instance_name }}"
        state: stopped
        enabled: false
      failed_when: false
      with_items: "{{ rgw_instances }}"

# Stop the rbd-mirror daemon on rbd-mirror nodes.
- name: purge ceph rbd-mirror cluster

  vars:
    rbdmirror_group_name: rbdmirrors

  hosts: "{{ rbdmirror_group_name|default('rbdmirrors') }}"

  gather_facts: false  # Already gathered previously

  become: true

  tasks:
    - name: stop ceph rbd mirror with systemd
      service:
        name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}"
        state: stopped
      failed_when: false

# Stop OSDs, unmount their data, tear down dm-crypt mappings, zap
# ceph-volume devices and wipe every ceph-labelled partition.
- name: purge ceph osd cluster

  vars:
    osd_group_name: osds
    # Set to true to reboot the node when the osd mountpoint tree cannot be
    # removed.
    reboot_osd_node: false

  hosts: "{{ osd_group_name|default('osds') }}"

  gather_facts: false  # Already gathered previously

  become: true

  handlers:
    # Fire-and-forget (async/poll 0) so the task returns before the
    # connection drops.
    - name: restart machine
      shell: sleep 2 && shutdown -r now "Ansible updates triggered"
      async: 1
      poll: 0
      ignore_errors: true

    - name: wait for server to boot
      become: false
      local_action:
        module: wait_for
        port: 22
        host: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }}"
        state: started
        delay: 10
        timeout: 500

    - name: remove data
      shell: rm -rf /var/lib/ceph/*

  tasks:
    - import_role:
        name: ceph-defaults

    - name: default lvm_volumes if not defined
      set_fact:
        lvm_volumes: []
      when: lvm_volumes is not defined

    # Strips everything up to the last '-' of each entry name, leaving the
    # osd id.
    - name: get osd numbers
      shell: |
        set -o pipefail;
        if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi
      register: osd_ids
      changed_when: false

    - name: stop ceph-osd with systemd
      service:
        name: "ceph-osd@{{ item }}"
        state: stopped
        enabled: false
      with_items: "{{ osd_ids.stdout_lines }}"
      when: ansible_service_mgr == 'systemd'

    - name: remove ceph udev rules
      file:
        path: "{{ item }}"
        state: absent
      with_items:
        - /usr/lib/udev/rules.d/95-ceph-osd.rules
        - /usr/lib/udev/rules.d/60-ceph-by-parttypeuuid.rules

    # NOTE(leseb): hope someone will find a more elegant way one day...
    - name: see if encrypted partitions are present
      shell: |
        set -o pipefail;
        blkid -t TYPE=crypto_LUKS -s PARTLABEL -s PARTUUID | grep "ceph.*." | grep -o PARTUUID.* | cut -d '"' -f 2
      register: encrypted_ceph_partuuid
      failed_when: false
      changed_when: false

    # `|| echo -n` keeps the pipeline successful when nothing is mounted.
    - name: get osd data and lockbox mount points
      shell: |
        set -o pipefail;
        (grep /var/lib/ceph/osd /proc/mounts || echo -n) | awk '{ print $2 }'
      register: mounted_osd
      changed_when: false

    - name: drop all cache
      shell: "sync && sleep 1 && echo 3 > /proc/sys/vm/drop_caches"
      changed_when: false

    - name: umount osd data partition
      mount:
        path: "{{ item }}"
        state: unmounted
      with_items: "{{ mounted_osd.stdout_lines }}"

    # Failure is tolerated here; it is what (optionally) triggers the reboot
    # below.
    - name: remove osd mountpoint tree
      file:
        path: /var/lib/ceph/osd/
        state: absent
      register: remove_osd_mountpoints
      ignore_errors: true

    # `failed` is always present on a registered result, so test its value
    # with `is failed` rather than checking that the key is defined.
    - name: is reboot needed
      local_action:
        module: command echo requesting reboot
      become: false
      notify:
        - restart machine
        - wait for server to boot
        - remove data
      when:
        - reboot_osd_node | bool
        - remove_osd_mountpoints is failed

    - name: wipe table on dm-crypt devices
      command: dmsetup wipe_table --force "{{ item }}"
      with_items: "{{ encrypted_ceph_partuuid.stdout_lines }}"
      when: encrypted_ceph_partuuid.stdout_lines | length > 0

    - name: delete dm-crypt devices if any
      command: dmsetup remove --retry --force {{ item }}
      with_items: "{{ encrypted_ceph_partuuid.stdout_lines }}"
      when: encrypted_ceph_partuuid.stdout_lines | length > 0

    - name: get payload_offset
      shell: |
        set -o pipefail;
        cryptsetup luksDump /dev/disk/by-partuuid/{{ item }} | awk '/Payload offset:/ { print $3 }'
      register: payload_offset
      changed_when: false
      with_items: "{{ encrypted_ceph_partuuid.stdout_lines }}"
      when: encrypted_ceph_partuuid.stdout_lines | length > 0

    - name: get physical sector size
      command: blockdev --getpbsz /dev/disk/by-partuuid/{{ item }}
      changed_when: false
      with_items: "{{ encrypted_ceph_partuuid.stdout_lines }}"
      when: encrypted_ceph_partuuid.stdout_lines | length > 0
      register: phys_sector_size

    # item.0 = partuuid, item.1 = payload_offset result,
    # item.2 = phys_sector_size result.
    - name: wipe dmcrypt device
      command: dd if=/dev/zero of=/dev/disk/by-partuuid/{{ item.0 }} bs={{ item.1.stdout }} count={{ item.2.stdout }} oflag=direct
      changed_when: false
      with_together:
        - "{{ encrypted_ceph_partuuid.stdout_lines }}"
        - "{{ payload_offset.results }}"
        - "{{ phys_sector_size.results }}"

    - name: get ceph data partitions
      shell: |
        blkid -o device -t PARTLABEL="ceph data"
      changed_when: false
      failed_when: false
      register: ceph_data_partition_to_erase_path

    - name: get ceph lockbox partitions
      shell: |
        blkid -o device -t PARTLABEL="ceph lockbox"
      changed_when: false
      failed_when: false
      register: ceph_lockbox_partition_to_erase_path

    # `command -v` is a shell builtin, so it has to run through the shell
    # module; with the command module the lookup can fail even though
    # ceph-volume is installed, which would skip both zap tasks below.
    - name: see if ceph-volume is installed
      shell: command -v ceph-volume
      changed_when: false
      failed_when: false
      register: ceph_volume_present

    - name: zap and destroy osds created by ceph-volume with lvm_volumes
      ceph_volume:
        data: "{{ item.data }}"
        data_vg: "{{ item.data_vg|default(omit) }}"
        journal: "{{ item.journal|default(omit) }}"
        journal_vg: "{{ item.journal_vg|default(omit) }}"
        db: "{{ item.db|default(omit) }}"
        db_vg: "{{ item.db_vg|default(omit) }}"
        wal: "{{ item.wal|default(omit) }}"
        wal_vg: "{{ item.wal_vg|default(omit) }}"
        action: "zap"
      environment:
        CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
      with_items: "{{ lvm_volumes }}"
      when:
        - lvm_volumes | default([]) | length > 0
        - ceph_volume_present.rc == 0

    - name: zap and destroy osds created by ceph-volume with devices
      ceph_volume:
        data: "{{ item }}"
        action: "zap"
      environment:
        CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
      with_items: "{{ devices | default([]) }}"
      when:
        - devices | default([]) | length > 0
        - ceph_volume_present.rc == 0

    - name: get ceph block partitions
      shell: |
        blkid -o device -t PARTLABEL="ceph block"
      changed_when: false
      failed_when: false
      register: ceph_block_partition_to_erase_path

    - name: get ceph journal partitions
      shell: |
        blkid -o device -t PARTLABEL="ceph journal"
      changed_when: false
      failed_when: false
      register: ceph_journal_partition_to_erase_path

    - name: get ceph db partitions
      shell: |
        blkid -o device -t PARTLABEL="ceph block.db"
      changed_when: false
      failed_when: false
      register: ceph_db_partition_to_erase_path

    - name: get ceph wal partitions
      shell: |
        blkid -o device -t PARTLABEL="ceph block.wal"
      changed_when: false
      failed_when: false
      register: ceph_wal_partition_to_erase_path

    # Concatenate every partition path found by the blkid probes above.
    - name: set_fact combined_devices_list
      set_fact:
        combined_devices_list: "{{ ceph_data_partition_to_erase_path.stdout_lines +
                                   ceph_lockbox_partition_to_erase_path.stdout_lines +
                                   ceph_block_partition_to_erase_path.stdout_lines +
                                   ceph_journal_partition_to_erase_path.stdout_lines +
                                   ceph_db_partition_to_erase_path.stdout_lines +
                                   ceph_wal_partition_to_erase_path.stdout_lines }}"

    - name: resolve parent device
      command: lsblk --nodeps -no pkname "{{ item }}"
      register: tmp_resolved_parent_device
      changed_when: false
      with_items: "{{ combined_devices_list }}"

    # De-duplicate: several partitions usually share one parent disk.
    - name: set_fact resolved_parent_device
      set_fact:
        resolved_parent_device: "{{ tmp_resolved_parent_device.results | map(attribute='stdout') | list | unique }}"

    - name: wipe partitions
      shell: |
        set -o pipefail;
        wipefs --all "{{ item }}"
        dd if=/dev/zero of="{{ item }}" bs=1 count=4096
      changed_when: false
      with_items: "{{ combined_devices_list }}"

    - name: zap ceph journal/block db/block wal partitions
      shell: |
        set -o pipefail;
        # if the disk passed is a raw device AND the boot system disk
        if parted -s /dev/"{{ item }}" print | grep -sq boot; then
          echo "Looks like /dev/{{ item }} has a boot partition,"
          echo "if you want to delete specific partitions point to the partition instead of the raw device"
          echo "Do not use your system disk!"
          exit 1
        fi
        sgdisk -Z --clear --mbrtogpt -g -- /dev/"{{ item }}"
        dd if=/dev/zero of=/dev/"{{ item }}" bs=1M count=200
        parted -s /dev/"{{ item }}" mklabel gpt
        partprobe /dev/"{{ item }}"
        udevadm settle --timeout=600
      with_items: "{{ resolved_parent_device }}"
      changed_when: false

# Stop mon (and co-located mgr) daemons and delete the monitor store and
# bootstrap keys.
- name: purge ceph mon cluster

  vars:
    mon_group_name: mons

  hosts: "{{ mon_group_name|default('mons') }}"

  gather_facts: false  # already gathered previously

  become: true

  tasks:
    - name: stop ceph mons with systemd
      service:
        name: "ceph-{{ item }}@{{ ansible_hostname }}"
        state: stopped
        enabled: false
      failed_when: false
      with_items:
        - mon
        - mgr

    - name: remove monitor store and bootstrap keys
      file:
        path: "{{ item }}"
        state: absent
      with_items:
        - /var/lib/ceph/mon
        - /var/lib/ceph/bootstrap-mds
        - /var/lib/ceph/bootstrap-osd
        - /var/lib/ceph/bootstrap-rgw
        - /var/lib/ceph/bootstrap-rbd
        - /var/lib/ceph/bootstrap-mgr
        - /var/lib/ceph/tmp

# Stop ceph-crash and remove its data directory.
- name: purge ceph-crash daemons
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  gather_facts: false
  become: true
  tasks:
    - name: stop ceph-crash service
      service:
        name: ceph-crash.service
        state: stopped
        enabled: false
      failed_when: false

    - name: remove /var/lib/ceph/crash
      file:
        path: /var/lib/ceph/crash
        state: absent

# Remove packages, configuration, logs, repo files and unit files, then
# verify that no process owned by the ceph user is left.
- name: final cleanup - check any running ceph, purge ceph packages, purge config and remove data

  vars:
    # When set to true both groups of packages are purged.
    # This can cause problem with qemu-kvm
    purge_all_packages: true

    ceph_packages:
      - ceph
      - ceph-common
      - ceph-fs-common
      - ceph-fuse
      - ceph-mds
      - ceph-mgr
      - ceph-release
      - ceph-radosgw
      - calamari-server
      - ceph-grafana-dashboards

    ceph_remaining_packages:
      - libcephfs1
      - libcephfs2
      - librados2
      - libradosstriper1
      - librbd1
      - python-ceph-argparse
      - python-cephfs
      - python-rados
      - python-rbd

    extra_packages:
      - keepalived
      - haproxy

  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ client_group_name|default('clients') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - grafana-server

  gather_facts: false  # Already gathered previously

  become: true

  # All three handlers listen on the "remove data" topic and run in the
  # order they are defined: find mounts, unmount them, then delete.
  handlers:
    - name: get osd data and lockbox mount points
      shell: "(grep /var/lib/ceph/osd /proc/mounts || echo -n) | awk '{ print $2 }'"
      register: mounted_osd
      changed_when: false
      listen: "remove data"

    - name: umount osd data partition
      mount:
        path: "{{ item }}"
        state: unmounted
      with_items: "{{ mounted_osd.stdout_lines }}"
      listen: "remove data"

    - name: remove data
      shell: rm -rf /var/lib/ceph/*
      listen: "remove data"

  tasks:
    - name: purge ceph packages with yum
      yum:
        name: "{{ ceph_packages }}"
        state: absent
      when: ansible_pkg_mgr == 'yum'

    - name: purge ceph packages with dnf
      dnf:
        name: "{{ ceph_packages }}"
        state: absent
      when: ansible_pkg_mgr == 'dnf'

    - name: purge ceph packages with apt
      apt:
        name: "{{ ceph_packages }}"
        state: absent
        purge: true
      when: ansible_pkg_mgr == 'apt'

    - name: purge remaining ceph packages with yum
      yum:
        name: "{{ ceph_remaining_packages }}"
        state: absent
      when:
        - ansible_pkg_mgr == 'yum'
        - purge_all_packages | bool

    - name: purge remaining ceph packages with dnf
      dnf:
        name: "{{ ceph_remaining_packages }}"
        state: absent
      when:
        - ansible_pkg_mgr == 'dnf'
        - purge_all_packages | bool

    - name: purge remaining ceph packages with apt
      apt:
        name: "{{ ceph_remaining_packages }}"
        state: absent
      when:
        - ansible_pkg_mgr == 'apt'
        - purge_all_packages | bool

    - name: purge extra packages with yum
      yum:
        name: "{{ extra_packages }}"
        state: absent
      when:
        - ansible_pkg_mgr == 'yum'
        - purge_all_packages | bool

    - name: purge extra packages with dnf
      dnf:
        name: "{{ extra_packages }}"
        state: absent
      when:
        - ansible_pkg_mgr == 'dnf'
        - purge_all_packages | bool

    - name: purge extra packages with apt
      apt:
        name: "{{ extra_packages }}"
        state: absent
      when:
        - ansible_pkg_mgr == 'apt'
        - purge_all_packages | bool

    - name: remove config and any ceph socket left
      file:
        path: "{{ item }}"
        state: absent
      with_items:
        - /etc/ceph
        - /etc/keepalived
        - /etc/haproxy
        - /run/ceph

    - name: remove logs
      file:
        path: /var/log/ceph
        state: absent

    # A no-op command whose only purpose is to report "changed" and thereby
    # notify the "remove data" handlers.
    - name: request data removal
      local_action:
        module: command echo requesting data removal  # noqa 301
      become: false
      notify: remove data

    - name: purge dnf cache
      command: dnf clean all
      when: ansible_pkg_mgr == 'dnf'

    - name: purge rpm cache in /tmp
      file:
        path: /tmp/rh-storage-repo
        state: absent

    - name: clean apt
      command: apt-get clean
      when: ansible_pkg_mgr == 'apt'

    - name: purge ceph repo file in /etc/yum.repos.d
      file:
        path: '/etc/yum.repos.d/{{ item }}.repo'
        state: absent
      with_items:
        - ceph-dev
        - ceph_stable
        - rh_storage
      when: ansible_os_family == 'RedHat'

    # ps returns 0 when it finds a matching process, which is the failure
    # case here.
    - name: check for anything running ceph
      command: "ps -u ceph -U ceph"
      register: check_for_running_ceph
      changed_when: false
      failed_when: check_for_running_ceph.rc == 0

    - name: find ceph systemd unit files to remove
      find:
        paths: "/etc/systemd/system"
        pattern: "ceph*"
        recurse: true
        file_type: any
      register: systemd_files

    - name: remove ceph systemd unit files
      file:
        path: "{{ item.path }}"
        state: absent
      with_items: "{{ systemd_files.files }}"
      when: ansible_service_mgr == 'systemd'

# Remove the local fetch directory on the control node.
- name: purge fetch directory

  hosts: localhost

  gather_facts: false

  tasks:
    - name: set fetch_directory value if not set
      set_fact:
        fetch_directory: "fetch/"
      when: fetch_directory is not defined

    - name: purge fetch directory for localhost
      file:
        path: "{{ fetch_directory | default('fetch/') }}"
        state: absent