ceph-ansible/infrastructure-playbooks/purge-container-cluster.yml

672 lines
19 KiB
YAML

---
# This playbook purges a containerized Ceph cluster
# It removes: packages, containers, configuration files and ALL THE DATA
- name: confirm whether user really meant to purge the cluster
hosts: localhost
gather_facts: false
vars_prompt:
- name: ireallymeanit
prompt: >
Are you sure you want to purge the cluster?
Note that if with_pkg is not set docker packages
and more will be uninstalled from non-atomic hosts.
Do you want to continue?
default: 'no'
private: no
tasks:
- name: exit playbook, if user did not mean to purge cluster
fail:
msg: >
"Exiting purge-container-cluster playbook, cluster was NOT purged.
To purge the cluster, either say 'yes' on the prompt or
or use `-e ireallymeanit=yes` on the command line when
invoking the playbook"
when: ireallymeanit != 'yes'
- name: set ceph_docker_registry value if not set
set_fact:
ceph_docker_registry: "docker.io"
when: ceph_docker_registry is not defined
- name: check there's no ceph kernel threads present
hosts: "{{ client_group_name|default('clients') }}"
become: true
any_errors_fatal: true
tasks:
- import_role:
name: ceph-defaults
- block:
- name: get nfs nodes ansible facts
setup:
gather_subset:
- 'all'
- '!facter'
- '!ohai'
delegate_to: "{{ item }}"
delegate_facts: True
with_items: "{{ groups[nfs_group_name] }}"
run_once: true
- name: get all nfs-ganesha mount points
command: grep "{{ hostvars[item]['ansible_facts']['all_ipv4_addresses'] | ips_in_ranges(public_network.split(',')) | first }}" /proc/mounts
register: nfs_ganesha_mount_points
failed_when: false
with_items: "{{ groups[nfs_group_name] }}"
- name: ensure nfs-ganesha mountpoint(s) are unmounted
mount:
path: "{{ item.split(' ')[1] }}"
state: unmounted
with_items:
- "{{ nfs_ganesha_mount_points.results | map(attribute='stdout_lines') | list }}"
when: item | length > 0
when: groups[nfs_group_name] | default([]) | length > 0
- name: ensure cephfs mountpoint are unmounted
command: umount -a -t ceph
changed_when: false
- name: find mapped rbd ids
find:
paths: /sys/bus/rbd/devices
file_type: any
register: rbd_mapped_ids
- name: use sysfs to unmap rbd devices
shell: "echo {{ item.path | basename }} > /sys/bus/rbd/remove_single_major"
changed_when: false
with_items: "{{ rbd_mapped_ids.files }}"
- name: unload ceph kernel modules
modprobe:
name: "{{ item }}"
state: absent
with_items:
- rbd
- ceph
- libceph
- name: purge ceph nfs cluster
hosts: "{{ nfs_group_name | default('nfss') }}"
become: true
tasks:
- name: disable ceph nfs service
service:
name: "ceph-nfs@{{ ansible_facts['hostname'] }}"
state: stopped
enabled: no
ignore_errors: true
- name: remove ceph nfs service
file:
path: /etc/systemd/system/ceph-nfs@.service
state: absent
- name: remove ceph nfs directories for "{{ ansible_facts['hostname'] }}"
file:
path: "{{ item }}"
state: absent
with_items:
- /etc/ganesha
- /var/lib/nfs/ganesha
- /var/run/ganesha
- name: purge ceph mds cluster
hosts: "{{ mds_group_name | default('mdss') }}"
become: true
tasks:
- name: disable ceph mds service
service:
name: "ceph-mds@{{ ansible_facts['hostname'] }}"
state: stopped
enabled: no
ignore_errors: true
- name: remove ceph mds service
file:
path: /etc/systemd/system/ceph-mds@.service
state: absent
- name: purge ceph iscsigws cluster
hosts: "{{ iscsi_gw_group_name | default('iscsigws') }}"
become: true
tasks:
- name: disable ceph iscsigw services
service:
name: "{{ item }}"
state: stopped
enabled: no
ignore_errors: true
with_items:
- rbd-target-api
- rbd-target-gw
- tcmu-runner
- name: remove ceph iscsigw systemd unit files
file:
path: /etc/systemd/system/{{ item }}.service
state: absent
ignore_errors: true
with_items:
- rbd-target-api
- rbd-target-gw
- tcmu-runner
- name: purge ceph mgr cluster
hosts: "{{ mgr_group_name | default('mgrs') }}"
become: true
tasks:
- name: disable ceph mgr service
service:
name: "ceph-mgr@{{ ansible_facts['hostname'] }}"
state: stopped
enabled: no
ignore_errors: true
- name: remove ceph mgr service
file:
path: /etc/systemd/system/ceph-mgr@.service
state: absent
- name: purge ceph rgw cluster
hosts: "{{ rgw_group_name | default('rgws') }}"
become: true
tasks:
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
tasks_from: set_radosgw_address
- name: disable ceph rgw service
service:
name: "ceph-radosgw@rgw.{{ ansible_facts['hostname'] }}.{{ item.instance_name }}"
state: stopped
enabled: no
failed_when: false
with_items: "{{ rgw_instances }}"
- name: remove ceph rgw service
file:
path: /etc/systemd/system/ceph-radosgw@.service
state: absent
- name: purge ceph rbd-mirror cluster
hosts: "{{ rbdmirror_group_name | default('rbdmirrors') }}"
become: true
tasks:
- name: disable ceph rbd-mirror service
service:
name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_facts['hostname'] }}"
state: stopped
enabled: no
ignore_errors: true
- name: remove ceph rbd-mirror service
file:
path: /etc/systemd/system/ceph-rbd-mirror@.service
state: absent
- name: purge ceph osd cluster
hosts: "{{ osd_group_name | default('osds') }}"
gather_facts: true
become: true
tasks:
- import_role:
name: ceph-defaults
- name: gather monitors facts
setup:
gather_subset:
- 'all'
- '!facter'
- '!ohai'
delegate_to: "{{ item }}"
delegate_facts: True
with_items: "{{ groups.get(mon_group_name | default('mons')) }}"
- import_role:
name: ceph-facts
- name: get all the running osds
shell: systemctl list-units --all | grep -oE "ceph-osd@([0-9]+).service" # noqa 303
register: osd_units
changed_when: false
ignore_errors: true
- name: disable ceph osd service
service:
name: "{{ item }}"
state: stopped
enabled: no
with_items: "{{ osd_units.stdout_lines }}"
- name: remove osd mountpoint tree
file:
path: /var/lib/ceph/osd/
state: absent
ignore_errors: true
- name: default lvm_volumes if not defined
set_fact:
lvm_volumes: []
when: lvm_volumes is not defined
- name: zap and destroy osds created by ceph-volume with lvm_volumes
ceph_volume:
data: "{{ item.data }}"
data_vg: "{{ item.data_vg|default(omit) }}"
journal: "{{ item.journal|default(omit) }}"
journal_vg: "{{ item.journal_vg|default(omit) }}"
db: "{{ item.db|default(omit) }}"
db_vg: "{{ item.db_vg|default(omit) }}"
wal: "{{ item.wal|default(omit) }}"
wal_vg: "{{ item.wal_vg|default(omit) }}"
action: "zap"
environment:
CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
with_items: "{{ lvm_volumes }}"
when: lvm_volumes | default([]) | length > 0
- name: zap and destroy osds created by ceph-volume with devices
ceph_volume:
data: "{{ item }}"
action: "zap"
environment:
CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
with_items:
- "{{ devices | default([]) }}"
- "{{ dedicated_devices | default([]) }}"
- "{{ bluestore_wal_devices | default([]) }}"
- name: remove ceph osd service
file:
path: /etc/systemd/system/ceph-osd@.service
state: absent
- name: include vars from group_vars/osds.yml
include_vars:
file: "{{ item }}"
with_first_found:
- files:
- "{{ playbook_dir }}/group_vars/osds"
- "{{ playbook_dir }}/group_vars/osds.yml"
skip: true
- name: purge ceph mon cluster
hosts: "{{ mon_group_name|default('mons') }}"
become: true
tasks:
# since mgr are now collocated with mons by default
- name: disable ceph mon and mgr service
service:
name: "{{ item }}"
state: stopped
enabled: no
ignore_errors: true
with_items:
- "ceph-mgr@{{ ansible_facts['hostname'] }}"
- "ceph-mon@{{ ansible_facts['hostname'] }}"
- name: remove ceph mon and mgr service
file:
path: "/etc/systemd/system/ceph-{{ item }}@.service"
state: absent
with_items:
- mon
- mgr
- name: purge node-exporter
hosts:
- "{{ mon_group_name | default('mons') }}"
- "{{ osd_group_name | default('osds') }}"
- "{{ mds_group_name | default('mdss') }}"
- "{{ rgw_group_name | default('rgws') }}"
- "{{ rbdmirror_group_name | default('rbdmirrors') }}"
- "{{ nfs_group_name | default('nfss') }}"
- "{{ mgr_group_name | default('mgrs') }}"
- "{{ monitoring_group_name | default('monitoring') }}"
- iscsigws
- clients
gather_facts: false
become: true
tasks:
- import_role:
name: ceph-defaults
- block:
- import_role:
name: ceph-facts
tasks_from: container_binary
- name: disable node_exporter service
service:
name: node_exporter
state: stopped
enabled: no
failed_when: false
- name: remove node_exporter service file
file:
name: /etc/systemd/system/node_exporter.service
state: absent
- name: remove node-exporter image
command: "{{ container_binary }} rmi {{ node_exporter_container_image }}"
failed_when: false
tags:
- remove_img
when: dashboard_enabled | bool
- name: purge ceph-grafana
hosts: monitoring
gather_facts: false
become: true
vars:
grafana_services:
- grafana-server
- prometheus
- alertmanager
tasks:
- import_role:
name: ceph-defaults
- block:
- import_role:
name: ceph-facts
tasks_from: container_binary
- name: stop services
service:
name: "{{ item }}"
state: stopped
enabled: no
with_items: "{{ grafana_services }}"
failed_when: false
- name: remove service files
file:
name: "/etc/systemd/system/{{ item }}.service"
state: absent
with_items: "{{ grafana_services }}"
failed_when: false
- name: remove ceph dashboard container images
command: "{{ container_binary }} rmi {{ item }}"
with_items:
- "{{ prometheus_container_image }}"
- "{{ grafana_container_image }}"
- "{{ alertmanager_container_image }}"
failed_when: false
tags:
- remove_img
- name: remove data
file:
name: "{{ item }}"
state: absent
with_items:
- /etc/grafana/grafana.ini
- /etc/grafana/provisioning
- /var/lib/grafana
- /etc/alertmanager
- /var/lib/alertmanager
- /var/lib/prometheus
- /etc/prometheus
failed_when: false
when: dashboard_enabled | bool
- name: purge ceph-crash containers
hosts:
- "{{ mon_group_name | default('mons') }}"
- "{{ osd_group_name | default('osds') }}"
- "{{ mds_group_name | default('mdss') }}"
- "{{ rgw_group_name | default('rgws') }}"
- "{{ rbdmirror_group_name | default('rbdmirrors') }}"
- "{{ mgr_group_name | default('mgrs') }}"
gather_facts: false
become: true
tasks:
- name: stop ceph-crash container
service:
name: "ceph-crash@{{ ansible_facts['hostname'] }}"
state: stopped
enabled: no
failed_when: false
- name: remove service file
file:
name: "/etc/systemd/system/ceph-crash.service"
state: absent
failed_when: false
- name: remove /var/lib/ceph/crash
file:
path: /var/lib/ceph/crash
state: absent
- name: check container hosts
hosts:
- "{{ mon_group_name | default('mons') }}"
- "{{ osd_group_name | default('osds') }}"
- "{{ mds_group_name | default('mdss') }}"
- "{{ rgw_group_name | default('rgws') }}"
- "{{ rbdmirror_group_name | default('rbdmirrors') }}"
- "{{ nfs_group_name | default('nfss') }}"
- "{{ mgr_group_name | default('mgrs') }}"
gather_facts: true
become: true
tasks:
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
tasks_from: container_binary
- name: remove stopped/exited containers
command: >
{{ container_binary }} container prune -f
changed_when: false
- name: show container list on all the nodes (should be empty)
command: >
{{ container_binary }} ps --filter='name=ceph' -a -q
register: containers_list
changed_when: false
- name: show container images on all the nodes (should be empty if tags was passed remove_img)
command: >
{{ container_binary }} images
register: images_list
changed_when: false
- name: fail if container are still present
fail:
msg: "It looks like container are still present."
when: containers_list.stdout_lines|length > 0
- name: final cleanup
hosts:
- "{{ mon_group_name | default('mons') }}"
- "{{ osd_group_name | default('osds') }}"
- "{{ mds_group_name | default('mdss') }}"
- "{{ rgw_group_name | default('rgws') }}"
- "{{ rbdmirror_group_name | default('rbdmirrors') }}"
- "{{ nfs_group_name | default('nfss') }}"
- "{{ mgr_group_name | default('mgrs') }}"
- "{{ monitoring_group_name | default('monitoring') }}"
become: true
tags: with_pkg
tasks:
- import_role:
name: ceph-defaults
- name: check if it is Atomic host
stat: path=/run/ostree-booted
register: stat_ostree
- name: set fact for using Atomic host
set_fact:
is_atomic: "{{ stat_ostree.stat.exists }}"
- import_role:
name: ceph-facts
tasks_from: container_binary
- name: remove ceph container image
command: "{{ container_binary }} rmi {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
changed_when: false
tags:
- remove_img
- name: stop docker service
service:
name: docker
state: stopped
enabled: no
when:
- not is_atomic
- container_binary == 'docker'
ignore_errors: true
- name: remove docker on debian/ubuntu
apt:
name: ['docker-ce', 'docker-engine', 'docker.io', 'python-docker', 'python3-docker']
state: absent
update_cache: yes
autoremove: yes
when: ansible_facts['os_family'] == 'Debian'
- name: red hat based systems tasks
block:
- name: yum related tasks on red hat
block:
- name: remove packages on redhat
yum:
name: ['epel-release', 'docker', 'python-docker-py']
state: absent
- name: remove package dependencies on redhat
command: yum -y autoremove
args:
warn: no
- name: remove package dependencies on redhat again
command: yum -y autoremove
args:
warn: no
when:
ansible_facts['pkg_mgr'] == "yum"
- name: dnf related tasks on red hat
block:
- name: remove docker on redhat
dnf:
name: ['docker', 'python3-docker']
state: absent
- name: remove package dependencies on redhat
command: dnf -y autoremove
args:
warn: no
- name: remove package dependencies on redhat again
command: dnf -y autoremove
args:
warn: no
when:
ansible_facts['pkg_mgr'] == "dnf"
when:
ansible_facts['os_family'] == 'RedHat' and
not is_atomic
- name: find any service-cid file left
find:
paths: /run
patterns:
- "ceph-*.service-cid"
- "rbd-target-api.service-cid"
- "rbd-target-gw.service-cid"
- "tcmu-runner.service-cid"
- "node_exporter.service-cid"
- "prometheus.service-cid"
- "grafana-server.service-cid"
- "alertmanager.service-cid"
register: service_cid_files
- name: rm any service-cid file
file:
path: "{{ item.path }}"
state: absent
with_items: "{{ service_cid_files.files }}"
- name: purge ceph directories
hosts:
- "{{ mon_group_name | default('mons') }}"
- "{{ osd_group_name | default('osds') }}"
- "{{ mds_group_name | default('mdss') }}"
- "{{ rgw_group_name | default('rgws') }}"
- "{{ rbdmirror_group_name | default('rbdmirrors') }}"
- "{{ nfs_group_name | default('nfss') }}"
- "{{ mgr_group_name | default('mgrs') }}"
gather_facts: false # Already gathered previously
become: true
tasks:
- name: purge ceph directories for "{{ ansible_facts['hostname'] }}" and ceph socket
file:
path: "{{ item }}"
state: absent
with_items:
- /etc/ceph
- /var/log/ceph
- /run/ceph
- "{{ ceph_osd_docker_run_script_path | default('/usr/share') }}/ceph-osd-run.sh"
- name: remove ceph data
shell: rm -rf /var/lib/ceph/* # noqa 302
changed_when: false
# (todo): remove this when we are able to manage docker
# service on atomic host.
- name: remove docker data
shell: rm -rf /var/lib/docker/* # noqa 302
when: not is_atomic | bool
- name: purge fetch directory
hosts: localhost
gather_facts: false
tasks:
- name: set fetch_directory value if not set
set_fact:
fetch_directory: "fetch/"
when: fetch_directory is not defined
- name: purge fetch directory for localhost
file:
path: "{{ fetch_directory | default('fetch/') }}/"
state: absent