ceph-ansible/infrastructure-playbooks/purge-container-cluster.yml

668 lines
17 KiB
YAML

---
# This playbook purges a containerized Ceph cluster
# It removes: packages, containers, configuration files and ALL THE DATA
- name: confirm whether user really meant to purge the cluster
hosts: localhost
gather_facts: false
vars_prompt:
- name: ireallymeanit
prompt: >
Are you sure you want to purge the cluster?
Note that if with_pkg is not set docker packages
and more will be uninstalled from non-atomic hosts.
Do you want to continue?
default: 'no'
private: no
tasks:
- name: exit playbook, if user did not mean to purge cluster
fail:
msg: >
"Exiting purge-container-cluster playbook, cluster was NOT purged.
To purge the cluster, either say 'yes' on the prompt or
or use `-e ireallymeanit=yes` on the command line when
invoking the playbook"
when: ireallymeanit != 'yes'
- name: set ceph_docker_registry value if not set
set_fact:
ceph_docker_registry: "docker.io"
when: ceph_docker_registry is not defined
- name: check there's no ceph kernel threads present
hosts: "{{ client_group_name|default('clients') }}"
become: true
any_errors_fatal: true
tasks:
- import_role:
name: ceph-defaults
- block:
- name: get nfs nodes ansible facts
setup:
delegate_to: "{{ item }}"
delegate_facts: True
with_items: "{{ groups[nfs_group_name] }}"
run_once: true
- name: get all nfs-ganesha mount points
command: grep "{{ hostvars[item]['ansible_all_ipv4_addresses'] | ips_in_ranges(public_network.split(',')) | first }}" /proc/mounts
register: nfs_ganesha_mount_points
failed_when: false
with_items: "{{ groups[nfs_group_name] }}"
- name: ensure nfs-ganesha mountpoint(s) are unmounted
mount:
path: "{{ item.split(' ')[1] }}"
state: unmounted
with_items:
- "{{ nfs_ganesha_mount_points.results | map(attribute='stdout_lines') | list }}"
when: item | length > 0
when: groups[nfs_group_name] | default([]) | length > 0
- name: ensure cephfs mountpoint are unmounted
command: umount -a -t ceph
- name: find mapped rbd ids
find:
paths: /sys/bus/rbd/devices
file_type: any
register: rbd_mapped_ids
- name: use sysfs to unmap rbd devices
shell: "echo {{ item.path | basename }} > /sys/bus/rbd/remove_single_major"
with_items: "{{ rbd_mapped_ids.files }}"
- name: unload ceph kernel modules
modprobe:
name: "{{ item }}"
state: absent
with_items:
- rbd
- ceph
- libceph
- name: purge ceph nfs cluster
hosts: "{{ nfs_group_name|default('nfss') }}"
become: true
tasks:
- name: disable ceph nfs service
service:
name: "ceph-nfs@{{ ansible_hostname }}"
state: stopped
enabled: no
ignore_errors: true
- name: remove ceph nfs service
file:
path: /etc/systemd/system/ceph-nfs@.service
state: absent
- name: remove ceph nfs directories for "{{ ansible_hostname }}"
file:
path: "{{ item }}"
state: absent
with_items:
- /etc/ganesha
- /var/lib/nfs/ganesha
- /var/run/ganesha
- name: purge ceph mds cluster
hosts: "{{ mds_group_name|default('mdss') }}"
become: true
tasks:
- name: disable ceph mds service
service:
name: "ceph-mds@{{ ansible_hostname }}"
state: stopped
enabled: no
ignore_errors: true
- name: remove ceph mds service
file:
path: /etc/systemd/system/ceph-mds@.service
state: absent
- name: purge ceph iscsigws cluster
hosts: "{{ iscsi_gw_group_name|default('iscsigws') }}"
become: true
tasks:
- name: disable ceph iscsigw services
service:
name: "{{ item }}"
state: stopped
enabled: no
ignore_errors: true
with_items:
- rbd-target-api
- rbd-target-gw
- tcmu-runner
- name: remove ceph iscsigw systemd unit files
file:
path: /etc/systemd/system/{{ item }}.service
state: absent
ignore_errors: true
with_items:
- rbd-target-api
- rbd-target-gw
- tcmu-runner
- name: purge ceph mgr cluster
hosts: "{{ mgr_group_name|default('mgrs') }}"
become: true
tasks:
- name: disable ceph mgr service
service:
name: "ceph-mgr@{{ ansible_hostname }}"
state: stopped
enabled: no
ignore_errors: true
- name: remove ceph mgr service
file:
path: /etc/systemd/system/ceph-mgr@.service
state: absent
- name: purge ceph rgw cluster
hosts: "{{ rgw_group_name|default('rgws') }}"
become: true
tasks:
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
tasks_from: set_radosgw_address
- name: disable ceph rgw service
service:
name: "ceph-radosgw@rgw.{{ ansible_hostname }}.{{ item.instance_name }}"
state: stopped
enabled: no
failed_when: false
with_items: "{{ rgw_instances }}"
- name: remove ceph rgw service
file:
path: /etc/systemd/system/ceph-radosgw@.service
state: absent
- name: purge ceph rbd-mirror cluster
hosts: "{{ rbdmirror_group_name|default('rbdmirrors') }}"
become: true
tasks:
- name: disable ceph rbd-mirror service
service:
name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}"
state: stopped
enabled: no
ignore_errors: true
- name: remove ceph rbd-mirror service
file:
path: /etc/systemd/system/ceph-rbd-mirror@.service
state: absent
- name: purge ceph osd cluster
hosts: "{{ osd_group_name | default('osds') }}"
gather_facts: true
become: true
tasks:
- import_role:
name: ceph-defaults
- name: gather monitors facts
setup:
delegate_to: "{{ item }}"
delegate_facts: True
with_items: "{{ groups.get(mon_group_name | default('mons')) }}"
- import_role:
name: ceph-facts
- name: get all the running osds
shell: |
systemctl list-units --all | grep -oE "ceph-osd@([0-9]+).service"
register: osd_units
ignore_errors: true
- name: disable ceph osd service
service:
name: "{{ item }}"
state: stopped
enabled: no
with_items: "{{ osd_units.stdout_lines }}"
- name: remove osd mountpoint tree
file:
path: /var/lib/ceph/osd/
state: absent
ignore_errors: true
- name: default lvm_volumes if not defined
set_fact:
lvm_volumes: []
when: lvm_volumes is not defined
- name: zap and destroy osds created by ceph-volume with lvm_volumes
ceph_volume:
data: "{{ item.data }}"
data_vg: "{{ item.data_vg|default(omit) }}"
journal: "{{ item.journal|default(omit) }}"
journal_vg: "{{ item.journal_vg|default(omit) }}"
db: "{{ item.db|default(omit) }}"
db_vg: "{{ item.db_vg|default(omit) }}"
wal: "{{ item.wal|default(omit) }}"
wal_vg: "{{ item.wal_vg|default(omit) }}"
action: "zap"
environment:
CEPH_VOLUME_DEBUG: 1
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
with_items: "{{ lvm_volumes }}"
when: lvm_volumes | default([]) | length > 0
- name: zap and destroy osds created by ceph-volume with devices
ceph_volume:
data: "{{ item }}"
action: "zap"
environment:
CEPH_VOLUME_DEBUG: 1
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
with_items: "{{ devices | default([]) }}"
when: devices | default([]) | length > 0
- name: remove ceph osd service
file:
path: /etc/systemd/system/ceph-osd@.service
state: absent
- name: include vars from group_vars/osds.yml
include_vars:
file: "{{ item }}"
with_first_found:
- files:
- "{{ playbook_dir }}/group_vars/osds"
- "{{ playbook_dir }}/group_vars/osds.yml"
skip: true
- name: find all osd_disk_prepare logs
find:
paths: "{{ ceph_osd_docker_run_script_path | default('/usr/share') }}"
pattern: "ceph-osd-prepare-*.log"
register: osd_disk_prepare_logs
- name: ensure all osd_disk_prepare logs are removed
file:
path: "{{ item.path }}"
state: absent
with_items: "{{ osd_disk_prepare_logs.files }}"
- name: purge ceph mon cluster
hosts: "{{ mon_group_name|default('mons') }}"
become: true
tasks:
# since mgr are now collocated with mons by default
- name: disable ceph mon and mgr service
service:
name: "{{ item }}"
state: stopped
enabled: no
ignore_errors: true
with_items:
- "ceph-mgr@{{ ansible_hostname }}"
- "ceph-mon@{{ ansible_hostname }}"
- name: remove ceph mon and mgr service
file:
path: "/etc/systemd/system/ceph-{{ item }}@.service"
state: absent
with_items:
- mon
- mgr
- name: purge node-exporter
hosts:
- "{{ mon_group_name|default('mons') }}"
- "{{ osd_group_name|default('osds') }}"
- "{{ mds_group_name|default('mdss') }}"
- "{{ rgw_group_name|default('rgws') }}"
- "{{ rbdmirror_group_name|default('rbdmirrors') }}"
- "{{ nfs_group_name|default('nfss') }}"
- "{{ mgr_group_name|default('mgrs') }}"
- grafana-server
- iscsigws
- clients
gather_facts: false
become: true
tasks:
- import_role:
name: ceph-defaults
- block:
- import_role:
name: ceph-facts
tasks_from: container_binary
- name: disable node_exporter service
service:
name: node_exporter
state: stopped
enabled: no
failed_when: false
- name: remove node_exporter service file
file:
name: /etc/systemd/system/node_exporter.service
state: absent
- name: remove node-exporter image
command: "{{ container_binary }} rmi {{ node_exporter_container_image }}"
failed_when: false
tags:
- remove_img
when: dashboard_enabled | bool
- name: purge ceph-grafana
hosts: grafana-server
gather_facts: false
become: true
vars:
grafana_services:
- grafana-server
- prometheus
- alertmanager
tasks:
- import_role:
name: ceph-defaults
- block:
- import_role:
name: ceph-facts
tasks_from: container_binary
- name: stop services
service:
name: "{{ item }}"
state: stopped
enabled: no
with_items: "{{ grafana_services }}"
failed_when: false
- name: remove service files
file:
name: "/etc/systemd/system/{{ item }}.service"
state: absent
with_items: "{{ grafana_services }}"
failed_when: false
- name: remove ceph dashboard container images
command: "{{ container_binary }} rmi {{ item }}"
with_items:
- "{{ prometheus_container_image }}"
- "{{ grafana_container_image }}"
- "{{ alertmanager_container_image }}"
failed_when: false
tags:
- remove_img
- name: remove data
file:
name: "{{ item }}"
state: absent
with_items:
- /etc/grafana/grafana.ini
- /etc/grafana/provisioning
- /var/lib/grafana
- /etc/alertmanager
- /var/lib/alertmanager
- /var/lib/prometheus
- /etc/prometheus
failed_when: false
when: dashboard_enabled | bool
- name: check container hosts
hosts:
- "{{ mon_group_name|default('mons') }}"
- "{{ osd_group_name|default('osds') }}"
- "{{ mds_group_name|default('mdss') }}"
- "{{ rgw_group_name|default('rgws') }}"
- "{{ rbdmirror_group_name|default('rbdmirrors') }}"
- "{{ nfs_group_name|default('nfss') }}"
- "{{ mgr_group_name|default('mgrs') }}"
gather_facts: true
become: true
tasks:
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
tasks_from: container_binary
- name: remove stopped/exited containers
command: >
{{ container_binary }} container prune{% if container_binary == 'docker' %} -f{% endif %}
changed_when: false
- name: show container list on all the nodes (should be empty)
command: >
{{ container_binary }} ps --filter='name=ceph' -a -q
register: containers_list
changed_when: false
- name: show container images on all the nodes (should be empty if tags was passed remove_img)
command: >
{{ container_binary }} images
register: images_list
changed_when: false
- name: fail if container are still present
fail:
msg: "It looks like container are still present."
when: containers_list.stdout_lines|length > 0
- name: final cleanup
hosts:
- "{{ mon_group_name|default('mons') }}"
- "{{ osd_group_name|default('osds') }}"
- "{{ mds_group_name|default('mdss') }}"
- "{{ rgw_group_name|default('rgws') }}"
- "{{ rbdmirror_group_name|default('rbdmirrors') }}"
- "{{ nfs_group_name|default('nfss') }}"
- "{{ mgr_group_name|default('mgrs') }}"
become: true
tags: with_pkg
tasks:
- import_role:
name: ceph-defaults
- name: check if it is Atomic host
stat: path=/run/ostree-booted
register: stat_ostree
- name: set fact for using Atomic host
set_fact:
is_atomic: "{{ stat_ostree.stat.exists }}"
- import_role:
name: ceph-facts
tasks_from: container_binary
- name: remove ceph container image
command: "{{ container_binary }} rmi {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
tags:
- remove_img
- name: stop docker service
service:
name: docker
state: stopped
enabled: no
when:
- not is_atomic
- container_binary == 'docker'
ignore_errors: true
- name: remove docker on debian/ubuntu
apt:
name: ['docker-ce', 'docker-engine', 'docker.io', 'python-docker', 'python3-docker']
state: absent
update_cache: yes
autoremove: yes
when: ansible_os_family == 'Debian'
- name: red hat based systems tasks
block:
- name: yum related tasks on red hat
block:
- name: remove packages on redhat
yum:
name: ['epel-release', 'docker', 'python-docker-py']
state: absent
- name: remove package dependencies on redhat
command: yum -y autoremove
args:
warn: no
- name: remove package dependencies on redhat again
command: yum -y autoremove
args:
warn: no
when:
ansible_pkg_mgr == "yum"
- name: dnf related tasks on red hat
block:
- name: remove docker on redhat
dnf:
name: ['docker', 'python3-docker']
state: absent
- name: remove package dependencies on redhat
command: dnf -y autoremove
args:
warn: no
- name: remove package dependencies on redhat again
command: dnf -y autoremove
args:
warn: no
when:
ansible_pkg_mgr == "dnf"
when:
ansible_os_family == 'RedHat' and
not is_atomic
- name: purge ceph directories
hosts:
- "{{ mon_group_name|default('mons') }}"
- "{{ osd_group_name|default('osds') }}"
- "{{ mds_group_name|default('mdss') }}"
- "{{ rgw_group_name|default('rgws') }}"
- "{{ rbdmirror_group_name|default('rbdmirrors') }}"
- "{{ nfs_group_name|default('nfss') }}"
- "{{ mgr_group_name|default('mgrs') }}"
gather_facts: false # Already gathered previously
become: true
tasks:
- name: purge ceph directories for "{{ ansible_hostname }}"
file:
path: "{{ item }}"
state: absent
with_items:
- /etc/ceph
- /var/log/ceph
- name: remove ceph data
shell: rm -rf /var/lib/ceph/*
# (todo): remove this when we are able to manage docker
# service on atomic host.
- name: remove docker data
shell: rm -rf /var/lib/docker/*
when: not is_atomic | bool
- name: purge fetch directory
hosts: localhost
gather_facts: false
tasks:
- name: set fetch_directory value if not set
set_fact:
fetch_directory: "fetch/"
when: fetch_directory is not defined
- name: purge fetch directory for localhost
file:
path: "{{ fetch_directory }}/"
state: absent