ceph-ansible/infrastructure-playbooks/purge-cluster.yml

712 lines
20 KiB
YAML
Raw Normal View History

---
# This playbook purges Ceph
# It removes: packages, configuration files and ALL THE DATA
#
# Use it like this:
# ansible-playbook purge-cluster.yml
# Prompts for confirmation to purge, defaults to no and
# doesn't purge the cluster. yes purges the cluster.
#
# ansible-playbook -e ireallymeanit=yes|no purge-cluster.yml
# Overrides the prompt using -e option. Can be used in
# automation scripts to avoid interactive prompt.
- name: confirm whether user really meant to purge the cluster
hosts: localhost
gather_facts: false
vars_prompt:
- name: ireallymeanit
prompt: Are you sure you want to purge the cluster?
default: 'no'
private: no
tasks:
- name: exit playbook, if user did not mean to purge cluster
fail:
msg: >
"Exiting purge-cluster playbook, cluster was NOT purged.
To purge the cluster, either say 'yes' on the prompt or
or use `-e ireallymeanit=yes` on the command line when
invoking the playbook"
when: ireallymeanit != 'yes'
- name: gather facts on all hosts
hosts:
- "{{ mon_group_name|default('mons') }}"
- "{{ osd_group_name|default('osds') }}"
- "{{ mds_group_name|default('mdss') }}"
- "{{ rgw_group_name|default('rgws') }}"
- "{{ rbdmirror_group_name|default('rbdmirrors') }}"
- "{{ nfs_group_name|default('nfss') }}"
- "{{ client_group_name|default('clients') }}"
- "{{ mgr_group_name|default('mgrs') }}"
become: true
tasks:
- debug: msg="gather facts on all Ceph hosts for following reference"
- name: purge ceph mds cluster
vars:
mds_group_name: mdss
hosts:
- "{{ mds_group_name|default('mdss') }}"
gather_facts: false # Already gathered previously
become: true
tasks:
2016-04-07 03:58:17 +08:00
- name: stop ceph mdss with systemd
service:
name: ceph-mds@{{ ansible_hostname }}
state: stopped
enabled: no
when: ansible_service_mgr == 'systemd'
- name: stop ceph mdss
shell: "service ceph status mds ; if [ $? == 0 ] ; then service ceph stop mds ; else echo ; fi"
when: ansible_service_mgr == 'sysvinit'
- name: stop ceph mdss on ubuntu
command: initctl stop ceph-mds cluster={{ cluster }} id={{ ansible_hostname }}
failed_when: false
when: ansible_service_mgr == 'upstart'
- name: purge ceph mgr cluster
vars:
mgr_group_name: mgrs
hosts:
- "{{ mgr_group_name|default('mgrs') }}"
gather_facts: false # Already gathered previously
become: true
tasks:
- name: stop ceph mgrs with systemd
service:
name: ceph-mgr@{{ ansible_hostname}}
state: stopped
enabled: no
when: ansible_service_mgr == 'systemd'
- name: purge ceph rgw cluster
vars:
rgw_group_name: rgws
hosts:
- "{{ rgw_group_name|default('rgws') }}"
gather_facts: false # Already gathered previously
become: true
tasks:
- name: stop ceph rgws with systemd
service:
name: ceph-radosgw@rgw.{{ ansible_hostname }}
state: stopped
enabled: no
when: ansible_service_mgr == 'systemd'
- name: stop ceph rgws
shell: "service ceph-radosgw status ; if [ $? == 0 ] ; then service ceph-radosgw stop ; else echo ; fi"
when: ansible_service_mgr == 'sysvinit'
- name: stop ceph rgws on ubuntu
command: initctl stop radosgw cluster={{ cluster }} id={{ ansible_hostname }}
failed_when: false
when: ansible_service_mgr == 'upstart'
- name: purge ceph rbd-mirror cluster
vars:
rbdmirror_group_name: rbd-mirrors
hosts:
- "{{ rbdmirror_group_name|default('rbdmirrors') }}"
gather_facts: false # Already gathered previously
become: true
tasks:
- name: stop ceph rbd mirror with systemd
service:
name: ceph-rbd-mirror@admin.service
state: stopped
when: ansible_service_mgr == 'systemd'
2016-04-07 03:58:17 +08:00
- name: stop ceph rbd mirror on ubuntu
command: initctl stop ceph-rbd-mirror cluster={{ cluster }} id=admin
failed_when: false
when: ansible_service_mgr == 'upstart'
- name: purge ceph nfs cluster
vars:
nfs_group_name: nfss
hosts:
- "{{ nfs_group_name|default('nfss') }}"
gather_facts: false # Already gathered previously
become: true
tasks:
- name: stop ceph nfss with systemd
service:
name: nfs-ganesha
state: stopped
when: ansible_service_mgr == 'systemd'
- name: stop ceph nfss
shell: "service nfs-ganesha status ; if [ $? == 0 ] ; then service nfs-ganesha stop ; else echo ; fi"
when: ansible_service_mgr == 'sysvinit'
- name: stop ceph nfss on ubuntu
command: initctl stop nfs-ganesha
failed_when: false
when: ansible_service_mgr == 'upstart'
- name: purge ceph osd cluster
vars:
osd_group_name: osds
hosts:
- "{{ osd_group_name|default('osds') }}"
gather_facts: false # Already gathered previously
become: true
handlers:
- name: restart machine
shell: sleep 2 && shutdown -r now "Ansible updates triggered"
async: 1
poll: 0
ignore_errors: true
- name: wait for server to boot
become: false
local_action: wait_for port=22 host={{ inventory_hostname }} state=started delay=10 timeout=500
- name: remove data
file:
path: /var/lib/ceph
state: absent
tasks:
- name: default lvm_volumes if not defined
set_fact:
lvm_volumes: []
when: lvm_volumes is not defined
- name: get osd numbers
shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi"
register: osd_ids
changed_when: false
- name: stop ceph-osd with systemd
service:
name: ceph-osd@{{item}}
state: stopped
enabled: no
with_items: "{{ osd_ids.stdout_lines }}"
when: ansible_service_mgr == 'systemd'
# before infernalis release, using sysvinit scripts
# we use this test so we do not have to know which RPM contains the boot script
# or where it is placed.
- name: stop ceph osds
shell: "service ceph status osd ; if [ $? == 0 ] ; then service ceph stop osd ; else echo ; fi"
when: ansible_service_mgr == 'sysvinit'
- name: stop ceph osds on ubuntu
command: initctl stop ceph-osd cluster={{ cluster }} id={{ item }}
failed_when: false
when: ansible_service_mgr == 'upstart'
with_items: "{{ osd_ids.stdout_lines }}"
- name: remove ceph udev rules
file:
path: "{{ item }}"
state: absent
with_items:
- /usr/lib/udev/rules.d/95-ceph-osd.rules
- /usr/lib/udev/rules.d/60-ceph-by-parttypeuuid.rules
2016-04-07 03:58:17 +08:00
- name: see if ceph-disk-created data partitions are present
shell: |
ls /dev/disk/by-partlabel | grep -q "ceph.*.data"
2016-04-07 03:58:17 +08:00
failed_when: false
register: ceph_data_partlabels
- name: see if ceph-disk-created block partitions are present
shell: |
ls /dev/disk/by-partlabel | grep -q "ceph.*block$"
failed_when: false
register: ceph_block_partlabels
2016-04-07 03:58:17 +08:00
- name: see if ceph-disk-created journal partitions are present
shell: |
ls /dev/disk/by-partlabel | grep -q "ceph.*.journal"
2016-04-07 03:58:17 +08:00
failed_when: false
register: ceph_journal_partlabels
- name: see if ceph-disk-created block db partitions are present
shell: |
ls /dev/disk/by-partlabel | grep -q "ceph.*.block.db"
failed_when: false
register: ceph_db_partlabels
- name: see if ceph-disk-created block wal partitions are present
shell: |
ls /dev/disk/by-partlabel | grep -q "ceph.*.block.wal"
failed_when: false
register: ceph_wal_partlabels
- name: see if ceph-disk-created lockbox partitions are present
shell: |
ls /dev/disk/by-partlabel | grep -q "ceph.*.lockbox"
failed_when: false
register: ceph_lockbox_partlabels
# Initial attempt, doing everything in Ansible...
# - name: see if encrypted partitions are present
# shell: blkid -t TYPE=crypto_LUKS -o value -s PARTUUID
# register: encrypted_partuuid
#
# - name: find if these encrypted partitions are ceph data partitions
# shell: blkid -t PARTLABEL="ceph data" -o value -s PARTUUID $(blkid -U {{ item }})
# failed_when: false
# with_items: "{{ encrypted_partuuid.stdout_lines }}"
# when: "{{ encrypted_partuuid | length > 0 }}"
# register: encrypted_partuuid_ceph_data
#
# - name: find if these encrypted partitions are ceph journal partitions
# shell: blkid -t PARTLABEL="ceph journal" -o value -s PARTUUID $(blkid -U {{ item }})
# failed_when: false
# with_items: "{{ encrypted_partuuid.stdout_lines }}"
# when: "{{ encrypted_partuuid | length > 0 }}"
# register: encrypted_partuuid_ceph_journal
#
# - name: merge the list of ceph encrypted partitions
# set_fact:
# encrypted_partuuid_ceph: "{{ encrypted_partuuid_ceph_data + encrypted_partuuid_ceph_journal }}"
# NOTE(leseb): hope someone will find a more elegant way one day...
- name: see if encrypted partitions are present
shell: |
blkid -t TYPE=crypto_LUKS -s PARTLABEL -s PARTUUID | grep "ceph.*." | grep -o PARTUUID.* | cut -d '"' -f 2
register: encrypted_ceph_partuuid
- name: get osd data and lockbox mount points
shell: "(grep /var/lib/ceph /proc/mounts || echo -n) | awk '{ print $2 }'"
register: mounted_osd
changed_when: false
- name: drop all cache
shell: "sync && sleep 1 && echo 3 > /proc/sys/vm/drop_caches"
- name: umount osd data partition
shell: umount {{ item }}
with_items: "{{ mounted_osd.stdout_lines }}"
- name: remove osd mountpoint tree
file:
path: /var/lib/ceph/osd/
state: absent
register: remove_osd_mountpoints
ignore_errors: true
- name: is reboot needed
local_action: shell echo requesting reboot
become: false
notify:
- restart machine
- wait for server to boot
- remove data
when: remove_osd_mountpoints.failed is defined
2016-04-07 03:58:17 +08:00
- name: see if ceph-disk is installed
shell: "which ceph-disk"
failed_when: false
register: ceph_disk_present
- name: delete dm-crypt devices if any
command: dmsetup remove --retry --force {{ item }}
with_items: "{{ encrypted_ceph_partuuid.stdout_lines }}"
when: "{{ encrypted_ceph_partuuid.stdout_lines | length > 0 }}"
- name: get ceph data partitions
shell: |
blkid | awk -F: '/ceph data/ { print $1 }'
when: ceph_data_partlabels.rc == 0
failed_when: false
register: ceph_data_partition_to_erase_path
- name: get ceph lockbox partitions
shell: |
blkid | awk '/ceph lockbox/ { sub (":", "", $1); print $1 }'
when: ceph_lockbox_partlabels.rc == 0
failed_when: false
register: ceph_lockbox_partition_to_erase_path
- name: zap osd disks
shell: |
if (echo "{{ item }}" | grep -Esq '[0-9]{1,2}$'); then
raw_device=$(echo "{{ item }}" | grep -Eo '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}')
partition_nb=$(echo "{{ item }}" | grep -Eo '[0-9]{1,2}$')
sgdisk --delete $partition_nb $raw_device
udevadm settle --timeout=600
else
ceph-disk zap "{{ item }}"
udevadm settle --timeout=600
fi
with_items:
- "{{ ceph_data_partition_to_erase_path.stdout_lines | default([]) }}"
- "{{ ceph_lockbox_partition_to_erase_path.stdout_lines | default([]) }}"
when:
- ceph_disk_present.rc == 0
- ceph_data_partlabels.rc == 0
# this should go away once 'ceph-volume lvm zap' is available
- name: remove osd logical volumes
command: "lvremove -f {{ item.data_vg }}/{{ item.data }}"
with_items: "{{ lvm_volumes }}"
when:
- osd_scenario == "lvm"
# this should go away once 'ceph-volume lvm zap' is available
- name: remove osd lvm journals
command: "lvremove -f {{ item.journal_vg }}/{{ item.journal }}"
with_items: "{{ lvm_volumes }}"
# journals might be logical volumes, but they could also be
# devices so fail silently if this doesn't work
failed_when: false
when:
- osd_scenario == "lvm"
- item.journal_vg is defined
- name: get ceph block partitions
shell: |
blkid | awk '/ceph block"/ { sub (":", "", $1); print $1 }'
when: ceph_block_partlabels.rc == 0
failed_when: false
register: ceph_block_partition_to_erase_path
- name: get ceph journal partitions
shell: |
blkid | awk '/ceph journal/ { sub (":", "", $1); print $1 }'
when: ceph_journal_partlabels.rc == 0
failed_when: false
register: ceph_journal_partition_to_erase_path
- name: get ceph db partitions
shell: |
blkid | awk '/ceph block.db/ { sub (":", "", $1); print $1 }'
when: ceph_db_partlabels.rc == 0
failed_when: false
register: ceph_db_partition_to_erase_path
- name: get ceph wal partitions
shell: |
blkid | awk '/ceph block.wal/ { sub (":", "", $1); print $1 }'
when: ceph_wal_partlabels.rc == 0
failed_when: false
register: ceph_wal_partition_to_erase_path
- name: zap ceph journal/block db/block wal partitions
shell: |
# if the disk passed is a raw device AND the boot system disk
if echo "{{ item }}" | egrep -sq '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]p|nvme[0-9]n[0-9]p){1,2}$' && parted -s $(echo "{{ item }}" | egrep -o '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]p|nvme[0-9]n[0-9]p){1,2}') print | grep -sq boot; then
echo "Looks like {{ item }} has a boot partition,"
echo "if you want to delete specific partitions point to the partition instead of the raw device"
echo "Do not use your system disk!"
exit 1
fi
raw_device=$(echo "{{ item }}" | egrep -o '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}')
partition_nb=$(echo "{{ item }}" | egrep -o '[0-9]{1,2}$')
sgdisk --delete $partition_nb $raw_device
with_items:
- "{{ ceph_block_partition_to_erase_path.stdout_lines | default([]) }}"
- "{{ ceph_journal_partition_to_erase_path.stdout_lines | default([]) }}"
- "{{ ceph_db_partition_to_erase_path.stdout_lines | default([]) }}"
- "{{ ceph_wal_partition_to_erase_path.stdout_lines | default([]) }}"
when:
- (ceph_block_partlabels.rc == 0 or ceph_journal_partlabels.rc == 0 or ceph_db_partlabels.rc == 0 or ceph_wal_partlabels.rc == 0)
- osd_scenario == 'non-collocated'
- name: purge ceph mon cluster
vars:
mon_group_name: mons
restapi_group_name: restapis
hosts:
- "{{ mon_group_name|default('mons') }}"
gather_facts: false # already gathered previously
become: true
tasks:
- name: stop ceph mons with systemd
service:
name: ceph-mon@{{ ansible_hostname }}
state: stopped
enabled: no
when: ansible_service_mgr == 'systemd'
- name: stop ceph mons
shell: "service ceph status mon ; if [ $? == 0 ] ; then service ceph stop mon ; else echo ; fi"
when: ansible_service_mgr == 'sysvinit'
- name: stop ceph mons on ubuntu
command: initctl stop ceph-mon cluster={{ cluster }} id={{ ansible_hostname }}
failed_when: false
when: ansible_service_mgr == 'upstart'
- name: remove monitor store and bootstrap keys
file:
path: "{{ item }}"
state: absent
with_items:
- /var/lib/ceph/mon
- /var/lib/ceph/bootstrap-mds
- /var/lib/ceph/bootstrap-osd
- /var/lib/ceph/bootstrap-rgw
- /var/lib/ceph/bootstrap-rbd
- /var/lib/ceph/bootstrap-mgr
- /var/lib/ceph/tmp
- name: purge iscsi gateway(s)
vars:
igw_purge_type: all
hosts:
- "{{ iscsi_gw_group_name|default('iscsi-gws') }}"
gather_facts: false # already gathered previously
become: true
tasks:
- name: igw_purge | purging the gateway configuration
igw_purge:
mode: "gateway"
- name: igw_purge | deleting configured rbd devices
igw_purge:
mode: "disks"
when:
- igw_purge_type == 'all'
- name: restart rbd-target-gw daemons
service:
name: rbd-target-gw
state: restarted
when:
- ansible_service_mgr == 'systemd'
- name: final cleanup - check any running ceph, purge ceph packages, purge config and remove data
vars:
# When set to true both groups of packages are purged.
# This can cause problem with qemu-kvm
purge_all_packages: true
ceph_packages:
- ceph
- ceph-common
- ceph-fs-common
- ceph-fuse
- ceph-mds
- ceph-mgr
- ceph-release
- ceph-radosgw
- calamari-server
ceph_remaining_packages:
- libcephfs1
- libcephfs2
- librados2
- libradosstriper1
- librbd1
- python-cephfs
- python-rados
- python-rbd
hosts:
- "{{ mon_group_name|default('mons') }}"
- "{{ osd_group_name|default('osds') }}"
- "{{ mds_group_name|default('mdss') }}"
- "{{ rgw_group_name|default('rgws') }}"
- "{{ rbdmirror_group_name|default('rbdmirrors') }}"
- "{{ nfs_group_name|default('nfss') }}"
- "{{ client_group_name|default('clients') }}"
- "{{ mgr_group_name|default('mgrs') }}"
gather_facts: false # Already gathered previously
become: true
handlers:
- name: get osd data and lockbox mount points
shell: "(grep /var/lib/ceph /proc/mounts || echo -n) | awk '{ print $2 }'"
register: mounted_osd
changed_when: false
listen: "remove data"
- name: umount osd data partition
shell: umount {{ item }}
with_items: "{{ mounted_osd.stdout_lines }}"
listen: "remove data"
- name: remove data
file:
path: /var/lib/ceph
state: absent
listen: "remove data"
tasks:
- name: purge ceph packages with yum
yum:
name: "{{ item }}"
state: absent
with_items: "{{ ceph_packages }}"
when: ansible_pkg_mgr == 'yum'
- name: purge ceph packages with dnf
dnf:
name: "{{ item }}"
state: absent
with_items: "{{ ceph_packages }}"
when: ansible_pkg_mgr == 'dnf'
- name: purge ceph packages with apt
apt:
name: "{{ item }}"
state: absent
with_items: "{{ ceph_packages }}"
when: ansible_pkg_mgr == 'apt'
- name: purge remaining ceph packages with yum
yum:
name: "{{ item }}"
state: absent
with_items: "{{ ceph_remaining_packages }}"
when:
- ansible_pkg_mgr == 'yum'
- purge_all_packages == true
- name: purge remaining ceph packages with dnf
dnf:
name: "{{ item }}"
state: absent
with_items: "{{ ceph_remaining_packages }}"
when:
- ansible_pkg_mgr == 'dnf'
- purge_all_packages == true
- name: purge remaining ceph packages with apt
apt:
name: "{{ item }}"
state: absent
with_items: "{{ ceph_remaining_packages }}"
when:
- ansible_pkg_mgr == 'apt'
- purge_all_packages == true
- name: remove config
file:
path: /etc/ceph
state: absent
- name: remove logs
file:
path: /var/log/ceph
state: absent
- name: request data removal
local_action: shell echo requesting data removal
become: false
notify:
- remove data
- name: purge dnf cache
command: dnf clean all
when: ansible_pkg_mgr == 'dnf'
- name: purge rpm cache in /tmp
2016-04-28 01:36:32 +08:00
file:
path: /tmp/rh-storage-repo
state: absent
- name: clean apt
shell: apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
when: ansible_pkg_mgr == 'apt'
- name: purge rh_storage.repo file in /etc/yum.repos.d
file:
path: /etc/yum.repos.d/rh_storage.repo
state: absent
when: ansible_os_family == 'RedHat'
- name: check for anything running ceph
command: "ps -u ceph -U ceph"
register: check_for_running_ceph
failed_when: check_for_running_ceph.rc == 0
- name: remove ceph systemd unit files
shell: rm -rf /etc/systemd/system/ceph*
changed_when: false
when: ansible_service_mgr == 'systemd'
- name: purge fetch directory
hosts:
- localhost
gather_facts: false
tasks:
- name: set fetch_directory value if not set
set_fact:
fetch_directory: "fetch/"
when: fetch_directory is not defined
- name: purge fetch directory for localhost
file:
path: "{{ fetch_directory }}"
state: absent