ceph-ansible/infrastructure-playbooks/filestore-to-bluestore.yml

394 lines
17 KiB
YAML

# This playbook migrates an OSD from filestore to bluestore backend.
#
# Use it like this:
# ansible-playbook infrastructure-playbooks/filestore-to-bluestore.yml --limit <osd-node-to-migrate>
# If all osds on the node are using filestore backend, then *ALL* of them will be shrinked and redeployed using bluestore backend with ceph-volume.
#
# If a mix of filestore and bluestore OSDs is detected on the node, the node will be skipped unless you pass `force_filestore_to_bluestore=True` as an extra var.
# ie: ansible-playbook infrastructure-playbooks/filestore-to-bluestore.yml --limit <osd-node-to-migrate> -e force_filestore_to_bluestore=True
- hosts: "{{ osd_group_name }}"
become: true
serial: 1
vars:
delegate_facts_host: true
tasks:
- name: gather and delegate facts
setup:
gather_subset:
- 'all'
- '!facter'
- '!ohai'
delegate_to: "{{ item }}"
delegate_facts: True
with_items: "{{ groups[mon_group_name] }}"
run_once: true
when: delegate_facts_host | bool
- import_role:
name: ceph-defaults
- name: import_role ceph-facts
import_role:
name: ceph-facts
tasks_from: container_binary.yml
- name: set_fact container_run_cmd, container_exec_cmd
set_fact:
container_run_cmd: "{{ container_binary + ' run --rm --privileged=true --net=host --pid=host --ipc=host -v /dev:/dev -v /etc/ceph:/etc/ceph -v /var/lib/ceph:/var/lib/ceph -v /var/run:/var/run --entrypoint=' if containerized_deployment | bool else '' }}ceph-volume {{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else '' }}"
container_exec_cmd: "{{ container_binary + ' exec ceph-mon-' + hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] if containerized_deployment | bool else '' }}"
- name: get ceph osd tree data
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} osd tree -f json"
delegate_to: "{{ groups[mon_group_name][0] }}"
register: osd_tree
changed_when: false
run_once: true
- name: set_fact osd_ids
set_fact:
osd_ids: "{{ osd_ids | default([]) | union(item) }}"
with_items:
- "{{ ((osd_tree.stdout | default('{}') | trim | from_json).nodes | selectattr('name', 'match', '^' + inventory_hostname + '$') | map(attribute='children') | list) }}"
- name: get osd metadata
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} osd metadata osd.{{ item }} -f json"
register: osd_metadata
delegate_to: "{{ groups[mon_group_name][0] }}"
run_once: true
changed_when: false
with_items: "{{ osd_ids }}"
- name: set_fact _osd_objectstore
set_fact:
_osd_objectstore: "{{ _osd_objectstore | default([]) | union([(item.stdout | from_json).osd_objectstore]) }}"
with_items: "{{ osd_metadata.results }}"
- name: set_fact skip_this_node
set_fact:
skip_this_node: "{{ ('filestore' in _osd_objectstore and 'bluestore' in _osd_objectstore and not force_filestore_to_bluestore | default(False)) or ('filestore' not in _osd_objectstore) }}"
- name: filestore to bluestore migration workflow
when: not skip_this_node | bool
block:
- name: get ceph-volume lvm inventory data
command: "{{ container_run_cmd }} --cluster {{ cluster }} inventory --format json"
register: ceph_volume_inventory
- name: set_fact inventory
set_fact:
inventory: "{{ ceph_volume_inventory.stdout | from_json }}"
- name: set_fact ceph_disk_osds
set_fact:
ceph_disk_osds_devices: "{{ ceph_disk_osds_devices | default([]) + [item.path] }}"
with_items: "{{ inventory }}"
when:
- not item.available | bool
- "'Used by ceph-disk' in item.rejected_reasons"
- name: ceph-disk prepared OSDs related tasks
when: ceph_disk_osds_devices | default([]) | length > 0
block:
- name: get partlabel
command: blkid "{{ item + 'p' if item is match('/dev/(cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}$') else item }}"1 -s PARTLABEL -o value
register: partlabel
with_items: "{{ ceph_disk_osds_devices | default([]) }}"
- name: get simple scan data
command: "{{ container_run_cmd }} --cluster {{ cluster }} simple scan {{ item.item + 'p1' if item.item is match('/dev/(cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}$') else item.item + '1' }} --stdout"
register: simple_scan
with_items: "{{ partlabel.results | default([]) }}"
when: item.stdout == 'ceph data'
ignore_errors: true
- name: mark out osds
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} osd out {{ (item.0.stdout | from_json).whoami }}"
with_together:
- "{{ simple_scan.results }}"
- "{{ partlabel.results }}"
delegate_to: "{{ groups[mon_group_name][0] }}"
run_once: true
when: item.1.stdout == 'ceph data'
- name: stop and disable old osd services
service:
name: "ceph-osd@{{ (item.0.stdout | from_json).whoami }}"
state: stopped
enabled: no
with_together:
- "{{ simple_scan.results }}"
- "{{ partlabel.results }}"
when: item.1.stdout == 'ceph data'
- name: umount osd data
mount:
path: "/var/lib/ceph/osd/{{ cluster }}-{{ (item.0.stdout | from_json).whoami }}"
state: unmounted
with_together:
- "{{ simple_scan.results }}"
- "{{ partlabel.results }}"
when: item.1.stdout == 'ceph data'
- name: umount osd lockbox
mount:
path: "/var/lib/ceph/osd-lockbox/{{ (item.0.stdout | from_json).data.uuid }}"
state: unmounted
with_together:
- "{{ simple_scan.results }}"
- "{{ partlabel.results }}"
when:
- item.1.stdout == 'ceph data'
- (item.0.stdout | from_json).encrypted | default(False) | bool
- name: ensure dmcrypt for data device is closed
command: cryptsetup close "{{ (item.0.stdout | from_json).data.uuid }}"
with_together:
- "{{ simple_scan.results }}"
- "{{ partlabel.results }}"
failed_when: false
changed_when: false
when:
- item.1.stdout == 'ceph data'
- (item.0.stdout | from_json).encrypted | default(False) | bool
- name: ensure dmcrypt for journal device is closed
command: cryptsetup close "{{ (item.0.stdout | from_json).journal.uuid }}"
with_together:
- "{{ simple_scan.results }}"
- "{{ partlabel.results }}"
failed_when: false
changed_when: false
when:
- item.1.stdout == 'ceph data'
- (item.0.stdout | from_json).encrypted | default(False) | bool
- name: zap data devices
command: "{{ container_run_cmd }} --cluster {{ cluster }} lvm zap --destroy {{ (item.0.stdout | from_json).data.path }}"
with_together:
- "{{ simple_scan.results }}"
- "{{ partlabel.results }}"
when: item.1.stdout == 'ceph data'
- name: zap journal devices
command: "{{ container_run_cmd }} --cluster {{ cluster }} lvm zap --destroy {{ (item.0.stdout | from_json).journal.path }}"
with_together:
- "{{ simple_scan.results }}"
- "{{ partlabel.results }}"
when:
- item.1.stdout == 'ceph data'
- (item.0.stdout | from_json).journal.path is defined
- name: get ceph-volume lvm list data
command: "{{ container_run_cmd }} --cluster {{ cluster }} lvm list --format json"
register: ceph_volume_lvm_list
- name: set_fact _lvm_list
set_fact:
_lvm_list: "{{ _lvm_list | default([]) + item.value }}"
with_dict: "{{ (ceph_volume_lvm_list.stdout | default('{}') | from_json) }}"
- name: ceph-volume prepared OSDs related tasks
block:
- name: mark out osds
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} osd out {{ item }}"
with_items: "{{ (ceph_volume_lvm_list.stdout | default('{}') | from_json).keys() | list }}"
delegate_to: "{{ groups[mon_group_name][0] }}"
run_once: true
- name: stop and disable old osd services
service:
name: "ceph-osd@{{ item }}"
state: stopped
enabled: no
with_items: "{{ (ceph_volume_lvm_list.stdout | default('{}') | from_json).keys() | list }}"
- name: stop and disable ceph-volume services
service:
name: "ceph-volume@lvm-{{ item.tags['ceph.osd_id'] }}-{{ item.tags['ceph.osd_fsid'] }}"
state: stopped
enabled: no
with_items: "{{ _lvm_list }}"
when:
- not containerized_deployment | bool
- item.type == 'data'
- name: mark down osds
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} osd down {{ item }}"
with_items: "{{ (ceph_volume_lvm_list.stdout | default('{}') | from_json).keys() | list }}"
delegate_to: "{{ groups[mon_group_name][0] }}"
run_once: true
- name: ensure all dmcrypt for data and journal are closed
command: cryptsetup close "{{ item['lv_uuid'] }}"
with_items: "{{ _lvm_list }}"
changed_when: false
failed_when: false
when: item['tags'].get('ceph.encrypted', 0) | int == 1
- name: set_fact osd_fsid_list
set_fact:
osd_fsid_list: "{{ osd_fsid_list | default([]) + [{'osd_fsid': item.tags['ceph.osd_fsid'], 'destroy': (item.lv_name.startswith('osd-data-') and item.vg_name.startswith('ceph-')) | ternary(true, false), 'device': item.devices[0], 'journal': item['tags']['ceph.journal_device'] }] }}"
with_items: "{{ _lvm_list }}"
when: item.type == 'data'
- name: zap ceph-volume prepared OSDs
ceph_volume:
action: "zap"
osd_fsid: "{{ item.osd_fsid }}"
destroy: false
environment:
CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
loop: "{{ osd_fsid_list }}"
when: osd_fsid_list is defined
- name: zap destroy ceph-volume prepared devices
ceph_volume:
action: "zap"
data: "{{ item.device }}"
destroy: true
environment:
CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
loop: "{{ osd_fsid_list }}"
when:
- osd_fsid_list is defined
- item.destroy | bool
- name: test if the journal device hasn't been already destroyed because of collocation
stat:
path: "{{ item.journal }}"
loop: "{{ osd_fsid_list }}"
register: journal_path
when:
- osd_fsid_list is defined
- item.destroy | bool
- item.journal is defined
- item.journal not in (lvm_volumes | selectattr('journal', 'defined') | map(attribute='journal') | list)
- name: zap destroy ceph-volume prepared journal devices
ceph_volume:
action: "zap"
data: "{{ item.0.journal }}"
destroy: true
environment:
CEPH_VOLUME_DEBUG: "{{ ceph_volume_debug }}"
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
loop: "{{ osd_fsid_list | zip(journal_path.results) | list }}"
when:
- osd_fsid_list is defined
- item.0.destroy | bool
- item.0.journal is defined
- item.0.journal not in (lvm_volumes | selectattr('journal', 'defined') | map(attribute='journal') | list)
- item.1.stat.exists | bool
- name: ensure all dm are closed
command: dmsetup remove "{{ item['lv_path'] }}"
with_items: "{{ _lvm_list }}"
changed_when: false
failed_when: false
when:
- item['lv_path'] is defined
# Do not close mappers for non 'lvm batch' devices
- devices | default([]) | length > 0
- name: ensure all pv are removed
command: "pvremove --yes {{ item.devices[0] }}"
with_items: "{{ _lvm_list }}"
failed_when: false
when:
- item.type == 'data'
- item.lv_name.startswith('osd-data-') | bool
- item.vg_name.startswith('ceph-') | bool
when: _lvm_list is defined
- name: set_fact osd_ids
set_fact:
osd_ids: "{{ osd_ids | default([]) + [item] }}"
with_items:
- "{{ ((osd_tree.stdout | default('{}') | from_json).nodes | selectattr('name', 'match', '^' + inventory_hostname + '$') | map(attribute='children') | list) }}"
- name: purge osd(s) from the cluster
command: >
{{ container_exec_cmd }} ceph --cluster {{ cluster }} osd purge {{ item }} --yes-i-really-mean-it
run_once: true
delegate_to: "{{ groups[mon_group_name][0] }}"
with_items: "{{ osd_ids }}"
- name: purge /var/lib/ceph/osd directories
file:
path: "/var/lib/ceph/osd/{{ cluster }}-{{ item }}"
state: absent
with_items: "{{ osd_ids }}"
- name: force osd_objectstore to bluestore
set_fact:
osd_objectstore: bluestore
- name: refresh ansible devices fact
setup:
gather_subset:
- 'all'
- '!facter'
- '!ohai'
filter: ansible_devices
when: osd_auto_discovery | bool
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
- name: remove gpt header
command: parted -s "{{ item }}" mklabel msdos
with_items: "{{ (devices + dedicated_devices | default([]) + ceph_disk_osds_devices | default([])) | unique }}"
- name: update lvm_volumes configuration for bluestore
when:
- lvm_volumes | length > 0
- not osd_auto_discovery | bool
block:
- name: reuse filestore journal partition for bluestore db
set_fact:
config_part: "{{ config_part | default([]) + [item | combine({'db': item.journal})] }}"
with_items: "{{ lvm_volumes | selectattr('journal_vg', 'undefined') | list }}"
- name: reuse filestore journal vg/lv for bluestore db
set_fact:
config_vglv: "{{ config_vglv | default([]) + [item | combine({'db': item.journal, 'db_vg': item.journal_vg})] }}"
with_items: "{{ lvm_volumes | selectattr('journal_vg', 'defined') | list }}"
- name: override lvm_volumes with bluestore configuration
set_fact:
lvm_volumes: "{{ config_part | default([]) + config_vglv | default([]) }}"
- import_role:
name: ceph-handler
- import_role:
name: ceph-container-common
when: containerized_deployment | bool
- import_role:
name: ceph-config
- import_role:
name: ceph-osd
- name: final play
hosts: "{{ osd_group_name }}"
become: true
gather_facts: false
tasks:
- import_role:
name: ceph-defaults
- name: report any skipped node during this playbook
debug:
msg: |
"WARNING:"
"This node has been skipped because OSDs are either"
"all bluestore or there's a mix of filestore and bluestore OSDs"
when:
- skip_this_node | bool