---
# This playbook switches from non-containerized to containerized Ceph daemons.
#
# Plays run in this order: confirmation prompt, fact gathering, then one play
# per daemon type (mon, mgr, osd, mds, rgw, rbd-mirror, nfs). Every daemon play
# uses `serial: 1`, so hosts are converted one at a time. The osd play is
# wrapped by two small plays that set and unset the `noout` and `nodeep-scrub`
# flags through the first monitor's container.

- name: confirm whether user really meant to switch from non-containerized to containerized ceph daemons

  hosts:
    - localhost

  gather_facts: false

  vars_prompt:
    - name: ireallymeanit
      prompt: Are you sure you want to switch from non-containerized to containerized ceph daemons?
      default: 'no'
      private: false

  tasks:
    # Abort unless the operator answered exactly 'yes' (prompt or -e override).
    - name: exit playbook, if user did not mean to switch from non-containerized to containerized daemons?
      fail:
        msg: >
          "Exiting switch-from-non-containerized-to-containerized-ceph-daemons.yml playbook,
          cluster did not switch from non-containerized to containerized ceph daemons.
          To switch from non-containerized to containerized ceph daemons, either say 'yes' on the prompt
          or use `-e ireallymeanit=yes` on the command line when
          invoking the playbook"
      when: ireallymeanit != 'yes'


- name: make sure docker is present and started

  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"

  become: true

  vars:
    delegate_facts_host: true

  roles:
    - ceph-defaults

  post_tasks:
    # Runs once and gathers facts for every inventory host except the members
    # of the client group, storing them on the delegated host.
    - name: gather and delegate facts
      setup:
        gather_subset:
          - 'all'
          - '!facter'
          - '!ohai'
      delegate_to: "{{ item }}"
      delegate_facts: true
      with_items: "{{ groups['all'] | difference(groups.get(client_group_name, [])) }}"
      run_once: true
      when: delegate_facts_host | bool
      tags: always


- name: switching from non-containerized to containerized ceph mon

  vars:
    health_mon_check_retries: 5
    health_mon_check_delay: 15
    containerized_deployment: true
    mon_group_name: mons
    restapi_group_name: restapis

  hosts:
    - "{{ mon_group_name|default('mons') }}"

  serial: 1
  become: true

  pre_tasks:
    - name: set_fact switch_to_container to indicate that we run a special playbook
      set_fact:
        switch_to_container: true

    # Loops over every monitor except the current host; the last one visited
    # wins. NOTE(review): with a single monitor the loop never matches and
    # mon_host stays undefined, which the quorum check below relies on.
    - name: select a running monitor
      set_fact:
        mon_host: "{{ item }}"
      with_items: "{{ groups[mon_group_name] }}"
      when: item != inventory_hostname

    - name: stop non-containerized ceph mon
      service:
        name: "ceph-mon@{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # `chown -h` changes the symlink itself instead of following it to its target.
    # NOTE(review): ceph_uid is not defined in this playbook; presumably provided by inventory/roles - confirm.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

    # rc == 0 means at least one *.ldb file exists; failure is tolerated.
    - name: check for existing old leveldb file extension (ldb)
      shell: stat /var/lib/ceph/mon/*/store.db/*.ldb
      changed_when: false
      failed_when: false
      register: ldb_files

    - name: rename leveldb extension from ldb to sst
      shell: rename -v .ldb .sst /var/lib/ceph/mon/*/store.db/*.ldb
      changed_when: false
      failed_when: false
      when: ldb_files.rc == 0

    # `creates` makes this a no-op when the destination keyring already exists.
    - name: copy mon initial keyring in /etc/ceph to satisfy fetch config task in ceph-docker-common
      command: cp /var/lib/ceph/mon/{{ cluster }}-{{ ansible_hostname }}/keyring /etc/ceph/{{ cluster }}.mon.keyring
      args:
        creates: /etc/ceph/{{ cluster }}.mon.keyring
      changed_when: false
      failed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-mon

  post_tasks:
    # We don't do a container test by running 'docker exec ...' since not all the monitors have switched to containers yet.
    # Thus, we continue to use the 'ceph' binary from the host, there is no issue with that.
    # The status is queried on another monitor (mon_host), but the condition
    # checks that the monitor that was just switched (the current host) is
    # listed in quorum_names, so the play only moves on once it has rejoined.
    - name: non container | waiting for the monitor to join the quorum...
      command: ceph --cluster "{{ cluster }}" -s --format json
      register: ceph_health_raw
      until: >
        ansible_hostname in (ceph_health_raw.stdout | from_json)["quorum_names"]
      retries: "{{ health_mon_check_retries }}"
      delay: "{{ health_mon_check_delay }}"
      delegate_to: "{{ mon_host }}"
      changed_when: false


- name: switching from non-containerized to containerized ceph mgr

  hosts:
    - "{{ mgr_group_name|default('mgrs') }}"

  vars:
    containerized_deployment: true
    mgr_group_name: mgrs

  serial: 1
  become: true

  pre_tasks:
    # failed_when: false is here because if we're
    # working with a jewel cluster then ceph mgr
    # will not exist
    - name: stop non-containerized ceph mgr(s)
      service:
        name: "ceph-mgr@{{ ansible_hostname }}"
        state: stopped
        enabled: false
      failed_when: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-mgr


# Runs on the first monitor only, through its (already containerized) mon.
- name: set osd flags

  hosts: "{{ mon_group_name | default('mons') }}[0]"

  become: true

  roles:
    - ceph-defaults

  post_tasks:
    - name: set osd flags
      command: "docker exec ceph-mon-{{ ansible_hostname }} ceph --cluster {{ cluster }} osd set {{ item }}"
      with_items:
        - noout
        - nodeep-scrub


- name: switching from non-containerized to containerized ceph osd

  vars:
    health_osd_check_retries: 5
    health_osd_check_delay: 15
    containerized_deployment: true
    osd_group_name: osds
    # NOTE(review): the mon play sets `switch_to_container` (singular) while
    # this play sets `switch_to_containers`; confirm which name the roles read.
    switch_to_containers: true

  hosts:
    - "{{ osd_group_name|default('osds') }}"

  serial: 1
  become: true

  pre_tasks:
    # Lists active ceph-osd@N / ceph-disk@dev-* / ceph-volume / ceph.target
    # unit names, one per line; an empty result is tolerated.
    - name: collect running osds and ceph-disk unit(s)
      shell: |
        systemctl list-units | grep "loaded active" | grep -Eo 'ceph-osd@[0-9]+.service|ceph-disk@dev-[a-z]{3,4}[0-9]{1}.service|ceph-volume|ceph\.target'
      register: running_osds
      changed_when: false
      failed_when: false

    - name: stop/disable/mask non-containerized ceph osd(s) and ceph-disk units (if any)
      systemd:
        name: "{{ item }}"
        state: stopped
        enabled: false
      with_items: "{{ running_osds.stdout_lines | default([]) }}"
      when: running_osds.stdout_lines | default([]) | length > 0

    - name: remove old ceph-osd systemd units
      file:
        path: "{{ item }}"
        state: absent
      with_items:
        - /usr/lib/systemd/system/ceph-osd.target
        - /usr/lib/systemd/system/ceph-osd@.service
        - /usr/lib/systemd/system/ceph-volume@.service
        - /lib/systemd/system/ceph-osd.target
        - /lib/systemd/system/ceph-osd@.service
        - /lib/systemd/system/ceph-volume@.service
        - /etc/systemd/system/ceph.target.wants

    # Only for non-lvm scenarios with dmcrypt enabled.
    - name: dmcrypt extra operations
      when:
        - osd_scenario != 'lvm'
        - dmcrypt | bool
      block:
        - name: remove zero(s) partuuid symlink
          file:
            path: /dev/disk/by-partuuid/00000000-0000-0000-0000-000000000000
            state: absent

        - name: get lockbox partitions
          command: blkid -t PARTLABEL="ceph lockbox" -o device
          changed_when: false
          failed_when: false
          register: lockbox_partitions

        # For each lockbox partition, prints the device only if its PARTUUID
        # is all zeros; otherwise stdout is empty.
        - name: get lockbox devices without partuuid
          command: "blkid -t PARTUUID=00000000-0000-0000-0000-000000000000 -o device {{ item }}"
          changed_when: false
          failed_when: false
          register: lockbox_devices
          with_items: '{{ lockbox_partitions.stdout_lines }}'

        # Drops the trailing partition suffix from the reported name (two
        # characters for cciss/nvme/loop names, one otherwise) and assigns a
        # fresh GUID to partition 5 of the resulting device. Results with
        # empty stdout (no zero PARTUUID found) are skipped so sgdisk is never
        # called without a device.
        - name: set guid on lockbox partition
          shell: 'sgdisk --partition-guid=5:$(uuidgen) {{ item.stdout[:-2] if item.stdout is match("^/dev/(cciss|nvme|loop).*") else item.stdout[:-1] }}'
          with_items: '{{ lockbox_devices.results }}'
          when: item.stdout | default('') | length > 0

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

    - name: check for existing old leveldb file extension (ldb)
      shell: stat /var/lib/ceph/osd/*/current/omap/*.ldb
      changed_when: false
      failed_when: false
      register: ldb_files

    - name: rename leveldb extension from ldb to sst
      shell: rename -v .ldb .sst /var/lib/ceph/osd/*/current/omap/*.ldb
      changed_when: false
      failed_when: false
      when:
        - ldb_files.rc == 0

    - name: check if containerized osds are already running
      command: >
        docker ps -q --filter='name=ceph-osd'
      changed_when: false
      failed_when: false
      register: osd_running

    # Also scans /var/lib/ceph/osd-lockbox when dmcrypt is enabled.
    - name: get osd directories
      command: >
        find /var/lib/ceph/osd {% if dmcrypt | bool %}/var/lib/ceph/osd-lockbox{% endif %} -maxdepth 1 -mindepth 1 -type d
      register: osd_dirs
      changed_when: false
      failed_when: false

    # Skipped when `docker ps` succeeded and listed at least one ceph-osd container.
    - name: unmount all the osd directories
      command: >
        umount {{ item }}
      changed_when: false
      failed_when: false
      with_items: "{{ osd_dirs.stdout_lines }}"
      when: osd_running.rc != 0 or osd_running.stdout_lines | length == 0

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-osd

  post_tasks:
    # Both status queries run inside the first monitor's container.
    - name: get num_pgs
      command: docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster "{{ cluster }}" -s --format json
      register: ceph_pgs
      delegate_to: "{{ groups[mon_group_name][0] }}"
      changed_when: false

    # Succeeds once pgs_by_state is non-empty and the summed count of states
    # starting with "active+clean" equals num_pgs. Skipped when there are no PGs.
    - name: container - waiting for clean pgs...
      command: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} -s --format json"
      register: ceph_health_post
      until: >
        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
        and
        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_pgs.stdout | from_json).pgmap.num_pgs)
      delegate_to: "{{ groups[mon_group_name][0] }}"
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"
      changed_when: false
      when:
        - (ceph_pgs.stdout | from_json).pgmap.num_pgs != 0


- name: unset osd flags

  hosts: "{{ mon_group_name | default('mons') }}[0]"

  become: true

  roles:
    - ceph-defaults

  post_tasks:
    - name: unset osd flags
      command: "docker exec ceph-mon-{{ ansible_hostname }} ceph --cluster {{ cluster }} osd unset {{ item }}"
      with_items:
        - noout
        - nodeep-scrub


- name: switching from non-containerized to containerized ceph mds

  hosts:
    - "{{ mds_group_name|default('mdss') }}"

  vars:
    containerized_deployment: true
    mds_group_name: mdss

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph mds(s)
      service:
        name: "ceph-mds@{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # `chown -h` changes the symlink itself instead of following it to its target.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-mds


- name: switching from non-containerized to containerized ceph rgw

  hosts:
    - "{{ rgw_group_name|default('rgws') }}"

  vars:
    containerized_deployment: true
    rgw_group_name: rgws

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph rgw(s)
      service:
        name: "ceph-radosgw@rgw.{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # `chown -h` changes the symlink itself instead of following it to its target.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-rgw


- name: switching from non-containerized to containerized ceph rbd-mirror

  hosts:
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"

  vars:
    containerized_deployment: true
    rbdmirror_group_name: rbdmirrors

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph rbd mirror(s)
      service:
        name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # `chown -h` changes the symlink itself instead of following it to its target.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-rbd-mirror


- name: switching from non-containerized to containerized ceph nfs

  hosts:
    - "{{ nfs_group_name|default('nfss') }}"

  vars:
    containerized_deployment: true
    nfs_group_name: nfss

  serial: 1
  become: true

  pre_tasks:
    # failed_when: false is here because if we're
    # working with a jewel cluster then ceph nfs
    # will not exist
    - name: stop non-containerized ceph nfs(s)
      service:
        name: nfs-ganesha
        state: stopped
        enabled: false
      failed_when: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # `chown -h` changes the symlink itself instead of following it to its target.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-nfs