---
# This playbook switches from non-containerized to containerized Ceph daemons

# Safety gate: runs on localhost only and fails unless the operator confirmed
# the migration, either at the interactive prompt or by passing
# `-e ireallymeanit=yes` on the command line.
- name: confirm whether user really meant to switch from non-containerized to containerized ceph daemons

  hosts:
    - localhost

  gather_facts: false

  vars_prompt:
    # The answer is echoed while typing (not private) and defaults to 'no'.
    - name: ireallymeanit
      prompt: Are you sure you want to switch from non-containerized to containerized ceph daemons?
      default: 'no'
      private: false

  tasks:
    # Anything other than the exact string 'yes' aborts here.
    - name: exit playbook, if user did not mean to switch from non-containerized to containerized daemons?
      fail:
        msg: >
          "Exiting switch-from-non-containerized-to-containerized-ceph-daemons.yml playbook,
           cluster did not switch from non-containerized to containerized ceph daemons.
           To switch from non-containerized to containerized ceph daemons, either say 'yes' on the prompt
           or use `-e ireallymeanit=yes` on the command line when
           invoking the playbook"
      when: ireallymeanit != 'yes'


# Applies the ceph-defaults role to every daemon group, then gathers facts for
# each inventory host outside the client group and stores them on that host
# (delegate_facts), so that other hosts' facts can be read through hostvars.
# NOTE(review): despite the play name, no task visible in this play installs
# or starts docker - confirm whether the name is stale.
- name: make sure docker is present and started

  become: true

  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"

  vars:
    delegate_facts_host: true

  roles:
    - ceph-defaults

  post_tasks:
    # Runs once for the whole play; each loop iteration is delegated to one
    # target host. The facter and ohai subsets are excluded.
    - name: gather and delegate facts
      tags: always
      run_once: true
      when: delegate_facts_host | bool
      delegate_to: "{{ item }}"
      delegate_facts: true
      with_items: "{{ groups['all'] | difference(groups.get(client_group_name, [])) }}"
      setup:
        gather_subset:
          - 'all'
          - '!facter'
          - '!ohai'

# Migrates the monitors one host at a time (serial: 1): stop and disable the
# packaged ceph-mon unit, fix ownership and leveldb file names, run the
# ceph-docker-common and ceph-mon roles with containerized_deployment set, and
# finally wait until this monitor shows up in the quorum again.
- name: switching from non-containerized to containerized ceph mon

  vars:
    health_mon_check_retries: 5
    health_mon_check_delay: 15
    containerized_deployment: true
    mon_group_name:       mons
    restapi_group_name:   restapis

  hosts:
    - "{{ mon_group_name|default('mons') }}"

  serial: 1
  become: true

  pre_tasks:
    - name: set_fact switch_to_container to indicate that we run a special playbook
      set_fact:
        switch_to_container: true

    # Remember a monitor other than the current host (the last match in the
    # group wins); the quorum query in post_tasks is delegated to it.
    # NOTE(review): with a single monitor nothing matches and mon_host stays
    # undefined - confirm that single-mon clusters are out of scope.
    - name: select a running monitor
      set_fact:
        mon_host: "{{ item }}"
      with_items: "{{ groups[mon_group_name] }}"
      when: item != inventory_hostname

    - name: stop non-containerized ceph mon
      service:
        name: "ceph-mon@{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

    # stat exits non-zero when the glob matches nothing; the return code is
    # only used to decide whether the rename below runs.
    - name: check for existing old leveldb file extension (ldb)
      shell: stat /var/lib/ceph/mon/*/store.db/*.ldb
      changed_when: false
      failed_when: false
      register: ldb_files

    - name: rename leveldb extension from ldb to sst
      shell: rename -v .ldb .sst /var/lib/ceph/mon/*/store.db/*.ldb
      changed_when: false
      failed_when: false
      when: ldb_files.rc == 0

    # Best effort: skipped when the destination already exists (creates) and
    # never fails the play.
    - name: copy mon initial keyring in /etc/ceph to satisfy fetch config task in ceph-docker-common
      command: cp /var/lib/ceph/mon/{{ cluster }}-{{ ansible_hostname }}/keyring /etc/ceph/{{ cluster }}.mon.keyring
      args:
        creates: /etc/ceph/{{ cluster }}.mon.keyring
      changed_when: false
      failed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-mon

  post_tasks:
    # We don't do a container test by running 'docker exec ...' since not all the monitors have switched to containers yet.
    # Thus, we continue to use the 'ceph' binary from the host, there is no issue with that.
    # The status query runs on mon_host (another monitor), but the name looked
    # up in quorum_names must be the monitor that was just migrated, i.e. the
    # current play host - not the delegate, which is in the quorum anyway.
    - name: non container | waiting for the monitor to join the quorum...
      command: ceph --cluster "{{ cluster }}" -s --format json
      register: ceph_health_raw
      until: >
        hostvars[inventory_hostname]['ansible_hostname'] in (ceph_health_raw.stdout | from_json)["quorum_names"]
      retries: "{{ health_mon_check_retries }}"
      delay: "{{ health_mon_check_delay }}"
      delegate_to: "{{ mon_host }}"
      changed_when: false

# Handles the mgr hosts one at a time: stop and disable the packaged ceph-mgr
# unit, fix ownership under /var/lib/ceph and /etc/ceph, then run the roles
# with containerized_deployment set.
- name: switching from non-containerized to containerized ceph mgr

  become: true
  serial: 1

  vars:
    mgr_group_name: mgrs
    containerized_deployment: true

  hosts:
    - "{{ mgr_group_name|default('mgrs') }}"

  pre_tasks:
    # A jewel cluster has no ceph mgr, so a failure to stop the unit is
    # deliberately ignored (failed_when: false).
    - name: stop non-containerized ceph mgr(s)
      failed_when: false
      service:
        enabled: false
        state: stopped
        name: "ceph-mgr@{{ ansible_hostname }}"

    # A raw find/chown replaces the file module for speed: the file module
    # inspects the current ownership of every entry, which is wasted work when
    # everything is meant to belong to the ceph user anyway.
    - name: set proper ownership on ceph directories
      changed_when: false
      command: >-
        find /var/lib/ceph /etc/ceph
        -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -)
        -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-mgr


# On the first monitor, sets the noout and nodeep-scrub OSD flags through the
# ceph-mon container. This play precedes the OSD migration play in this file.
- name: set osd flags
  become: true
  hosts: "{{ mon_group_name | default('mons') }}[0]"
  roles:
    - ceph-defaults
  post_tasks:
    # One `ceph osd set <flag>` invocation per listed flag.
    - name: set osd flags
      with_items:
        - noout
        - nodeep-scrub
      command: >-
        docker exec ceph-mon-{{ ansible_hostname }}
        ceph --cluster {{ cluster }} osd set {{ item }}


# Migrates the OSD hosts one at a time (serial: 1): stop the packaged OSD
# units, remove their unit files, repair dmcrypt lockbox partitions, fix
# ownership and leveldb file names, unmount the OSD directories, run the roles
# with containerized_deployment set, then wait for all PGs to be active+clean.
- name: switching from non-containerized to containerized ceph osd

  vars:
    health_osd_check_retries: 5
    health_osd_check_delay: 15
    containerized_deployment: true
    osd_group_name: osds
    switch_to_containers: true

  hosts:
    - "{{ osd_group_name|default('osds') }}"

  serial: 1
  become: true


  pre_tasks:
    # grep exits non-zero when nothing matches, hence failed_when: false;
    # stdout_lines is then simply empty.
    - name: collect running osds and ceph-disk unit(s)
      shell: |
        systemctl list-units | grep "loaded active" | grep -Eo 'ceph-osd@[0-9]+.service|ceph-disk@dev-[a-z]{3,4}[0-9]{1}.service|ceph-volume|ceph\.target'
      register: running_osds
      changed_when: false
      failed_when: false

    # No extra condition is needed: an empty list means no loop iterations.
    # (The former `when: running_osds != []` compared the registered result
    # dict with a list and was therefore always true.)
    - name: stop/disable/mask non-containerized ceph osd(s) and ceph-disk units (if any)
      systemd:
        name: "{{ item }}"
        state: stopped
        enabled: false
      with_items: "{{ running_osds.stdout_lines | default([]) }}"

    - name: remove old ceph-osd systemd units
      file:
        path: "{{ item }}"
        state: absent
      with_items:
        - /usr/lib/systemd/system/ceph-osd.target
        - /usr/lib/systemd/system/ceph-osd@.service
        - /usr/lib/systemd/system/ceph-volume@.service
        - /lib/systemd/system/ceph-osd.target
        - /lib/systemd/system/ceph-osd@.service
        - /lib/systemd/system/ceph-volume@.service
        - /etc/systemd/system/ceph.target.wants

    # Only for non-lvm dmcrypt OSDs: partitions labelled "ceph lockbox" whose
    # PARTUUID is all zeros get a freshly generated GUID on partition 5.
    - name: dmcrypt extra operations
      when:
        - osd_scenario != 'lvm'
        - dmcrypt | bool
      block:
        - name: remove zero(s) partuuid symlink
          file:
            path: /dev/disk/by-partuuid/00000000-0000-0000-0000-000000000000
            state: absent

        - name: get lockbox partitions
          command: blkid -t PARTLABEL="ceph lockbox" -o device
          changed_when: false
          failed_when: false
          register: lockbox_partitions

        # Prints the partition only if its PARTUUID is all zeros; otherwise
        # stdout stays empty.
        - name: get lockbox devices without partuuid
          command: "blkid -t PARTUUID=00000000-0000-0000-0000-000000000000 -o device {{ item }}"
          changed_when: false
          failed_when: false
          register: lockbox_devices
          with_items: '{{ lockbox_partitions.stdout_lines }}'

        # The device name is derived from the partition path by dropping the
        # last two characters for cciss/nvme/loop devices and the last one
        # otherwise. Results with empty stdout are skipped, because sgdisk
        # would otherwise be invoked without a device argument.
        - name: set guid on lockbox partition
          shell: 'sgdisk --partition-guid=5:$(uuidgen) {{ item.stdout[:-2] if item.stdout is match("^/dev/(cciss|nvme|loop).*") else item.stdout[:-1] }}'
          with_items: '{{ lockbox_devices.results }}'
          when: item.stdout | default('') | length > 0

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

    # stat exits non-zero when the glob matches nothing; the return code is
    # only used to decide whether the rename below runs.
    - name: check for existing old leveldb file extension (ldb)
      shell: stat /var/lib/ceph/osd/*/current/omap/*.ldb
      changed_when: false
      failed_when: false
      register: ldb_files

    - name: rename leveldb extension from ldb to sst
      shell: rename -v .ldb .sst /var/lib/ceph/osd/*/current/omap/*.ldb
      changed_when: false
      failed_when: false
      when:
        - ldb_files.rc == 0

    - name: check if containerized osds are already running
      command: >
        docker ps -q --filter='name=ceph-osd'
      changed_when: false
      failed_when: false
      register: osd_running

    # The lockbox directory tree is searched as well when dmcrypt is enabled.
    - name: get osd directories
      command: >
        find /var/lib/ceph/osd {% if dmcrypt | bool %}/var/lib/ceph/osd-lockbox{% endif %} -maxdepth 1 -mindepth 1 -type d
      register: osd_dirs
      changed_when: false
      failed_when: false

    # Only when no ceph-osd container was found (docker ps failed or printed
    # nothing). Individual umount failures are ignored.
    - name: unmount all the osd directories
      command: >
        umount {{ item }}
      changed_when: false
      failed_when: false
      with_items: "{{ osd_dirs.stdout_lines }}"
      when: osd_running.rc != 0 or osd_running.stdout_lines | length == 0


  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-osd

  post_tasks:
    # Both status queries run inside the ceph-mon container on the first
    # monitor of the group.
    - name: get num_pgs
      command: docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster "{{ cluster }}" -s --format json
      register: ceph_pgs
      delegate_to: "{{ groups[mon_group_name][0] }}"

    # Retries until the counts of all PG states starting with "active+clean"
    # add up to num_pgs as read above. Skipped when the cluster has no PGs.
    - name: container - waiting for clean pgs...
      command: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} -s --format json"
      register: ceph_health_post
      until: >
        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
        and
        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_pgs.stdout | from_json).pgmap.num_pgs)
      delegate_to: "{{ groups[mon_group_name][0] }}"
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"
      when:
        - (ceph_pgs.stdout | from_json).pgmap.num_pgs != 0


# On the first monitor, clears the noout and nodeep-scrub OSD flags through
# the ceph-mon container. This play follows the OSD migration play in this
# file.
- name: unset osd flags
  hosts: "{{ mon_group_name | default('mons') }}[0]"
  become: true
  roles:
    - ceph-defaults
  post_tasks:
    # One `ceph osd unset <flag>` invocation per listed flag.
    - name: unset osd flags
      command: "docker exec ceph-mon-{{ ansible_hostname }} ceph --cluster {{ cluster }} osd unset {{ item }}"
      with_items:
        - noout
        - nodeep-scrub

# Handles the mds hosts one at a time: stop and disable the packaged ceph-mds
# unit, fix ownership under /var/lib/ceph and /etc/ceph, then run the roles
# with containerized_deployment set.
- name: switching from non-containerized to containerized ceph mds

  hosts:
    - "{{ mds_group_name|default('mdss') }}"

  vars:
    containerized_deployment: true
    mds_group_name: mdss

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph mds(s)
      service:
        name: "ceph-mds@{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # chown -h changes a symlink itself instead of its target, matching the
    # same task in the mon, mgr and osd plays, and does not fail on dangling
    # links.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-mds


# Handles the rgw hosts one at a time: stop and disable the packaged
# ceph-radosgw unit, fix ownership under /var/lib/ceph and /etc/ceph, then run
# the roles with containerized_deployment set.
- name: switching from non-containerized to containerized ceph rgw

  hosts:
    - "{{ rgw_group_name|default('rgws') }}"

  vars:
    containerized_deployment: true
    rgw_group_name: rgws

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph rgw(s)
      service:
        name: "ceph-radosgw@rgw.{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # chown -h changes a symlink itself instead of its target, matching the
    # same task in the mon, mgr and osd plays, and does not fail on dangling
    # links.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-rgw


# Handles the rbd-mirror hosts one at a time: stop and disable the packaged
# ceph-rbd-mirror unit, fix ownership under /var/lib/ceph and /etc/ceph, then
# run the roles with containerized_deployment set.
- name: switching from non-containerized to containerized ceph rbd-mirror

  hosts:
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"

  vars:
    containerized_deployment: true
    rbdmirror_group_name: rbdmirrors

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph rbd mirror(s)
      service:
        name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # chown -h changes a symlink itself instead of its target, matching the
    # same task in the mon, mgr and osd plays, and does not fail on dangling
    # links.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-rbd-mirror


# Handles the nfs hosts one at a time: stop and disable the nfs-ganesha
# service, fix ownership under /var/lib/ceph and /etc/ceph, then run the roles
# with containerized_deployment set.
- name: switching from non-containerized to containerized ceph nfs

  hosts:
    - "{{ nfs_group_name|default('nfss') }}"

  vars:
    containerized_deployment: true
    nfs_group_name: nfss

  serial: 1
  become: true

  pre_tasks:
    # failed_when: false is here because if we're
    # working with a jewel cluster then ceph nfs
    # will not exist
    - name: stop non-containerized ceph nfs(s)
      service:
        name: nfs-ganesha
        state: stopped
        enabled: false
      failed_when: false

    # NOTE: changed from file module to raw find command for performance reasons
    # The file module has to run checks on current ownership of all directories and files. This is unnecessary
    # as in this case we know we want all owned by ceph user
    # chown -h changes a symlink itself instead of its target, matching the
    # same task in the mon, mgr and osd plays, and does not fail on dangling
    # links.
    - name: set proper ownership on ceph directories
      command: "find /var/lib/ceph /etc/ceph -not -( -user {{ ceph_uid }} -or -group {{ ceph_uid }} -) -execdir chown -h {{ ceph_uid }}:{{ ceph_uid }} {} +"
      changed_when: false

  roles:
    - ceph-defaults
    - ceph-facts
    - ceph-handler
    - ceph-docker-common
    - ceph-nfs