# ceph-ansible/infrastructure-playbooks/switch-from-non-containeriz...

---
# This playbook switches from non-containerized to containerized Ceph daemons

# Safety gate: ask the operator for explicit confirmation before any daemon is
# touched. The answer can also be supplied non-interactively with
# `-e ireallymeanit=yes`.
- name: confirm whether user really meant to switch from non-containerized to containerized ceph daemons

  hosts:
    - localhost

  # Only the prompt answer is evaluated below, so no facts are needed here.
  gather_facts: false

  vars_prompt:
    - name: ireallymeanit
      prompt: Are you sure you want to switch from non-containerized to containerized ceph daemons?
      default: 'no'
      private: false

  tasks:
    # Fail unless the answer is exactly the string 'yes'.
    - name: exit playbook, if user did not mean to switch from non-containerized to containerized daemons?
      fail:
        msg: >
          "Exiting switch-from-non-containerized-to-containerized-ceph-daemons.yml playbook,
           cluster did not switch from non-containerized to containerized ceph daemons.
           To switch from non-containerized to containerized ceph daemons, either say 'yes' on the prompt
           or use `-e ireallymeanit=yes` on the command line when
           invoking the playbook"
      when: ireallymeanit != 'yes'


# NOTE(review): despite its name, this play contains no docker-related task.
# It only collects facts for every inventory host, so that later plays can read
# hostvars of hosts they do not target themselves.
- name: make sure docker is present and started

  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"

  become: true

  tasks:
    # The loop already covers every host of the inventory and delegate_facts
    # stores each result under the delegated host, so running it from a single
    # play host is enough. Without run_once every play host would repeat the
    # whole loop.
    - name: gather and delegate facts
      setup:
      delegate_to: "{{ item }}"
      delegate_facts: true
      with_items: "{{ groups['all'] }}"
      run_once: true

# Converts the monitors one host at a time (serial: 1): stop the systemd-managed
# mon, hand the ceph directories to the uid used by the container image, run the
# ceph-* roles with containerized_deployment enabled, then wait until the
# converted monitor is back in the quorum.
- name: switching from non-containerized to containerized ceph mon

  vars:
    health_mon_check_retries: 5
    health_mon_check_delay: 15
    containerized_deployment: true
    mon_group_name: mons
    restapi_group_name: restapis

  hosts:
    - "{{ mon_group_name|default('mons') }}"

  serial: 1
  become: true

  pre_tasks:
    # Remember another monitor of the group; the quorum check in post_tasks is
    # delegated to it. With several candidates the last one looped over wins.
    # NOTE(review): with a single monitor no item passes the condition and
    # mon_host stays unset - confirm whether that setup must be supported.
    - name: select a running monitor
      set_fact:
        mon_host: "{{ item }}"
      with_items: "{{ groups[mon_group_name] }}"
      when: item != inventory_hostname

    - name: stop non-containerized ceph mon
      service:
        name: "ceph-mon@{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # ceph_uid is derived from the container image name/tag; the last matching
    # task below wins.
    - name: set_fact ceph_uid for ubuntu
      set_fact:
        ceph_uid: 64045
      when: ceph_docker_image_tag | string is search("ubuntu")

    - name: set_fact ceph_uid for red hat
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image_tag | string is match("latest") or ceph_docker_image_tag | string is search("centos") or ceph_docker_image_tag | string is search("fedora")

    - name: set_fact ceph_uid for rhel
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image is search("rhceph")

    - name: set proper ownership on ceph directories
      file:
        path: "{{ item }}"
        owner: "{{ ceph_uid }}"
        recurse: true
      with_items:
        - /var/lib/ceph
        - /etc/ceph

    # stat exits non-zero when the glob matches nothing; failed_when: false
    # keeps the play going and the return code gates the rename below.
    - name: check for existing old leveldb file extension (ldb)
      shell: stat /var/lib/ceph/mon/*/store.db/*.ldb
      changed_when: false
      failed_when: false
      register: ldb_files

    - name: rename leveldb extension from ldb to sst
      shell: rename -v .ldb .sst /var/lib/ceph/mon/*/store.db/*.ldb
      changed_when: false
      failed_when: false
      when: ldb_files.rc == 0

    # Best effort: "creates" skips the copy when the target already exists and
    # failed_when: false ignores a failing cp.
    - name: copy mon initial keyring in /etc/ceph to satisfy fetch config task in ceph-docker-common
      command: cp /var/lib/ceph/mon/{{ cluster }}-{{ ansible_hostname }}/keyring /etc/ceph/{{ cluster }}.mon.keyring
      args:
        creates: /etc/ceph/{{ cluster }}.mon.keyring
      changed_when: false
      failed_when: false

  roles:
    - ceph-defaults
    - ceph-handler
    - ceph-docker-common
    - ceph-mon

  post_tasks:
    # We don't do a container test by running 'docker exec ...' since not all the monitors have switched to containers yet.
    # Thus, we continue to use the 'ceph' binary from the host, there is no issue with that.
    #
    # The status is queried on mon_host, but the name looked up in quorum_names
    # must be the monitor that was just converted, i.e. the current play host.
    # Checking hostvars[mon_host] instead would only confirm that the monitor
    # answering the query is itself in the quorum.
    - name: non container | waiting for the monitor to join the quorum...
      command: ceph --cluster "{{ cluster }}" -s --format json
      register: ceph_health_raw
      until: >
        ansible_hostname in (ceph_health_raw.stdout | from_json)["quorum_names"]
      retries: "{{ health_mon_check_retries }}"
      delay: "{{ health_mon_check_delay }}"
      delegate_to: "{{ mon_host }}"

# Converts the mgr daemons one host at a time: stop the systemd unit, hand the
# ceph directories to the uid used by the container image, then run the ceph-*
# roles with containerized_deployment enabled.
- name: switching from non-containerized to containerized ceph mgr

  hosts:
    - "{{ mgr_group_name|default('mgrs') }}"

  vars:
    containerized_deployment: true
    mgr_group_name: mgrs

  serial: 1
  become: true

  pre_tasks:
    # failed_when: false is here because if we're
    # working with a jewel cluster then ceph mgr
    # will not exist
    - name: stop non-containerized ceph mgr(s)
      service:
        name: "ceph-mgr@{{ ansible_hostname }}"
        state: stopped
        enabled: false
      failed_when: false

    # uid selection follows the mon play of this playbook: an image tag starting
    # with "latest" maps to 167, not 64045, so every daemon type ends up with
    # the same owner for the same image.
    - name: set_fact ceph_uid for ubuntu
      set_fact:
        ceph_uid: 64045
      when: ceph_docker_image_tag | string is search("ubuntu")

    - name: set_fact ceph_uid for red hat
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image_tag | string is match("latest") or ceph_docker_image_tag | string is search("centos") or ceph_docker_image is search("rhceph") or ceph_docker_image_tag | string is search("fedora")

    - name: set proper ownership on ceph directories
      file:
        path: "{{ item }}"
        owner: "{{ ceph_uid }}"
        recurse: true
      with_items:
        - /var/lib/ceph
        - /etc/ceph

  roles:
    - ceph-defaults
    - ceph-handler
    - ceph-docker-common
    - ceph-mgr


# Converts the OSD hosts one at a time: stop and mask the running ceph-osd /
# ceph-disk units, fix ownership, unmount the data partitions of devices that
# have no running container yet, run the ceph-* roles with
# containerized_deployment enabled, then wait for all PGs to be active+clean.
- name: switching from non-containerized to containerized ceph osd

  vars:
    health_osd_check_retries: 5
    health_osd_check_delay: 15
    containerized_deployment: true
    osd_group_name: osds

  hosts:
    - "{{ osd_group_name|default('osds') }}"

  serial: 1
  become: true


  pre_tasks:
    # grep exits non-zero when nothing matches; failed_when: false tolerates a
    # host without any active unit.
    - name: collect running osds and ceph-disk unit(s)
      shell: |
        systemctl list-units | grep "loaded active" | grep -Eo 'ceph-osd@[0-9]+.service|ceph-disk@dev-[a-z]{3,4}[0-9]{1}.service'
      register: running_osds
      changed_when: false
      failed_when: false

    # Only discover devices when the user did not provide a non-empty list.
    # The awk program strips the trailing "1:" from the first blkid column of
    # every line containing "ceph data".
    - name: collect osd devices
      shell: |
        blkid | awk '/ceph data/ { sub ("1:", "", $1); print $1 }'
      register: collect_devices
      changed_when: false
      when:
        - devices is not defined or (devices is defined and devices == [])

    # A registered result is a dict, so the former "collect_devices != []"
    # comparison was always true and has been dropped; the skipped check is
    # what actually guards this task.
    - name: set devices
      set_fact:
        devices: "{{ collect_devices.stdout_lines | list }}"
      when:
        - collect_devices is defined
        - not collect_devices.get("skipped")

    # An empty unit list simply yields zero iterations, so no extra condition is
    # needed (the former "running_osds != []" compared a dict with a list and
    # was always true).
    - name: stop/disable/mask non-containerized ceph osd(s) and ceph-disk units (if any)
      systemd:
        name: "{{ item }}"
        state: stopped
        enabled: false
        masked: true
      with_items: "{{ running_osds.stdout_lines | default([]) }}"

    # ceph_uid is derived from the container image name/tag; the last matching
    # task below wins. The ubuntu task previously also matched "latest", but
    # that value was always overwritten with 167 by the next task.
    - name: set_fact ceph_uid for ubuntu
      set_fact:
        ceph_uid: 64045
      when: ceph_docker_image_tag | string is search("ubuntu")

    - name: set_fact ceph_uid for red hat
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image_tag | string is match("latest") or ceph_docker_image_tag | string is search("centos") or ceph_docker_image_tag | string is search("fedora")

    - name: set_fact ceph_uid for rhel
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image is search("rhceph")

    - name: set proper ownership on ceph directories
      file:
        path: "{{ item }}"
        owner: "{{ ceph_uid }}"
        recurse: true
      with_items:
        - /var/lib/ceph
        - /etc/ceph

    # stat exits non-zero when the glob matches nothing; the return code gates
    # the rename below.
    - name: check for existing old leveldb file extension (ldb)
      shell: stat /var/lib/ceph/osd/*/current/omap/*.ldb
      changed_when: false
      failed_when: false
      register: ldb_files

    - name: rename leveldb extension from ldb to sst
      shell: rename -v .ldb .sst /var/lib/ceph/osd/*/current/omap/*.ldb
      changed_when: false
      failed_when: false
      when:
        - ldb_files.rc == 0

    # One result per device: rc == 0 means "docker ps" output contains the
    # device path with every "/" removed.
    - name: check if containerized osds are already running
      shell: |
        docker ps | grep -sq {{ item | regex_replace('/', '') }}
      changed_when: false
      failed_when: false
      with_items: "{{ devices }}"
      register: osd_running

    - name: resolve device(s) path(s)
      command: readlink -f {{ item }}
      changed_when: false
      with_items: "{{ devices }}"
      register: devices_canonicalize

    # Both result lists were built from "devices", so with_together pairs each
    # resolved path with the matching container check. Partition "1" of the
    # device is unmounted only when no container was found for it.
    - name: unmount all the osd directories
      command: umount "{{ item.0.stdout }}"1
      changed_when: false
      failed_when: false
      with_together:
        - "{{ devices_canonicalize.results }}"
        - "{{ osd_running.results }}"
      when:
        - item.1.get("rc", 0) != 0

  roles:
    - ceph-defaults
    - ceph-handler
    - ceph-docker-common
    - ceph-osd

  post_tasks:
    # NOTE(review): mon_group_name is not set in this play's vars; presumably
    # it comes from the ceph-defaults role or the inventory - confirm.
    - name: get num_pgs
      command: docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster "{{ cluster }}" -s --format json
      register: ceph_pgs
      delegate_to: "{{ groups[mon_group_name][0] }}"

    # Retry until at least one PG state is reported and the PG counts of all
    # states starting with "active+clean" add up to num_pgs. Skipped entirely
    # when the cluster has no PGs.
    - name: container - waiting for clean pgs...
      command: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} -s --format json"
      register: ceph_health_post
      until: >
        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
        and
        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_pgs.stdout | from_json).pgmap.num_pgs)
      delegate_to: "{{ groups[mon_group_name][0] }}"
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"
      when:
        - (ceph_pgs.stdout | from_json).pgmap.num_pgs != 0


# Converts the MDS daemons one host at a time: stop the systemd unit, hand the
# ceph directories to the uid used by the container image, then run the ceph-*
# roles with containerized_deployment enabled.
- name: switching from non-containerized to containerized ceph mds

  hosts:
    - "{{ mds_group_name|default('mdss') }}"

  vars:
    containerized_deployment: true
    mds_group_name: mdss

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph mds(s)
      service:
        name: "ceph-mds@{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # uid selection follows the mon play of this playbook: an image tag starting
    # with "latest" maps to 167, not 64045, so every daemon type ends up with
    # the same owner for the same image.
    - name: set_fact ceph_uid for ubuntu
      set_fact:
        ceph_uid: 64045
      when: ceph_docker_image_tag | string is search("ubuntu")

    - name: set_fact ceph_uid for red hat
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image_tag | string is match("latest") or ceph_docker_image_tag | string is search("centos") or ceph_docker_image is search("rhceph") or ceph_docker_image_tag | string is search("fedora")

    - name: set proper ownership on ceph directories
      file:
        path: "{{ item }}"
        owner: "{{ ceph_uid }}"
        recurse: true
      with_items:
        - /var/lib/ceph
        - /etc/ceph

  roles:
    - ceph-defaults
    - ceph-handler
    - ceph-docker-common
    - ceph-mds


# Converts the rados gateways one host at a time: stop the systemd unit, hand
# the ceph directories to the uid used by the container image, then run the
# ceph-* roles with containerized_deployment enabled.
- name: switching from non-containerized to containerized ceph rgw

  hosts:
    - "{{ rgw_group_name|default('rgws') }}"

  vars:
    containerized_deployment: true
    rgw_group_name: rgws

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph rgw(s)
      service:
        name: "ceph-radosgw@rgw.{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # uid selection follows the mon play of this playbook: an image tag starting
    # with "latest" maps to 167, not 64045, so every daemon type ends up with
    # the same owner for the same image.
    - name: set_fact ceph_uid for ubuntu
      set_fact:
        ceph_uid: 64045
      when: ceph_docker_image_tag | string is search("ubuntu")

    - name: set_fact ceph_uid for red hat
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image_tag | string is match("latest") or ceph_docker_image_tag | string is search("centos") or ceph_docker_image is search("rhceph") or ceph_docker_image_tag | string is search("fedora")

    - name: set proper ownership on ceph directories
      file:
        path: "{{ item }}"
        owner: "{{ ceph_uid }}"
        recurse: true
      with_items:
        - /var/lib/ceph
        - /etc/ceph

  roles:
    - ceph-defaults
    - ceph-handler
    - ceph-docker-common
    - ceph-rgw


# Converts the rbd-mirror daemons one host at a time: stop the systemd unit,
# hand the ceph directories to the uid used by the container image, then run
# the ceph-* roles with containerized_deployment enabled.
- name: switching from non-containerized to containerized ceph rbd-mirror

  hosts:
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"

  vars:
    containerized_deployment: true
    rbdmirror_group_name: rbdmirrors

  serial: 1
  become: true

  pre_tasks:
    - name: stop non-containerized ceph rbd mirror(s)
      service:
        name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}"
        state: stopped
        enabled: false

    # uid selection follows the mon play of this playbook: an image tag starting
    # with "latest" maps to 167, not 64045, so every daemon type ends up with
    # the same owner for the same image.
    - name: set_fact ceph_uid for ubuntu
      set_fact:
        ceph_uid: 64045
      when: ceph_docker_image_tag | string is search("ubuntu")

    - name: set_fact ceph_uid for red hat
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image_tag | string is match("latest") or ceph_docker_image_tag | string is search("centos") or ceph_docker_image is search("rhceph") or ceph_docker_image_tag | string is search("fedora")

    - name: set proper ownership on ceph directories
      file:
        path: "{{ item }}"
        owner: "{{ ceph_uid }}"
        recurse: true
      with_items:
        - /var/lib/ceph
        - /etc/ceph

  roles:
    - ceph-defaults
    - ceph-handler
    - ceph-docker-common
    - ceph-rbd-mirror


# Converts the NFS gateways one host at a time: stop the nfs-ganesha service,
# hand the ceph directories to the uid used by the container image, then run
# the ceph-* roles with containerized_deployment enabled.
- name: switching from non-containerized to containerized ceph nfs

  hosts:
    - "{{ nfs_group_name|default('nfss') }}"

  vars:
    containerized_deployment: true
    nfs_group_name: nfss

  serial: 1
  become: true

  pre_tasks:
    # failed_when: false is here because if we're
    # working with a jewel cluster then ceph nfs
    # will not exist
    - name: stop non-containerized ceph nfs(s)
      service:
        name: nfs-ganesha
        state: stopped
        enabled: false
      failed_when: false

    # uid selection follows the mon play of this playbook: an image tag starting
    # with "latest" maps to 167, not 64045, so every daemon type ends up with
    # the same owner for the same image.
    - name: set_fact ceph_uid for ubuntu
      set_fact:
        ceph_uid: 64045
      when: ceph_docker_image_tag | string is search("ubuntu")

    - name: set_fact ceph_uid for red hat
      set_fact:
        ceph_uid: 167
      when: ceph_docker_image_tag | string is match("latest") or ceph_docker_image_tag | string is search("centos") or ceph_docker_image is search("rhceph") or ceph_docker_image_tag | string is search("fedora")

    - name: set proper ownership on ceph directories
      file:
        path: "{{ item }}"
        owner: "{{ ceph_uid }}"
        recurse: true
      with_items:
        - /var/lib/ceph
        - /etc/ceph

  roles:
    - ceph-defaults
    - ceph-handler
    - ceph-docker-common
    - ceph-nfs