Switch the non-containerized -> containerized migration playbook from inline
`docker:` tasks to roles: every daemon play now keeps its host preparation in
pre_tasks, runs ceph-defaults, ceph-docker-common and the per-daemon role, and
(for mon and osd) performs its health check in post_tasks through
`docker exec` on a monitor container. The OSD restart template now only looks
at "loaded active" units and derives the socket name from {{ cluster }}.

Review notes:
  * Indentation and blank context lines were reconstructed from a
    whitespace-collapsed copy of this patch; run `git apply --check` against
    the target tree before applying.
  * Four added (+) lines were corrected during review; removed and context
    lines are untouched and the hunk line counts are unchanged:
      - fail message: dropped the duplicated "or".
      - mon quorum check: `until` is a bare expression instead of a
        "{{ ... }}"-wrapped conditional.
      - running OSD discovery: `[0-9]+\.service` so OSD ids >= 100 match
        and the dot is literal.
      - clean-pgs check: `print([...])` so the one-liner runs on both
        Python 2 and Python 3.

diff --git a/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml b/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
index a73054871..746f06595 100644
--- a/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
+++ b/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
@@ -15,15 +15,15 @@
       private: no
 
   tasks:
-  - name: exit playbook, if user did not mean to switch from non-containerized to containerized daemons?
-    fail:
-      msg: >
-        "Exiting switch-from-non-containerized-to-containerized-ceph-daemons.yml playbook,
-         cluster did not switch from non-containerized to containerized ceph daemons.
-         To switch from non-containerized to containerized ceph daemons, either say 'yes' on the prompt or
-         or use `-e ireallymeanit=yes` on the command line when
-         invoking the playbook"
-    when: ireallymeanit != 'yes'
+    - name: exit playbook, if user did not mean to switch from non-containerized to containerized daemons?
+      fail:
+        msg: >
+          "Exiting switch-from-non-containerized-to-containerized-ceph-daemons.yml playbook,
+           cluster did not switch from non-containerized to containerized ceph daemons.
+           To switch from non-containerized to containerized ceph daemons, either say 'yes' on the prompt or
+           use `-e ireallymeanit=yes` on the command line when
+           invoking the playbook"
+      when: ireallymeanit != 'yes'
 
 
 - name: make sure docker is present and started
@@ -39,52 +39,54 @@
   become: true
 
   tasks:
+    - name: install docker and dependencies for the docker module
+      package:
+        name: "{{ item }}"
+        state: present
+      with_items:
+        - python-docker-py
+        - python-urllib3
+        - docker
+      when: ansible_os_family == 'RedHat'
 
-  - name: install docker and dependencies for the docker module
-    package:
-      name: "{{ item }}"
-      state: present
-    with_items:
-      - python-docker-py
-      - python-urllib3
-      - docker
-    when: ansible_os_family == 'RedHat'
+    - name: install docker-py for the docker module
+      package:
+        name: "{{ item }}"
+        state: present
+      with_items:
+        - docker-py
+        - python-urllib3
+        - docker
+      when: ansible_os_family == 'Debian'
 
-  - name: install docker-py for the docker module
-    package:
-      name: "{{ item }}"
-      state: present
-    with_items:
-      - docker-py
-      - python-urllib3
-      - docker
-    when: ansible_os_family == 'Debian'
+    - name: start docker service
+      service:
+        name: docker
+        state: started
+        enabled: yes
 
-  - name: start docker service
-    service:
-      name: docker
-      state: started
-      enabled: yes
+    - name: check if selinux is enabled
+      command: getenforce
+      register: sestatus
+      changed_when: false
+      when: ansible_os_family == 'RedHat'
 
-  - name: check if selinux is enabled
-    command: getenforce
-    register: sestatus
-    changed_when: false
-    when: ansible_os_family == 'RedHat'
-
-  - name: set selinux permissions
-    command: chcon -Rt svirt_sandbox_file_t "{{ item }}"
-    with_items:
-      - /etc/ceph
-      - /var/lib/ceph
-    changed_when: false
-    when:
-      - sestatus.stdout != 'Disabled'
-      - ansible_os_family == 'RedHat'
+    - name: set selinux permissions
+      command: chcon -Rt svirt_sandbox_file_t "{{ item }}"
+      with_items:
+        - /etc/ceph
+        - /var/lib/ceph
+      changed_when: false
+      when:
+        - sestatus.stdout != 'Disabled'
+        - ansible_os_family == 'RedHat'
 
 - name: switching from non-containerized to containerized ceph mon
 
   vars:
+    health_mon_check_retries: 5
+    health_mon_check_delay: 15
+    containerized_deployment: true
     mon_group_name: mons
     restapi_group_name: restapis
 
@@ -94,79 +96,69 @@
   serial: 1
   become: true
 
-  tasks:
+  pre_tasks:
+    - name: select a running monitor
+      set_fact: mon_host={{ item }}
+      with_items: "{{ groups[mon_group_name] }}"
+      when: item != inventory_hostname
 
-  - name: select a running monitor
-    set_fact: mon_host={{ item }}
-    with_items: "{{ groups[mon_group_name] }}"
-    when: item != inventory_hostname
+    - name: stop non-containerized ceph mon
+      service:
+        name: "ceph-mon@{{ ansible_hostname }}"
+        state: stopped
+        enabled: no
 
-  - name: get current ceph fsid
-    command: ceph fsid
-    register: ceph_fsid
-    changed_when: false
-    delegate_to: "{{ mon_host }}"
+    - set_fact:
+        ceph_uid: 64045
+      when: ceph_docker_image_tag | match("latest") or ceph_docker_image_tag | search("ubuntu")
 
-  - name: stop ceph mon bare metal service
-    service:
-      name: "ceph-mon@{{ ansible_hostname }}"
-      state: stopped
-      enabled: no
+    - set_fact:
+        ceph_uid: 167
+      when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph") or ceph_docker_image_tag | search("fedora")
 
-  # NOTE(leseb): should we also create systemd files
-  # instead of running raw docker commands?
-  # It is probably more elegant but will require a template file...
-  # which would make this single file playbook more complex
+    - name: set proper ownership on ceph directories
+      file:
+        path: "{{ item }}"
+        owner: "{{ ceph_uid }}"
+        recurse: yes
+      with_items:
+        - /var/lib/ceph
+        - /etc/ceph
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | match("latest")
+    - name: check for existing old leveldb file extension (ldb)
+      shell: stat /var/lib/ceph/mon/*/store.db/*.ldb
+      changed_when: false
+      failed_when: false
+      register: ldb_files
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | search("ubuntu")
+    - name: rename leveldb extension from ldb to sst
+      shell: rename -v .ldb .sst /var/lib/ceph/mon/*/store.db/*.ldb
+      changed_when: false
+      failed_when: false
+      when: ldb_files.rc == 0
 
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph")
+  roles:
+    - ceph-defaults
+    - ceph-docker-common
+    - ceph-mon
 
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("fedora")
-
-  - name: set proper ownership on ceph directories
-    file:
-      path: "{{ item }}"
-      owner: "{{ ceph_uid }}"
-      recurse: yes
-    with_items:
-      - /var/lib/ceph
-      - /etc/ceph
-
-  - name: start ceph mon container image
-    docker:
-      image: "{{ ceph_docker_registry|default('docker.io') }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
-      name: "ceph-mon-{{ ansible_hostname }}"
-      net: "host"
-      state: "running"
-      privileged: "{{ mon_docker_privileged|default(false) }}"
-      env: "MON_IP={{ hostvars[inventory_hostname]['ansible_' + monitor_interface]['ipv4']['address'] }},CEPH_DAEMON=MON,CEPH_PUBLIC_NETWORK={{ public_network }},CEPH_FSID={{ ceph_fsid.stdout }},{{ ceph_mon_docker_extra_env|default('') }}"
-      volumes: "/var/lib/ceph:/var/lib/ceph,/etc/ceph:/etc/ceph,/etc/localtime:/etc/localtime:ro"
-
-  - name: waiting for the monitor to join the quorum...
-    shell: |
-      ceph -s --cluster {{ cluster|default('ceph') }} | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
-    register: result
-    until: result.rc == 0
-    retries: 5
-    delay: 10
-    changed_when: false
-    delegate_to: "{{ mon_host }}"
+  post_tasks:
+    - name: waiting for the containerized monitor to join the quorum...
+      shell: |
+        docker exec ceph-mon-{{ hostvars[mon_host]['ansible_hostname'] }} ceph --cluster {{ cluster }} -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["quorum_names"])'
+      register: result
+      until: ansible_hostname in result.stdout
+      retries: "{{ health_mon_check_retries }}"
+      delay: "{{ health_mon_check_delay }}"
+      delegate_to: "{{ mon_host }}"
 
 
 - name: switching from non-containerized to containerized ceph osd
 
   vars:
+    health_osd_check_retries: 5
+    health_osd_check_delay: 15
+    containerized_deployment: true
     osd_group_name: osds
 
   hosts:
@@ -175,92 +167,91 @@
   serial: 1
   become: true
 
-  tasks:
-  - name: collect osd ids
-    shell: |
-      if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi
-    register: osd_ids
-    changed_when: false
+  pre_tasks:
+    - name: collect running osds
+      shell: |
+        systemctl list-units | grep "loaded active" | grep -Eo 'ceph-osd@[0-9]+\.service'
+      register: running_osds
+      changed_when: false
 
-  - name: collect osd devices
-    shell: |
-      blkid | awk '/ceph data/ { sub ("1:", "", $1); print $1 }'
-    register: ceph_osd_docker_devices
-    changed_when: false
+    - name: collect osd devices
+      shell: |
+        blkid | awk '/ceph data/ { sub ("1:", "", $1); print $1 }'
+      register: ceph_osd_docker_devices
+      changed_when: false
 
-  - name: stop ceph osd service
-    service:
-      name: "ceph-osd@{{ item }}"
-      state: stopped
-      enabled: no
-    with_items: "{{ osd_ids.stdout_lines }}"
+    - name: stop non-containerized ceph osd(s)
+      service:
+        name: "{{ item }}"
+        state: stopped
+        enabled: no
+      with_items: "{{ running_osds.stdout_lines }}"
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | match("latest")
+    - set_fact:
+        ceph_uid: 64045
+      when: ceph_docker_image_tag | match("latest") or ceph_docker_image_tag | search("ubuntu")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | search("ubuntu")
+    - set_fact:
+        ceph_uid: 167
+      when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph") or ceph_docker_image_tag | search("fedora")
 
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph")
+    - name: set proper ownership on ceph directories
+      file:
+        path: "{{ item }}"
+        owner: "{{ ceph_uid }}"
+        recurse: yes
+      with_items:
+        - /var/lib/ceph
+        - /etc/ceph
 
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("fedora")
+    - name: check for existing old leveldb file extension (ldb)
+      shell: stat /var/lib/ceph/osd/*/current/omap/*.ldb
+      changed_when: false
+      failed_when: false
+      register: ldb_files
 
-  - name: set proper ownership on ceph directories
-    file:
-      path: "{{ item }}"
-      owner: "{{ ceph_uid }}"
-      recurse: yes
-    with_items:
-      - /var/lib/ceph
-      - /etc/ceph
+    - name: rename leveldb extension from ldb to sst
+      shell: rename -v .ldb .sst /var/lib/ceph/osd/*/current/omap/*.ldb
+      changed_when: false
+      failed_when: false
+      when:
+        - ldb_files.rc == 0
 
-  - name: check if containerized osds are already running
-    shell: |
-      docker ps | grep -sq {{ item | regex_replace('/', '') }}
-    changed_when: false
-    with_items: "{{ ceph_osd_docker_devices.stdout_lines }}"
-    register: osd_running
+    - name: check if containerized osds are already running
+      shell: |
+        docker ps | grep -sq {{ item | regex_replace('/', '') }}
+      changed_when: false
+      failed_when: false
+      with_items: "{{ ceph_osd_docker_devices.stdout_lines }}"
+      register: osd_running
 
-  - name: unmount all the osd directories
-    mount:
-      name: "/var/lib/ceph/osd/{{ cluster }}-{{ item.0 }}"
-      state: unmounted
-      src: "{{ item.1 }}"
-      fstype: xfs
-    with_together:
-      - "{{ osd_ids.stdout_lines }}"
-      - "{{ ceph_osd_docker_devices.stdout_lines }}"
-      - "{{ osd_running.results }}"
-    when: item.2.rc != 0
+    - name: unmount all the osd directories
+      command: umount "{{ item.0 }}"1
+      changed_when: false
+      failed_when: false
+      with_together:
+        - "{{ ceph_osd_docker_devices.stdout_lines }}"
+        - "{{ osd_running.results }}"
+      when:
+        - item.1.get("rc", 0) != 0
 
-  - name: start ceph osd container image(s)
-    docker:
-      image: "{{ ceph_docker_registry|default('docker.io') }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
-      name: "ceph-osd-{{ ansible_hostname }}-dev{{ item | regex_replace('/', '') }}"
-      net: host
-      pid: host
-      state: started
-      privileged: yes
-      env: "OSD_DEVICE={{ item }},{{ ceph_osd_docker_extra_env }}"
-      volumes: "/var/lib/ceph:/var/lib/ceph,/etc/ceph:/etc/ceph,/etc/localtime:/etc/localtime:ro,/dev:/dev,/run:/run"
-    with_items: "{{ ceph_osd_docker_devices.stdout_lines }}"
+    - set_fact: ceph_osd_docker_devices={{ ceph_osd_docker_devices.stdout_lines }}
 
 
-  - name: waiting for clean pgs...
-    shell: |
-      test "[""$(ceph --cluster {{ cluster }} -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$(ceph --cluster {{ cluster }} -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
-    register: result
-    until: result.rc == 0
-    retries: 10
-    delay: 10
-    changed_when: false
-    delegate_to: "{{ groups[mon_group_name][0] }}"
+  roles:
+    - ceph-defaults
+    - ceph-docker-common
+    - ceph-osd
+
+  post_tasks:
+    - name: waiting for clean pgs...
+      shell: |
+        test "[""$(docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$(docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} -s -f json | python -c 'import sys, json; print([ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"])')"
+      register: result
+      until: result.rc == 0
+      retries: "{{ health_osd_check_retries }}"
+      delay: "{{ health_osd_check_delay }}"
+      delegate_to: "{{ groups[mon_group_name][0] }}"
 
 
 - name: switching from non-containerized to containerized ceph mds
@@ -271,47 +262,34 @@
   serial: 1
   become: true
 
-  tasks:
+  pre_tasks:
+    - name: stop non-containerized ceph mds(s)
+      service:
+        name: "ceph-mds@{{ ansible_hostname }}"
+        state: stopped
+        enabled: no
 
-  - name: stop ceph mds service
-    service:
-      name: "ceph-mds@{{ ansible_hostname }}"
-      state: stopped
-      enabled: no
+    - set_fact:
+        ceph_uid: 64045
+      when: ceph_docker_image_tag | match("latest") or ceph_docker_image_tag | search("ubuntu")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | match("latest")
+    - set_fact:
+        ceph_uid: 167
+      when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph") or ceph_docker_image_tag | search("fedora")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | search("ubuntu")
+    - name: set proper ownership on ceph directories
+      file:
+        path: "{{ item }}"
+        owner: "{{ ceph_uid }}"
+        recurse: yes
+      with_items:
+        - /var/lib/ceph
+        - /etc/ceph
 
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph")
-
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("fedora")
-
-  - name: set proper ownership on ceph directories
-    file:
-      path: "{{ item }}"
-      owner: "{{ ceph_uid }}"
-      recurse: yes
-    with_items:
-      - /var/lib/ceph
-      - /etc/ceph
-
-  - name: start ceph metadata container image
-    docker:
-      image: "{{ ceph_docker_registry|default('docker.io') }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
-      name: ceph-mds-{{ ansible_hostname }}
-      net: host
-      state: running
-      env: "CEPH_DAEMON=MDS,CEPHFS_CREATE=1,{{ ceph_mds_docker_extra_env }}"
-      volumes: "/var/lib/ceph:/var/lib/ceph,/etc/ceph:/etc/ceph,/etc/localtime:/etc/localtime:ro"
+  roles:
+    - ceph-defaults
+    - ceph-docker-common
+    - ceph-mds
 
 
 - name: switching from non-containerized to containerized ceph rgw
@@ -322,48 +300,34 @@
   serial: 1
   become: true
 
-  tasks:
+  pre_tasks:
+    - name: stop non-containerized ceph rgw(s)
+      service:
+        name: "ceph-rgw@{{ ansible_hostname }}"
+        state: stopped
+        enabled: no
 
-  - name: stop ceph rgw service
-    service:
-      name: "ceph-rgw@{{ ansible_hostname }}"
-      state: stopped
-      enabled: no
+    - set_fact:
+        ceph_uid: 64045
+      when: ceph_docker_image_tag | match("latest") or ceph_docker_image_tag | search("ubuntu")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | match("latest")
+    - set_fact:
+        ceph_uid: 167
+      when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph") or ceph_docker_image_tag | search("fedora")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | search("ubuntu")
+    - name: set proper ownership on ceph directories
+      file:
+        path: "{{ item }}"
+        owner: "{{ ceph_uid }}"
+        recurse: yes
+      with_items:
+        - /var/lib/ceph
+        - /etc/ceph
 
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph")
-
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("fedora")
-
-  - name: set proper ownership on ceph directories
-    file:
-      path: "{{ item }}"
-      owner: "{{ ceph_uid }}"
-      recurse: yes
-    with_items:
-      - /var/lib/ceph
-      - /etc/ceph
-
-  - name: start ceph rados gateway container image
-    docker:
-      image: "{{ ceph_docker_registry|default('docker.io') }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
-      name: ceph-rgw-{{ ansible_hostname }}
-      expose: "{{ ceph_rgw_civetweb_port }}"
-      ports: "{{ ceph_rgw_civetweb_port }}:{{ ceph_rgw_civetweb_port }}"
-      state: running
-      env: "CEPH_DAEMON=RGW,{{ ceph_rgw_docker_extra_env }}"
-      volumes: "/var/lib/ceph:/var/lib/ceph,/etc/ceph:/etc/ceph,/etc/localtime:/etc/localtime:ro"
+  roles:
+    - ceph-defaults
+    - ceph-docker-common
+    - ceph-rgw
 
 
 - name: switching from non-containerized to containerized ceph rbd-mirror
@@ -374,46 +338,34 @@
   serial: 1
   become: true
 
-  tasks:
+  pre_tasks:
+    - name: stop non-containerized ceph rbd mirror(s)
+      service:
+        name: "ceph-rbd-mirror@{{ ansible_hostname }}"
+        state: stopped
+        enabled: no
 
-  - name: stop ceph rbd mirror service
-    service:
-      name: "ceph-rbd-mirror@{{ ansible_hostname }}"
-      state: stopped
-      enabled: no
+    - set_fact:
+        ceph_uid: 64045
+      when: ceph_docker_image_tag | match("latest") or ceph_docker_image_tag | search("ubuntu")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | match("latest")
+    - set_fact:
+        ceph_uid: 167
+      when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph") or ceph_docker_image_tag | search("fedora")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | search("ubuntu")
+    - name: set proper ownership on ceph directories
+      file:
+        path: "{{ item }}"
+        owner: "{{ ceph_uid }}"
+        recurse: yes
+      with_items:
+        - /var/lib/ceph
+        - /etc/ceph
 
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph")
-
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("fedora")
-
-  - name: set proper ownership on ceph directories
-    file:
-      path: "{{ item }}"
-      owner: "{{ ceph_uid }}"
-      recurse: yes
-    with_items:
-      - /var/lib/ceph
-      - /etc/ceph
-
-  - name: start ceph rbd mirror container image
-    docker:
-      image: "{{ ceph_docker_registry|default('docker.io') }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
-      name: "ceph-rbd-mirror-{{ ansible_hostname }}"
-      net: host
-      state: running
-      volumes: "/etc/ceph:/etc/ceph,/etc/localtime:/etc/localtime:ro"
+  roles:
+    - ceph-defaults
+    - ceph-docker-common
+    - ceph-rbd-mirror
 
 
 - name: switching from non-containerized to containerized ceph nfs
@@ -424,46 +376,31 @@
   serial: 1
   become: true
 
-  tasks:
+  pre_tasks:
+    - name: stop non-containerized ceph nfs(s)
+      service:
+        name: "ceph-nfs@{{ ansible_hostname }}"
+        state: stopped
+        enabled: no
 
-  - name: stop ceph nfs service
-    service:
-      name: "ceph-nfs@{{ ansible_hostname }}"
-      state: stopped
-      enabled: no
+    - set_fact:
+        ceph_uid: 64045
+      when: ceph_docker_image_tag | match("latest") or ceph_docker_image_tag | search("ubuntu")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | match("latest")
+    - set_fact:
+        ceph_uid: 167
+      when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph") or ceph_docker_image_tag | search("fedora")
 
-  - set_fact:
-      ceph_uid: 64045
-    when: ceph_docker_image_tag | search("ubuntu")
+    - name: set proper ownership on ceph directories
+      file:
+        path: "{{ item }}"
+        owner: "{{ ceph_uid }}"
+        recurse: yes
+      with_items:
+        - /var/lib/ceph
+        - /etc/ceph
 
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("centos") or ceph_docker_image | search("rhceph")
-
-  - set_fact:
-      ceph_uid: 167
-    when: ceph_docker_image_tag | search("fedora")
-
-  - name: set proper ownership on ceph directories
-    file:
-      path: "{{ item }}"
-      owner: "{{ ceph_uid }}"
-      recurse: yes
-    with_items:
-      - /var/lib/ceph
-      - /etc/ceph
-
-  - name: start ceph nfs container image
-    docker:
-      image: "{{ ceph_docker_registry|default('docker.io') }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
-      name: "ceph-nfs-{{ ansible_hostname }}"
-      net: "host"
-      state: "running"
-      privileged: true
-      ports: "{{ ceph_nfs_port }}:{{ ceph_nfs_port }},111:111"
-      env: "CEPH_DAEMON=NFS,CEPH_PUBLIC_NETWORK={{ ceph_nfs_docker_subnet }},{{ ceph_nfs_docker_extra_env }}"
-      volumes: "/etc/ceph:/etc/ceph,/etc/ganesha:/etc/ganesha,/etc/localtime:/etc/localtime:ro"
+  roles:
+    - ceph-defaults
+    - ceph-docker-common
+    - ceph-nfs
diff --git a/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2
index de1fe101b..f89e8cd55 100644
--- a/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2
+++ b/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2
@@ -22,8 +22,8 @@ check_pgs() {
 
 wait_for_socket_in_docker() {
   if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"; then
-    log "Timed out while trying to look for a Ceph OSD socket."
-    log "Abort mission!"
+    echo "Timed out while trying to look for a Ceph OSD socket."
+    echo "Abort mission!"
     exit 1
   fi
 }
@@ -52,7 +52,7 @@ get_docker_osd_id() {
 
 # For containerized deployments, the unit file looks like: ceph-osd@sda.service
 # For non-containerized deployments, the unit file looks like: ceph-osd@0.service
-for unit in $(systemctl list-units | grep -oE "ceph-osd@([0-9]{1,2}|[a-z]+).service"); do
+for unit in $(systemctl list-units | grep "loaded active" | grep -oE "ceph-osd@([0-9]{1,2}|[a-z]+).service"); do
   # First, restart daemon(s)
   systemctl restart "${unit}"
   # We need to wait because it may take some time for the socket to actually exists
@@ -66,7 +66,7 @@ for unit in $(systemctl list-units | grep -oE "ceph-osd@([0-9]{1,2}|[a-z]+).serv
 {% else %}
   osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]{1,2}')
 {% endif %}
-  SOCKET=/var/run/ceph/test-osd.${osd_id}.asok
+  SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
   while [ $COUNT -ne 0 ]; do
     $docker_exec test -S "$SOCKET" && check_pgs && continue 2
     sleep 1