diff --git a/roles/ceph-handler/templates/restart_osd_daemon.sh.j2 b/roles/ceph-handler/templates/restart_osd_daemon.sh.j2
index d7fb3e1e0..e39fde10a 100644
--- a/roles/ceph-handler/templates/restart_osd_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_osd_daemon.sh.j2
@@ -36,10 +36,6 @@ wait_for_socket_in_docker() {
   fi
 }
 
-get_dev_name() {
-  echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/'
-}
-
 get_docker_id_from_dev_name() {
   local id
   local count
@@ -53,26 +49,17 @@ get_docker_id_from_dev_name() {
   echo "$id"
 }
 
-# For containerized deployments, the unit file looks like: ceph-osd@sda.service
-# For non-containerized deployments, the unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
-for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([0-9]+|[a-z]+).service"); do
+# The unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
+for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@[0-9]+.service"); do
   # First, restart daemon(s)
   systemctl restart "${unit}"
   # We need to wait because it may take some time for the socket to actually exists
   COUNT=10
   # Wait and ensure the socket exists after restarting the daemon
-  {% if containerized_deployment and osd_scenario != 'lvm' -%}
-  id=$(get_dev_name "$unit")
-  container_id=$(get_docker_id_from_dev_name "$id")
-  wait_for_socket_in_docker "$container_id"
-  osd_id=$whoami
-  docker_exec="docker exec $container_id"
-  {% elif containerized_deployment and osd_scenario == 'lvm' %}
   osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
+  {% if containerized_deployment -%}
   container_id=$(get_docker_id_from_dev_name "ceph-osd-${osd_id}")
   docker_exec="docker exec $container_id"
-  {% else %}
-  osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
   {% endif %}
   SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
   while [ $COUNT -ne 0 ]; do
diff --git a/roles/ceph-osd/tasks/start_osds.yml b/roles/ceph-osd/tasks/start_osds.yml
index d3dab5f53..a6503dfc5 100644
--- a/roles/ceph-osd/tasks/start_osds.yml
+++ b/roles/ceph-osd/tasks/start_osds.yml
@@ -11,21 +11,52 @@
   when:
     - ceph_docker_on_openstack
 
-- name: test if the container image has directory {{ container_bin_path }}
-  command: "docker run --rm --entrypoint=test {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} -d {{ container_bin_path }}"
-  changed_when: false
-  failed_when: false
-  register: test_container_bin_path
-  when:
-    - osd_scenario != 'lvm'
+- name: with non lvm scenario
+  when: osd_scenario != 'lvm'
+  block:
+    - name: test if the container image has directory {{ container_bin_path }}
+      command: "docker run --rm --entrypoint=test {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} -d {{ container_bin_path }}"
+      changed_when: false
+      failed_when: false
+      register: test_container_bin_path
 
-- name: test if the container image has the disk_list function
-  command: "docker run --rm --entrypoint=stat {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} {{ container_bin_path + '/disk_list.sh' if test_container_bin_path.rc == 0 else 'disk_list.sh' }}"
-  changed_when: false
-  failed_when: false
-  register: disk_list
-  when:
-    - osd_scenario != 'lvm'
+    - name: test if the container image has the disk_list function
+      command: "docker run --rm --entrypoint=stat {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} {{ container_bin_path + '/disk_list.sh' if test_container_bin_path.rc == 0 else 'disk_list.sh' }}"
+      changed_when: false
+      failed_when: false
+      register: disk_list
+
+    - name: test activated ceph-disk osds
+      shell: |
+        ls /var/lib/ceph/osd/ | sed 's/.*-//'
+      register: activated_osds
+
+    - name: activate containerized osd(s)
+      shell: |
+        DOCKER_ENV=$(docker run --rm --net=host --ulimit nofile=1024:1024 \
+                     --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z \
+                     -e CLUSTER={{ cluster }} -e OSD_DEVICE={{ item }} \
+                     {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \
+                     disk_list)
+        docker run --rm --net=host \
+        --ulimit nofile=1024:1024 \
+        --ipc=host --pid=host --privileged=true \
+        -v /etc/ceph:/etc/ceph:z \
+        -v /var/lib/ceph/:/var/lib/ceph/:z \
+        -v /dev:/dev \
+        -v /etc/localtime:/etc/localtime:ro \
+        -e DEBUG=verbose \
+        -e CLUSTER={{ cluster }} \
+        -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE_ONLY \
+        -e OSD_DEVICE={{ item }} \
+        ${DOCKER_ENV} \
+        {{ docker_env_args }} \
+        {{ ceph_osd_docker_prepare_env }} \
+        {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
+      with_items: "{{ devices }}"
+      when:
+        - devices is defined
+        - devices | length > activated_osds.stdout_lines | length
 
 - name: generate ceph osd docker run script
   become: true
@@ -44,18 +75,21 @@
 - name: get osd ids
   shell: |
     ls /var/lib/ceph/osd/ | sed 's/.*-//'
-  register: osd_ids_non_container
+  register: ceph_disk_osd_ids
+  when: osd_scenario != 'lvm'
 
 - name: set_fact docker_exec_start_osd
   set_fact:
     docker_exec_start_osd: "{{ 'docker run --rm --ulimit nofile=1024:1024 --privileged=true -v /run/lvm/lvmetad.socket:/run/lvm/lvmetad.socket -v /var/run/udev/:/var/run/udev/:z -v /etc/ceph:/etc/ceph:z -v /dev:/dev --entrypoint=ceph-volume ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else 'ceph-volume' }}"
+  when: osd_scenario == 'lvm'
 
 - name: collect osd ids
   shell: >
     {{ docker_exec_start_osd }} lvm list --format json
   changed_when: false
   failed_when: false
-  register: ceph_osd_ids
+  register: ceph_volume_osd_ids
+  when: osd_scenario == 'lvm'
 
 - name: generate systemd unit file
   become: true
@@ -70,13 +104,41 @@
   when:
     - containerized_deployment
 
+- name: device to ID migration
+  when:
+    - containerized_deployment | bool
+    - osd_scenario != 'lvm'
+  block:
+    - name: check ceph-osd service using device name
+      shell: |
+        systemctl list-units | grep -E "loaded * active" | grep -coE "ceph-osd@([a-z]+|nvme[0-9]+n[0-9]+).service"
+      register: ceph_osd_device_name
+      changed_when: false
+      failed_when: false
+
+    - name: copy systemd-device-to-id.sh script
+      template:
+        src: systemd-device-to-id.sh.j2
+        dest: /tmp/systemd-device-to-id.sh
+        owner: root
+        group: root
+        mode: 0750
+      when: ceph_osd_device_name.stdout|int != 0
+
+    - name: run the systemd-device-to-id.sh script
+      command: /usr/bin/env bash /tmp/systemd-device-to-id.sh
+      when: ceph_osd_device_name.stdout|int != 0
+      with_items: "{{ groups[osd_group_name] }}"
+      delegate_to: "{{ item }}"
+      run_once: true
+
 - name: systemd start osd
   systemd:
-    name: ceph-osd@{{ item | regex_replace('/dev/', '') if osd_scenario != 'lvm' and containerized_deployment else item }}
+    name: ceph-osd@{{ item }}
     state: started
     enabled: yes
     daemon_reload: yes
-  with_items: "{{ devices if osd_scenario != 'lvm' and containerized_deployment else ((ceph_osd_ids.stdout | from_json).keys() | list) if osd_scenario == 'lvm' and not containerized_deployment else osd_ids_non_container.stdout_lines }}"
+  with_items: "{{ ((ceph_volume_osd_ids.stdout | from_json).keys() | list) if osd_scenario == 'lvm' else ceph_disk_osd_ids.stdout_lines }}"
 
 - name: ensure systemd service override directory exists
   file:
diff --git a/roles/ceph-osd/templates/ceph-osd-run.sh.j2 b/roles/ceph-osd/templates/ceph-osd-run.sh.j2
index a98dc6578..e7e0e29cf 100644
--- a/roles/ceph-osd/templates/ceph-osd-run.sh.j2
+++ b/roles/ceph-osd/templates/ceph-osd-run.sh.j2
@@ -12,8 +12,20 @@ DOCKER_ENV=""
 #############
 # FUNCTIONS #
 #############
+function id_to_device () {
+{% if dmcrypt | bool %}
+  docker run --rm --net=host --ulimit nofile=1024:1024 --ipc=host --pid=host --privileged=true -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph/:/var/lib/ceph/:z -v /dev:/dev -v /etc/localtime:/etc/localtime:ro -e DEBUG=verbose -e CLUSTER={{ cluster }} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} osd_ceph_disk_dmcrypt_data_map
+{% endif %}
+  DATA_PART=$(docker run --rm --ulimit nofile=1024:1024 --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z --entrypoint ceph-disk {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} list | grep ", osd\.${1}," | awk '{ print $1 }')
+  if [[ "${DATA_PART}" =~ ^/dev/(cciss|nvme) ]]; then
+    OSD_DEVICE=${DATA_PART:0:-2}
+  else
+    OSD_DEVICE=${DATA_PART:0:-1}
+  fi
+}
+
 function expose_partitions () {
-DOCKER_ENV=$(docker run --rm --net=host --name expose_partitions_${1} --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z -e CLUSTER={{ cluster }} -e OSD_DEVICE=/dev/${1} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} disk_list)
+  DOCKER_ENV=$(docker run --rm --net=host --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z -e CLUSTER={{ cluster }} -e OSD_DEVICE=${1} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} disk_list)
 }
 {% else -%}
 # NOTE(leseb): maintains backwards compatibility with old ceph-docker Jewel images
@@ -54,7 +66,8 @@ function expose_partitions {
 {% endif -%}
 
-expose_partitions "$1"
+id_to_device "$1"
+expose_partitions "${OSD_DEVICE}"
 
 
 # discover osd_objectstore for ceph-disk based osds
 if [[ $DOCKER_ENV =~ "BLUESTORE" ]]; then
@@ -122,12 +135,11 @@ numactl \
   -v /run/lvm/lvmetad.socket:/run/lvm/lvmetad.socket \
   -e CEPH_DAEMON=OSD_CEPH_VOLUME_ACTIVATE \
   -e OSD_ID="$1" \
-  --name=ceph-osd-"$1" \
 {% else -%}
   $DOCKER_ENV \
   -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE \
-  -e OSD_DEVICE=/dev/"${1}" \
-  --name=ceph-osd-{{ ansible_hostname }}-"${1}" \
+  -e OSD_DEVICE="${OSD_DEVICE}" \
 {% endif -%}
+  --name=ceph-osd-"$1" \
   {{ ceph_osd_docker_extra_env }} \
   {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
diff --git a/roles/ceph-osd/templates/systemd-device-to-id.sh.j2 b/roles/ceph-osd/templates/systemd-device-to-id.sh.j2
new file mode 100644
index 000000000..b73e1c1b9
--- /dev/null
+++ b/roles/ceph-osd/templates/systemd-device-to-id.sh.j2
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+DELAY="{{ handler_health_osd_check_delay }}"
+CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"
+
+check_pgs() {
+  num_pgs=$($docker_exec ceph $CEPH_CLI -s -f json|python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')
+  if [[ "$num_pgs" == "0" ]]; then
+    return 0
+  fi
+  while [ $RETRIES -ne 0 ]; do
+    test "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')" -eq "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print sum ( [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if "active+clean" in i["state_name"]])')"
+    RET=$?
+    test $RET -eq 0 && return 0
+    sleep $DELAY
+    let RETRIES=RETRIES-1
+  done
+  # PGs not clean, exiting with return code 1
+  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
+  echo "It is possible that the cluster has less OSDs than the replica configuration"
+  echo "Will refuse to continue"
+  $docker_exec ceph $CEPH_CLI -s
+  $docker_exec ceph $CEPH_CLI osd dump
+  $docker_exec ceph $CEPH_CLI osd tree
+  $docker_exec ceph $CEPH_CLI osd crush rule dump
+  exit 1
+}
+
+wait_for_socket_in_docker() {
+  osd_mount_point=$(docker exec "$1" df --output=target | grep '/var/lib/ceph/osd/')
+  whoami=$(docker exec "$1" cat $osd_mount_point/whoami)
+  if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/{{ cluster }}-osd.${whoami}.asok ]; do sleep 1 ; done"; then
+    echo "Timed out while trying to look for a Ceph OSD socket."
+    echo "Abort mission!"
+    exit 1
+  fi
+}
+
+get_dev_name() {
+  echo $1 | sed -r 's/ceph-osd@([a-z]{1,4}|nvme[0-9]+n[0-9]+)\.service/\1/'
+}
+
+get_docker_id_from_dev_name() {
+  local id
+  local count
+  count=10
+  while [ $count -ne 0 ]; do
+    id=$(docker ps -q -f "name=${1}$")
+    test "$id" != "" && break
+    sleep $DELAY
+    let count=count-1
+  done
+  echo "$id"
+}
+
+for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([a-z]+|nvme[0-9]+n[0-9]+).service"); do
+  dev_name=$(get_dev_name "$unit")
+  container_id=$(get_docker_id_from_dev_name "$dev_name")
+  wait_for_socket_in_docker "$container_id"
+  osd_id=$whoami
+  # Stop and Disable the unit based on device name
+  systemctl stop ceph-osd@${dev_name}
+  systemctl disable ceph-osd@${dev_name}
+  # Enable and Start the unit based on OSD id
+  systemctl enable ceph-osd@${osd_id}
+  systemctl start ceph-osd@${osd_id}
+  container_id=$(get_docker_id_from_dev_name "ceph-osd-${osd_id}")
+  docker_exec="docker exec $container_id"
+  SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
+  COUNT=10
+  while [ $COUNT -ne 0 ]; do
+    RETRIES="{{ handler_health_osd_check_retries }}"
+    $docker_exec test -S "$SOCKET" && check_pgs && continue 2
+    sleep $DELAY
+    let COUNT=COUNT-1
+  done
+  # If we reach this point, it means the socket is not present.
+  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running. Showing ceph-osd unit logs now:"
+  journalctl -u "ceph-osd@${osd_id}.service"
+  exit 1
+done