mirror of https://github.com/ceph/ceph-ansible.git
ceph-osd: use OSD id with systemd ceph-disk
ceph-osd: use OSD id with systemd ceph-disk

When using containerized deployment we have to create the systemd service unit based on a template. The current implementation with ceph-disk uses the device name as the parameter to the systemd service and for the container name too:

$ systemctl start ceph-osd@sdb
$ docker ps --filter 'name=ceph-osd-*'
CONTAINER ID   IMAGE                         NAMES
065530d0a27f   ceph/daemon:latest-luminous   ceph-osd-strg0-sdb

This is the only scenario (compared to non-containerized or ceph-volume based deployments) that doesn't use the OSD id:

$ systemctl start ceph-osd@0
$ docker ps --filter 'name=ceph-osd-*'
CONTAINER ID   IMAGE                         NAMES
d34552ec157e   ceph/daemon:latest-luminous   ceph-osd-0

Also, if the device mapping doesn't persist across a system reboot (i.e. sdb might be remapped to sde), then the OSD service won't come back after the reboot.

This patch allows the OSD id to be used with the ceph-osd systemd service, but it requires activating the OSD manually with ceph-disk first in order to assign the ID to that OSD.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1670734
Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
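For context, the new id_to_device() helper added in this patch resolves an OSD id back to its data device by grepping the ceph-disk list output. A minimal standalone sketch of the same lookup (the image tag and OSD id are only illustrative, reusing the example above):

# Sketch only: map osd.0 back to its data partition the way id_to_device() does.
OSD_ID=0
DATA_PART=$(docker run --rm --ulimit nofile=1024:1024 --privileged=true \
  -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z \
  --entrypoint ceph-disk ceph/daemon:latest-luminous \
  list | grep ", osd\.${OSD_ID}," | awk '{ print $1 }')
echo "${DATA_PART}"   # data partition of osd.0, e.g. /dev/sdb1 in the example above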
parent df46d10c27
commit bedc0ab69d
@@ -36,10 +36,6 @@ wait_for_socket_in_docker() {
  fi
}

get_dev_name() {
  echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/'
}

get_docker_id_from_dev_name() {
  local id
  local count
@@ -53,26 +49,17 @@ get_docker_id_from_dev_name() {
  echo "$id"
}

# For containerized deployments, the unit file looks like: ceph-osd@sda.service
# For non-containerized deployments, the unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([0-9]+|[a-z]+).service"); do
# The unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@[0-9]+.service"); do
  # First, restart daemon(s)
  systemctl restart "${unit}"
  # We need to wait because it may take some time for the socket to actually exists
  COUNT=10
  # Wait and ensure the socket exists after restarting the daemon
{% if containerized_deployment and osd_scenario != 'lvm' -%}
  id=$(get_dev_name "$unit")
  container_id=$(get_docker_id_from_dev_name "$id")
  wait_for_socket_in_docker "$container_id"
  osd_id=$whoami
  docker_exec="docker exec $container_id"
{% elif containerized_deployment and osd_scenario == 'lvm' %}
  osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
{% if containerized_deployment -%}
  container_id=$(get_docker_id_from_dev_name "ceph-osd-${osd_id}")
  docker_exec="docker exec $container_id"
{% else %}
  osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
{% endif %}
  SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
  while [ $COUNT -ne 0 ]; do
@@ -11,21 +11,52 @@
    when:
      - ceph_docker_on_openstack

- name: with non lvm scenario
  when: osd_scenario != 'lvm'
  block:
    - name: test if the container image has directory {{ container_bin_path }}
      command: "docker run --rm --entrypoint=test {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} -d {{ container_bin_path }}"
      changed_when: false
      failed_when: false
      register: test_container_bin_path
      when:
        - osd_scenario != 'lvm'

    - name: test if the container image has the disk_list function
      command: "docker run --rm --entrypoint=stat {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} {{ container_bin_path + '/disk_list.sh' if test_container_bin_path.rc == 0 else 'disk_list.sh' }}"
      changed_when: false
      failed_when: false
      register: disk_list

    - name: test activated ceph-disk osds
      shell: |
        ls /var/lib/ceph/osd/ | sed 's/.*-//'
      register: activated_osds

    - name: activate containerized osd(s)
      shell: |
        DOCKER_ENV=$(docker run --rm --net=host --ulimit nofile=1024:1024 \
                     --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z \
                     -e CLUSTER={{ cluster }} -e OSD_DEVICE={{ item }} \
                     {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \
                     disk_list)
        docker run --rm --net=host \
          --ulimit nofile=1024:1024 \
          --ipc=host --pid=host --privileged=true \
          -v /etc/ceph:/etc/ceph:z \
          -v /var/lib/ceph/:/var/lib/ceph/:z \
          -v /dev:/dev \
          -v /etc/localtime:/etc/localtime:ro \
          -e DEBUG=verbose \
          -e CLUSTER={{ cluster }} \
          -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE_ONLY \
          -e OSD_DEVICE={{ item }} \
          ${DOCKER_ENV} \
          {{ docker_env_args }} \
          {{ ceph_osd_docker_prepare_env }} \
          {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
      with_items: "{{ devices }}"
      when:
        - osd_scenario != 'lvm'
        - devices is defined
        - devices | length > activated_osds.stdout_lines | length

- name: generate ceph osd docker run script
  become: true
@@ -44,18 +75,21 @@
- name: get osd ids
  shell: |
    ls /var/lib/ceph/osd/ | sed 's/.*-//'
  register: osd_ids_non_container
  register: ceph_disk_osd_ids
  when: osd_scenario != 'lvm'

- name: set_fact docker_exec_start_osd
  set_fact:
    docker_exec_start_osd: "{{ 'docker run --rm --ulimit nofile=1024:1024 --privileged=true -v /run/lvm/lvmetad.socket:/run/lvm/lvmetad.socket -v /var/run/udev/:/var/run/udev/:z -v /etc/ceph:/etc/ceph:z -v /dev:/dev --entrypoint=ceph-volume ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else 'ceph-volume' }}"
  when: osd_scenario == 'lvm'

- name: collect osd ids
  shell: >
    {{ docker_exec_start_osd }} lvm list --format json
  changed_when: false
  failed_when: false
  register: ceph_osd_ids
  register: ceph_volume_osd_ids
  when: osd_scenario == 'lvm'

- name: generate systemd unit file
  become: true
@@ -70,13 +104,41 @@
  when:
    - containerized_deployment

- name: device to ID migration
  when:
    - containerized_deployment | bool
    - osd_scenario != 'lvm'
  block:
    - name: check ceph-osd service using device name
      shell: |
        systemctl list-units | grep -E "loaded * active" | grep -coE "ceph-osd@([a-z]+|nvme[0-9]+n[0-9]+).service"
      register: ceph_osd_device_name
      changed_when: false
      failed_when: false

    - name: copy systemd-device-to-id.sh script
      template:
        src: systemd-device-to-id.sh.j2
        dest: /tmp/systemd-device-to-id.sh
        owner: root
        group: root
        mode: 0750
      when: ceph_osd_device_name.stdout|int != 0

    - name: run the systemd-device-to-id.sh script
      command: /usr/bin/env bash /tmp/systemd-device-to-id.sh
      when: ceph_osd_device_name.stdout|int != 0
      with_items: "{{ groups[osd_group_name] }}"
      delegate_to: "{{ item }}"
      run_once: true

- name: systemd start osd
  systemd:
    name: ceph-osd@{{ item | regex_replace('/dev/', '') if osd_scenario != 'lvm' and containerized_deployment else item }}
    name: ceph-osd@{{ item }}
    state: started
    enabled: yes
    daemon_reload: yes
  with_items: "{{ devices if osd_scenario != 'lvm' and containerized_deployment else ((ceph_osd_ids.stdout | from_json).keys() | list) if osd_scenario == 'lvm' and not containerized_deployment else osd_ids_non_container.stdout_lines }}"
  with_items: "{{ ((ceph_volume_osd_ids.stdout | from_json).keys() | list) if osd_scenario == 'lvm' else ceph_disk_osd_ids.stdout_lines }}"

- name: ensure systemd service override directory exists
  file:
@@ -12,8 +12,20 @@ DOCKER_ENV=""
#############
# FUNCTIONS #
#############
function id_to_device () {
{% if dmcrypt | bool %}
  docker run --rm --net=host --ulimit nofile=1024:1024 --ipc=host --pid=host --privileged=true -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph/:/var/lib/ceph/:z -v /dev:/dev -v /etc/localtime:/etc/localtime:ro -e DEBUG=verbose -e CLUSTER={{ cluster }} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} osd_ceph_disk_dmcrypt_data_map
{% endif %}
  DATA_PART=$(docker run --rm --ulimit nofile=1024:1024 --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z --entrypoint ceph-disk {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} list | grep ", osd\.${1}," | awk '{ print $1 }')
  if [[ "${DATA_PART}" =~ ^/dev/(cciss|nvme) ]]; then
    OSD_DEVICE=${DATA_PART:0:-2}
  else
    OSD_DEVICE=${DATA_PART:0:-1}
  fi
}

function expose_partitions () {
  DOCKER_ENV=$(docker run --rm --net=host --name expose_partitions_${1} --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z -e CLUSTER={{ cluster }} -e OSD_DEVICE=/dev/${1} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} disk_list)
  DOCKER_ENV=$(docker run --rm --net=host --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z -e CLUSTER={{ cluster }} -e OSD_DEVICE=${1} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} disk_list)
}
{% else -%}
# NOTE(leseb): maintains backwards compatibility with old ceph-docker Jewel images
@@ -54,7 +66,8 @@ function expose_partitions {

{% endif -%}

expose_partitions "$1"
id_to_device "$1"
expose_partitions "${OSD_DEVICE}"

# discover osd_objectstore for ceph-disk based osds
if [[ $DOCKER_ENV =~ "BLUESTORE" ]]; then
@@ -122,12 +135,11 @@ numactl \
  -v /run/lvm/lvmetad.socket:/run/lvm/lvmetad.socket \
  -e CEPH_DAEMON=OSD_CEPH_VOLUME_ACTIVATE \
  -e OSD_ID="$1" \
  --name=ceph-osd-"$1" \
{% else -%}
  $DOCKER_ENV \
  -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE \
  -e OSD_DEVICE=/dev/"${1}" \
  --name=ceph-osd-{{ ansible_hostname }}-"${1}" \
  -e OSD_DEVICE="${OSD_DEVICE}" \
{% endif -%}
  --name=ceph-osd-"$1" \
  {{ ceph_osd_docker_extra_env }} \
  {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
@@ -0,0 +1,81 @@
#!/bin/bash

DELAY="{{ handler_health_osd_check_delay }}"
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"

check_pgs() {
  num_pgs=$($docker_exec ceph $CEPH_CLI -s -f json|python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')
  if [[ "$num_pgs" == "0" ]]; then
    return 0
  fi
  while [ $RETRIES -ne 0 ]; do
    test "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')" -eq "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print sum ( [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if "active+clean" in i["state_name"]])')"
    RET=$?
    test $RET -eq 0 && return 0
    sleep $DELAY
    let RETRIES=RETRIES-1
  done
  # PGs not clean, exiting with return code 1
  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
  echo "It is possible that the cluster has less OSDs than the replica configuration"
  echo "Will refuse to continue"
  $docker_exec ceph $CEPH_CLI -s
  $docker_exec ceph $CEPH_CLI osd dump
  $docker_exec ceph $CEPH_CLI osd tree
  $docker_exec ceph $CEPH_CLI osd crush rule dump
  exit 1
}

wait_for_socket_in_docker() {
  osd_mount_point=$(docker exec "$1" df --output=target | grep '/var/lib/ceph/osd/')
  whoami=$(docker exec "$1" cat $osd_mount_point/whoami)
  if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/{{ cluster }}-osd.${whoami}.asok ]; do sleep 1 ; done"; then
    echo "Timed out while trying to look for a Ceph OSD socket."
    echo "Abort mission!"
    exit 1
  fi
}

get_dev_name() {
  echo $1 | sed -r 's/ceph-osd@([a-z]{1,4}|nvme[0-9]+n[0-9]+)\.service/\1/'
}

get_docker_id_from_dev_name() {
  local id
  local count
  count=10
  while [ $count -ne 0 ]; do
    id=$(docker ps -q -f "name=${1}$")
    test "$id" != "" && break
    sleep $DELAY
    let count=count-1
  done
  echo "$id"
}

for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([a-z]+|nvme[0-9]+n[0-9]+).service"); do
  dev_name=$(get_dev_name "$unit")
  container_id=$(get_docker_id_from_dev_name "$dev_name")
  wait_for_socket_in_docker "$container_id"
  osd_id=$whoami
  # Stop and Disable the unit based on device name
  systemctl stop ceph-osd@${dev_name}
  systemctl disable ceph-osd@${dev_name}
  # Enable and Start the unit based on OSD id
  systemctl enable ceph-osd@${osd_id}
  systemctl start ceph-osd@${osd_id}
  container_id=$(get_docker_id_from_dev_name "ceph-osd-${osd_id}")
  docker_exec="docker exec $container_id"
  SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
  COUNT=10
  while [ $COUNT -ne 0 ]; do
    RETRIES="{{ handler_health_osd_check_retries }}"
    $docker_exec test -S "$SOCKET" && check_pgs && continue 2
    sleep $DELAY
    let COUNT=COUNT-1
  done
  # If we reach this point, it means the socket is not present.
  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running. Showing ceph-osd unit logs now:"
  journalctl -u "ceph-osd@${osd_id}.service"
  exit 1
done