docker: osd, do not use privileged container anymore

Oh yeah! This patch adds finer-grained control over how we run the
OSD activation container. We now use --device to give read, write and
mknod access to a specific device to be consumed by Ceph. We also use the
SYS_ADMIN cap to allow mount operations, since ceph-disk needs to temporarily
mount the osd data directory during the activation sequence.

This patch also enables the support of dedicated journal devices when
deploying ceph-docker with ceph-ansible.

Depends on https://github.com/ceph/ceph-docker/pull/478

Signed-off-by: Sébastien Han <seb@redhat.com>
pull/1287/head
Sébastien Han 2017-02-07 22:00:53 +01:00
parent a002508a91
commit 73cf0378c2
5 changed files with 86 additions and 22 deletions

View File

@ -134,6 +134,11 @@ dummy:
# - /dev/sdf
# - /dev/sdg
# - /dev/sdg
#
# NOTE(leseb):
# In a containerized scenario we only support A SINGLE journal device
# for all the OSDs on a given machine. If you configure more than one, bad things will happen.
# This is a limitation we plan to fix at some point.
#raw_journal_devices: []
@ -176,6 +181,11 @@ dummy:
#kv_type: etcd
#kv_endpoint: 127.0.0.1
#kv_port: 4001
# Add -e OSD_JOURNAL={{ raw_journal_devices }} to the ceph_osd_docker_prepare_env variable to configure a journal device.
# Make sure you only pass a single device, otherwise this will fail horribly.
#
# Add -e OSD_DMCRYPT=1 to both the ceph_osd_docker_prepare_env and ceph_osd_docker_extra_env variables to use the collocated dmcrypt scenario.
#ceph_osd_docker_prepare_env: -e CLUSTER={{ cluster }} -e OSD_JOURNAL_SIZE={{ journal_size }} -e OSD_FORCE_ZAP=1
#ceph_docker_image: "ceph/daemon"
#ceph_docker_image_tag: latest

View File

@ -126,6 +126,11 @@ raw_multi_journal: false
# - /dev/sdf
# - /dev/sdg
# - /dev/sdg
#
# NOTE(leseb):
# In a containerized scenario we only support A SINGLE journal device
# for all the OSDs on a given machine. If you configure more than one, bad things will happen.
# This is a limitation we plan to fix at some point.
raw_journal_devices: []
@ -168,6 +173,11 @@ osd_containerized_deployment_with_kv: false
kv_type: etcd
kv_endpoint: 127.0.0.1
kv_port: 4001
# Add -e OSD_JOURNAL={{ raw_journal_devices }} to the ceph_osd_docker_prepare_env variable to configure a journal device.
# Make sure you only pass a single device, otherwise this will fail horribly.
#
# Add -e OSD_DMCRYPT=1 to both the ceph_osd_docker_prepare_env and ceph_osd_docker_extra_env variables to use the collocated dmcrypt scenario.
ceph_osd_docker_prepare_env: -e CLUSTER={{ cluster }} -e OSD_JOURNAL_SIZE={{ journal_size }} -e OSD_FORCE_ZAP=1
ceph_docker_image: "ceph/daemon"
ceph_docker_image_tag: latest

View File

@ -24,13 +24,13 @@
docker run --net=host \
--pid=host \
--privileged=true \
--name="{{ ansible_hostname }}-osd-prepare-{{ item.0 |
regex_replace('/', '') }}" \
--name="{{ ansible_hostname }}-osd-prepare-{{ item.0 | regex_replace('/', '') }}" \
-v /etc/ceph:/etc/ceph \
-v /var/lib/ceph/:/var/lib/ceph/ \
-v /dev:/dev \
-v /etc/localtime:/etc/localtime:ro \
-e "OSD_DEVICE={{ item.0 }}" \
-e "OSD_JOURNAL_UUID=$(python -c "import uuid; print uuid.uuid5(uuid.NAMESPACE_DNS, '{{ ansible_machine_id }}{{ item.0 }}')")" \
-e CEPH_DAEMON=OSD_CEPH_DISK_PREPARE \
{{ ceph_osd_docker_prepare_env }} \
"{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
@ -47,11 +47,11 @@
docker run --net=host \
--pid=host \
--privileged=true \
--name="{{ ansible_hostname }}-osd-prepare-{{ item.0 |
regex_replace('/', '') }}" \
--name="{{ ansible_hostname }}-osd-prepare-{{ item.0 | regex_replace('/', '') }}" \
-v /dev:/dev \
-v /etc/localtime:/etc/localtime:ro \
-e "OSD_DEVICE={{ item.0 }}" \
-e "OSD_JOURNAL_UUID=$(python -c "import uuid; print uuid.uuid5(uuid.NAMESPACE_DNS, '{{ ansible_machine_id }}{{ item.0 }}')")" \
-e "{{ ceph_osd_docker_prepare_env }}" \
-e CEPH_DAEMON=OSD_CEPH_DISK_PREPARE \
-e KV_TYPE={{kv_type}} \
@ -67,6 +67,15 @@
- ceph_osd_docker_prepare_env is defined
- osd_containerized_deployment_with_kv
# Render ceph-osd-run.sh.j2 to /usr/share/ceph-osd-run.sh. The OSD systemd
# unit's ExecStart calls that path with the device name as its argument, so
# the file is installed owner-executable (0744) and owned by root.
- name: generate ceph osd docker run script
become: true
template:
src: "{{ role_path }}/templates/ceph-osd-run.sh.j2"
dest: /usr/share/ceph-osd-run.sh
owner: "root"
group: "root"
mode: "0744"
- name: generate systemd unit file
become: true
template:

View File

@ -0,0 +1,50 @@
#!/bin/bash
# {{ ansible_managed }}
#
# Start the OSD activation container for a single disk.
#
# Usage: ceph-osd-run.sh <device name relative to /dev, e.g. sdb>
#
# The container is started without --privileged; it only gets the SYS_ADMIN
# capability, so every block device it has to touch must be handed over
# explicitly with --device. For dmcrypt (LUKS) disks the additional
# --device arguments are collected in DEVICES before docker is invoked.

# Without a device name the globs below would expand to /dev/* and docker
# would be given "--device=/dev/", so refuse to continue.
if [[ -z "${1:-}" ]]; then
  echo "Usage: $0 <device name under /dev, e.g. sdb>" >&2
  exit 1
fi

# Extra --device arguments for dmcrypt setups. A value already present in
# the environment is kept, exactly as before.
DEVICES="${DEVICES:-}"

# Only do the dmcrypt discovery when at least one partition of the disk is LUKS.
if [[ "$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID "/dev/${1}"* | wc -l)" -gt 0 ]]; then
  # Expose every LUKS partition of the disk through its by-partuuid link.
  for part in "/dev/${1}"*; do
    luks_uuid="$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID "${part}")"
    if [[ -n "${luks_uuid}" ]]; then
      DEVICES="${DEVICES} --device=/dev/disk/by-partuuid/${luks_uuid} "
    fi
  done

  # Partition 3 carries the "ceph lockbox" label; it is passed both by
  # partuuid and by name, together with the device-mapper control node and
  # the mapping named after partition 2's PARTUUID.
  lockbox_uuid="$(blkid -t PARTLABEL="ceph lockbox" -o value -s PARTUUID "/dev/${1}3")"
  part2_luks_uuid="$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID "/dev/${1}2")"
  DEVICES="${DEVICES} --device=/dev/disk/by-partuuid/${lockbox_uuid} --device=/dev/${1}3 --device=/dev/mapper/control --device=/dev/mapper/${part2_luks_uuid}"

  # We test if the dm mapping for partition 1 exists; if it does we add it to
  # the --device list. If it does not, the first activation will fail, but
  # the mapping gets created, so on the second run it is added and the
  # activation succeeds.
  # blkid must succeed AND print a PARTUUID, otherwise we would end up
  # testing /dev/mapper/ itself, which always exists.
  if part1_luks_uuid="$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID "/dev/${1}1")" \
      && [[ -n "${part1_luks_uuid}" && -e "/dev/mapper/${part1_luks_uuid}" ]]; then
    DEVICES="${DEVICES} --device=/dev/mapper/${part1_luks_uuid}"
  fi
fi

# Journal selection happens at template time: when raw_journal_devices is not
# empty its first entry is used as the journal, otherwise partition 2 of the
# disk is passed. The by-partuuid device is a UUIDv5 derived from
# /etc/machine-id and the device path; print() keeps the one-liner working
# with both Python 2 and Python 3.
# ${DEVICES} is intentionally unquoted so it splits into separate arguments.
/usr/bin/docker run \
  --rm \
  --net=host \
  --cap-add SYS_ADMIN \
  --pid=host \
  {% if not osd_containerized_deployment_with_kv -%}
  -v /var/lib/ceph:/var/lib/ceph \
  -v /etc/ceph:/etc/ceph \
  {% else -%}
  -e KV_TYPE={{kv_type}} \
  -e KV_IP={{kv_endpoint}} \
  -e KV_PORT={{kv_port}} \
  {% endif -%}
  -v /etc/localtime:/etc/localtime:ro \
  --device=/dev/${1} \
  --device=/dev/${1}1 \
  {% if raw_journal_devices|length > 0 -%}
  -e OSD_JOURNAL={{ raw_journal_devices[0] }} \
  --device={{ raw_journal_devices[0] }} \
  {% else -%}
  --device=/dev/${1}2 \
  {% endif -%}
  --device=/dev/disk/by-partuuid/$(python -c "import uuid; f = open('/etc/machine-id', 'r').read(); print(uuid.uuid5(uuid.NAMESPACE_DNS, f.strip() + '/dev/$1'))") ${DEVICES} \
  -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE \
  -e OSD_DEVICE=/dev/${1} \
  {{ ceph_osd_docker_extra_env }} \
  --name={{ ansible_hostname }}-osd-dev${1} \
  {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}

View File

@ -1,3 +1,4 @@
# {{ ansible_managed }}
[Unit]
Description=Ceph OSD
After=docker.service
@ -5,24 +6,8 @@ After=docker.service
[Service]
EnvironmentFile=-/etc/environment
ExecStartPre=-/usr/bin/docker stop {{ ansible_hostname }}-osd-dev%i
ExecStartPre=-/usr/bin/docker rm -f {{ ansible_hostname }}-osd-dev%i
ExecStart=/usr/bin/docker run --rm --net=host --pid=host\
{% if not osd_containerized_deployment_with_kv -%}
-v /var/lib/ceph:/var/lib/ceph \
-v /etc/ceph:/etc/ceph \
{% else -%}
-e KV_TYPE={{kv_type}} \
-e KV_IP={{kv_endpoint}} \
-e KV_PORT={{kv_port}} \
{% endif -%}
-v /etc/localtime:/etc/localtime:ro \
-v /dev:/dev \
--privileged \
-e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE \
-e OSD_DEVICE=/dev/%i \
{{ ceph_osd_docker_extra_env }} \
--name={{ ansible_hostname }}-osd-dev%i \
{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
ExecStartPre=-/usr/bin/docker rm -f {{ ansible_hostname }}-osd-dev%i
ExecStart=/usr/share/ceph-osd-run.sh %i
ExecStop=-/usr/bin/docker stop {{ ansible_hostname }}-osd-dev%i
Restart=always
RestartSec=10s