From 73cf0378c20f52a6e408e5be84c2826627e570fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Tue, 7 Feb 2017 22:00:53 +0100 Subject: [PATCH] docker: osd, do not use privileged container anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Oh yeah! This patch adds finer-grained control over how we run the activation osd container. We now use --device to give read, write and mknod access to a specific device to be consumed by Ceph. We also use SYS_ADMIN cap to allow mount operations, since ceph-disk needs to temporarily mount the osd data directory during the activation sequence. This patch also enables support for dedicated journal devices when deploying ceph-docker with ceph-ansible. Depends on https://github.com/ceph/ceph-docker/pull/478 Signed-off-by: Sébastien Han --- group_vars/osds.yml.sample | 10 ++++ roles/ceph-osd/defaults/main.yml | 10 ++++ .../tasks/docker/start_docker_osd.yml | 17 +++++-- roles/ceph-osd/templates/ceph-osd-run.sh.j2 | 50 +++++++++++++++++++ roles/ceph-osd/templates/ceph-osd.service.j2 | 21 ++------ 5 files changed, 86 insertions(+), 22 deletions(-) create mode 100644 roles/ceph-osd/templates/ceph-osd-run.sh.j2 diff --git a/group_vars/osds.yml.sample b/group_vars/osds.yml.sample index 7ee4363f9..20a234c91 100644 --- a/group_vars/osds.yml.sample +++ b/group_vars/osds.yml.sample @@ -134,6 +134,11 @@ dummy: # - /dev/sdf # - /dev/sdg # - /dev/sdg +# +# NOTE(leseb): +# On a containerized scenario we only support A SINGLE journal +# for all the OSDs on a given machine. If you don't, bad things will happen +# This is a limitation we plan to fix at some point. #raw_journal_devices: [] @@ -176,6 +181,11 @@ dummy: #kv_type: etcd #kv_endpoint: 127.0.0.1 #kv_port: 4001 + +# Add -e OSD_JOURNAL={{ raw_journal_devices }} to configure a journal device to ceph_osd_docker_prepare_env variable +# make sure you only pass a single device, otherwise this will fail horribly. 
+# +# Add -e OSD_DMCRYPT=1 to use the collocated dmcrypt scenario to the ceph_osd_docker_prepare_env and ceph_osd_docker_extra_env variables #ceph_osd_docker_prepare_env: -e CLUSTER={{ cluster }} -e OSD_JOURNAL_SIZE={{ journal_size }} -e OSD_FORCE_ZAP=1 #ceph_docker_image: "ceph/daemon" #ceph_docker_image_tag: latest diff --git a/roles/ceph-osd/defaults/main.yml b/roles/ceph-osd/defaults/main.yml index b0e00ce0f..8672d9ba8 100644 --- a/roles/ceph-osd/defaults/main.yml +++ b/roles/ceph-osd/defaults/main.yml @@ -126,6 +126,11 @@ raw_multi_journal: false # - /dev/sdf # - /dev/sdg # - /dev/sdg +# +# NOTE(leseb): +# On a containerized scenario we only support A SINGLE journal +# for all the OSDs on a given machine. If you don't, bad things will happen +# This is a limitation we plan to fix at some point. raw_journal_devices: [] @@ -168,6 +173,11 @@ osd_containerized_deployment_with_kv: false kv_type: etcd kv_endpoint: 127.0.0.1 kv_port: 4001 + +# Add -e OSD_JOURNAL={{ raw_journal_devices }} to configure a journal device to ceph_osd_docker_prepare_env variable +# make sure you only pass a single device, otherwise this will fail horribly. 
+# +# Add -e OSD_DMCRYPT=1 to use the collocated dmcrypt scenario to the ceph_osd_docker_prepare_env and ceph_osd_docker_extra_env variables ceph_osd_docker_prepare_env: -e CLUSTER={{ cluster }} -e OSD_JOURNAL_SIZE={{ journal_size }} -e OSD_FORCE_ZAP=1 ceph_docker_image: "ceph/daemon" ceph_docker_image_tag: latest diff --git a/roles/ceph-osd/tasks/docker/start_docker_osd.yml b/roles/ceph-osd/tasks/docker/start_docker_osd.yml index a694523f2..973569e7c 100644 --- a/roles/ceph-osd/tasks/docker/start_docker_osd.yml +++ b/roles/ceph-osd/tasks/docker/start_docker_osd.yml @@ -24,13 +24,13 @@ docker run --net=host \ --pid=host \ --privileged=true \ - --name="{{ ansible_hostname }}-osd-prepare-{{ item.0 | - regex_replace('/', '') }}" \ + --name="{{ ansible_hostname }}-osd-prepare-{{ item.0 | regex_replace('/', '') }}" \ -v /etc/ceph:/etc/ceph \ -v /var/lib/ceph/:/var/lib/ceph/ \ -v /dev:/dev \ -v /etc/localtime:/etc/localtime:ro \ -e "OSD_DEVICE={{ item.0 }}" \ + -e "OSD_JOURNAL_UUID=$(python -c "import uuid; print uuid.uuid5(uuid.NAMESPACE_DNS, '{{ ansible_machine_id }}{{ item.0 }}')")" \ -e CEPH_DAEMON=OSD_CEPH_DISK_PREPARE \ {{ ceph_osd_docker_prepare_env }} \ "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}" @@ -47,11 +47,11 @@ docker run --net=host \ --pid=host \ --privileged=true \ - --name="{{ ansible_hostname }}-osd-prepare-{{ item.0 | - regex_replace('/', '') }}" \ + --name="{{ ansible_hostname }}-osd-prepare-{{ item.0 | regex_replace('/', '') }}" \ -v /dev:/dev \ -v /etc/localtime:/etc/localtime:ro \ -e "OSD_DEVICE={{ item.0 }}" \ + -e "OSD_JOURNAL_UUID=$(python -c "import uuid; print uuid.uuid5(uuid.NAMESPACE_DNS, '{{ ansible_machine_id }}{{ item.0 }}')")" \ -e "{{ ceph_osd_docker_prepare_env }}" \ -e CEPH_DAEMON=OSD_CEPH_DISK_PREPARE \ -e KV_TYPE={{kv_type}} \ @@ -67,6 +67,15 @@ - ceph_osd_docker_prepare_env is defined - osd_containerized_deployment_with_kv +- name: generate ceph osd docker run script + become: true + template: + 
src: "{{ role_path }}/templates/ceph-osd-run.sh.j2" + dest: /usr/share/ceph-osd-run.sh + owner: "root" + group: "root" + mode: "0744" + - name: generate systemd unit file become: true template: diff --git a/roles/ceph-osd/templates/ceph-osd-run.sh.j2 b/roles/ceph-osd/templates/ceph-osd-run.sh.j2 new file mode 100644 index 000000000..0689be9e0 --- /dev/null +++ b/roles/ceph-osd/templates/ceph-osd-run.sh.j2 @@ -0,0 +1,50 @@ +#!/bin/bash +# {{ ansible_managed }} + +if [[ "$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID /dev/${1}* | wc -l)" -gt 0 ]] ; then + for part in /dev/${1}*; do + if [[ "$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID ${part} | wc -l)" -gt 0 ]]; then +DEVICES="${DEVICES} --device=/dev/disk/by-partuuid/$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID ${part}) " + fi + done + # we test if the dm exist, if it does we add it to --device list + # if not we don't add it, the first activation will fail + # however the dm will be created, on the second run it'll added to the device list + # the second run will succeed + blkid -t TYPE=crypto_LUKS -o value -s PARTUUID /dev/${1}1 + # make sure blkid returns 0 otherwise we will test /dev/mapper/ which always exists + if [[ -e /dev/mapper/$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID /dev/${1}1) && "$?" 
-eq 0 ]]; then + DEVICES="${DEVICES} --device=/dev/disk/by-partuuid/$(blkid -t PARTLABEL="ceph lockbox" -o value -s PARTUUID /dev/${1}3) --device=/dev/${1}3 --device=/dev/mapper/control --device=/dev/mapper/$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID /dev/${1}2) --device=/dev/mapper/$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID /dev/${1}1)" + else + DEVICES="${DEVICES} --device=/dev/disk/by-partuuid/$(blkid -t PARTLABEL="ceph lockbox" -o value -s PARTUUID /dev/${1}3) --device=/dev/${1}3 --device=/dev/mapper/control --device=/dev/mapper/$(blkid -t TYPE=crypto_LUKS -o value -s PARTUUID /dev/${1}2)" + fi +fi + +/usr/bin/docker run \ + --rm \ + --net=host \ + --cap-add SYS_ADMIN \ + --pid=host \ + {% if not osd_containerized_deployment_with_kv -%} + -v /var/lib/ceph:/var/lib/ceph \ + -v /etc/ceph:/etc/ceph \ + {% else -%} + -e KV_TYPE={{kv_type}} \ + -e KV_IP={{kv_endpoint}} \ + -e KV_PORT={{kv_port}} \ + {% endif -%} + -v /etc/localtime:/etc/localtime:ro \ + --device=/dev/${1} \ + --device=/dev/${1}1 \ + {% if raw_journal_devices|length > 0 -%} + -e OSD_JOURNAL={{ raw_journal_devices[0] }} \ + --device={{ raw_journal_devices[0] }} \ + {% else -%} + --device=/dev/${1}2 \ + {% endif -%} + --device=/dev/disk/by-partuuid/$(python -c "import uuid; f = open('/etc/machine-id', 'r').read(); print uuid.uuid5(uuid.NAMESPACE_DNS, f.strip() + '/dev/$1')") ${DEVICES} \ + -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE \ + -e OSD_DEVICE=/dev/${1} \ + {{ ceph_osd_docker_extra_env }} \ + --name={{ ansible_hostname }}-osd-dev${1} \ + {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} diff --git a/roles/ceph-osd/templates/ceph-osd.service.j2 b/roles/ceph-osd/templates/ceph-osd.service.j2 index 46bee9ff6..cd55833a5 100644 --- a/roles/ceph-osd/templates/ceph-osd.service.j2 +++ b/roles/ceph-osd/templates/ceph-osd.service.j2 @@ -1,3 +1,4 @@ +# {{ ansible_managed }} [Unit] Description=Ceph OSD After=docker.service @@ -5,24 +6,8 @@ After=docker.service [Service] 
EnvironmentFile=-/etc/environment ExecStartPre=-/usr/bin/docker stop {{ ansible_hostname }}-osd-dev%i -ExecStartPre=-/usr/bin/docker rm -f {{ ansible_hostname }}-osd-dev%i -ExecStart=/usr/bin/docker run --rm --net=host --pid=host\ - {% if not osd_containerized_deployment_with_kv -%} - -v /var/lib/ceph:/var/lib/ceph \ - -v /etc/ceph:/etc/ceph \ - {% else -%} - -e KV_TYPE={{kv_type}} \ - -e KV_IP={{kv_endpoint}} \ - -e KV_PORT={{kv_port}} \ - {% endif -%} - -v /etc/localtime:/etc/localtime:ro \ - -v /dev:/dev \ - --privileged \ - -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE \ - -e OSD_DEVICE=/dev/%i \ - {{ ceph_osd_docker_extra_env }} \ - --name={{ ansible_hostname }}-osd-dev%i \ - {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} +ExecStartPre=-/usr/bin/docker rm -f {{ ansible_hostname }}-osd-dev%i +ExecStart=/usr/share/ceph-osd-run.sh %i ExecStop=-/usr/bin/docker stop {{ ansible_hostname }}-osd-dev%i Restart=always RestartSec=10s