ceph-ansible/roles/ceph-osd/tasks/start_osds.yml

---
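# Start (and, for containerized deployments, activate) the OSDs managed by this
# role: collect the OSD ids for both the lvm and legacy ceph-disk scenarios,
# template the ceph-osd systemd unit, then start/enable one unit instance per id.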
- block:
    # For OpenStack VMs, modify the mount point below depending on whether the
    # OpenStack VM deploy tool defaults to mounting ephemeral disks
    - name: umount ceph disk (if on openstack)
      mount:
        name: /mnt
        src: /dev/vdb
        fstype: ext3
        state: unmounted
      when:
        - ceph_docker_on_openstack
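    # The rest of this block only applies to the legacy ceph-disk (non-lvm)
    # scenario: OSDs prepared with ceph-disk are activated one device at a time
    # through the container image's own tooling.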
    - name: with non lvm scenario
      when: osd_scenario != 'lvm'
      block:
        - name: test if the container image has directory {{ container_bin_path }}
          command: "docker run --rm --entrypoint=test {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} -d {{ container_bin_path }}"
          changed_when: false
          failed_when: false
          register: test_container_bin_path
        - name: test if the container image has the disk_list function
          command: "docker run --rm --entrypoint=stat {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} {{ container_bin_path + '/disk_list.sh' if test_container_bin_path.rc == 0 else 'disk_list.sh' }}"
          changed_when: false
          failed_when: false
          register: disk_list
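        # Every activated ceph-disk OSD gets a /var/lib/ceph/osd/<cluster>-<id>
        # directory, so listing that directory and stripping the cluster prefix
        # yields the ids of the OSDs that are already activated on this host.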
        - name: test activated ceph-disk osds
          shell: |
            ls /var/lib/ceph/osd/ | sed 's/.*-//'
          register: activated_osds
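        # Activation is a two-step docker invocation: the first run calls the
        # image's disk_list helper to build the environment for this device, the
        # second run only activates the OSD (OSD_CEPH_DISK_ACTIVATE_ONLY) without
        # preparing it. The task is skipped once every configured device already
        # has an activated OSD.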
        - name: activate containerized osd(s)
          shell: |
            DOCKER_ENV=$(docker run --rm --net=host --ulimit nofile=1024:4096 \
            --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z \
            -e CLUSTER={{ cluster }} -e OSD_DEVICE={{ item }} \
            {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \
            disk_list)
            docker run --rm --net=host \
            --ulimit nofile=1024:4096 \
            --ipc=host --pid=host --privileged=true \
            -v /etc/ceph:/etc/ceph:z \
            -v /var/lib/ceph/:/var/lib/ceph/:z \
            -v /dev:/dev \
            -v /etc/localtime:/etc/localtime:ro \
            -e DEBUG=verbose \
            -e CLUSTER={{ cluster }} \
            -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE_ONLY \
            -e OSD_DEVICE={{ item }} \
            ${DOCKER_ENV} \
            {{ docker_env_args }} \
            {{ ceph_osd_docker_prepare_env }} \
            {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
          with_items: "{{ devices }}"
          when:
            - devices is defined
            - devices | length > activated_osds.stdout_lines | length
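    # ceph-osd-run.sh is the templated wrapper that launches the OSD container;
    # the ceph-osd@.service unit generated further down is expected to invoke it
    # (see the ceph-osd.service.j2 template), which is why a change to the script
    # notifies a restart of the OSD services.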
    - name: generate ceph osd docker run script
      become: true
      template:
        src: "{{ role_path }}/templates/ceph-osd-run.sh.j2"
        dest: "{{ ceph_osd_docker_run_script_path }}/ceph-osd-run.sh"
        owner: "root"
        group: "root"
        mode: "0744"
      notify:
        - restart ceph osds
  when:
    - containerized_deployment
# this is for ceph-disk: the ceph-disk command is gone, so we have to list /var/lib/ceph
- name: get osd ids
  shell: |
    ls /var/lib/ceph/osd/ | sed 's/.*-//'
  register: ceph_disk_osd_ids
  when: osd_scenario != 'lvm'
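# For the lvm scenario, OSD ids come from ceph-volume. In containerized
# deployments ceph-volume is only available inside the container image, so the
# fact below builds the full 'docker run ... --entrypoint=ceph-volume' command
# line; on non-containerized hosts it is simply the local ceph-volume binary.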
- name: set_fact docker_exec_start_osd
  set_fact:
    docker_exec_start_osd: "{{ 'docker run --rm --ulimit nofile=1024:4096 --privileged=true -v /run/lvm/lvmetad.socket:/run/lvm/lvmetad.socket -v /var/run/udev/:/var/run/udev/:z -v /etc/ceph:/etc/ceph:z -v /dev:/dev --entrypoint=ceph-volume ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else 'ceph-volume' }}"
  when: osd_scenario == 'lvm'
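# 'ceph-volume lvm list --format json' prints a JSON object keyed by OSD id,
# roughly of this shape (illustrative only, the exact fields vary by release):
#   {"0": [{"type": "block", "path": "/dev/..."}], "1": [...]}
# Only the top-level keys are used below, as the list of OSD ids to start.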
- name: collect osd ids
  shell: >
    {{ docker_exec_start_osd }} lvm list --format json
  changed_when: false
  failed_when: false
  register: ceph_volume_osd_ids
  when: osd_scenario == 'lvm'
- name: generate systemd unit file
  become: true
  template:
    src: "{{ role_path }}/templates/ceph-osd.service.j2"
    dest: /etc/systemd/system/ceph-osd@.service
    owner: "root"
    group: "root"
    mode: "0644"
  notify:
    - restart ceph osds
  when:
    - containerized_deployment
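# Older containerized deployments named their unit instances after the device
# (e.g. ceph-osd@sdb) rather than the OSD id (e.g. ceph-osd@0). The block below
# detects units still named after devices and runs the templated
# systemd-device-to-id.sh script to migrate them to id-based instances before
# the units are (re)started.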
- name: device to id migration
  when:
    - containerized_deployment | bool
    - osd_scenario != 'lvm'
  block:
    - name: check ceph-osd service using device name
      shell: |
        systemctl list-units | grep -E "loaded * active" | grep -coE "ceph-osd@([a-z]+|nvme[0-9]+n[0-9]+).service"
      register: ceph_osd_device_name
      changed_when: false
      failed_when: false
    - name: copy systemd-device-to-id.sh script
      template:
        src: systemd-device-to-id.sh.j2
        dest: /tmp/systemd-device-to-id.sh
        owner: root
        group: root
        mode: 0750
      with_items: "{{ groups[osd_group_name] }}"
      delegate_to: "{{ item }}"
      run_once: true
      when: ceph_osd_device_name.stdout | int != 0
    - name: run the systemd-device-to-id.sh script
      command: /usr/bin/env bash /tmp/systemd-device-to-id.sh
      with_items: "{{ inventory_hostname if rolling_update else groups[osd_group_name] }}"
      delegate_to: "{{ item }}"
      run_once: true
      when: ceph_osd_device_name.stdout | int != 0
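# Start and enable one ceph-osd@<id> instance per OSD found on this host: ids
# come from the ceph-volume JSON keys for the lvm scenario, and from the
# /var/lib/ceph/osd listing for the legacy ceph-disk scenario.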
- name: systemd start osd
  systemd:
    name: ceph-osd@{{ item }}
    state: started
    enabled: yes
    daemon_reload: yes
  with_items: "{{ ((ceph_volume_osd_ids.stdout | from_json).keys() | list) if osd_scenario == 'lvm' else ceph_disk_osd_ids.stdout_lines }}"
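# Optional systemd drop-in overrides for the ceph-osd@.service unit. A minimal
# sketch of the variable (hypothetical values, set in group_vars/host_vars):
#   ceph_osd_systemd_overrides:
#     Service:
#       TimeoutStartSec: 300
# config_overrides is rendered as INI sections into the drop-in file below.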
- name: ensure systemd service override directory exists
  file:
    state: directory
    path: "/etc/systemd/system/ceph-osd@.service.d/"
  when:
    - ceph_osd_systemd_overrides is defined
    - ansible_service_mgr == 'systemd'
- name: add ceph-osd systemd service overrides
  config_template:
    src: "ceph-osd.service.d-overrides.j2"
    dest: "/etc/systemd/system/ceph-osd@.service.d/ceph-osd-systemd-overrides.conf"
    config_overrides: "{{ ceph_osd_systemd_overrides | default({}) }}"
    config_type: "ini"
  when:
    - ceph_osd_systemd_overrides is defined
    - ansible_service_mgr == 'systemd'