---
- block:
    # For OpenStack VMs, modify the mount point below depending on whether the
    # OpenStack VM deploy tool defaults to mounting ephemeral disks.
    - name: umount ceph disk (if on openstack)
      mount:
        name: /mnt
        src: /dev/vdb
        fstype: ext3
        state: unmounted
      when:
        - ceph_docker_on_openstack

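# Non-lvm (ceph-disk) scenario: probe the container image layout, then activate
# any OSDs prepared with ceph-disk that are not active yet.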
- name: with non lvm scenario
  when: osd_scenario != 'lvm'
  block:
    - name: test if the container image has directory {{ container_bin_path }}
      command: "docker run --rm --entrypoint=test {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} -d {{ container_bin_path }}"
      changed_when: false
      failed_when: false
      register: test_container_bin_path

    - name: test if the container image has the disk_list function
      command: "docker run --rm --entrypoint=stat {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} {{ container_bin_path + '/disk_list.sh' if test_container_bin_path.rc == 0 else 'disk_list.sh' }}"
      changed_when: false
      failed_when: false
      register: disk_list

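    # OSD data directories under /var/lib/ceph/osd/ are named <cluster>-<id>,
    # so stripping everything up to the last dash leaves the bare OSD id.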
    - name: test activated ceph-disk osds
      shell: |
        ls /var/lib/ceph/osd/ | sed 's/.*-//'
      register: activated_osds

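    # Two-step activation: the first docker run calls the image's disk_list
    # helper to compute the extra environment for this device, the second run
    # activates the ceph-disk OSD. Only runs when more devices are defined
    # than OSDs already activated on this host.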
    - name: activate containerized osd(s)
      shell: |
        DOCKER_ENV=$(docker run --rm --net=host --ulimit nofile=1024:4096 \
        --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z \
        -e CLUSTER={{ cluster }} -e OSD_DEVICE={{ item }} \
        {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \
        disk_list)
        docker run --rm --net=host \
        --ulimit nofile=1024:4096 \
        --ipc=host --pid=host --privileged=true \
        -v /etc/ceph:/etc/ceph:z \
        -v /var/lib/ceph/:/var/lib/ceph/:z \
        -v /dev:/dev \
        -v /etc/localtime:/etc/localtime:ro \
        -e DEBUG=verbose \
        -e CLUSTER={{ cluster }} \
        -e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE_ONLY \
        -e OSD_DEVICE={{ item }} \
        ${DOCKER_ENV} \
        {{ docker_env_args }} \
        {{ ceph_osd_docker_prepare_env }} \
        {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
      with_items: "{{ devices }}"
      when:
        - devices is defined
        - devices | length > activated_osds.stdout_lines | length

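# Template the ceph-osd-run.sh wrapper used to launch containerized OSDs.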
- name: generate ceph osd docker run script
  become: true
  template:
    src: "{{ role_path }}/templates/ceph-osd-run.sh.j2"
    dest: "{{ ceph_osd_docker_run_script_path }}/ceph-osd-run.sh"
    owner: "root"
    group: "root"
    mode: "0744"
  notify:
    - restart ceph osds
  when:
    - containerized_deployment

# This is for ceph-disk: the ceph-disk command is gone, so list /var/lib/ceph/osd/
# to find the OSD ids instead.
- name: get osd ids
  shell: |
    ls /var/lib/ceph/osd/ | sed 's/.*-//'
  register: ceph_disk_osd_ids
  when: osd_scenario != 'lvm'

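# Build the ceph-volume invocation: wrapped in a docker run for containerized
# deployments, the bare ceph-volume binary otherwise.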
- name: set_fact docker_exec_start_osd
  set_fact:
    docker_exec_start_osd: "{{ 'docker run --rm --ulimit nofile=1024:4096 --privileged=true -v /run/lvm/lvmetad.socket:/run/lvm/lvmetad.socket -v /var/run/udev/:/var/run/udev/:z -v /etc/ceph:/etc/ceph:z -v /dev:/dev --entrypoint=ceph-volume ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else 'ceph-volume' }}"
  when: osd_scenario == 'lvm'

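# `ceph-volume lvm list --format json` prints a JSON object keyed by OSD id;
# those keys are used below to start the matching ceph-osd@<id> units.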
- name: collect osd ids
  shell: >
    {{ docker_exec_start_osd }} lvm list --format json
  changed_when: false
  failed_when: false
  register: ceph_volume_osd_ids
  when: osd_scenario == 'lvm'

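# Install the templated ceph-osd@.service unit (containerized deployments only).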
- name: generate systemd unit file
  become: true
  template:
    src: "{{ role_path }}/templates/ceph-osd.service.j2"
    dest: /etc/systemd/system/ceph-osd@.service
    owner: "root"
    group: "root"
    mode: "0644"
  notify:
    - restart ceph osds
  when:
    - containerized_deployment

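# ceph-disk containerized deployments historically named their units after the
# device (e.g. ceph-osd@sdb); migrate them to be named after the OSD id. The
# migration script is copied to every OSD node up front because
# rolling_update.yml runs with serial: 1.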
- name: device to id migration
  when:
    - containerized_deployment | bool
    - osd_scenario != 'lvm'
  block:
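    # Count active ceph-osd units that are still named after a device rather
    # than an OSD id.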
    - name: check ceph-osd service using device name
      shell: |
        systemctl list-units | grep -E "loaded * active" | grep -coE "ceph-osd@([a-z]+|nvme[0-9]+n[0-9]+).service"
      register: ceph_osd_device_name
      changed_when: false
      failed_when: false

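    # Copy the migration script to every OSD node first; the run task below
    # loops over all nodes, so the file has to exist everywhere.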
    - name: copy systemd-device-to-id.sh script
      template:
        src: systemd-device-to-id.sh.j2
        dest: /tmp/systemd-device-to-id.sh
        owner: root
        group: root
        mode: 0750
      with_items: "{{ groups[osd_group_name] }}"
      delegate_to: "{{ item }}"
      run_once: true
      when: ceph_osd_device_name.stdout | int != 0

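    # During a rolling update only the current node is migrated; otherwise the
    # script is run on every OSD node in the group.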
    - name: run the systemd-device-to-id.sh script
      command: /usr/bin/env bash /tmp/systemd-device-to-id.sh
      with_items: "{{ inventory_hostname if rolling_update else groups[osd_group_name] }}"
      delegate_to: "{{ item }}"
      run_once: true
      when: ceph_osd_device_name.stdout | int != 0

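# Start and enable one ceph-osd@<id> unit per OSD: ids come from the
# ceph-volume JSON keys for the lvm scenario, or from the /var/lib/ceph/osd/
# listing for ceph-disk.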
- name: systemd start osd
  systemd:
    name: ceph-osd@{{ item }}
    state: started
    enabled: yes
    daemon_reload: yes
  with_items: "{{ ((ceph_volume_osd_ids.stdout | from_json).keys() | list) if osd_scenario == 'lvm' else ceph_disk_osd_ids.stdout_lines }}"

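# Optional systemd overrides for the ceph-osd@.service unit, applied only when
# ceph_osd_systemd_overrides is defined and systemd is the service manager.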
- name: ensure systemd service override directory exists
  file:
    state: directory
    path: "/etc/systemd/system/ceph-osd@.service.d/"
  when:
    - ceph_osd_systemd_overrides is defined
    - ansible_service_mgr == 'systemd'

- name: add ceph-osd systemd service overrides
  config_template:
    src: "ceph-osd.service.d-overrides.j2"
    dest: "/etc/systemd/system/ceph-osd@.service.d/ceph-osd-systemd-overrides.conf"
    config_overrides: "{{ ceph_osd_systemd_overrides | default({}) }}"
    config_type: "ini"
  when:
    - ceph_osd_systemd_overrides is defined
    - ansible_service_mgr == 'systemd'