shrink-osd: Stop ceph-disk container based on ID

Since bedc0ab we manage the ceph-osd systemd unit scripts based on the
OSD ID instead of the device name, but this change was never applied to
the shrink-osd playbook (ceph-disk version).
To keep backward compatibility with deployments that have not yet
transitioned to OSD IDs, we stop the unit scripts for both the device
and the ID.
This commit also adds the ulimit nofile container option to improve the
performance of the ceph-disk commands.
It also fixes an issue where the pattern for one OSD ID (e.g. osd.1)
also matches longer OSD IDs starting with the same digits (e.g. osd.12):

$ ceph-disk list | grep osd.1
 /dev/sdb1 ceph data, prepared, cluster ceph, osd.1, block /dev/sdb2
 /dev/sdg1 ceph data, prepared, cluster ceph, osd.12, block /dev/sdg2
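
The fix anchors the pattern on the trailing comma that ceph-disk prints
after the OSD name, so the lookup for osd.1 no longer matches osd.12:

$ ceph-disk list | grep osd.1,
 /dev/sdb1 ceph data, prepared, cluster ceph, osd.1, block /dev/sdb2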

Finally, the shrunk OSD's directory is removed.

Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
Dimitri Savineau 2019-08-05 14:32:18 -04:00 committed by Guillaume Abrioux
parent d12e6e626d
commit 343eec7a53
1 changed file with 26 additions and 6 deletions


@@ -106,9 +106,9 @@
   # NOTE(leseb): using '>' is the only way I could have the command working
   - name: find osd device based on the id
     shell: >
-      {{ 'docker run --privileged=true -v /dev:/dev --entrypoint' if containerized_deployment else '' }} /usr/sbin/ceph-disk
+      {{ 'docker run --privileged=true --ulimit nofile=1024:4096 -v /dev:/dev --entrypoint' if containerized_deployment else '' }} /usr/sbin/ceph-disk
       {{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else '' }}
-      list | awk -v pattern=osd.{{ item.0 }} '$0 ~ pattern {print $1}'
+      list | awk -v pattern=osd.{{ item.0 }}, '$0 ~ pattern {print $1}'
     with_together:
       - "{{ osd_to_kill.split(',') }}"
       - "{{ osd_hosts }}"
@@ -117,9 +117,9 @@
 
   - name: find osd dedicated devices - container
     shell: >
-      docker run --privileged=true -v /dev:/dev --entrypoint /usr/sbin/ceph-disk
+      docker run --privileged=true --ulimit nofile=1024:4096 -v /dev:/dev --entrypoint /usr/sbin/ceph-disk
       {{ ceph_docker_registry}}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
-      list | grep osd.{{ item.0 }} | grep -Eo '/dev/([hsv]d[a-z]{1,2})[0-9]{1,2}|/dev/nvme[0-9]n[0-9]p[0-9]'
+      list | grep osd.{{ item.0 }}, | grep -Eo '/dev/([hsv]d[a-z]{1,2})[0-9]{1,2}|/dev/nvme[0-9]n[0-9]p[0-9]'
     with_together:
       - "{{ osd_to_kill.split(',') }}"
       - "{{ osd_hosts }}"
@@ -129,7 +129,7 @@
       - containerized_deployment
 
   - name: find osd dedicated devices - non container
-    shell: ceph-disk list | grep osd.{{ item.0 }} | grep -Eo '/dev/([hsv]d[a-z]{1,2})[0-9]{1,2}|/dev/nvme[0-9]n[0-9]p[0-9]'
+    shell: ceph-disk list | grep osd.{{ item.0 }}, | grep -Eo '/dev/([hsv]d[a-z]{1,2})[0-9]{1,2}|/dev/nvme[0-9]n[0-9]p[0-9]'
     with_together:
       - "{{ osd_to_kill.split(',') }}"
       - "{{ osd_hosts }}"
@@ -140,7 +140,7 @@
 
   # if nvme then osd_to_kill_disks is nvme0n1, we need nvme0
   # if ssd or hdd then osd_to_kill_disks is sda1, we need sda
-  - name: stop osd services (container)
+  - name: stop osd services with device (container)
     service:
       name: "ceph-osd@{{ item.0.stdout[:-2] | regex_replace('/dev/', '') if 'nvme' in item.0.stdout else item.0.stdout[:-1] | regex_replace('/dev/', '') }}"
       state: stopped
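
A sketch of how the Jinja expression resolves the unit name for both
device types (stdout values are hypothetical):

 /dev/sda1    -> item.0.stdout[:-1] -> /dev/sda   -> ceph-osd@sda
 /dev/nvme0n1 -> item.0.stdout[:-2] -> /dev/nvme0 -> ceph-osd@nvme0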
@@ -152,6 +152,17 @@
     when:
       - containerized_deployment
 
+  - name: stop osd services with ID (container)
+    service:
+      name: "ceph-osd@{{ item.0 }}"
+      state: stopped
+      enabled: no
+    with_together:
+      - "{{ osd_to_kill.split(',') }}"
+      - "{{ osd_hosts }}"
+    delegate_to: "{{ item.1 }}"
+    when: containerized_deployment | bool
+
   - name: resolve parent device
     command: lsblk --nodeps -no pkname "{{ item.0.stdout }}"
     register: resolved_parent_device
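
With both stop tasks in place, a node that has not yet transitioned to
ID-based units is still covered; the manual equivalent would be
something like (hypothetical unit names):

$ systemctl stop ceph-osd@sdb ceph-osd@1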
@@ -278,6 +289,15 @@
     - not containerized_deployment
     - item.1 | length > 0
 
+  - name: remove ceph osd directory
+    file:
+      name: '/var/lib/ceph/osd/{{ cluster }}-{{ item.0 }}'
+      state: absent
+    with_together:
+      - "{{ osd_to_kill.split(',') }}"
+      - "{{ osd_hosts }}"
+    delegate_to: "{{ item.1 }}"
+
   - name: show ceph health
     command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s"
     delegate_to: "{{ groups[mon_group_name][0] }}"