common: disable/enable pg_autoscaler

The PG autoscaler can disrupt the PG checks, so the idea here is to
disable it and re-enable it after the restart is done.

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
(cherry picked from commit 13036115e2)
pull/6735/head
Guillaume Abrioux 2021-06-14 18:01:41 +02:00 committed by Dimitri Savineau
parent 5213612eaf
commit f7882bbc02
4 changed files with 181 additions and 37 deletions

View File

@ -383,6 +383,38 @@
name: ceph-facts
tasks_from: container_binary.yml
# Dump the OSD map as JSON and keep the result in pool_list; the command is
# read-only, so it never reports a change.
- name: get pool list
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd dump -f json
  changed_when: false
  run_once: true
  register: pool_list
# Query the balancer status as JSON and keep it in balancer_status; its
# 'active' field gates the balancer off/on tasks.
- name: get balancer module status
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer status -f json
  changed_when: false
  run_once: true
  register: balancer_status
# Accumulate one {'name': <pool_name>, 'mode': <pg_autoscale_mode>} entry per
# pool found in the registered "osd dump" JSON.
# A pool entry without a "pg_autoscale_mode" key is recorded as 'off' rather
# than failing on an undefined attribute; since only entries with mode 'on'
# are toggled, such a pool is simply left alone.
- name: set_fact pools_pgautoscaler_mode
  set_fact:
    pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode | default('off')}]) }}"
  run_once: true
  with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
# Switch the balancer off, but only if the status captured in balancer_status
# reported it as active.
- name: disable balancer
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer off
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false
  run_once: true
# Set pg_autoscale_mode to "off" on every pool whose recorded mode is 'on';
# pools recorded with any other mode are skipped.
- name: disable pg autoscale on pools
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd pool set {{ item.name }} pg_autoscale_mode off
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  run_once: true
  with_items: "{{ pools_pgautoscaler_mode }}"
- name: set osd flags
ceph_osd_flag:
name: "{{ item }}"
@ -490,6 +522,14 @@
name: ceph-facts
tasks_from: container_binary.yml
# Set pg_autoscale_mode back to "on" for every pool whose recorded mode is
# 'on'; the task is skipped when no modes were recorded.
- name: re-enable pg autoscale on pools
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd pool set {{ item.name }} pg_autoscale_mode on
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  run_once: true
  with_items: "{{ pools_pgautoscaler_mode }}"
- name: unset osd flags
ceph_osd_flag:
name: "{{ item }}"
@ -502,6 +542,12 @@
- noout
- nodeep-scrub
# Turn the balancer back on when the status captured in balancer_status
# reported it as active.
- name: re-enable balancer
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer on
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false
  run_once: true
- name: set_fact container_exec_cmd_osd
set_fact:
container_exec_cmd_update_osd: "{{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }}"

View File

@ -214,6 +214,38 @@
- import_role:
name: ceph-facts
# Read-only query: capture the JSON OSD map dump in pool_list so the pool
# entries can be parsed from its stdout.
- name: get pool list
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd dump -f json
  changed_when: false
  run_once: true
  register: pool_list
# Read-only query: capture the balancer status JSON in balancer_status; the
# 'active' field decides whether the balancer is toggled.
- name: get balancer module status
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer status -f json
  changed_when: false
  run_once: true
  register: balancer_status
# Accumulate one {'name': <pool_name>, 'mode': <pg_autoscale_mode>} entry per
# pool found in the registered "osd dump" JSON.
# A pool entry without a "pg_autoscale_mode" key is recorded as 'off' rather
# than failing on an undefined attribute; since only entries with mode 'on'
# are toggled, such a pool is simply left alone.
- name: set_fact pools_pgautoscaler_mode
  set_fact:
    pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode | default('off')}]) }}"
  run_once: true
  with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
# Switch the balancer off, but only if the status captured in balancer_status
# reported it as active.
- name: disable balancer
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer off
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false
  run_once: true
# Set pg_autoscale_mode to "off" on every pool whose recorded mode is 'on';
# pools recorded with any other mode are skipped.
- name: disable pg autoscale on pools
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd pool set {{ item.name }} pg_autoscale_mode off
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  run_once: true
  with_items: "{{ pools_pgautoscaler_mode }}"
- name: set osd flags
ceph_osd_flag:
name: "{{ item }}"
@ -372,7 +404,15 @@
- import_role:
name: ceph-facts
- name: set osd flags
# Set pg_autoscale_mode back to "on" for every pool whose recorded mode is
# 'on'; the task is skipped when no modes were recorded.
- name: re-enable pg autoscale on pools
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd pool set {{ item.name }} pg_autoscale_mode on
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  run_once: true
  with_items: "{{ pools_pgautoscaler_mode }}"
- name: unset osd flags
ceph_osd_flag:
name: "{{ item }}"
cluster: "{{ cluster }}"
@ -384,6 +424,13 @@
- noout
- nodeep-scrub
# Turn the balancer back on when the status captured in balancer_status
# reported it as active.
- name: re-enable balancer
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer on
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false
  run_once: true
- name: switching from non-containerized to containerized ceph mds
hosts: "{{ mds_group_name|default('mdss') }}"

View File

@ -15,6 +15,10 @@
set_fact:
is_podman: "{{ podman_binary.stat.exists }}"
# Define ceph_cmd, the command prefix used to invoke the ceph CLI.
# When containerized_deployment is true, the value runs the configured image
# (registry/image:tag) through container_binary with --entrypoint=ceph, host
# networking, and /etc/ceph, /var/lib/ceph and /var/run/ceph bind-mounted;
# otherwise the value is the plain 'ceph' binary.
- name: set_fact ceph_cmd
set_fact:
ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:z -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }}"
# In case ansible_python_interpreter is set by the user,
# ansible will not discover python and discovered_interpreter_python
# will not be set

View File

@ -1,43 +1,90 @@
---
- name: set _osd_handler_called before restart
- name: set_fact trigger_restart
set_fact:
_osd_handler_called: True
- name: unset noup flag
ceph_osd_flag:
name: noup
cluster: "{{ cluster }}"
state: absent
environment:
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
delegate_to: "{{ groups[mon_group_name][0] }}"
trigger_restart: true
loop: "{{ groups[osd_group_name] }}"
when: hostvars[item]['handler_osd_status'] | default(False) | bool
run_once: true
- name: osd handler
when: trigger_restart | default(False) | bool
block:
# Read-only query, delegated to the first host of the monitor group: capture
# the JSON OSD map dump in pool_list.
- name: get pool list
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd dump -f json
  delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
  changed_when: false
  run_once: true
  register: pool_list
# Read-only query, delegated to the first host of the monitor group: capture
# the balancer status JSON in balancer_status.
- name: get balancer module status
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer status -f json
  delegate_to: "{{ groups[mon_group_name][0] }}"
  changed_when: false
  run_once: true
  register: balancer_status
# Accumulate one {'name': <pool_name>, 'mode': <pg_autoscale_mode>} entry per
# pool found in the registered "osd dump" JSON.
# A pool entry without a "pg_autoscale_mode" key is recorded as 'off' rather
# than failing on an undefined attribute; since only entries with mode 'on'
# are toggled, such a pool is simply left alone.
- name: set_fact pools_pgautoscaler_mode
  set_fact:
    pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode | default('off')}]) }}"
  run_once: true
  with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
# Switch the balancer off from the first host of the monitor group, but only
# if the status captured in balancer_status reported it as active.
- name: disable balancer
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer off
  when: (balancer_status.stdout | from_json)['active'] | bool
  delegate_to: "{{ groups[mon_group_name][0] }}"
  changed_when: false
  run_once: true
# Set pg_autoscale_mode to "off" on every pool whose recorded mode is 'on';
# the command is delegated to the first host of the monitor group.
- name: disable pg autoscale on pools
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd pool set {{ item.name }} pg_autoscale_mode off
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
  run_once: true
  with_items: "{{ pools_pgautoscaler_mode }}"
# This does not just restart OSDs but everything else too. Unfortunately
# at this time the ansible role does not have an OSD id list to use
# for restarting them specifically.
# This does not need to run during a rolling update as the playbook will
# restart all OSDs using the tasks "start ceph osd" or
# "restart containerized ceph osd"
- name: copy osd restart script
template:
src: restart_osd_daemon.sh.j2
dest: /tmp/restart_osd_daemon.sh
owner: root
group: root
mode: 0750
# at this time the ansible role does not have an OSD id list to use
# for restarting them specifically.
# This does not need to run during a rolling update as the playbook will
# restart all OSDs using the tasks "start ceph osd" or
# "restart containerized ceph osd"
- name: copy osd restart script
template:
src: restart_osd_daemon.sh.j2
dest: /tmp/restart_osd_daemon.sh
owner: root
group: root
mode: 0750
- name: restart ceph osds daemon(s)
command: /usr/bin/env bash /tmp/restart_osd_daemon.sh
when:
- hostvars[item]['handler_osd_status'] | default(False) | bool
- handler_health_osd_check | bool
- hostvars[item]['_osd_handler_called'] | default(False) | bool
with_items: "{{ groups[osd_group_name] | intersect(ansible_play_batch) }}"
delegate_to: "{{ item }}"
run_once: True
- name: restart ceph osds daemon(s)
command: /usr/bin/env bash /tmp/restart_osd_daemon.sh
when:
- hostvars[item]['handler_osd_status'] | default(False) | bool
- handler_health_osd_check | bool
- hostvars[item]['_osd_handler_called'] | default(False) | bool
with_items: "{{ groups[osd_group_name] | intersect(ansible_play_batch) }}"
delegate_to: "{{ item }}"
run_once: True
- name: set _osd_handler_called after restart
set_fact:
_osd_handler_called: False
- name: set _osd_handler_called after restart
set_fact:
_osd_handler_called: False
# Set pg_autoscale_mode back to "on" for every pool whose recorded mode is
# 'on'; the command is delegated to the first host of the monitor group.
- name: re-enable pg autoscale on pools
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    osd pool set {{ item.name }} pg_autoscale_mode on
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
  run_once: true
  with_items: "{{ pools_pgautoscaler_mode }}"
# Turn the balancer back on from the first host of the monitor group when the
# status captured in balancer_status reported it as active.
- name: re-enable balancer
  command: >-
    {{ ceph_cmd }} --cluster {{ cluster }}
    balancer on
  when: (balancer_status.stdout | from_json)['active'] | bool
  delegate_to: "{{ groups[mon_group_name][0] }}"
  changed_when: false
  run_once: true