common: disable/enable pg_autoscaler

The PG autoscaler can disrupt the PG checks, so the idea here is to
disable it and re-enable it after the restart is done.

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
pull/6732/head
Guillaume Abrioux 2021-06-14 18:01:41 +02:00
parent cd06e7c046
commit 13036115e2
5 changed files with 297 additions and 38 deletions

View File

@ -443,6 +443,48 @@
- import_role:
name: ceph-defaults
# Record the current pool and balancer state, then switch the balancer and
# the per-pool PG autoscaler off. Cluster commands are delegated to the
# first host of the monitor group.
- name: get pool list
  command: "{{ ceph_cmd }} --cluster {{ cluster }} osd dump -f json"
  delegate_to: "{{ groups[mon_group_name][0] }}"
  run_once: true
  register: pool_list
  changed_when: false

- name: get balancer module status
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
  delegate_to: "{{ groups[mon_group_name][0] }}"
  run_once: true
  register: balancer_status
  changed_when: false

# Build a list of {'name': ..., 'mode': ...} entries, one per pool found in
# the 'pools' key of the osd dump output.
- name: set_fact pools_pgautoscaler_mode
  set_fact:
    pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
  with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
  run_once: true

# Only runs when the status output reported the balancer as active.
- name: disable balancer
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
  delegate_to: "{{ groups[mon_group_name][0] }}"
  run_once: true
  changed_when: false
  when: (balancer_status.stdout | from_json)['active'] | bool

# Only pools whose recorded mode is 'on' are modified.
- name: disable pg autoscale on pools
  ceph_pool:
    name: "{{ item.name }}"
    cluster: "{{ cluster }}"
    pg_autoscale_mode: false
  environment:
    # The image is only set for containerized deployments.
    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
    CEPH_CONTAINER_BINARY: "{{ container_binary }}"
  with_items: "{{ pools_pgautoscaler_mode }}"
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  delegate_to: "{{ groups[mon_group_name][0] }}"
  run_once: true
- name: set osd flags
ceph_osd_flag:
cluster: "{{ cluster }}"
@ -555,6 +597,21 @@
- import_role:
name: ceph-defaults
# Turn the PG autoscaler back on for every pool whose recorded mode in
# pools_pgautoscaler_mode is 'on'.
- name: re-enable pg autoscale on pools
  ceph_pool:
    name: "{{ item.name }}"
    cluster: "{{ cluster }}"
    pg_autoscale_mode: true
  environment:
    # The image is only set for containerized deployments.
    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
    CEPH_CONTAINER_BINARY: "{{ container_binary }}"
  with_items: "{{ pools_pgautoscaler_mode }}"
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  delegate_to: "{{ groups[mon_group_name][0] }}"
  run_once: true
- name: unset osd flags
ceph_osd_flag:
cluster: "{{ cluster }}"
@ -569,6 +626,13 @@
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
# Switch the balancer back on, but only when the registered balancer_status
# output reported it as active.
- name: re-enable balancer
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
  delegate_to: "{{ groups[mon_group_name][0] }}"
  run_once: true
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false
- name: redeploy mds daemons
hosts: "{{ mds_group_name|default('mdss') }}"
become: true

View File

@ -392,6 +392,44 @@
name: ceph-facts
tasks_from: container_binary.yml
# Record the current pool and balancer state, then switch the balancer and
# the per-pool PG autoscaler off. Each task runs once per play batch.
- name: get pool list
  command: "{{ ceph_cmd }} --cluster {{ cluster }} osd dump -f json"
  run_once: true
  register: pool_list
  changed_when: false

- name: get balancer module status
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
  run_once: true
  register: balancer_status
  changed_when: false

# Build a list of {'name': ..., 'mode': ...} entries, one per pool found in
# the 'pools' key of the osd dump output.
- name: set_fact pools_pgautoscaler_mode
  set_fact:
    pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
  with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
  run_once: true

# Only runs when the status output reported the balancer as active.
- name: disable balancer
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
  run_once: true
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false

# Only pools whose recorded mode is 'on' are modified.
- name: disable pg autoscale on pools
  ceph_pool:
    name: "{{ item.name }}"
    cluster: "{{ cluster }}"
    pg_autoscale_mode: false
  environment:
    # The image is only set for containerized deployments.
    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
    CEPH_CONTAINER_BINARY: "{{ container_binary }}"
  with_items: "{{ pools_pgautoscaler_mode }}"
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  run_once: true
- name: set osd flags
ceph_osd_flag:
name: "{{ item }}"
@ -494,6 +532,20 @@
name: ceph-facts
tasks_from: container_binary.yml
# Turn the PG autoscaler back on for every pool whose recorded mode in
# pools_pgautoscaler_mode is 'on'.
- name: re-enable pg autoscale on pools
  ceph_pool:
    name: "{{ item.name }}"
    cluster: "{{ cluster }}"
    pg_autoscale_mode: true
  environment:
    # The image is only set for containerized deployments.
    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
    CEPH_CONTAINER_BINARY: "{{ container_binary }}"
  with_items: "{{ pools_pgautoscaler_mode }}"
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  run_once: true
- name: unset osd flags
ceph_osd_flag:
name: "{{ item }}"
@ -506,6 +558,12 @@
- noout
- nodeep-scrub
# Switch the balancer back on, but only when the registered balancer_status
# output reported it as active.
- name: re-enable balancer
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
  run_once: true
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false
- name: upgrade ceph mdss cluster, deactivate all rank > 0
hosts: "{{ mon_group_name | default('mons') }}[0]"
become: true

View File

@ -215,6 +215,44 @@
name: ceph-facts
tasks_from: container_binary.yml
# Record the current pool and balancer state, then switch the balancer and
# the per-pool PG autoscaler off. Each task runs once per play batch.
- name: get pool list
  command: "{{ ceph_cmd }} --cluster {{ cluster }} osd dump -f json"
  run_once: true
  register: pool_list
  changed_when: false

- name: get balancer module status
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
  run_once: true
  register: balancer_status
  changed_when: false

# Build a list of {'name': ..., 'mode': ...} entries, one per pool found in
# the 'pools' key of the osd dump output.
- name: set_fact pools_pgautoscaler_mode
  set_fact:
    pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
  with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
  run_once: true

# Only runs when the status output reported the balancer as active.
- name: disable balancer
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
  run_once: true
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false

# Only pools whose recorded mode is 'on' are modified.
- name: disable pg autoscale on pools
  ceph_pool:
    name: "{{ item.name }}"
    cluster: "{{ cluster }}"
    pg_autoscale_mode: false
  environment:
    # The image is only set for containerized deployments.
    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
    CEPH_CONTAINER_BINARY: "{{ container_binary }}"
  with_items: "{{ pools_pgautoscaler_mode }}"
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  run_once: true
- name: set osd flags
ceph_osd_flag:
name: "{{ item }}"
@ -366,7 +404,21 @@
name: ceph-facts
tasks_from: container_binary.yml
- name: set osd flags
# Turn the PG autoscaler back on for every pool whose recorded mode in
# pools_pgautoscaler_mode is 'on'.
- name: re-enable pg autoscale on pools
  ceph_pool:
    name: "{{ item.name }}"
    cluster: "{{ cluster }}"
    pg_autoscale_mode: true
  environment:
    # The image is only set for containerized deployments.
    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
    CEPH_CONTAINER_BINARY: "{{ container_binary }}"
  with_items: "{{ pools_pgautoscaler_mode }}"
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  run_once: true
- name: unset osd flags
ceph_osd_flag:
name: "{{ item }}"
cluster: "{{ cluster }}"
@ -378,6 +430,13 @@
- noout
- nodeep-scrub
# Switch the balancer back on, but only when the registered balancer_status
# output reported it as active.
- name: re-enable balancer
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
  run_once: true
  when: (balancer_status.stdout | from_json)['active'] | bool
  changed_when: false
- name: switching from non-containerized to containerized ceph mds
hosts: "{{ mds_group_name|default('mdss') }}"

View File

@ -11,6 +11,10 @@
- name: import_tasks container_binary.yml
import_tasks: container_binary.yml
# Command prefix used to invoke the ceph CLI. For containerized deployments
# this is a throw-away container (`run --rm`) with `ceph` as its entrypoint
# and /etc/ceph, /var/lib/ceph and /var/run/ceph mounted; otherwise it is the
# plain `ceph` binary.
- name: set_fact ceph_cmd
  set_fact:
    ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:z -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }}"
# In case ansible_python_interpreter is set by the user,
# ansible will not discover python and discovered_interpreter_python
# will not be set

View File

@ -1,9 +1,19 @@
---
- name: set _osd_handler_called before restart
# Set trigger_restart to true when at least one host of the OSD group has a
# truthy handler_osd_status (treated as False when the host does not define
# it).
- name: set_fact trigger_restart
  set_fact:
    trigger_restart: true
  run_once: true
  loop: "{{ groups[osd_group_name] }}"
  when: hostvars[item]['handler_osd_status'] | default(False) | bool
- name: osd handler
when: trigger_restart | default(False) | bool
block:
- name: set _osd_handler_called before restart
set_fact:
_osd_handler_called: True
- name: unset noup flag
- name: unset noup flag
ceph_osd_flag:
name: noup
cluster: "{{ cluster }}"
@ -14,13 +24,13 @@
delegate_to: "{{ groups[mon_group_name][0] }}"
run_once: true
# This does not just restart OSDs but everything else too. Unfortunately
# at this time the ansible role does not have an OSD id list to use
# for restarting them specifically.
# This does not need to run during a rolling update as the playbook will
# restart all OSDs using the tasks "start ceph osd" or
# "restart containerized ceph osd"
- name: copy osd restart script
# This does not just restart OSDs but everything else too. Unfortunately
# at this time the ansible role does not have an OSD id list to use
# for restarting them specifically.
# This does not need to run during a rolling update as the playbook will
# restart all OSDs using the tasks "start ceph osd" or
# "restart containerized ceph osd"
- name: copy osd restart script
template:
src: restart_osd_daemon.sh.j2
dest: "{{ tmpdirpath.path }}/restart_osd_daemon.sh"
@ -28,7 +38,49 @@
group: root
mode: 0750
- name: restart ceph osds daemon(s)
# Record the current pool and balancer state, then switch the balancer and
# the per-pool PG autoscaler off. Cluster commands are delegated to the
# first host of the monitor group; every delegation uses the same
# `groups[mon_group_name][0]` expression (the former
# `groups.get(mon_group_name, [])[0]` variant offered no extra safety, since
# indexing an empty list is still undefined).
- name: get pool list
  command: "{{ ceph_cmd }} --cluster {{ cluster }} osd dump -f json"
  register: pool_list
  delegate_to: "{{ groups[mon_group_name][0] }}"
  run_once: true
  changed_when: false

- name: get balancer module status
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
  register: balancer_status
  run_once: true
  delegate_to: "{{ groups[mon_group_name][0] }}"
  changed_when: false

# Build a list of {'name': ..., 'mode': ...} entries, one per pool found in
# the 'pools' key of the osd dump output.
- name: set_fact pools_pgautoscaler_mode
  set_fact:
    pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
  run_once: true
  with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"

# Only runs when the status output reported the balancer as active.
- name: disable balancer
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
  run_once: true
  delegate_to: "{{ groups[mon_group_name][0] }}"
  changed_when: false
  when: (balancer_status.stdout | from_json)['active'] | bool

# Only pools whose recorded mode is 'on' are modified.
- name: disable pg autoscale on pools
  ceph_pool:
    name: "{{ item.name }}"
    cluster: "{{ cluster }}"
    pg_autoscale_mode: false
  with_items: "{{ pools_pgautoscaler_mode }}"
  delegate_to: "{{ groups[mon_group_name][0] }}"
  run_once: true
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  environment:
    # The image is only set for containerized deployments.
    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
    CEPH_CONTAINER_BINARY: "{{ container_binary }}"
- name: restart ceph osds daemon(s)
command: /usr/bin/env bash {{ hostvars[item]['tmpdirpath']['path'] }}/restart_osd_daemon.sh
when:
- hostvars[item]['handler_osd_status'] | default(False) | bool
@ -38,6 +90,28 @@
delegate_to: "{{ item }}"
run_once: True
- name: set _osd_handler_called after restart
- name: set _osd_handler_called after restart
set_fact:
_osd_handler_called: False
# Restore the PG autoscaler and the balancer. Both tasks are delegated to
# the first host of the monitor group using the same
# `groups[mon_group_name][0]` expression (the former
# `groups.get(mon_group_name, [])[0]` variant offered no extra safety, since
# indexing an empty list is still undefined).

# Only pools whose recorded mode in pools_pgautoscaler_mode is 'on' are
# switched back on.
- name: re-enable pg autoscale on pools
  ceph_pool:
    name: "{{ item.name }}"
    cluster: "{{ cluster }}"
    pg_autoscale_mode: true
  with_items: "{{ pools_pgautoscaler_mode }}"
  run_once: true
  delegate_to: "{{ groups[mon_group_name][0] }}"
  when:
    - pools_pgautoscaler_mode is defined
    - item.mode == 'on'
  environment:
    # The image is only set for containerized deployments.
    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
    CEPH_CONTAINER_BINARY: "{{ container_binary }}"

# Only runs when the registered balancer_status output reported the balancer
# as active.
- name: re-enable balancer
  command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
  run_once: true
  delegate_to: "{{ groups[mon_group_name][0] }}"
  changed_when: false
  when: (balancer_status.stdout | from_json)['active'] | bool