shrink-mon: support updating ceph config file after mon removal

This adds the possibility for users to opt in or out of removing the
monitor entry from the ceph config file on all nodes.

By default, the playbook won't update the ceph config file. If you want it
to be updated, pass the extra variable `-e shrink_mon_update_cfg=true`.
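
For example, with a hypothetical inventory file `hosts` and a monitor named
`mon2` (both names are illustrative only), the two modes would be invoked as:

    # remove mon2 and also rewrite ceph.conf on every node
    ansible-playbook -vv -i hosts infrastructure-playbooks/shrink-mon.yml \
        -e ireallymeanit=yes -e mon_to_kill=mon2 -e shrink_mon_update_cfg=true

    # default behaviour: mon2 is removed but ceph.conf is left untouched
    ansible-playbook -vv -i hosts infrastructure-playbooks/shrink-mon.yml \
        -e ireallymeanit=yes -e mon_to_kill=mon2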

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
branch: shrink-mon-update-config
author: Guillaume Abrioux <gabrioux@redhat.com>, 2022-07-28 14:15:38 +02:00
parent: e2076e439b
commit: 5c327bf1a4
4 changed files with 191 additions and 97 deletions

infrastructure-playbooks/shrink-mon.yml

@@ -22,7 +22,7 @@
     - debug: msg="gather facts on all Ceph hosts for following reference"

 - name: confirm whether user really meant to remove monitor from the ceph cluster
-  hosts: "{{ groups[mon_group_name][0] }}"
+  hosts: localhost
   become: true
   vars_prompt:
     - name: ireallymeanit
@@ -33,115 +33,208 @@
     mon_group_name: mons
   pre_tasks:
-    - name: exit playbook, if only one monitor is present in cluster
-      fail:
-        msg: "You are about to shrink the only monitor present in the cluster.
-              If you really want to do that, please use the purge-cluster playbook."
-      when: groups[mon_group_name] | length | int == 1
-
-    - name: exit playbook, if no monitor was given
-      fail:
-        msg: "mon_to_kill must be declared
-              Exiting shrink-cluster playbook, no monitor was removed.
-              On the command line when invoking the playbook, you can use
-              -e mon_to_kill=ceph-mon01 argument. You can only remove a single monitor each time the playbook runs."
-      when: mon_to_kill is not defined
-
-    - name: exit playbook, if the monitor is not part of the inventory
-      fail:
-        msg: "It seems that the host given is not part of your inventory, please make sure it is."
-      when: mon_to_kill not in groups[mon_group_name]
-
-    - name: exit playbook, if user did not mean to shrink cluster
-      fail:
-        msg: "Exiting shrink-mon playbook, no monitor was removed.
-              To shrink the cluster, either say 'yes' on the prompt or
-              or use `-e ireallymeanit=yes` on the command line when
-              invoking the playbook"
-      when: ireallymeanit != 'yes'
-
-    - import_role:
-        name: ceph-defaults
-
-    - import_role:
-        name: ceph-facts
-        tasks_from: container_binary
-
-  tasks:
+    - name: get current monitor status
+      delegate_to: "{{ groups.get(mon_group_name)[0] }}"
+      block:
+        - import_role:
+            name: ceph-defaults
+
+        - import_role:
+            name: ceph-facts
+            tasks_from: container_binary
+
+        - name: "set_fact container_exec_cmd build {{ container_binary }} exec command (containerized)"
+          set_fact:
+            container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups.get(mon_group_name)[0]]['ansible_facts']['hostname'] }}"
+          when: containerized_deployment | bool
+
+        - name: get current quorum status
+          command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} quorum_status -f json"
+          changed_when: false
+          failed_when: false
+          register: current_quorum_status
+
     - name: pick a monitor different than the one we want to remove
       set_fact:
         mon_host: "{{ item }}"
-      with_items: "{{ groups[mon_group_name] }}"
+      loop: "{{ (current_quorum_status.stdout | from_json)['quorum_names'] }}"
      when: item != mon_to_kill

-    - name: "set_fact container_exec_cmd build {{ container_binary }} exec command (containerized)"
-      set_fact:
-        container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[mon_host]['ansible_facts']['hostname'] }}"
-      when: containerized_deployment | bool
-
-    - name: exit playbook, if can not connect to the cluster
-      command: "{{ container_exec_cmd }} timeout 5 ceph --cluster {{ cluster }} health"
-      register: ceph_health
-      changed_when: false
-      until: ceph_health.stdout.find("HEALTH") > -1
-      delegate_to: "{{ mon_host }}"
-      retries: 5
-      delay: 2
-
-    - name: set_fact mon_to_kill_hostname
-      set_fact:
-        mon_to_kill_hostname: "{{ hostvars[mon_to_kill]['ansible_facts']['hostname'] }}"
-
-    - name: stop monitor service(s)
-      service:
-        name: ceph-mon@{{ mon_to_kill_hostname }}
-        state: stopped
-        enabled: no
-      delegate_to: "{{ mon_to_kill }}"
-      failed_when: false
-
-    - name: purge monitor store
-      file:
-        path: /var/lib/ceph/mon/{{ cluster }}-{{ mon_to_kill_hostname }}
-        state: absent
-      delegate_to: "{{ mon_to_kill }}"
-
-    - name: remove monitor from the quorum
-      command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon remove {{ mon_to_kill_hostname }}"
-      changed_when: false
-      failed_when: false
-      delegate_to: "{{ mon_host }}"
+    - name: fail if basic requirements aren't satisfied
+      delegate_to: "{{ mon_host }}"
+      block:
+        - import_role:
+            name: ceph-defaults
+
+        - import_role:
+            name: ceph-facts
+            tasks_from: container_binary
+
+        - name: "set_fact container_exec_cmd build {{ container_binary }} exec command (containerized)"
+          set_fact:
+            container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[mon_host]['ansible_facts']['hostname'] }}"
+          when: containerized_deployment | bool
+
+        - name: exit playbook, if only one monitor is present in cluster
+          fail:
+            msg: "You are about to shrink the only monitor present in the cluster.
+                  If you really want to do that, please use the purge-cluster playbook."
+          when: groups[mon_group_name] | length | int == 1
+
+        - name: exit playbook, if no monitor was given
+          fail:
+            msg: "mon_to_kill must be declared
+                  Exiting shrink-cluster playbook, no monitor was removed.
+                  On the command line when invoking the playbook, you can use
+                  -e mon_to_kill=ceph-mon01 argument. You can only remove a single monitor each time the playbook runs."
+          when: mon_to_kill is not defined
+
+        - name: exit playbook, if the monitor is not part of the inventory
+          fail:
+            msg: "It seems that the host given is not part of your inventory, please make sure it is."
+          when: mon_to_kill not in groups[mon_group_name]
+
+        - name: exit playbook, if user did not mean to shrink cluster
+          fail:
+            msg: "Exiting shrink-mon playbook, no monitor was removed.
+                  To shrink the cluster, either say 'yes' on the prompt or
+                  or use `-e ireallymeanit=yes` on the command line when
+                  invoking the playbook"
+          when: ireallymeanit != 'yes'
+
+        - name: set_fact mon_to_kill_hostname
+          set_fact:
+            mon_to_kill_hostname: "{{ hostvars[mon_to_kill]['ansible_facts']['hostname'] }}"
+
+        - name: set_fact valid_mon_to_kill
+          set_fact:
+            valid_mon_to_kill: "{{ mon_to_kill_hostname in (current_quorum_status.stdout | from_json)['quorum_names'] }}"
+
+  tasks:
+    - name: shrink selected monitor
+      delegate_to: "{{ mon_host }}"
+      when: valid_mon_to_kill | bool
+      block:
+        - name: exit playbook, if can not connect to the cluster
+          command: "{{ container_exec_cmd }} timeout 5 ceph --cluster {{ cluster }} health"
+          register: ceph_health
+          changed_when: false
+          until: ceph_health.stdout.find("HEALTH") > -1
+          retries: 5
+          delay: 2
+
+        - name: stop monitor service(s)
+          service:
+            name: ceph-mon@{{ mon_to_kill_hostname }}
+            state: stopped
+            enabled: no
+          delegate_to: "{{ mon_to_kill }}"
+          failed_when: false
+
+        - name: purge monitor store
+          file:
+            path: /var/lib/ceph/mon/{{ cluster }}-{{ mon_to_kill_hostname }}
+            state: absent
+          delegate_to: "{{ mon_to_kill }}"
+
+        - name: remove monitor from the quorum
+          command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon remove {{ mon_to_kill_hostname }}"
+          changed_when: false
+          failed_when: false

   post_tasks:
-    - name: verify the monitor is out of the cluster
-      command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} quorum_status -f json"
-      delegate_to: "{{ mon_host }}"
-      changed_when: false
-      failed_when: false
-      register: result
-      until: mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']
-      retries: 2
-      delay: 10
-
-    - name: please remove the monitor from your ceph configuration file
-      debug:
-        msg: "The monitor has been successfully removed from the cluster.
-              Please remove the monitor entry from the rest of your ceph configuration files, cluster wide."
-      run_once: true
-      when: mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']
-
-    - name: fail if monitor is still part of the cluster
-      fail:
-        msg: "Monitor appears to still be part of the cluster, please check what happened."
-      run_once: true
-      when: mon_to_kill_hostname in (result.stdout | from_json)['quorum_names']
-
-    - name: show ceph health
-      command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} -s"
-      delegate_to: "{{ mon_host }}"
-      changed_when: false
-
-    - name: show ceph mon status
-      command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon stat"
-      delegate_to: "{{ mon_host }}"
-      changed_when: false
+    - name: post verifications
+      delegate_to: "{{ mon_host }}"
+      when: valid_mon_to_kill | bool
+      block:
+        - name: verify the monitor is out of the cluster
+          command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} quorum_status -f json"
+          changed_when: false
+          failed_when: false
+          register: result
+          until: mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']
+          retries: 2
+          delay: 10
+
+        - name: fail if monitor is still part of the cluster
+          fail:
+            msg: "Monitor appears to still be part of the cluster, please check what happened."
+          run_once: true
+          when: mon_to_kill_hostname in (result.stdout | from_json)['quorum_names']
+
+- name: remove monitor entry from ceph config file
+  hosts:
+    - mons
+    - osds
+    - mdss
+    - rgws
+    - nfss
+    - rbdmirrors
+    - clients
+    - iscsigws
+    - mgrs
+    - monitoring
+  gather_facts: false
+  become: True
+  any_errors_fatal: true
+  tasks:
+    - name: update ceph config file
+      when:
+        - shrink_mon_update_cfg | default(false) | bool
+        - hostvars['localhost']['valid_mon_to_kill'] | bool
+      block:
+        - name: gather and delegate facts
+          setup:
+            gather_subset:
+              - 'all'
+              - '!facter'
+              - '!ohai'
+          delegate_to: "{{ item }}"
+          delegate_facts: True
+          with_items: "{{ groups['all'] | difference(groups.get('clients', [])) }}"
+          run_once: true
+          tags: always
+
+        - import_role:
+            name: ceph-defaults
+
+        - import_role:
+            name: ceph-facts
+
+        - import_role:
+            name: ceph-handler
+
+        - import_role:
+            name: ceph-config
+
+- name: show ceph status
+  hosts: localhost
+  become: true
+  tasks:
+    - name: show ceph status
+      delegate_to: "{{ mon_host }}"
+      block:
+        - import_role:
+            name: ceph-defaults
+
+        - name: set_fact ceph_cmd
+          set_fact:
+            ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:ro -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }} --cluster {{ cluster }}"
+
+        - name: show ceph mon status
+          command: "{{ ceph_cmd }} mon stat"
+          changed_when: false
+
+        - name: show ceph health
+          command: "{{ ceph_cmd }} -s"
+          changed_when: false
+
+        - name: warn about ceph config file
+          fail:
+            msg: |
+              `shrink_mon_update_cfg` wasn't set to `true`.
+              Please, update manually your ceph config file on all nodes or rerun this playbook with `-e shrink_mon_update_cfg=true`
+          when: not shrink_mon_update_cfg | default(false) | bool
+          ignore_errors: true

roles/ceph-config/templates/ceph.conf.j2

@@ -23,7 +23,7 @@ osd crush chooseleaf type = 0
 {% endif %}
 {% if nb_mon > 0 and inventory_hostname in groups.get(mon_group_name, []) %}
-mon initial members = {% for host in groups[mon_group_name] %}
+mon initial members = {% for host in groups[mon_group_name] | difference([mon_to_kill | default('')]) %}
 {% if hostvars[host]['ansible_facts']['hostname'] is defined -%}
       {{ hostvars[host]['ansible_facts']['hostname'] }}
 {%- endif %}

roles/ceph-facts/tasks/set_monitor_address.yml

@@ -2,7 +2,7 @@
 - name: set_fact _monitor_addresses to monitor_address_block ipv4
   set_fact:
     _monitor_addresses: "{{ _monitor_addresses | default([]) + [{ 'name': item, 'addr': hostvars[item]['ansible_facts']['all_ipv4_addresses'] | ips_in_ranges(hostvars[item]['monitor_address_block'].split(',')) | first }] }}"
-  with_items: "{{ groups.get(mon_group_name, []) }}"
+  loop: "{{ groups.get(mon_group_name, []) | difference(mon_to_kill | default('')) }}"
   when:
     - "item not in _monitor_addresses | default([]) | selectattr('name', 'defined') | map(attribute='name') | list"
     - hostvars[item]['monitor_address_block'] is defined
@@ -12,7 +12,7 @@
 - name: set_fact _monitor_addresses to monitor_address_block ipv6
   set_fact:
     _monitor_addresses: "{{ _monitor_addresses | default([]) + [{ 'name': item, 'addr': hostvars[item]['ansible_facts']['all_ipv6_addresses'] | ips_in_ranges(hostvars[item]['monitor_address_block'].split(',')) | last | ansible.utils.ipwrap }] }}"
-  with_items: "{{ groups.get(mon_group_name, []) }}"
+  loop: "{{ groups.get(mon_group_name, []) | difference(mon_to_kill | default('')) }}"
   when:
     - "item not in _monitor_addresses | default([]) | selectattr('name', 'defined') | map(attribute='name') | list"
     - hostvars[item]['monitor_address_block'] is defined
@@ -22,7 +22,7 @@
 - name: set_fact _monitor_addresses to monitor_address
   set_fact:
     _monitor_addresses: "{{ _monitor_addresses | default([]) + [{ 'name': item, 'addr': hostvars[item]['monitor_address'] | ansible.utils.ipwrap}] }}"
-  with_items: "{{ groups.get(mon_group_name, []) }}"
+  loop: "{{ groups.get(mon_group_name, []) | difference(mon_to_kill | default('')) }}"
   when:
     - "item not in _monitor_addresses | default([]) | selectattr('name', 'defined') | map(attribute='name') | list"
     - hostvars[item]['monitor_address'] is defined
@@ -31,7 +31,7 @@
 - name: set_fact _monitor_addresses to monitor_interface - ipv4
   set_fact:
     _monitor_addresses: "{{ _monitor_addresses | default([]) + [{ 'name': item, 'addr': hostvars[item]['ansible_facts'][(hostvars[item]['monitor_interface']|replace('-', '_'))][ip_version]['address'] | ansible.utils.ipwrap }] }}"
-  with_items: "{{ groups.get(mon_group_name, []) }}"
+  loop: "{{ groups.get(mon_group_name, []) | difference(mon_to_kill | default('')) }}"
   when:
     - "item not in _monitor_addresses | default([]) | selectattr('name', 'defined') | map(attribute='name') | list"
     - ip_version == 'ipv4'
@@ -42,7 +42,7 @@
 - name: set_fact _monitor_addresses to monitor_interface - ipv6
   set_fact:
     _monitor_addresses: "{{ _monitor_addresses | default([]) + [{ 'name': item, 'addr': hostvars[item]['ansible_facts'][(hostvars[item]['monitor_interface']|replace('-', '_'))][ip_version][0]['address'] | ansible.utils.ipwrap }] }}"
-  with_items: "{{ groups.get(mon_group_name, []) }}"
+  loop: "{{ groups.get(mon_group_name, []) | difference(mon_to_kill | default('')) }}"
   when:
     - "item not in _monitor_addresses | default([]) | selectattr('name', 'defined') | map(attribute='name') | list"
     - ip_version == 'ipv6'

tox.ini

@@ -118,6 +118,7 @@ commands=
   ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/shrink-mon.yml --extra-vars "\
     ireallymeanit=yes \
     mon_to_kill={env:MON_TO_KILL:mon2} \
+    shrink_mon_update_cfg=true \
   "

 [shrink-osd]
 commands=