---
# This playbook shrinks the Ceph monitors from your cluster.
# It can remove a Ceph monitor from the cluster and ALL ITS DATA.
#
# Use it like this:
# ansible-playbook shrink-mon.yml -e mon_to_kill=ceph-mon01
#     Prompts for confirmation to shrink, defaults to no and
#     doesn't shrink the cluster. yes shrinks the cluster.
#
# ansible-playbook -e ireallymeanit=yes|no shrink-mon.yml
#     Overrides the prompt using -e option. Can be used in
#     automation scripts to avoid interactive prompt.
#
# Required extra var:
#   mon_to_kill  - inventory name of the single monitor to remove.
# Optional extra var:
#   ireallymeanit - 'yes' to skip the interactive confirmation.

# Play 1: runs against every monitor host. It sets no gather_facts override,
# so Ansible's default fact gathering applies; the second play later reads
# hostvars[...]['ansible_facts']['hostname'] for monitors other than the one
# it runs on. The debug task itself only prints a message.
- name: Gather facts and check the init system
  hosts: "{{ mon_group_name | default('mons') }}"
  become: true
  tasks:
    - name: Gather facts on all Ceph hosts for following reference
      ansible.builtin.debug:
        msg: "gather facts on all Ceph hosts for following reference"

# Play 2: runs on the first monitor of the inventory group and delegates the
# actual work either to the monitor being removed (service stop, store purge)
# or to a surviving monitor (all `ceph` CLI calls).
- name: Confirm whether user really meant to remove monitor from the ceph cluster
  hosts: mons[0]
  become: true
  vars_prompt:
    - name: ireallymeanit  # noqa: name[casing]
      prompt: Are you sure you want to shrink the cluster?
      # Quoted so it stays the string 'no' instead of a YAML 1.1 boolean.
      default: 'no'
      private: false
  vars:
    mon_group_name: mons

  # Sanity checks first; nothing is modified until every check has passed.
  pre_tasks:
    - name: Exit playbook, if only one monitor is present in cluster
      ansible.builtin.fail:
        msg: >-
          You are about to shrink the only monitor present in the cluster.
          If you really want to do that, please use the purge-cluster playbook.
      when: groups[mon_group_name] | length | int == 1

    - name: Exit playbook, if no monitor was given
      ansible.builtin.fail:
        msg: >-
          mon_to_kill must be declared.
          Exiting shrink-mon playbook, no monitor was removed.
          On the command line when invoking the playbook, you can use
          -e mon_to_kill=ceph-mon01 argument.
          You can only remove a single monitor each time the playbook runs.
      when: mon_to_kill is not defined

    - name: Exit playbook, if the monitor is not part of the inventory
      ansible.builtin.fail:
        msg: "It seems that the host given is not part of your inventory, please make sure it is."
      when: mon_to_kill not in groups[mon_group_name]

    - name: Exit playbook, if user did not mean to shrink cluster
      ansible.builtin.fail:
        msg: >-
          Exiting shrink-mon playbook, no monitor was removed.
          To shrink the cluster, either say 'yes' on the prompt
          or use `-e ireallymeanit=yes` on the command line when
          invoking the playbook
      when: ireallymeanit != 'yes'

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    - name: Import ceph-facts role
      ansible.builtin.import_role:
        name: ceph-facts
        tasks_from: container_binary

  tasks:
    # The loop overwrites mon_host on every non-skipped iteration, so the
    # fact ends up holding the last inventory monitor that is not mon_to_kill.
    - name: Pick a monitor different than the one we want to remove
      ansible.builtin.set_fact:
        mon_host: "{{ item }}"
      loop: "{{ groups[mon_group_name] }}"
      when: item != mon_to_kill

    # Only set for containerized deployments; the commands below prepend
    # whatever value container_exec_cmd has at that point.
    - name: Set container_exec_cmd fact
      ansible.builtin.set_fact:
        container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[mon_host]['ansible_facts']['hostname'] }}"
      when: containerized_deployment | bool

    # Retried up to 5 times, 2 seconds apart, until the output mentions HEALTH.
    - name: Exit playbook, if can not connect to the cluster
      ansible.builtin.command: "{{ container_exec_cmd }} timeout 5 ceph --cluster {{ cluster }} health"
      register: ceph_health
      changed_when: false
      until: ceph_health.stdout.find("HEALTH") > -1
      delegate_to: "{{ mon_host }}"
      retries: 5
      delay: 2

    - name: Set_fact mon_to_kill_hostname
      ansible.builtin.set_fact:
        mon_to_kill_hostname: "{{ hostvars[mon_to_kill]['ansible_facts']['hostname'] }}"

    # Best effort: errors are ignored (failed_when: false) and the outcome is
    # checked by the quorum verification in post_tasks instead.
    - name: Stop monitor service(s)
      ansible.builtin.service:
        name: ceph-mon@{{ mon_to_kill_hostname }}
        state: stopped
        enabled: false
      delegate_to: "{{ mon_to_kill }}"
      failed_when: false

    # Destructive: deletes the monitor's data directory on the removed host.
    - name: Purge monitor store
      ansible.builtin.file:
        path: /var/lib/ceph/mon/{{ cluster }}-{{ mon_to_kill_hostname }}
        state: absent
      delegate_to: "{{ mon_to_kill }}"

    # Best effort as well; success is established by the verification below.
    - name: Remove monitor from the quorum
      ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon remove {{ mon_to_kill_hostname }}"
      changed_when: false
      failed_when: false
      delegate_to: "{{ mon_host }}"

  post_tasks:
    # The task never fails by itself (failed_when: false), so the exit code is
    # checked before stdout is parsed: a failed command can leave stdout empty
    # and from_json would then abort the play with a templating error instead
    # of reaching the explicit failure task below.
    - name: Verify the monitor is out of the cluster
      ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} quorum_status -f json"
      delegate_to: "{{ mon_host }}"
      changed_when: false
      failed_when: false
      register: result
      until: >-
        result.rc | default(1) == 0
        and mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']
      retries: 2
      delay: 10

    - name: Please remove the monitor from your ceph configuration file
      ansible.builtin.debug:
        msg: "The monitor has been successfully removed from the cluster. Please remove the monitor entry from the rest of your ceph configuration files, cluster wide."
      run_once: true
      when: >-
        result.rc | default(1) == 0
        and mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']

    # Also triggers when the quorum query itself failed, since removal could
    # not be confirmed in that case.
    - name: Fail if monitor is still part of the cluster
      ansible.builtin.fail:
        msg: "Monitor appears to still be part of the cluster, please check what happened."
      run_once: true
      when: >-
        result.rc | default(1) != 0
        or mon_to_kill_hostname in (result.stdout | from_json)['quorum_names']

    - name: Show ceph health
      ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} -s"
      delegate_to: "{{ mon_host }}"
      changed_when: false

    - name: Show ceph mon status
      ansible.builtin.command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon stat"
      delegate_to: "{{ mon_host }}"
      changed_when: false