---
# This playbook shrinks the Ceph monitors from your cluster.
# It can remove a Ceph monitor from the cluster and ALL ITS DATA.
#
# Use it like this:
# ansible-playbook shrink-mon.yml -e mon_to_kill=ceph-mon01
#     Prompts for confirmation to shrink, defaults to no and
#     doesn't shrink the cluster. yes shrinks the cluster.
#
# ansible-playbook -e ireallymeanit=yes|no shrink-mon.yml
#     Overrides the prompt using -e option. Can be used in
#     automation scripts to avoid interactive prompt.
#
# Required variable:
#   mon_to_kill   inventory name of the single monitor to remove; it must
#                 be a member of the monitor group.

# Play 1: touch every monitor host with a no-op debug task. The second play
# reads hostvars[...]['ansible_hostname'] for hosts it is not running on, so
# this play relies on the implicit fact gathering that precedes its tasks.
- name: gather facts and check the init system
  hosts: "{{ mon_group_name|default('mons') }}"
  become: true
  tasks:
    - debug:
        msg: gather facts on all Ceph hosts for following reference

# Play 2: validate the request, then stop, purge and remove the monitor.
# It runs on the first host of the monitor group; the tasks that must act on
# a specific machine are delegated to it explicitly.
- name: confirm whether user really meant to remove monitor from the ceph cluster
  hosts: "{{ groups[mon_group_name][0] }}"
  become: true

  vars_prompt:
    # Skipped when ireallymeanit is already supplied with -e.
    - name: ireallymeanit
      prompt: Are you sure you want to shrink the cluster?
      default: 'no'
      private: false

  vars:
    mon_group_name: mons

  pre_tasks:
    # Safety checks: each one aborts the play before anything is modified.
    - name: exit playbook, if only one monitor is present in cluster
      fail:
        msg: >-
          You are about to shrink the only monitor present in the cluster.
          If you really want to do that, please use the purge-cluster playbook.
      when: groups[mon_group_name] | length | int == 1

    - name: exit playbook, if no monitor was given
      fail:
        msg: >-
          mon_to_kill must be declared.
          Exiting shrink-mon playbook, no monitor was removed.
          On the command line when invoking the playbook, you can use
          -e mon_to_kill=ceph-mon01 argument.
          You can only remove a single monitor each time the playbook runs.
      when: mon_to_kill is not defined

    - name: exit playbook, if the monitor is not part of the inventory
      fail:
        msg: >-
          It seems that the host given is not part of your inventory,
          please make sure it is.
      when: mon_to_kill not in groups[mon_group_name]

    - name: exit playbook, if user did not mean to shrink cluster
      fail:
        msg: >-
          Exiting shrink-mon playbook, no monitor was removed.
          To shrink the cluster, either say 'yes' on the prompt
          or use `-e ireallymeanit=yes` on the command line when
          invoking the playbook
      when: ireallymeanit != 'yes'

    # The tasks below use `cluster`, `containerized_deployment` and
    # `container_binary`, none of which is set in this file.
    # NOTE(review): presumably these two roles provide them - confirm.
    - import_role:
        name: ceph-defaults

    - import_role:
        name: ceph-facts
        tasks_from: container_binary

  tasks:
    # The loop overwrites mon_host on every iteration that is not skipped, so
    # the value kept is the last monitor in the group other than mon_to_kill.
    - name: pick a monitor different than the one we want to remove
      set_fact:
        mon_host: "{{ item }}"
      with_items: "{{ groups[mon_group_name] }}"
      when: item != mon_to_kill

    # NOTE(review): container_exec_cmd is only set here for containerized
    # deployments, yet every ceph command below interpolates it. Presumably a
    # default comes from ceph-defaults - verify for bare-metal runs.
    - name: "set_fact container_exec_cmd build {{ container_binary }} exec command (containerized)"
      set_fact:
        container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[mon_host]['ansible_hostname'] }}"
      when: containerized_deployment | bool

    # Retried up to 5 times, 2 seconds apart, until the output mentions HEALTH.
    - name: exit playbook, if can not connect to the cluster
      command: "{{ container_exec_cmd }} timeout 5 ceph --cluster {{ cluster }} health"
      register: ceph_health
      until: ceph_health.stdout.find("HEALTH") > -1
      delegate_to: "{{ mon_host }}"
      retries: 5
      delay: 2

    # The unit name, store path and `mon remove` argument use the gathered
    # ansible_hostname of the target rather than its inventory name.
    - name: set_fact mon_to_kill_hostname
      set_fact:
        mon_to_kill_hostname: "{{ hostvars[mon_to_kill]['ansible_hostname'] }}"

    # Best effort: failed_when is false, so a failure to stop the unit does
    # not abort the removal.
    - name: stop monitor service(s)
      service:
        name: "ceph-mon@{{ mon_to_kill_hostname }}"
        state: stopped
        enabled: false
      delegate_to: "{{ mon_to_kill }}"
      failed_when: false

    # Destructive: deletes the monitor's data directory on the removed host.
    - name: purge monitor store
      file:
        path: "/var/lib/ceph/mon/{{ cluster }}-{{ mon_to_kill_hostname }}"
        state: absent
      delegate_to: "{{ mon_to_kill }}"

    # Errors are ignored here; the post_tasks check decides success.
    - name: remove monitor from the quorum
      command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon remove {{ mon_to_kill_hostname }}"
      failed_when: false
      delegate_to: "{{ mon_host }}"

  post_tasks:
    # Polls quorum_status until the removed hostname is no longer listed in
    # quorum_names (retries: 2, delay: 10 seconds).
    # NOTE(review): a stopped monitor may drop out of quorum_names even if
    # `mon remove` failed, so this may not prove the monmap entry is gone -
    # confirm whether a monmap check is wanted here.
    - name: verify the monitor is out of the cluster
      command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} quorum_status -f json"
      delegate_to: "{{ mon_host }}"
      failed_when: false
      register: result
      until: mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']
      retries: 2
      delay: 10

    - name: please remove the monitor from your ceph configuration file
      debug:
        msg: >-
          The monitor has been successfully removed from the cluster.
          Please remove the monitor entry from the rest of your ceph
          configuration files, cluster wide.
      run_once: true
      when: mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']

    - name: fail if monitor is still part of the cluster
      fail:
        msg: >-
          Monitor appears to still be part of the cluster,
          please check what happened.
      run_once: true
      when: mon_to_kill_hostname in (result.stdout | from_json)['quorum_names']

    # Final cluster state, run on the surviving monitor.
    - name: show ceph health
      command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} -s"
      delegate_to: "{{ mon_host }}"

    - name: show ceph mon status
      command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon stat"
      delegate_to: "{{ mon_host }}"