---
# This playbook shrinks the Ceph monitors from your cluster.
# It can remove a Ceph monitor from the cluster and ALL ITS DATA.
#
# Use it like this:
# ansible-playbook shrink-mon.yml -e mon_to_kill=ceph-mon01
#     Prompts for confirmation to shrink, defaults to no and
#     doesn't shrink the cluster. yes shrinks the cluster.
#
# ansible-playbook -e ireallymeanit=yes|no shrink-mon.yml
#     Overrides the prompt using -e option. Can be used in
#     automation scripts to avoid interactive prompt.
#
# Only one monitor can be removed per run (see the mon_to_kill check below).

# Play 1: touch every monitor host so that its facts are available through
# hostvars; the second play reads hostvars[...]['ansible_hostname'].
- name: gather facts and check the init system
  hosts:
    - "{{ mon_group_name|default('mons') }}"
  become: true
  tasks:
    - debug:
        msg: "gather facts on all Ceph hosts for following reference"

# Play 2: runs on localhost and delegates each action either to a surviving
# monitor (mon_host) or to the monitor being removed (mon_to_kill).
- name: confirm whether user really meant to remove monitor from the ceph cluster
  hosts:
    - localhost
  become: true

  vars_prompt:
    - name: ireallymeanit
      prompt: Are you sure you want to shrink the cluster?
      # Kept as the quoted string 'no': it is compared against 'yes' below.
      default: 'no'
      private: false

  vars:
    mon_group_name: mons

  # Sanity checks: abort before anything is touched.
  pre_tasks:
    - name: exit playbook, if only one monitor is present in cluster
      fail:
        msg: >-
          You are about to shrink the only monitor present in the cluster.
          If you really want to do that, please use the purge-cluster playbook.
      when:
        - groups[mon_group_name] | length | int == 1

    - name: exit playbook, if no monitor was given
      fail:
        msg: >-
          mon_to_kill must be declared.
          Exiting shrink-mon playbook, no monitor was removed.
          On the command line when invoking the playbook, you can use
          -e mon_to_kill=ceph-mon01 argument. You can only remove a single
          monitor each time the playbook runs.
      when:
        - mon_to_kill is not defined

    - name: exit playbook, if the monitor is not part of the inventory
      fail:
        msg: >-
          It seems that the host given is not part of your inventory,
          please make sure it is.
      when:
        - mon_to_kill not in groups[mon_group_name]

    - name: exit playbook, if user did not mean to shrink cluster
      fail:
        msg: >-
          Exiting shrink-mon playbook, no monitor was removed.
          To shrink the cluster, either say 'yes' on the prompt or
          use `-e ireallymeanit=yes` on the command line when
          invoking the playbook
      when:
        - ireallymeanit != 'yes'

  roles:
    - ceph-defaults

  post_tasks:
    # The loop overwrites mon_host on every matching item, so the last monitor
    # in the group that is not mon_to_kill is the one that gets used.
    - name: pick a monitor different than the one we want to remove
      set_fact:
        mon_host: "{{ item }}"
      with_items: "{{ groups[mon_group_name] }}"
      when:
        - item != mon_to_kill

    - name: set_fact docker_exec_cmd build docker exec command (containerized)
      set_fact:
        docker_exec_cmd: "docker exec ceph-mon-{{ hostvars[mon_host]['ansible_hostname'] }}"
      when: containerized_deployment | bool

    # docker_exec_cmd is only set above for containerized deployments, so every
    # use below falls back to an empty prefix when it is unset or empty.
    - name: exit playbook, if can not connect to the cluster
      command: "{{ docker_exec_cmd | default('', true) }} timeout 5 ceph --cluster {{ cluster }} health"
      register: ceph_health
      until: ceph_health.stdout.find("HEALTH") > -1
      delegate_to: "{{ mon_host }}"
      retries: 5
      delay: 2

    - name: set_fact mon_to_kill_hostname
      set_fact:
        mon_to_kill_hostname: "{{ hostvars[mon_to_kill]['ansible_hostname'] }}"

    # Errors are ignored here (failed_when: false) - presumably so that an
    # already stopped or missing unit does not abort the removal.
    - name: stop monitor service(s)
      service:
        name: "ceph-mon@{{ mon_to_kill_hostname }}"
        state: stopped
        enabled: false
      delegate_to: "{{ mon_to_kill }}"
      failed_when: false

    # Destructive: deletes the monitor's data directory on the removed host.
    - name: purge monitor store
      file:
        path: "/var/lib/ceph/mon/{{ cluster }}-{{ mon_to_kill_hostname }}"
        state: absent
      delegate_to: "{{ mon_to_kill }}"

    # Errors are ignored here; the outcome is checked by the quorum
    # verification that follows.
    - name: remove monitor from the quorum
      command: "{{ docker_exec_cmd | default('', true) }} ceph --cluster {{ cluster }} mon remove {{ mon_to_kill_hostname }}"
      failed_when: false
      delegate_to: "{{ mon_host }}"

    # NOTE: it takes a couple of seconds for the other monitors to notice that
    # one member has left, so the quorum is polled (retries/delay) rather than
    # checked once. The return code is part of the condition: a failed query
    # prints nothing, and an empty stdout must not be read as "monitor gone".
    - name: verify the monitor is out of the cluster
      shell: |
        {{ docker_exec_cmd | default('', true) }} ceph --cluster {{ cluster }} -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["quorum_names"])'
      delegate_to: "{{ mon_host }}"
      failed_when: false
      register: result
      until: result.rc == 0 and mon_to_kill_hostname not in result.stdout
      retries: 2
      delay: 10

    # Without this guard a failed query would be reported as a successful
    # removal by the next task.
    - name: fail if the quorum could not be queried
      fail:
        msg: >-
          Unable to read the quorum from the cluster, cannot confirm that
          the monitor was removed. Please check what happened.
      run_once: true
      when:
        - result.rc != 0

    - name: please remove the monitor from your ceph configuration file
      debug:
        msg: >-
          The monitor has been successfully removed from the cluster.
          Please remove the monitor entry from the rest of your ceph
          configuration files, cluster wide.
      run_once: true
      when:
        - mon_to_kill_hostname not in result.stdout

    - name: fail if monitor is still part of the cluster
      fail:
        msg: "Monitor appears to still be part of the cluster, please check what happened."
      run_once: true
      when:
        - mon_to_kill_hostname in result.stdout

    - name: show ceph health
      command: "{{ docker_exec_cmd | default('', true) }} ceph --cluster {{ cluster }} -s"
      delegate_to: "{{ mon_host }}"

    - name: show ceph mon status
      command: "{{ docker_exec_cmd | default('', true) }} ceph --cluster {{ cluster }} mon stat"
      delegate_to: "{{ mon_host }}"