From 864acaae10ef476fb19be27061eb6d44d6cca70b Mon Sep 17 00:00:00 2001 From: Dimitri Savineau Date: Wed, 20 Jan 2021 17:39:44 -0500 Subject: [PATCH] cephadm-adopt: make the playbook idempotent If the cephadm-adopt.yml fails during the first execution and some daemons have already been adopted by cephadm then we can't rerun the playbook because the old container won't exist anymore. Error: no container with name or ID ceph-mon-xxx found: no such container If the daemons are adopted then the old systemd unit doesn't exist anymore so any call to that unit with systemd will fail. Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1918424 Signed-off-by: Dimitri Savineau (cherry picked from commit 6886700a002e5d59bed5c18d082f8902bd57978c) --- infrastructure-playbooks/cephadm-adopt.yml | 75 ++++++++++++++-------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/infrastructure-playbooks/cephadm-adopt.yml b/infrastructure-playbooks/cephadm-adopt.yml index f5cf3ec58..994208a56 100644 --- a/infrastructure-playbooks/cephadm-adopt.yml +++ b/infrastructure-playbooks/cephadm-adopt.yml @@ -169,18 +169,17 @@ - name: set_fact ceph_cmd set_fact: - container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }}" - when: containerized_deployment | bool + ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:z -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }} --cluster {{ cluster }}" - name: get current fsid - command: "{{ container_exec_cmd | default('') }} ceph --admin-daemon /var/run/ceph/{{ cluster }}-mon.{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }}.asok config get fsid --format json" + command: "{{ ceph_cmd }} fsid" register: current_fsid run_once: true changed_when: false delegate_to: "{{ groups[mon_group_name][0] }}" - name: get a minimal ceph configuration - command: "{{ container_exec_cmd }} ceph config generate-minimal-conf" + command: "{{ ceph_cmd }} config generate-minimal-conf" register: minimal_config run_once: true changed_when: false @@ -188,28 +187,29 @@ - name: set_fact fsid set_fact: - fsid: "{{ (current_fsid.stdout | from_json).fsid }}" + fsid: "{{ current_fsid.stdout }}" + run_once: true - name: enable cephadm mgr module - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} mgr module enable cephadm" + command: "{{ ceph_cmd }} mgr module enable cephadm" changed_when: false run_once: true delegate_to: '{{ groups[mon_group_name][0] }}' - name: set cephadm as orchestrator backend - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} orch set backend cephadm" + command: "{{ ceph_cmd }} orch set backend cephadm" changed_when: false run_once: true delegate_to: '{{ groups[mon_group_name][0] }}' - name: generate cephadm ssh key - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} cephadm generate-key" + command: "{{ ceph_cmd }} cephadm generate-key" changed_when: false run_once: true delegate_to: '{{ groups[mon_group_name][0] }}' - name: get the cephadm ssh pub key - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} cephadm get-pub-key" + command: "{{ ceph_cmd }} cephadm get-pub-key" changed_when: false run_once: true register: cephadm_pubpkey @@ -221,7 +221,7 @@ key: '{{ cephadm_pubpkey.stdout }}' - name: set cephadm ssh user to {{ cephadm_ssh_user | default('root') }} - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} cephadm set-user {{ cephadm_ssh_user | default('root') }}" + command: "{{ ceph_cmd }} cephadm set-user {{ cephadm_ssh_user | default('root') }}" changed_when: false run_once: true delegate_to: "{{ groups[mon_group_name][0] }}" @@ -233,13 +233,13 @@ CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}' - name: set default container image in ceph configuration - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set global container_image {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}" + command: "{{ ceph_cmd }} config set global container_image {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}" changed_when: false run_once: true delegate_to: '{{ groups[mon_group_name][0] }}' - name: set container image base in ceph configuration - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_base {{ ceph_docker_registry }}/{{ ceph_docker_image }}" + command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_base {{ ceph_docker_registry }}/{{ ceph_docker_image }}" changed_when: false run_once: true delegate_to: '{{ groups[mon_group_name][0] }}' @@ -249,32 +249,32 @@ run_once: true block: - name: set alertmanager container image in ceph configuration - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_alertmanager {{ alertmanager_container_image }}" + command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_alertmanager {{ alertmanager_container_image }}" changed_when: false delegate_to: '{{ groups[mon_group_name][0] }}' - name: set grafana container image in ceph configuration - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_grafana {{ grafana_container_image }}" + command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_grafana {{ grafana_container_image }}" changed_when: false delegate_to: '{{ groups[mon_group_name][0] }}' - name: set node-exporter container image in ceph configuration - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_node_exporter {{ node_exporter_container_image }}" + command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_node_exporter {{ node_exporter_container_image }}" changed_when: false delegate_to: '{{ groups[mon_group_name][0] }}' - name: set prometheus container image in ceph configuration - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_prometheus {{ prometheus_container_image }}" + command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_prometheus {{ prometheus_container_image }}" changed_when: false delegate_to: '{{ groups[mon_group_name][0] }}' - name: manage nodes with cephadm - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} orch host add {{ ansible_facts['hostname'] }} {{ ansible_facts['default_ipv4']['address'] }} {{ group_names | join(' ') }}" + command: "{{ ceph_cmd }} orch host add {{ ansible_facts['hostname'] }} {{ ansible_facts['default_ipv4']['address'] }} {{ group_names | join(' ') }}" changed_when: false delegate_to: '{{ groups[mon_group_name][0] }}' - name: add ceph label for core component - command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} orch host label add {{ ansible_facts['hostname'] }} ceph" + command: "{{ ceph_cmd }} orch host label add {{ ansible_facts['hostname'] }} ceph" changed_when: false delegate_to: '{{ groups[mon_group_name][0] }}' when: inventory_hostname in groups.get(mon_group_name, []) or @@ -284,12 +284,8 @@ inventory_hostname in groups.get(mgr_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) - - name: set_fact ceph_cmd - set_fact: - ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:z -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }}" - - name: get the client.admin keyring - command: "{{ ceph_cmd }} --cluster {{ cluster }} auth get client.admin" + command: "{{ ceph_cmd }} auth get client.admin" changed_when: false run_once: true delegate_to: '{{ groups[mon_group_name][0] }}' @@ -312,7 +308,7 @@ - "{{ groups.get(rbdmirror_group_name, []) }}" - name: assimilate ceph configuration - command: "{{ ceph_cmd }} --cluster {{ cluster }} config assimilate-conf -i /etc/ceph/{{ cluster }}.conf" + command: "{{ ceph_cmd }} config assimilate-conf -i /etc/ceph/{{ cluster }}.conf" changed_when: false when: inventory_hostname in groups.get(mon_group_name, []) or inventory_hostname in groups.get(osd_group_name, []) or @@ -483,7 +479,7 @@ name: ceph-defaults - name: get pool list - command: "{{ ceph_cmd }} --cluster {{ cluster }} osd pool ls detail -f json" + command: "{{ ceph_cmd }} osd pool ls detail -f json" register: pool_list run_once: true delegate_to: "{{ groups[mon_group_name][0] }}" @@ -491,7 +487,7 @@ check_mode: false - name: get balancer module status - command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json" + command: "{{ ceph_cmd }} balancer status -f json" register: balancer_status_adopt run_once: true delegate_to: "{{ groups[mon_group_name][0] }}" @@ -505,7 +501,7 @@ with_items: "{{ pool_list.stdout | default('{}') | from_json }}" - name: disable balancer - command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off" + command: "{{ ceph_cmd }} balancer off" run_once: true delegate_to: "{{ groups[mon_group_name][0] }}" changed_when: false @@ -599,6 +595,12 @@ - /etc/systemd/system/ceph-osd.target - "{{ ceph_osd_docker_run_script_path | default('/usr/share') }}/ceph-osd-run.sh" + - name: remove osd directory + file: + path: "/var/lib/ceph/osd/{{ cluster }}-{{ item }}" + state: absent + loop: '{{ (osd_list.stdout | from_json).keys() | list }}' + - name: waiting for clean pgs... command: "{{ cephadm_cmd }} shell --fsid {{ fsid }} -- ceph --cluster {{ cluster }} pg stat --format json" changed_when: false @@ -647,7 +649,7 @@ CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}' - name: re-enable balancer - command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on" + command: "{{ ceph_cmd }} balancer on" run_once: true delegate_to: "{{ groups[mon_group_name][0] }}" changed_when: false @@ -685,6 +687,7 @@ name: "ceph-mds@{{ ansible_facts['hostname'] }}" state: stopped enabled: false + failed_when: false - name: stop and disable ceph-mds systemd target service: @@ -796,6 +799,7 @@ name: "ceph-radosgw@rgw.{{ ansible_facts['hostname'] }}.{{ item.instance_name }}" state: stopped enabled: false + failed_when: false loop: '{{ rgw_instances }}' - name: stop and disable ceph-radosgw systemd target @@ -1001,6 +1005,7 @@ name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_facts['hostname'] }}" state: stopped enabled: false + failed_when: false - name: stop and disable rbd-mirror systemd target service: @@ -1088,12 +1093,23 @@ - name: with dashboard enabled when: dashboard_enabled | bool block: + - name: ensure alertmanager/prometheus data directories are present + file: + path: "{{ item }}" + state: directory + owner: "{{ prometheus_user_id }}" + group: "{{ prometheus_user_id }}" + with_items: + - "{{ alertmanager_data_dir }}" + - "{{ prometheus_data_dir }}" + # (workaround) cephadm adopt alertmanager only stops prometheus-alertmanager systemd service - name: stop and disable alertmanager systemd unit service: name: alertmanager state: stopped enabled: false + failed_when: false # (workaround) cephadm adopt alertmanager only uses /etc/prometheus/alertmanager.yml - name: create alertmanager config symlink @@ -1133,6 +1149,7 @@ name: prometheus state: stopped enabled: false + failed_when: false - name: remove alertmanager data symlink file: @@ -1196,6 +1213,7 @@ name: grafana-server state: stopped enabled: false + failed_when: false - name: adopt grafana daemon cephadm_adopt: @@ -1242,6 +1260,7 @@ name: node_exporter state: stopped enabled: false + failed_when: false - name: remove node_exporter systemd unit file file: