mirror of https://github.com/ceph/ceph-ansible.git
ceph-crash: introduce new role ceph-crash
This commit introduces a new role, `ceph-crash`, which deploys everything needed for the ceph-crash daemon.

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>

pull/5571/head
parent
d490968fc8
commit
9d2f2108e1
|
@ -173,6 +173,16 @@
|
|||
tasks_from: systemd.yml
|
||||
when: inventory_hostname in groups.get(rgw_group_name, [])
|
||||
|
||||
# Run only the systemd.yml tasks of the ceph-crash role, and only on hosts
# that belong to at least one of the daemon groups tested below.
- import_role:
    name: ceph-crash
    tasks_from: systemd.yml
  when: >-
    inventory_hostname in groups.get(mon_group_name, []) or
    inventory_hostname in groups.get(osd_group_name, []) or
    inventory_hostname in groups.get(mds_group_name, []) or
    inventory_hostname in groups.get(rgw_group_name, []) or
    inventory_hostname in groups.get(mgr_group_name, []) or
    inventory_hostname in groups.get(rbdmirror_group_name, [])
|
||||
|
||||
- name: dashboard configuration
|
||||
when: dashboard_enabled | bool
|
||||
block:
|
||||
|
|
|
@ -610,6 +610,29 @@
|
|||
- /var/lib/ceph/bootstrap-mgr
|
||||
- /var/lib/ceph/tmp
|
||||
|
||||
# Purge play for ceph-crash.service: targets every daemon group listed under
# hosts, stops and disables the service, then deletes /var/lib/ceph/crash.
- name: purge ceph-crash daemons
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  gather_facts: false
  become: true
  tasks:
    - name: stop ceph-crash service
      service:
        name: ceph-crash.service
        state: stopped
        enabled: false
      # a failure to stop/disable the unit never aborts the purge
      failed_when: false

    - name: remove /var/lib/ceph/crash
      file:
        path: /var/lib/ceph/crash
        state: absent
|
||||
|
||||
|
||||
- name: final cleanup - check any running ceph, purge ceph packages, purge config and remove data
|
||||
|
||||
|
|
|
@ -468,6 +468,35 @@
|
|||
failed_when: false
|
||||
when: dashboard_enabled | bool
|
||||
|
||||
# Purge play for the containerized ceph-crash daemon: targets every daemon
# group listed under hosts, stops and disables the per-host unit instance,
# removes the unit file and deletes /var/lib/ceph/crash.
- name: purge ceph-crash containers
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  gather_facts: false
  become: true
  tasks:
    - name: stop ceph-crash container
      service:
        name: "ceph-crash@{{ ansible_hostname }}"
        state: stopped
        enabled: false
      # a failure to stop/disable the unit never aborts the purge
      failed_when: false

    # The daemon runs as an instance of a systemd template unit
    # (ceph-crash@<hostname>, see the task above), so the file to delete is
    # the template ceph-crash@.service, not ceph-crash.service.
    - name: remove service file
      file:
        path: "/etc/systemd/system/ceph-crash@.service"
        state: absent
      failed_when: false

    - name: remove /var/lib/ceph/crash
      file:
        path: /var/lib/ceph/crash
        state: absent
|
||||
|
||||
- name: check container hosts
|
||||
|
||||
hosts:
|
||||
|
|
|
@ -912,6 +912,27 @@
|
|||
- import_role:
|
||||
name: ceph-client
|
||||
|
||||
# Upgrade play for ceph-crash: on every daemon group listed under hosts,
# import ceph-defaults, the container_binary tasks of ceph-facts and
# ceph-handler, then apply the ceph-crash role.
- name: upgrade ceph-crash daemons
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  gather_facts: false
  become: true
  tasks:
    - import_role:
        name: ceph-defaults

    - import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - import_role:
        name: ceph-handler

    - import_role:
        name: ceph-crash
|
||||
|
||||
- name: complete upgrade
|
||||
hosts:
|
||||
- "{{ mon_group_name | default('mons') }}"
|
||||
|
|
|
@ -546,3 +546,37 @@
|
|||
|
||||
- import_role:
|
||||
name: ceph-nfs
|
||||
|
||||
# Migration play for ceph-crash, processed one host at a time (serial: 1):
# stop and disable the ceph-crash service, then apply the ceph-crash role
# with containerized_deployment forced to true.
- name: switching from non-containerized to containerized ceph-crash
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  vars:
    containerized_deployment: true
  serial: 1
  become: true
  tasks:
    - name: stop non-containerized ceph-crash
      service:
        name: ceph-crash
        state: stopped
        enabled: false

    - import_role:
        name: ceph-defaults

    - import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - import_role:
        name: ceph-handler

    - import_role:
        name: ceph-crash
|
|
@ -63,6 +63,14 @@
|
|||
- ceph_nfs_container_stat.get('rc') == 0
|
||||
- ceph_nfs_container_stat.get('stdout_lines', [])|length != 0
|
||||
|
||||
# Inspect the container whose id is held in ceph_crash_container_stat.stdout.
# Skipped unless that earlier lookup returned rc 0 and at least one line.
- name: inspect ceph crash container
  command: "{{ container_binary }} inspect {{ ceph_crash_container_stat.stdout }}"
  register: ceph_crash_inspect
  changed_when: false
  when:
    - ceph_crash_container_stat.get('rc') == 0
    - ceph_crash_container_stat.get('stdout_lines', [])|length != 0
|
||||
|
||||
# NOTE(leseb): using failed_when to handle the case when the image is not present yet
|
||||
- name: "inspecting ceph mon container image before pulling"
|
||||
command: "{{ container_binary }} inspect {{ (ceph_mon_inspect.stdout | from_json)[0].Image }}"
|
||||
|
@ -127,6 +135,13 @@
|
|||
- nfs_group_name in group_names
|
||||
- ceph_nfs_inspect.get('rc') == 0
|
||||
|
||||
# Inspect the image referenced by the ceph-crash container inspection output.
# failed_when is false so a failing image inspection does not stop the play.
- name: "inspecting ceph crash container image before pulling"
  command: "{{ container_binary }} inspect {{ (ceph_crash_inspect.stdout | from_json)[0].Image }}"
  register: ceph_crash_container_inspect_before_pull
  changed_when: false
  failed_when: false
  when: ceph_crash_inspect.get('rc') == 0
|
||||
|
||||
- name: set_fact ceph_mon_image_repodigest_before_pulling
|
||||
set_fact:
|
||||
ceph_mon_image_repodigest_before_pulling: "{{ (ceph_mon_container_inspect_before_pull.stdout | from_json)[0].Id }}"
|
||||
|
@ -162,6 +177,11 @@
|
|||
- mgr_group_name in group_names
|
||||
- ceph_mgr_container_inspect_before_pull.get('rc') == 0
|
||||
|
||||
# Record the Id field of the ceph-crash image inspection as the pre-pull
# digest; only when that inspection returned rc 0.
- name: set_fact ceph_crash_image_repodigest_before_pulling
  when: ceph_crash_container_inspect_before_pull.get('rc') == 0
  set_fact:
    ceph_crash_image_repodigest_before_pulling: "{{ (ceph_crash_container_inspect_before_pull.stdout | from_json)[0].Id }}"
|
||||
|
||||
- name: set_fact ceph_rbd_mirror_image_repodigest_before_pulling
|
||||
set_fact:
|
||||
ceph_rbd_mirror_image_repodigest_before_pulling: "{{ (ceph_rbd_mirror_container_inspect_before_pull.stdout | from_json)[0].Id }}"
|
||||
|
@ -266,6 +286,15 @@
|
|||
- ceph_nfs_container_inspect_before_pull.get('rc') == 0
|
||||
- ceph_nfs_image_repodigest_before_pulling != image_repodigest_after_pulling
|
||||
|
||||
# When the pre-pull digest differs from image_repodigest_after_pulling, record
# that in ceph_crash_image_updated. changed_when is forced to true so the
# "restart ceph crash" notification is always sent when this task runs.
- name: set_fact ceph_crash_image_updated
  when:
    - ceph_crash_container_inspect_before_pull.get('rc') == 0
    - ceph_crash_image_repodigest_before_pulling != image_repodigest_after_pulling
  set_fact:
    ceph_crash_image_updated: "{{ ceph_crash_image_repodigest_before_pulling != image_repodigest_after_pulling }}"
  changed_when: true
  notify: restart ceph crash
|
||||
|
||||
- name: export local ceph dev image
|
||||
command: >
|
||||
{{ container_binary }} save -o "/tmp/{{ ceph_docker_username }}-{{ ceph_docker_imagename }}-{{ ceph_docker_image_tag }}.tar"
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
---
# Ansible Galaxy metadata for the ceph-crash role.
galaxy_info:
  company: Red Hat
  author: Guillaume Abrioux
  description: Deploy ceph-crash
  license: Apache
  # Version values are quoted so YAML keeps them as strings instead of
  # typing them as a float / integers.
  min_ansible_version: "2.7"
  platforms:
    - name: EL
      versions:
        - "7"
        - "8"
  galaxy_tags:
    - system
dependencies: []
|
|
@ -0,0 +1,71 @@
|
|||
---
|
||||
# Only when cephx is enabled: create the client.crash key from the first
# monitor, read it back with `ceph auth get`, build the list of target nodes
# and write the keyring on each of them.
- name: create and copy client.crash keyring
  when: cephx | bool
  block:
    - name: create client.crash keyring
      ceph_key:
        state: present
        name: "client.crash"
        caps: "{{ {'mon': 'allow profile crash', 'mgr': 'allow profile crash'} }}"
        cluster: "{{ cluster }}"
        dest: "{{ ceph_conf_key_directory }}"
        import_key: true
        mode: "{{ ceph_keyring_permissions }}"
        owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
        group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
      environment:
        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
      delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
      run_once: true

    - name: get keys from monitors
      command: "{{ hostvars[groups[mon_group_name][0]]['container_exec_cmd'] | default('') }} ceph --cluster {{ cluster }} auth get client.crash"
      register: _crash_keys
      # `auth get` only reads the key, so never report this task as changed
      changed_when: false
      delegate_to: "{{ groups.get(mon_group_name)[0] }}"
      run_once: true

    # Accumulates, over the non-empty daemon groups, every inventory host
    # except grafana, client, nfs and iscsi gateway hosts, plus the members
    # of the group being iterated.
    - name: get a list of node where the keyring should be copied
      set_fact:
        list_target_node: "{{ list_target_node | default([]) | union(((groups.get('all') | difference(groups.get(grafana_server_group_name, []) + groups.get(client_group_name, []) + groups.get(nfs_group_name, []) + groups.get(iscsi_gw_group_name, []))) + groups.get(item, [])) | unique) }}"
      run_once: true
      with_items:
        - "{{ mon_group_name if groups.get(mon_group_name, []) | length > 0 else [] }}"
        - "{{ osd_group_name if groups.get(osd_group_name, []) | length > 0 else [] }}"
        - "{{ mds_group_name if groups.get(mds_group_name, []) | length > 0 else [] }}"
        - "{{ rgw_group_name if groups.get(rgw_group_name, []) | length > 0 else [] }}"
        - "{{ rbdmirror_group_name if groups.get(rbdmirror_group_name, []) | length > 0 else [] }}"
        - "{{ mgr_group_name if groups.get(mgr_group_name, []) | length > 0 else [] }}"

    # Runs once and delegates to each target node in turn.
    - name: copy ceph key(s) if needed
      copy:
        dest: "{{ ceph_conf_key_directory }}/{{ cluster }}.client.crash.keyring"
        content: "{{ _crash_keys.stdout + '\n' }}"
        owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
        group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
        mode: "{{ ceph_keyring_permissions }}"
      with_items: "{{ list_target_node }}"
      delegate_to: "{{ item }}"
      run_once: true
|
||||
|
||||
# Containerized deployments only: create the crash "posted" directory owned
# by ceph_uid, then include the systemd.yml tasks.
- name: start ceph-crash daemon
  when: containerized_deployment | bool
  block:
    - name: create /var/lib/ceph/crash/posted
      file:
        path: /var/lib/ceph/crash/posted
        state: directory
        owner: "{{ ceph_uid }}"
        group: "{{ ceph_uid }}"
        mode: '0755'

    - name: include_tasks systemd.yml
      include_tasks: systemd.yml
|
||||
|
||||
# Start and enable ceph-crash after a daemon reload. The unit name is the
# per-host instance ceph-crash@<hostname> for containerized deployments and
# ceph-crash.service otherwise.
- name: start the ceph-crash service
  systemd:
    name: "{{ 'ceph-crash@' + ansible_hostname if containerized_deployment | bool else 'ceph-crash.service' }}"
    state: started
    enabled: true
    masked: false
    daemon_reload: true
|
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
# Render ceph-crash.service.j2 from this role's templates directory to the
# ceph-crash@.service unit file; a change notifies "restart ceph crash".
- name: generate systemd unit file for ceph-crash container
  template:
    src: "{{ role_path }}/templates/ceph-crash.service.j2"
    dest: /etc/systemd/system/ceph-crash@.service
    owner: root
    group: root
    mode: "0644"
  notify: restart ceph crash
|
|
@ -0,0 +1,41 @@
|
|||
# systemd unit template for running ceph-crash in a container.
# The Jinja conditions on container_binary ('docker' / 'podman') select the
# unit dependencies, the pre-start and stop commands and the service type.
[Unit]
|
||||
Description=Ceph crash dump collector
|
||||
{% if container_binary == 'docker' %}
|
||||
After=docker.service
|
||||
Requires=docker.service
|
||||
{% else %}
|
||||
After=network.target
|
||||
{% endif %}
|
||||
|
||||
[Service]
|
||||
{% if container_binary == 'podman' %}
|
||||
# podman only: delete the pid/cid files and force-remove any ceph-crash-%i
# container before starting.
ExecStartPre=-/usr/bin/rm -f /%t/%n-pid /%t/%n-cid
|
||||
ExecStartPre=-/usr/bin/{{ container_binary }} rm -f ceph-crash-%i
|
||||
{% endif %}
|
||||
ExecStart=/usr/bin/{{ container_binary }} run --rm --name ceph-crash-%i \
|
||||
{% if container_binary == 'podman' %}
|
||||
-d --conmon-pidfile /%t/%n-pid --cidfile /%t/%n-cid \
|
||||
{% endif %}
|
||||
--net=host \
|
||||
-v /var/lib/ceph:/var/lib/ceph:z \
|
||||
-v /etc/localtime:/etc/localtime:ro \
|
||||
--entrypoint=/usr/bin/ceph-crash {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
|
||||
{% if container_binary == 'podman' %}
|
||||
ExecStop=-/usr/bin/sh -c "/usr/bin/{{ container_binary }} rm -f `cat /%t/%n-cid`"
|
||||
{% else %}
|
||||
ExecStop=-/usr/bin/{{ container_binary }} stop ceph-crash-%i
|
||||
{% endif %}
|
||||
StartLimitInterval=10min
|
||||
StartLimitBurst=30
|
||||
{% if container_binary == 'podman' %}
|
||||
# podman only: the container is started detached (-d in ExecStart), so the
# service is declared as forking and tracked through the conmon pid file.
Type=forking
|
||||
PIDFile=/%t/%n-pid
|
||||
{% endif %}
|
||||
KillMode=none
|
||||
Restart=always
|
||||
RestartSec=10s
|
||||
TimeoutStartSec=120
|
||||
TimeoutStopSec=10
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
|
@ -66,6 +66,10 @@
|
|||
when: iscsi_gw_group_name in group_names
|
||||
listen: "restart ceph rbd-target-api-gw"
|
||||
|
||||
# Handler entry point: any task notifying "restart ceph crash" triggers the
# tasks of handler_crash.yml.
- name: ceph crash handler
  listen: "restart ceph crash"
  include_tasks: handler_crash.yml
|
||||
|
||||
- name: remove tempdir for scripts
|
||||
file:
|
||||
path: "{{ tmpdirpath.path }}"
|
||||
|
|
|
@ -78,3 +78,10 @@
|
|||
failed_when: false
|
||||
check_mode: no
|
||||
when: inventory_hostname in groups.get(iscsi_gw_group_name, [])
|
||||
|
||||
# List the id of a running container named ceph-crash-<hostname>, if any.
# Read-only: runs in check mode too, never reports changed and never fails.
- name: check for a ceph-crash container
  command: "{{ container_binary }} ps -q --filter='name=ceph-crash-{{ ansible_hostname }}'"
  register: ceph_crash_container_stat
  check_mode: false
  changed_when: false
  failed_when: false
|
|
@ -216,3 +216,10 @@
|
|||
failed_when: false
|
||||
check_mode: no
|
||||
when: inventory_hostname in groups.get(iscsi_gw_group_name, [])
|
||||
|
||||
# Look for a ceph-crash process with pgrep and keep the result in
# crash_process. Read-only: runs in check mode too, never reports changed
# and never fails.
- name: check for a ceph-crash process
  command: pgrep ceph-crash
  register: crash_process
  check_mode: false
  changed_when: false
  failed_when: false
|
|
@ -0,0 +1,18 @@
|
|||
---
# Restart sequence for the containerized ceph-crash unit: raise the
# _crash_handler_called flag, restart ceph-crash@<hostname> (restart errors
# are ignored), then lower the flag again.
- name: set _crash_handler_called before restart
  set_fact:
    _crash_handler_called: true

- name: restart the ceph-crash service
  when: hostvars[inventory_hostname]['_crash_handler_called'] | default(False) | bool
  systemd:
    name: "ceph-crash@{{ ansible_hostname }}"
    state: restarted
    enabled: true
    masked: false
    daemon_reload: true
  ignore_errors: true

- name: set _crash_handler_called after restart
  set_fact:
    _crash_handler_called: false
|
|
@ -37,3 +37,14 @@
|
|||
set_fact:
|
||||
handler_mgr_status: "{{ (mgr_socket_stat.get('rc') == 0) if not containerized_deployment | bool else (ceph_mgr_container_stat.get('rc') == 0 and ceph_mgr_container_stat.get('stdout_lines', []) | length != 0) }}"
|
||||
when: inventory_hostname in groups.get(mgr_group_name, [])
|
||||
|
||||
# handler_crash_status is true when ceph-crash was found on the host: pgrep
# returned 0 for non-containerized deployments, or the container lookup
# returned rc 0 with output for containerized ones. Evaluated only on hosts
# that are members of one of the daemon groups below.
- name: set_fact handler_crash_status
  set_fact:
    handler_crash_status: "{{ crash_process.get('rc') == 0 if not containerized_deployment | bool else (ceph_crash_container_stat.get('rc') == 0 and ceph_crash_container_stat.get('stdout_lines', []) | length != 0) }}"
  when: >-
    inventory_hostname in groups.get(mon_group_name, [])
    or inventory_hostname in groups.get(mgr_group_name, [])
    or inventory_hostname in groups.get(osd_group_name, [])
    or inventory_hostname in groups.get(mds_group_name, [])
    or inventory_hostname in groups.get(rgw_group_name, [])
    or inventory_hostname in groups.get(rbdmirror_group_name, [])
|
|
@ -424,6 +424,30 @@
|
|||
- dashboard_enabled | bool
|
||||
- groups.get(grafana_server_group_name, []) | length > 0
|
||||
|
||||
# Deploy ceph-crash on the mons, osds, mdss, rgws, rbdmirrors and mgrs groups:
# import ceph-defaults, the container_binary tasks of ceph-facts and
# ceph-handler, then the ceph-crash role. Any error is fatal for all hosts.
- hosts:
    - mons
    - osds
    - mdss
    - rgws
    - rbdmirrors
    - mgrs
  gather_facts: false
  become: true
  any_errors_fatal: true
  tasks:
    - import_role:
        name: ceph-defaults

    - import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - import_role:
        name: ceph-handler

    - import_role:
        name: ceph-crash
|
||||
|
||||
|
||||
- hosts: mons
|
||||
gather_facts: false
|
||||
become: True
|
||||
|
|
|
@ -446,6 +446,29 @@
|
|||
- dashboard_enabled | bool
|
||||
- groups.get(grafana_server_group_name, []) | length > 0
|
||||
|
||||
# Deploy ceph-crash on the mons, osds, mdss, rgws, rbdmirrors and mgrs groups:
# import ceph-defaults, the container_binary tasks of ceph-facts and
# ceph-handler, then the ceph-crash role. Any error is fatal for all hosts.
- hosts:
    - mons
    - osds
    - mdss
    - rgws
    - rbdmirrors
    - mgrs
  gather_facts: false
  become: true
  any_errors_fatal: true
  tasks:
    - import_role:
        name: ceph-defaults

    - import_role:
        name: ceph-facts
        tasks_from: container_binary.yml

    - import_role:
        name: ceph-handler

    - import_role:
        name: ceph-crash
|
||||
|
||||
- hosts: mons
|
||||
gather_facts: false
|
||||
become: True
|
||||
|
|
|
@ -127,6 +127,9 @@ def node(host, request):
|
|||
request.function, group_names)
|
||||
pytest.skip(reason)
|
||||
|
||||
if request.node.get_closest_marker('ceph_crash') and group_names in [['nfss'], ['iscsigws'], ['clients'], ['grafana-server']]:
|
||||
pytest.skip('Not a valid test for nfs, client or iscsigw nodes')
|
||||
|
||||
if request.node.get_closest_marker("no_docker") and docker:
|
||||
pytest.skip(
|
||||
"Not a valid test for containerized deployments or atomic hosts")
|
||||
|
|
|
@ -29,3 +29,18 @@ class TestCephConf(object):
|
|||
if pattern.search(mon_host_line) is None:
|
||||
result = False
|
||||
assert result
|
||||
|
||||
class TestCephCrash(object):
    """Check that the ceph-crash service is enabled and running on a node."""

    @pytest.mark.no_docker
    @pytest.mark.ceph_crash
    def test_ceph_crash_service_enabled_and_running(self, node, host):
        # The no_docker variant checks the plain "ceph-crash" unit.
        crash_service = host.service("ceph-crash")
        assert crash_service.is_enabled
        assert crash_service.is_running

    @pytest.mark.docker
    @pytest.mark.ceph_crash
    def test_ceph_crash_service_enabled_and_running_container(self, node, host):
        # The docker variant checks the unit instance named after the node's
        # inventory hostname.
        unit_name = "ceph-crash@{hostname}".format(
            hostname=node["vars"]["inventory_hostname"])
        crash_service = host.service(unit_name)
        assert crash_service.is_enabled
        assert crash_service.is_running
|
Loading…
Reference in New Issue