ceph-exporter: add installation role

Signed-off-by: Seena Fallah <seenafallah@gmail.com>
pull/7491/head
Seena Fallah 2024-03-04 02:03:19 +01:00 committed by Guillaume Abrioux
parent c8eeae243e
commit 2b72ea991d
25 changed files with 480 additions and 2 deletions

View File

@ -0,0 +1,30 @@
---
# Variables here are applicable to all host groups NOT roles

# This sample file generated by generate_group_vars_sample.sh

# Dummy variable to avoid error because ansible does not recognize the
# file as a good configuration file when no variable in it.
dummy:

###########
# GENERAL #
###########

# These values are interpolated into the ceph-exporter arguments listed in
# ceph_exporter_container_params below.
#ceph_exporter_addr: "0.0.0.0"
#ceph_exporter_port: 9926
#ceph_exporter_stats_period: 5  # seconds
#ceph_exporter_prio_limit: 5

##########
# DOCKER #
##########

# If you want to add parameters, you should retain the existing ones and include the new ones.
# The list below mirrors the role defaults; keep every entry when overriding.
#ceph_exporter_container_params:
#  args:
#    - -f
#    - -n=client.ceph-exporter
#    - --sock-dir=/var/run/ceph
#    - --addrs={{ ceph_exporter_addr }}
#    - --port={{ ceph_exporter_port }}
#    - --stats-period={{ ceph_exporter_stats_period }}
#    - --prio-limit={{ ceph_exporter_prio_limit }}

View File

@ -26,7 +26,6 @@ dummy:
# Resource limitation
# For the whole list of limits you can apply see: docs.docker.com/engine/admin/resource_constraints
# Default values are based on: https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/2/html/red_hat_ceph_storage_hardware_guide/minimum_recommendations
# These options can be passed using the 'ceph_mds_docker_extra_env' variable.
#ceph_mds_docker_memory_limit: "{{ ansible_facts['memtotal_mb'] }}m"
#ceph_mds_docker_cpu_limit: 4

View File

@ -101,6 +101,11 @@
name: ceph-crash
when: containerized_deployment | bool
# Pull in the ceph-exporter role; applied only when containerized_deployment is true.
- name: Import ceph-exporter role
  when: containerized_deployment | bool
  ansible.builtin.import_role:
    name: ceph-exporter
- name: Update config file on OSD nodes
hosts: osds
gather_facts: true

View File

@ -1187,6 +1187,43 @@
CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}'
# Retire the systemd-managed ceph-exporter on every Ceph daemon host and ask
# the orchestrator to place ceph-exporter on hosts labelled "ceph".
- name: Redeploy ceph-exporter daemons
  any_errors_fatal: true
  gather_facts: false
  become: true
  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    # Best effort: failures are ignored (failed_when: false).
    # NOTE(review): gather_facts is false here, so ansible_facts['hostname']
    # presumably comes from facts gathered earlier - confirm.
    - name: Stop and disable ceph-exporter systemd service
      failed_when: false
      ansible.builtin.service:
        enabled: false
        state: stopped
        name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}"

    - name: Remove ceph-exporter systemd unit file
      ansible.builtin.file:
        state: absent
        path: /etc/systemd/system/ceph-exporter@.service

    # Issued a single time, from the first monitor.
    - name: Update the placement of ceph-exporter hosts
      run_once: true
      delegate_to: "{{ groups[mon_group_name][0] }}"
      changed_when: false
      environment:
        CEPHADM_IMAGE: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
      ansible.builtin.command: >-
        {{ cephadm_cmd }} shell
        -k /etc/ceph/{{ cluster }}.client.admin.keyring
        --fsid {{ fsid }}
        -- ceph orch apply ceph-exporter
        --placement='label:ceph'
- name: Redeploy alertmanager/grafana/prometheus daemons
hosts: "{{ monitoring_group_name|default('monitoring') }}"
serial: 1

View File

@ -312,6 +312,12 @@
environment:
CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}'
# Ask the orchestrator to run ceph-exporter on every host labelled "ceph".
# Always reported as unchanged (changed_when: false).
- name: Update the placement of ceph-exporter hosts
  environment:
    CEPHADM_IMAGE: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
  changed_when: false
  ansible.builtin.command: >-
    {{ cephadm_cmd }} shell --
    ceph --cluster {{ cluster }} orch apply ceph-exporter
    --placement='label:ceph'
- name: Adjust monitoring service placement
hosts: "{{ monitoring_group_name|default('monitoring') }}"
become: true

View File

@ -194,6 +194,17 @@
inventory_hostname in groups.get(mgr_group_name, []) or
inventory_hostname in groups.get(rbdmirror_group_name, [])
# Run only the systemd.yml tasks of the ceph-exporter role, and only on hosts
# that belong to one of the Ceph daemon groups listed in the condition.
- name: Import ceph-exporter role
  when: >-
    inventory_hostname in groups.get(mon_group_name, []) or
    inventory_hostname in groups.get(osd_group_name, []) or
    inventory_hostname in groups.get(mds_group_name, []) or
    inventory_hostname in groups.get(rgw_group_name, []) or
    inventory_hostname in groups.get(mgr_group_name, []) or
    inventory_hostname in groups.get(rbdmirror_group_name, [])
  ansible.builtin.import_role:
    tasks_from: systemd.yml
    name: ceph-exporter
- name: Dashboard configuration
when: dashboard_enabled | bool
block:

View File

@ -726,6 +726,40 @@
state: absent
# Stop ceph-exporter, clear its failed state and delete its unit file on every
# Ceph daemon host. Each cleanup step ignores failures (failed_when: false).
- name: Purge ceph-exporter daemons
  become: true
  gather_facts: false
  hosts:
    - mons
    - osds
    - mdss
    - rgws
    - rbdmirrors
    - mgrs
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    # The unit is ceph-exporter@<hostname> when containerized, otherwise
    # ceph-exporter.service.
    - name: Stop ceph-exporter service
      failed_when: false
      ansible.builtin.service:
        enabled: false
        state: stopped
        name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}"

    - name: Systemctl reset-failed ceph-exporter  # noqa command-instead-of-module
      failed_when: false
      changed_when: false
      ansible.builtin.command: >-
        systemctl reset-failed
        {{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}

    # Removes the templated unit (ceph-exporter@.service) when containerized,
    # otherwise ceph-exporter.service.
    - name: Remove service file
      failed_when: false
      ansible.builtin.file:
        state: absent
        name: "/etc/systemd/system/ceph-exporter{{ '@' if containerized_deployment | bool else '' }}.service"
- name: Check container hosts
hosts:
- mons

View File

@ -1143,6 +1143,50 @@
ansible.builtin.import_role:
name: ceph-crash
# Stop and mask the running ceph-exporter unit, then re-apply the
# ceph-exporter role. The play ends immediately on non-containerized
# deployments.
- name: Upgrade ceph-exporter daemons
  tags: [post_upgrade, ceph-exporter]
  become: true
  gather_facts: false
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  tasks:
    - name: Exit ceph-exporter upgrade if non containerized deployment
      when: not containerized_deployment | bool
      ansible.builtin.meta: end_play

    - name: Stop the ceph-exporter service
      ansible.builtin.systemd:
        state: stopped
        name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}"

    # Masking/disabling must stay a separate task: when it is merged with the
    # stop above, the stop does not work.
    - name: Mask and disable the ceph-exporter service
      ansible.builtin.systemd:
        masked: true
        enabled: false
        name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}"

    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    # Only the container_binary.yml tasks of ceph-facts are run.
    - name: Import ceph-facts role
      ansible.builtin.import_role:
        tasks_from: container_binary.yml
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-exporter role
      ansible.builtin.import_role:
        name: ceph-exporter
- name: Complete upgrade
hosts: "{{ mon_group_name | default('mons') }}"
tags: post_upgrade

View File

@ -755,6 +755,37 @@
ansible.builtin.import_role:
name: ceph-crash
# Apply the ceph-exporter role with containerized_deployment forced to true
# on every Ceph daemon host.
- name: Switching from non-containerized to containerized ceph-exporter
  become: true
  vars:
    containerized_deployment: true
  hosts:
    - "{{ mon_group_name | default('mons') }}"
    - "{{ osd_group_name | default('osds') }}"
    - "{{ mds_group_name | default('mdss') }}"
    - "{{ rgw_group_name | default('rgws') }}"
    - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
    - "{{ mgr_group_name | default('mgrs') }}"
  tasks:
    - name: Import ceph-defaults role
      ansible.builtin.import_role:
        name: ceph-defaults

    # Only the container_binary.yml tasks of ceph-facts are run.
    - name: Import ceph-facts role
      ansible.builtin.import_role:
        tasks_from: container_binary.yml
        name: ceph-facts

    - name: Import ceph-handler role
      ansible.builtin.import_role:
        name: ceph-handler

    - name: Import ceph-exporter role
      ansible.builtin.import_role:
        name: ceph-exporter
- name: Final task
hosts:
- "{{ mon_group_name|default('mons') }}"

View File

@ -34,6 +34,7 @@ class CallbackModule(CallbackBase):
'installer_phase_ceph_grafana',
'installer_phase_ceph_node_exporter',
'installer_phase_ceph_crash',
'installer_phase_ceph_exporter',
]
# Define the attributes of the installer phases
@ -90,6 +91,10 @@ class CallbackModule(CallbackBase):
'title': 'Install Ceph Crash',
'playbook': 'roles/ceph-crash/tasks/main.yml'
},
'installer_phase_ceph_exporter': {
'title': 'Install Ceph Exporter',
'playbook': 'roles/ceph-exporter/tasks/main.yml'
},
}
# Find the longest phase title

View File

@ -0,0 +1,24 @@
---
###########
# GENERAL #
###########

# Each value below is substituted into the matching ceph-exporter argument in
# ceph_exporter_container_params (--addrs, --port, --stats-period, --prio-limit).
ceph_exporter_addr: '0.0.0.0'
ceph_exporter_port: 9926
ceph_exporter_stats_period: 5  # seconds
ceph_exporter_prio_limit: 5

##########
# DOCKER #
##########

# If you want to add parameters, you should retain the existing ones and include the new ones.
ceph_exporter_container_params:
  args:
    - "-f"
    - "-n=client.ceph-exporter"
    - "--sock-dir=/var/run/ceph"
    - "--addrs={{ ceph_exporter_addr }}"
    - "--port={{ ceph_exporter_port }}"
    - "--stats-period={{ ceph_exporter_stats_period }}"
    - "--prio-limit={{ ceph_exporter_prio_limit }}"

View File

@ -0,0 +1,14 @@
---
# Ansible Galaxy metadata for the ceph-exporter role.
galaxy_info:
  author: Guillaume Abrioux
  company: Red Hat
  description: Deploy ceph-exporter
  license: Apache
  min_ansible_version: "2.7"
  platforms:
    - name: EL
      versions:
        - "all"
  galaxy_tags:
    - system

# The role declares no dependencies.
dependencies: []

View File

@ -0,0 +1,58 @@
---
# Create the client.ceph-exporter key (read-only mon/mgr/osd caps) once on the
# first monitor, read it back from there, then write it as a keyring file on
# each host running these tasks. The whole block is skipped when cephx is off.
- name: Create and copy client.ceph-exporter keyring
  when: cephx | bool
  block:
    - name: Create client.ceph-exporter keyring
      ceph_key:
        name: client.ceph-exporter
        caps:
          mon: 'allow r'
          mgr: 'allow r'
          osd: 'allow r'
        cluster: "{{ cluster }}"
        dest: "{{ ceph_conf_key_directory }}"
        import_key: true
        mode: "{{ ceph_keyring_permissions }}"
        owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
        group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
      environment:
        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
      delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
      run_once: true
      no_log: "{{ no_log_on_ceph_key_tasks }}"

    # Same delegation target expression as the creation task above, so both
    # tasks resolve the first monitor identically.
    - name: Get keys from monitors
      ceph_key:
        name: client.ceph-exporter
        cluster: "{{ cluster }}"
        output_format: plain
        state: info
      environment:
        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
      register: _exporter_keys
      delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
      run_once: true
      no_log: "{{ no_log_on_ceph_key_tasks }}"

    # The registered stdout is written with a trailing newline appended.
    - name: Copy ceph key(s) if needed
      ansible.builtin.copy:
        dest: "{{ ceph_conf_key_directory }}/{{ cluster }}.client.ceph-exporter.keyring"
        content: "{{ _exporter_keys.stdout + '\n' }}"
        owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
        group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}"
        mode: "{{ ceph_keyring_permissions }}"
      no_log: "{{ no_log_on_ceph_key_tasks }}"
# systemd.yml is included only for containerized deployments.
- name: Include_tasks systemd.yml
  when: containerized_deployment | bool
  ansible.builtin.include_tasks: systemd.yml
# Reload systemd, then make sure the exporter unit is unmasked, enabled and
# started. The unit is ceph-exporter@<hostname> when containerized, otherwise
# ceph-exporter.service.
- name: Start the ceph-exporter service
  ansible.builtin.systemd:
    daemon_reload: true
    masked: false
    enabled: true
    state: started
    name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}"

View File

@ -0,0 +1,9 @@
---
# Render the ceph-exporter unit template into /etc/systemd/system and notify
# the "Restart ceph exporter" handler when the file changes.
- name: Generate systemd unit file for ceph-exporter container
  notify: Restart ceph exporter
  ansible.builtin.template:
    dest: /etc/systemd/system/ceph-exporter@.service
    src: "{{ role_path }}/templates/ceph-exporter.service.j2"
    mode: "0644"
    owner: root
    group: root

View File

@ -0,0 +1,50 @@
{#
  systemd unit template that runs ceph-exporter in a container named
  ceph-exporter-%i (%i is the unit instance name).

  - docker: the unit is ordered after, and requires, docker.service.
  - podman: stale pid/cid files and leftover container storage are removed
    before start, the container is started detached with a conmon pidfile
    and a cidfile, and the service is declared Type=forking with that
    pidfile.
  - Volumes, environment variables and arguments come from
    ceph_common_container_params, extended by the optional 'volumes',
    'envs' and 'args' entries of ceph_exporter_container_params.
  - The container entrypoint is /usr/bin/ceph-exporter.
#}
[Unit]
Description=Ceph exporter
{% if container_binary == 'docker' %}
After=docker.service network-online.target local-fs.target time-sync.target
Requires=docker.service
{% else %}
After=network-online.target local-fs.target time-sync.target
{% endif %}
Wants=network-online.target local-fs.target time-sync.target
[Service]
{% if container_binary == 'podman' %}
ExecStartPre=-/usr/bin/rm -f /%t/%n-pid /%t/%n-cid
ExecStartPre=-/usr/bin/{{ container_binary }} rm --storage ceph-exporter-%i
{% endif %}
ExecStartPre=-/usr/bin/{{ container_binary }} rm -f ceph-exporter-%i
ExecStart=/usr/bin/{{ container_binary }} run --rm --name ceph-exporter-%i \
{% if container_binary == 'podman' %}
-d --log-driver journald --conmon-pidfile /%t/%n-pid --cidfile /%t/%n-cid \
{% endif %}
--pids-limit={{ 0 if container_binary == 'podman' else -1 }} \
--security-opt label=disable \
--net=host \
{% for v in ceph_common_container_params['volumes'] + ceph_exporter_container_params['volumes'] | default([]) %}
-v {{ v }} \
{% endfor %}
{% for k, v in (ceph_common_container_params['envs'] | combine(ceph_exporter_container_params['envs'] | default({}))).items() %}
-e {{ k }}={{ v }} \
{% endfor %}
--entrypoint=/usr/bin/ceph-exporter {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \
{{ (ceph_common_container_params['args'] + ceph_exporter_container_params['args'] | default([])) | join(' ') }}
{% if container_binary == 'podman' %}
ExecStop=-/usr/bin/sh -c "/usr/bin/{{ container_binary }} rm -f `cat /%t/%n-cid`"
{% else %}
ExecStop=-/usr/bin/{{ container_binary }} stop ceph-exporter-%i
{% endif %}
StartLimitInterval=10min
StartLimitBurst=30
{% if container_binary == 'podman' %}
Type=forking
PIDFile=/%t/%n-pid
{% endif %}
KillMode=none
Restart=always
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=10
[Install]
WantedBy=ceph.target

View File

@ -65,6 +65,17 @@
or inventory_hostname in groups.get(rgw_group_name, [])
or inventory_hostname in groups.get(rbdmirror_group_name, [])
# Runs handler_exporter.yml when "Restart ceph exporter" is notified, limited
# to hosts that belong to one of the Ceph daemon groups below.
- name: Ceph exporter handler
  listen: "Restart ceph exporter"
  ansible.builtin.include_tasks: handler_exporter.yml
  when: >-
    inventory_hostname in groups.get(mon_group_name, [])
    or inventory_hostname in groups.get(mgr_group_name, [])
    or inventory_hostname in groups.get(osd_group_name, [])
    or inventory_hostname in groups.get(mds_group_name, [])
    or inventory_hostname in groups.get(rgw_group_name, [])
    or inventory_hostname in groups.get(rbdmirror_group_name, [])
- name: Remove tempdir for scripts
ansible.builtin.file:
path: "{{ tmpdirpath.path }}"
@ -80,6 +91,7 @@
register: tmpdirpath
when:
- tmpdirpath.path is defined
- not _exporter_handler_called | default(false) | bool
- not _crash_handler_called | default(false) | bool
- not _mds_handler_called | default(false) | bool
- not _mgr_handler_called | default(false) | bool

View File

@ -68,3 +68,17 @@
or inventory_hostname in groups.get(mds_group_name, [])
or inventory_hostname in groups.get(rgw_group_name, [])
or inventory_hostname in groups.get(rbdmirror_group_name, [])
# List the ID of a container named ceph-exporter-<hostname>, if there is one.
# The probe never fails or reports a change, and it also runs in check mode.
- name: Check for a ceph-exporter container
  check_mode: false
  failed_when: false
  changed_when: false
  register: ceph_exporter_container_stat
  ansible.builtin.command: >-
    {{ container_binary }} ps -q
    --filter='name=ceph-exporter-{{ ansible_facts['hostname'] }}'
  when: >-
    inventory_hostname in groups.get(mon_group_name, [])
    or inventory_hostname in groups.get(mgr_group_name, [])
    or inventory_hostname in groups.get(osd_group_name, [])
    or inventory_hostname in groups.get(mds_group_name, [])
    or inventory_hostname in groups.get(rgw_group_name, [])
    or inventory_hostname in groups.get(rbdmirror_group_name, [])

View File

@ -218,3 +218,17 @@
or inventory_hostname in groups.get(mds_group_name, [])
or inventory_hostname in groups.get(rgw_group_name, [])
or inventory_hostname in groups.get(rbdmirror_group_name, [])
# Probe for a ceph-exporter process with pgrep; the result is registered as
# exporter_process. The probe never fails or reports a change, and it also
# runs in check mode.
- name: Check for a ceph-exporter process
  register: exporter_process
  check_mode: false
  failed_when: false
  changed_when: false
  ansible.builtin.command: pgrep ceph-exporter
  when: >-
    inventory_hostname in groups.get(mon_group_name, [])
    or inventory_hostname in groups.get(mgr_group_name, [])
    or inventory_hostname in groups.get(osd_group_name, [])
    or inventory_hostname in groups.get(mds_group_name, [])
    or inventory_hostname in groups.get(rgw_group_name, [])
    or inventory_hostname in groups.get(rbdmirror_group_name, [])

View File

@ -0,0 +1,18 @@
---
# Restart sequence for ceph-exporter@<hostname>: raise the
# _exporter_handler_called flag, restart the unit, then lower the flag again.
- name: Set _exporter_handler_called before restart
  ansible.builtin.set_fact:
    _exporter_handler_called: true

# Restart errors are deliberately ignored (ignore_errors: true).
- name: Restart the ceph-exporter service  # noqa: ignore-errors
  when: hostvars[inventory_hostname]['_exporter_handler_called'] | default(False) | bool
  ignore_errors: true
  ansible.builtin.systemd:
    daemon_reload: true
    masked: false
    enabled: true
    state: restarted
    name: ceph-exporter@{{ ansible_facts['hostname'] }}

- name: Set _exporter_handler_called after restart
  ansible.builtin.set_fact:
    _exporter_handler_called: false

View File

@ -48,3 +48,14 @@
or inventory_hostname in groups.get(mds_group_name, [])
or inventory_hostname in groups.get(rgw_group_name, [])
or inventory_hostname in groups.get(rbdmirror_group_name, [])
# handler_exporter_status is true when an exporter was found:
#   - non-containerized: the exporter_process probe returned rc 0;
#   - containerized: the ceph_exporter_container_stat probe returned rc 0 and
#     produced at least one output line.
- name: Set_fact handler_exporter_status
  when: >-
    inventory_hostname in groups.get(mon_group_name, [])
    or inventory_hostname in groups.get(mgr_group_name, [])
    or inventory_hostname in groups.get(osd_group_name, [])
    or inventory_hostname in groups.get(mds_group_name, [])
    or inventory_hostname in groups.get(rgw_group_name, [])
    or inventory_hostname in groups.get(rbdmirror_group_name, [])
  ansible.builtin.set_fact:
    handler_exporter_status: "{{ exporter_process.get('rc') == 0 if not containerized_deployment | bool else (ceph_exporter_container_stat.get('rc') == 0 and ceph_exporter_container_stat.get('stdout_lines', []) | length != 0) }}"

View File

@ -18,7 +18,6 @@ copy_admin_key: false
# Resource limitation
# For the whole list of limits you can apply see: docs.docker.com/engine/admin/resource_constraints
# Default values are based on: https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/2/html/red_hat_ceph_storage_hardware_guide/minimum_recommendations
# These options can be passed using the 'ceph_mds_docker_extra_env' variable.
ceph_mds_docker_memory_limit: "{{ ansible_facts['memtotal_mb'] }}m"
ceph_mds_docker_cpu_limit: 4

View File

@ -468,6 +468,46 @@
status: "Complete"
end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
# Install phase for ceph-exporter on every Ceph daemon host. The pre/post
# tasks record the phase status and timestamps under
# installer_phase_ceph_exporter via set_stats.
- hosts:
    - mons
    - osds
    - mdss
    - rgws
    - rbdmirrors
    - mgrs
  any_errors_fatal: true
  become: True
  gather_facts: false

  pre_tasks:
    - name: set ceph exporter install 'In Progress'
      run_once: true
      set_stats:
        data:
          installer_phase_ceph_exporter:
            start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
            status: "In Progress"

  tasks:
    - import_role:
        name: ceph-defaults
    # Only the container_binary.yml tasks of ceph-facts are run.
    - import_role:
        tasks_from: container_binary.yml
        name: ceph-facts
    - import_role:
        name: ceph-handler
    - import_role:
        name: ceph-exporter

  post_tasks:
    - name: set ceph exporter install 'Complete'
      run_once: true
      set_stats:
        data:
          installer_phase_ceph_exporter:
            end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
            status: "Complete"
- hosts: mons[0]
gather_facts: false
become: True

View File

@ -171,6 +171,9 @@ def node(host, request):
if request.node.get_closest_marker('ceph_crash') and sanitized_group_names in [['nfss'], ['clients'], ['monitoring']]:
pytest.skip('Not a valid test for nfs or client nodes')
if request.node.get_closest_marker('ceph_exporter') and sanitized_group_names in [['nfss'], ['clients'], ['monitoring']]:
pytest.skip('Not a valid test for nfs or client nodes')
if request.node.get_closest_marker("no_docker") and docker:
pytest.skip(
"Not a valid test for containerized deployments or atomic hosts")

View File

@ -45,3 +45,12 @@ class TestCephCrash(object):
s = host.service("ceph-crash@{hostname}".format(hostname=node["vars"]["inventory_hostname"]))
assert s.is_enabled
assert s.is_running
class TestCephExporter(object):
    """Service checks for the ceph-exporter systemd unit."""

    @pytest.mark.docker
    @pytest.mark.ceph_exporter
    def test_ceph_exporter_service_enabled_and_running_container(self, node, host):
        """Assert that ceph-exporter@<inventory_hostname> is enabled and running."""
        # The unit instance name is the node's inventory hostname.
        unit_name = "ceph-exporter@{hostname}".format(
            hostname=node["vars"]["inventory_hostname"]
        )
        exporter_service = host.service(unit_name)
        assert exporter_service.is_enabled
        assert exporter_service.is_running

View File

@ -2,6 +2,7 @@
# dir really is.
[pytest]
markers =
ceph_exporter: environment with ceph exporter enabled
ceph_crash: environment with ceph crash enabled
dashboard: environment with dashboard enabled
no_docker: environment without containers