Merge cephmetrics/dashboard-ansible repo

This commit merges the dashboard-ansible installation scripts into
ceph-ansible. It adds several new roles to set up ceph-dashboard and
the underlying technologies, such as the Prometheus and Grafana servers.

Signed-off-by: Boris Ranto & Zack Cerza <team-gmeno@redhat.com>
Co-authored-by: Zack Cerza <zcerza@redhat.com>
Co-authored-by: Guillaume Abrioux <gabrioux@redhat.com>
pull/3991/head
Boris Ranto 2018-12-05 19:59:47 +01:00 committed by Guillaume Abrioux
parent 5320aa11c4
commit 2f141a6e80
42 changed files with 1294 additions and 1 deletions

View File

@ -69,6 +69,7 @@ dummy:
#ceph_nfs_firewall_zone: public
#ceph_rbdmirror_firewall_zone: public
#ceph_iscsi_firewall_zone: public
#ceph_dashboard_firewall_zone: public
# Generate local ceph.conf in fetch directory
#ceph_conf_local: false
@ -687,6 +688,17 @@ dummy:
# - { name: client.openstack, caps: { mon: "profile rbd", osd: "profile rbd pool={{ openstack_glance_pool.name }}, profile rbd pool={{ openstack_nova_pool.name }}, profile rbd pool={{ openstack_cinder_pool.name }}, profile rbd pool={{ openstack_cinder_backup_pool.name }}"}, mode: "0600" }
#############
# DASHBOARD #
#############
#dashboard_enabled: False
#dashboard_network_name: ceph-dashboard
# Choose http or https
# For https, you should set dashboard.crt/key and grafana.crt/key
#dashboard_protocol: http
#dashboard_port: 8234
###############
# DEPRECATION #
###############

View File

@ -69,6 +69,7 @@ fetch_directory: ~/ceph-ansible-keys
#ceph_nfs_firewall_zone: public
#ceph_rbdmirror_firewall_zone: public
#ceph_iscsi_firewall_zone: public
#ceph_dashboard_firewall_zone: public
# Generate local ceph.conf in fetch directory
#ceph_conf_local: false
@ -687,6 +688,17 @@ ceph_docker_registry: "registry.access.redhat.com"
# - { name: client.openstack, caps: { mon: "profile rbd", osd: "profile rbd pool={{ openstack_glance_pool.name }}, profile rbd pool={{ openstack_nova_pool.name }}, profile rbd pool={{ openstack_cinder_pool.name }}, profile rbd pool={{ openstack_cinder_backup_pool.name }}"}, mode: "0600" }
#############
# DASHBOARD #
#############
#dashboard_enabled: False
#dashboard_network_name: ceph-dashboard
# Choose http or https
# For https, you should set dashboard.crt/key and grafana.crt/key
#dashboard_protocol: http
#dashboard_port: 8234
###############
# DEPRECATION #
###############

View File

@ -42,12 +42,130 @@
- "{{ nfs_group_name|default('nfss') }}"
- "{{ client_group_name|default('clients') }}"
- "{{ mgr_group_name|default('mgrs') }}"
- grafana-server
become: true
tasks:
- debug: msg="gather facts on all Ceph hosts for following reference"
# Tear down the Prometheus node exporter (systemd unit, container and image)
# on every host of the cluster. Tasks flagged "failed_when: false" are best
# effort so the purge keeps going where the exporter was never deployed.
- name: purge node-exporter
  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ client_group_name|default('clients') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - agents
    - grafana-server
    - clients
    - iscsigws
    - iscsi-gws # for backward compatibility only!
  become: true
  tasks:
    - name: set ceph_docker_registry value if not set
      set_fact:
        ceph_docker_registry: "docker.io"
      when: ceph_docker_registry is not defined

    - name: disable node_exporter service
      service:
        name: node_exporter
        state: stopped
        enabled: false
      failed_when: false

    # The systemd unit is called node_exporter, but the container it wraps is
    # created under the name node-exporter, so that is the name to remove.
    - name: remove node-exporter container
      docker_container:
        name: node-exporter
        state: absent
      failed_when: false

    - name: remove node_exporter service file
      file:
        name: /etc/systemd/system/node_exporter.service
        state: absent

    # docker_image identifies the image through "name"; it has no "image"
    # parameter.
    - name: remove node-exporter image
      docker_image:
        name: "{{ ceph_docker_registry }}/prom/node-exporter"
        state: absent
        force: true
      tags:
        - remove_img
      failed_when: false
# Remove what the grafana-server host runs for the dashboard stack:
# systemd units, containers, images and on-disk data. Every removal step is
# best effort (failed_when: false).
- name: purge ceph grafana-server
  hosts: grafana-server
  become: true
  vars:
    # Used both as systemd unit names and as container names below.
    grafana_services:
      - grafana-server
      - prometheus
      - alertmanager
  tasks:
    - name: set ceph_docker_registry value if not set
      when: ceph_docker_registry is not defined
      set_fact:
        ceph_docker_registry: "docker.io"

    - name: stop services
      failed_when: false
      with_items: "{{ grafana_services }}"
      service:
        name: "{{ item }}"
        state: stopped
        enabled: false

    - name: remove containers
      failed_when: false
      with_items: "{{ grafana_services }}"
      docker_container:
        name: "{{ item }}"
        state: absent

    - name: remove service files
      failed_when: false
      with_items: "{{ grafana_services }}"
      file:
        name: "/etc/systemd/system/{{ item }}.service"
        state: absent

    - name: remove images
      failed_when: false
      with_items:
        - "{{ ceph_docker_registry }}/prom/prometheus"
        - "{{ ceph_docker_registry }}/grafana/grafana"
        - "{{ ceph_docker_registry }}/prom/alertmanager"
      docker_image:
        name: "{{ item }}"
        state: absent
        force: true

    # Configuration and data directories of grafana, alertmanager and
    # prometheus.
    - name: remove data
      failed_when: false
      with_items:
        - /etc/grafana/dashboards
        - /etc/grafana/grafana.ini
        - /etc/grafana/provisioning
        - /var/lib/grafana
        - /etc/alertmanager
        - /var/lib/alertmanager
        - /var/lib/prometheus
        - /etc/prometheus
      file:
        name: "{{ item }}"
        state: absent
- name: purge ceph mds cluster
vars:
@ -445,6 +563,7 @@
- ceph-release
- ceph-radosgw
- calamari-server
- ceph-grafana-dashboards
ceph_remaining_packages:
- libcephfs1
@ -466,6 +585,7 @@
- "{{ nfs_group_name|default('nfss') }}"
- "{{ client_group_name|default('clients') }}"
- "{{ mgr_group_name|default('mgrs') }}"
- grafana-server
gather_facts: false # Already gathered previously

View File

@ -444,6 +444,128 @@
tags: remove_img
ignore_errors: true
# Tear down the Prometheus node exporter (systemd unit, container and image)
# on every host of the cluster. Tasks flagged "failed_when: false" are best
# effort so the purge keeps going where the exporter was never deployed.
- name: purge node-exporter
  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ mgr_group_name|default('mgrs') }}"
    - agents
    - grafana-server
    - iscsigws
    - iscsi-gws # for backward compatibility only!
    - clients
  gather_facts: false
  become: true
  tasks:
    - name: set ceph_docker_registry value if not set
      set_fact:
        ceph_docker_registry: "docker.io"
      when: ceph_docker_registry is not defined

    - name: disable node_exporter service
      service:
        name: node_exporter
        state: stopped
        enabled: false
      failed_when: false

    # The systemd unit is called node_exporter, but the container it wraps is
    # created under the name node-exporter, so that is the name to remove.
    - name: remove node-exporter container
      docker_container:
        name: node-exporter
        state: absent
      failed_when: false

    - name: remove node_exporter service file
      file:
        name: /etc/systemd/system/node_exporter.service
        state: absent

    # docker_image identifies the image through "name"; it has no "image"
    # parameter.
    - name: remove node-exporter image
      docker_image:
        name: "{{ ceph_docker_registry }}/prom/node-exporter"
        state: absent
        force: true
      tags:
        - remove_img
      failed_when: false
# Remove what the grafana-server host runs for the dashboard stack:
# systemd units, containers, images and on-disk data. Every removal step is
# best effort (failed_when: false).
- name: purge ceph-grafana
  hosts: grafana-server
  gather_facts: false
  become: true
  vars:
    # Used both as systemd unit names and as container names below.
    grafana_services:
      - grafana-server
      - prometheus
      - alertmanager
  tasks:
    - name: set ceph_docker_registry value if not set
      set_fact:
        ceph_docker_registry: "docker.io"
      when: ceph_docker_registry is not defined

    - name: stop services
      service:
        name: "{{ item }}"
        state: stopped
        enabled: false
      with_items: "{{ grafana_services }}"
      failed_when: false

    - name: remove containers
      docker_container:
        name: "{{ item }}"
        state: absent
      with_items: "{{ grafana_services }}"
      failed_when: false

    - name: remove service files
      file:
        name: "/etc/systemd/system/{{ item }}.service"
        state: absent
      with_items: "{{ grafana_services }}"
      failed_when: false

    - name: remove images
      docker_image:
        name: "{{ item }}"
        state: absent
        force: true
      with_items:
        - "{{ ceph_docker_registry }}/prom/prometheus"
        - "{{ ceph_docker_registry }}/grafana/grafana"
        - "{{ ceph_docker_registry }}/prom/alertmanager"
      failed_when: false

    # /etc/grafana/dashboards is included so this list matches the one used
    # by the non-containerized purge playbook; removing a path that does not
    # exist is a no-op.
    - name: remove data
      file:
        name: "{{ item }}"
        state: absent
      with_items:
        - /etc/grafana/dashboards
        - /etc/grafana/grafana.ini
        - /etc/grafana/provisioning
        - /var/lib/grafana
        - /etc/alertmanager
        - /var/lib/alertmanager
        - /var/lib/prometheus
        - /etc/prometheus
      failed_when: false
- name: check container hosts
hosts:

View File

@ -18,6 +18,7 @@
- name: include fetch_image.yml
include_tasks: fetch_image.yml
tags: fetch_container_image
when: containerized_deployment
- name: get ceph version
command: >
@ -27,10 +28,13 @@
changed_when: false
check_mode: no
register: ceph_version
when: containerized_deployment
- name: set_fact ceph_version ceph_version.stdout.split
set_fact:
ceph_version: "{{ ceph_version.stdout.split(' ')[2] }}"
when: containerized_deployment
- name: include release.yml
include_tasks: release.yml
when: containerized_deployment

View File

@ -0,0 +1,12 @@
---
# Default variables of the ceph-dashboard role.

# Account the role creates in the dashboard with the "administrator" role,
# or whose password it resets when the account already exists.
dashboard_admin_user: admin
dashboard_admin_password: admin
# We only need this for SSL (https) connections
# Local paths of the certificate and key to copy to the mgr host. When either
# one is empty and https is selected, a self-signed pair is generated instead.
dashboard_crt: ''
dashboard_key: ''
# uid of the radosgw system user created for the dashboard.
dashboard_rgw_api_user_id: ceph-dashboard
# Optional RGW API settings: each one is only pushed to the dashboard when
# its value is non-empty.
dashboard_rgw_api_host: ''
dashboard_rgw_api_port: ''
dashboard_rgw_api_scheme: ''
dashboard_rgw_api_admin_resource: ''
# Any non-empty value turns SSL verification of the RGW API off.
dashboard_rgw_api_no_ssl_verify: ''

View File

@ -0,0 +1,14 @@
---
# Ansible Galaxy metadata of the ceph-dashboard role.
galaxy_info:
  company: Red Hat
  author: Boris Ranto
  description: Configures Ceph Dashboard
  license: Apache
  # Quoted so the version stays a string instead of being parsed as a float.
  min_ansible_version: '2.4'
  platforms:
    - name: EL
      versions:
        - 7
  galaxy_tags:
    - system
dependencies: []

View File

@ -0,0 +1,162 @@
---
# mgr_prefix is prepended to every ceph command of this file. It stays empty
# unless a ceph-mgr container is found on the host, in which case commands
# are run through "<container_binary> exec <container>".
- name: set mgr_prefix default
  set_fact:
    mgr_prefix: ""

- when: container_binary != ""
  block:
    # A zero return code means the ceph-mgr-<hostname> container exists.
    - name: check to see if the mgr is containerized
      command: "{{ container_binary }} inspect ceph-mgr-{{ ansible_hostname }}"
      changed_when: false
      failed_when: false
      register: mgr_container

    - name: choose the correct container name
      set_fact:
        container_name: "{% if mgr_container.rc == 0 %}ceph-mgr-{{ ansible_hostname }}{% endif %}"

    - name: prefix the mgr command with a {{ container_binary }} command
      when: container_name != ""
      set_fact:
        mgr_prefix: "{{ container_binary }} exec {{ container_name }}"

# Both tasks try "ceph config set" first and fall back to "ceph config-key
# set" when the first command fails.
- name: disable SSL for dashboard
  when: dashboard_protocol != "https"
  shell: |
    {{ mgr_prefix }} ceph config set mgr mgr/dashboard/ssl false || \
    {{ mgr_prefix }} ceph config-key set mgr/dashboard/ssl false

- name: enable SSL for dashboard
  when: dashboard_protocol == "https"
  shell: |
    {{ mgr_prefix }} ceph config set mgr mgr/dashboard/ssl true || \
    {{ mgr_prefix }} ceph config-key set mgr/dashboard/ssl true
# Install the user-provided certificate/key pair, or generate a self-signed
# one when https is requested and either file was not provided.
- name: copy dashboard SSL certificate file
  copy:
    src: "{{ dashboard_crt }}"
    dest: "/etc/ceph/ceph-dashboard.crt"
    owner: root
    group: root
    mode: "0644"
  when:
    - dashboard_crt
    - dashboard_protocol == "https"

# The private key must not be world-readable: only root needs to read it to
# import it into the cluster configuration.
- name: copy dashboard SSL certificate key
  copy:
    src: "{{ dashboard_key }}"
    dest: "/etc/ceph/ceph-dashboard.key"
    owner: root
    group: root
    mode: "0600"
  when:
    - dashboard_key
    - dashboard_protocol == "https"

# The "test -f" guard keeps an existing key/certificate pair untouched.
- name: generate a Self Signed OpenSSL certificate for dashboard
  shell: |
    test -f /etc/ceph/ceph-dashboard.key -a -f /etc/ceph/ceph-dashboard.crt || \
    openssl req -new -nodes -x509 -subj '/O=IT/CN=ceph-dashboard' -days 3650 -keyout /etc/ceph/ceph-dashboard.key -out /etc/ceph/ceph-dashboard.crt -extensions v3_ca
  when:
    - dashboard_protocol == "https"
    - not dashboard_key or not dashboard_crt
# Load the certificate and key files from /etc/ceph into the cluster
# configuration (https only).
- name: import dashboard certificate file
  when: dashboard_protocol == "https"
  changed_when: false
  command: "{{ mgr_prefix }} ceph config-key set mgr/dashboard/crt -i /etc/ceph/ceph-dashboard.crt"

- name: import dashboard certificate key
  when: dashboard_protocol == "https"
  changed_when: false
  command: "{{ mgr_prefix }} ceph config-key set mgr/dashboard/key -i /etc/ceph/ceph-dashboard.key"

# Tries "ceph config set" first and falls back to "ceph config-key set".
- name: "set the dashboard port ({{ dashboard_port }})"
  shell: |
    {{ mgr_prefix }} ceph config set mgr mgr/dashboard/server_port {{ dashboard_port }} || \
    {{ mgr_prefix }} ceph config-key set mgr/dashboard/server_port {{ dashboard_port }}

# Disabling then enabling the module restarts it.
- name: disable mgr dashboard module (restart)
  changed_when: false
  command: "{{ mgr_prefix }} ceph mgr module disable dashboard"

- name: enable mgr dashboard module (restart)
  changed_when: false
  command: "{{ mgr_prefix }} ceph mgr module enable dashboard"
# Create the admin account, or reset its password when it already exists.
# The user name and password go through the "quote" filter so values holding
# spaces or shell metacharacters reach ceph unchanged. The task is retried
# (6 attempts, 5 seconds apart) until the command succeeds.
- name: set or update dashboard admin username and password
  shell: |
    if {{ mgr_prefix }} ceph dashboard ac-user-show {{ dashboard_admin_user | quote }}; then
      {{ mgr_prefix }} ceph dashboard ac-user-set-password {{ dashboard_admin_user | quote }} {{ dashboard_admin_password | quote }}
    else
      {{ mgr_prefix }} ceph dashboard ac-user-create {{ dashboard_admin_user | quote }} {{ dashboard_admin_password | quote }} administrator
    fi
  retries: 6
  delay: 5
  register: ac_result
  until: ac_result.rc == 0
# Grafana is served with the protocol selected by dashboard_protocol.
- name: set grafana url
  command: "{{ mgr_prefix }} ceph dashboard set-grafana-api-url {{ dashboard_protocol }}://{{ groups['grafana-server'][0] }}:3000/"
  changed_when: false

# Alertmanager is started without any TLS configuration and Prometheus talks
# to it over plain http, so the URL must use http even when the dashboard
# itself is served over https.
- name: set alertmanager host
  command: "{{ mgr_prefix }} ceph dashboard set-alertmanager-api-host http://{{ groups['grafana-server'][0] }}:9093/"
  changed_when: false
# Create the radosgw system user of the dashboard; its JSON description is
# kept in rgw_user_output so the keys can be extracted from it.
- name: create radosgw system user
  shell: "timeout 20 {{ mgr_prefix }} radosgw-admin user create --uid={{ dashboard_rgw_api_user_id }} --display-name='Ceph dashboard' --system"
  retries: 3
  until: rgw_user_output.rc == 0
  register: rgw_user_output

# The first entry of the "keys" list of the command output is used.
- name: get the rgw access and secret keys
  set_fact:
    rgw_access_key: "{{ (rgw_user_output.stdout | from_json)['keys'][0]['access_key'] }}"
    rgw_secret_key: "{{ (rgw_user_output.stdout | from_json)['keys'][0]['secret_key'] }}"

- name: set the rgw user
  changed_when: false
  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-user-id {{ dashboard_rgw_api_user_id }}"

- name: set the rgw access key
  changed_when: false
  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-access-key {{ rgw_access_key }}"

- name: set the rgw secret key
  changed_when: false
  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-secret-key {{ rgw_secret_key }}"

# The remaining rgw settings are optional: each task is skipped while its
# variable is empty.
- name: set the rgw host
  when: dashboard_rgw_api_host
  changed_when: false
  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-host {{ dashboard_rgw_api_host }}"

- name: set the rgw port
  when: dashboard_rgw_api_port
  changed_when: false
  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-port {{ dashboard_rgw_api_port }}"

- name: set the rgw scheme
  when: dashboard_rgw_api_scheme
  changed_when: false
  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-scheme {{ dashboard_rgw_api_scheme }}"

- name: set the rgw admin resource
  when: dashboard_rgw_api_admin_resource
  changed_when: false
  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-admin-resource {{ dashboard_rgw_api_admin_resource }}"

- name: disable ssl verification for rgw
  when: dashboard_rgw_api_no_ssl_verify
  changed_when: false
  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-ssl-verify False"

# Disabling then enabling the module restarts it.
- name: disable mgr dashboard module (restart)
  changed_when: false
  command: "{{ mgr_prefix }} ceph mgr module disable dashboard"

- name: enable mgr dashboard module (restart)
  changed_when: false
  command: "{{ mgr_prefix }} ceph mgr module enable dashboard"

View File

@ -0,0 +1,7 @@
---
# Entry point of the ceph-dashboard role: configure the dashboard, then tell
# the operator where to reach it.
- name: include configure_dashboard.yml
  include_tasks: configure_dashboard.yml

# NOTE(review): the message contains the admin password in clear text.
- name: print dashboard URL
  debug:
    msg: "The dashboard has been deployed! You can access your dashboard web UI at {{ dashboard_protocol }}://{{ ansible_fqdn }}:{{ dashboard_port }}/ as an '{{ dashboard_admin_user }}' user with '{{ dashboard_admin_password }}' password."

View File

@ -61,6 +61,7 @@ ceph_mds_firewall_zone: public
ceph_nfs_firewall_zone: public
ceph_rbdmirror_firewall_zone: public
ceph_iscsi_firewall_zone: public
ceph_dashboard_firewall_zone: public
# Generate local ceph.conf in fetch directory
ceph_conf_local: false
@ -679,6 +680,17 @@ openstack_keys:
- { name: client.openstack, caps: { mon: "profile rbd", osd: "profile rbd pool={{ openstack_glance_pool.name }}, profile rbd pool={{ openstack_nova_pool.name }}, profile rbd pool={{ openstack_cinder_pool.name }}, profile rbd pool={{ openstack_cinder_backup_pool.name }}"}, mode: "0600" }
#############
# DASHBOARD #
#############
dashboard_enabled: False
dashboard_network_name: ceph-dashboard
# Choose http or https
# For https, you should set dashboard.crt/key and grafana.crt/key
dashboard_protocol: http
dashboard_port: 8234
###############
# DEPRECATION #
###############

View File

@ -0,0 +1,17 @@
---
# Default variables of the ceph-grafana role.

# Written to the [security] section of grafana.ini.
grafana_admin_user: admin
grafana_admin_password: admin
# We only need this for SSL (https) connections
# Local paths of the certificate and key to copy under /etc/grafana. When
# either one is empty and https is selected, a self-signed pair is generated.
grafana_crt: ''
grafana_key: ''
grafana_container_image: "grafana/grafana:latest"
# The container CPU quota is cpu_period multiplied by cpu_cores.
grafana_container_cpu_period: 100000
grafana_container_cpu_cores: 2
# container_memory is in GB
# The swap limit of the container is set to twice this value.
grafana_container_memory: 4
# Numeric owner applied to /etc/grafana and /var/lib/grafana.
grafana_uid: 472
# Name of the datasource declared in the provisioning file.
grafana_datasource: Dashboard
# Directory the dashboards provider reads dashboards from.
grafana_dashboards_path: "/etc/grafana/dashboards/ceph-dashboard"
# Joined with commas and passed to the container as GF_INSTALL_PLUGINS.
grafana_plugins:
- vonage-status-panel
- grafana-piechart-panel

View File

@ -0,0 +1,17 @@
# This file is managed by ansible, don't make changes here - they will be
# overwritten.
#
# systemd wrapper around the "grafana-server" docker container: starting the
# unit starts the container and stays attached to it, stopping the unit stops
# the container.
[Unit]
Description=grafana-server
After=docker.service
[Service]
# In systemd, a leading "-" makes a missing environment file non-fatal.
EnvironmentFile=-/etc/environment
ExecStart=/usr/bin/docker start --attach grafana-server
# In systemd, a leading "-" makes a failing stop command non-fatal.
ExecStop=-/usr/bin/docker stop grafana-server
Restart=always
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=15
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1 @@
deb https://packagecloud.io/grafana/stable/debian/ jessie main

View File

@ -0,0 +1,8 @@
---
# The systemd module (rather than service) is required here: the role ships
# the grafana-server unit file itself, so systemd has to reload its
# configuration before the unit can be enabled.
- name: enable service
  systemd:
    daemon_reload: true
    enabled: true
    name: grafana-server

View File

@ -0,0 +1,14 @@
---
# Ansible Galaxy metadata of the ceph-grafana role.
galaxy_info:
  company: Red Hat
  author: Boris Ranto
  description: Configures Grafana for Ceph Dashboard
  license: Apache
  # Quoted so the version stays a string instead of being parsed as a float.
  min_ansible_version: '2.4'
  platforms:
    - name: EL
      versions:
        - 7
  galaxy_tags:
    - system
dependencies: []

View File

@ -0,0 +1,83 @@
---
# Configure grafana while it is stopped: render its configuration and
# provisioning files, install or generate the SSL material, fix ownership,
# then restart the service and wait for it to listen on port 3000.
- name: make sure grafana is down
  service:
    name: grafana-server
    state: stopped

- name: wait for grafana to be stopped
  wait_for:
    state: stopped
    port: 3000

- name: make sure grafana configuration directories exist
  with_items:
    - "/etc/grafana/provisioning/datasources"
    - "/etc/grafana/provisioning/dashboards"
  file:
    path: "{{ item }}"
    state: directory
    recurse: true

- name: write grafana.ini
  template:
    src: grafana.ini
    dest: /etc/grafana/grafana.ini
    mode: "0640"

- name: write datasources provisioning config file
  template:
    src: datasources-ceph-dashboard.yml
    dest: /etc/grafana/provisioning/datasources/ceph-dashboard.yml
    mode: "0640"

- name: Write dashboards provisioning config file
  template:
    src: dashboards-ceph-dashboard.yml
    dest: /etc/grafana/provisioning/dashboards/ceph-dashboard.yml
    mode: "0640"

# User-provided certificate and key are only copied for https deployments.
- name: copy grafana SSL certificate file
  when:
    - grafana_crt
    - dashboard_protocol == "https"
  copy:
    src: "{{ grafana_crt }}"
    dest: "/etc/grafana/ceph-dashboard.crt"
    mode: "0640"

- name: copy grafana SSL certificate key
  when:
    - grafana_key
    - dashboard_protocol == "https"
  copy:
    src: "{{ grafana_key }}"
    dest: "/etc/grafana/ceph-dashboard.key"
    mode: "0640"

# Fallback when the key or the certificate was not provided; the "test -f"
# guard keeps an existing pair untouched.
- name: generate a Self Signed OpenSSL certificate for dashboard
  when:
    - dashboard_protocol == "https"
    - not grafana_key or not grafana_crt
  shell: |
    test -f /etc/grafana/ceph-dashboard.key -a -f /etc/grafana/ceph-dashboard.crt || \
    openssl req -new -nodes -x509 -subj '/O=IT/CN=ceph-grafana' -days 3650 -keyout /etc/grafana/ceph-dashboard.key -out /etc/grafana/ceph-dashboard.crt -extensions v3_ca

- name: set owner/group on /etc/grafana
  file:
    path: /etc/grafana
    state: directory
    recurse: true
    # This is the UID used by the grafana container
    owner: "{{ grafana_uid }}"
    # This group is used by the grafana rpm
    group: "grafana"

- name: enable and start grafana
  service:
    name: grafana-server
    enabled: true
    state: restarted

- name: wait for grafana to start
  wait_for:
    port: 3000

View File

@ -0,0 +1,6 @@
---
# Entry point of the ceph-grafana role: create the container first, then
# configure grafana.
- name: include setup_container.yml
  include_tasks: setup_container.yml

- name: include configure_grafana.yml
  include_tasks: configure_grafana.yml

View File

@ -0,0 +1,64 @@
---
# Create the grafana-server container and ship the systemd unit wrapping it.
# The container is only created here (state: present); it is started through
# the grafana-server service.
- name: include ceph-container-common
  include_role:
    name: ceph-container-common
    allow_duplicates: false

- name: create grafana user
  user:
    name: grafana
    shell: '/bin/false'
    createhome: false
    system: true

- name: create /etc/grafana and /var/lib/grafana
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ grafana_uid }}"
    recurse: true
  with_items:
    - /etc/grafana
    - /var/lib/grafana

# Best effort: the unit does not exist yet on a first deployment.
- name: make sure the grafana-server service is down
  service:
    name: grafana-server
    state: stopped
  failed_when: false

- name: create docker container
  docker_container:
    name: grafana-server
    image: "{{ grafana_container_image }}"
    state: present
    # restart to allow updates
    restart: true
    # Quoted: a bare "no" is a YAML boolean, while the module expects the
    # string 'no'. Restarts are handled by the systemd unit instead.
    restart_policy: 'no'
    force_kill: true
    published_ports: '3000:3000'
    detach: true
    volumes:
      - "/etc/grafana:/etc/grafana:Z"
      - "/var/lib/grafana:/var/lib/grafana:Z"
    networks:
      - name: "{{ dashboard_network_name }}"
    keep_volumes: true
    pull: true
    cpu_period: "{{ grafana_container_cpu_period }}"
    # As of ansible-2.5.2, this module doesn't support the equivalent of the
    # --cpus flag, so we must use period/quota for now
    cpu_quota: "{{ grafana_container_cpu_period * grafana_container_cpu_cores }}"
    memory: "{{ grafana_container_memory }}GB"
    memory_swap: "{{ grafana_container_memory * 2 }}GB"
    env:
      GF_INSTALL_PLUGINS: "{{ grafana_plugins|join(',') }}"

- name: ship systemd service
  copy:
    src: grafana-server.service
    dest: "/etc/systemd/system/"
    owner: root
    group: root
    mode: "0644"
  notify: enable service

View File

@ -0,0 +1,12 @@
# Grafana dashboard provisioning: a single file-based provider loading the
# dashboards found under grafana_dashboards_path into the 'ceph-dashboard'
# folder.
apiVersion: 1

providers:
  - name: 'Ceph Dashboard'
    type: file
    orgId: 1
    folder: 'ceph-dashboard'
    editable: false
    disableDeletion: false
    updateIntervalSeconds: 3
    options:
      path: '{{ grafana_dashboards_path }}'

View File

@ -0,0 +1,26 @@
# Grafana datasource provisioning: the datasource named by grafana_datasource
# is listed for deletion and declared again below as a Prometheus datasource.
apiVersion: 1

# datasources to delete from the database
deleteDatasources:
  - name: '{{ grafana_datasource }}'
    orgId: 1

# datasources to insert or update, depending on what the database holds
datasources:
  - name: '{{ grafana_datasource }}'  # required
    type: 'prometheus'                # required
    # required: proxy or direct (Server or Browser in the UI)
    access: 'proxy'
    # defaults to 1 when not specified
    orgId: 1
    url: 'http://prometheus:9090'
    basicAuth: false
    # at most one default datasource per org
    isDefault: true
    # whether users may edit the datasource from the UI
    editable: false

View File

@ -0,0 +1,26 @@
# Grafana configuration template rendered by Ansible to /etc/grafana/grafana.ini.
# [server]
# root_url = %(protocol)s://%(domain)s:%(http_port)s/api/grafana/proxy
[users]
default_theme = light
#################################### Anonymous Auth ##########################
[auth.anonymous]
# enable anonymous access
enabled = true
# specify organization name that should be used for unauthenticated users
org_name = Main Org.
# specify role for unauthenticated users
org_role = Viewer
[server]
# Certificate and key paths are always written, whatever the protocol is.
cert_file = /etc/grafana/ceph-dashboard.crt
cert_key = /etc/grafana/ceph-dashboard.key
domain = {{ ansible_fqdn }}
# Same protocol (http or https) as the one selected for the dashboard.
protocol = {{ dashboard_protocol }}
[security]
# Administrator account taken from the role variables.
admin_user = {{ grafana_admin_user }}
admin_password = {{ grafana_admin_password }}

View File

@ -0,0 +1,9 @@
# Yum repository definition for the grafana packages hosted on packagecloud.io.
[grafana]
name=grafana
# The distribution major version is filled in by Ansible; $basearch is left
# for yum to expand.
baseurl=https://packagecloud.io/grafana/stable/el/{{ ansible_distribution_major_version }}/$basearch
repo_gpgcheck=1
enabled=1
gpgcheck=1
# Two keys, separated by a space.
gpgkey=https://packagecloud.io/gpg.key https://grafanarel.s3.amazonaws.com/RPM-GPG-KEY-grafana
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt

View File

@ -458,3 +458,13 @@
set_fact:
_rbd_target_api_handler_called: False
listen: "restart ceph rbd-target-api"
# The systemd module (rather than service) is required here: the
# node_exporter unit file is shipped by the playbooks, so systemd has to
# reload its configuration before the unit is enabled and restarted.
- name: restart node-exporter service
  listen: "restart node-exporter service"
  systemd:
    name: 'node_exporter'
    state: restarted
    enabled: true
    daemon_reload: true

View File

@ -155,4 +155,38 @@
- iscsi_gw_group_name in group_names
tags: firewall
# Ports of the dashboard stack, opened in ceph_dashboard_firewall_zone when
# the dashboard is enabled. Every rule is applied both permanently and to the
# running firewall.
- when: dashboard_enabled
  block:
    - name: open grafana port
      firewalld:
        zone: "{{ ceph_dashboard_firewall_zone }}"
        port: "3000/tcp"
        state: enabled
        permanent: true
        immediate: true

    - name: open node_exporter port
      firewalld:
        zone: "{{ ceph_dashboard_firewall_zone }}"
        port: "9100/tcp"
        state: enabled
        permanent: true
        immediate: true

    - name: open mgr/prometheus port
      firewalld:
        zone: "{{ ceph_dashboard_firewall_zone }}"
        port: "9283/tcp"
        state: enabled
        permanent: true
        immediate: true

    - name: open dashboard port
      firewalld:
        zone: "{{ ceph_dashboard_firewall_zone }}"
        port: "{{ dashboard_port }}/tcp"
        state: enabled
        permanent: true
        immediate: true
- meta: flush_handlers

View File

@ -17,6 +17,6 @@
- name: include mgr_modules.yml
include_tasks: mgr_modules.yml
when:
- ceph_mgr_modules | length > 0
- ceph_mgr_modules | length > 0 or dashboard_enabled
- ((groups[mgr_group_name] | default([]) | length == 0 and inventory_hostname == groups[mon_group_name] | last) or
(groups[mgr_group_name] | default([]) | length > 0 and inventory_hostname == groups[mgr_group_name] | last))

View File

@ -7,6 +7,15 @@
until: result is succeeded
when: ansible_os_family in ['RedHat', 'Suse']
# Installs the ceph-grafana-dashboards package on RedHat and SUSE families;
# 'latest' is requested only when upgrade_ceph_packages is true. The task is
# retried until the package module reports success.
- name: install ceph-grafana-dashboards package on RedHat or SUSE
  when: ansible_os_family in ['RedHat', 'Suse']
  package:
    name: ceph-grafana-dashboards
    state: "{{ (upgrade_ceph_packages|bool) | ternary('latest','present') }}"
  register: result
  until: result is succeeded
- name: install ceph-mgr packages for debian
apt:
name: '{{ ceph_mgr_packages }}'

View File

@ -0,0 +1,2 @@
---
# Image used to create the node-exporter container.
node_exporter_container_image: prom/node-exporter:latest

View File

@ -0,0 +1,20 @@
# This file is managed by ansible, don't make changes here - they will be
# overwritten.
#
# systemd wrapper around the "node-exporter" docker container.
[Unit]
Description=Node Exporter
After=docker.service
[Service]
EnvironmentFile=-/etc/environment
# Make sure the cfg80211 is loaded before running the container, the node
# exporter needs this module loaded to test for presence of wi-fi devices.
# The leading "-" keeps this step best effort: a host where the module is not
# available, or where modprobe lives under another path, must still be able
# to start the exporter.
ExecStartPre=-/usr/sbin/modprobe cfg80211
ExecStart=/usr/bin/docker start --attach node-exporter
ExecStop=-/usr/bin/docker stop node-exporter
Restart=always
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=15
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,14 @@
---
# Ansible Galaxy metadata of the ceph-node-exporter role.
galaxy_info:
  company: Red Hat
  author: Boris Ranto
  description: Configures Prometheus Node Exporter
  license: Apache
  # Quoted so the version stays a string instead of being parsed as a float.
  min_ansible_version: '2.4'
  platforms:
    - name: EL
      versions:
        - 7
  galaxy_tags:
    - system
dependencies: []

View File

@ -0,0 +1,3 @@
---
# Entry point of the ceph-node-exporter role; all the work happens in
# setup_container.yml.
- include_tasks: setup_container.yml
  name: include setup_container.yml

View File

@ -0,0 +1,42 @@
---
# Run the node exporter in a host-network container and ship the systemd
# unit that manages it.
- name: include ceph-container-common
  include_role:
    name: ceph-container-common
    allow_duplicates: false

# Best effort: the unit does not exist yet on a first deployment.
- name: make sure the node_exporter service is down
  service:
    name: node_exporter
    state: stopped
  failed_when: false

- name: start docker container
  docker_container:
    name: node-exporter
    image: "{{ node_exporter_container_image }}"
    state: started
    # /proc and /sys of the host are mounted read-only under /host.
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--no-collector.timex'
    # restart to allow updates
    restart: true
    # Quoted: a bare "no" is a YAML boolean, while the module expects the
    # string 'no'. Restarts are handled by the systemd unit instead.
    restart_policy: 'no'
    force_kill: true
    detach: true
    volumes:
      - '/proc:/host/proc:ro'
      - '/sys:/host/sys:ro'
    network_mode: host
    keep_volumes: true
    pull: true
  notify: restart node-exporter service

- name: ship systemd service
  copy:
    src: node_exporter.service
    dest: "/etc/systemd/system/"
    owner: root
    group: root
    mode: "0644"
  notify: restart node-exporter service

View File

@ -0,0 +1,17 @@
---
# Default variables of the ceph-prometheus role (prometheus and alertmanager).

prometheus_container_image: prom/prometheus:latest
# The container CPU quota is cpu_period multiplied by cpu_cores.
prometheus_container_cpu_period: 100000
prometheus_container_cpu_cores: 2
# container_memory is in GB
# The swap limit of the container is set to twice this value.
prometheus_container_memory: 4
# Host directories mounted into the container as /prometheus (data) and
# /etc/prometheus (configuration).
prometheus_data_dir: /var/lib/prometheus
prometheus_conf_dir: /etc/prometheus
prometheus_user_id: '65534' # This is the UID used by the prom/prometheus docker image
alertmanager_container_image: prom/alertmanager:latest
# The container CPU quota is cpu_period multiplied by cpu_cores.
alertmanager_container_cpu_period: 100000
alertmanager_container_cpu_cores: 2
# container_memory is in GB
# The swap limit of the container is set to twice this value.
alertmanager_container_memory: 4
# Host directories mounted into the container as /alertmanager (data) and
# /etc/alertmanager (configuration).
alertmanager_data_dir: /var/lib/alertmanager
alertmanager_conf_dir: /etc/alertmanager

View File

@ -0,0 +1,17 @@
# This file is managed by ansible, don't make changes here - they will be
# overwritten.
#
# systemd wrapper around the "alertmanager" docker container: starting the
# unit starts the container and stays attached to it, stopping the unit stops
# the container.
[Unit]
Description=alertmanager
After=docker.service
[Service]
# In systemd, a leading "-" makes a missing environment file non-fatal.
EnvironmentFile=-/etc/environment
ExecStart=/usr/bin/docker start --attach alertmanager
ExecStop=/usr/bin/docker stop alertmanager
Restart=always
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=15
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,17 @@
# This file is managed by ansible, don't make changes here - they will be
# overwritten.
#
# systemd wrapper around the "prometheus" docker container: starting the unit
# starts the container and stays attached to it, stopping the unit stops the
# container.
[Unit]
Description=prometheus
After=docker.service
[Service]
# In systemd, a leading "-" makes a missing environment file non-fatal.
EnvironmentFile=-/etc/environment
ExecStart=/usr/bin/docker start --attach prometheus
ExecStop=/usr/bin/docker stop prometheus
Restart=always
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=15
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,12 @@
---
# Reloads systemd, then enables and restarts both units. The systemd module
# (rather than service) is required because the role ships the unit files
# itself.
- name: service handler
  with_items:
    - 'alertmanager'
    - 'prometheus'
  systemd:
    name: "{{ item }}"
    state: restarted
    enabled: true
    daemon_reload: true

View File

@ -0,0 +1,3 @@
---
# The role relies on the variables provided by ceph-defaults.
dependencies:
  - role: ceph-defaults

View File

@ -0,0 +1,35 @@
---
# Prepare the configuration of prometheus and alertmanager on the host, then
# create their containers. A change in either config file notifies
# "service handler".
- name: create prometheus directories
  with_items:
    - "{{ prometheus_conf_dir }}"
    - "{{ prometheus_data_dir }}"
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ prometheus_user_id }}"

- name: write prometheus config file
  template:
    src: prometheus.yml
    dest: "{{ prometheus_conf_dir }}/"
    owner: "{{ prometheus_user_id }}"
  notify: service handler

- name: create alertmanager directories
  with_items:
    - "{{ alertmanager_conf_dir }}"
    - "{{ alertmanager_data_dir }}"
  file:
    path: "{{ item }}"
    state: directory
    owner: "root"

- name: write alertmanager config file
  template:
    src: alertmanager.yml
    dest: "{{ alertmanager_conf_dir }}/"
    owner: "root"
  notify: service handler

- name: include setup_container.yml
  include_tasks: setup_container.yml

View File

@ -0,0 +1,93 @@
---
# Run alertmanager and prometheus as containers attached to the dashboard
# network and ship the systemd units that manage them.
- name: include ceph-container-common
  include_role:
    name: ceph-container-common
    allow_duplicates: false

# Best effort: the unit does not exist yet on a first deployment.
- name: make sure the alertmanager service is down
  service:
    name: alertmanager
    state: stopped
  failed_when: false

- name: start alertmanager container
  docker_container:
    name: alertmanager
    image: "{{ alertmanager_container_image }}"
    state: started
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    # restart to allow updates
    restart: true
    # Quoted: a bare "no" is a YAML boolean, while the module expects the
    # string 'no'. Restarts are handled by the systemd unit instead.
    restart_policy: 'no'
    force_kill: true
    published_ports: '9093:9093'
    detach: true
    volumes:
      - "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z"
      - "{{ alertmanager_data_dir }}:/alertmanager:Z"
    networks:
      - name: "{{ dashboard_network_name }}"
    keep_volumes: true
    pull: true
    cpu_period: "{{ alertmanager_container_cpu_period }}"
    # As of ansible-2.5.2, this module doesn't support the equivalent of the
    # --cpus flag, so we must use period/quota for now
    cpu_quota: "{{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }}"
    # memory: 0
    # memory_swap: 0
    memory: "{{ alertmanager_container_memory }}GB"
    memory_swap: "{{ alertmanager_container_memory * 2 }}GB"
  notify: service handler

# Best effort: the unit does not exist yet on a first deployment.
- name: make sure the prometheus service is down
  service:
    name: prometheus
    state: stopped
  failed_when: false

- name: start prometheus docker container
  docker_container:
    name: prometheus
    image: "{{ prometheus_container_image }}"
    state: started
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.external-url=http://{{ inventory_hostname }}:9090/'
    # restart to allow updates
    restart: true
    # Quoted for the same reason as in the alertmanager task.
    restart_policy: 'no'
    force_kill: true
    published_ports: '9090:9090'
    detach: true
    volumes:
      - "{{ prometheus_conf_dir }}:/etc/prometheus:Z"
      - "{{ prometheus_data_dir }}:/prometheus:Z"
    networks:
      - name: "{{ dashboard_network_name }}"
    user: "{{ prometheus_user_id }}"
    keep_volumes: true
    pull: true
    cpu_period: "{{ prometheus_container_cpu_period }}"
    # As of ansible-2.5.2, this module doesn't support the equivalent of the
    # --cpus flag, so we must use period/quota for now
    cpu_quota: "{{ prometheus_container_cpu_period * prometheus_container_cpu_cores }}"
    # memory: 0
    # memory_swap: 0
    memory: "{{ prometheus_container_memory }}GB"
    memory_swap: "{{ prometheus_container_memory * 2 }}GB"
  notify: service handler

- name: ship systemd services
  copy:
    src: "{{ item }}"
    dest: "/etc/systemd/system/"
    owner: root
    group: root
    mode: "0644"
  with_items:
    - 'alertmanager.service'
    - 'prometheus.service'
  notify: service handler

View File

@ -0,0 +1,15 @@
# Alertmanager configuration, rendered through Jinja2.
# Every alert is routed to the single 'ceph-dashboard' receiver defined below.
global:
resolve_timeout: 5m
route:
# Alerts are grouped by alert name only.
group_by: ['alertname']
group_wait: 10s
group_interval: 10s
repeat_interval: 1h
receiver: 'ceph-dashboard'
receivers:
- name: 'ceph-dashboard'
webhook_configs:
# One webhook URL is emitted per host of the 'mgrs' inventory group; the
# 'mons' group is used instead only when 'mgrs' is undefined (an existing
# but empty 'mgrs' group yields no webhook at all).
{% for host in groups['mgrs'] | default(groups['mons']) %}
- url: '{{ dashboard_protocol }}://{{ host }}:{{ dashboard_port }}/api/prometheus_receiver'
{% endfor %}

View File

@ -0,0 +1,47 @@
# Prometheus configuration, rendered through Jinja2.
# Scrape targets are generated from the inventory groups.
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- '/etc/prometheus/alerts/*'
scrape_configs:
# Prometheus scraping its own metrics endpoint.
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# Port 9283 on every 'mgrs' host ('mons' when 'mgrs' is undefined). All
# targets share the same instance label, and honor_labels keeps the labels
# exposed by the target when they conflict with server-side ones.
- job_name: 'ceph'
honor_labels: true
static_configs:
{% for host in groups['mgrs'] | default(groups['mons']) %}
- targets: ['{{ host }}:9283']
labels:
instance: 'ceph_cluster'
{% endfor %}
# Port 9100 on every inventory host except members of 'grafana-server'.
# The instance label is taken from the gathered ansible_nodename fact, so
# facts must be available for each listed host.
- job_name: 'node'
static_configs:
{% for host in (groups['all'] | difference(groups['grafana-server'])) %}
- targets: ['{{ host }}:9100']
labels:
instance: "{{ hostvars[host]['ansible_nodename'] }}"
{% endfor %}
# NOTE(review): this job also targets port 9100, the same port as the 'node'
# job - presumably the node exporter on the grafana host rather than grafana
# itself; confirm.
- job_name: 'grafana'
static_configs:
{% for host in groups['grafana-server'] %}
- targets: ['{{ host }}:9100']
labels:
instance: "{{ hostvars[host]['ansible_nodename'] }}"
{% endfor %}
# The iSCSI gateway job is only rendered when the inventory defines an
# 'iscsigws' group.
{% if 'iscsigws' in groups %}
- job_name: 'iscsi-gws'
static_configs:
{% for host in groups['iscsigws'] %}
- targets: ['{{ host }}:9287']
labels:
instance: "{{ hostvars[host]['ansible_nodename'] }}"
{% endfor %}
{% endif %}
# Alerts are pushed over plain http to the host name 'alertmanager' on port
# 9093 - presumably resolved through the shared container network; confirm.
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets: ['alertmanager:9093']

View File

@ -13,6 +13,7 @@
- iscsigws
- iscsi-gws # for backward compatibility only!
- mgrs
- grafana-server
gather_facts: false
become: True
@ -113,6 +114,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
- import_role:
@ -157,6 +161,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
- import_role:
@ -196,6 +203,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
- import_role:
@ -235,6 +245,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
- import_role:
@ -274,6 +287,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
- import_role:
@ -313,6 +329,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
- import_role:
@ -352,6 +371,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
- import_role:
@ -391,6 +413,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
when: inventory_hostname == groups.get('clients', ['']) | first
@ -433,6 +458,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-container-common
- import_role:
@ -482,3 +510,45 @@
delegate_to: "{{ groups[mon_group_name][0] }}"
run_once: true
when: not ceph_status.failed
# Play for the hosts of the 'grafana-server' group: imports the common ceph
# roles followed by ceph-prometheus and ceph-grafana. Every role is skipped
# unless dashboard_enabled is true.
#
# `| bool` is applied to each condition so that a string value such as
# "false" (for example when passed with -e on the command line) is converted
# to a real boolean instead of being treated as a non-empty, truthy string.
- hosts: grafana-server
  become: true
  tasks:
    - import_role:
        name: ceph-defaults
      tags: ['ceph_update_config']
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-facts
      tags: ['ceph_update_config']
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-handler
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-node-exporter
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-common
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-config
      tags: ['ceph_update_config']
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-prometheus
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-grafana
      when: dashboard_enabled | bool
# Play that imports the ceph-dashboard role on a single host: the first
# member of the 'mgrs' group, or of 'mons' when 'mgrs' is undefined.
#
# `| bool` converts string values such as "false" (for example from -e) to a
# real boolean so the roles are reliably skipped when the dashboard is off.
- hosts: '{{ (groups["mgrs"] | default(groups["mons"]))[0] }}'
  become: true
  tasks:
    - import_role:
        name: ceph-defaults
      tags: ['ceph_update_config']
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-dashboard
      when: dashboard_enabled | bool

View File

@ -13,6 +13,7 @@
- mgrs
- iscsigws
- iscsi-gws # for backward compatibility only!
- grafana-server
gather_facts: false
any_errors_fatal: true
@ -92,6 +93,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -134,6 +138,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -173,6 +180,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -212,6 +222,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -251,6 +264,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -290,6 +306,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -329,6 +348,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -368,6 +390,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -407,6 +432,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -448,6 +476,9 @@
tags: ['ceph_update_config']
- import_role:
name: ceph-handler
- import_role:
name: ceph-node-exporter
when: dashboard_enabled
- import_role:
name: ceph-common
- import_role:
@ -485,3 +516,45 @@
delegate_to: "{{ groups[mon_group_name][0] }}"
run_once: true
when: not ceph_status.failed
# Play for the hosts of the 'grafana-server' group: imports the common ceph
# roles followed by ceph-prometheus and ceph-grafana. Every role is skipped
# unless dashboard_enabled is true.
#
# `| bool` is applied to each condition so that a string value such as
# "false" (for example when passed with -e on the command line) is converted
# to a real boolean instead of being treated as a non-empty, truthy string.
- hosts: grafana-server
  become: true
  tasks:
    - import_role:
        name: ceph-defaults
      tags: ['ceph_update_config']
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-facts
      tags: ['ceph_update_config']
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-handler
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-node-exporter
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-common
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-config
      tags: ['ceph_update_config']
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-prometheus
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-grafana
      when: dashboard_enabled | bool
# Play that imports the ceph-dashboard role on a single host: the first
# member of the 'mgrs' group, or of 'mons' when 'mgrs' is undefined.
#
# `| bool` converts string values such as "false" (for example from -e) to a
# real boolean so the roles are reliably skipped when the dashboard is off.
- hosts: '{{ (groups["mgrs"] | default(groups["mons"]))[0] }}'
  become: true
  tasks:
    - import_role:
        name: ceph-defaults
      tags: ['ceph_update_config']
      when: dashboard_enabled | bool
    - import_role:
        name: ceph-dashboard
      when: dashboard_enabled | bool