Run the dashboard containers (grafana-server, node-exporter, alertmanager,
prometheus) directly from their systemd units.

Each unit's ExecStart now performs "<container_binary> run ..." instead of
"start --attach" on a container that an Ansible task had to create first.
An ExecStartPre step removes any leftover container of the same name before
the run; its "=-" prefix tells systemd to ignore a failure of that step.
The roles now only ship the unit file and then start and enable the service
with daemon_reload, so the per-role "enable"/"restart" handlers are removed.

In the "run" command lines everything before the image name is an option of
the container engine and everything after it is passed to the containerized
program, so the exporter, alertmanager and prometheus flags follow the image.

diff --git a/roles/ceph-grafana/handlers/main.yml b/roles/ceph-grafana/handlers/main.yml
deleted file mode 100644
index fd1bbd465..000000000
--- a/roles/ceph-grafana/handlers/main.yml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-- name: enable service
-  # We use the systemd module here so we can use the daemon_reload feature,
-  # since we're shipping the .service file ourselves
-  systemd:
-    name: grafana-server
-    daemon_reload: true
-    enabled: true
diff --git a/roles/ceph-grafana/tasks/setup_container.yml b/roles/ceph-grafana/tasks/setup_container.yml
index 6c9c9f8f7..b7647f112 100644
--- a/roles/ceph-grafana/tasks/setup_container.yml
+++ b/roles/ceph-grafana/tasks/setup_container.yml
@@ -16,31 +16,6 @@
     - /etc/grafana
     - /var/lib/grafana
 
-- name: make sure the grafana-server service is down
-  service:
-    name: grafana-server
-    state: stopped
-  failed_when: false
-
-# Make sure we re-create the container
-- name: remove old grafana-server container
-  command: "{{ container_binary }} rm -f grafana-server"
-  changed_when: false
-  failed_when: false
-
-- name: create grafana-server container
-  shell: |
-    {{ container_binary }} create --name grafana-server \
-      -v "/etc/grafana:/etc/grafana:Z" \
-      -v "/var/lib/grafana:/var/lib/grafana:Z" \
-      "--net=host" \
-      "--cpu-period={{ grafana_container_cpu_period }}" \
-      "--cpu-quota={{ grafana_container_cpu_period * grafana_container_cpu_cores }}" \
-      "--memory={{ grafana_container_memory }}GB" \
-      "--memory-swap={{ grafana_container_memory * 2 }}GB" \
-      -e "GF_INSTALL_PLUGINS={{ grafana_plugins|join(',') }}" \
-      "{{ grafana_container_image }}"
-
 - name: ship systemd service
   template:
     src: grafana-server.service.j2
@@ -48,4 +23,11 @@
     owner: root
     group: root
     mode: 0644
-  notify: enable service
+
+- name: start the grafana-server service
+  systemd:
+    name: grafana-server
+    state: started
+    enabled: true
+    daemon_reload: true
+  failed_when: false
diff --git a/roles/ceph-grafana/templates/grafana-server.service.j2 b/roles/ceph-grafana/templates/grafana-server.service.j2
index dd647495d..530c9b9b4 100644
--- a/roles/ceph-grafana/templates/grafana-server.service.j2
+++ b/roles/ceph-grafana/templates/grafana-server.service.j2
@@ -1,19 +1,30 @@
-# This file is managed by ansible, don't make changes here - they will be
-# overwritten.
-[Unit]
-Description=grafana-server
-{% if container_binary == 'docker' %}
-After=docker.service
-{% endif %}
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+[Unit]
+Description=grafana-server
+{% if container_binary == 'docker' %}
+After=docker.service
+{% endif %}
 
- [Service]
-EnvironmentFile=-/etc/environment
-ExecStart=/usr/bin/{{ container_binary }} start --attach grafana-server
-ExecStop=-/usr/bin/{{ container_binary }} stop grafana-server
-Restart=always
-RestartSec=10s
-TimeoutStartSec=120
-TimeoutStopSec=15
+[Service]
+EnvironmentFile=-/etc/environment
+ExecStartPre=-/usr/bin/{{ container_binary }} stop grafana-server
+ExecStartPre=-/usr/bin/{{ container_binary }} rm grafana-server
+ExecStart=/usr/bin/{{ container_binary }} run --rm --name=grafana-server \
+  -v /etc/grafana:/etc/grafana:Z \
+  -v /var/lib/grafana:/var/lib/grafana:Z \
+  --net=host \
+  --cpu-period={{ grafana_container_cpu_period }} \
+  --cpu-quota={{ grafana_container_cpu_period * grafana_container_cpu_cores }} \
+  --memory={{ grafana_container_memory }}GB \
+  --memory-swap={{ grafana_container_memory * 2 }}GB \
+  -e GF_INSTALL_PLUGINS={{ grafana_plugins|join(',') }} \
+  {{ grafana_container_image }}
+ExecStop=-/usr/bin/{{ container_binary }} stop grafana-server
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
 
- [Install]
-WantedBy=multi-user.target
\ No newline at end of file
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/ceph-handler/handlers/main.yml b/roles/ceph-handler/handlers/main.yml
index 2a9f16dc7..9333b9cc2 100644
--- a/roles/ceph-handler/handlers/main.yml
+++ b/roles/ceph-handler/handlers/main.yml
@@ -458,13 +458,3 @@
       set_fact:
         _rbd_target_api_handler_called: False
       listen: "restart ceph rbd-target-api"
-
-    - name: restart node-exporter service
-      listen: "restart node-exporter service"
-      # We use the systemd module here so we can use the daemon_reload feature,
-      # since we're shipping the .service file ourselves
-      systemd:
-        name: 'node_exporter'
-        daemon_reload: true
-        enabled: true
-        state: restarted
diff --git a/roles/ceph-node-exporter/tasks/setup_container.yml b/roles/ceph-node-exporter/tasks/setup_container.yml
index 473c6cf4f..2dd4dd100 100644
--- a/roles/ceph-node-exporter/tasks/setup_container.yml
+++ b/roles/ceph-node-exporter/tasks/setup_container.yml
@@ -1,27 +1,4 @@
 ---
-- name: make sure the node_exporter service is down
-  service:
-    name: node_exporter
-    state: stopped
-  failed_when: false
-
-# Make sure we re-create the container
-- name: remove old node-exporter container
-  command: "{{ container_binary }} rm -f node-exporter"
-  changed_when: false
-  failed_when: false
-
-- name: start node-exporter container
-  shell: |
-    {{ container_binary }} run --detach --name node-exporter \
-      -v /proc:/host/proc:ro -v /sys:/host/sys:ro \
-      --net=host \
-      {{ node_exporter_container_image }} \
-      '--path.procfs=/host/proc' \
-      '--path.sysfs=/host/sys' \
-      '--no-collector.timex'
-  notify: restart node-exporter service
-
 - name: ship systemd service
   template:
     src: node_exporter.service.j2
@@ -29,4 +6,11 @@
     owner: root
     group: root
     mode: 0644
-  notify: restart node-exporter service
+
+- name: start the node_exporter service
+  systemd:
+    name: node_exporter
+    state: started
+    enabled: true
+    daemon_reload: true
+  failed_when: false
diff --git a/roles/ceph-node-exporter/templates/node_exporter.service.j2 b/roles/ceph-node-exporter/templates/node_exporter.service.j2
index 489ba7275..7d89a119e 100644
--- a/roles/ceph-node-exporter/templates/node_exporter.service.j2
+++ b/roles/ceph-node-exporter/templates/node_exporter.service.j2
@@ -8,7 +8,14 @@ After=docker.service
 
 [Service]
 EnvironmentFile=-/etc/environment
-ExecStart=/usr/bin/{{ container_binary }} start --attach node-exporter
+ExecStartPre=-/usr/bin/{{ container_binary }} rm -f node-exporter
+ExecStart=/usr/bin/{{ container_binary }} run --name=node-exporter \
+  -v /proc:/host/proc:ro -v /sys:/host/sys:ro \
+  --net=host \
+  {{ node_exporter_container_image }} \
+  --path.procfs=/host/proc \
+  --path.sysfs=/host/sys \
+  --no-collector.timex
 # Make sure the cfg80211 is loaded before running the container, the node
 # exporter needs this module loaded to test for presence of wi-fi devices
 ExecStartPre=/usr/sbin/modprobe cfg80211
diff --git a/roles/ceph-prometheus/tasks/setup_container.yml b/roles/ceph-prometheus/tasks/setup_container.yml
index 057185a28..b07354718 100644
--- a/roles/ceph-prometheus/tasks/setup_container.yml
+++ b/roles/ceph-prometheus/tasks/setup_container.yml
@@ -1,60 +1,4 @@
 ---
-- name: make sure the alertmanager service is down
-  service:
-    name: alertmanager
-    state: stopped
-  failed_when: false
-
-# Make sure we re-create the container
-- name: remove old alertmanager container
-  command: "{{ container_binary }} rm -f alertmanager"
-  changed_when: false
-  failed_when: false
-
-- name: start alertmanager container
-  shell: |
-    {{ container_binary }} run --detach --name alertmanager \
-      -v "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z" \
-      -v "{{ alertmanager_data_dir }}:/alertmanager:Z" \
-      "--net=host" \
-      "--cpu-period={{ alertmanager_container_cpu_period }}" \
-      "--cpu-quota={{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }}" \
-      "--memory={{ alertmanager_container_memory }}GB" \
-      "--memory-swap={{ alertmanager_container_memory * 2 }}GB" \
-      "{{ alertmanager_container_image }}" \
-      "--config.file=/etc/alertmanager/alertmanager.yml" \
-      "--storage.path=/alertmanager"
-  notify: service handler
-
-- name: make sure the prometheus service is down
-  service:
-    name: prometheus
-    state: stopped
-  failed_when: false
-
-# Make sure we re-create the container
-- name: remove old prometheus container
-  command: "{{ container_binary }} rm -f prometheus"
-  changed_when: false
-  failed_when: false
-
-- name: start prometheus container
-  shell: |
-    {{ container_binary }} run --detach --name prometheus \
-      -v "{{ prometheus_conf_dir }}:/etc/prometheus:Z" \
-      -v "{{ prometheus_data_dir }}:/prometheus:Z" \
-      "--net=host" \
-      "--user={{ prometheus_user_id }}" \
-      "--cpu-period={{ prometheus_container_cpu_period }}" \
-      "--cpu-quota={{ prometheus_container_cpu_period * prometheus_container_cpu_cores }}" \
-      "--memory={{ prometheus_container_memory }}GB" \
-      "--memory-swap={{ prometheus_container_memory * 2 }}GB" \
-      "{{ prometheus_container_image }}" \
-      "--config.file=/etc/prometheus/prometheus.yml" \
-      "--storage.tsdb.path=/prometheus" \
-      "--web.external-url=http://{{ inventory_hostname }}:9090/"
-  notify: service handler
-
 - name: ship systemd services
   template:
     src: "{{ item }}.j2"
@@ -66,3 +10,13 @@
     - 'alertmanager.service'
     - 'prometheus.service'
   notify: service handler
+
+- name: start prometheus services
+  systemd:
+    name: "{{ item }}"
+    daemon_reload: true
+    enabled: true
+    state: started
+  with_items:
+    - prometheus
+    - alertmanager
diff --git a/roles/ceph-prometheus/templates/alertmanager.service.j2 b/roles/ceph-prometheus/templates/alertmanager.service.j2
index 80eb573f3..6a9c9c524 100644
--- a/roles/ceph-prometheus/templates/alertmanager.service.j2
+++ b/roles/ceph-prometheus/templates/alertmanager.service.j2
@@ -8,7 +8,18 @@ After=docker.service
 
 [Service]
 EnvironmentFile=-/etc/environment
-ExecStart=/usr/bin/{{ container_binary }} start --attach alertmanager
+ExecStartPre=-/usr/bin/{{ container_binary }} rm -f alertmanager
+ExecStart=/usr/bin/{{ container_binary }} run --name=alertmanager \
+  -v "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z" \
+  -v "{{ alertmanager_data_dir }}:/alertmanager:Z" \
+  --net=host \
+  --cpu-period={{ alertmanager_container_cpu_period }} \
+  --cpu-quota={{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }} \
+  --memory={{ alertmanager_container_memory }}GB \
+  --memory-swap={{ alertmanager_container_memory * 2 }}GB \
+  {{ alertmanager_container_image }} \
+  --config.file=/etc/alertmanager/alertmanager.yml \
+  --storage.path=/alertmanager
 ExecStop=/usr/bin/{{ container_binary }} stop alertmanager
 Restart=always
 RestartSec=10s
diff --git a/roles/ceph-prometheus/templates/prometheus.service.j2 b/roles/ceph-prometheus/templates/prometheus.service.j2
index 6971eadfc..1d99c7e05 100644
--- a/roles/ceph-prometheus/templates/prometheus.service.j2
+++ b/roles/ceph-prometheus/templates/prometheus.service.j2
@@ -8,7 +8,20 @@ After=docker.service
 
 [Service]
 EnvironmentFile=-/etc/environment
-ExecStart=/usr/bin/{{ container_binary }} start --attach prometheus
+ExecStartPre=-/usr/bin/{{ container_binary }} rm -f prometheus
+ExecStart=/usr/bin/{{ container_binary }} run --name=prometheus \
+  -v "{{ prometheus_conf_dir }}:/etc/prometheus:Z" \
+  -v "{{ prometheus_data_dir }}:/prometheus:Z" \
+  --net=host \
+  --user={{ prometheus_user_id }} \
+  --cpu-period={{ prometheus_container_cpu_period }} \
+  --cpu-quota={{ prometheus_container_cpu_period * prometheus_container_cpu_cores }} \
+  --memory={{ prometheus_container_memory }}GB \
+  --memory-swap={{ prometheus_container_memory * 2 }}GB \
+  {{ prometheus_container_image }} \
+  --config.file=/etc/prometheus/prometheus.yml \
+  --storage.tsdb.path=/prometheus \
+  --web.external-url=http://{{ inventory_hostname }}:9090/
 ExecStop=/usr/bin/{{ container_binary }} stop prometheus
 Restart=always
 RestartSec=10s