# Patch summary (descriptive preamble; everything from the first "diff --git" onward is unchanged).
#
# Purpose: stop driving the dashboard-related containers through the Ansible
# `docker_container` module and invoke `{{ container_binary }}` directly instead.
#
# Changes visible in this patch:
#   * group_vars/all.yml.sample, group_vars/rhcs.yml.sample and
#     roles/ceph-defaults/defaults/main.yml: the `dashboard_network_name`
#     setting is removed.
#   * roles/ceph-grafana, roles/ceph-node-exporter, roles/ceph-prometheus
#     (tasks/setup_container.yml): each `docker_container` task is replaced by
#     a `command` task running `{{ container_binary }} rm -f <name>` (with
#     `changed_when: false` / `failed_when: false`, so its result is ignored)
#     followed by a `shell` task running `{{ container_binary }} create`
#     (grafana-server) or `{{ container_binary }} run --detach`
#     (node-exporter, alertmanager, prometheus).
#   * grafana-server, alertmanager and prometheus no longer attach to the
#     `dashboard_network_name` network or publish ports 3000/9093/9090; the
#     new commands pass `--net=host` instead.
#   * The four systemd unit files move from `files/` to `templates/` and are
#     shipped with `template` instead of `copy`. `After=docker.service` is
#     emitted only when `container_binary == 'docker'`, and ExecStart/ExecStop
#     call `/usr/bin/{{ container_binary }}`.
#   * The Grafana datasource URL and the Prometheus alertmanager target now use
#     `groups["grafana-server"][0]` in place of the container names
#     `prometheus` / `alertmanager`.
#   * roles/ceph-prometheus/defaults/main.yml: comment wording only
#     ("docker image" -> "container image").
#
# NOTE(review): the removed `docker_container` tasks set `pull: true`; the
#   replacement create/run commands pass no pull-related option. Confirm that
#   image refresh is handled elsewhere if it is still wanted.
# NOTE(review): `ExecStop=` carries the `-` prefix in grafana-server.service
#   and node_exporter.service but not in alertmanager.service or
#   prometheus.service (same as before this patch). Confirm whether the
#   difference is intentional.
# NOTE(review): line breaks in this copy of the patch appear to have been
#   collapsed into spaces; it presumably needs to be regenerated from the
#   original commit before it can be applied.
diff --git a/group_vars/all.yml.sample b/group_vars/all.yml.sample index f20886e18..8e8743c61 100644 --- a/group_vars/all.yml.sample +++ b/group_vars/all.yml.sample @@ -692,7 +692,6 @@ dummy: # DASHBOARD # ############# #dashboard_enabled: False -#dashboard_network_name: ceph-dashboard # Choose http or https # For https, you should set dashboard.crt/key and grafana.crt/key #dashboard_protocol: http diff --git a/group_vars/rhcs.yml.sample b/group_vars/rhcs.yml.sample index c4a3ce23c..8b132571a 100644 --- a/group_vars/rhcs.yml.sample +++ b/group_vars/rhcs.yml.sample @@ -692,7 +692,6 @@ ceph_docker_registry: "registry.access.redhat.com" # DASHBOARD # ############# #dashboard_enabled: False -#dashboard_network_name: ceph-dashboard # Choose http or https # For https, you should set dashboard.crt/key and grafana.crt/key #dashboard_protocol: http diff --git a/roles/ceph-defaults/defaults/main.yml b/roles/ceph-defaults/defaults/main.yml index 4236d01b2..ec5c9d4fa 100644 --- a/roles/ceph-defaults/defaults/main.yml +++ b/roles/ceph-defaults/defaults/main.yml @@ -684,7 +684,6 @@ openstack_keys: # DASHBOARD # ############# dashboard_enabled: False -dashboard_network_name: ceph-dashboard # Choose http or https # For https, you should set dashboard.crt/key and grafana.crt/key dashboard_protocol: http diff --git a/roles/ceph-grafana/tasks/setup_container.yml b/roles/ceph-grafana/tasks/setup_container.yml index f0c164028..3bb8dfcc2 100644 --- a/roles/ceph-grafana/tasks/setup_container.yml +++ b/roles/ceph-grafana/tasks/setup_container.yml @@ -27,35 +27,27 @@ state: stopped failed_when: false -- name: create docker container - docker_container: - name: grafana-server - image: "{{ grafana_container_image }}" - state: present - # restart to allow updates - restart: true - restart_policy: no - force_kill: yes - published_ports: '3000:3000' - detach: true - volumes: - - "/etc/grafana:/etc/grafana:Z" - - "/var/lib/grafana:/var/lib/grafana:Z" - networks: - - name: "{{ 
dashboard_network_name }}" - keep_volumes: true - pull: true - cpu_period: "{{ grafana_container_cpu_period }}" - # As of ansible-2.5.2, this module doesn't support the equivalent of the - # --cpus flag, so we must use period/quota for now - cpu_quota: "{{ grafana_container_cpu_period * grafana_container_cpu_cores }}" - memory: "{{ grafana_container_memory }}GB" - memory_swap: "{{ grafana_container_memory * 2 }}GB" - env: - GF_INSTALL_PLUGINS: "{{ grafana_plugins|join(',') }}" +# Make sure we re-create the container +- name: remove old grafana-server container + command: "{{ container_binary }} rm -f grafana-server" + changed_when: false + failed_when: false + +- name: create grafana-server container + shell: | + {{ container_binary }} create --name grafana-server \ + -v "/etc/grafana:/etc/grafana:Z" \ + -v "/var/lib/grafana:/var/lib/grafana:Z" \ + "--net=host" \ + "--cpu-period={{ grafana_container_cpu_period }}" \ + "--cpu-quota={{ grafana_container_cpu_period * grafana_container_cpu_cores }}" \ + "--memory={{ grafana_container_memory }}GB" \ + "--memory-swap={{ grafana_container_memory * 2 }}GB" \ + -e "GF_INSTALL_PLUGINS={{ grafana_plugins|join(',') }}" \ + "{{ grafana_container_image }}" - name: ship systemd service - copy: + template: src: grafana-server.service dest: "/etc/systemd/system/" owner: root diff --git a/roles/ceph-grafana/templates/datasources-ceph-dashboard.yml b/roles/ceph-grafana/templates/datasources-ceph-dashboard.yml index b2947b72a..5d5ab0125 100644 --- a/roles/ceph-grafana/templates/datasources-ceph-dashboard.yml +++ b/roles/ceph-grafana/templates/datasources-ceph-dashboard.yml @@ -17,7 +17,7 @@ datasources: # org id. will default to orgId 1 if not specified orgId: 1 # url - url: 'http://prometheus:9090' + url: 'http://{{ groups["grafana-server"][0] }}:9090' # enable/disable basic auth basicAuth: false # mark as default datasource. 
Max one per org diff --git a/roles/ceph-grafana/files/grafana-server.service b/roles/ceph-grafana/templates/grafana-server.service similarity index 61% rename from roles/ceph-grafana/files/grafana-server.service rename to roles/ceph-grafana/templates/grafana-server.service index fab8f5191..b242a18f8 100644 --- a/roles/ceph-grafana/files/grafana-server.service +++ b/roles/ceph-grafana/templates/grafana-server.service @@ -2,12 +2,14 @@ # overwritten. [Unit] Description=grafana-server +{% if container_binary == 'docker' %} After=docker.service +{% endif %} [Service] EnvironmentFile=-/etc/environment -ExecStart=/usr/bin/docker start --attach grafana-server -ExecStop=-/usr/bin/docker stop grafana-server +ExecStart=/usr/bin/{{ container_binary }} start --attach grafana-server +ExecStop=-/usr/bin/{{ container_binary }} stop grafana-server Restart=always RestartSec=10s TimeoutStartSec=120 diff --git a/roles/ceph-node-exporter/tasks/setup_container.yml b/roles/ceph-node-exporter/tasks/setup_container.yml index 09035e28f..a72e5c20c 100644 --- a/roles/ceph-node-exporter/tasks/setup_container.yml +++ b/roles/ceph-node-exporter/tasks/setup_container.yml @@ -10,30 +10,25 @@ state: stopped failed_when: false -- name: start docker container - docker_container: - name: node-exporter - image: "{{ node_exporter_container_image }}" - state: started - command: - - '--path.procfs=/host/proc' - - '--path.sysfs=/host/sys' - - '--no-collector.timex' - # restart to allow updates - restart: true - restart_policy: no - force_kill: yes - detach: true - volumes: - - '/proc:/host/proc:ro' - - '/sys:/host/sys:ro' - network_mode: host - keep_volumes: true - pull: true +# Make sure we re-create the container +- name: remove old node-exporter container + command: "{{ container_binary }} rm -f node-exporter" + changed_when: false + failed_when: false + +- name: start node-exporter container + shell: | + {{ container_binary }} run --detach --name node-exporter \ + -v /proc:/host/proc:ro -v 
/sys:/host/sys:ro \ + --net=host \ + {{ node_exporter_container_image }} \ + '--path.procfs=/host/proc' \ + '--path.sysfs=/host/sys' \ + '--no-collector.timex' notify: restart node-exporter service - name: ship systemd service - copy: + template: src: node_exporter.service dest: "/etc/systemd/system/" owner: root diff --git a/roles/ceph-node-exporter/files/node_exporter.service b/roles/ceph-node-exporter/templates/node_exporter.service similarity index 72% rename from roles/ceph-node-exporter/files/node_exporter.service rename to roles/ceph-node-exporter/templates/node_exporter.service index ebf57b162..489ba7275 100644 --- a/roles/ceph-node-exporter/files/node_exporter.service +++ b/roles/ceph-node-exporter/templates/node_exporter.service @@ -2,15 +2,17 @@ # overwritten. [Unit] Description=Node Exporter +{% if container_binary == 'docker' %} After=docker.service +{% endif %} [Service] EnvironmentFile=-/etc/environment -ExecStart=/usr/bin/docker start --attach node-exporter +ExecStart=/usr/bin/{{ container_binary }} start --attach node-exporter # Make sure the cfg80211 is loaded before running the container, the node # exporter needs this module loaded to test for presence of wi-fi devices ExecStartPre=/usr/sbin/modprobe cfg80211 -ExecStop=-/usr/bin/docker stop node-exporter +ExecStop=-/usr/bin/{{ container_binary }} stop node-exporter Restart=always RestartSec=10s TimeoutStartSec=120 diff --git a/roles/ceph-prometheus/defaults/main.yml b/roles/ceph-prometheus/defaults/main.yml index 4e92142d0..e1fc51ce0 100644 --- a/roles/ceph-prometheus/defaults/main.yml +++ b/roles/ceph-prometheus/defaults/main.yml @@ -6,7 +6,7 @@ prometheus_container_cpu_cores: 2 prometheus_container_memory: 4 prometheus_data_dir: /var/lib/prometheus prometheus_conf_dir: /etc/prometheus -prometheus_user_id: '65534' # This is the UID used by the prom/prometheus docker image +prometheus_user_id: '65534' # This is the UID used by the prom/prometheus container image alertmanager_container_image: 
prom/alertmanager:latest alertmanager_container_cpu_period: 100000 diff --git a/roles/ceph-prometheus/tasks/setup_container.yml b/roles/ceph-prometheus/tasks/setup_container.yml index 2574ab4f8..fddbed7d4 100644 --- a/roles/ceph-prometheus/tasks/setup_container.yml +++ b/roles/ceph-prometheus/tasks/setup_container.yml @@ -10,35 +10,25 @@ state: stopped failed_when: false +# Make sure we re-create the container +- name: remove old alertmanager container + command: "{{ container_binary }} rm -f alertmanager" + changed_when: false + failed_when: false + - name: start alertmanager container - docker_container: - name: alertmanager - image: "{{ alertmanager_container_image }}" - state: started - command: - - '--config.file=/etc/alertmanager/alertmanager.yml' - - '--storage.path=/alertmanager' - # restart to allow updates - restart: true - restart_policy: no - force_kill: yes - published_ports: '9093:9093' - detach: true - volumes: - - "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z" - - "{{ alertmanager_data_dir }}:/alertmanager:Z" - networks: - - name: "{{ dashboard_network_name }}" - keep_volumes: true - pull: true - cpu_period: "{{ alertmanager_container_cpu_period }}" - # As of ansible-2.5.2, this module doesn't support the equivalent of the - # --cpus flag, so we must use period/quota for now - cpu_quota: "{{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }}" - #memory: 0 - #memory_swap: 0 - memory: "{{ alertmanager_container_memory }}GB" - memory_swap: "{{ alertmanager_container_memory * 2 }}GB" + shell: | + {{ container_binary }} run --detach --name alertmanager \ + -v "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z" \ + -v "{{ alertmanager_data_dir }}:/alertmanager:Z" \ + "--net=host" \ + "--cpu-period={{ alertmanager_container_cpu_period }}" \ + "--cpu-quota={{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }}" \ + "--memory={{ alertmanager_container_memory }}GB" \ + "--memory-swap={{ alertmanager_container_memory * 
2 }}GB" \ + "{{ alertmanager_container_image }}" \ + "--config.file=/etc/alertmanager/alertmanager.yml" \ + "--storage.path=/alertmanager" notify: service handler - name: make sure the prometheus service is down @@ -47,41 +37,31 @@ state: stopped failed_when: false -- name: start prometheus docker container - docker_container: - name: prometheus - image: "{{ prometheus_container_image }}" - state: started - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.path=/prometheus' - - '--web.external-url=http://{{ inventory_hostname }}:9090/' - # restart to allow updates - restart: true - restart_policy: no - force_kill: yes - published_ports: '9090:9090' - detach: true - volumes: - - "{{ prometheus_conf_dir }}:/etc/prometheus:Z" - - "{{ prometheus_data_dir }}:/prometheus:Z" - networks: - - name: "{{ dashboard_network_name }}" - user: "{{ prometheus_user_id }}" - keep_volumes: true - pull: true - cpu_period: "{{ prometheus_container_cpu_period }}" - # As of ansible-2.5.2, this module doesn't support the equivalent of the - # --cpus flag, so we must use period/quota for now - cpu_quota: "{{ prometheus_container_cpu_period * prometheus_container_cpu_cores }}" - #memory: 0 - #memory_swap: 0 - memory: "{{ prometheus_container_memory }}GB" - memory_swap: "{{ prometheus_container_memory * 2 }}GB" +# Make sure we re-create the container +- name: remove old prometheus container + command: "{{ container_binary }} rm -f prometheus" + changed_when: false + failed_when: false + +- name: start prometheus container + shell: | + {{ container_binary }} run --detach --name prometheus \ + -v "{{ prometheus_conf_dir }}:/etc/prometheus:Z" \ + -v "{{ prometheus_data_dir }}:/prometheus:Z" \ + "--net=host" \ + "--user={{ prometheus_user_id }}" \ + "--cpu-period={{ prometheus_container_cpu_period }}" \ + "--cpu-quota={{ prometheus_container_cpu_period * prometheus_container_cpu_cores }}" \ + "--memory={{ prometheus_container_memory }}GB" \ + "--memory-swap={{ 
prometheus_container_memory * 2 }}GB" \ + "{{ prometheus_container_image }}" \ + "--config.file=/etc/prometheus/prometheus.yml" \ + "--storage.tsdb.path=/prometheus" \ + "--web.external-url=http://{{ inventory_hostname }}:9090/" notify: service handler - name: ship systemd services - copy: + template: src: "{{ item }}" dest: "/etc/systemd/system/" owner: root diff --git a/roles/ceph-prometheus/files/alertmanager.service b/roles/ceph-prometheus/templates/alertmanager.service similarity index 62% rename from roles/ceph-prometheus/files/alertmanager.service rename to roles/ceph-prometheus/templates/alertmanager.service index 2683c231f..80eb573f3 100644 --- a/roles/ceph-prometheus/files/alertmanager.service +++ b/roles/ceph-prometheus/templates/alertmanager.service @@ -2,12 +2,14 @@ # overwritten. [Unit] Description=alertmanager +{% if container_binary == 'docker' %} After=docker.service +{% endif %} [Service] EnvironmentFile=-/etc/environment -ExecStart=/usr/bin/docker start --attach alertmanager -ExecStop=/usr/bin/docker stop alertmanager +ExecStart=/usr/bin/{{ container_binary }} start --attach alertmanager +ExecStop=/usr/bin/{{ container_binary }} stop alertmanager Restart=always RestartSec=10s TimeoutStartSec=120 diff --git a/roles/ceph-prometheus/files/prometheus.service b/roles/ceph-prometheus/templates/prometheus.service similarity index 62% rename from roles/ceph-prometheus/files/prometheus.service rename to roles/ceph-prometheus/templates/prometheus.service index 147093542..6971eadfc 100644 --- a/roles/ceph-prometheus/files/prometheus.service +++ b/roles/ceph-prometheus/templates/prometheus.service @@ -2,12 +2,14 @@ # overwritten. 
[Unit] Description=prometheus +{% if container_binary == 'docker' %} After=docker.service +{% endif %} [Service] EnvironmentFile=-/etc/environment -ExecStart=/usr/bin/docker start --attach prometheus -ExecStop=/usr/bin/docker stop prometheus +ExecStart=/usr/bin/{{ container_binary }} start --attach prometheus +ExecStop=/usr/bin/{{ container_binary }} stop prometheus Restart=always RestartSec=10s TimeoutStartSec=120 diff --git a/roles/ceph-prometheus/templates/prometheus.yml b/roles/ceph-prometheus/templates/prometheus.yml index 70262e4ed..71dcbb6a1 100644 --- a/roles/ceph-prometheus/templates/prometheus.yml +++ b/roles/ceph-prometheus/templates/prometheus.yml @@ -44,4 +44,4 @@ alerting: alertmanagers: - scheme: http static_configs: - - targets: ['alertmanager:9093'] + - targets: ['{{ groups["grafana-server"][0] }}:9093']