# Listing metadata from the source page: 989 lines, 39 KiB, YAML
---
# Notification templates for Alertmanager. The alertmanager Deployment mounts
# this ConfigMap at /etc/alertmanager-templates, and the Alertmanager config
# loads every *.tmpl file from that directory.
#   default.tmpl - default templates for slack, hipchat, pagerduty, opsgenie,
#                  victorops, email and pushover
#   slack.tmpl   - the "slack.devops.text" template used by the default receiver
apiVersion: v1
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: alertmanager-templates
  namespace: monitoring
data:
  default.tmpl: |
    {{ define "__alertmanager" }}AlertManager{{ end }}
    {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}

    {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
    {{ define "__description" }}{{ end }}

    {{ define "__text_alert_list" }}{{ range . }}Labels:
    {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }}
    {{ end }}Annotations:
    {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }}
    {{ end }}Source: {{ .GeneratorURL }}
    {{ end }}{{ end }}


    {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }}
    {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }}
    {{ define "slack.default.pretext" }}{{ end }}
    {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "slack.default.iconemoji" }}{{ end }}
    {{ define "slack.default.iconurl" }}{{ end }}
    {{ define "slack.default.text" }}{{ end }}


    {{ define "hipchat.default.from" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }}


    {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }}
    {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }}


    {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }}
    {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
    {{ if gt (len .Alerts.Firing) 0 -}}
    Alerts Firing:
    {{ template "__text_alert_list" .Alerts.Firing }}
    {{- end }}
    {{ if gt (len .Alerts.Resolved) 0 -}}
    Alerts Resolved:
    {{ template "__text_alert_list" .Alerts.Resolved }}
    {{- end }}
    {{- end }}
    {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }}


    {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }}


    {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }}
    {{ define "email.default.html" }}
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <!--
    Style and HTML derived from https://github.com/mailgun/transactional-email-templates


    The MIT License (MIT)

    Copyright (c) 2014 Mailgun

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    SOFTWARE.
    -->
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <head style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <meta name="viewport" content="width=device-width" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <title style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">{{ template "__subject" . }}</title>

    </head>

    <body itemscope="" itemtype="http://schema.org/EmailMessage" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; -webkit-font-smoothing: antialiased; -webkit-text-size-adjust: none; height: 100%; line-height: 1.6em; width: 100% !important; background-color: #f6f6f6; margin: 0; padding: 0;" bgcolor="#f6f6f6">

    <table style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; background-color: #f6f6f6; margin: 0;" bgcolor="#f6f6f6">
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td>
    <td width="600" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; display: block !important; max-width: 600px !important; clear: both !important; width: 100% !important; margin: 0 auto; padding: 0;" valign="top">
    <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; max-width: 600px; display: block; margin: 0 auto; padding: 0;">
    <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; border-radius: 3px; background-color: #fff; margin: 0; border: 1px solid #e9e9e9;" bgcolor="#fff">
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 16px; vertical-align: top; color: #fff; font-weight: 500; text-align: center; border-radius: 3px 3px 0 0; background-color: #E6522C; margin: 0; padding: 20px;" align="center" bgcolor="#E6522C" valign="top">
    {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }}
    {{ .Name }}={{ .Value }}
    {{ end }}
    </td>
    </tr>
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 10px;" valign="top">
    <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <a href="{{ template "__alertmanagerURL" . }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #FFF; text-decoration: none; line-height: 2em; font-weight: bold; text-align: center; cursor: pointer; display: inline-block; border-radius: 5px; text-transform: capitalize; background-color: #348eda; margin: 0; border-color: #348eda; border-style: solid; border-width: 10px 20px;">View in {{ template "__alertmanager" . }}</a>
    </td>
    </tr>
    {{ if gt (len .Alerts.Firing) 0 }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Firing | len }}] Firing</strong>
    </td>
    </tr>
    {{ end }}
    {{ range .Alerts.Firing }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    </td>
    </tr>
    {{ end }}

    {{ if gt (len .Alerts.Resolved) 0 }}
    {{ if gt (len .Alerts.Firing) 0 }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <hr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    </td>
    </tr>
    {{ end }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Resolved | len }}] Resolved</strong>
    </td>
    </tr>
    {{ end }}
    {{ range .Alerts.Resolved }}
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
    <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
    <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    </td>
    </tr>
    {{ end }}
    </table>
    </td>
    </tr>
    </table>

    <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; clear: both; color: #999; margin: 0; padding: 20px;">
    <table width="100%" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; vertical-align: top; text-align: center; color: #999; margin: 0; padding: 0 0 20px;" align="center" valign="top"><a href="{{ .ExternalURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; color: #999; text-decoration: underline; margin: 0;">Sent by {{ template "__alertmanager" . }}</a></td>
    </tr>
    </table>
    </div></div>
    </td>
    <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td>
    </tr>
    </table>

    </body>
    </html>

    {{ end }}

    {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }}
    {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
    {{ if gt (len .Alerts.Firing) 0 }}
    Alerts Firing:
    {{ template "__text_alert_list" .Alerts.Firing }}
    {{ end }}
    {{ if gt (len .Alerts.Resolved) 0 }}
    Alerts Resolved:
    {{ template "__text_alert_list" .Alerts.Resolved }}
    {{ end }}
    {{ end }}
    {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }}
  slack.tmpl: |
    {{ define "slack.devops.text" }}
    {{range .Alerts}}{{.Annotations.DESCRIPTION}}
    {{end}}
    {{ end }}
---
# Alertmanager configuration. The alertmanager Deployment mounts this
# ConfigMap at /etc/alertmanager and starts with
# -config.file=/etc/alertmanager/config.yml.
# NOTE(review): the SMTP password and the Slack webhook URL are stored here in
# plain text; consider moving them to a Secret.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager
  namespace: monitoring
data:
  config.yml: |-
    global:
      # ResolveTimeout is the time after which an alert is declared resolved
      # if it has not been updated.
      resolve_timeout: 5m

      # The smarthost and SMTP sender used for mail notifications.
      smtp_smarthost: 'smtp.gmail.com:587'
      smtp_from: 'foo@bar.com'
      smtp_auth_username: 'foo@bar.com'
      smtp_auth_password: 'barfoo'

      # The API URL to use for Slack notifications.
      slack_api_url: 'https://hooks.slack.com/services/some/api/token'

    # # The directory from which notification templates are read.
    templates:
      - '/etc/alertmanager-templates/*.tmpl'

    # The root route on which each incoming alert enters.
    route:

      # The labels by which incoming alerts are grouped together. For example,
      # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
      # be batched into a single group.

      group_by: ['alertname', 'cluster', 'service']

      # When a new group of alerts is created by an incoming alert, wait at
      # least 'group_wait' to send the initial notification.
      # This way ensures that you get multiple alerts for the same group that start
      # firing shortly after another are batched together on the first
      # notification.

      group_wait: 30s

      # When the first notification was sent, wait 'group_interval' to send a batch
      # of new alerts that started firing for that group.

      group_interval: 5m

      # If an alert has successfully been sent, wait 'repeat_interval' to
      # resend them.

      #repeat_interval: 1m
      repeat_interval: 15m

      # A default receiver

      # If an alert isn't caught by a route, send it to default.
      receiver: default

      # All the above attributes are inherited by all child routes and can
      # overwritten on each.

      # The child route trees.
      routes:
        # Send severity=slack alerts to slack.
        - match:
            severity: slack
          receiver: slack_alert
        # - match:
        #     severity: email
        #   receiver: email_alert

    receivers:
      - name: 'default'
        slack_configs:
          - channel: '#alertmanager-test'
            text: '<!channel>{{ template "slack.devops.text" . }}'
            send_resolved: true

      - name: 'slack_alert'
        slack_configs:
          - channel: '#alertmanager-test'
            send_resolved: true
---
# Single-replica Alertmanager. Configuration and notification templates come
# from the "alertmanager" and "alertmanager-templates" ConfigMaps; alert state
# under /alertmanager lives in an emptyDir volume.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: alertmanager
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: alertmanager
  template:
    metadata:
      name: alertmanager
      labels:
        app: alertmanager
    spec:
      containers:
        - name: alertmanager
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/prometheus-alertmanager:v0.7.1
          args:
            - "-config.file=/etc/alertmanager/config.yml"
            - "-storage.path=/alertmanager"
          ports:
            - name: alertmanager
              containerPort: 9093
          volumeMounts:
            # config.yml from the "alertmanager" ConfigMap
            - name: config-volume
              mountPath: /etc/alertmanager
            # *.tmpl files from the "alertmanager-templates" ConfigMap
            - name: templates-volume
              mountPath: /etc/alertmanager-templates
            # backing directory for -storage.path
            - name: alertmanager
              mountPath: /alertmanager
      volumes:
        - name: config-volume
          configMap:
            name: alertmanager
        - name: templates-volume
          configMap:
            name: alertmanager-templates
        - name: alertmanager
          emptyDir: {}
---
# NodePort Service in front of the Alertmanager pods (port 9093). The
# prometheus.io/* annotations opt the Service into the
# 'kubernetes-service-endpoints' scrape job.
apiVersion: v1
kind: Service
metadata:
  name: alertmanager
  namespace: monitoring
  labels:
    name: alertmanager
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/path: "/metrics"
spec:
  type: NodePort
  selector:
    app: alertmanager
  ports:
    - name: alertmanager
      protocol: TCP
      port: 9093
      targetPort: 9093
---
# Single-replica Grafana with basic auth enabled and anonymous access
# disabled. /var is backed by an emptyDir volume.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: grafana-core
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: grafana
        component: core
    spec:
      containers:
        - name: grafana-core
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/grafana:4.2.0
          imagePullPolicy: IfNotPresent
          resources:
            # keep request = limit to keep this container in guaranteed class
            requests:
              cpu: 100m
              memory: 100Mi
            limits:
              cpu: 100m
              memory: 100Mi
          env:
            # The following env variables set up basic auth with the default
            # admin user and admin password.
            - name: GF_AUTH_BASIC_ENABLED
              value: "true"
            - name: GF_AUTH_ANONYMOUS_ENABLED
              value: "false"
            # - name: GF_AUTH_ANONYMOUS_ORG_ROLE
            #   value: Admin
            # does not really work, because of template variables in exported dashboards:
            # - name: GF_DASHBOARDS_JSON_ENABLED
            #   value: "true"
          readinessProbe:
            httpGet:
              path: /login
              port: 3000
            # initialDelaySeconds: 30
            # timeoutSeconds: 1
          volumeMounts:
            - name: grafana-persistent-storage
              mountPath: /var
      volumes:
        - name: grafana-persistent-storage
          emptyDir: {}
---
# One-shot Job that waits until the "grafana" Service has endpoints and then
# POSTs every *-datasource.json and *-dashboard.json file found in the
# "grafana-import-dashboards" ConfigMap to the Grafana HTTP API.
#
# NOTE(review): both containers call `jq`; confirm the centos image used here
# actually ships it, otherwise the wait loop never exits and the dashboard
# import fails.
apiVersion: batch/v1
kind: Job
metadata:
  name: grafana-import-dashboards
  namespace: monitoring
  labels:
    app: grafana
    component: import-dashboards
spec:
  template:
    metadata:
      name: grafana-import-dashboards
      labels:
        app: grafana
        component: import-dashboards
      annotations:
        # Init container declared through the beta annotation; the value must
        # be valid JSON. Two fixes relative to the previous revision:
        #   * the "image" string was missing its closing quote and comma,
        #     which made the whole annotation unparseable;
        #   * the wait loop is executed with `bash -c`, so it is written in
        #     bash syntax (do/done, then/fi, $(...)) rather than fish syntax.
        # The loop polls the endpoints API once per second and exits as soon
        # as at least one address is listed.
        pod.beta.kubernetes.io/init-containers: '[
          {
            "name": "wait-for-endpoints",
            "image": "sz-pg-oam-docker-hub-001.tendcloud.com/library/centos:7.2.1511",
            "imagePullPolicy": "IfNotPresent",
            "command": ["bash", "-c", "echo \"waiting for endpoints...\"; while true; do endpoints=$(curl -sX GET -H \"Authorization:bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)\" -k https://kubernetes.default/api/v1/namespaces/monitoring/endpoints/grafana); echo \"$endpoints\" | jq \".\"; if test \"$(echo \"$endpoints\" | jq -r \".subsets[]?.addresses // [] | length\")\" -gt 0; then exit 0; fi; echo \"waiting...\"; sleep 1; done"],
            "args": ["monitoring", "grafana"]
          }
        ]'
    spec:
      serviceAccountName: prometheus-k8s
      restartPolicy: Never
      containers:
        - name: grafana-import-dashboards
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/centos:7.2.1511
          command: ["/bin/sh", "-c"]
          workingDir: /opt/grafana-import-dashboards
          args:
            # Literal block scalar (|) instead of a folded one (>): the script
            # relies on backslash-newline continuations, which only survive if
            # the line breaks are preserved exactly as written.
            - |
              for file in *-datasource.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  curl --silent --fail --show-error \
                    --request POST http://admin:admin@grafana:3000/api/datasources \
                    --header "Content-Type: application/json" \
                    --data-binary "@$file" ;
                  echo "" ;
                fi
              done ;
              for file in *-dashboard.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  ( echo '{"dashboard":'; \
                    cat "$file"; \
                    echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
                  | jq -c '.' \
                  | curl --silent --fail --show-error \
                    --request POST http://admin:admin@grafana:3000/api/dashboards/import \
                    --header "Content-Type: application/json" \
                    --data-binary "@-" ;
                  echo "" ;
                fi
              done
          volumeMounts:
            # JSON files to import, one ConfigMap key per file
            - name: config-volume
              mountPath: /opt/grafana-import-dashboards
      volumes:
        - name: config-volume
          configMap:
            name: grafana-import-dashboards
---
# apiVersion: extensions/v1beta1
# kind: Ingress
# metadata:
#   name: grafana
#   namespace: monitoring
# spec:
#   rules:
#     - host: <yourchoice>.<cluster-id>.k8s.jimmysong.io
#       http:
#         paths:
#           - path: /
#             backend:
#               serviceName: grafana
#               servicePort: 3000
---
# NodePort Service exposing the Grafana pods (app=grafana, component=core)
# on port 3000.
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  type: NodePort
  selector:
    app: grafana
    component: core
  ports:
    - port: 3000
---
# Prometheus server configuration, mounted by the prometheus-core Deployment
# at /etc/prometheus. Defines six Kubernetes service-discovery scrape jobs:
# apiservers, nodes, cadvisor, service endpoints, services (via a blackbox
# exporter probe) and pods. Rule files are read from /etc/prometheus-rules.
apiVersion: v1
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: prometheus-core
  namespace: monitoring
data:
  prometheus.yaml: |
    global:
      scrape_interval: 10s
      scrape_timeout: 10s
      evaluation_interval: 10s
    rule_files:
      - "/etc/prometheus-rules/*.rules"

    scrape_configs:
      - job_name: 'kubernetes-apiservers'

        kubernetes_sd_configs:
          - role: endpoints

        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https
      - job_name: 'kubernetes-nodes'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

        kubernetes_sd_configs:
          - role: node

        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics

      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

        kubernetes_sd_configs:
          - role: node

        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

      - job_name: 'kubernetes-service-endpoints'

        kubernetes_sd_configs:
          - role: endpoints

        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name

      - job_name: 'kubernetes-services'

        metrics_path: /probe
        params:
          module: [http_2xx]

        kubernetes_sd_configs:
          - role: service

        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.example.com:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name

      - job_name: 'kubernetes-pods'

        kubernetes_sd_configs:
          - role: pod

        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
---
# Single-replica Prometheus server. Reads its configuration from the
# "prometheus-core" ConfigMap and its rules from the "prometheus-rules"
# ConfigMap, and sends alerts to the alertmanager Service on port 9093.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: prometheus-core
  namespace: monitoring
  labels:
    app: prometheus
    component: core
spec:
  replicas: 1
  template:
    metadata:
      name: prometheus-main
      labels:
        app: prometheus
        component: core
    spec:
      serviceAccountName: prometheus-k8s
      containers:
        - name: prometheus
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/prom-prometheus:v1.7.0
          args:
            - "-storage.local.retention=12h"
            - "-storage.local.memory-chunks=500000"
            - "-config.file=/etc/prometheus/prometheus.yaml"
            - "-alertmanager.url=http://alertmanager:9093/"
          ports:
            - name: webui
              containerPort: 9090
          resources:
            # requests equal limits for both cpu and memory
            requests:
              cpu: 500m
              memory: 500M
            limits:
              cpu: 500m
              memory: 500M
          volumeMounts:
            # prometheus.yaml
            - name: config-volume
              mountPath: /etc/prometheus
            # *.rules files referenced by rule_files
            - name: rules-volume
              mountPath: /etc/prometheus-rules
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-core
        - name: rules-volume
          configMap:
            name: prometheus-rules
---
# kube-state-metrics, two replicas, listening on container port 8080 and
# running under the "kube-state-metrics" service account.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
spec:
  replicas: 2
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/kube-state-metrics:v1.0.1
          ports:
            - containerPort: 8080
---
# ---
# apiVersion: rbac.authorization.k8s.io/v1beta1
# kind: ClusterRoleBinding
# metadata:
#   name: kube-state-metrics
# roleRef:
#   apiGroup: rbac.authorization.k8s.io
#   kind: ClusterRole
#   name: kube-state-metrics
# subjects:
#   - kind: ServiceAccount
#     name: kube-state-metrics
#     namespace: monitoring
# ---
# apiVersion: rbac.authorization.k8s.io/v1beta1
# kind: ClusterRole
# metadata:
#   name: kube-state-metrics
# rules:
#   - apiGroups: [""]
#     resources:
#       - nodes
#       - pods
#       - services
#       - resourcequotas
#       - replicationcontrollers
#       - limitranges
#     verbs: ["list", "watch"]
#   - apiGroups: ["extensions"]
#     resources:
#       - daemonsets
#       - deployments
#       - replicasets
#     verbs: ["list", "watch"]
# ---
---
# Service for the kube-state-metrics pods on port 8080. The
# prometheus.io/scrape annotation opts it into the
# 'kubernetes-service-endpoints' scrape job.
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
  annotations:
    prometheus.io/scrape: "true"
spec:
  selector:
    app: kube-state-metrics
  ports:
    - name: kube-state-metrics
      protocol: TCP
      port: 8080
---
# DaemonSet with two containers sharing an in-memory emptyDir:
#   read-du - every 300 s writes the sizes of directories below /mnt
#             (host /var is mounted read-only at /mnt/var) to /tmp/metrics
#   caddy   - serves that directory over HTTP on port 9102
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: node-directory-size-metrics
  namespace: monitoring
  annotations:
    description: |
      This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes.
      The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
      The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus.
      These are scheduled on every node in the Kubernetes cluster.
      To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`.
spec:
  template:
    metadata:
      labels:
        app: node-directory-size-metrics
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9102"
        description: |
          This `Pod` provides metrics in Prometheus format about disk usage on the node.
          The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
          The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus.
          This `Pod` is scheduled on every node in the Kubernetes cluster.
          To choose directories from the node to check just mount them on `read-du` below `/mnt`.
    spec:
      containers:
        - name: read-du
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/giantswarm-tiny-tools
          imagePullPolicy: Always
          # FIXME threshold via env var
          # The script below is fish, not POSIX sh: it appends one
          # node_directory_size_bytes sample per directory to a temp file,
          # then renames the temp file over /tmp/metrics.
          command:
            - fish
            - --command
            - |
              touch /tmp/metrics-temp
              while true
                for directory in (du --bytes --separate-dirs --threshold=100M /mnt)
                  echo $directory | read size path
                  echo "node_directory_size_bytes{path=\"$path\"} $size" \
                    >> /tmp/metrics-temp
                end
                mv /tmp/metrics-temp /tmp/metrics
                sleep 300
              end
          volumeMounts:
            - name: host-fs-var
              mountPath: /mnt/var
              readOnly: true
            - name: metrics
              mountPath: /tmp
        - name: caddy
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/dockermuenster-caddy:0.9.3
          command:
            - caddy
            - -port=9102
            - -root=/var/www
          ports:
            - containerPort: 9102
          volumeMounts:
            # same volume that read-du mounts at /tmp
            - name: metrics
              mountPath: /var/www
      volumes:
        - name: host-fs-var
          hostPath:
            path: /var
        - name: metrics
          emptyDir:
            medium: Memory
|
|
---
|
|
# DaemonSet for the Prometheus node-exporter. Pods share the host's network
# and PID namespaces and publish the exporter on host port 9100.
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
spec:
  template:
    metadata:
      name: prometheus-node-exporter
      labels:
        app: prometheus
        component: node-exporter
    spec:
      hostNetwork: true
      hostPID: true
      containers:
        - name: prometheus-node-exporter
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/prom-node-exporter:v0.14.0
          ports:
            # The port name must be an IANA_SVC_NAME (at most 15 characters),
            # hence the abbreviated form.
            - name: prom-node-exp
              containerPort: 9100
              hostPort: 9100
|
|
---
|
|
# Headless Service (clusterIP: None) selecting the node-exporter pods
# (app=prometheus, component=node-exporter) on TCP port 9100.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
  annotations:
    # Scrape opt-in marker; presumably honoured by the Prometheus scrape
    # configuration defined elsewhere in this file — verify there.
    prometheus.io/scrape: "true"
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: prometheus
    component: node-exporter
  ports:
    - name: prometheus-node-exporter
      protocol: TCP
      port: 9100
|
|
---
|
|
# ConfigMap holding the Prometheus alerting rule files. Each key under
# `data` is one rule file written in the Prometheus 1.x rule syntax
# (ALERT ... IF ... FOR ... LABELS ... ANNOTATIONS).
# NOTE(review): annotation keys are upper-case (SUMMARY/DESCRIPTION) in most
# rules but lower-case in instance-availability.rules. They are left as-is
# because Alertmanager templates may reference them by exact name — confirm
# before unifying.
apiVersion: v1
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: prometheus-rules
  namespace: monitoring
data:
  # Pages when the non-idle CPU share of an instance stays above 75% for 2m.
  # Uses rate() rather than irate(): irate() only considers the last two
  # samples, so short dips can reset the FOR clause of an alert.
  cpu-usage.rules: |
    ALERT NodeCPUUsage
      IF (100 - (avg by (instance) (rate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High CPU usage detected",
        DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})"
      }
  # Pages when any scrape target has reported `up == 0` for 1m.
  instance-availability.rules: |
    ALERT InstanceDown
      IF up == 0
      FOR 1m
      LABELS { severity = "page" }
      ANNOTATIONS {
        summary = "Instance {{ $labels.instance }} down",
        description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
      }
  # Pages when the filesystem mounted at /root-disk or /data-disk is more
  # than 75% used for 2m.
  low-disk-space.rules: |
    ALERT NodeLowRootDisk
      IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low root disk space",
        DESCRIPTION = "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value }})"
      }

    ALERT NodeLowDataDisk
      IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low data disk space",
        DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})"
      }
  # Pages when swap usage, or memory usage excluding free and cached memory,
  # stays above 75% for 2m.
  mem-usage.rules: |
    ALERT NodeSwapUsage
      IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Swap usage detected",
        DESCRIPTION = "{{$labels.instance}}: Swap usage is above 75% (current value is: {{ $value }})"
      }

    ALERT NodeMemoryUsage
      IF (((node_memory_MemTotal-node_memory_MemFree-node_memory_Cached)/(node_memory_MemTotal)*100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High memory usage detected",
        DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})"
      }
|
|
---
|
|
# NodePort Service publishing the Prometheus core pods
# (app=prometheus, component=core) on TCP port 9090 under the name `webui`.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
    component: core
  annotations:
    # Scrape opt-in marker; presumably honoured by the Prometheus scrape
    # configuration defined elsewhere in this file — verify there.
    prometheus.io/scrape: "true"
spec:
  type: NodePort
  selector:
    app: prometheus
    component: core
  ports:
    - name: webui
      protocol: TCP
      port: 9090
|