mirror of https://github.com/easzlab/kubeasz.git
更新 prometheus 告警发送钉钉配置和文档
parent
f19339d7f1
commit
ad3ea3eab5
|
@ -98,18 +98,49 @@ $ helm del --tls grafana --purge
|
|||
## 验证告警
|
||||
|
||||
- 修改`prom-alertsmanager.yaml`文件中邮件告警为有效的配置内容,并使用 helm upgrade更新安装
|
||||
- 查看`prom-alertrules.yaml`文件,确认文件中设置了内存使用超过90%的告警规则
|
||||
- 部署测试应用,并压力测试使其内存超过90%,看是否触发告警并发送告警邮件
|
||||
- 手动临时关闭 master 节点的 kubelet 服务,等待几分钟看是否有告警邮件发送
|
||||
|
||||
``` bash
|
||||
# 创建deploy和service
|
||||
$ kubectl run nginx1 --image=nginx --port=80 --expose --limits='cpu=500m,memory=4Mi'
|
||||
# 在 master 节点运行
|
||||
$ systemctl stop kubelet
|
||||
```
|
||||
|
||||
# 增加负载(可用Ctrl + C 停止)
|
||||
$ kubectl run --rm -it load-generator --image=busybox /bin/sh
|
||||
Hit enter for command prompt
|
||||
$ while true; do wget -q -O- http://nginx1; done;
|
||||
## [可选] 配置钉钉告警
|
||||
|
||||
# 等待约几分钟查看是否有告警
|
||||
- 创建钉钉群,获取群机器人 webhook 地址
|
||||
|
||||
使用钉钉创建群聊以后可以方便设置群机器人,【群设置】-【群机器人】-【添加】-【自定义】-【添加】,然后按提示操作即可,参考 https://open-doc.dingtalk.com/docs/doc.htm?spm=a219a.7629140.0.0.666d4a97eCG7XA&treeId=257&articleId=105735&docType=1
|
||||
|
||||
上述配置好群机器人,获得这个机器人对应的Webhook地址,记录下来,后续配置钉钉告警插件要用,格式如下
|
||||
|
||||
```
|
||||
https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxx
|
||||
```
|
||||
|
||||
- 创建钉钉告警插件,参考 http://theo.im/blog/2017/10/16/release-prometheus-alertmanager-webhook-for-dingtalk/
|
||||
|
||||
``` bash
|
||||
# 编辑修改文件中 access_token=xxxxxx 为上一步你获得的机器人认证 token
|
||||
$ vi /etc/ansible/manifests/prometheus/dingtalk-webhook.yaml
|
||||
# 运行插件
|
||||
$ kubectl apply -f /etc/ansible/manifests/prometheus/dingtalk-webhook.yaml
|
||||
```
|
||||
|
||||
- 修改 alertsmanager 告警配置后,更新 helm prometheus 部署,成功后如上节测试告警发送
|
||||
|
||||
``` bash
|
||||
# 修改 alertsmanager 告警配置
|
||||
$ cd /etc/ansible/manifests/prometheus
|
||||
$ vi prom-alertsmanager.yaml
|
||||
# 增加 receiver dingtalk,然后在 route 配置使用 receiver: dingtalk
|
||||
receivers:
|
||||
- name: dingtalk
|
||||
webhook_configs:
|
||||
- send_resolved: false
|
||||
url: http://webhook-dingtalk.monitoring.svc.cluster.local:8060/dingtalk/webhook1/send
|
||||
# ...
|
||||
# 更新 helm prometheus 部署
|
||||
$ helm upgrade --tls monitor -f prom-settings.yaml -f prom-alertsmanager.yaml -f prom-alertrules.yaml prometheus
|
||||
```
|
||||
|
||||
## 下一步
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
---
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
run: dingtalk
|
||||
name: webhook-dingtalk
|
||||
namespace: monitoring
|
||||
spec:
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
run: dingtalk
|
||||
spec:
|
||||
containers:
|
||||
- name: dingtalk
|
||||
image: timonwong/prometheus-webhook-dingtalk:v0.3.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
# 设置钉钉群聊自定义机器人后,使用实际 access_token 替换下面 xxxxxx部分
|
||||
args:
|
||||
- --ding.profile=webhook1=https://oapi.dingtalk.com/robot/send?access_token=xxxxxx
|
||||
ports:
|
||||
- containerPort: 8060
|
||||
protocol: TCP
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
run: dingtalk
|
||||
name: webhook-dingtalk
|
||||
namespace: monitoring
|
||||
spec:
|
||||
ports:
|
||||
- port: 8060
|
||||
protocol: TCP
|
||||
targetPort: 8060
|
||||
selector:
|
||||
run: dingtalk
|
||||
sessionAffinity: None
|
|
@ -11,11 +11,17 @@ alertmanagerFiles:
|
|||
group_by: ['alertname', 'pod_name']
|
||||
group_wait: 10s
|
||||
group_interval: 5m
|
||||
receiver: AlertMail
|
||||
#receiver: AlertMail
|
||||
receiver: dingtalk
|
||||
repeat_interval: 3h
|
||||
|
||||
receivers:
|
||||
- name: 'AlertMail'
|
||||
email_configs:
|
||||
- to: 'xxxx@163.com'
|
||||
- name: dingtalk
|
||||
webhook_configs:
|
||||
- send_resolved: false
|
||||
# 需要运行插件 dingtalk-webhook.yaml,详情阅读 docs/guide/prometheus.md
|
||||
url: http://webhook-dingtalk.monitoring.svc.cluster.local:8060/dingtalk/webhook1/send
|
||||
|
||||
|
|
Loading…
Reference in New Issue