# Source: kubeasz/manifests/prometheus/prom-alertrules.yaml

# Prometheus alerting rules, supplied through the serverFiles.alerts value.
# Contains a single rule group, k8s_alert_rules, with two alerts.
serverFiles:
  alerts:
    groups:
      - name: k8s_alert_rules
        rules:
          # ALERT when a pod's summed working-set memory exceeds 90% of its
          # summed memory limit for 2 minutes.
          #
          # The "< 2" upper bound discards ratios of 2 or more.
          # NOTE(review): presumably this filters out pods without a memory
          # limit (limit reported as 0, giving an infinite ratio) - confirm.
          #
          # kubernetes_io_hostname is part of every "by (...)" clause because
          # the description annotation reads it; aggregating by pod_name alone
          # drops the label and the hostname renders empty.
          #
          # NOTE(review): the selectors rely on the "pod_name" label; newer
          # kubelet/cAdvisor versions may expose "pod" instead - verify against
          # the target cluster version.
          - alert: container_mem_over_90
            expr: |-
              (
                sum(container_memory_working_set_bytes{image!="",name=~"^k8s_.*", pod_name!=""}) by (pod_name, kubernetes_io_hostname)
                /
                sum(container_spec_memory_limit_bytes{image!="",name=~"^k8s_.*", pod_name!=""}) by (pod_name, kubernetes_io_hostname)
              ) > 0.9
              and
              (
                sum(container_memory_working_set_bytes{image!="",name=~"^k8s_.*", pod_name!=""}) by (pod_name, kubernetes_io_hostname)
                /
                sum(container_spec_memory_limit_bytes{image!="",name=~"^k8s_.*", pod_name!=""}) by (pod_name, kubernetes_io_hostname)
              ) < 2
            for: 2m
            annotations:
              summary: "{{ $labels.pod_name }}'s memory usage alert"
              description: "Memory Usage of Pod {{ $labels.pod_name }} on {{ $labels.kubernetes_io_hostname }} has exceeded 90% for more than 2 minutes."
          # ALERT when a scrape target has reported up == 0 for 60 seconds.
          # NOTE(review): "up == 0" matches every scrape target, not only
          # nodes, and targets without a kubernetes_io_hostname label will
          # render an empty name in the annotations - confirm this is intended.
          - alert: node_down
            expr: up == 0
            for: 60s
            annotations:
              summary: "Node {{ $labels.kubernetes_io_hostname }} is down"
              description: "Node {{ $labels.kubernetes_io_hostname }} is down"