[WIP]使用Prometheus监控kubernetes集群

pull/147/head
Jimmy Song 2017-09-25 21:41:08 +08:00
parent cf3001e2a5
commit 5b14e81b7c
111 changed files with 6338 additions and 153 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,67 @@
# One-shot Job that pushes every *-datasource.json and *-dashboard.json file
# mounted from the grafana-import-dashboards ConfigMap into Grafana through
# its HTTP API (POST /api/datasources and POST /api/dashboards/import).
apiVersion: batch/v1
kind: Job
metadata:
  name: grafana-import-dashboards
  namespace: monitoring
  labels:
    app: grafana
    component: import-dashboards
spec:
  template:
    metadata:
      name: grafana-import-dashboards
      labels:
        app: grafana
        component: import-dashboards
    spec:
      serviceAccountName: prometheus-k8s
      # Declared as a first-class field instead of the deprecated
      # pod.beta.kubernetes.io/init-containers annotation, which newer
      # clusters (Kubernetes 1.8+) silently ignore. The command string is
      # unchanged: it polls the API server with the pod's service-account
      # token until the "grafana" Endpoints object in the "monitoring"
      # namespace lists at least one address, then exits 0.
      initContainers:
        - name: wait-for-endpoints
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/giantswarm-tiny-tools
          imagePullPolicy: IfNotPresent
          command:
            - fish
            - "-c"
            # Folded (>-) so the lines below join into the original one-line
            # fish script, separated by single spaces.
            - >-
              echo "waiting for endpoints...";
              while true;
              set endpoints (curl -s
              --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
              --header "Authorization: Bearer "(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
              https://kubernetes.default/api/v1/namespaces/monitoring/endpoints/grafana);
              echo $endpoints | jq ".";
              if test (echo $endpoints | jq -r ".subsets[]?.addresses // [] | length") -gt 0;
              exit 0;
              end;
              echo "waiting...";sleep 1;
              end
          # Kept from the original definition; the script above hard-codes
          # the namespace and service name and does not read these values.
          args:
            - monitoring
            - grafana
      containers:
        - name: grafana-import-dashboards
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/giantswarm-tiny-tools
          command: ["/bin/sh", "-c"]
          workingDir: /opt/grafana-import-dashboards
          # Literal block scalar (|): line breaks must survive so that the
          # trailing-backslash continuations in the script stay valid shell.
          # The [ -e "$file" ] guards skip the unexpanded glob when no file
          # matches.
          # NOTE(review): the Grafana credentials (admin:admin) are embedded
          # in the URLs below; consider sourcing them from a Secret.
          args:
            - |
              for file in *-datasource.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  curl --silent --fail --show-error \
                    --request POST http://admin:admin@grafana:3000/api/datasources \
                    --header "Content-Type: application/json" \
                    --data-binary "@$file" ;
                  echo "" ;
                fi
              done ;
              for file in *-dashboard.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  ( echo '{"dashboard":'; \
                    cat "$file"; \
                    echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
                  | jq -c '.' \
                  | curl --silent --fail --show-error \
                    --request POST http://admin:admin@grafana:3000/api/dashboards/import \
                    --header "Content-Type: application/json" \
                    --data-binary "@-" ;
                  echo "" ;
                fi
              done
          volumeMounts:
            - name: config-volume
              mountPath: /opt/grafana-import-dashboards
      restartPolicy: Never
      volumes:
        # JSON definitions to import, supplied by the ConfigMap of the same
        # name as this Job.
        - name: config-volume
          configMap:
            name: grafana-import-dashboards

View File

@ -0,0 +1,40 @@
2017-09-25T11:53:14.559200871Z E0925 11:53:14.558983 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:14.560711186Z E0925 11:53:14.560539 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:14.561043368Z E0925 11:53:14.560920 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:14.56211475Z E0925 11:53:14.561906 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:15.560928538Z E0925 11:53:15.560732 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:15.562265859Z E0925 11:53:15.562102 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:15.563239559Z E0925 11:53:15.563067 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:15.564390281Z E0925 11:53:15.564196 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:16.562666898Z E0925 11:53:16.562450 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:16.563807986Z E0925 11:53:16.563638 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:16.564821972Z E0925 11:53:16.564628 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:16.565848893Z E0925 11:53:16.565669 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:17.56438821Z E0925 11:53:17.564155 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:17.565381358Z E0925 11:53:17.565189 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:17.566231354Z E0925 11:53:17.566131 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:17.567286798Z E0925 11:53:17.567131 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:18.570368569Z E0925 11:53:18.570150 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:18.570406501Z E0925 11:53:18.570163 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:18.570413661Z E0925 11:53:18.570184 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:18.57041935Z E0925 11:53:18.570218 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:19.57212411Z E0925 11:53:19.571840 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:19.573109252Z E0925 11:53:19.572911 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:19.574044784Z E0925 11:53:19.573810 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:19.575346655Z E0925 11:53:19.575102 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:20.573827161Z E0925 11:53:20.573560 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:20.574666239Z E0925 11:53:20.574441 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:20.57573655Z E0925 11:53:20.575493 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:20.576839576Z E0925 11:53:20.576603 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:21.575665021Z E0925 11:53:21.575429 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:21.576522006Z E0925 11:53:21.576324 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:21.577614983Z E0925 11:53:21.577404 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:21.578577469Z E0925 11:53:21.578373 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:22.577373226Z E0925 11:53:22.577121 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:22.578267576Z E0925 11:53:22.578043 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
2017-09-25T11:53:22.579199644Z E0925 11:53:22.579002 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
2017-09-25T11:53:22.580366842Z E0925 11:53:22.580177 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
2017-09-25T11:53:23.578999887Z E0925 11:53:23.578734 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
2017-09-25T11:53:23.58002011Z E0925 11:53:23.579820 1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)

View File

@ -0,0 +1,4 @@
# Namespace that the monitoring manifests in this change set deploy into.
kind: Namespace
apiVersion: v1
metadata:
  name: monitoring

View File

@ -0,0 +1,75 @@
# ServiceAccount bound to the "prometheus" ClusterRole by the
# ClusterRoleBinding of the same name.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus-k8s
---
# Cluster-wide read access for Prometheus: get/list/watch on the core
# objects listed below, get on ConfigMaps, and get on the /metrics
# non-resource URL.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # "" is the core API group.
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get
---
# ServiceAccount bound to the "kube-state-metrics" ClusterRole by the
# ClusterRoleBinding of the same name.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: monitoring
  name: kube-state-metrics
---
# Grants the "prometheus" ClusterRole to the prometheus-k8s ServiceAccount
# in the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: prometheus
subjects:
  - kind: ServiceAccount
    namespace: monitoring
    name: prometheus-k8s
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
---
# Cluster-wide list/watch access for the kube-state-metrics collectors.
#
# RBAC rules match on the bare API group name (no version suffix; "" is the
# core group) and on the plural resource name. The earlier rules used values
# such as "batch/v1" / "job" and "v1" / "persistentvolumeclaim", which match
# nothing, so the jobs, cronjobs, persistentvolumeclaims and statefulsets
# collectors were rejected with "cannot list ... at the cluster scope".
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
  # Core API group; persistentvolumeclaims belongs here, not in a "v1" group.
  - apiGroups: [""]
    resources:
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
    verbs: ["list", "watch"]
  - apiGroups: ["extensions"]
    resources:
      - daemonsets
      - deployments
      - replicasets
    verbs: ["list", "watch"]
  - apiGroups: ["apps"]
    resources:
      - statefulsets
    verbs: ["list", "watch"]
  # One rule covers both batch resources: the group is "batch" whichever
  # API version (v1, v2alpha1) serves the object.
  - apiGroups: ["batch"]
    resources:
      - jobs
      - cronjobs
    verbs: ["list", "watch"]
---
# Grants the "kube-state-metrics" ClusterRole to the kube-state-metrics
# ServiceAccount in the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    namespace: monitoring
    name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,19 @@
# Single-replica Deployment named "test": one CentOS 7.2.1511 container
# running under the prometheus-k8s ServiceAccount in the monitoring
# namespace.
# NOTE(review): no command or args are set, so the container runs whatever
# the image defaults to - confirm that keeps the pod running.
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
  namespace: monitoring
  name: test
  labels:
    app: test
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: test
    spec:
      serviceAccountName: prometheus-k8s
      containers:
        - name: test
          image: sz-pg-oam-docker-hub-001.tendcloud.com/library/centos:7.2.1511
          imagePullPolicy: IfNotPresent

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More