安装prometheus显示图标

2017-09-26 16:58:22 +08:00 · 2017-09-26 16:58:22 +08:00 · 198ce94364
parent f8d487c75a
commit 198ce94364
11 changed files with 3980 additions and 1961 deletions
--- a/images/kubernetes-prometheus-monitoring.jpg
+++ b/images/kubernetes-prometheus-monitoring.jpg
--- a/manifests/prometheus/grafana-import-dashboards-job.yaml
+++ b/manifests/prometheus/grafana-import-dashboards-job.yaml
@ -1,67 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: grafana-import-dashboards
-  namespace: monitoring
-  labels:
-    app: grafana
-    component: import-dashboards
-spec:
-  template:
-    metadata:
-      name: grafana-import-dashboards
-      labels:
-        app: grafana
-        component: import-dashboards
-      annotations:
-        pod.beta.kubernetes.io/init-containers: '[
-          {
-            "name": "wait-for-endpoints",
-            "image": "sz-pg-oam-docker-hub-001.tendcloud.com/library/giantswarm-tiny-tools",
-            "imagePullPolicy": "IfNotPresent",
-            "command": ["fish", "-c", "echo \"waiting for endpoints...\"; while true; set endpoints (curl -s --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt --header \"Authorization: Bearer \"(cat /var/run/secrets/kubernetes.io/serviceaccount/token) https://kubernetes.default/api/v1/namespaces/monitoring/endpoints/grafana); echo $endpoints | jq \".\"; if test (echo $endpoints | jq -r \".subsets[]?.addresses // [] | length\") -gt 0; exit 0; end; echo \"waiting...\";sleep 1; end"],
-            "args": ["monitoring", "grafana"]
-          }
-        ]'
-    spec:
-      serviceAccountName: prometheus-k8s
-      containers:
-      - name: grafana-import-dashboards
-        image: sz-pg-oam-docker-hub-001.tendcloud.com/library/giantswarm-tiny-tools
-        command: ["/bin/sh", "-c"]
-        workingDir: /opt/grafana-import-dashboards
-        args:
-          - >
-            for file in *-datasource.json ; do
-              if [ -e "$file" ] ; then
-                echo "importing $file" &&
-                curl --silent --fail --show-error \
-                  --request POST http://admin:admin@grafana:3000/api/datasources \
-                  --header "Content-Type: application/json" \
-                  --data-binary "@$file" ;
-                echo "" ;
-              fi
-            done ;
-            for file in *-dashboard.json ; do
-              if [ -e "$file" ] ; then
-                echo "importing $file" &&
-                ( echo '{"dashboard":'; \
-                  cat "$file"; \
-                  echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
-                | jq -c '.' \
-                | curl --silent --fail --show-error \
-                  --request POST http://admin:admin@grafana:3000/api/dashboards/import \
-                  --header "Content-Type: application/json" \
-                  --data-binary "@-" ;
-                echo "" ;
-              fi
-            done
-
-        volumeMounts:
-        - name: config-volume
-          mountPath: /opt/grafana-import-dashboards
-      restartPolicy: Never
-      volumes:
-      - name: config-volume
-        configMap:
-          name: grafana-import-dashboards
--- a/manifests/prometheus/grafana-net-2-dashboard.json
+++ b/manifests/prometheus/grafana-net-2-dashboard.json
@ -0,0 +1,623 @@
+   {
+      "__inputs": [{
+        "name": "DS_PROMETHEUS",
+        "label": "Prometheus",
+        "description": "",
+        "type": "datasource",
+        "pluginId": "prometheus",
+        "pluginName": "Prometheus"
+      }],
+      "__requires": [{
+        "type": "panel",
+        "id": "singlestat",
+        "name": "Singlestat",
+        "version": ""
+      }, {
+        "type": "panel",
+        "id": "text",
+        "name": "Text",
+        "version": ""
+      }, {
+        "type": "panel",
+        "id": "graph",
+        "name": "Graph",
+        "version": ""
+      }, {
+        "type": "grafana",
+        "id": "grafana",
+        "name": "Grafana",
+        "version": "3.1.0"
+      }, {
+        "type": "datasource",
+        "id": "prometheus",
+        "name": "Prometheus",
+        "version": "1.0.0"
+      }],
+      "id": null,
+      "title": "Prometheus Stats",
+      "tags": [],
+      "style": "dark",
+      "timezone": "browser",
+      "editable": true,
+      "hideControls": true,
+      "sharedCrosshair": false,
+      "rows": [{
+        "collapse": false,
+        "editable": true,
+        "height": 178,
+        "panels": [{
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": ["rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)"],
+          "datasource": "",
+          "decimals": 1,
+          "editable": true,
+          "error": false,
+          "format": "s",
+          "id": 5,
+          "interval": null,
+          "links": [],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "span": 3,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": false
+          },
+          "targets": [{
+            "expr": "(time() - process_start_time_seconds{job=\"prometheus\"})",
+            "intervalFactor": 2,
+            "refId": "A",
+            "step": 4
+          }],
+          "thresholds": "",
+          "title": "Uptime",
+          "type": "singlestat",
+          "valueFontSize": "80%",
+          "valueMaps": [{
+            "op": "=",
+            "text": "N/A",
+            "value": "null"
+          }],
+          "valueName": "current",
+          "mappingTypes": [{
+            "name": "value to text",
+            "value": 1
+          }, {
+            "name": "range to text",
+            "value": 2
+          }],
+          "rangeMaps": [{
+            "from": "null",
+            "to": "null",
+            "text": "N/A"
+          }],
+          "mappingType": 1,
+          "gauge": {
+            "show": false,
+            "minValue": 0,
+            "maxValue": 100,
+            "thresholdMarkers": true,
+            "thresholdLabels": false
+          }
+        }, {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"],
+          "datasource": "",
+          "editable": true,
+          "error": false,
+          "format": "none",
+          "id": 6,
+          "interval": null,
+          "links": [],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "span": 3,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": true
+          },
+          "targets": [{
+            "expr": "prometheus_local_storage_memory_series",
+            "intervalFactor": 2,
+            "refId": "A",
+            "step": 4
+          }],
+          "thresholds": "1,5",
+          "title": "Local Storage Memory Series",
+          "type": "singlestat",
+          "valueFontSize": "70%",
+          "valueMaps": [],
+          "valueName": "current",
+          "mappingTypes": [{
+            "name": "value to text",
+            "value": 1
+          }, {
+            "name": "range to text",
+            "value": 2
+          }],
+          "rangeMaps": [{
+            "from": "null",
+            "to": "null",
+            "text": "N/A"
+          }],
+          "mappingType": 1,
+          "gauge": {
+            "show": false,
+            "minValue": 0,
+            "maxValue": 100,
+            "thresholdMarkers": true,
+            "thresholdLabels": false
+          }
+        }, {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": true,
+          "colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"],
+          "datasource": "",
+          "editable": true,
+          "error": false,
+          "format": "none",
+          "id": 7,
+          "interval": null,
+          "links": [],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "span": 3,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": true
+          },
+          "targets": [{
+            "expr": "prometheus_local_storage_indexing_queue_length",
+            "intervalFactor": 2,
+            "refId": "A",
+            "step": 4
+          }],
+          "thresholds": "500,4000",
+          "title": "Internal Storage Queue Length",
+          "type": "singlestat",
+          "valueFontSize": "70%",
+          "valueMaps": [{
+            "op": "=",
+            "text": "Empty",
+            "value": "0"
+          }],
+          "valueName": "current",
+          "mappingTypes": [{
+            "name": "value to text",
+            "value": 1
+          }, {
+            "name": "range to text",
+            "value": 2
+          }],
+          "rangeMaps": [{
+            "from": "null",
+            "to": "null",
+            "text": "N/A"
+          }],
+          "mappingType": 1,
+          "gauge": {
+            "show": false,
+            "minValue": 0,
+            "maxValue": 100,
+            "thresholdMarkers": true,
+            "thresholdLabels": false
+          }
+        }, {
+          "content": "<img src=\"http://prometheus.io/assets/prometheus_logo_grey.svg\" alt=\"Prometheus logo\" style=\"height: 40px;\">\n<span style=\"font-family: 'Open Sans', 'Helvetica Neue', Helvetica; font-size: 25px;vertical-align: text-top;color: #bbbfc2;margin-left: 10px;\">Prometheus</span>\n\n<p style=\"margin-top: 10px;\">You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the <a href=\"http://www.grafana.org/\">Grafana</a> and <a href=\"http://prometheus.io/\">Prometheus</a> projects.</p>",
+          "editable": true,
+          "error": false,
+          "id": 9,
+          "links": [],
+          "mode": "html",
+          "span": 3,
+          "style": {},
+          "title": "",
+          "transparent": true,
+          "type": "text"
+        }],
+        "title": "New row"
+      }, {
+        "collapse": false,
+        "editable": true,
+        "height": 227,
+        "panels": [{
+          "aliasColors": {
+            "prometheus": "#C15C17",
+            "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17"
+          },
+          "bars": false,
+          "datasource": "",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {
+            "threshold1": null,
+            "threshold1Color": "rgba(216, 200, 27, 0.27)",
+            "threshold2": null,
+            "threshold2Color": "rgba(234, 112, 112, 0.22)"
+          },
+          "id": 3,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 2,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "span": 9,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [{
+            "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])",
+            "interval": "",
+            "intervalFactor": 2,
+            "legendFormat": "{{job}}",
+            "metric": "",
+            "refId": "A",
+            "step": 2
+          }],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Samples ingested (rate-5m)",
+          "tooltip": {
+            "shared": true,
+            "value_type": "cumulative",
+            "ordering": "alphabetical",
+            "msResolution": false
+          },
+          "type": "graph",
+          "yaxes": [{
+            "show": true,
+            "min": null,
+            "max": null,
+            "logBase": 1,
+            "format": "short"
+          }, {
+            "show": true,
+            "min": null,
+            "max": null,
+            "logBase": 1,
+            "format": "short"
+          }],
+          "xaxis": {
+            "show": true
+          }
+        }, {
+          "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ",
+          "editable": true,
+          "error": false,
+          "id": 8,
+          "links": [],
+          "mode": "markdown",
+          "span": 2.995914043583536,
+          "style": {},
+          "title": "",
+          "transparent": true,
+          "type": "text"
+        }],
+        "title": "New row"
+      }, {
+        "collapse": false,
+        "editable": true,
+        "height": "250px",
+        "panels": [{
+          "aliasColors": {
+            "prometheus": "#F9BA8F",
+            "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F"
+          },
+          "bars": false,
+          "datasource": "",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {
+            "threshold1": null,
+            "threshold1Color": "rgba(216, 200, 27, 0.27)",
+            "threshold2": null,
+            "threshold2Color": "rgba(234, 112, 112, 0.22)"
+          },
+          "id": 2,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "span": 5,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [{
+            "expr": "rate(prometheus_target_interval_length_seconds_count[5m])",
+            "intervalFactor": 2,
+            "legendFormat": "{{job}}",
+            "refId": "A",
+            "step": 2
+          }],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Target Scrapes (last 5m)",
+          "tooltip": {
+            "shared": true,
+            "value_type": "cumulative",
+            "ordering": "alphabetical",
+            "msResolution": false
+          },
+          "type": "graph",
+          "yaxes": [{
+            "show": true,
+            "min": null,
+            "max": null,
+            "logBase": 1,
+            "format": "short"
+          }, {
+            "show": true,
+            "min": null,
+            "max": null,
+            "logBase": 1,
+            "format": "short"
+          }],
+          "xaxis": {
+            "show": true
+          }
+        }, {
+          "aliasColors": {},
+          "bars": false,
+          "datasource": "",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {
+            "threshold1": null,
+            "threshold1Color": "rgba(216, 200, 27, 0.27)",
+            "threshold2": null,
+            "threshold2Color": "rgba(234, 112, 112, 0.22)"
+          },
+          "id": 14,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "span": 4,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [{
+            "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}",
+            "interval": "",
+            "intervalFactor": 2,
+            "legendFormat": "{{quantile}} ({{interval}})",
+            "metric": "",
+            "refId": "A",
+            "step": 2
+          }],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Scrape Duration",
+          "tooltip": {
+            "shared": true,
+            "value_type": "cumulative",
+            "ordering": "alphabetical",
+            "msResolution": false
+          },
+          "type": "graph",
+          "yaxes": [{
+            "show": true,
+            "min": null,
+            "max": null,
+            "logBase": 1,
+            "format": "short"
+          }, {
+            "show": true,
+            "min": null,
+            "max": null,
+            "logBase": 1,
+            "format": "short"
+          }],
+          "xaxis": {
+            "show": true
+          }
+        }, {
+          "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. ",
+          "editable": true,
+          "error": false,
+          "id": 11,
+          "links": [],
+          "mode": "markdown",
+          "span": 3,
+          "style": {},
+          "title": "",
+          "transparent": true,
+          "type": "text"
+        }],
+        "title": "New row"
+      }, {
+        "collapse": false,
+        "editable": true,
+        "height": "250px",
+        "panels": [{
+          "aliasColors": {},
+          "bars": false,
+          "datasource": "",
+          "decimals": null,
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {
+            "threshold1": null,
+            "threshold1Color": "rgba(216, 200, 27, 0.27)",
+            "threshold2": null,
+            "threshold2Color": "rgba(234, 112, 112, 0.22)"
+          },
+          "id": 12,
+          "legend": {
+            "alignAsTable": false,
+            "avg": false,
+            "current": false,
+            "hideEmpty": true,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "span": 9,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [{
+            "expr": "prometheus_evaluator_duration_milliseconds{quantile!=\"0.01\", quantile!=\"0.05\"}",
+            "interval": "",
+            "intervalFactor": 2,
+            "legendFormat": "{{quantile}}",
+            "refId": "A",
+            "step": 2
+          }],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Rule Eval Duration",
+          "tooltip": {
+            "shared": true,
+            "value_type": "cumulative",
+            "ordering": "alphabetical",
+            "msResolution": false
+          },
+          "type": "graph",
+          "yaxes": [{
+            "show": true,
+            "min": null,
+            "max": null,
+            "logBase": 1,
+            "format": "percentunit",
+            "label": ""
+          }, {
+            "show": true,
+            "min": null,
+            "max": null,
+            "logBase": 1,
+            "format": "short"
+          }],
+          "xaxis": {
+            "show": true
+          }
+        }, {
+          "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.",
+          "editable": true,
+          "error": false,
+          "id": 15,
+          "links": [],
+          "mode": "markdown",
+          "span": 3,
+          "style": {},
+          "title": "",
+          "transparent": true,
+          "type": "text"
+        }],
+        "title": "New row"
+      }],
+      "time": {
+        "from": "now-5m",
+        "to": "now"
+      },
+      "timepicker": {
+        "now": true,
+        "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"],
+        "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"]
+      },
+      "templating": {
+        "list": []
+      },
+      "annotations": {
+        "list": []
+      },
+      "refresh": false,
+      "schemaVersion": 12,
+      "version": 0,
+      "links": [{
+        "icon": "info",
+        "tags": [],
+        "targetBlank": true,
+        "title": "Grafana Docs",
+        "tooltip": "",
+        "type": "link",
+        "url": "http://www.grafana.org/docs"
+      }, {
+        "icon": "info",
+        "tags": [],
+        "targetBlank": true,
+        "title": "Prometheus Docs",
+        "type": "link",
+        "url": "http://prometheus.io/docs/introduction/overview/"
+      }],
+      "gnetId": 2,
+      "description": "The official, pre-built Prometheus Stats Dashboard."
+    }
--- a/manifests/prometheus/grafana-net-737-dashboard.json
+++ b/manifests/prometheus/grafana-net-737-dashboard.json
--- a/manifests/prometheus/import-grafana-dashboard.yaml
+++ b/manifests/prometheus/import-grafana-dashboard.yaml
--- a/manifests/prometheus/kube-state-metrics.log
+++ b/manifests/prometheus/kube-state-metrics.log
@ -1,40 +0,0 @@
-
-2017-09-25T11:53:14.559200871Z E0925 11:53:14.558983       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:14.560711186Z E0925 11:53:14.560539       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:14.561043368Z E0925 11:53:14.560920       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:14.56211475Z E0925 11:53:14.561906       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:15.560928538Z E0925 11:53:15.560732       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:15.562265859Z E0925 11:53:15.562102       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:15.563239559Z E0925 11:53:15.563067       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:15.564390281Z E0925 11:53:15.564196       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:16.562666898Z E0925 11:53:16.562450       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:16.563807986Z E0925 11:53:16.563638       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:16.564821972Z E0925 11:53:16.564628       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:16.565848893Z E0925 11:53:16.565669       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:17.56438821Z E0925 11:53:17.564155       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:17.565381358Z E0925 11:53:17.565189       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:17.566231354Z E0925 11:53:17.566131       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:17.567286798Z E0925 11:53:17.567131       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:18.570368569Z E0925 11:53:18.570150       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:18.570406501Z E0925 11:53:18.570163       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:18.570413661Z E0925 11:53:18.570184       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:18.57041935Z E0925 11:53:18.570218       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:19.57212411Z E0925 11:53:19.571840       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:19.573109252Z E0925 11:53:19.572911       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:19.574044784Z E0925 11:53:19.573810       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:19.575346655Z E0925 11:53:19.575102       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:20.573827161Z E0925 11:53:20.573560       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:20.574666239Z E0925 11:53:20.574441       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:20.57573655Z E0925 11:53:20.575493       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:20.576839576Z E0925 11:53:20.576603       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:21.575665021Z E0925 11:53:21.575429       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:21.576522006Z E0925 11:53:21.576324       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:21.577614983Z E0925 11:53:21.577404       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:21.578577469Z E0925 11:53:21.578373       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:22.577373226Z E0925 11:53:22.577121       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:22.578267576Z E0925 11:53:22.578043       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-2017-09-25T11:53:22.579199644Z E0925 11:53:22.579002       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:60: Failed to list *v1.PersistentVolumeClaim: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list persistentvolumeclaims at the cluster scope. (get persistentvolumeclaims)
-2017-09-25T11:53:22.580366842Z E0925 11:53:22.580177       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/statefulset.go:68: Failed to list *v1beta1.StatefulSet: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list statefulsets.apps at the cluster scope. (get statefulsets.apps)
-2017-09-25T11:53:23.578999887Z E0925 11:53:23.578734       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/job.go:106: Failed to list *v1.Job: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list jobs.batch at the cluster scope. (get jobs.batch)
-2017-09-25T11:53:23.58002011Z E0925 11:53:23.579820       1 reflector.go:201] k8s.io/kube-state-metrics/collectors/cronjob.go:86: Failed to list *v2alpha1.CronJob: User "system:serviceaccount:monitoring:kube-state-metrics" cannot list cronjobs.batch at the cluster scope. (get cronjobs.batch)
-
--- a/manifests/prometheus/prometheus-configmaps.yaml
+++ b/manifests/prometheus/prometheus-configmaps.yaml
--- a/manifests/prometheus/prometheus-datasource.json
+++ b/manifests/prometheus/prometheus-datasource.json
@ -0,0 +1,6 @@
+      "name": "prometheus",
+      "type": "prometheus",
+      "url": "http://prometheus:9090",
+      "access": "proxy",
+      "basicAuth": false
+    }
--- a/manifests/prometheus/prometheus-monitoring-serviceaccount.yaml
+++ b/manifests/prometheus/prometheus-monitoring-serviceaccount.yaml
@ -0,0 +1,11 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: prometheus-k8s
+  namespace: monitoring
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: kube-state-metrics
+  namespace: monitoring
--- a/manifests/prometheus/prometheus-monitoring.yaml
+++ b/manifests/prometheus/prometheus-monitoring.yaml
--- a/practice/using-prometheus-to-monitor-kuberentes-cluster.md
+++ b/practice/using-prometheus-to-monitor-kuberentes-cluster.md
@ -26,22 +26,58 @@

 ## 部署

+我将部署时需要用到的配置文件按 namespace、serviceaccount、configmaps、clusterrolebinding 分成了几个部分，最后一步才是部署 prometheus 和 grafana。
+
 ```bash
 ## 创建 monitoring namespace
 kubectl create -f prometheus-monitoring-ns.yaml
-## 创建 RBAC
-kubectl create -f prometheus-monitoring-rbac.yaml
-## 部署 Premetheus
+## 创建 serviceaccount
+kubectl create -f prometheus-monitoring-serviceaccount.yaml
+## 创建 configmaps
+kubectl create -f prometheus-configmaps.yaml
+## 创建 clusterrolebinding
+kubectl create clusterrolebinding kube-state-metrics --clusterrole=cluster-admin --serviceaccount=monitoring:kube-state-metrics
+kubectl create clusterrolebinding prometheus --clusterrole=cluster-admin --serviceaccount=monitoring:prometheus-k8s
+## 部署 Prometheus
 kubectl create -f prometheus-monitoring.yaml
 ```

-创建 RBAC 的过程考虑替换成下面的命令：
+访问 kubernetes 任何一个 node 上的 Grafana service 的 nodeport：
+
+![Grafana页面](../images/kubernetes-prometheus-monitoring.jpg)
+
+该图中的数据显示明显有问题，还需要修正。
+
+`prometheus-monitoring.yaml` 文件中有一个 Job 就是用来导入 grafana dashboard 配置信息的，如果该 Job 执行失败，可以单独在 `monitoring` 的 namespace 中启动一个容器，将 `manifests/prometheus` 目录下的 json 文件复制到容器中，然后进入容器内这些 json 文件所在的目录执行：

 ```bash
-kubectl create clusterrolebinding prometheus-monitoring --clusterrole=cluster-admin --serviceaccount=monitoring:default
+ for file in *-datasource.json ; do
+              if [ -e "$file" ] ; then
+                echo "importing $file" &&
+                curl --silent --fail --show-error \
+                  --request POST http://admin:admin@grafana:3000/api/datasources \
+                  --header "Content-Type: application/json" \
+                  --data-binary "@$file" ;
+                echo "" ;
+              fi
+            done ;
+            for file in *-dashboard.json ; do
+              if [ -e "$file" ] ; then
+                echo "importing $file" &&
+                ( echo '{"dashboard":'; \
+                  cat "$file"; \
+                  echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
+                | jq -c '.' \
+                | curl --silent --fail --show-error \
+                  --request POST http://admin:admin@grafana:3000/api/dashboards/import \
+                  --header "Content-Type: application/json" \
+                  --data-binary "@-" ;
+                echo "" ;
+              fi
+            done
 ```

-注意需要修改 YAML 文件中的 serviceaccount 和 clusterrolebinding 目前还未完成。
+这样也可以向 grafana 中导入 dashboard。

 ## 存在的问题

@ -56,14 +92,15 @@ kubectl create clusterrolebinding prometheus-monitoring --clusterrole=cluster-ad

 在部署 Prometheus 之前应该先创建 serviceaccount、clusterrole、clusterrolebinding 等对象，否则在安装过程中可能因为权限问题而导致各种错误，所以这些配置应该写在一个单独的文件中，而不应该跟其他部署写在一起，即使要写在一个文件中，也应该写在文件的最前面，因为使用 `kubectl` 部署的时候，kubectl 不会判断 YAML 文件中的资源依赖关系，只是简单的从头部开始执行部署，因此写在文件前面的对象会先部署。

-也可以绕过复杂的 RBAC 设置，直接使用下面的命令设置为 serviceaccount 设置成 admin 模式。
+**解决方法**
+
+也可以绕过复杂的 RBAC 设置，直接使用下面的命令将对应的 serviceaccount 设置成 admin 权限，如下：

 ```bash
-kubectl create clusterrolebinding prometheus-monitoring --clusterrole=cluster-admin --serviceaccount=monitoring:default
+kubectl create clusterrolebinding kube-state-metrics --clusterrole=cluster-admin --serviceaccount=monitoring:kube-state-metrics
+kubectl create clusterrolebinding prometheus --clusterrole=cluster-admin --serviceaccount=monitoring:prometheus-k8s
 ```

-这需要修改原配置中的 serviceaccount，并去掉原来的 clusterrolebinding。
-
 参考 [RBAC——基于角色的访问控制](../guide/rbac.md)

 ### 2. API 兼容问题