2017-09-25 21:41:08 +08:00
<!DOCTYPE HTML>
< html lang = "zh-hans" >
< head >
< meta charset = "UTF-8" >
< meta content = "text/html; charset=utf-8" http-equiv = "Content-Type" >
< title > 4.3.8 使用Prometheus监控kubernetes集群 · Kubernetes Handbook< / title >
< meta http-equiv = "X-UA-Compatible" content = "IE=edge" / >
< meta name = "description" content = "" >
< meta name = "generator" content = "GitBook 3.2.2" >
< meta name = "author" content = "Jimmy Song" >
< link rel = "stylesheet" href = "../gitbook/style.css" >
< link rel = "stylesheet" href = "../gitbook/gitbook-plugin-splitter/splitter.css" >
< link rel = "stylesheet" href = "../gitbook/gitbook-plugin-page-toc-button/plugin.css" >
< link rel = "stylesheet" href = "../gitbook/gitbook-plugin-image-captions/image-captions.css" >
< link rel = "stylesheet" href = "../gitbook/gitbook-plugin-page-footer-ex/style/plugin.css" >
< link rel = "stylesheet" href = "../gitbook/gitbook-plugin-back-to-top-button/plugin.css" >
< link rel = "stylesheet" href = "../gitbook/gitbook-plugin-search-plus/search.css" >
< link rel = "stylesheet" href = "../gitbook/gitbook-plugin-highlight/website.css" >
< link rel = "stylesheet" href = "../gitbook/gitbook-plugin-fontsettings/website.css" >
< meta name = "HandheldFriendly" content = "true" / >
< meta name = "viewport" content = "width=device-width, initial-scale=1, user-scalable=no" >
< meta name = "apple-mobile-web-app-capable" content = "yes" >
< meta name = "apple-mobile-web-app-status-bar-style" content = "black" >
< link rel = "apple-touch-icon-precomposed" sizes = "152x152" href = "../gitbook/images/apple-touch-icon-precomposed-152.png" >
< link rel = "shortcut icon" href = "../gitbook/images/favicon.ico" type = "image/x-icon" >
< link rel = "next" href = "storage.html" / >
< link rel = "prev" href = "manage-compute-resources-container.html" / >
< / head >
< body >
< div class = "book" >
< div class = "book-summary" >
< div id = "book-search-input" role = "search" >
< input type = "text" placeholder = "输入并搜索" / >
< / div >
< nav role = "navigation" >
< ul class = "summary" >
< li class = "chapter " data-level = "1.1" data-path = "../" >
< a href = "../" >
1. 前言
< / a >
< / li >
< li class = "chapter " data-level = "1.2" data-path = "../concepts/" >
< a href = "../concepts/" >
2. 概念原理
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.2.1" data-path = "../concepts/concepts.html" >
< a href = "../concepts/concepts.html" >
2.1 设计理念
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2" data-path = "../concepts/objects.html" >
< a href = "../concepts/objects.html" >
2.2 Objects
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.2.2.1" data-path = "../concepts/pod-overview.html" >
< a href = "../concepts/pod-overview.html" >
2.2.1 Pod
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.2.2.1.1" data-path = "../concepts/pod.html" >
< a href = "../concepts/pod.html" >
2.2.1.1 Pod解析
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.1.2" data-path = "../concepts/init-containers.html" >
< a href = "../concepts/init-containers.html" >
2.2.1.2 Init容器
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.1.3" data-path = "../concepts/pod-security-policy.html" >
< a href = "../concepts/pod-security-policy.html" >
2.2.1.3 Pod安全策略
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.1.4" data-path = "../concepts/pod-lifecycle.html" >
< a href = "../concepts/pod-lifecycle.html" >
2.2.1.4 Pod的生命周期
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.2.2.2" data-path = "../concepts/node.html" >
< a href = "../concepts/node.html" >
2.2.2 Node
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.3" data-path = "../concepts/namespace.html" >
< a href = "../concepts/namespace.html" >
2.2.3 Namespace
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.4" data-path = "../concepts/service.html" >
< a href = "../concepts/service.html" >
2.2.4 Service
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.5" data-path = "../concepts/volume.html" >
< a href = "../concepts/volume.html" >
2.2.5 Volume和Persistent Volume
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.6" data-path = "../concepts/deployment.html" >
< a href = "../concepts/deployment.html" >
2.2.6 Deployment
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.7" data-path = "../concepts/secret.html" >
< a href = "../concepts/secret.html" >
2.2.7 Secret
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.8" data-path = "../concepts/statefulset.html" >
< a href = "../concepts/statefulset.html" >
2.2.8 StatefulSet
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.9" data-path = "../concepts/daemonset.html" >
< a href = "../concepts/daemonset.html" >
2.2.9 DaemonSet
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.10" data-path = "../concepts/serviceaccount.html" >
< a href = "../concepts/serviceaccount.html" >
2.2.10 ServiceAccount
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.11" data-path = "../concepts/replicaset.html" >
< a href = "../concepts/replicaset.html" >
2.2.11 ReplicationController和ReplicaSet
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.12" data-path = "../concepts/job.html" >
< a href = "../concepts/job.html" >
2.2.12 Job
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.13" data-path = "../concepts/cronjob.html" >
< a href = "../concepts/cronjob.html" >
2.2.13 CronJob
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.14" data-path = "../concepts/ingress.html" >
< a href = "../concepts/ingress.html" >
2.2.14 Ingress
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.15" data-path = "../concepts/configmap.html" >
< a href = "../concepts/configmap.html" >
2.2.15 ConfigMap
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.16" data-path = "../concepts/horizontal-pod-autoscaling.html" >
< a href = "../concepts/horizontal-pod-autoscaling.html" >
2.2.16 Horizontal Pod Autoscaling
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.17" data-path = "../concepts/label.html" >
< a href = "../concepts/label.html" >
2.2.17 Label
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.18" data-path = "../concepts/garbage-collection.html" >
< a href = "../concepts/garbage-collection.html" >
2.2.18 垃圾收集
< / a >
< / li >
< li class = "chapter " data-level = "1.2.2.19" data-path = "../concepts/network-policy.html" >
< a href = "../concepts/network-policy.html" >
2.2.19 NetworkPolicy
< / a >
< / li >
< / ul >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.3" data-path = "../guide/" >
< a href = "../guide/" >
3. 用户指南
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.3.1" data-path = "../guide/resource-configuration.html" >
< a href = "../guide/resource-configuration.html" >
3.1 资源配置
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.3.1.1" data-path = "../guide/configure-liveness-readiness-probes.html" >
< a href = "../guide/configure-liveness-readiness-probes.html" >
3.1.1 配置Pod的liveness和readiness探针
< / a >
< / li >
< li class = "chapter " data-level = "1.3.1.2" data-path = "../guide/configure-pod-service-account.html" >
< a href = "../guide/configure-pod-service-account.html" >
3.1.2 配置Pod的Service Account
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.3.2" data-path = "../guide/command-usage.html" >
< a href = "../guide/command-usage.html" >
3.2 命令使用
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.3.2.1" data-path = "../guide/using-kubectl.html" >
< a href = "../guide/using-kubectl.html" >
3.2.1 使用kubectl
< / a >
< / li >
< li class = "chapter " data-level = "1.3.2.2" data-path = "../guide/docker-cli-to-kubectl.html" >
< a href = "../guide/docker-cli-to-kubectl.html" >
3.2.2 docker用户过渡到kubectl命令行指南
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.3.3" data-path = "../guide/cluster-security-management.html" >
< a href = "../guide/cluster-security-management.html" >
3.3 集群安全性管理
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.3.3.1" data-path = "../guide/managing-tls-in-a-cluster.html" >
< a href = "../guide/managing-tls-in-a-cluster.html" >
3.3.1 管理集群中的TLS
< / a >
< / li >
< li class = "chapter " data-level = "1.3.3.2" data-path = "../guide/kubelet-authentication-authorization.html" >
< a href = "../guide/kubelet-authentication-authorization.html" >
3.3.2 kubelet的认证授权
< / a >
< / li >
< li class = "chapter " data-level = "1.3.3.3" data-path = "../guide/tls-bootstrapping.html" >
< a href = "../guide/tls-bootstrapping.html" >
3.3.3 TLS bootstrap
< / a >
< / li >
< li class = "chapter " data-level = "1.3.3.4" data-path = "../guide/kubectl-user-authentication-authorization.html" >
< a href = "../guide/kubectl-user-authentication-authorization.html" >
3.3.4 kubectl的用户认证授权
< / a >
< / li >
< li class = "chapter " data-level = "1.3.3.5" data-path = "../guide/rbac.html" >
< a href = "../guide/rbac.html" >
3.3.5 RBAC——基于角色的访问控制
< / a >
< / li >
< li class = "chapter " data-level = "1.3.3.6" data-path = "../guide/ip-masq-agent.html" >
< a href = "../guide/ip-masq-agent.html" >
3.3.6 IP伪装代理
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.3.4" data-path = "../guide/access-kubernetes-cluster.html" >
< a href = "../guide/access-kubernetes-cluster.html" >
3.4 访问 Kubernetes 集群
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.3.4.1" data-path = "../guide/access-cluster.html" >
< a href = "../guide/access-cluster.html" >
3.4.1 访问集群
< / a >
< / li >
< li class = "chapter " data-level = "1.3.4.2" data-path = "../guide/authenticate-across-clusters-kubeconfig.html" >
< a href = "../guide/authenticate-across-clusters-kubeconfig.html" >
3.4.2 使用 kubeconfig 文件配置跨集群认证
< / a >
< / li >
< li class = "chapter " data-level = "1.3.4.3" data-path = "../guide/connecting-to-applications-port-forward.html" >
< a href = "../guide/connecting-to-applications-port-forward.html" >
3.4.3 通过端口转发访问集群中的应用程序
< / a >
< / li >
< li class = "chapter " data-level = "1.3.4.4" data-path = "../guide/service-access-application-cluster.html" >
< a href = "../guide/service-access-application-cluster.html" >
3.4.4 使用 service 访问群集中的应用程序
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.3.5" data-path = "../guide/application-development-deployment-flow.html" >
< a href = "../guide/application-development-deployment-flow.html" >
3.5 在kubernetes中开发部署应用
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.3.5.1" data-path = "../guide/deploy-applications-in-kubernetes.html" >
< a href = "../guide/deploy-applications-in-kubernetes.html" >
3.5.1 适用于kubernetes的应用开发部署流程
< / a >
< / li >
< li class = "chapter " data-level = "1.3.5.2" data-path = "../guide/migrating-hadoop-yarn-to-kubernetes.html" >
< a href = "../guide/migrating-hadoop-yarn-to-kubernetes.html" >
3.5.2 迁移传统应用到kubernetes中——以Hadoop YARN为例
< / a >
< / li >
< / ul >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.4" data-path = "./" >
< a href = "./" >
4. 最佳实践
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.4.1" data-path = "install-kbernetes1.6-on-centos.html" >
< a href = "install-kbernetes1.6-on-centos.html" >
4.1 在CentOS上部署kubernetes1.6集群
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.4.1.1" data-path = "create-tls-and-secret-key.html" >
< a href = "create-tls-and-secret-key.html" >
4.1.1 创建TLS证书和秘钥
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.2" data-path = "create-kubeconfig.html" >
< a href = "create-kubeconfig.html" >
4.1.2 创建kubeconfig文件
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.3" data-path = "etcd-cluster-installation.html" >
< a href = "etcd-cluster-installation.html" >
4.1.3 创建高可用etcd集群
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.4" data-path = "kubectl-installation.html" >
< a href = "kubectl-installation.html" >
4.1.4 安装kubectl命令行工具
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.5" data-path = "master-installation.html" >
< a href = "master-installation.html" >
4.1.5 部署master节点
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.6" data-path = "node-installation.html" >
< a href = "node-installation.html" >
4.1.6 部署node节点
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.7" data-path = "kubedns-addon-installation.html" >
< a href = "kubedns-addon-installation.html" >
4.1.7 安装kubedns插件
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.8" data-path = "dashboard-addon-installation.html" >
< a href = "dashboard-addon-installation.html" >
4.1.8 安装dashboard插件
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.9" data-path = "heapster-addon-installation.html" >
< a href = "heapster-addon-installation.html" >
4.1.9 安装heapster插件
< / a >
< / li >
< li class = "chapter " data-level = "1.4.1.10" data-path = "efk-addon-installation.html" >
< a href = "efk-addon-installation.html" >
4.1.10 安装EFK插件
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.4.2" data-path = "service-discovery-and-loadbalancing.html" >
< a href = "service-discovery-and-loadbalancing.html" >
4.2 服务发现与负载均衡
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.4.2.1" data-path = "traefik-ingress-installation.html" >
< a href = "traefik-ingress-installation.html" >
4.2.1 安装Traefik ingress
< / a >
< / li >
< li class = "chapter " data-level = "1.4.2.2" data-path = "distributed-load-test.html" >
< a href = "distributed-load-test.html" >
4.2.2 分布式负载测试
< / a >
< / li >
< li class = "chapter " data-level = "1.4.2.3" data-path = "network-and-cluster-perfermance-test.html" >
< a href = "network-and-cluster-perfermance-test.html" >
4.2.3 网络和集群性能测试
< / a >
< / li >
< li class = "chapter " data-level = "1.4.2.4" data-path = "edge-node-configuration.html" >
< a href = "edge-node-configuration.html" >
4.2.4 边缘节点配置
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.4.3" data-path = "operation.html" >
< a href = "operation.html" >
4.3 运维管理
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.4.3.1" data-path = "service-rolling-update.html" >
< a href = "service-rolling-update.html" >
4.3.1 服务滚动升级
< / a >
< / li >
< li class = "chapter " data-level = "1.4.3.2" data-path = "app-log-collection.html" >
< a href = "app-log-collection.html" >
4.3.2 应用日志收集
< / a >
< / li >
< li class = "chapter " data-level = "1.4.3.3" data-path = "configuration-best-practice.html" >
< a href = "configuration-best-practice.html" >
4.3.3 配置最佳实践
< / a >
< / li >
< li class = "chapter " data-level = "1.4.3.4" data-path = "monitor.html" >
< a href = "monitor.html" >
4.3.4 集群及应用监控
< / a >
< / li >
< li class = "chapter " data-level = "1.4.3.5" data-path = "jenkins-ci-cd.html" >
< a href = "jenkins-ci-cd.html" >
4.3.5 使用Jenkins进行持续构建与发布
< / a >
< / li >
< li class = "chapter " data-level = "1.4.3.6" data-path = "data-persistence-problem.html" >
< a href = "data-persistence-problem.html" >
4.3.6 数据持久化问题
< / a >
< / li >
< li class = "chapter " data-level = "1.4.3.7" data-path = "manage-compute-resources-container.html" >
< a href = "manage-compute-resources-container.html" >
4.3.7 管理容器的计算资源
< / a >
< / li >
< li class = "chapter active" data-level = "1.4.3.8" data-path = "using-prometheus-to-monitor-kuberentes-cluster.html" >
< a href = "using-prometheus-to-monitor-kuberentes-cluster.html" >
4.3.8 使用Prometheus监控kubernetes集群
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.4.4" data-path = "storage.html" >
< a href = "storage.html" >
4.4 存储管理
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.4.4.1" data-path = "glusterfs.html" >
< a href = "glusterfs.html" >
4.4.1 GlusterFS
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.4.4.1.1" data-path = "using-glusterfs-for-persistent-storage.html" >
< a href = "using-glusterfs-for-persistent-storage.html" >
4.4.1.1 使用GlusterFS做持久化存储
< / a >
< / li >
< li class = "chapter " data-level = "1.4.4.1.2" data-path = "storage-for-containers-using-glusterfs-with-openshift.html" >
< a href = "storage-for-containers-using-glusterfs-with-openshift.html" >
4.4.1.2 在OpenShift中使用GlusterFS做持久化存储
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.4.4.2" data-path = "cephfs.html" >
< a href = "cephfs.html" >
4.4.2 CephFS
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.4.4.2.1" data-path = "using-ceph-for-persistent-storage.html" >
< a href = "using-ceph-for-persistent-storage.html" >
4.4.2.1 使用Ceph做持久化存储
< / a >
< / li >
< / ul >
< / li >
< / ul >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.5" data-path = "../usecases/" >
< a href = "../usecases/" >
5. 领域应用
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.5.1" data-path = "../usecases/microservices.html" >
< a href = "../usecases/microservices.html" >
5.1 微服务架构
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.5.1.1" data-path = "../usecases/service-discovery-in-microservices.html" >
< a href = "../usecases/service-discovery-in-microservices.html" >
5.1.1 微服务中的服务发现
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.5.2" data-path = "../usecases/service-mesh.html" >
< a href = "../usecases/service-mesh.html" >
5.2 Service Mesh 服务网格
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.5.2.1" data-path = "../usecases/istio.html" >
< a href = "../usecases/istio.html" >
5.1.1 Istio
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.5.2.1.1" data-path = "../usecases/istio-installation.html" >
< a href = "../usecases/istio-installation.html" >
5.1.1.1 安装istio
< / a >
< / li >
< li class = "chapter " data-level = "1.5.2.1.2" data-path = "../usecases/configuring-request-routing.html" >
< a href = "../usecases/configuring-request-routing.html" >
5.1.1.2 配置请求的路由规则
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.5.2.2" data-path = "../usecases/linkerd.html" >
< a href = "../usecases/linkerd.html" >
5.1.2 Linkerd
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.5.2.2.1" data-path = "../usecases/linkerd-user-guide.html" >
< a href = "../usecases/linkerd-user-guide.html" >
5.1.2.1 Linkerd 使用指南
< / a >
< / li >
< / ul >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.5.3" data-path = "../usecases/big-data.html" >
< a href = "../usecases/big-data.html" >
5.2 大数据
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.5.3.1" data-path = "../usecases/spark-standalone-on-kubernetes.html" >
< a href = "../usecases/spark-standalone-on-kubernetes.html" >
5.2.1 Spark standalone on Kubernetes
< / a >
< / li >
< li class = "chapter " data-level = "1.5.3.2" data-path = "../usecases/running-spark-with-kubernetes-native-scheduler.html" >
< a href = "../usecases/running-spark-with-kubernetes-native-scheduler.html" >
5.2.2 运行支持kubernetes原生调度的Spark程序
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.5.4" data-path = "../usecases/serverless.html" >
< a href = "../usecases/serverless.html" >
5.3 Serverless架构
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.6" data-path = "../develop/" >
< a href = "../develop/" >
6. 开发指南
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.6.1" data-path = "../develop/developing-environment.html" >
< a href = "../develop/developing-environment.html" >
6.1 开发环境搭建
< / a >
< / li >
< li class = "chapter " data-level = "1.6.2" data-path = "../develop/testing.html" >
< a href = "../develop/testing.html" >
6.2 单元测试和集成测试
< / a >
< / li >
< li class = "chapter " data-level = "1.6.3" data-path = "../develop/client-go-sample.html" >
< a href = "../develop/client-go-sample.html" >
6.3 client-go示例
< / a >
< / li >
< li class = "chapter " data-level = "1.6.4" data-path = "../develop/contribute.html" >
< a href = "../develop/contribute.html" >
6.4 社区贡献
< / a >
< / li >
< / ul >
< / li >
< li class = "chapter " data-level = "1.7" data-path = "../appendix/" >
< a href = "../appendix/" >
7. 附录
< / a >
< ul class = "articles" >
< li class = "chapter " data-level = "1.7.1" data-path = "../appendix/docker-best-practice.html" >
< a href = "../appendix/docker-best-practice.html" >
7.1 Docker最佳实践
< / a >
< / li >
< li class = "chapter " data-level = "1.7.2" data-path = "../appendix/issues.html" >
< a href = "../appendix/issues.html" >
7.2 问题记录
< / a >
< / li >
< li class = "chapter " data-level = "1.7.3" data-path = "../appendix/tricks.html" >
< a href = "../appendix/tricks.html" >
7.3 使用技巧
< / a >
< / li >
< li class = "chapter " data-level = "1.7.4" data-path = "../appendix/debug-kubernetes-services.html" >
< a href = "../appendix/debug-kubernetes-services.html" >
7.4 kubernetes service中的故障排查
< / a >
< / li >
< / ul >
< / li >
< li class = "divider" > < / li >
< li >
< a href = "https://www.gitbook.com" target = "blank" class = "gitbook-link" >
本书使用 GitBook 发布
< / a >
< / li >
< / ul >
< / nav >
< / div >
< div class = "book-body" >
< div class = "body-inner" >
< div class = "book-header" role = "navigation" >
<!-- Title -->
< h1 >
< i class = "fa fa-circle-o-notch fa-spin" > < / i >
< a href = ".." > 4.3.8 使用Prometheus监控kubernetes集群< / a >
< / h1 >
< / div >
< div class = "page-wrapper" tabindex = "-1" role = "main" >
< div class = "page-inner" >
< div class = "search-plus" id = "book-search-results" >
< div class = "search-noresults" >
< section class = "normal markdown-section" >
< h1 id = "使用prometheus监控kubernetes集群" > 使 用 Prometheus监 控 kubernetes集 群 < / h1 >
< p > 我 们 使 用 Giantswarm 开 源 的 < a href = "https://github.com/giantswarm/kubernetes-prometheus" target = "_blank" > kubernetes-prometheus< / a > 来 监 控 kubernetes 集 群 , 所 有 的 YAML 文 件 可 以 在 < a href = "../manifests/prometheus" > manifests/prometheus< / a > 目 录 下 找 到 。 < / p >
< p > 需 要 用 到 的 镜 像 有 : < / p >
< ul >
< li > sz-pg-oam-docker-hub-001.tendcloud.com/library/prometheus-alertmanager:v0.7.1< / li >
< li > sz-pg-oam-docker-hub-001.tendcloud.com/library/grafana:4.2.0< / li >
< li > sz-pg-oam-docker-hub-001.tendcloud.com/library/giantswarm-tiny-tools:latest< / li >
< li > sz-pg-oam-docker-hub-001.tendcloud.com/library/prom-prometheus:v1.7.0< / li >
< li > sz-pg-oam-docker-hub-001.tendcloud.com/library/kube-state-metrics:v1.0.1< / li >
< li > sz-pg-oam-docker-hub-001.tendcloud.com/library/dockermuenster-caddy:0.9.3< / li >
< li > sz-pg-oam-docker-hub-001.tendcloud.com/library/prom-node-exporter:v0.14.0< / li >
< / ul >
< p > 同 时 备 份 到 时 速 云 : < / p >
< ul >
< li > index.tenxcloud.com/jimmy/prometheus-alertmanager:v0.7.1< / li >
< li > index.tenxcloud.com/jimmy/grafana:4.2.0< / li >
< li > index.tenxcloud.com/jimmy/giantswarm-tiny-tools:latest< / li >
< li > index.tenxcloud.com/jimmy/prom-prometheus:v1.7.0< / li >
< li > index.tenxcloud.com/jimmy/kube-state-metrics:v1.0.1< / li >
< li > index.tenxcloud.com/jimmy/dockermuenster-caddy:0.9.3< / li >
< li > index.tenxcloud.com/jimmy/prom-node-exporter:v0.14.0< / li >
< / ul >
< p > < strong > 注 < / strong > : 所 有 镜 像 都 是 从 官 方 镜 像 仓 库 下 载 下 来 的 。 < / p >
< h2 id = "部署" > 部 署 < / h2 >
2017-09-26 16:59:46 +08:00
< p > 我 将 部 署 时 需 要 用 到 的 配 置 文 件 分 成 了 namespace、 serviceaccount、 configmaps、 clusterrolebinding 和 最 后 的 部 署 prometheus、 grafana 的 过 程 。 < / p >
2017-09-25 21:41:08 +08:00
< pre > < code class = "lang-yaml" > < span class = "hljs-comment" > ## 创 建 monitoring namespace< / span >
kubectl create -f prometheus-monitoring-ns.yaml
2017-09-26 16:59:46 +08:00
< span class = "hljs-comment" > ## 创 建 serviceaccount< / span >
kubectl create -f prometheus-monitoring-serviceaccount.yaml
< span class = "hljs-comment" > ## 创 建 configmaps< / span >
kubectl create -f prometheus-configmaps.yaml
< span class = "hljs-comment" > ## 创 建 clusterrolebinding< / span >
kubectl create clusterrolebinding kube-state-metrics --clusterrole=cluster-admin --serviceaccount=monitoring:kube-state-metrics
kubectl create clusterrolebinding prometheus --clusterrole=cluster-admin --serviceaccount=monitoring:prometheus
< span class = "hljs-comment" > ## 部 署 Prometheus< / span >
2017-09-25 21:41:08 +08:00
kubectl create -f prometheus-monitoring.yaml
< / code > < / pre >
2017-09-26 16:59:46 +08:00
< p > 访 问 kubernetes 任 何 一 个 node 上 的 Grafana service 的 nodeport: < / p >
< figure id = "fig1.4.3.8.1" > < img src = "../images/kubernetes-prometheus-monitoring.jpg" alt = "Grafana页面" > < figcaption > Figure: Grafana页 面 < / figcaption > < / figure >
< p > 该 图 中 的 数 据 显 示 明 显 有 问 题 , 还 需 要 修 正 。 < / p >
< p > < code > prometheus-monitoring.yaml< / code > 文 件 中 有 一 个 Job 就 是 用 来 导 入 grafana dashboard 配 置 信 息 的 , 如 果 该 Job 执 行 失 败 , 可 以 单 独 在 < code > monitoring< / code > 的 namespace 中 启 动 一 个 容 器 , 将 < code > manifests/prometheus< / code > 目 录 下 的 json 文 件 复 制 到 容 器 中 , 然 后 进 入 容 器 json 文 件 的 目 录 下 执 行 : < / p >
< pre > < code class = "lang-bash" > < span class = "hljs-keyword" > for< / span > file < span class = "hljs-keyword" > in< / span > *-datasource.json ; < span class = "hljs-keyword" > do< / span >
< span class = "hljs-keyword" > if< / span > [ < span class = "hljs-_" > -e< / span > < span class = "hljs-string" > " < span class = "hljs-variable" > $file< / span > " < / span > ] ; < span class = "hljs-keyword" > then< / span >
< span class = "hljs-built_in" > echo< / span > < span class = "hljs-string" > " importing < span class = "hljs-variable" > $file< / span > " < / span > & &
curl --silent --fail --show-error \
--request POST http://admin:admin@grafana:3000/api/datasources \
--header < span class = "hljs-string" > " Content-Type: application/json" < / span > \
--data-binary < span class = "hljs-string" > " @< span class = "hljs-variable" > $file< / span > " < / span > ;
< span class = "hljs-built_in" > echo< / span > < span class = "hljs-string" > " " < / span > ;
< span class = "hljs-keyword" > fi< / span >
< span class = "hljs-keyword" > done< / span > ;
< span class = "hljs-keyword" > for< / span > file < span class = "hljs-keyword" > in< / span > *-dashboard.json ; < span class = "hljs-keyword" > do< / span >
< span class = "hljs-keyword" > if< / span > [ < span class = "hljs-_" > -e< / span > < span class = "hljs-string" > " < span class = "hljs-variable" > $file< / span > " < / span > ] ; < span class = "hljs-keyword" > then< / span >
< span class = "hljs-built_in" > echo< / span > < span class = "hljs-string" > " importing < span class = "hljs-variable" > $file< / span > " < / span > & &
( < span class = "hljs-built_in" > echo< / span > < span class = "hljs-string" > ' {" dashboard" :' < / span > ; \
cat < span class = "hljs-string" > " < span class = "hljs-variable" > $file< / span > " < / span > ; \
< span class = "hljs-built_in" > echo< / span > < span class = "hljs-string" > ' ," overwrite" :true," inputs" :[{" name" :" DS_PROMETHEUS" ," type" :" datasource" ," pluginId" :" prometheus" ," value" :" prometheus" }]}' < / span > ) \
| jq -c < span class = "hljs-string" > ' .' < / span > \
| curl --silent --fail --show-error \
--request POST http://admin:admin@grafana:3000/api/dashboards/import \
--header < span class = "hljs-string" > " Content-Type: application/json" < / span > \
--data-binary < span class = "hljs-string" > " @-" < / span > ;
< span class = "hljs-built_in" > echo< / span > < span class = "hljs-string" > " " < / span > ;
< span class = "hljs-keyword" > fi< / span >
< span class = "hljs-keyword" > done< / span >
2017-09-25 21:41:08 +08:00
< / code > < / pre >
2017-09-26 16:59:46 +08:00
< p > 这 样 也 可 以 向 grafana 中 导 入 dashboard。 < / p >
2017-09-25 21:41:08 +08:00
< h2 id = "存在的问题" > 存 在 的 问 题 < / h2 >
< p > 该 项 目 的 代 码 中 存 在 几 个 问 题 。 < / p >
< h3 id = "1-rbac-角色授权问题" > 1. RBAC 角 色 授 权 问 题 < / h3 >
< p > 需 要 用 到 两 个 clusterrolebinding: < / p >
< ul >
< li > < code > kube-state-metrics< / code > , 对 应 的 < code > serviceaccount< / code > 是 < code > kube-state-metrics< / code > < / li >
< li > < code > prometheus< / code > , 对 应 的 < code > serviceaccount< / code > 是 < code > prometheus-k8s< / code > < / li >
< / ul >
< p > 在 部 署 Prometheus 之 前 应 该 先 创 建 serviceaccount、 clusterrole、 clusterrolebinding 等 对 象 , 否 则 在 安 装 过 程 中 可 能 因 为 权 限 问 题 而 导 致 各 种 错 误 , 所 以 这 些 配 置 应 该 写 在 一 个 单 独 的 文 件 中 , 而 不 应 该 跟 其 他 部 署 写 在 一 起 , 即 使 要 写 在 一 个 文 件 中 , 也 应 该 写 在 文 件 的 最 前 面 , 因 为 使 用 < code > kubectl< / code > 部 署 的 时 候 , kubectl 不 会 判 断 YAML 文 件 中 的 资 源 依 赖 关 系 , 只 是 简 单 的 从 头 部 开 始 执 行 部 署 , 因 此 写 在 文 件 前 面 的 对 象 会 先 部 署 。 < / p >
2017-09-26 16:59:46 +08:00
< p > < strong > 解 决 方 法 < / strong > < / p >
< p > 也 可 以 绕 过 复 杂 的 RBAC 设 置 , 直 接 使 用 下 面 的 命 令 将 对 应 的 serviceaccount 设 置 成 admin 权 限 , 如 下 : < / p >
< pre > < code class = "lang-bash" > kubectl create clusterrolebinding kube-state-metrics --clusterrole=cluster-admin --serviceaccount=monitoring:kube-state-metrics
kubectl create clusterrolebinding prometheus --clusterrole=cluster-admin --serviceaccount=monitoring:prometheus
2017-09-25 21:41:08 +08:00
< / code > < / pre >
< p > 参 考 < a href = "../guide/rbac.html" > RBAC— — 基 于 角 色 的 访 问 控 制 < / a > < / p >
< h3 id = "2-api-兼容问题" > 2. API 兼 容 问 题 < / h3 >
< p > 从 < code > kube-state-metrics< / code > 日 志 中 可 以 看 出 用 户 kube-state-metrics 没 有 权 限 访 问 如 下 资 源 类 型 : < / p >
< ul >
< li > *v1.Job< / li >
< li > *v1.PersistentVolumeClaim< / li >
< li > *v1beta1.StatefulSet< / li >
< li > *v2alpha1.CronJob< / li >
< / ul >
< p > 而 在 我 们 使 用 的 kubernetes 1.6.0 版 本 的 集 群 中 API 路 径 跟 < code > kube-state-metrics< / code > 中 不 同 , 无 法 list 以 上 四 种 资 源 对 象 的 资 源 。 详 情 见 : < a href = "https://github.com/giantswarm/kubernetes-prometheus/issues/77" target = "_blank" > https://github.com/giantswarm/kubernetes-prometheus/issues/77< / a > < / p >
< h3 id = "3-job-中的权限认证问题" > 3. Job 中 的 权 限 认 证 问 题 < / h3 >
< p > 在 < code > grafana-import-dashboards< / code > 这 个 job 中 有 个 < code > init-containers< / code > 其 中 指 定 的 command 执 行 错 误 , 应 该 使 用 < / p >
< pre > < code class = "lang-bash" > curl < span class = "hljs-_" > -s< / span > X GET -H < span class = "hljs-string" > " Authorization:bearer `cat /var/run/secrets/kubernetes.io/serviceaccount/token`" < / span > -k https://kubernetes.default/api/v1/namespaces/monitoring/endpoints/grafana
< / code > < / pre >
< p > 不 需 要 指 定 csr 文 件 , 只 需 要 token 即 可 。 < / p >
< p > 参 考 < a href = "https://github.com/giantswarm/kubernetes-prometheus/issues/56" target = "_blank" > wait-for-endpoints init-containers fails to load with k8s 1.6.0 #56< / a > < / p >
< h2 id = "参考" > 参 考 < / h2 >
< p > < a href = "https://github.com/giantswarm/kubernetes-prometheus" target = "_blank" > Kubernetes Setup for Prometheus and Grafana< / a > < / p >
< p > < a href = "../guide/rbac.html" > RBAC— — 基 于 角 色 的 访 问 控 制 < / a > < / p >
< p > < a href = "https://github.com/giantswarm/kubernetes-prometheus/issues/56" target = "_blank" > wait-for-endpoints init-containers fails to load with k8s 1.6.0 #56< / a > < / p >
2017-09-26 16:59:46 +08:00
< footer class = "page-footer-ex" > < span class = "page-footer-ex-copyright" > © All Rights Reserved < / span >                     < span class = "page-footer-ex-footer-update" > updated 2017-09-26 16:57:39 < / span > < / footer >
2017-09-25 21:41:08 +08:00
< / section >
< / div >
< div class = "search-results" >
< div class = "has-results" >
< h1 class = "search-results-title" > < span class = 'search-results-count' > < / span > results matching "< span class = 'search-query' > < / span > "< / h1 >
< ul class = "search-results-list" > < / ul >
< / div >
< div class = "no-results" >
< h1 class = "search-results-title" > No results matching "< span class = 'search-query' > < / span > "< / h1 >
< / div >
< / div >
< / div >
< / div >
< / div >
< / div >
< a href = "manage-compute-resources-container.html" class = "navigation navigation-prev " aria-label = "Previous page: 4.3.7 管理容器的计算资源" >
< i class = "fa fa-angle-left" > < / i >
< / a >
< a href = "storage.html" class = "navigation navigation-next " aria-label = "Next page: 4.4 存储管理" >
< i class = "fa fa-angle-right" > < / i >
< / a >
< / div >
< script >
var gitbook = gitbook || [];
gitbook.push(function() {
2017-09-27 19:33:55 +08:00
gitbook.page.hasChanged({"page":{"title":"4.3.8 使用Prometheus监控kubernetes集群","level":"1.4.3.8","depth":3,"next":{"title":"4.4 存储管理","level":"1.4.4","depth":2,"path":"practice/storage.md","ref":"practice/storage.md","articles":[{"title":"4.4.1 GlusterFS","level":"1.4.4.1","depth":3,"path":"practice/glusterfs.md","ref":"practice/glusterfs.md","articles":[{"title":"4.4.1.1 使用GlusterFS做持久化存储","level":"1.4.4.1.1","depth":4,"path":"practice/using-glusterfs-for-persistent-storage.md","ref":"practice/using-glusterfs-for-persistent-storage.md","articles":[]},{"title":"4.4.1.2 在OpenShift中使用GlusterFS做持久化存储","level":"1.4.4.1.2","depth":4,"path":"practice/storage-for-containers-using-glusterfs-with-openshift.md","ref":"practice/storage-for-containers-using-glusterfs-with-openshift.md","articles":[]}]},{"title":"4.4.2 CephFS","level":"1.4.4.2","depth":3,"path":"practice/cephfs.md","ref":"practice/cephfs.md","articles":[{"title":"4.4.2.1 使用Ceph做持久化存储","level":"1.4.4.2.1","depth":4,"path":"practice/using-ceph-for-persistent-storage.md","ref":"practice/using-ceph-for-persistent-storage.md","articles":[]}]}]},"previous":{"title":"4.3.7 管理容器的计算资源","level":"1.4.3.7","depth":3,"path":"practice/manage-compute-resources-container.md","ref":"practice/manage-compute-resources-container.md","articles":[]},"dir":"ltr"},"config":{"plugins":["github","codesnippet","splitter","page-toc-button","image-captions","page-footer-ex","editlink","back-to-top-button","-lunr","-search","search-plus"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"github":{"url":"https://github.com/rootsongjc/kubernetes-handbook"},"editlink":{"label":"编辑本页","multilingual":false,"base":"https://github.com/rootsongjc/kubernetes-handbook/blob/master/"},"page-footer-ex":{"copyright":"© All Rights Reserved","markdown":false,"update_format":"YYYY-MM-DD 
HH:mm:ss","update_label":"updated"},"splitter":{},"codesnippet":{},"fontsettings":{"theme":"white","family":"sans","size":2},"highlight":{},"page-toc-button":{},"back-to-top-button":{},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false},"search-plus":{},"image-captions":{"variable_name":"_pictures"}},"page-footer-ex":{"copyright":"Jimmy Song","update_label":"最后更新于:","update_format":"YYYY-MM-DD HH:mm:ss"},"theme":"default","author":"Jimmy Song","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{"_pictures":[{"backlink":"concepts/index.html#fig1.2.1","level":"1.2","list_caption":"Figure: Borg架构","alt":"Borg架构","nro":1,"url":"../images/borg.png","index":1,"caption_template":"Figure: _CAPTION_","label":"Borg架构","attributes":{},"skip":false,"key":"1.2.1"},{"backlink":"concepts/index.html#fig1.2.2","level":"1.2","list_caption":"Figure: Kubernetes架构","alt":"Kubernetes架构","nro":2,"url":"../images/architecture.png","index":2,"caption_template":"Figure: _CAPTION_","label":"Kubernetes架构","attributes":{},"skip":false,"key":"1.2.2"},{"backlink":"concepts/index.html#fig1.2.3","level":"1.2","list_caption":"Figure: kubernetes整体架构示意图","alt":"kubernetes整体架构示意图","nro":3,"url":"../images/kubernetes-whole-arch.png","index":3,"caption_template":"Figure: _CAPTION_","label":"kubernetes整体架构示意图","attributes":{},"skip":false,"key":"1.2.3"},{"backlink":"concepts/index.html#fig1.2.4","level":"
2017-09-25 21:41:08 +08:00
});
< / script >
</div>
<script src="../gitbook/gitbook.js"></script>
<script src="../gitbook/theme.js"></script>
<script src="../gitbook/gitbook-plugin-github/plugin.js"></script>
<script src="../gitbook/gitbook-plugin-splitter/splitter.js"></script>
<script src="../gitbook/gitbook-plugin-page-toc-button/plugin.js"></script>
<script src="../gitbook/gitbook-plugin-editlink/plugin.js"></script>
<script src="../gitbook/gitbook-plugin-back-to-top-button/plugin.js"></script>
<script src="../gitbook/gitbook-plugin-search-plus/jquery.mark.min.js"></script>
<script src="../gitbook/gitbook-plugin-search-plus/search.js"></script>
<script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
<script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
</body>
</html>