From c15c735c0ce1010a00ac7f745e70a7fa40f00516 Mon Sep 17 00:00:00 2001 From: Jimmy Song Date: Thu, 18 May 2017 15:24:07 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E6=9E=84=E6=96=87=E7=AB=A0=E7=9B=AE?= =?UTF-8?q?=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 20-deployment概念解析.md | 744 ----------------- README.md | 55 +- SUMMARY.md | 132 +++- architecture/deployment.md | 746 ++++++++++++++++++ architecture/objects.md | 20 + .../centos/create-kubeconfig.md | 0 .../centos/create-tls-and-secret-key.md | 0 .../centos/dashboard-addon-installation.md | 0 .../centos/efk-addon-installation.md | 4 +- .../centos/etcd-cluster-installation.md | 0 .../centos/heapster-addon-installation.md | 4 +- .../centos/install-kbernetes1.6-on-centos.md | 24 +- .../centos/kubectl-installation.md | 0 .../centos/kubedns-addon-installation.md | 0 .../centos/master-installation.md | 0 .../centos/node-installation.md | 0 deploy/index.md | 11 + .../kubernetes-configuration-best-practice.md | 0 network/network-configuration.md | 5 + .../network-modes-in-kubernetes.md | 2 +- .../app-log-collection.md | 2 +- ops/opration-administration.md | 6 + .../service-rolling-update.md | 0 .../rbac-support-in-kubernetes.md | 4 +- security/security-configuration.md | 3 + .../distributed-load-test.md | 13 +- .../edge-node-configuration.md | 2 +- .../ingress-concept.md | 0 .../network-and-cluster-perfermance-test.md | 0 .../service-discovery-and-load-balancing.md | 7 + .../traefik-ingress-installation.md | 8 +- storage/storage-configuration.md | 4 + .../using-glusterfs-for-persistent-storage.md | 0 33 files changed, 945 insertions(+), 851 deletions(-) delete mode 100644 20-deployment概念解析.md create mode 100644 architecture/objects.md rename 02-创建kubeconfig文件.md => deploy/centos/create-kubeconfig.md (100%) rename 01-创建TLS证书和密钥.md => deploy/centos/create-tls-and-secret-key.md (100%) rename 08-安装dashboard插件.md => deploy/centos/dashboard-addon-installation.md 
(100%) rename 10-安装EFK插件.md => deploy/centos/efk-addon-installation.md (98%) rename 03-创建高可用etcd集群.md => deploy/centos/etcd-cluster-installation.md (100%) rename 09-安装heapster插件.md => deploy/centos/heapster-addon-installation.md (98%) rename 00-kubernetes安装前言.md => deploy/centos/install-kbernetes1.6-on-centos.md (71%) rename 04-安装kubectl命令行工具.md => deploy/centos/kubectl-installation.md (100%) rename 07-安装kubedns插件.md => deploy/centos/kubedns-addon-installation.md (100%) rename 05-部署高可用master集群.md => deploy/centos/master-installation.md (100%) rename 06-部署node节点.md => deploy/centos/node-installation.md (100%) rename 22-kubernetes配置最佳实践.md => deploy/kubernetes-configuration-best-practice.md (100%) create mode 100644 network/network-configuration.md rename 16-kubernetes中的网络模式解析.md => network/network-modes-in-kubernetes.md (98%) rename 21-应用日志收集.md => ops/app-log-collection.md (98%) create mode 100644 ops/opration-administration.md rename 19-服务滚动升级.md => ops/service-rolling-update.md (100%) rename 13-kubernetes中的RBAC支持.md => security/rbac-support-in-kubernetes.md (98%) create mode 100644 security/security-configuration.md rename 14-分布式负载测试.md => service-discovery-lb/distributed-load-test.md (87%) rename 18-边缘节点配置.md => service-discovery-lb/edge-node-configuration.md (98%) rename 11-ingress解析.md => service-discovery-lb/ingress-concept.md (100%) rename 15-kubernetes网络和集群性能测试.md => service-discovery-lb/network-and-cluster-perfermance-test.md (100%) create mode 100644 service-discovery-lb/service-discovery-and-load-balancing.md rename 12-安装traefik-ingress.md => service-discovery-lb/traefik-ingress-installation.md (96%) create mode 100644 storage/storage-configuration.md rename 17-使用glusterfs做持久化存储.md => storage/using-glusterfs-for-persistent-storage.md (100%) diff --git a/20-deployment概念解析.md b/20-deployment概念解析.md deleted file mode 100644 index 75abdf580..000000000 --- a/20-deployment概念解析.md +++ /dev/null @@ -1,744 +0,0 @@ -# Deployment概念解析 - 
-本文翻译自kubernetes官方文档:https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md - -根据2017年5月10日的Commit 8481c02 翻译。 - -## Deployment是什么? - -Deployment为Pod和Replica Set(下一代Replication Controller)提供声明式更新。 - -你只需要在Deployment中描述你想要的目标状态是什么,Deployment controller就会帮你将Pod和Replica Set的实际状态改变到你的目标状态。你可以定义一个全新的Deployment,也可以创建一个新的替换旧的Deployment。 - -一个典型的用例如下: - -- 使用Deployment来创建ReplicaSet。ReplicaSet在后台创建pod。检查启动状态,看它是成功还是失败。 -- 然后,通过更新Deployment的PodTemplateSpec字段来声明Pod的新状态。这会创建一个新的ReplicaSet,Deployment会按照控制的速率将pod从旧的ReplicaSet移动到新的ReplicaSet中。 -- 如果当前状态不稳定,回滚到之前的Deployment revision。每次回滚都会更新Deployment的revision。 -- 扩容Deployment以满足更高的负载。 -- 暂停Deployment来应用PodTemplateSpec的多个修复,然后恢复上线。 -- 根据Deployment 的状态判断上线是否hang住了。 -- 清除旧的不必要的ReplicaSet。 - -## 创建Deployment - -下面是一个Deployment示例,它创建了一个Replica Set来启动3个nginx pod。 - -下载示例文件并执行命令: - -```shell -$ kubectl create -f docs/user-guide/nginx-deployment.yaml --record -deployment "nginx-deployment" created -``` - -将kubectl的 `—record` 的flag设置为 `true`可以在annotation中记录当前命令创建或者升级了该资源。这在未来会很有用,例如,查看在每个Deployment revision中执行了哪些命令。 - -然后立即执行`get`í将获得如下结果: - -```shell -$ kubectl get deployments -NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE -nginx-deployment 3 0 0 0 1s -``` - -输出结果表明我们希望的repalica数是3(根据deployment中的`.spec.replicas`配置)当前replica数( `.status.replicas`)是0, 最新的replica数(`.status.updatedReplicas`)是0,可用的replica数(`.status.availableReplicas`)是0。 - -过几秒后再执行`get`命令,将获得如下输出: - -```shell -$ kubectl get deployments -NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE -nginx-deployment 3 3 3 3 18s -``` - -我们可以看到Deployment已经创建了3个replica,所有的replica都已经是最新的了(包含最新的pod template),可用的(根据Deployment中的`.spec.minReadySeconds`声明,处于已就绪状态的pod的最少个数)。执行`kubectl get rs`和`kubectl get pods`会显示Replica Set(RS)和Pod已创建。 - -```shell -$ kubectl get rs -NAME DESIRED CURRENT READY AGE -nginx-deployment-2035384211 3 3 0 18s -``` - -你可能会注意到Replica Set的名字总是`-`。 - -```shell -$ kubectl get pods --show-labels -NAME READY STATUS RESTARTS 
AGE LABELS -nginx-deployment-2035384211-7ci7o 1/1 Running 0 18s app=nginx,pod-template-hash=2035384211 -nginx-deployment-2035384211-kzszj 1/1 Running 0 18s app=nginx,pod-template-hash=2035384211 -nginx-deployment-2035384211-qqcnn 1/1 Running 0 18s app=nginx,pod-template-hash=2035384211 -``` - -刚创建的Replica Set将保证总是有3个nginx的pod存在。 - -**注意:** 你必须在Deployment中的selector指定正确pod template label(在该示例中是 `app = nginx`),不要跟其他的controller搞混了(包括Deployment、Replica Set、Replication Controller等)。**Kubernetes本身不会阻止你这么做**,如果你真的这么做了,这些controller之间会相互打架,并可能导致不正确的行为。 - - -## 更新Deployment - -**注意:** Deployment的rollout当且仅当Deployment的pod template(例如`.spec.template`)中的label更新或者镜像更改时被触发。其他更新,例如扩容Deployment不会触发rollout。 - -假如我们现在想要让nginx pod使用`nginx:1.9.1`的镜像来代替原来的`nginx:1.7.9`的镜像。 - -```shell -$ kubectl set image deployment/nginx-deployment nginx=nginx:1.9.1 -deployment "nginx-deployment" image updated -``` - -我们可以使用`edit`命令来编辑Deployment,修改 `.spec.template.spec.containers[0].image` ,将`nginx:1.7.9` 改写成 `nginx:1.9.1`。 - -```shell -$ kubectl edit deployment/nginx-deployment -deployment "nginx-deployment" edited -``` - -查看rollout的状态,只要执行: - -```shell -$ kubectl rollout status deployment/nginx-deployment -Waiting for rollout to finish: 2 out of 3 new replicas have been updated... -deployment "nginx-deployment" successfully rolled out -``` - -Rollout成功后,`get` Deployment: - -```shell -$ kubectl get deployments -NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE -nginx-deployment 3 3 3 3 36s -``` - -UP-TO-DATE的replica的数目已经达到了配置中要求的数目。 - -CURRENT的replica数表示Deployment管理的replica数量,AVAILABLE的replica数是当前可用的replica数量。 - -We can run `kubectl get rs` to see that the Deployment updated the Pods by creating a new Replica Set and scaling it up to 3 replicas, as well as scaling down the old Replica Set to 0 replicas. 
- -我们通过执行`kubectl get rs`可以看到Deployment更新了Pod,通过创建一个新的Replica Set并扩容了3个replica,同时将原来的Replica Set缩容到了0个replica。 - -```shell -$ kubectl get rs -NAME DESIRED CURRENT READY AGE -nginx-deployment-1564180365 3 3 0 6s -nginx-deployment-2035384211 0 0 0 36s -``` - -执行 `get pods`只会看到当前的新的pod: - -```shell -$ kubectl get pods -NAME READY STATUS RESTARTS AGE -nginx-deployment-1564180365-khku8 1/1 Running 0 14s -nginx-deployment-1564180365-nacti 1/1 Running 0 14s -nginx-deployment-1564180365-z9gth 1/1 Running 0 14s -``` - -下次更新这些pod的时候,只需要更新Deployment中的pod的template即可。 - -Deployment可以保证在升级时只有一定数量的Pod是down的。默认的,它会确保至少有比期望的Pod数量少一个的Pod是up状态(最多一个不可用)。 - -Deployment同时也可以确保只创建出超过期望数量的一定数量的Pod。默认的,它会确保最多比期望的Pod数量多一个的Pod是up的(最多1个surge)。 - -**在未来的Kuberentes版本中,将从1-1变成25%-25%)。** - -例如,如果你自己看下上面的Deployment,你会发现,开始创建一个新的Pod,然后删除一些旧的Pod再创建一个新的。当新的Pod创建出来之前不会杀掉旧的Pod。这样能够确保可用的Pod数量至少有2个,Pod的总数最多4个。 - -```shell -$ kubectl describe deployments -Name: nginx-deployment -Namespace: default -CreationTimestamp: Tue, 15 Mar 2016 12:01:06 -0700 -Labels: app=nginx -Selector: app=nginx -Replicas: 3 updated | 3 total | 3 available | 0 unavailable -StrategyType: RollingUpdate -MinReadySeconds: 0 -RollingUpdateStrategy: 1 max unavailable, 1 max surge -OldReplicaSets: -NewReplicaSet: nginx-deployment-1564180365 (3/3 replicas created) -Events: - FirstSeen LastSeen Count From SubobjectPath Type Reason Message - --------- -------- ----- ---- ------------- -------- ------ ------- - 36s 36s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-2035384211 to 3 - 23s 23s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 1 - 23s 23s 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 2 - 23s 23s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 2 - 21s 21s 1 {deployment-controller } Normal ScalingReplicaSet Scaled 
down replica set nginx-deployment-2035384211 to 0 - 21s 21s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 3 -``` - -我们可以看到当我们刚开始创建这个Deployment的时候,创建了一个Replica Set(nginx-deployment-2035384211),并直接扩容到了3个replica。 - -当我们更新这个Deployment的时候,它会创建一个新的Replica Set(nginx-deployment-1564180365),将它扩容到1个replica,然后缩容原先的Replica Set到2个replica,此时满足至少2个Pod是可用状态,同一时刻最多有4个Pod处于创建的状态。 - -接着继续使用相同的rolling update策略扩容新的Replica Set和缩容旧的Replica Set。最终,将会在新的Replica Set中有3个可用的replica,旧的Replica Set的replica数目变成0。 - -### Rollover(多个rollout并行) - -每当Deployment controller观测到有新的deployment被创建时,如果没有已存在的Replica Set来创建期望个数的Pod的话,就会创建出一个新的Replica Set来做这件事。已存在的Replica Set控制label匹配`.spec.selector`但是template跟`.spec.template`不匹配的Pod缩容。最终,新的Replica Set将会扩容出`.spec.replicas`指定数目的Pod,旧的Replica Set会缩容到0。 - -如果你更新了一个的已存在并正在进行中的Deployment,每次更新Deployment都会创建一个新的Replica Set并扩容它,同时回滚之前扩容的Replica Set——将它添加到旧的Replica Set列表,开始缩容。 - -例如,假如你创建了一个有5个`niginx:1.7.9` replica的Deployment,但是当还只有3个`nginx:1.7.9`的replica创建出来的时候你就开始更新含有5个`nginx:1.9.1` replica的Deployment。在这种情况下,Deployment会立即杀掉已创建的3个`nginx:1.7.9`的Pod,并开始创建`nginx:1.9.1`的Pod。它不会等到所有的5个`nginx:1.7.9`的Pod都创建完成后才开始改变航道。 - -## 回退Deployment - -有时候你可能想回退一个Deployment,例如,当Deployment不稳定时,比如一直crash looping。 - -默认情况下,kubernetes会在系统中保存前两次的Deployment的rollout历史记录,以便你可以随时会退(你可以修改`revision history limit`来更改保存的revision数)。ß - -**注意:** 只要Deployment的rollout被触发就会创建一个revision。也就是说当且仅当Deployment的Pod template(如`.spec.template`)被更改,例如更新template中的label和容器镜像时,就会创建出一个新的revision。 - -其他的更新,比如扩容Deployment不会创建revision——因此我们可以很方便的手动或者自动扩容。这意味着当你回退到历史revision是,直邮Deployment中的Pod template部分才会回退。 - -假设我们在更新Deployment的时候犯了一个拼写错误,将镜像的名字写成了`nginx:1.91`,而正确的名字应该是`nginx:1.9.1`: - -```shell -$ kubectl set image deployment/nginx-deployment nginx=nginx:1.91 -deployment "nginx-deployment" image updated -``` - -Rollout将会卡住。 - -```shell -$ kubectl rollout status deployments nginx-deployment -Waiting for rollout to finish: 2 out of 3 new replicas have been 
updated... -``` - -按住Ctrl-C停止上面的rollout状态监控。 - -你会看到旧的replicas(nginx-deployment-1564180365 和 nginx-deployment-2035384211)和新的replicas (nginx-deployment-3066724191)数目都是2个。 - -```shell -$ kubectl get rs -NAME DESIRED CURRENT READY AGE -nginx-deployment-1564180365 2 2 0 25s -nginx-deployment-2035384211 0 0 0 36s -nginx-deployment-3066724191 2 2 2 6s -``` - -看下创建Pod,你会看到有两个新的呃Replica Set创建的Pod处于ImagePullBackOff状态,循环拉取镜像。 - -```shell -$ kubectl get pods -NAME READY STATUS RESTARTS AGE -nginx-deployment-1564180365-70iae 1/1 Running 0 25s -nginx-deployment-1564180365-jbqqo 1/1 Running 0 25s -nginx-deployment-3066724191-08mng 0/1 ImagePullBackOff 0 6s -nginx-deployment-3066724191-eocby 0/1 ImagePullBackOff 0 6s -``` - -注意,Deployment controller会自动停止坏的rollout,并停止扩容新的Replica Set。 - -```shell -$ kubectl describe deployment -Name: nginx-deployment -Namespace: default -CreationTimestamp: Tue, 15 Mar 2016 14:48:04 -0700 -Labels: app=nginx -Selector: app=nginx -Replicas: 2 updated | 3 total | 2 available | 2 unavailable -StrategyType: RollingUpdate -MinReadySeconds: 0 -RollingUpdateStrategy: 1 max unavailable, 1 max surge -OldReplicaSets: nginx-deployment-1564180365 (2/2 replicas created) -NewReplicaSet: nginx-deployment-3066724191 (2/2 replicas created) -Events: - FirstSeen LastSeen Count From SubobjectPath Type Reason Message - --------- -------- ----- ---- ------------- -------- ------ ------- - 1m 1m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-2035384211 to 3 - 22s 22s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 1 - 22s 22s 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 2 - 22s 22s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 2 - 21s 21s 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 0 - 21s 21s 
1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 3 - 13s 13s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-3066724191 to 1 - 13s 13s 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-1564180365 to 2 - 13s 13s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-3066724191 to 2 -``` - -为了修复这个问题,我们需要回退到稳定的Deployment revision。 - -### 检查Deployment升级的历史记录 - -首先,检查下Deployment的revision: - -```shell -$ kubectl rollout history deployment/nginx-deployment -deployments "nginx-deployment": -REVISION CHANGE-CAUSE -1 kubectl create -f docs/user-guide/nginx-deployment.yaml --record -2 kubectl set image deployment/nginx-deployment nginx=nginx:1.9.1 -3 kubectl set image deployment/nginx-deployment nginx=nginx:1.91 -``` - -因为我们创建Deployment的时候使用了`—recored`参数可以记录命令,我们可以很方便的查看每次revison的变化。 - -查看单个revision的详细信息: - -```shell -$ kubectl rollout history deployment/nginx-deployment --revision=2 -deployments "nginx-deployment" revision 2 - Labels: app=nginx - pod-template-hash=1159050644 - Annotations: kubernetes.io/change-cause=kubectl set image deployment/nginx-deployment nginx=nginx:1.9.1 - Containers: - nginx: - Image: nginx:1.9.1 - Port: 80/TCP - QoS Tier: - cpu: BestEffort - memory: BestEffort - Environment Variables: - No volumes. 
-``` - -### 回退到历史版本 - -现在,我们可以决定回退当前的rollout到之前的版本: - -```shell -$ kubectl rollout undo deployment/nginx-deployment -deployment "nginx-deployment" rolled back -``` - -也可以使用 `--revision`参数指定某个历史版本: - -```shell -$ kubectl rollout undo deployment/nginx-deployment --to-revision=2 -deployment "nginx-deployment" rolled back -``` - -与rollout相关的命令详细文档见[kubectl rollout](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/user-guide/kubectl/v1.6/#rollout)。 - -该Deployment现在已经回退到了先前的稳定版本。如你所见,Deployment controller产生了一个回退到revison 2的`DeploymentRollback`的event。 - -```shell -$ kubectl get deployment -NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE -nginx-deployment 3 3 3 3 30m - -$ kubectl describe deployment -Name: nginx-deployment -Namespace: default -CreationTimestamp: Tue, 15 Mar 2016 14:48:04 -0700 -Labels: app=nginx -Selector: app=nginx -Replicas: 3 updated | 3 total | 3 available | 0 unavailable -StrategyType: RollingUpdate -MinReadySeconds: 0 -RollingUpdateStrategy: 1 max unavailable, 1 max surge -OldReplicaSets: -NewReplicaSet: nginx-deployment-1564180365 (3/3 replicas created) -Events: - FirstSeen LastSeen Count From SubobjectPath Type Reason Message - --------- -------- ----- ---- ------------- -------- ------ ------- - 30m 30m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-2035384211 to 3 - 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 1 - 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 2 - 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 2 - 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 0 - 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-3066724191 to 2 - 29m 29m 1 {deployment-controller } Normal 
ScalingReplicaSet Scaled up replica set nginx-deployment-3066724191 to 1 - 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-1564180365 to 2 - 2m 2m 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-3066724191 to 0 - 2m 2m 1 {deployment-controller } Normal DeploymentRollback Rolled back deployment "nginx-deployment" to revision 2 - 29m 2m 2 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 3 -``` - -### 清理Policy - -你可以通过设置`.spec.revisonHistoryLimit`项来指定deployment最多保留多少revison历史记录。默认的会保留所有的revision;如果将该项设置为0,Deployment就不允许回退了。 - -## Deployment扩容 - -你可以使用以下命令扩容Deployment: - -```shell -$ kubectl scale deployment nginx-deployment --replicas 10 -deployment "nginx-deployment" scaled -``` - -假设你的集群中启用了[horizontal pod autoscaling](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough),你可以给Deployment设置一个autoscaler,基于当前Pod的CPU利用率选择最少和最多的Pod数。 - -```shell -$ kubectl autoscale deployment nginx-deployment --min=10 --max=15 --cpu-percent=80 -deployment "nginx-deployment" autoscaled -``` - -## 比例扩容 - -RollingUpdate Deployment支持同时运行一个应用的多个版本。当你活着autoscaler扩容RollingUpdate Deployment的时候,正在中途的rollout(进行中或者已经暂停的),为了降低风险,Deployment controller将会平衡已存在的活动中的ReplicaSets(有Pod的ReplicaSets)和新加入的replicas。这被称为比例扩容。 - -例如,你正在运行中含有10个replica的Deployment。maxSurge=3,maxUnavailable=2。 - -```shell -$ kubectl get deploy -NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE -nginx-deployment 10 10 10 10 50s -``` - -你更新了一个镜像,而在集群内部无法解析。 - -```shell -$ kubectl set image deploy/nginx-deployment nginx=nginx:sometag -deployment "nginx-deployment" image updated -``` - -镜像更新启动了一个包含ReplicaSet nginx-deployment-1989198191的新的rollout,但是它被阻塞了,因为我们上面提到的maxUnavailable。 - -```shell -$ kubectl get rs -NAME DESIRED CURRENT READY AGE -nginx-deployment-1989198191 5 5 0 9s -nginx-deployment-618515232 8 8 8 1m -``` - 
-然后发起了一个新的Deployment扩容请求。autoscaler将Deployment的repllica数目增加到了15个。Deployment controller需要判断在哪里增加这5个新的replica。如果我们没有谁用比例扩容,所有的5个replica都会加到一个新的ReplicaSet中。如果使用比例扩容,新添加的replica将传播到所有的ReplicaSet中。大的部分加入replica数最多的ReplicaSet中,小的部分加入到replica数少的ReplciaSet中。0个replica的ReplicaSet不会被扩容。 - -在我们上面的例子中,3个replica将添加到旧的ReplicaSet中,2个replica将添加到新的ReplicaSet中。rollout进程最终会将所有的replica移动到新的ReplicaSet中,假设新的replica成为健康状态。 - -```shell -$ kubectl get deploy -NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE -nginx-deployment 15 18 7 8 7m -$ kubectl get rs -NAME DESIRED CURRENT READY AGE -nginx-deployment-1989198191 7 7 0 7m -nginx-deployment-618515232 11 11 11 7m -``` - -## 暂停和恢复Deployment - -你可以在出发一次或多次更新前暂停一个Deployment,然后再恢复它。这样你就能多次暂停和恢复Deployment,在此期间进行一些修复工作,而不会出发不必要的rollout。 - -例如使用刚刚创建Deployment: - -```shell -$ kubectl get deploy -NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE -nginx 3 3 3 3 1m -[mkargaki@dhcp129-211 kubernetes]$ kubectl get rs -NAME DESIRED CURRENT READY AGE -nginx-2142116321 3 3 3 1m -``` - -使用以下命令暂停Deployment: - -```shell -$ kubectl rollout pause deployment/nginx-deployment -deployment "nginx-deployment" paused -``` - -然后更新Deplyment中的镜像: - -```shell -$ kubectl set image deploy/nginx nginx=nginx:1.9.1 -deployment "nginx-deployment" image updated -``` - -注意新的rollout启动了: - -```shell -$ kubectl rollout history deploy/nginx -deployments "nginx" -REVISION CHANGE-CAUSE -1 - -$ kubectl get rs -NAME DESIRED CURRENT READY AGE -nginx-2142116321 3 3 3 2m -``` - -你可以进行任意多次更新,例如更新使用的资源: - -```shell -$ kubectl set resources deployment nginx -c=nginx --limits=cpu=200m,memory=512Mi -deployment "nginx" resource requirements updated -``` - -Deployment暂停前的初始状态将继续它的功能,而不会对Deployment的更新产生任何影响,只要Deployment是暂停的。 - -最后,恢复这个Deployment,观察完成更新的ReplicaSet已经创建出来了: - -```shell -$ kubectl rollout resume deploy nginx -deployment "nginx" resumed -$ KUBECTL get rs -w -NAME DESIRED CURRENT READY AGE -nginx-2142116321 2 2 2 2m -nginx-3926361531 2 2 0 6s -nginx-3926361531 2 2 1 18s -nginx-2142116321 1 2 2 
2m -nginx-2142116321 1 2 2 2m -nginx-3926361531 3 2 1 18s -nginx-3926361531 3 2 1 18s -nginx-2142116321 1 1 1 2m -nginx-3926361531 3 3 1 18s -nginx-3926361531 3 3 2 19s -nginx-2142116321 0 1 1 2m -nginx-2142116321 0 1 1 2m -nginx-2142116321 0 0 0 2m -nginx-3926361531 3 3 3 20s -^C -$ KUBECTL get rs -NAME DESIRED CURRENT READY AGE -nginx-2142116321 0 0 0 2m -nginx-3926361531 3 3 3 28s -``` - -**注意:** 在恢复Deployment之前你无法回退一个暂停了个Deployment。 - -## Deployment状态 - -Deployment在生命周期中有多种状态。在创建一个新的ReplicaSet的时候它可以是 [progressing](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#progressing-deployment) 状态, [complete](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#complete-deployment) 状态,或者[fail to progress](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#failed-deployment)状态。 - -### Progressing Deployment - -Kubernetes将执行过下列任务之一的Deployment标记为*progressing*状态: - -- Deployment正在创建新的ReplicaSet过程中。 -- Deployment正在扩容一个已有的ReplicaSet。 -- Deployment正在缩容一个已有的ReplicaSet。 -- 有新的可用的pod出现。 - -你可以使用`kubectl roullout status`命令监控Deployment的进度。 - -### Complete Deployment - -Kubernetes将包括以下特性的Deployment标记为*complete*状态: - -- Deployment最小可用。最小可用意味着Deployment的可用replica个数等于或者超过Deployment策略中的期望个数。 -- 所有与该Deployment相关的replica都被更新到了你指定版本,也就说更新完成。 -- 该Deployment中没有旧的Pod存在。 - -你可以用`kubectl rollout status`命令查看Deployment是否完成。如果rollout成功完成,`kubectl rollout status`将返回一个0值的Exit Code。 - -``` -$ kubectl rollout status deploy/nginx -Waiting for rollout to finish: 2 of 3 updated replicas are available... -deployment "nginx" successfully rolled out -$ echo $? 
-0 -``` - -### Failed Deployment - -你的Deployment在尝试部署新的ReplicaSet的时候可能卡住,用于也不会完成。这可能是因为以下几个因素引起的: - -- 无效的引用 -- 不可读的probe failure -- 镜像拉取错误 -- 权限不够 -- 范围限制 -- 程序运行时配置错误 - -探测这种情况的一种方式是,在你的Deployment spec中指定[`spec.progressDeadlineSeconds`](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#progress-deadline-seconds)。`spec.progressDeadlineSeconds` 表示Deployment controller等待多少秒才能确定(通过Deployment status)Deployment进程是卡住的。 - -下面的`kubectl`命令设置`progressDeadlineSeconds` 使controller在Deployment在进度卡住10分钟后报告: - -``` -$ kubectl patch deployment/nginx-deployment -p '{"spec":{"progressDeadlineSeconds":600}}' -"nginx-deployment" patched -``` - -Once the deadline has been exceeded, the Deployment controller adds a with the following attributes to the Deployment's - -当超过截止时间后,Deployment controller会在Deployment的 `status.conditions`中增加一条DeploymentCondition,它包括如下属性: - -- Type=Progressing -- Status=False -- Reason=ProgressDeadlineExceeded - -浏览 [Kubernetes API conventions](https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#typical-status-properties) 查看关于status conditions的更多信息。 - -**注意:** kubernetes除了报告`Reason=ProgressDeadlineExceeded`状态信息外不会对卡住的Deployment做任何操作。更高层次的协调器可以利用它并采取相应行动,例如,回滚Deployment到之前的版本。 - -**注意:** 如果你暂停了一个Deployment,在暂停的这段时间内kubernetnes不会检查你指定的deadline。你可以在Deployment的rollout途中安全的暂停它,然后再恢复它,这不会触发超过deadline的状态。 - -你可能在使用Deployment的时候遇到一些短暂的错误,这些可能是由于你设置了太短的timeout,也有可能是因为各种其他错误导致的短暂错误。例如,假设你使用了无效的引用。当你Describe Deployment的时候可能会注意到如下信息: - -``` -$ kubectl describe deployment nginx-deployment -<...> -Conditions: - Type Status Reason - ---- ------ ------ - Available True MinimumReplicasAvailable - Progressing True ReplicaSetUpdated - ReplicaFailure True FailedCreate -<...> -``` - -执行 `kubectl get deployment nginx-deployment -o yaml`,Deployement 的状态可能看起来像这个样子: - -```yaml -status: - availableReplicas: 2 - conditions: - - lastTransitionTime: 2016-10-04T12:25:39Z - lastUpdateTime: 
2016-10-04T12:25:39Z - message: Replica set "nginx-deployment-4262182780" is progressing. - reason: ReplicaSetUpdated - status: "True" - type: Progressing - - lastTransitionTime: 2016-10-04T12:25:42Z - lastUpdateTime: 2016-10-04T12:25:42Z - message: Deployment has minimum availability. - reason: MinimumReplicasAvailable - status: "True" - type: Available - - lastTransitionTime: 2016-10-04T12:25:39Z - lastUpdateTime: 2016-10-04T12:25:39Z - message: 'Error creating: pods "nginx-deployment-4262182780-" is forbidden: exceeded quota: - object-counts, requested: pods=1, used: pods=3, limited: pods=2' - reason: FailedCreate - status: "True" - type: ReplicaFailure - observedGeneration: 3 - replicas: 2 - unavailableReplicas: 2 -``` - -最终,一旦超过Deployment进程的deadline,kuberentes会更新状态和导致Progressing状态的原因: - -``` -Conditions: - Type Status Reason - ---- ------ ------ - Available True MinimumReplicasAvailable - Progressing False ProgressDeadlineExceeded - ReplicaFailure True FailedCreate - -``` - -你可以通过缩容Deployment的方式解决配额不足的问题,或者增加你的namespace的配额。如果你满足了配额条件后,Deployment controller就会完成你的Deployment rollout,你将看到Deployment的状态更新为成功状态(`Status=True`并且`Reason=NewReplicaSetAvailable`)。 - -``` -Conditions: - Type Status Reason - ---- ------ ------ - Available True MinimumReplicasAvailable - Progressing True NewReplicaSetAvailable - -``` - -`Type=Available`、 `Status=True` 以为这你的Deployment有最小可用性。 最小可用性是在Deployment策略中指定的参数。`Type=Progressing` 、 `Status=True`意味着你的Deployment 或者在部署过程中,或者已经成功部署,达到了期望的最少的可用replica数量(查看特定状态的Reason——在我们的例子中`Reason=NewReplicaSetAvailable` 意味着Deployment已经完成)。 - -你可以使用`kubectl rollout status`命令查看Deployment进程是否失败。当Deployment过程超过了deadline,`kubectl rollout status`将返回非0的exit code。 - -``` -$ kubectl rollout status deploy/nginx -Waiting for rollout to finish: 2 out of 3 new replicas have been updated... -error: deployment "nginx" exceeded its progress deadline -$ echo $? 
-1 -``` - -### 操作失败的Deployment - -所有对完成的Deployment的操作都适用于失败的Deployment。你可以对它阔/缩容,回退到历史版本,你甚至可以多次暂停它来应用Deployment pod template。 - -## 清理Policy - -你可以设置Deployment中的 `.spec.revisionHistoryLimit` 项来指定保留多少旧的ReplicaSet。 余下的将在后台被当作垃圾收集。默认的,所有的revision历史就都会被保留。在未来的版本中,将会更改为2。 - -**注意:** 将该值设置为0,将导致所有的Deployment历史记录都会被清除,该Deploynent就无法再回退了。 - -## 用例 - -### 金丝雀Deployment - -如果你想要使用Deployment对部分用户或服务器发布relaese,你可以创建多个Deployment,每个对一个release,参照[managing resources](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/cluster-administration/manage-deployment/#canary-deployments) 中对金丝雀模式的描述。 - -## 编写Deployment Spec - -在所有的Kubernetes配置中,Deployment也需要`apiVersion`,`kind`和`metadata`这些配置项。配置文件的通用使用说明查看[部署应用](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/tutorials/stateless-application/run-stateless-application-deployment),配置容器,和[使用kubeclt管理资源](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/tutorials/object-management-kubectl/object-management)文档。 - -Deployment也需要 [`.spec` section](https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status). - -### Pod Template - - `.spec.template` 是 `.spec`中唯一要求的字段。 - -`.spec.template` 是 [pod template](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/user-guide/replication-controller/#pod-template). 
它跟 [Pod](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/user-guide/pods)有一模一样的schema,除了它是嵌套的并且不需要`apiVersion` 和 `kind`字段。 - -另外为了划分Pod的范围,Deployment中的pod template必须指定适当的label(不要跟其他controller重复了,参考[selector](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#selector))和适当的重启策略。 - -[`.spec.template.spec.restartPolicy`](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/pods/pod-lifecycle) 可以设置为 `Always` , 如果不指定的话这就是默认配置。 - -### Replicas - -`.spec.replicas` 是可以选字段,指定期望的pod数量,默认是1。 - -### Selector - -`.spec.selector`是可选字段,用来指定 [label selector](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/overview/working-with-objects/labels) ,圈定Deployment管理的pod范围。 - -如果被指定, `.spec.selector` 必须匹配 `.spec.template.metadata.labels`,否则它将被API拒绝。如果 `.spec.selector` 没有被指定, `.spec.selector.matchLabels` 默认是 `.spec.template.metadata.labels`。 - -在Pod的template跟`.spec.template`不同或者数量超过了`.spec.replicas`规定的数量的情况下,Deployment会杀掉label跟selector不同的Pod。 - -**注意:** 你不应该再创建其他label跟这个selector匹配的pod,或者通过其他Deployment,或者通过其他Controller,例如ReplicaSet和ReplicationController。否则该Deployment会被把它们当成都是自己创建的。Kubernetes不会阻止你这么做。 - -如果你有多个controller使用了重复的selector,controller们就会互相打架并导致不正确的行为。 - -### 策略 - -`.spec.strategy` 指定新的Pod替换旧的Pod的策略。 `.spec.strategy.type` 可以是"Recreate"或者是 "RollingUpdate"。"RollingUpdate"是默认值。 - -#### Recreate Deployment - -`.spec.strategy.type==Recreate`时,在创建出新的Pod之前会先杀掉所有已存在的Pod。 - -#### Rolling Update Deployment - -`.spec.strategy.type==RollingUpdate`时,Deployment使用[rolling update](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/tasks/run-application/rolling-update-replication-controller) 的方式更新Pod 。你可以指定`maxUnavailable` 和 `maxSurge` 来控制 rolling update 进程。 - -##### Max Unavailable - -`.spec.strategy.rollingUpdate.maxUnavailable` 
是可选配置项,用来指定在升级过程中不可用Pod的最大数量。该值可以是一个绝对值(例如5),也可以是期望Pod数量的百分比(例如10%)。通过计算百分比的绝对值向下取整。如果`.spec.strategy.rollingUpdate.maxSurge` 为0时,这个值不可以为0。默认值是1。 - -例如,该值设置成30%,启动rolling update后旧的ReplicatSet将会立即缩容到期望的Pod数量的70%。新的Pod ready后,随着新的ReplicaSet的扩容,旧的ReplicaSet会进一步缩容,确保在升级的所有时刻可以用的Pod数量至少是期望Pod数量的70%。 - -##### Max Surge - -`.spec.strategy.rollingUpdate.maxSurge` 是可选配置项,用来指定可以超过期望的Pod数量的最大个数。该值可以是一个绝对值(例如5)或者是期望的Pod数量的百分比(例如10%)。当`MaxUnavailable`为0时该值不可以为0。通过百分比计算的绝对值向上取整。默认值是1。 - -例如,该值设置成30%,启动rolling update后新的ReplicatSet将会立即扩容,新老Pod的总数不能超过期望的Pod数量的130%。旧的Pod被杀掉后,新的ReplicaSet将继续扩容,旧的ReplicaSet会进一步缩容,确保在升级的所有时刻所有的Pod数量和不会超过期望Pod数量的130%。 - -### Progress Deadline Seconds - -`.spec.progressDeadlineSeconds` 是可选配置项,用来指定在系统报告Deployment的[failed progressing](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#failed-deployment) ——表现为resource的状态中`type=Progressing`、`Status=False`、 `Reason=ProgressDeadlineExceeded`前可以等待的Deployment进行的秒数。Deployment controller会继续重试该Deployment。未来,在实现了自动回滚后, deployment controller在观察到这种状态时就会自动回滚。 - -如果设置该参数,该值必须大于 `.spec.minReadySeconds`。 - -### Min Ready Seconds - -`.spec.minReadySeconds`是一个可选配置项,用来指定没有任何容器crash的Pod并被认为是可用状态的最小秒数。默认是0(Pod在ready后就会被认为是可用状态)。进一步了解什么什么后Pod会被认为是ready状态,参阅 [Container Probes](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/pods/pod-lifecycle/#container-probes)。 - -### Rollback To - -`.spec.rollbackTo` 是一个可以选配置项,用来配置Deployment回退的配置。设置该参数将触发回退操作,每次回退完成后,该值就会被清除。 - -#### Revision - -`.spec.rollbackTo.revision`是一个可选配置项,用来指定回退到的revision。默认是0,意味着回退到历史中最老的revision。 - -### Revision History Limit - -Deployment revision history存储在它控制的ReplicaSets中。 - -`.spec.revisionHistoryLimit` 是一个可选配置项,用来指定可以保留的旧的ReplicaSet数量。该理想值取决于心Deployment的频率和稳定性。如果该值没有设置的话,默认所有旧的Replicaset或会被保留,将资源存储在etcd中,是用`kubectl get rs`查看输出。每个Deployment的该配置都保存在ReplicaSet中,然而,一旦你删除的旧的RepelicaSet,你的Deployment就无法再回退到那个revison了。 - 
-如果你将该值设置为0,所有具有0个replica的ReplicaSet都会被删除。在这种情况下,新的Deployment rollout无法撤销,因为revision history都被清理掉了。 - -### Paused - -`.spec.paused`是可以可选配置项,boolean值。用来指定暂停和恢复Deployment。Paused和没有paused的Deployment之间的唯一区别就是,所有对paused deployment中的PodTemplateSpec的修改都不会触发新的rollout。Deployment被创建之后默认是非paused。 - -## Alternative to Deployments - -### kubectl rolling update - -[Kubectl rolling update](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/user-guide/kubectl/v1.6/#rolling-update) 虽然使用类似的方式更新Pod和ReplicationController。但是我们推荐使用Deployment,因为它是声明式的,客户端侧,具有附加特性,例如即使滚动升级结束后也可以回滚到任何历史版本。 \ No newline at end of file diff --git a/README.md b/README.md index 51ade61bd..fd96f0e5e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,16 @@ # Kubernetes Handbook -玩转Kubernetes,我就看kubernetes handbook! +Kubernetes是谷歌开源的容器集群管理系统,是Google多年大规模容器管理技术Borg的开源版本,也是CNCF最重要的组件之一,主要功能包括: + +- 基于容器的应用部署、维护和滚动升级 +- 负载均衡和服务发现 +- 跨机器和跨地区的集群调度 +- 自动伸缩 +- 无状态服务和有状态服务 +- 广泛的Volume支持 +- 插件机制保证扩展性 + +Kubernetes发展非常迅速,已经成为容器编排领域的领导者。Kubernetes的中文资料也非常丰富,但系统化和紧跟社区更新的则就比较少见了。《Kubernetes指南》开源电子书旨在整理平时在开发和使用Kubernetes时的参考指南和实践心得,更是为了形成一个系统化的参考指南以方便查阅。欢迎大家关注,更欢迎大家一起添加更多更好的内容。 本书所有的组件安装、示例和操作等都基于**Kubernetes1.6.0**版本。 @@ -8,42 +18,7 @@ GitHub地址:https://github.com/rootsongjc/kubernetes-handbook -## 目录 - -- [0.0 介绍](README.md) -- [1.0 Kubernetes集群安装](00-kubernetes安装前言.md) - - [1.1 创建 TLS 证书和秘钥](01-创建TLS证书和密钥.md) - - [1.2 创建kubeconfig 文件](02-创建kubeconfig文件.md) - - [1.3 创建高可用etcd集群](03-创建高可用etcd集群.md) - - [1.4 安装kubectl命令行工具](04-安装kubectl命令行工具.md) - - [1.5 部署高可用master集群](05-部署高可用master集群.md) - - [1.6 部署node节点](06-部署node节点.md) - - [1.7 安装kubedns插件](07-安装kubedns插件.md) - - [1.8 安装dashboard插件](08-安装dashboard插件.md) - - [1.9 安装heapster插件](09-安装heapster插件.md) - - [1.10 安装EFK插件](10-安装EFK插件.md) -- 2.0 Kubernetes服务发现与负载均衡 - - [2.1 Ingress解析](11-ingress解析.md) - - [2.2 安装traefik ingress](12-安装traefik-ingress.md) - - [2.3 分布式负载测试](14-分布式负载测试.md) - - [2.4 kubernetes网络和集群性能测试](15-kubernetes网络和集群性能测试.md) - - [2.5 
边缘节点配置](18-边缘节点配置.md) -- 3.0 Kubernetes中的容器设计模式 TODO -- 4.0 Kubernetes中的概念解析 - - [4.1 Deployment概念解析](20-deployment概念解析.md) - - [4.2 kubernetes配置最佳实践.md](22-kubernetes配置最佳实践.md) -- 5.0 Kubernetes的安全设置 - - [5.1 Kubernetes中的RBAC支持](13-kubernetes中的RBAC支持.md) -- 6.0 Kubernetes网络配置 - - [6.1 Kubernetes中的网络模式解析](16-kubernetes中的网络模式解析.md) -- 7.0 Kubernetes存储配置 - - [7.1 使用glusterfs做持久化存储](17-使用glusterfs做持久化存储.md) -- 8.0 集群运维管理 - - [8.1 服务滚动升级](19-服务滚动升级.md) - - [8.2 应用日志收集](21-应用日志收集.md) -- 9.0 Kubernetes领域应用 - - 9.1 Spark on Kubernetes TODO -- [10.0 问题记录](issues.md) +[文章目录](SUMMARY.md) ## 说明 @@ -51,7 +26,7 @@ GitHub地址:https://github.com/rootsongjc/kubernetes-handbook [Kubernetes集群安装部分](00-kubernetes安装前言.md)(1.0-1.10章节)在[opsnull](https://github.com/opsnull/follow-me-install-kubernetes-cluster)的基础上进行了编辑、修改和整理而成。 -## 如何使用 +## 如何阅读 **在线浏览** @@ -103,5 +78,7 @@ pandoc --latex-engine=xelatex --template=pm-template input.md -o output.pdf [Jimmy Song](http://rootsongjc.github.io/about) -[opsnull](http://github.com/opsnull) +[feiskyer](https://github.com/feiskyer) + + diff --git a/SUMMARY.md b/SUMMARY.md index 40fd23d33..e611baf1b 100644 --- a/SUMMARY.md +++ b/SUMMARY.md @@ -1,36 +1,100 @@ # Summary -* [0.0 介绍](README.md) -* [1.0 Kubernetes集群安装](00-kubernetes安装前言.md) - * [1.1 创建TLS证书和秘钥](01-创建TLS证书和密钥.md) - * [1.2 创建kubeconfig 文件](02-创建kubeconfig文件.md) - * [1.3 创建高可用etcd集群](03-创建高可用etcd集群.md) - * [1.4 安装kubectl命令行工具](04-安装kubectl命令行工具.md) - * [1.5 部署高可用master集群](05-部署高可用master集群.md) - * [1.6 部署node节点](06-部署node节点.md) - * [1.7 安装kubedns插件](07-安装kubedns插件.md) - * [1.8 安装dashboard插件](08-安装dashboard插件.md) - * [1.9 安装heapster插件](09-安装heapster插件.md) - * [1.10 安装EFK插件](10-安装EFK插件.md) -* [2.0 Kubernetes服务发现与负载均衡]() - * [2.1 Ingress解析](11-ingress解析.md) - * [2.2 安装Traefik ingress](12-安装traefik-ingress.md) - * [2.3 分布式负载测试](14-分布式负载测试.md) - * [2.4 kubernetes网络和集群性能测试](15-kubernetes网络和集群性能测试.md) - * [2.5 边缘节点配置](18-边缘节点配置.md) -* [3.0 Kubernetes中的容器设计模式]() -* [4.0 Kubernetes中的概念解析]() - * [4.1 
Deployment概念解析](20-deployment概念解析.md) - * [4.2 kubernetes配置最佳实践.md](22-kubernetes配置最佳实践.md) -* [5.0 Kubernetes的安全设置]() - * [5.1 Kubernetes中的RBAC支持](13-kubernetes中的RBAC支持.md) -* [6.0 Kubernetes网络配置]() - * [6.1 Kubernetes中的网络模式解析](16-kubernetes中的网络模式解析.md) -* [7.0 Kubernetes存储配置]() - * [7.1 使用glusterfs做持久化存储](17-使用glusterfs做持久化存储.md) -- [8.0 集群运维管理]() - - [8.1 服务滚动升级](19-服务滚动升级.md) - - [8.2 应用日志收集](21-应用日志收集.md) -- [9.0 Kubernetes领域应用]() -- [10.0 问题记录](issues.md) - +- [前言](README.md) +- [1. Kubernetes简介](introduction/index.md) + - [1.1 核心概念](introduction/concepts.md) + - [1.2 Kubernetes 101](introduction/101.md) + - [1.3 Kubernetes 201](introduction/201.md) + - [1.4 Kubernetes集群](introduction/cluster.md) +- [2. 核心原理](architecture/index.md) + - [2.1 设计理念](architecture/concepts.md) + - [2.2 主要概念](architecture/objects.md) + - [2.2.1 Pod, Service, Namespace和Node](introduction/concepts.md) + - [2.2.2 Service](architecture/Service.md) + - [2.2.3 Volume和Persistent Volume](architecture/Volume.md) + - [2.2.4 Deployment](architecture/deployment.md) + - [2.2.5 Secret](architecture/Secret.md) + - [2.2.6 StatefulSet](architecture/statefulset.md) + - [2.2.7 DaemonSet](architecture/daemonset.md) + - [2.2.8 ServiceAccount](architecture/serviceaccount.md) + - [2.2.9 ReplicationController和ReplicaSet](architecture/replicaset.md) + - [2.2.10 Job](architecture/job.md) + - [2.2.11 CronJob](architecture/cronjob.md) + - SecurityContext + - Resource Quota + - Pod Security Policy + - Horizontal Pod Autoscaling + - Network Policy + - Ingress + - ThirdPartyResources + - [2.3 核心组件的工作原理](components/index.md) + - Etcd + - API Server + - Scheduler + - Controller Manager + - Kubelet + - Kube Proxy + - Kube DNS + - hyperkube + - Federation + - [kubeadm](architecture/kubeadm.md) +- [3. 
插件指南](plugins/index.md) + - [3.1 认证和授权插件](plugins/auth.md) + - [3.2 网络插件](plugins/network.md) + - [3.3 Volume插件](plugins/volume.md) + - [3.4 Container Runtime Interface](plugins/CRI.md) + - 3.5 Network Policy + - 3.6 Ingress Controller + - 3.7 Cloud Provider + - 3.8 Scheduler + - [3.9 其他](plugins/other.md) +- [4. 常用技巧](deploy/index.md) + - [4.1 部署](deploy/index.md) + - [4.1.1 单机部署](deploy/single.md) + - [4.1.2 集群部署](deploy/cluster.md) + - [4.1.3 kubeadm](deploy/kubeadm.md) + - [4.1.4 附加组件](addons/index.md) + - [ 4.2 在CentOS上部署kubernetes1.6集群](deploy/centos/install-kbernetes1.6-on-centos.md) + - [4.2.1 创建TLS证书和秘钥](deploy/centos/create-tls-and-secret-key.md) + - [4.2.2 创建kubeconfig 文件](deploy/centos/create-kubeconfig.md) + - [4.2.3 创建高可用etcd集群](deploy/centos/etcd-cluster-installation.md) + - [4.2.4 安装kubectl命令行工具](deploy/centos/kubectl-installation.md) + - [4.2.5 部署高可用master集群](deploy/centos/master-installation.md) + - [4.2.6 部署node节点](deploy/centos/node-installation.md) + - [4.2.7 安装kubedns插件](deploy/centos/kubedns-addon-installation.md) + - [4.2.8 安装dashboard插件](deploy/centos/dashboard-addon-installation.md) + - [4.2.9 安装heapster插件](deploy/centos/heapster-addon-installation.md) + - [4.2.10 安装EFK插件](deploy/centos/efk-addon-installation.md) + - [4.2 监控](monitor/index.md) + - [4.3 日志](deploy/logging.md) + - [4.4 高可用](ha/index.md) + - [4.5 调试](debugging/index.md) + - [4.6 kubernetes配置最佳实践](deploy/kubernetes-configuration-best-practice.md) + - [4.7 安全配置](security/security-configuration.md) + * [5.1 Kubernetes中的RBAC支持](./security/rbac-support-in-kubernetes.md) + - [4.8 网络配置](network/network-configuration.md) + * [6.1 Kubernetes中的网络模式解析](network/network-modes-in-kubernetes.md) + - [4.9 存储配置](storage/storage-configuration.md) + - [7.1 使用glusterfs做持久化存储](storage/using-glusterfs-for-persistent-storage.md) +- [5. 
服务发现与负载均衡](service-discovery-lb/service-discovery-and-load-balancing.md) + * [5.1 Ingress解析](service-discovery-lb/ingress-concept.md) + * [5.2 安装Traefik ingress](service-discovery-lb/traefik-ingress-installation.md) + * [5.3 分布式负载测试](service-discovery-lb/distributed-load-test.md) + * [5.4 网络和集群性能测试](service-discovery-lb/network-and-cluster-perfermance-test.md) + * [5.5 边缘节点配置](service-discovery-lb/edge-node-configuration.md) +- [6. 开发指南](dev/index.md) + - [6.1 开发环境搭建](dev/index.md) + - [6.2 单元测试和集成测试](dev/testing.md) + - [6.3 社区贡献](dev/contribute.md) +- [7. 应用管理](apps/index.md) + - [7.1 Helm](apps/helm-app.md) + - [7.2 Deis workflow](apps/deis.md) +- [8. 运维管理](ops/opration-administration.md) + - [8.1 服务滚动升级](ops/service-rolling-update.md) + - [8.2 应用日志收集](ops/app-log-collection.md) +- [9. 附录](appendix/index.md) + - [9.1 awesome-docker](appendix/awesome-docker.md) + - [9.2 awesome-kubernetes](appendix/awesome-kubernetes.md) + - [9.3 Kubernetes ecosystem](ecosystem.md) + - [9.4 参考文档](reference.md) +- [10. 问题记录](issues.md) \ No newline at end of file diff --git a/architecture/deployment.md b/architecture/deployment.md index 3a27d5b82..a65943eb1 100644 --- a/architecture/deployment.md +++ b/architecture/deployment.md @@ -1,5 +1,7 @@ # Deployment +## 简述 + Deployment为Pod和ReplicaSet提供了一个声明式定义(declarative)方法,用来替代以前的ReplicationController来方便的管理应用。典型的应用场景包括: - 定义Deployment来创建Pod和ReplicaSet @@ -51,3 +53,747 @@ kubectl set image deployment/nginx-deployment nginx=nginx:1.9.1 ``` kubectl rollout undo deployment/nginx-deployment ``` + +## Deployment概念详细解析 + +本文翻译自kubernetes官方文档:https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md + +根据2017年5月10日的Commit 8481c02 翻译。 + +## Deployment是什么?
+ +Deployment为Pod和Replica Set(下一代Replication Controller)提供声明式更新。 + +你只需要在Deployment中描述你想要的目标状态是什么,Deployment controller就会帮你将Pod和Replica Set的实际状态改变到你的目标状态。你可以定义一个全新的Deployment,也可以创建一个新的替换旧的Deployment。 + +一个典型的用例如下: + +- 使用Deployment来创建ReplicaSet。ReplicaSet在后台创建pod。检查启动状态,看它是成功还是失败。 +- 然后,通过更新Deployment的PodTemplateSpec字段来声明Pod的新状态。这会创建一个新的ReplicaSet,Deployment会按照控制的速率将pod从旧的ReplicaSet移动到新的ReplicaSet中。 +- 如果当前状态不稳定,回滚到之前的Deployment revision。每次回滚都会更新Deployment的revision。 +- 扩容Deployment以满足更高的负载。 +- 暂停Deployment来应用PodTemplateSpec的多个修复,然后恢复上线。 +- 根据Deployment 的状态判断上线是否hang住了。 +- 清除旧的不必要的ReplicaSet。 + +## 创建Deployment + +下面是一个Deployment示例,它创建了一个Replica Set来启动3个nginx pod。 + +下载示例文件并执行命令: + +```shell +$ kubectl create -f docs/user-guide/nginx-deployment.yaml --record +deployment "nginx-deployment" created +``` + +将kubectl的 `--record` 的flag设置为 `true`可以在annotation中记录当前命令创建或者升级了该资源。这在未来会很有用,例如,查看在每个Deployment revision中执行了哪些命令。 + +然后立即执行`get`命令将获得如下结果: + +```shell +$ kubectl get deployments +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +nginx-deployment 3 0 0 0 1s +``` + +输出结果表明我们希望的replica数是3(根据deployment中的`.spec.replicas`配置)当前replica数( `.status.replicas`)是0, 最新的replica数(`.status.updatedReplicas`)是0,可用的replica数(`.status.availableReplicas`)是0。 + +过几秒后再执行`get`命令,将获得如下输出: + +```shell +$ kubectl get deployments +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +nginx-deployment 3 3 3 3 18s +``` + +我们可以看到Deployment已经创建了3个replica,所有的replica都已经是最新的了(包含最新的pod template),可用的(根据Deployment中的`.spec.minReadySeconds`声明,处于已就绪状态的pod的最少个数)。执行`kubectl get rs`和`kubectl get pods`会显示Replica Set(RS)和Pod已创建。 + +```shell +$ kubectl get rs +NAME DESIRED CURRENT READY AGE +nginx-deployment-2035384211 3 3 0 18s +``` + +你可能会注意到Replica Set的名字总是`<Deployment的名字>-<pod template的hash值>`。 + +```shell +$ kubectl get pods --show-labels +NAME READY STATUS RESTARTS AGE LABELS +nginx-deployment-2035384211-7ci7o 1/1 Running 0 18s app=nginx,pod-template-hash=2035384211 +nginx-deployment-2035384211-kzszj 1/1 Running 0 18s
app=nginx,pod-template-hash=2035384211 +nginx-deployment-2035384211-qqcnn 1/1 Running 0 18s app=nginx,pod-template-hash=2035384211 +``` + +刚创建的Replica Set将保证总是有3个nginx的pod存在。 + +**注意:** 你必须在Deployment中的selector指定正确pod template label(在该示例中是 `app = nginx`),不要跟其他的controller搞混了(包括Deployment、Replica Set、Replication Controller等)。**Kubernetes本身不会阻止你这么做**,如果你真的这么做了,这些controller之间会相互打架,并可能导致不正确的行为。 + +## 更新Deployment + +**注意:** Deployment的rollout当且仅当Deployment的pod template(例如`.spec.template`)中的label更新或者镜像更改时被触发。其他更新,例如扩容Deployment不会触发rollout。 + +假如我们现在想要让nginx pod使用`nginx:1.9.1`的镜像来代替原来的`nginx:1.7.9`的镜像。 + +```shell +$ kubectl set image deployment/nginx-deployment nginx=nginx:1.9.1 +deployment "nginx-deployment" image updated +``` + +我们可以使用`edit`命令来编辑Deployment,修改 `.spec.template.spec.containers[0].image` ,将`nginx:1.7.9` 改写成 `nginx:1.9.1`。 + +```shell +$ kubectl edit deployment/nginx-deployment +deployment "nginx-deployment" edited +``` + +查看rollout的状态,只要执行: + +```shell +$ kubectl rollout status deployment/nginx-deployment +Waiting for rollout to finish: 2 out of 3 new replicas have been updated... +deployment "nginx-deployment" successfully rolled out +``` + +Rollout成功后,`get` Deployment: + +```shell +$ kubectl get deployments +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +nginx-deployment 3 3 3 3 36s +``` + +UP-TO-DATE的replica的数目已经达到了配置中要求的数目。 + +CURRENT的replica数表示Deployment管理的replica数量,AVAILABLE的replica数是当前可用的replica数量。 + +We can run `kubectl get rs` to see that the Deployment updated the Pods by creating a new Replica Set and scaling it up to 3 replicas, as well as scaling down the old Replica Set to 0 replicas. 
+ +我们通过执行`kubectl get rs`可以看到Deployment更新了Pod,通过创建一个新的Replica Set并扩容了3个replica,同时将原来的Replica Set缩容到了0个replica。 + +```shell +$ kubectl get rs +NAME DESIRED CURRENT READY AGE +nginx-deployment-1564180365 3 3 0 6s +nginx-deployment-2035384211 0 0 0 36s +``` + +执行 `get pods`只会看到当前的新的pod: + +```shell +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +nginx-deployment-1564180365-khku8 1/1 Running 0 14s +nginx-deployment-1564180365-nacti 1/1 Running 0 14s +nginx-deployment-1564180365-z9gth 1/1 Running 0 14s +``` + +下次更新这些pod的时候,只需要更新Deployment中的pod的template即可。 + +Deployment可以保证在升级时只有一定数量的Pod是down的。默认的,它会确保至少有比期望的Pod数量少一个的Pod是up状态(最多一个不可用)。 + +Deployment同时也可以确保只创建出超过期望数量的一定数量的Pod。默认的,它会确保最多比期望的Pod数量多一个的Pod是up的(最多1个surge)。 + +**在未来的Kuberentes版本中,将从1-1变成25%-25%)。** + +例如,如果你自己看下上面的Deployment,你会发现,开始创建一个新的Pod,然后删除一些旧的Pod再创建一个新的。当新的Pod创建出来之前不会杀掉旧的Pod。这样能够确保可用的Pod数量至少有2个,Pod的总数最多4个。 + +```shell +$ kubectl describe deployments +Name: nginx-deployment +Namespace: default +CreationTimestamp: Tue, 15 Mar 2016 12:01:06 -0700 +Labels: app=nginx +Selector: app=nginx +Replicas: 3 updated | 3 total | 3 available | 0 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 1 max unavailable, 1 max surge +OldReplicaSets: +NewReplicaSet: nginx-deployment-1564180365 (3/3 replicas created) +Events: + FirstSeen LastSeen Count From SubobjectPath Type Reason Message + --------- -------- ----- ---- ------------- -------- ------ ------- + 36s 36s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-2035384211 to 3 + 23s 23s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 1 + 23s 23s 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 2 + 23s 23s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 2 + 21s 21s 1 {deployment-controller } Normal ScalingReplicaSet Scaled 
down replica set nginx-deployment-2035384211 to 0 + 21s 21s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 3 +``` + +我们可以看到当我们刚开始创建这个Deployment的时候,创建了一个Replica Set(nginx-deployment-2035384211),并直接扩容到了3个replica。 + +当我们更新这个Deployment的时候,它会创建一个新的Replica Set(nginx-deployment-1564180365),将它扩容到1个replica,然后缩容原先的Replica Set到2个replica,此时满足至少2个Pod是可用状态,同一时刻最多有4个Pod处于创建的状态。 + +接着继续使用相同的rolling update策略扩容新的Replica Set和缩容旧的Replica Set。最终,将会在新的Replica Set中有3个可用的replica,旧的Replica Set的replica数目变成0。 + +### Rollover(多个rollout并行) + +每当Deployment controller观测到有新的deployment被创建时,如果没有已存在的Replica Set来创建期望个数的Pod的话,就会创建出一个新的Replica Set来做这件事。已存在的Replica Set控制label匹配`.spec.selector`但是template跟`.spec.template`不匹配的Pod缩容。最终,新的Replica Set将会扩容出`.spec.replicas`指定数目的Pod,旧的Replica Set会缩容到0。 + +如果你更新了一个的已存在并正在进行中的Deployment,每次更新Deployment都会创建一个新的Replica Set并扩容它,同时回滚之前扩容的Replica Set——将它添加到旧的Replica Set列表,开始缩容。 + +例如,假如你创建了一个有5个`niginx:1.7.9` replica的Deployment,但是当还只有3个`nginx:1.7.9`的replica创建出来的时候你就开始更新含有5个`nginx:1.9.1` replica的Deployment。在这种情况下,Deployment会立即杀掉已创建的3个`nginx:1.7.9`的Pod,并开始创建`nginx:1.9.1`的Pod。它不会等到所有的5个`nginx:1.7.9`的Pod都创建完成后才开始改变航道。 + +## 回退Deployment + +有时候你可能想回退一个Deployment,例如,当Deployment不稳定时,比如一直crash looping。 + +默认情况下,kubernetes会在系统中保存前两次的Deployment的rollout历史记录,以便你可以随时会退(你可以修改`revision history limit`来更改保存的revision数)。ß + +**注意:** 只要Deployment的rollout被触发就会创建一个revision。也就是说当且仅当Deployment的Pod template(如`.spec.template`)被更改,例如更新template中的label和容器镜像时,就会创建出一个新的revision。 + +其他的更新,比如扩容Deployment不会创建revision——因此我们可以很方便的手动或者自动扩容。这意味着当你回退到历史revision是,直邮Deployment中的Pod template部分才会回退。 + +假设我们在更新Deployment的时候犯了一个拼写错误,将镜像的名字写成了`nginx:1.91`,而正确的名字应该是`nginx:1.9.1`: + +```shell +$ kubectl set image deployment/nginx-deployment nginx=nginx:1.91 +deployment "nginx-deployment" image updated +``` + +Rollout将会卡住。 + +```shell +$ kubectl rollout status deployments nginx-deployment +Waiting for rollout to finish: 2 out of 3 new replicas have been 
updated... +``` + +按住Ctrl-C停止上面的rollout状态监控。 + +你会看到旧的replicas(nginx-deployment-1564180365 和 nginx-deployment-2035384211)和新的replicas (nginx-deployment-3066724191)数目都是2个。 + +```shell +$ kubectl get rs +NAME DESIRED CURRENT READY AGE +nginx-deployment-1564180365 2 2 0 25s +nginx-deployment-2035384211 0 0 0 36s +nginx-deployment-3066724191 2 2 2 6s +``` + +看下创建Pod,你会看到有两个新的呃Replica Set创建的Pod处于ImagePullBackOff状态,循环拉取镜像。 + +```shell +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +nginx-deployment-1564180365-70iae 1/1 Running 0 25s +nginx-deployment-1564180365-jbqqo 1/1 Running 0 25s +nginx-deployment-3066724191-08mng 0/1 ImagePullBackOff 0 6s +nginx-deployment-3066724191-eocby 0/1 ImagePullBackOff 0 6s +``` + +注意,Deployment controller会自动停止坏的rollout,并停止扩容新的Replica Set。 + +```shell +$ kubectl describe deployment +Name: nginx-deployment +Namespace: default +CreationTimestamp: Tue, 15 Mar 2016 14:48:04 -0700 +Labels: app=nginx +Selector: app=nginx +Replicas: 2 updated | 3 total | 2 available | 2 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 1 max unavailable, 1 max surge +OldReplicaSets: nginx-deployment-1564180365 (2/2 replicas created) +NewReplicaSet: nginx-deployment-3066724191 (2/2 replicas created) +Events: + FirstSeen LastSeen Count From SubobjectPath Type Reason Message + --------- -------- ----- ---- ------------- -------- ------ ------- + 1m 1m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-2035384211 to 3 + 22s 22s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 1 + 22s 22s 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 2 + 22s 22s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 2 + 21s 21s 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 0 + 21s 21s 
1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 3 + 13s 13s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-3066724191 to 1 + 13s 13s 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-1564180365 to 2 + 13s 13s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-3066724191 to 2 +``` + +为了修复这个问题,我们需要回退到稳定的Deployment revision。 + +### 检查Deployment升级的历史记录 + +首先,检查下Deployment的revision: + +```shell +$ kubectl rollout history deployment/nginx-deployment +deployments "nginx-deployment": +REVISION CHANGE-CAUSE +1 kubectl create -f docs/user-guide/nginx-deployment.yaml --record +2 kubectl set image deployment/nginx-deployment nginx=nginx:1.9.1 +3 kubectl set image deployment/nginx-deployment nginx=nginx:1.91 +``` + +因为我们创建Deployment的时候使用了`—recored`参数可以记录命令,我们可以很方便的查看每次revison的变化。 + +查看单个revision的详细信息: + +```shell +$ kubectl rollout history deployment/nginx-deployment --revision=2 +deployments "nginx-deployment" revision 2 + Labels: app=nginx + pod-template-hash=1159050644 + Annotations: kubernetes.io/change-cause=kubectl set image deployment/nginx-deployment nginx=nginx:1.9.1 + Containers: + nginx: + Image: nginx:1.9.1 + Port: 80/TCP + QoS Tier: + cpu: BestEffort + memory: BestEffort + Environment Variables: + No volumes. 
+``` + +### 回退到历史版本 + +现在,我们可以决定回退当前的rollout到之前的版本: + +```shell +$ kubectl rollout undo deployment/nginx-deployment +deployment "nginx-deployment" rolled back +``` + +也可以使用 `--revision`参数指定某个历史版本: + +```shell +$ kubectl rollout undo deployment/nginx-deployment --to-revision=2 +deployment "nginx-deployment" rolled back +``` + +与rollout相关的命令详细文档见[kubectl rollout](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/user-guide/kubectl/v1.6/#rollout)。 + +该Deployment现在已经回退到了先前的稳定版本。如你所见,Deployment controller产生了一个回退到revison 2的`DeploymentRollback`的event。 + +```shell +$ kubectl get deployment +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +nginx-deployment 3 3 3 3 30m + +$ kubectl describe deployment +Name: nginx-deployment +Namespace: default +CreationTimestamp: Tue, 15 Mar 2016 14:48:04 -0700 +Labels: app=nginx +Selector: app=nginx +Replicas: 3 updated | 3 total | 3 available | 0 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 1 max unavailable, 1 max surge +OldReplicaSets: +NewReplicaSet: nginx-deployment-1564180365 (3/3 replicas created) +Events: + FirstSeen LastSeen Count From SubobjectPath Type Reason Message + --------- -------- ----- ---- ------------- -------- ------ ------- + 30m 30m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-2035384211 to 3 + 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 1 + 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 2 + 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 2 + 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-2035384211 to 0 + 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-3066724191 to 2 + 29m 29m 1 {deployment-controller } Normal 
ScalingReplicaSet Scaled up replica set nginx-deployment-3066724191 to 1 + 29m 29m 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-1564180365 to 2 + 2m 2m 1 {deployment-controller } Normal ScalingReplicaSet Scaled down replica set nginx-deployment-3066724191 to 0 + 2m 2m 1 {deployment-controller } Normal DeploymentRollback Rolled back deployment "nginx-deployment" to revision 2 + 29m 2m 2 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set nginx-deployment-1564180365 to 3 +``` + +### 清理Policy + +你可以通过设置`.spec.revisonHistoryLimit`项来指定deployment最多保留多少revison历史记录。默认的会保留所有的revision;如果将该项设置为0,Deployment就不允许回退了。 + +## Deployment扩容 + +你可以使用以下命令扩容Deployment: + +```shell +$ kubectl scale deployment nginx-deployment --replicas 10 +deployment "nginx-deployment" scaled +``` + +假设你的集群中启用了[horizontal pod autoscaling](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough),你可以给Deployment设置一个autoscaler,基于当前Pod的CPU利用率选择最少和最多的Pod数。 + +```shell +$ kubectl autoscale deployment nginx-deployment --min=10 --max=15 --cpu-percent=80 +deployment "nginx-deployment" autoscaled +``` + +## 比例扩容 + +RollingUpdate Deployment支持同时运行一个应用的多个版本。当你活着autoscaler扩容RollingUpdate Deployment的时候,正在中途的rollout(进行中或者已经暂停的),为了降低风险,Deployment controller将会平衡已存在的活动中的ReplicaSets(有Pod的ReplicaSets)和新加入的replicas。这被称为比例扩容。 + +例如,你正在运行中含有10个replica的Deployment。maxSurge=3,maxUnavailable=2。 + +```shell +$ kubectl get deploy +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +nginx-deployment 10 10 10 10 50s +``` + +你更新了一个镜像,而在集群内部无法解析。 + +```shell +$ kubectl set image deploy/nginx-deployment nginx=nginx:sometag +deployment "nginx-deployment" image updated +``` + +镜像更新启动了一个包含ReplicaSet nginx-deployment-1989198191的新的rollout,但是它被阻塞了,因为我们上面提到的maxUnavailable。 + +```shell +$ kubectl get rs +NAME DESIRED CURRENT READY AGE +nginx-deployment-1989198191 5 5 0 9s +nginx-deployment-618515232 8 8 8 1m +``` + 
+然后发起了一个新的Deployment扩容请求。autoscaler将Deployment的repllica数目增加到了15个。Deployment controller需要判断在哪里增加这5个新的replica。如果我们没有谁用比例扩容,所有的5个replica都会加到一个新的ReplicaSet中。如果使用比例扩容,新添加的replica将传播到所有的ReplicaSet中。大的部分加入replica数最多的ReplicaSet中,小的部分加入到replica数少的ReplciaSet中。0个replica的ReplicaSet不会被扩容。 + +在我们上面的例子中,3个replica将添加到旧的ReplicaSet中,2个replica将添加到新的ReplicaSet中。rollout进程最终会将所有的replica移动到新的ReplicaSet中,假设新的replica成为健康状态。 + +```shell +$ kubectl get deploy +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +nginx-deployment 15 18 7 8 7m +$ kubectl get rs +NAME DESIRED CURRENT READY AGE +nginx-deployment-1989198191 7 7 0 7m +nginx-deployment-618515232 11 11 11 7m +``` + +## 暂停和恢复Deployment + +你可以在出发一次或多次更新前暂停一个Deployment,然后再恢复它。这样你就能多次暂停和恢复Deployment,在此期间进行一些修复工作,而不会出发不必要的rollout。 + +例如使用刚刚创建Deployment: + +```shell +$ kubectl get deploy +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +nginx 3 3 3 3 1m +[mkargaki@dhcp129-211 kubernetes]$ kubectl get rs +NAME DESIRED CURRENT READY AGE +nginx-2142116321 3 3 3 1m +``` + +使用以下命令暂停Deployment: + +```shell +$ kubectl rollout pause deployment/nginx-deployment +deployment "nginx-deployment" paused +``` + +然后更新Deplyment中的镜像: + +```shell +$ kubectl set image deploy/nginx nginx=nginx:1.9.1 +deployment "nginx-deployment" image updated +``` + +注意新的rollout启动了: + +```shell +$ kubectl rollout history deploy/nginx +deployments "nginx" +REVISION CHANGE-CAUSE +1 + +$ kubectl get rs +NAME DESIRED CURRENT READY AGE +nginx-2142116321 3 3 3 2m +``` + +你可以进行任意多次更新,例如更新使用的资源: + +```shell +$ kubectl set resources deployment nginx -c=nginx --limits=cpu=200m,memory=512Mi +deployment "nginx" resource requirements updated +``` + +Deployment暂停前的初始状态将继续它的功能,而不会对Deployment的更新产生任何影响,只要Deployment是暂停的。 + +最后,恢复这个Deployment,观察完成更新的ReplicaSet已经创建出来了: + +```shell +$ kubectl rollout resume deploy nginx +deployment "nginx" resumed +$ KUBECTL get rs -w +NAME DESIRED CURRENT READY AGE +nginx-2142116321 2 2 2 2m +nginx-3926361531 2 2 0 6s +nginx-3926361531 2 2 1 18s +nginx-2142116321 1 2 2 
2m +nginx-2142116321 1 2 2 2m +nginx-3926361531 3 2 1 18s +nginx-3926361531 3 2 1 18s +nginx-2142116321 1 1 1 2m +nginx-3926361531 3 3 1 18s +nginx-3926361531 3 3 2 19s +nginx-2142116321 0 1 1 2m +nginx-2142116321 0 1 1 2m +nginx-2142116321 0 0 0 2m +nginx-3926361531 3 3 3 20s +^C +$ KUBECTL get rs +NAME DESIRED CURRENT READY AGE +nginx-2142116321 0 0 0 2m +nginx-3926361531 3 3 3 28s +``` + +**注意:** 在恢复Deployment之前你无法回退一个暂停了个Deployment。 + +## Deployment状态 + +Deployment在生命周期中有多种状态。在创建一个新的ReplicaSet的时候它可以是 [progressing](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#progressing-deployment) 状态, [complete](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#complete-deployment) 状态,或者[fail to progress](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#failed-deployment)状态。 + +### Progressing Deployment + +Kubernetes将执行过下列任务之一的Deployment标记为*progressing*状态: + +- Deployment正在创建新的ReplicaSet过程中。 +- Deployment正在扩容一个已有的ReplicaSet。 +- Deployment正在缩容一个已有的ReplicaSet。 +- 有新的可用的pod出现。 + +你可以使用`kubectl roullout status`命令监控Deployment的进度。 + +### Complete Deployment + +Kubernetes将包括以下特性的Deployment标记为*complete*状态: + +- Deployment最小可用。最小可用意味着Deployment的可用replica个数等于或者超过Deployment策略中的期望个数。 +- 所有与该Deployment相关的replica都被更新到了你指定版本,也就说更新完成。 +- 该Deployment中没有旧的Pod存在。 + +你可以用`kubectl rollout status`命令查看Deployment是否完成。如果rollout成功完成,`kubectl rollout status`将返回一个0值的Exit Code。 + +``` +$ kubectl rollout status deploy/nginx +Waiting for rollout to finish: 2 of 3 updated replicas are available... +deployment "nginx" successfully rolled out +$ echo $? 
+0 +``` + +### Failed Deployment + +你的Deployment在尝试部署新的ReplicaSet的时候可能卡住,用于也不会完成。这可能是因为以下几个因素引起的: + +- 无效的引用 +- 不可读的probe failure +- 镜像拉取错误 +- 权限不够 +- 范围限制 +- 程序运行时配置错误 + +探测这种情况的一种方式是,在你的Deployment spec中指定[`spec.progressDeadlineSeconds`](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#progress-deadline-seconds)。`spec.progressDeadlineSeconds` 表示Deployment controller等待多少秒才能确定(通过Deployment status)Deployment进程是卡住的。 + +下面的`kubectl`命令设置`progressDeadlineSeconds` 使controller在Deployment在进度卡住10分钟后报告: + +``` +$ kubectl patch deployment/nginx-deployment -p '{"spec":{"progressDeadlineSeconds":600}}' +"nginx-deployment" patched +``` + +Once the deadline has been exceeded, the Deployment controller adds a with the following attributes to the Deployment's + +当超过截止时间后,Deployment controller会在Deployment的 `status.conditions`中增加一条DeploymentCondition,它包括如下属性: + +- Type=Progressing +- Status=False +- Reason=ProgressDeadlineExceeded + +浏览 [Kubernetes API conventions](https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#typical-status-properties) 查看关于status conditions的更多信息。 + +**注意:** kubernetes除了报告`Reason=ProgressDeadlineExceeded`状态信息外不会对卡住的Deployment做任何操作。更高层次的协调器可以利用它并采取相应行动,例如,回滚Deployment到之前的版本。 + +**注意:** 如果你暂停了一个Deployment,在暂停的这段时间内kubernetnes不会检查你指定的deadline。你可以在Deployment的rollout途中安全的暂停它,然后再恢复它,这不会触发超过deadline的状态。 + +你可能在使用Deployment的时候遇到一些短暂的错误,这些可能是由于你设置了太短的timeout,也有可能是因为各种其他错误导致的短暂错误。例如,假设你使用了无效的引用。当你Describe Deployment的时候可能会注意到如下信息: + +``` +$ kubectl describe deployment nginx-deployment +<...> +Conditions: + Type Status Reason + ---- ------ ------ + Available True MinimumReplicasAvailable + Progressing True ReplicaSetUpdated + ReplicaFailure True FailedCreate +<...> +``` + +执行 `kubectl get deployment nginx-deployment -o yaml`,Deployement 的状态可能看起来像这个样子: + +```yaml +status: + availableReplicas: 2 + conditions: + - lastTransitionTime: 2016-10-04T12:25:39Z + lastUpdateTime: 
2016-10-04T12:25:39Z + message: Replica set "nginx-deployment-4262182780" is progressing. + reason: ReplicaSetUpdated + status: "True" + type: Progressing + - lastTransitionTime: 2016-10-04T12:25:42Z + lastUpdateTime: 2016-10-04T12:25:42Z + message: Deployment has minimum availability. + reason: MinimumReplicasAvailable + status: "True" + type: Available + - lastTransitionTime: 2016-10-04T12:25:39Z + lastUpdateTime: 2016-10-04T12:25:39Z + message: 'Error creating: pods "nginx-deployment-4262182780-" is forbidden: exceeded quota: + object-counts, requested: pods=1, used: pods=3, limited: pods=2' + reason: FailedCreate + status: "True" + type: ReplicaFailure + observedGeneration: 3 + replicas: 2 + unavailableReplicas: 2 +``` + +最终,一旦超过Deployment进程的deadline,kuberentes会更新状态和导致Progressing状态的原因: + +``` +Conditions: + Type Status Reason + ---- ------ ------ + Available True MinimumReplicasAvailable + Progressing False ProgressDeadlineExceeded + ReplicaFailure True FailedCreate + +``` + +你可以通过缩容Deployment的方式解决配额不足的问题,或者增加你的namespace的配额。如果你满足了配额条件后,Deployment controller就会完成你的Deployment rollout,你将看到Deployment的状态更新为成功状态(`Status=True`并且`Reason=NewReplicaSetAvailable`)。 + +``` +Conditions: + Type Status Reason + ---- ------ ------ + Available True MinimumReplicasAvailable + Progressing True NewReplicaSetAvailable + +``` + +`Type=Available`、 `Status=True` 以为这你的Deployment有最小可用性。 最小可用性是在Deployment策略中指定的参数。`Type=Progressing` 、 `Status=True`意味着你的Deployment 或者在部署过程中,或者已经成功部署,达到了期望的最少的可用replica数量(查看特定状态的Reason——在我们的例子中`Reason=NewReplicaSetAvailable` 意味着Deployment已经完成)。 + +你可以使用`kubectl rollout status`命令查看Deployment进程是否失败。当Deployment过程超过了deadline,`kubectl rollout status`将返回非0的exit code。 + +``` +$ kubectl rollout status deploy/nginx +Waiting for rollout to finish: 2 out of 3 new replicas have been updated... +error: deployment "nginx" exceeded its progress deadline +$ echo $? 
+1 +``` + +### 操作失败的Deployment + +所有对完成的Deployment的操作都适用于失败的Deployment。你可以对它阔/缩容,回退到历史版本,你甚至可以多次暂停它来应用Deployment pod template。 + +## 清理Policy + +你可以设置Deployment中的 `.spec.revisionHistoryLimit` 项来指定保留多少旧的ReplicaSet。 余下的将在后台被当作垃圾收集。默认的,所有的revision历史就都会被保留。在未来的版本中,将会更改为2。 + +**注意:** 将该值设置为0,将导致所有的Deployment历史记录都会被清除,该Deploynent就无法再回退了。 + +## 用例 + +### 金丝雀Deployment + +如果你想要使用Deployment对部分用户或服务器发布relaese,你可以创建多个Deployment,每个对一个release,参照[managing resources](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/cluster-administration/manage-deployment/#canary-deployments) 中对金丝雀模式的描述。 + +## 编写Deployment Spec + +在所有的Kubernetes配置中,Deployment也需要`apiVersion`,`kind`和`metadata`这些配置项。配置文件的通用使用说明查看[部署应用](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/tutorials/stateless-application/run-stateless-application-deployment),配置容器,和[使用kubeclt管理资源](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/tutorials/object-management-kubectl/object-management)文档。 + +Deployment也需要 [`.spec` section](https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status). + +### Pod Template + + `.spec.template` 是 `.spec`中唯一要求的字段。 + +`.spec.template` 是 [pod template](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/user-guide/replication-controller/#pod-template). 
它跟 [Pod](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/user-guide/pods)有一模一样的schema,除了它是嵌套的并且不需要`apiVersion` 和 `kind`字段。 + +另外为了划分Pod的范围,Deployment中的pod template必须指定适当的label(不要跟其他controller重复了,参考[selector](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#selector))和适当的重启策略。 + +[`.spec.template.spec.restartPolicy`](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/pods/pod-lifecycle) 可以设置为 `Always` , 如果不指定的话这就是默认配置。 + +### Replicas + +`.spec.replicas` 是可以选字段,指定期望的pod数量,默认是1。 + +### Selector + +`.spec.selector`是可选字段,用来指定 [label selector](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/overview/working-with-objects/labels) ,圈定Deployment管理的pod范围。 + +如果被指定, `.spec.selector` 必须匹配 `.spec.template.metadata.labels`,否则它将被API拒绝。如果 `.spec.selector` 没有被指定, `.spec.selector.matchLabels` 默认是 `.spec.template.metadata.labels`。 + +在Pod的template跟`.spec.template`不同或者数量超过了`.spec.replicas`规定的数量的情况下,Deployment会杀掉label跟selector不同的Pod。 + +**注意:** 你不应该再创建其他label跟这个selector匹配的pod,或者通过其他Deployment,或者通过其他Controller,例如ReplicaSet和ReplicationController。否则该Deployment会被把它们当成都是自己创建的。Kubernetes不会阻止你这么做。 + +如果你有多个controller使用了重复的selector,controller们就会互相打架并导致不正确的行为。 + +### 策略 + +`.spec.strategy` 指定新的Pod替换旧的Pod的策略。 `.spec.strategy.type` 可以是"Recreate"或者是 "RollingUpdate"。"RollingUpdate"是默认值。 + +#### Recreate Deployment + +`.spec.strategy.type==Recreate`时,在创建出新的Pod之前会先杀掉所有已存在的Pod。 + +#### Rolling Update Deployment + +`.spec.strategy.type==RollingUpdate`时,Deployment使用[rolling update](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/tasks/run-application/rolling-update-replication-controller) 的方式更新Pod 。你可以指定`maxUnavailable` 和 `maxSurge` 来控制 rolling update 进程。 + +##### Max Unavailable + +`.spec.strategy.rollingUpdate.maxUnavailable` 
是可选配置项,用来指定在升级过程中不可用Pod的最大数量。该值可以是一个绝对值(例如5),也可以是期望Pod数量的百分比(例如10%)。通过计算百分比的绝对值向下取整。如果`.spec.strategy.rollingUpdate.maxSurge` 为0时,这个值不可以为0。默认值是1。 + +例如,该值设置成30%,启动rolling update后旧的ReplicaSet将会立即缩容到期望的Pod数量的70%。新的Pod ready后,随着新的ReplicaSet的扩容,旧的ReplicaSet会进一步缩容,确保在升级的所有时刻可以用的Pod数量至少是期望Pod数量的70%。 + +##### Max Surge + +`.spec.strategy.rollingUpdate.maxSurge` 是可选配置项,用来指定可以超过期望的Pod数量的最大个数。该值可以是一个绝对值(例如5)或者是期望的Pod数量的百分比(例如10%)。当`MaxUnavailable`为0时该值不可以为0。通过百分比计算的绝对值向上取整。默认值是1。 + +例如,该值设置成30%,启动rolling update后新的ReplicaSet将会立即扩容,新老Pod的总数不能超过期望的Pod数量的130%。旧的Pod被杀掉后,新的ReplicaSet将继续扩容,旧的ReplicaSet会进一步缩容,确保在升级的所有时刻所有的Pod数量和不会超过期望Pod数量的130%。 + +### Progress Deadline Seconds + +`.spec.progressDeadlineSeconds` 是可选配置项,用来指定在系统报告Deployment的[failed progressing](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/controllers/deployment.md#failed-deployment) ——表现为resource的状态中`type=Progressing`、`Status=False`、 `Reason=ProgressDeadlineExceeded`前可以等待的Deployment进行的秒数。Deployment controller会继续重试该Deployment。未来,在实现了自动回滚后, deployment controller在观察到这种状态时就会自动回滚。 + +如果设置该参数,该值必须大于 `.spec.minReadySeconds`。 + +### Min Ready Seconds + +`.spec.minReadySeconds`是一个可选配置项,用来指定没有任何容器crash的Pod并被认为是可用状态的最小秒数。默认是0(Pod在ready后就会被认为是可用状态)。进一步了解什么时候Pod会被认为是ready状态,参阅 [Container Probes](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/concepts/workloads/pods/pod-lifecycle/#container-probes)。 + +### Rollback To + +`.spec.rollbackTo` 是一个可选配置项,用来配置Deployment回退的配置。设置该参数将触发回退操作,每次回退完成后,该值就会被清除。 + +#### Revision + +`.spec.rollbackTo.revision`是一个可选配置项,用来指定回退到的revision。默认是0,意味着回退到历史中最老的revision。 + +### Revision History Limit + +Deployment revision history存储在它控制的ReplicaSets中。 + +`.spec.revisionHistoryLimit` 是一个可选配置项,用来指定可以保留的旧的ReplicaSet数量。该理想值取决于新Deployment的频率和稳定性。如果该值没有设置的话,默认所有旧的ReplicaSet都会被保留,将资源存储在etcd中,使用`kubectl get rs`查看输出。每个Deployment的该配置都保存在ReplicaSet中,然而,一旦你删除了旧的ReplicaSet,你的Deployment就无法再回退到那个revision了。 + 
+如果你将该值设置为0,所有具有0个replica的ReplicaSet都会被删除。在这种情况下,新的Deployment rollout无法撤销,因为revision history都被清理掉了。 + +### Paused + +`.spec.paused`是可以可选配置项,boolean值。用来指定暂停和恢复Deployment。Paused和没有paused的Deployment之间的唯一区别就是,所有对paused deployment中的PodTemplateSpec的修改都不会触发新的rollout。Deployment被创建之后默认是非paused。 + +## Alternative to Deployments + +### kubectl rolling update + +[Kubectl rolling update](https://github.com/kubernetes/kubernetes.github.io/blob/master/docs/user-guide/kubectl/v1.6/#rolling-update) 虽然使用类似的方式更新Pod和ReplicationController。但是我们推荐使用Deployment,因为它是声明式的,客户端侧,具有附加特性,例如即使滚动升级结束后也可以回滚到任何历史版本。 \ No newline at end of file diff --git a/architecture/objects.md b/architecture/objects.md new file mode 100644 index 000000000..b1f57488d --- /dev/null +++ b/architecture/objects.md @@ -0,0 +1,20 @@ +# 主要概念 + +- Pod, Service, Namespace和Node +- Service +- Volume和Persistent Volume +- Deployment +- Secret +- StatefulSet +- DaemonSet +- ServiceAccount +- ReplicationController和ReplicaSet +- Job +- CronJob +- SecurityContext +- Resource Quota +- Pod Security Policy +- Horizontal Pod Autoscaling +- Network Policy +- Ingress +- ThirdPartyResources \ No newline at end of file diff --git a/02-创建kubeconfig文件.md b/deploy/centos/create-kubeconfig.md similarity index 100% rename from 02-创建kubeconfig文件.md rename to deploy/centos/create-kubeconfig.md diff --git a/01-创建TLS证书和密钥.md b/deploy/centos/create-tls-and-secret-key.md similarity index 100% rename from 01-创建TLS证书和密钥.md rename to deploy/centos/create-tls-and-secret-key.md diff --git a/08-安装dashboard插件.md b/deploy/centos/dashboard-addon-installation.md similarity index 100% rename from 08-安装dashboard插件.md rename to deploy/centos/dashboard-addon-installation.md diff --git a/10-安装EFK插件.md b/deploy/centos/efk-addon-installation.md similarity index 98% rename from 10-安装EFK插件.md rename to deploy/centos/efk-addon-installation.md index cb073703b..6fbb8b594 100644 --- a/10-安装EFK插件.md +++ b/deploy/centos/efk-addon-installation.md @@ -152,8 +152,8 @@ 
server.basePath: /api/v1/proxy/namespaces/kube-system/services/kibana-logging 如果你在这里发现Create按钮是灰色的无法点击,且Time-filed name中没有选项,fluentd要读取`/var/log/containers/`目录下的log日志,这些日志是从`/var/lib/docker/containers/${CONTAINER_ID}/${CONTAINER_ID}-json.log`链接过来的,查看你的docker配置,`—log-dirver`需要设置为**json-file**格式,默认的可能是**journald**,参考[docker logging]([https://docs.docker.com/engine/admin/logging/overview/#examples](https://docs.docker.com/engine/admin/logging/overview/#examples))。 -![es-setting](./images/es-setting.png) +![es-setting](../../images/es-setting.png) 创建Index后,可以在 `Discover` 下看到 ElasticSearch logging 中汇聚的日志; -![es-home](./images/kubernetes-efk-kibana.jpg) \ No newline at end of file +![es-home](../../images/kubernetes-efk-kibana.jpg) \ No newline at end of file diff --git a/03-创建高可用etcd集群.md b/deploy/centos/etcd-cluster-installation.md similarity index 100% rename from 03-创建高可用etcd集群.md rename to deploy/centos/etcd-cluster-installation.md diff --git a/09-安装heapster插件.md b/deploy/centos/heapster-addon-installation.md similarity index 98% rename from 09-安装heapster插件.md rename to deploy/centos/heapster-addon-installation.md index 0d7940aec..b99836ab8 100644 --- a/09-安装heapster插件.md +++ b/deploy/centos/heapster-addon-installation.md @@ -178,7 +178,7 @@ monitoring-influxdb-1411048194-lzrpc 1/1 Running 0 2m 浏览器访问 URL:`http://172.20.0.113:8086/api/v1/proxy/namespaces/kube-system/services/monitoring-grafana` -![grafana](./images/kubernetes-heapster-grafana.jpg) +![grafana](../../images/kubernetes-heapster-grafana.jpg) ## 访问 influxdb admin UI @@ -193,5 +193,5 @@ monitoring-influxdb 10.254.22.46 8086:32299/TCP,8083:30269/T 在页面的 “Connection Settings” 的 Host 中输入 node IP, Port 中输入 8086 映射的 nodePort 如上面的 32299,点击 “Save” 即可(我的集群中的地址是172.20.0.113:32299): -![kubernetes-influxdb-heapster](./images/kubernetes-influxdb-heapster.jpg) +![kubernetes-influxdb-heapster](../../images/kubernetes-influxdb-heapster.jpg) diff --git a/00-kubernetes安装前言.md 
b/deploy/centos/install-kbernetes1.6-on-centos.md similarity index 71% rename from 00-kubernetes安装前言.md rename to deploy/centos/install-kbernetes1.6-on-centos.md index fc2e29c03..0ea52bd7b 100644 --- a/00-kubernetes安装前言.md +++ b/deploy/centos/install-kbernetes1.6-on-centos.md @@ -1,4 +1,4 @@ -# 部署kubernetes集群 +# 在CentOS上部署kubernetes1.6集群 本系列文档介绍使用二进制部署 `kubernetes` 集群的所有步骤,而不是使用 `kubeadm` 等自动化方式来部署集群,同时开启了集群的TLS安全认证; @@ -32,16 +32,16 @@ ## 步骤介绍 -- [1 创建 TLS 证书和秘钥](01-创建TLS证书和密钥.md) -- [2 创建kubeconfig 文件](02-创建kubeconfig文件.md) -- [3 创建高可用etcd集群](03-创建高可用etcd集群.md) -- [4 安装kubectl命令行工具](04-安装kubectl命令行工具.md) -- [5 部署高可用master集群](05-部署高可用master集群.md) -- [6 部署node节点](06-部署node节点.md) -- [7 安装kubedns插件](07-安装kubedns插件.md) -- [8 安装dashboard插件](08-安装dashboard插件.md) -- [9 安装heapster插件](09-安装heapster插件.md) -- [10 安装EFK插件](10-安装EFK插件.md) +- [1 创建 TLS 证书和秘钥](create-tls-and-secret-key.md) +- [2 创建kubeconfig 文件](create-kubeconfig.md) +- [3 创建高可用etcd集群](etcd-cluster-installation.md) +- [4 安装kubectl命令行工具](kubectl-installation.md) +- [5 部署高可用master集群](master-installation.md) +- [6 部署node节点](node-installation.md) +- [7 安装kubedns插件](kubedns-addon-installation.md) +- [8 安装dashboard插件](dashboard-addon-installation.md) +- [9 安装heapster插件](heapster-addon-installation.md) +- [10 安装EFK插件](efk-addon-installation.md) ## 提醒 @@ -52,5 +52,3 @@ [Jimmy Song](http://rootsongjc.github.io/about) -[我的Kubernetes相关文章](http://rootsongjc.github.io/tags/kubernetes) - diff --git a/04-安装kubectl命令行工具.md b/deploy/centos/kubectl-installation.md similarity index 100% rename from 04-安装kubectl命令行工具.md rename to deploy/centos/kubectl-installation.md diff --git a/07-安装kubedns插件.md b/deploy/centos/kubedns-addon-installation.md similarity index 100% rename from 07-安装kubedns插件.md rename to deploy/centos/kubedns-addon-installation.md diff --git a/05-部署高可用master集群.md b/deploy/centos/master-installation.md similarity index 100% rename from 05-部署高可用master集群.md rename to deploy/centos/master-installation.md diff --git 
a/06-部署node节点.md b/deploy/centos/node-installation.md similarity index 100% rename from 06-部署node节点.md rename to deploy/centos/node-installation.md diff --git a/deploy/index.md b/deploy/index.md index 9ad83ba69..f823fd66e 100644 --- a/deploy/index.md +++ b/deploy/index.md @@ -5,5 +5,16 @@ - [kubeadm](kubeadm.md) - [frakti](frakti/index.md) - [证书生成示例](certificate.md) +- [在CentOS上部署kubernetes1.6集群](centos/install-kbernetes1.6-on-centos.md) + - [创建TLS证书和秘钥](centos/create-tls-and-secret-key.md) + - [创建kubeconfig 文件](centos/create-kubeconfig.md) + - [创建高可用etcd集群](centos/etcd-cluster-installation.md) + - [安装kubectl命令行工具](centos/kubectl-installation.md) + - [部署高可用master集群](centos/master-installation.md) + - [部署node节点](centos/node-installation.md) + - [安装kubedns插件](centos/kubedns-addon-installation.md) + - [安装dashboard插件](centos/dashboard-addon-installation.md) + - [安装heapster插件](centos/heapster-addon-installation.md) + - [安装EFK插件](centos/efk-addon-installation.md) diff --git a/22-kubernetes配置最佳实践.md b/deploy/kubernetes-configuration-best-practice.md similarity index 100% rename from 22-kubernetes配置最佳实践.md rename to deploy/kubernetes-configuration-best-practice.md diff --git a/network/network-configuration.md b/network/network-configuration.md new file mode 100644 index 000000000..9f4c32b3b --- /dev/null +++ b/network/network-configuration.md @@ -0,0 +1,5 @@ +# 网络配置 + +- [Kubernetes中的网络模式解析](network-modes-in-kubernetes.md) + + diff --git a/16-kubernetes中的网络模式解析.md b/network/network-modes-in-kubernetes.md similarity index 98% rename from 16-kubernetes中的网络模式解析.md rename to network/network-modes-in-kubernetes.md index 7d7736c4f..9cc7b14cd 100644 --- a/16-kubernetes中的网络模式解析.md +++ b/network/network-modes-in-kubernetes.md @@ -22,7 +22,7 @@ Flannel的host-gw模式映射容器到容器的路由信息,kubernetes的每 下图是flannel host-gw模式的架构图 -![arch](images/flannel-host-gw-arch.png) +![arch](../images/flannel-host-gw-arch.png) 图片来源:[OpenShift Doc](https://docs.openshift.com/container-platform/3.4/architecture/additional_concepts/flannel.html) diff --git a/21-应用日志收集.md b/ops/app-log-collection.md similarity index 98% rename from 21-应用日志收集.md rename to 
ops/app-log-collection.md index d6d030684..2b75f67ef 100644 --- a/21-应用日志收集.md +++ b/ops/app-log-collection.md @@ -159,7 +159,7 @@ green open filebeat-2017.05.17 1qatsSajSYqAV42_XYwLsQ 5 1 1189 访问Kibana的web页面,查看`filebeat-2017.05.17`的索引,可以看到logstash收集到了app日志。 -![Kibana页面](images/filebeat-test-kibana.jpg) +![Kibana页面](../images/filebeat-test-kibana.jpg) **问题记录** diff --git a/ops/opration-administration.md b/ops/opration-administration.md new file mode 100644 index 000000000..984ba706b --- /dev/null +++ b/ops/opration-administration.md @@ -0,0 +1,6 @@ +# 运维管理 + +- [服务滚动升级](service-rolling-update.md) +- [应用日志收集](app-log-collection.md) + + diff --git a/19-服务滚动升级.md b/ops/service-rolling-update.md similarity index 100% rename from 19-服务滚动升级.md rename to ops/service-rolling-update.md diff --git a/13-kubernetes中的RBAC支持.md b/security/rbac-support-in-kubernetes.md similarity index 98% rename from 13-kubernetes中的RBAC支持.md rename to security/rbac-support-in-kubernetes.md index 6930f4752..7700edf97 100644 --- a/13-kubernetes中的RBAC支持.md +++ b/security/rbac-support-in-kubernetes.md @@ -24,7 +24,7 @@ ABAC(Attribute Based Access Control)本来是不错的概念,但是在 Kub 需要理解 RBAC 一些基础的概念和思路,RBAC 是让用户能够访问 [Kubernetes API 资源](https://kubernetes.io/docs/api-reference/v1.6/)的授权方式。 -![RBAC架构图1](images/rbac1.png) +![RBAC架构图1](../images/rbac1.png) 在 RBAC 中定义了两个对象,用于描述在用户和资源之间的连接权限。 @@ -36,7 +36,7 @@ ABAC(Attribute Based Access Control)本来是不错的概念,但是在 Kub RoleBinding 把角色映射到用户,从而让这些用户继承角色在 namespace 中的权限。ClusterRoleBinding 让用户继承 ClusterRole 在整个集群中的权限。 -![RBAC架构图2](images/rbac2.png) +![RBAC架构图2](../images/rbac2.png) diff --git a/security/security-configuration.md b/security/security-configuration.md new file mode 100644 index 000000000..53c159f05 --- /dev/null +++ b/security/security-configuration.md @@ -0,0 +1,3 @@ +# 安全设置 + +- [Kubernetes中的RBAC支持](rbac-support-in-kubernetes.md) \ No newline at end of file diff --git a/14-分布式负载测试.md b/service-discovery-lb/distributed-load-test.md similarity index 87% rename 
from 14-分布式负载测试.md rename to service-discovery-lb/distributed-load-test.md index 9e5bbe92d..5bc6f343b 100644 --- a/14-分布式负载测试.md +++ b/service-discovery-lb/distributed-load-test.md @@ -60,7 +60,7 @@ $ kubectl scale --replicas=20 replicationcontrollers locust-worker ``` 当然你也可以通过WebUI:Dashboard - Workloads - Replication Controllers - **ServiceName** - Scale来扩容。 -![dashboard-scale](images/dashbaord-scale.jpg) +![dashboard-scale](../images/dashbaord-scale.jpg) ### 配置Traefik @@ -80,24 +80,21 @@ $ kubectl scale --replicas=20 replicationcontrollers locust-worker 通过Traefik的dashboard就可以看到刚增加的`traefik.locust.io`节点。 -![traefik-dashboard-locust](images/traefik-dashboard-locust.jpg) +![traefik-dashboard-locust](../images/traefik-dashboard-locust.jpg) ## 执行测试 打开`http://traefik.locust.io`页面,点击`Edit`输入伪造的用户数和用户每秒发送的请求个数,点击`Start Swarming`就可以开始测试了。 -![locust-start-swarming](images/locust-start-swarming.jpg) +![locust-start-swarming](../images/locust-start-swarming.jpg) 在测试过程中调整`sample-webapp`的pod个数(默认设置了1个pod),观察pod的负载变化情况。 -![sample-webapp-rc](images/sample-webapp-rc.jpg) +![sample-webapp-rc](../images/sample-webapp-rc.jpg) 从一段时间的观察中可以看到负载被平均分配给了3个pod。 在locust的页面中可以实时观察也可以下载测试结果。 -![locust-dashboard](images/locust-dashboard.jpg) +![locust-dashboard](../images/locust-dashboard.jpg) -## License - -This code is Apache 2.0 licensed and more information can be found in `LICENSE`. For information on licenses for third party software and libraries, refer to the `docker-image/licenses` directory. 
\ No newline at end of file diff --git a/18-边缘节点配置.md b/service-discovery-lb/edge-node-configuration.md similarity index 98% rename from 18-边缘节点配置.md rename to service-discovery-lb/edge-node-configuration.md index 1502bbf3b..29076c188 100644 --- a/18-边缘节点配置.md +++ b/service-discovery-lb/edge-node-configuration.md @@ -21,7 +21,7 @@ 选择Kubernetes的三个node作为边缘节点,并安装keepalived。 -![边缘节点架构](images/node-edge-arch.jpg) +![边缘节点架构](../images/node-edge-arch.jpg) ## 准备 diff --git a/11-ingress解析.md b/service-discovery-lb/ingress-concept.md similarity index 100% rename from 11-ingress解析.md rename to service-discovery-lb/ingress-concept.md diff --git a/15-kubernetes网络和集群性能测试.md b/service-discovery-lb/network-and-cluster-perfermance-test.md similarity index 100% rename from 15-kubernetes网络和集群性能测试.md rename to service-discovery-lb/network-and-cluster-perfermance-test.md diff --git a/service-discovery-lb/service-discovery-and-load-balancing.md b/service-discovery-lb/service-discovery-and-load-balancing.md new file mode 100644 index 000000000..888209a56 --- /dev/null +++ b/service-discovery-lb/service-discovery-and-load-balancing.md @@ -0,0 +1,7 @@ +# 服务发现与负载均衡 + +- [Ingress解析](ingress-concept.md) +- [安装Traefik ingress](traefik-ingress-installation.md) +- [分布式负载测试](distributed-load-test.md) +- [网络和集群性能测试](network-and-cluster-perfermance-test.md) +- [边缘节点配置](edge-node-configuration.md) \ No newline at end of file diff --git a/12-安装traefik-ingress.md b/service-discovery-lb/traefik-ingress-installation.md similarity index 96% rename from 12-安装traefik-ingress.md rename to service-discovery-lb/traefik-ingress-installation.md index 43c9e2dc4..ab42dbc78 100644 --- a/12-安装traefik-ingress.md +++ b/service-discovery-lb/traefik-ingress-installation.md @@ -18,7 +18,7 @@ Ingress Controller 实质上可以理解为是个监视器,Ingress Controller [Traefik](https://traefik.io/)是一款开源的反向代理与负载均衡工具。它最大的优点是能够与常见的微服务系统直接整合,可以实现自动化动态配置。目前支持Docker, Swarm, Mesos/Marathon, Mesos, Kubernetes, Consul, Etcd, Zookeeper, BoltDB, Rest 
API等等后端模型。 -以下配置文件可以在[kubernetes-handbook](https://github.com/rootsongjc/kubernetes-handbook)GitHub仓库中的[manifests/traefik-ingress/](manifests/traefik-ingress/)目录下找到。 +以下配置文件可以在[kubernetes-handbook](https://github.com/rootsongjc/kubernetes-handbook)GitHub仓库中的[manifests/traefik-ingress/](../manifests/traefik-ingress/)目录下找到。 **创建ingress-rbac.yaml** @@ -166,7 +166,7 @@ kubectl create -f . 访问该地址`http://172.20.0.115:8580/`将可以看到dashboard。 -![kubernetes-dashboard](images/traefik-dashboard.jpg) +![kubernetes-dashboard](../images/traefik-dashboard.jpg) 左侧黄色部分部分列出的是所有的rule,右侧绿色部分是所有的backend。 @@ -218,11 +218,11 @@ Traefik会解析http请求header里的Host参数将流量转发给Ingress配置 修改hosts后就就可以在kubernetes集群外访问以上两个service,如下图: -![traefik-nginx](images/traefik-nginx.jpg) +![traefik-nginx](../images/traefik-nginx.jpg) -![traefik-guestbook](images/traefik-guestbook.jpg) +![traefik-guestbook](../images/traefik-guestbook.jpg) ## 参考 diff --git a/storage/storage-configuration.md b/storage/storage-configuration.md new file mode 100644 index 000000000..7f0cd0de4 --- /dev/null +++ b/storage/storage-configuration.md @@ -0,0 +1,4 @@ +# 存储配置 + +- [使用glusterfs做持久化存储](using-glusterfs-for-persistent-storage.md) + diff --git a/17-使用glusterfs做持久化存储.md b/storage/using-glusterfs-for-persistent-storage.md similarity index 100% rename from 17-使用glusterfs做持久化存储.md rename to storage/using-glusterfs-for-persistent-storage.md