#######################################
# Force re-creation of the cluster CA and of every certificate derived from
# it, by running playbooks/96.update-certs.yml with CHANGE_CA=true and the
# 'force_change_certs' tag.
# Arguments:
#   $1 - cluster name; the directory clusters/<name> must exist (path is
#        relative to the current working directory)
# Inputs:
#   Reads at most one key from stdin during a 5 second grace period; any key
#   received in that window aborts the run.
# Outputs:
#   Echoes the ansible-playbook command line to stdout; status messages go
#   through the 'logger' helper.
# Returns:
#   0 when the playbook succeeds; 1 on an empty or unknown cluster name, on
#   user abort, or when ansible-playbook fails.
#######################################
function renew-ca() {
    # ${1:-} keeps the function safe under 'set -o nounset' when called bare.
    local cluster="${1:-}"

    # An empty name would make the directory test below check "clusters/"
    # itself, which exists as soon as any cluster was created; reject it
    # explicitly so the playbook never runs against "clusters//hosts".
    [[ -n "$cluster" && -d "clusters/$cluster" ]] || { logger error "invalid cluster, run 'ezctl new $cluster' first"; return 1; }

    logger warn "WARNNING: this script should be used with greate caution"
    logger warn "WARNNING: it will recreate CA certs and all of the others certs used in the cluster"

    # Build the command as an argv array instead of a flat string: executing
    # an unquoted string re-splits it on whitespace and expands globs, which
    # corrupts the arguments for cluster names containing such characters.
    local -a cmd=(
        ansible-playbook
        -i "clusters/$cluster/hosts"
        -e CHANGE_CA=true
        -e "@clusters/$cluster/config.yml"
        playbooks/96.update-certs.yml
        -t force_change_certs
    )
    echo "${cmd[*]}"

    logger info "cluster:$cluster process begins in 5s, press any key to abort:\n"
    # read succeeds only if a key arrives within 5s; timeout or EOF (e.g. a
    # non-interactive stdin) is a failure, which means "go ahead".
    local key
    if read -r -t 5 -n 1 key; then
        logger warn "process abort"
        return 1
    fi

    # Normalize any playbook failure to status 1, as callers expect.
    "${cmd[@]}" || return 1
}
b/roles/calico/tasks/main.yml index e88934e..c439032 100644 --- a/roles/calico/tasks/main.yml +++ b/roles/calico/tasks/main.yml @@ -9,9 +9,8 @@ -config=ca-config.json \ -profile=kubernetes calico-csr.json|{{ base_dir }}/bin/cfssljson -bare calico" - - name: get calico-etcd-secrets info - shell: "{{ base_dir }}/bin/kubectl get secrets -n kube-system" - register: secrets_info + - name: 删除旧 calico-etcd-secrets + shell: "{{ base_dir }}/bin/kubectl -n kube-system delete secrets calico-etcd-secrets || echo NotFound" - name: 创建 calico-etcd-secrets shell: "cd {{ cluster_dir }}/ssl && \ @@ -19,15 +18,18 @@ --from-file=etcd-ca=ca.pem \ --from-file=etcd-key=calico-key.pem \ --from-file=etcd-cert=calico.pem" - when: '"calico-etcd-secrets" not in secrets_info.stdout' - name: 配置 calico DaemonSet yaml文件 template: src=calico-{{ calico_ver_main }}.yaml.j2 dest={{ cluster_dir }}/yml/calico.yaml + - name: 删除 calico网络 + shell: "{{ base_dir }}/bin/kubectl delete -f {{ cluster_dir }}/yml/calico.yaml || echo NotFound" + - name: 运行 calico网络 - shell: "{{ base_dir }}/bin/kubectl apply -f {{ cluster_dir }}/yml/calico.yaml" + shell: "sleep 5 && {{ base_dir }}/bin/kubectl apply -f {{ cluster_dir }}/yml/calico.yaml" run_once: true connection: local + tags: force_change_certs - name: 在节点创建相关目录 file: name={{ item }} state=directory @@ -40,6 +42,7 @@ - ca.pem - calico.pem - calico-key.pem + tags: force_change_certs - name: 删除默认cni配置 file: path=/etc/cni/net.d/10-default.conf state=absent @@ -62,6 +65,8 @@ delay: 15 ignore_errors: true connection: local + tags: force_change_certs - import_tasks: calico-rr.yml when: 'CALICO_RR_ENABLED|bool' + tags: force_change_certs diff --git a/roles/deploy/tasks/main.yml b/roles/deploy/tasks/main.yml index 8dfac15..61a1fed 100644 --- a/roles/deploy/tasks/main.yml +++ b/roles/deploy/tasks/main.yml @@ -14,36 +14,42 @@ - name: 读取ca证书stat信息 stat: path="{{ cluster_dir }}/ssl/ca.pem" register: p + tags: force_change_certs - name: 准备CA配置文件和签名请求 template: src={{ item 
}}.j2 dest={{ cluster_dir }}/ssl/{{ item }} with_items: - "ca-config.json" - "ca-csr.json" - when: p.stat.isreg is not defined + when: "p.stat.isreg is not defined or CHANGE_CA|bool" + tags: force_change_certs - name: 生成 CA 证书和私钥 - when: p.stat.isreg is not defined + when: "p.stat.isreg is not defined or CHANGE_CA|bool" + tags: force_change_certs shell: "cd {{ cluster_dir }}/ssl && \ {{ base_dir }}/bin/cfssl gencert -initca ca-csr.json | {{ base_dir }}/bin/cfssljson -bare ca" #----------- 创建配置文件: kubectl.kubeconfig - import_tasks: create-kubectl-kubeconfig.yml - tags: create_kctl_cfg + tags: create_kctl_cfg, force_change_certs #----------- 创建个性化客户端配置文件 - import_tasks: add-custom-kubectl-kubeconfig.yml - tags: add-kcfg + tags: add-kcfg, force_change_certs when: "ADD_KCFG|bool" #------------创建配置文件: kube-proxy.kubeconfig - import_tasks: create-kube-proxy-kubeconfig.yml + tags: force_change_certs #------------创建配置文件: kube-controller-manager.kubeconfig - import_tasks: create-kube-controller-manager-kubeconfig.yml + tags: force_change_certs #------------创建配置文件: kube-scheduler.kubeconfig - import_tasks: create-kube-scheduler-kubeconfig.yml + tags: force_change_certs # ansible 控制端一些易用性配置 - name: 本地创建 ezdown/ezctl 工具的软连接 diff --git a/roles/deploy/vars/main.yml b/roles/deploy/vars/main.yml index 94525c5..9d6d06e 100644 --- a/roles/deploy/vars/main.yml +++ b/roles/deploy/vars/main.yml @@ -4,3 +4,6 @@ KUBE_APISERVER: "https://{{ groups['kube_master'][0] }}:{{ SECURE_PORT }}" # ADD_KCFG: false CUSTOM_EXPIRY: "438000h" + +# CHANGE_CA: when set true, force to change ca certs +CHANGE_CA: false diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 1642aa0..46ac279 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -12,6 +12,7 @@ template: src=etcd-csr.json.j2 dest={{ cluster_dir }}/ssl/etcd-csr.json connection: local run_once: true + tags: force_change_certs - name: 创建 etcd证书和私钥 shell: "cd {{ cluster_dir }}/ssl && {{ base_dir }}/bin/cfssl 
gencert \ @@ -21,6 +22,7 @@ -profile=kubernetes etcd-csr.json | {{ base_dir }}/bin/cfssljson -bare etcd" connection: local run_once: true + tags: force_change_certs - name: 分发etcd证书相关 copy: src={{ cluster_dir }}/ssl/{{ item }} dest={{ ca_dir }}/{{ item }} @@ -28,6 +30,7 @@ - ca.pem - etcd.pem - etcd-key.pem + tags: force_change_certs - name: 创建etcd的systemd unit文件 template: src=etcd.service.j2 dest=/etc/systemd/system/etcd.service @@ -40,7 +43,7 @@ - name: 开启etcd服务 shell: systemctl daemon-reload && systemctl restart etcd ignore_errors: true - tags: upgrade_etcd, restart_etcd + tags: upgrade_etcd, restart_etcd, force_change_certs - name: 以轮询的方式等待服务同步完成 shell: "systemctl is-active etcd.service" @@ -48,4 +51,4 @@ until: '"active" in etcd_status.stdout' retries: 8 delay: 8 - tags: upgrade_etcd, restart_etcd + tags: upgrade_etcd, restart_etcd, force_change_certs diff --git a/roles/kube-master/tasks/main.yml b/roles/kube-master/tasks/main.yml index ade8964..a47fa08 100644 --- a/roles/kube-master/tasks/main.yml +++ b/roles/kube-master/tasks/main.yml @@ -12,19 +12,20 @@ with_items: - kube-controller-manager.kubeconfig - kube-scheduler.kubeconfig + tags: force_change_certs - name: 注册变量 KUBERNETES_SVC_IP shell: echo {{ SERVICE_CIDR }}|cut -d/ -f1|awk -F. 
'{print $1"."$2"."$3"."$4+1}' register: KUBERNETES_SVC_IP - tags: change_cert + tags: change_cert, force_change_certs - name: 设置变量 CLUSTER_KUBERNETES_SVC_IP set_fact: CLUSTER_KUBERNETES_SVC_IP={{ KUBERNETES_SVC_IP.stdout }} - tags: change_cert + tags: change_cert, force_change_certs - name: 创建 kubernetes 证书签名请求 template: src=kubernetes-csr.json.j2 dest={{ cluster_dir }}/ssl/kubernetes-csr.json - tags: change_cert + tags: change_cert, force_change_certs connection: local - name: 创建 kubernetes 证书和私钥 @@ -33,13 +34,14 @@ -ca-key=ca-key.pem \ -config=ca-config.json \ -profile=kubernetes kubernetes-csr.json | {{ base_dir }}/bin/cfssljson -bare kubernetes" - tags: change_cert + tags: change_cert, force_change_certs connection: local # 创建aggregator proxy相关证书 - name: 创建 aggregator proxy证书签名请求 template: src=aggregator-proxy-csr.json.j2 dest={{ cluster_dir }}/ssl/aggregator-proxy-csr.json connection: local + tags: force_change_certs - name: 创建 aggregator-proxy证书和私钥 shell: "cd {{ cluster_dir }}/ssl && {{ base_dir }}/bin/cfssl gencert \ @@ -48,6 +50,7 @@ -config=ca-config.json \ -profile=kubernetes aggregator-proxy-csr.json | {{ base_dir }}/bin/cfssljson -bare aggregator-proxy" connection: local + tags: force_change_certs - name: 分发 kubernetes证书 copy: src={{ cluster_dir }}/ssl/{{ item }} dest={{ ca_dir }}/{{ item }} @@ -58,7 +61,7 @@ - kubernetes-key.pem - aggregator-proxy.pem - aggregator-proxy-key.pem - tags: change_cert + tags: change_cert, force_change_certs - name: 替换 kubeconfig 的 apiserver 地址 lineinfile: @@ -68,6 +71,7 @@ with_items: - "/etc/kubernetes/kube-controller-manager.kubeconfig" - "/etc/kubernetes/kube-scheduler.kubeconfig" + tags: force_change_certs - name: 创建 master 服务的 systemd unit 文件 template: src={{ item }}.j2 dest=/etc/systemd/system/{{ item }} @@ -84,7 +88,7 @@ - name: 启动 master 服务 shell: "systemctl daemon-reload && systemctl restart kube-apiserver && \ systemctl restart kube-controller-manager && systemctl restart kube-scheduler" - tags: upgrade_k8s, 
restart_master + tags: upgrade_k8s, restart_master, force_change_certs # 轮询等待kube-apiserver启动完成 - name: 轮询等待kube-apiserver启动 @@ -93,7 +97,7 @@ until: '"active" in api_status.stdout' retries: 10 delay: 3 - tags: upgrade_k8s, restart_master + tags: upgrade_k8s, restart_master, force_change_certs # 轮询等待kube-controller-manager启动完成 - name: 轮询等待kube-controller-manager启动 @@ -102,7 +106,7 @@ until: '"active" in cm_status.stdout' retries: 8 delay: 3 - tags: upgrade_k8s, restart_master + tags: upgrade_k8s, restart_master, force_change_certs # 轮询等待kube-scheduler启动完成 - name: 轮询等待kube-scheduler启动 @@ -111,17 +115,19 @@ until: '"active" in sch_status.stdout' retries: 8 delay: 3 - tags: upgrade_k8s, restart_master + tags: upgrade_k8s, restart_master, force_change_certs - block: - name: 复制kubectl.kubeconfig shell: 'cd {{ cluster_dir }} && cp -f kubectl.kubeconfig {{ inventory_hostname }}-kubectl.kubeconfig' + tags: upgrade_k8s, restart_master, force_change_certs - name: 替换 kubeconfig 的 apiserver 地址 lineinfile: dest: "{{ cluster_dir }}/{{ inventory_hostname }}-kubectl.kubeconfig" regexp: "^ server" line: " server: https://{{ inventory_hostname }}:{{ SECURE_PORT }}" + tags: upgrade_k8s, restart_master, force_change_certs - name: 轮询等待master服务启动完成 command: "{{ base_dir }}/bin/kubectl --kubeconfig={{ cluster_dir }}/{{ inventory_hostname }}-kubectl.kubeconfig get node" @@ -129,7 +135,7 @@ until: result.rc == 0 retries: 5 delay: 6 - tags: upgrade_k8s, restart_master + tags: upgrade_k8s, restart_master, force_change_certs - name: 获取user:kubernetes是否已经绑定对应角色 shell: "{{ base_dir }}/bin/kubectl get clusterrolebindings|grep kubernetes-crb || echo 'notfound'" diff --git a/roles/kube-node/tasks/main.yml b/roles/kube-node/tasks/main.yml index 81f2bca..40784f3 100644 --- a/roles/kube-node/tasks/main.yml +++ b/roles/kube-node/tasks/main.yml @@ -18,6 +18,7 @@ ##----------kubelet 配置部分-------------- # 创建 kubelet 相关证书及 kubelet.kubeconfig - import_tasks: create-kubelet-kubeconfig.yml + tags: 
force_change_certs - name: 准备 cni配置文件 template: src=cni-default.conf.j2 dest=/etc/cni/net.d/10-default.conf @@ -43,17 +44,19 @@ - name: 开启kubelet 服务 shell: systemctl daemon-reload && systemctl restart kubelet - tags: upgrade_k8s, restart_node + tags: upgrade_k8s, restart_node, force_change_certs ##-------kube-proxy部分---------------- - name: 分发 kube-proxy.kubeconfig配置文件 copy: src={{ cluster_dir }}/kube-proxy.kubeconfig dest=/etc/kubernetes/kube-proxy.kubeconfig + tags: force_change_certs - name: 替换 kube-proxy.kubeconfig 的 apiserver 地址 lineinfile: dest: /etc/kubernetes/kube-proxy.kubeconfig regexp: "^ server" line: " server: {{ KUBE_APISERVER }}" + tags: force_change_certs - name: 创建kube-proxy 配置 template: src=kube-proxy-config.yaml.j2 dest=/var/lib/kube-proxy/kube-proxy-config.yaml @@ -69,7 +72,7 @@ - name: 开启kube-proxy 服务 shell: systemctl daemon-reload && systemctl restart kube-proxy - tags: reload-kube-proxy, upgrade_k8s, restart_node + tags: reload-kube-proxy, upgrade_k8s, restart_node, force_change_certs # 轮询等待kube-proxy启动完成 - name: 轮询等待kube-proxy启动 @@ -78,7 +81,7 @@ until: '"active" in kubeproxy_status.stdout' retries: 4 delay: 2 - tags: reload-kube-proxy, upgrade_k8s, restart_node + tags: reload-kube-proxy, upgrade_k8s, restart_node, force_change_certs # 轮询等待kubelet启动完成 - name: 轮询等待kubelet启动 @@ -87,7 +90,7 @@ until: '"active" in kubelet_status.stdout' retries: 4 delay: 2 - tags: reload-kube-proxy, upgrade_k8s, restart_node + tags: reload-kube-proxy, upgrade_k8s, restart_node, force_change_certs - name: 轮询等待node达到Ready状态 shell: "{{ base_dir }}/bin/kubectl get node {{ inventory_hostname }}|awk 'NR>1{print $2}'" @@ -95,7 +98,7 @@ until: node_status.stdout == "Ready" or node_status.stdout == "Ready,SchedulingDisabled" retries: 8 delay: 8 - tags: upgrade_k8s, restart_node + tags: upgrade_k8s, restart_node, force_change_certs connection: local - name: 设置node节点role