From de47026ef61fb08b1640359bedc5a21be55b20e3 Mon Sep 17 00:00:00 2001
From: gjmzj
Date: Tue, 26 Jun 2018 22:00:37 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=9B=BF=E6=8D=A2=E9=9B=86?=
 =?UTF-8?q?=E7=BE=A4=E7=BD=91=E7=BB=9C=E6=8F=92=E4=BB=B6=E7=9A=84=E8=84=9A?=
 =?UTF-8?q?=E6=9C=AC=E5=92=8C=E6=96=87=E6=A1=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/op/clean_k8s_network.md | 39 ++++++++++++++++++
 docs/op/op-index.md          |  1 +
 tools/clean_k8s_network.yml  | 78 ++++++++++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+)
 create mode 100644 docs/op/clean_k8s_network.md
 create mode 100644 tools/clean_k8s_network.yml

diff --git a/docs/op/clean_k8s_network.md b/docs/op/clean_k8s_network.md
new file mode 100644
index 0000000..2fc0fad
--- /dev/null
+++ b/docs/op/clean_k8s_network.md
@@ -0,0 +1,39 @@
+# 替换k8s集群的网络插件
+
+有时候我们在测试环境的k8s集群中希望试用多种网络插件(calico/flannel/kube-router),又不希望每测试一次就全部清除集群然后重建,那么你就需要这个文档。
+- WARNING:重新安装k8s网络插件会短暂中断已有运行在k8s上的服务
+  - 请在熟悉kubeasz的安装流程和k8s网络插件安装流程的基础上谨慎操作
+  - 如果k8s集群已经运行庞大业务pod,重装网络插件时会引起所有pod的删除、重建,短时间内将给apiserver带来压力,可能引起master节点夯住
+
+## 替换流程
+
+kubeasz使用标准cni方式安装k8s集群的网络插件;cni负责创建容器网卡和IP分配(IPAM),不同的网络插件(calico,flannel等)创建容器网卡和IP分配方式不一样,所以在替换网络插件时候需要现有pod全部删除,然后自动按照新网络插件的方式重建pod网络;请参考[k8s网络插件章节](../06-安装网络组件.md)。
+
+- 1.清除现有集群网络插件
+``` bash
+ansible-playbook /etc/ansible/tools/clean_k8s_network.yml
+```
+
+对照脚本`clean_k8s_network.yml` 大致流程为:
+  - 根据实际运行情况,删除现有网络组件的daemonset pod
+  - 如果现有组件是kube-router 需要进行一些额外清理和可能需要恢复默认kube-proxy服务
+  - 清理cni网络配置和具体插件的运行、配置文件
+  - 清理生成的容器网络组件(bridge,tunl等)
+  - 如果现有组件是calico 需要额外清理bgp路由
+  - 最后删除所有k8s上已运行的pod(会由controller负责重建)
+
+- 2.修改ansible hosts文件指定新网络插件后,然后重新执行安装
+``` bash
+ansible-playbook /etc/ansible/06.network.yml
+```
+
+## 验证新网络插件
+
+参照[calico](../06.calico.md) [flannel](../06.flannel.md) [kube-router](../06.kube-router.md)
+
+## 已知BUG
+
+如果现有网络是kube-router, 按上述步骤完成替换成其他网络时,需要额外执行一次pod重建:
+``` bash
+ansible-playbook
/etc/ansible/tools/clean_k8s_network.yml -t reload_pods
+```
diff --git a/docs/op/op-index.md b/docs/op/op-index.md
index a340a4f..d13ed70 100644
--- a/docs/op/op-index.md
+++ b/docs/op/op-index.md
@@ -6,3 +6,4 @@
 - [修改多主集群VIP地址](ChangeVIP.md)
 - [修改AIO部署的系统IP](change_ip_allinone.md)
 - [集群删除单个节点](del_one_node.md)
+- [替换集群使用的网络插件](clean_k8s_network.md)
diff --git a/tools/clean_k8s_network.yml b/tools/clean_k8s_network.yml
new file mode 100644
index 0000000..f066065
--- /dev/null
+++ b/tools/clean_k8s_network.yml
@@ -0,0 +1,109 @@
+---
+# Removes the network plugin (calico / flannel / kube-router) that is
+# currently deployed in the cluster so that a different one can be
+# installed afterwards. The cleanup tasks are best-effort (ignore_errors)
+# because not every plugin, path or interface exists on every host.
+- hosts:
+  - kube-master
+  - new-master
+  - kube-node
+  - new-node
+  tasks:
+  # The daemonset listing registered here is what the later "when"
+  # conditions search to decide which plugin is deployed.
+  - name: 获取所有已经创建的POD信息
+    command: "{{ bin_dir }}/kubectl get daemonset -n kube-system"
+    register: pod_info
+    run_once: true
+    # Read-only query, so never report it as a change.
+    changed_when: false
+
+  # Each delete tries the manifests under /opt/kube first and falls back
+  # to /root/local when that command fails.
+  - name: 删除原network插件calico部署
+    shell: >-
+      {{ bin_dir }}/kubectl delete -f /opt/kube/kube-system/calico/ ||
+      {{ bin_dir }}/kubectl delete -f /root/local/kube-system/calico/
+    ignore_errors: true
+    run_once: true
+    when: '"calico" in pod_info.stdout'
+
+  - name: 删除原network插件flannel部署
+    shell: >-
+      {{ bin_dir }}/kubectl delete -f /opt/kube/kube-system/flannel/ ||
+      {{ bin_dir }}/kubectl delete -f /root/local/kube-system/flannel/
+    ignore_errors: true
+    run_once: true
+    when: '"flannel" in pod_info.stdout'
+
+  - name: 删除原network插件kube-router部署
+    shell: >-
+      {{ bin_dir }}/kubectl delete -f /opt/kube/kube-system/kube-router/ ||
+      {{ bin_dir }}/kubectl delete -f /root/local/kube-system/kube-router/
+    ignore_errors: true
+    run_once: true
+    when: '"kube-router" in pod_info.stdout'
+
+  # --rm discards the helper container when the cleanup finishes, so
+  # repeated runs do not pile up exited containers on the hosts.
+  - name: 清理kube-router相关
+    shell: >-
+      {{ bin_dir }}/docker run --rm --privileged --net=host
+      cloudnativelabs/kube-router --cleanup-config
+    ignore_errors: true
+    when: '"kube-router" in pod_info.stdout'
+
+  # If kube-router's service-proxy was in use, kube-proxy has to be
+  # started again after kube-router is removed. default('') lets the
+  # task skip cleanly when SERVICE_PROXY is not defined for a host.
+  - name: 恢复默认的kube-proxy服务
+    shell: "systemctl start kube-proxy.service; systemctl enable kube-proxy.service"
+    ignore_errors: true
+    when: "'kube-router' in pod_info.stdout and (SERVICE_PROXY | default('')) == 'IPVS'"
+
+  # Remove the CNI, flannel, calico and kube-router paths listed below.
+  - name: 清理目录和文件
+    file:
+      path: "{{ item }}"
+      state: absent
+    with_items:
+    - "/etc/cni/"
+    - "/run/flannel/"
+    - "/etc/calico/"
+    - "/var/run/calico/"
+    - "/var/log/calico/"
+    - "/var/lib/cni/"
+    - "/var/lib/kube-router/"
+    - "/opt/kube/kube-system/"
+
+  # Remove the virtual interfaces the plugins may have created, then
+  # restart the host network service. Both unit names are tried
+  # (presumably Debian-style "networking" and RHEL-style "network").
+  - name: 清理网络
+    shell: |
+      ip link del tunl0
+      ip link del flannel.1
+      ip link del cni0
+      ip link del mynet0
+      ip link del kube-bridge
+      ip link del dummy0
+      systemctl restart networking
+      systemctl restart network
+    ignore_errors: true
+
+  # Delete the routes that "ip route" lists with "bird" in them; the
+  # "blackhole" keyword is stripped so field 1 is always the prefix.
+  - name: 清理calico残留路由
+    shell: "for rt in `ip route|grep bird|sed 's/blackhole//'|awk '{print $1}'`;do ip route del $rt;done;"
+    when: '"calico" in pod_info.stdout'
+    ignore_errors: true
+
+  # Delete all running pods in every namespace; their controllers
+  # recreate them. Tagged so it can be run alone with "-t reload_pods".
+  - name: 重启所有pod
+    shell: >-
+      for NS in $({{ bin_dir }}/kubectl get ns|awk 'NR>1{print $1}');
+      do {{ bin_dir }}/kubectl delete pod --all -n $NS; done;
+    ignore_errors: true
+    run_once: true
+    tags: reload_pods