废弃clean_one_node.yml

2019-06-08 16:07:46 +08:00 · 2019-06-08 16:07:46 +08:00 · 1e81881436
parent 04993419b9
commit 1e81881436
8 changed files with 20 additions and 336 deletions
--- a/README.md
+++ b/README.md
@@ -49,12 +49,12 @@
    </tr>
    <tr>
        <td><strong>集群管理</strong><a href="docs/op/op-index.md">+</a></td>
-        <td><a href="docs/op/AddNode.md">增加node节点</a></td>
-        <td><a href="docs/op/AddMaster.md">增加master节点</a></td>
-        <td><a href="docs/op/op-etcd.md">管理etcd集群</a></td>
-        <td><a href="docs/op/clean_one_node.md">删除节点</a></td>
+        <td><a href="docs/op/op-node.md">管理node节点</a></td>
+        <td><a href="docs/op/op-master.md">管理master节点</a></td>
+        <td><a href="docs/op/op-etcd.md">管理etcd节点</a></td>
        <td><a href="docs/op/upgrade.md">升级集群</a></td>
        <td><a href="docs/op/cluster_restore.md">备份恢复</a></td>
+        <td><a href=""></a></td>
    </tr>
    <tr>
        <td><strong>特性实验</strong></td>
--- a/docs/op/clean_one_node.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# 删除节点
-
-本文档所指删除的节点是指使用kubeasz项目安装的节点角色（可能是kube-master, kube-node, etcd, lb节点）
-
- 警告：此操作将清理单个node节点，包含k8s集群可能使用的数据，特别的：如果有pod使用了本地存储类型，请自行判断重要性
-
-## 删除流程解释
-
- 0.判断待删除节点不是 etcd/master 组的唯一节点，否则不允许删除
- 1.待删除节点可能是kube-node节点，因此先执行`kubectl drain`，如果不是忽略执行报错
- 2.参照`99.clean.yml`脚本方式删除节点可能的服务和配置，忽略执行报错
- 3.待删除节点可能是kube-node节点，执行`kubectl delete node`, 如果不是忽略执行报错
- 4.修改ansible hosts，移除删除节点
-
-## 删除操作
-
-可以使用以下三种方式删除节点（i.e. 192.168.1.1）
-
-``` bash
-# 1.推荐使用 easzctl 工具
-$ easzctl clean-node 192.168.1.1
-
-# 2.ansible-playbook 带参数执行如下
-$ ansible-playbook /etc/ansible/tools/clean_one_node.yml -e NODE_TO_DEL=192.168.1.1
-
-# 3.ansible-playbook 不带参数执行，然后根据提示输入/确认
-$ ansible-playbook /etc/ansible/tools/clean_one_node.yml
-```
-
-## 验证
-
- 验证删除节点上是否相关服务均已停止
- 验证 ansible hosts 文件中已删除节点
-
-## Debug
-
-如果出现清理失败，类似报错：`... Device or resource busy: '/var/run/docker/netns/xxxxxxxxxx'`，需要手动umount该目录后重新清理  
-
-``` bash
-$ umount /var/run/docker/netns/xxxxxxxxxx
-$ ansible-playbook /etc/ansible/tools/clean_one_node.yml
-```
--- a/docs/op/op-index.md
+++ b/docs/op/op-index.md
@@ -1,12 +1,11 @@
 # 集群运维管理指南 operation guide

- [集群添加 NODE 节点](AddNode.md)
- [集群添加 MASTER 节点](AddMaster.md)
- [集群添加 ETCD 节点](AddEtcd.md)
+- [管理 NODE 节点](op-node.md)
+- [管理 MASTER 节点](op-master.md)
+- [管理 ETCD 节点](op-etcd.md)
 - [升级 K8S 版本](upgrade.md)
 - [修改多主集群VIP地址](ChangeVIP.md)
 - [修改AIO部署的系统IP](change_ip_allinone.md)
- [集群删除单个节点](clean_one_node.md)
 - [替换集群使用的网络插件](change_k8s_network.md)
 - [集群备份与恢复](cluster_restore.md)
 - [设置只读权限 kubeconfig](readonly_kubectl.md)
--- a/roles/calico/tasks/main.yml
+++ b/roles/calico/tasks/main.yml
@@ -87,6 +87,7 @@
  #- calico-ipam
  #- loopback
  - calicoctl
+  ignore_errors: true

 - name: 准备 calicoctl配置文件
  template: src=calicoctl.cfg.j2 dest=/etc/calico/calicoctl.cfg
--- a/tools/12.delnode.yml
+++ b/tools/12.delnode.yml
@@ -16,6 +16,8 @@
  vars:
    DEL_NODE: "yes"
    DEL_LB: "yes"
+    DEL_CHRONY: "yes"
+    DEL_ENV: "yes"
  roles:
  - clean
  tasks:
--- a/tools/13.delmaster.yml
+++ b/tools/13.delmaster.yml
@@ -17,6 +17,8 @@
    DEL_MASTER: "yes"
    DEL_NODE: "yes"
    DEL_LB: "yes"
+    DEL_CHRONY: "yes"
+    DEL_ENV: "yes"
  roles:
  - clean
  tasks:
--- a/tools/clean_one_node.yml
+++ /dev/null
@@ -1,257 +0,0 @@
-# 警告：此脚本将清理单个node节点，使用请详细参阅 docs/op/clean_one_node.md
-# 请三思后运行此脚本，特别的：如果有pod使用了本地存储类型，请自行判断重要性
-# 使用：
-# 1. 执行 ansible-playbook /etc/ansible/tools/clean_one_node.yml
-# 2. 按照提示输入待删除节点
-
- hosts: deploy
-  vars_prompt:
-  - name: "NODE_TO_DEL"
-    prompt: "which node is about to be deleted?(e.g 192.168.1.1)"
-    private: no
-    confirm: yes
-  tasks:
-  - name: fail info1
-    fail: msg="you CAN NOT delete the last member of etcd cluster!"
-    when: "groups['etcd']|length < 2 and NODE_TO_DEL in groups['etcd']"
-
-  - name: fail info2
-    fail: msg="you CAN NOT delete the last member of kube-master!"
-    when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"
-
-  - name: 执行kubectl drain(节点可能是kube-node节点)
-    shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
-    ignore_errors: true
-
-  - block:
-    # 清理 kube-node 相关服务
-    - name: stop and disable kube-node service
-      service: name={{ item }} state=stopped enabled=no
-      with_items:
-      - kubelet
-      - kube-proxy
-      ignore_errors: true
-  
-    - name: umount kubelet 挂载的目录
-      shell: "mount | grep '/var/lib/kubelet'| awk '{print $3}'|xargs umount"
-      args:
-        warn: false
-      ignore_errors: true
-  
-    - name: 清理目录和文件
-      file: name={{ item }} state=absent
-      with_items:
-      - "/var/lib/kubelet/"
-      - "/var/lib/kube-proxy/"
-      - "/etc/systemd/system/kubelet.service"
-      - "/etc/systemd/system/kube-proxy.service"
-      - "/opt/kube/kube-system/"
-  
-    # 清理 kube-master 相关
-    - name: stop and disable kube-master service
-      service: name={{ item }} state=stopped enabled=no
-      with_items:
-      - kube-apiserver
-      - kube-controller-manager
-      - kube-scheduler
-      ignore_errors: true
-  
-    - name: 清理目录和文件
-      file: name={{ item }} state=absent
-      with_items:
-      - "/var/run/kubernetes"
-      - "/etc/systemd/system/kube-apiserver.service"
-      - "/etc/systemd/system/kube-controller-manager.service"
-      - "/etc/systemd/system/kube-scheduler.service"
-  
-    # 清理集群docker服务、网络相关
-    - block:
-        - name: 获取是否运行名为'kubeasz'的容器
-          shell: 'docker ps|grep kubeasz || echo "NOT FOUND"'
-          register: install_info
-      
-        - name: fail info3
-          fail: msg="you CAN NOT delete dockerd, because container 'kubeasz' is running!"
-          when: "'kubeasz' in install_info.stdout"
-      
-        - name: stop and disable docker service
-          service:
-            name: docker
-            state: stopped
-            enabled: no
-          ignore_errors: true
-      
-        - name: unmount docker filesystem-1
-          mount: path=/var/run/docker/netns/default state=unmounted
-      
-        - name: unmount docker filesystem-2
-          mount: path=/var/lib/docker/overlay state=unmounted
-      
-        - name: remove files and dirs
-          file: name={{ item }} state=absent
-          with_items:
-          - "/var/lib/docker/"
-          - "/var/run/docker/"
-          - "/etc/systemd/system/docker.service"
-          - "/etc/systemd/system/docker.service.requires/"
-          - "/etc/systemd/system/docker.service.d/"
-          - "/etc/bash_completion.d/docker"
-          - "/usr/bin/docker"
-      when: CONTAINER_RUNTIME == 'docker'
-
-    - block:
-      - name: stop and disable containerd service
-        service:
-          name: containerd
-          state: stopped
-          enabled: no
-        ignore_errors: true
-
-      - name: umount containerd filesystems
-        shell: "mount | grep 'containerd/io.containerd'| awk '{print $3}'|xargs umount || exit 0"
-        args:
-          warn: false
-        ignore_errors: true
-
-      - name: remove files and dirs
-        file: name={{ item }} state=absent
-        with_items:
-        - "/etc/containerd/"
-        - "/etc/crictl.yaml"
-        - "/etc/systemd/system/containerd.service"
-        - "/opt/containerd/"
-        - "/var/lib/containerd/"
-        - "/var/run/containerd/"
-      when: CONTAINER_RUNTIME == 'containerd'
-
-    - name: remove files and dirs2
-      file: name={{ item }} state=absent
-      with_items:
-      - "/etc/cni/"
-      - "/run/flannel/"
-      - "/etc/calico/"
-      - "/var/run/calico/"
-      - "/var/lib/calico/"
-      - "/var/log/calico/"
-      - "/etc/cilium/"
-      - "/var/run/cilium/"
-      - "/sys/fs/bpf/tc/"
-      - "/var/lib/cni/"
-      - "/var/lib/kube-router/"
-      - "/opt/kube/kube-system/"
-      - "/var/run/openvswitch/"
-      - "/etc/origin/openvswitch/"
-      - "/etc/openvswitch/"
-      - "/var/log/openvswitch/"
-  
-    - name: cleanup iptables
-      shell: "iptables -F && iptables -X \
-          && iptables -F -t nat && iptables -X -t nat \
-          && iptables -F -t raw && iptables -X -t raw \
-          && iptables -F -t mangle && iptables -X -t mangle"
-  
-    - name: cleanup networks1
-      shell: "ip link del tunl0; \
-          ip link del flannel.1; \
-          ip link del cni0; \
-          ip link del mynet0; \
-          ip link del kube-bridge; \
-          ip link del dummy0; \
-          ip link del kube-ipvs0; \
-          ip link del cilium_net; \
-          ip link del cilium_vxlan; \
-          ip link del ovn0; \
-          ip link del ovs-system"
-      ignore_errors: true
-  
-    - name: cleanup networks2
-      shell: "ip link del docker0; \
-          systemctl restart networking; \
-          systemctl restart network"
-      ignore_errors: true
-  
-    - name: cleanup 'calico' routes
-      shell: "for rt in `ip route|grep bird|sed 's/blackhole//'|awk '{print $1}'`;do ip route del $rt;done;"
-      when: "CLUSTER_NETWORK == 'calico'"
-      ignore_errors: true
-  
-    # 清理etcd 集群相关
-    - name: stop and disable etcd service
-      service:
-        name: etcd
-        state: stopped
-        enabled: no
-      ignore_errors: true
-  
-    - name: 清理目录和文件
-      file: name={{ item }} state=absent
-      with_items:
-      - "/var/lib/etcd"
-      - "/etc/etcd/"
-      - "/backup/k8s"
-      - "/etc/systemd/system/etcd.service"
-  
-    # 清理负载均衡相关
-    - name: stop keepalived service
-      shell: systemctl disable keepalived && systemctl stop keepalived
-      ignore_errors: true
-  
-    - name: stop haproxy service
-      shell: systemctl disable haproxy && systemctl stop haproxy
-      ignore_errors: true
-  
-    - name: 清理LB 配置文件目录
-      file: name={{ item }} state=absent
-      with_items:
-      - "/etc/haproxy"
-      - "/etc/keepalived"
-  
-    # 清理其他
-    - name: stop and disable chrony in Ubuntu
-      service: name=chrony state=stopped enabled=no
-      ignore_errors: true
-      when:
-      - 'ansible_distribution in ["Ubuntu","Debian"]'
-      - "groups['chrony']|length > 0"
-  
-    - name: stop and disable chronyd in CentOS/RedHat
-      service: name=chronyd state=stopped enabled=no
-      ignore_errors: true
-      when:
-      - 'ansible_distribution in ["CentOS","RedHat","Amazon"]' 
-      - "groups['chrony']|length > 0"
-  
-    - name: 清理证书目录和文件
-      file: name={{ item }} state=absent
-      with_items:
-      - "/etc/kubernetes/"
-      - "{{ ca_dir }}" 
-      - "/root/.kube/"
-      - "/etc/docker/"
-  
-    - name: 清理自动生成的PATH
-      lineinfile:
-        dest: ~/.bashrc
-        state: absent
-        regexp: '{{ item }}'
-      with_items:
-      - 'kubeasz'
-      - 'helm completion'
-      - 'crictl completion'
-      - 'kubectl completion'
-      - 'HELM_TLS_ENABLE'
-    delegate_to: "{{ NODE_TO_DEL }}"
-    run_once: true
-
-  # 执行kubectl delete(节点可能是kube-node节点)
-  - name: 执行kubectl delete(节点可能是kube-node节点)
-    shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
-    ignore_errors: true
-
-  # 删除 ansible hosts 中节点配置
-  - name: rm {{ NODE_TO_DEL }} in ansible hosts
-    lineinfile:
-      dest: "{{ base_dir }}/hosts"
-      state: absent
-      regexp: '^{{ NODE_TO_DEL }}$|^{{ NODE_TO_DEL }}[^0-9]'
-    connection: local
--- a/tools/easzctl
+++ b/tools/easzctl
@@ -11,8 +11,8 @@ function usage() {
 Usage: easzctl COMMAND [args]

 Cluster-wide operation:
-    checkout		To switch to cluster <clustername> context, or create it if not existed
-    destroy		To destroy the current cluster, with '--purge' option to also delete the context
+    checkout		To switch to context <clustername>, or create it if not existed
+    destroy		To destroy the current cluster, '--purge' to also delete the context
    list		To list all of clusters managed
    setup		To setup a cluster using the current context
    start-aio		To quickly setup an all-in-one cluster for testing (like minikube)
@@ -21,7 +21,6 @@ In-cluster operation:
    add-etcd		To add a etcd-node to the etcd cluster
    add-master		To add a kube-master(master node) to the k8s cluster
    add-node		To add a kube-node(work node) to the k8s cluster
-    clean-node		To clean a node, whatever role the node plays
    del-etcd		To delete a etcd-node from the etcd cluster
    del-master		To delete a kube-master from the k8s cluster
    del-node		To delete a kube-node from the k8s cluster
@@ -36,15 +35,15 @@ EOF

 function help-info() {
    case "$1" in
-        (add-node)
-            echo -e "Usage: easzctl add-node <new_node_ip>\n\nread 'https://github.com/easzlab/kubeasz/blob/master/docs/op/AddNode.md'"
-            ;;
-        (add-master)
-            echo -e "Usage: easzctl add-master <new_master_ip>\n\nread 'https://github.com/easzlab/kubeasz/blob/master/docs/op/AddMaster.md'"
-            ;;
        (add-etcd)
            echo -e "Usage: easzctl add-etcd <new_etcd_ip>\n\nread 'https://github.com/easzlab/kubeasz/blob/master/docs/op/op-etcd.md'"
            ;;
+        (add-master)
+            echo -e "Usage: easzctl add-master <new_master_ip>\n\nread 'https://github.com/easzlab/kubeasz/blob/master/docs/op/op-master.md'"
+            ;;
+        (add-node)
+            echo -e "Usage: easzctl add-node <new_node_ip>\n\nread 'https://github.com/easzlab/kubeasz/blob/master/docs/op/op-node.md'"
+            ;;
        (del-etcd)
            echo -e "Usage: easzctl del-etcd <etcd_ip>\n\nread 'https://github.com/easzlab/kubeasz/blob/master/docs/op/op-etcd.md'"
            ;;
@@ -54,9 +53,6 @@ function help-info() {
        (del-node)
            echo -e "Usage: easzctl del-node <node_ip>\n\nread 'https://github.com/easzlab/kubeasz/blob/master/docs/op/op-node.md'"
            ;;
-        (clean-node)
-            echo -e "Usage: easzctl clean-node <node_ip>\n\nread 'https://github.com/easzlab/kubeasz/blob/master/docs/op/clean_one_node.md'"
-            ;;
        (basic-auth)
            echo -e "Usage: easzctl basic-auth <options>\nOption:\t -s enable basic-auth\n\t -S disable basic-auth\n\t -u <user> set username\n\t -p <pass> set password"
            ;;
@@ -192,18 +188,6 @@ function del-master() {
    return 0
 }

-function clean-node() {
-    # check node's address regexp
-    [[ $1 =~ ^(2(5[0-5]{1}|[0-4][0-9]{1})|[0-1]?[0-9]{1,2})(\.(2(5[0-5]{1}|[0-4][0-9]{1})|[0-1]?[0-9]{1,2})){3}$ ]] || { echo "[ERROR] Invalid ip address!"; return 2; }
-
-    # 
-    ansible-playbook $BASEPATH/tools/clean_one_node.yml -e NODE_TO_DEL=$1
-
-    # save current cluster context if needed
-    [ -f "$BASEPATH/.cluster/current_cluster" ] && save_context
-    return 0
-}
-
 function upgrade() {
    echo -e "[INFO] prepare the new binaries in advance"
    echo -e "[INFO] upgrade begin in 5s, press any key to abort\n:"
@@ -449,11 +433,6 @@ case "$1" in
        ACTION="Action: add a k8s work node"
        CMD="add-node $2" 
        ;;
-    (clean-node)
-        [ "$#" -gt 1 ] || { usage >&2; exit 2; }
-        ACTION="Action: clean a node"
-        CMD="clean-node $2" 
-        ;;
    (del-etcd)
        [ "$#" -gt 1 ] || { usage >&2; exit 2; }
        ACTION="Action: delete a etcd node"