Adjust and fix the scripts for deleting master/node/etcd nodes

pull/646/head
gjmzj 2019-07-24 10:19:31 +08:00
parent 0d44da6c4f
commit 2830a3587b
6 changed files with 58 additions and 72 deletions

@@ -0,0 +1,3 @@
- hosts: "{{ NODE_TO_CLEAN }}"
roles:
- clean
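The new roles/clean/clean_node.yml above is a thin entry playbook: it applies the existing clean role to whichever host is passed in as NODE_TO_CLEAN, so the del-etcd/del-node/del-master playbooks further down can shell out to it instead of embedding the role themselves. A minimal invocation sketch, assuming it is run from the repo root (so ansible.cfg and the roles path are picked up) and that 192.168.1.11 is a hypothetical node already listed in the hosts file:

  # clean a single node; the DEL_* switches select which parts get cleaned
  ansible-playbook roles/clean/clean_node.yml \
    -e NODE_TO_CLEAN=192.168.1.11 \
    -e DEL_NODE=yes -e DEL_LB=yes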

@@ -0,0 +1,4 @@
# to create CA, kubeconfig, kube-proxy.kubeconfig etc.
- hosts: localhost
roles:
- deploy
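The second new file, roles/deploy/deploy.yml, wraps the deploy role in a localhost-only play, presumably so that the CA and the various kubeconfig files can be regenerated without re-running the whole 01.prepare.yml. The easzctl change at the end of this diff drives it with the create_kctl_cfg tag; a hedged sketch of that call, assuming the repo root as working directory:

  # regenerate kubectl config on the deploy machine only
  ansible-playbook roles/deploy/deploy.yml -t create_kctl_cfg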

@@ -1,5 +1,5 @@
# WARNING: this script will clean the specified etcd node
# USAGE: `easzctl del-etcd 1.1.1.1`
# WARNING: this playbook will clean the etcd {{ ETCD_TO_DEL }}
# USAGE: easzctl del-etcd 1.1.1.1
- hosts: localhost
vars_prompt:
@ -8,16 +8,15 @@
private: no
confirm: yes
tasks:
- name: fail info1
fail: msg="{{ ETCD_TO_DEL }} is NOT a member of etcd cluster!"
# step0: run prechecks
- fail: msg="{{ ETCD_TO_DEL }} is NOT a member of etcd cluster!"
when: "ETCD_TO_DEL not in groups['etcd']"
- name: fail info2
fail: msg="you CAN NOT delete the last member of etcd cluster!"
- fail: msg="you CAN NOT delete the last member of etcd cluster!"
when: "groups['etcd']|length < 2"
- block:
# the following tasks look for the first healthy node in the etcd cluster
# step1: find a healthy member in the etcd cluster
- name: set NODE_IPS of the etcd cluster
set_fact: NODE_IPS="{% for host in groups['etcd'] %}{{ host }} {% endfor %}"
@@ -40,7 +39,7 @@
- debug: var="RUNNING_NODE.stdout"
# operations can only be performed on a healthy etcd node
# step2: remove jobs run on the healthy member if needed
- name: get ID of etcd node to delete
shell: "ETCDCTL_API=3 {{ bin_dir }}/etcdctl member list|grep {{ ETCD_TO_DEL }}:2380|cut -d',' -f1"
register: ETCD_ID
@ -56,16 +55,16 @@
delegate_to: "{{ RUNNING_NODE.stdout }}"
when: "ETCD_ID.stdout != ''"
#- name: remove data of the deleted etcd node if possible
# file: name=/var/lib/etcd state=absent
# delegate_to: "{{ ETCD_TO_DEL }}"
# ignore_errors: true
- name: clean etcd {{ ETCD_TO_DEL }} if possible
shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
-e NODE_TO_CLEAN={{ ETCD_TO_DEL }} \
-e DEL_ETCD=yes >> /tmp/ansible-playbook.log 2>&1 \
|| echo 'data not cleaned on {{ ETCD_TO_DEL }}'"
register: CLEAN_STATUS
- name: remove data of the deleted etcd node if possible
shell: "ssh {{ ETCD_TO_DEL }} rm -rf /var/lib/etcd"
ignore_errors: true
- debug: var="CLEAN_STATUS.stdout"
# lineinfile does not work well for deleting the node entry within the [etcd] group, so sed has to be used
# lineinfile is inadequate for deleting lines within a specific line range
- name: remove the etcd's node entry in hosts
shell: 'sed -i "/^\[etcd/,/^\[kube-master/ {/^{{ ETCD_TO_DEL }}[^0-9]/d}" {{ base_dir }}/hosts'
args:
@@ -73,6 +72,4 @@
- name: reconfig and restart the etcd cluster
shell: "ansible-playbook {{ base_dir }}/02.etcd.yml > /tmp/ansible-playbook.log 2>&1"
# only delete when the conditions are met
when: "groups['etcd']|length > 1 and ETCD_TO_DEL in groups['etcd']"

@@ -1,34 +1,25 @@
# WARNING: this script will clean the specified kube-node node
# USAGE: `easzctl del-node 1.1.1.1`
# WARNING: this playbook will clean the node {{ NODE_TO_DEL }}
# USAGE: easzctl del-node 1.1.1.1
- hosts: "{{ NODE_TO_DEL }}"
- hosts: localhost
tasks:
- name: fail info1
fail: msg="you CAN NOT delete the last member of kube-master!"
- fail: msg="you CAN NOT delete the last member of kube-master!"
when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"
- name: run kubectl drain to migrate pods
- name: run kubectl drain @{{ NODE_TO_DEL }}
shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
connection: local
ignore_errors: true
- hosts: "{{ NODE_TO_DEL }}"
vars:
DEL_NODE: "yes"
DEL_LB: "yes"
DEL_CHRONY: "yes"
DEL_ENV: "yes"
roles:
- clean
tasks:
- name: run kubectl delete node
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
connection: local
- name: clean node {{ NODE_TO_DEL }} if possible
shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
-e NODE_TO_CLEAN={{ NODE_TO_DEL }} \
-e DEL_NODE=yes \
-e DEL_LB=yes >> /tmp/ansible-playbook.log 2>&1 \
|| echo 'data not cleaned on {{ NODE_TO_DEL }}'"
register: CLEAN_STATUS
# remove the node entry from the ansible hosts file
# - name: rm {{ NODE_TO_DEL }} in ansible hosts
# lineinfile:
# dest: "{{ base_dir }}/hosts"
# state: absent
# regexp: '^{{ NODE_TO_DEL }}$|^{{ NODE_TO_DEL }}[^0-9]'
# connection: local
- debug: var="CLEAN_STATUS.stdout"
- name: run kubectl delete node {{ NODE_TO_DEL }}
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
ignore_errors: true
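After this rework the del-node playbook runs entirely from localhost: drain the node, shell out to clean_node.yml, then delete the node object. With the templates resolved for a hypothetical node 192.168.1.13, the sequence amounts to roughly:

  # evict pods, ignoring DaemonSets and local emptyDir data
  kubectl drain 192.168.1.13 --ignore-daemonsets --delete-local-data
  # clean services and runtime files on the node itself
  ansible-playbook roles/clean/clean_node.yml -e NODE_TO_CLEAN=192.168.1.13 -e DEL_NODE=yes -e DEL_LB=yes
  # finally remove the node object from the cluster
  kubectl delete node 192.168.1.13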

@@ -1,35 +1,26 @@
# WARNING: this script will clean the specified kube-master node
# USAGE: `easzctl del-master 1.1.1.1`
# WARNING: this playbook will clean the kube-master node {{ NODE_TO_DEL }}
# USAGE: easzctl del-master 1.1.1.1
- hosts: "{{ NODE_TO_DEL }}"
- hosts: localhost
tasks:
- name: fail info1
fail: msg="you CAN NOT delete the last member of kube-master!"
- fail: msg="you CAN NOT delete the last member of kube-master!"
when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"
- name: run kubectl drain to migrate pods
- name: run kubectl drain @{{ NODE_TO_DEL }}
shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
connection: local
ignore_errors: true
- hosts: "{{ NODE_TO_DEL }}"
vars:
DEL_MASTER: "yes"
DEL_NODE: "yes"
DEL_LB: "yes"
DEL_CHRONY: "yes"
DEL_ENV: "yes"
roles:
- clean
tasks:
- name: run kubectl delete node
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
delegate_to: "{{ groups['kube-node'][0] }}"
- name: clean node {{ NODE_TO_DEL }} if possible
shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
-e NODE_TO_CLEAN={{ NODE_TO_DEL }} \
-e DEL_MASTER=yes \
-e DEL_NODE=yes \
-e DEL_LB=yes >> /tmp/ansible-playbook.log 2>&1 \
|| echo 'data not cleaned on {{ NODE_TO_DEL }}'"
register: CLEAN_STATUS
# remove the node entry from the ansible hosts file
# - name: rm {{ NODE_TO_DEL }} in ansible hosts
# lineinfile:
# dest: "{{ base_dir }}/hosts"
# state: absent
# regexp: '^{{ NODE_TO_DEL }}$|^{{ NODE_TO_DEL }}[^0-9]'
# connection: local
- debug: var="CLEAN_STATUS.stdout"
- name: run kubectl delete node {{ NODE_TO_DEL }}
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
ignore_errors: true
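del-master follows the same drain/clean/delete flow, with the extra -e DEL_MASTER=yes switch, and no longer delegates kubectl delete node to the first kube-node host. A quick post-removal check from the deploy machine, assuming kubectl is on the PATH, might look like:

  # confirm the removed master is gone and the API is still reachable
  kubectl get node -o wide
  kubectl cluster-info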

@@ -178,7 +178,7 @@ function del-master() {
sed -i '/^\[kube-node/,/^\[harbor/{/^'"$1"'[^0-9]/d}' $BASEPATH/hosts
# reconfigure kubeconfig on the ansible deploy node
ansible-playbook $BASEPATH/01.prepare.yml -t create_kctl_cfg
ansible-playbook $BASEPATH/roles/deploy/deploy.yml -t create_kctl_cfg
# reconfigure and restart the haproxy service on 'kube-node' nodes
ansible-playbook $BASEPATH/05.kube-node.yml -t restart_lb -e MASTER_CHG=yes || { echo "[ERROR] Failed to restart the haproxy service on 'kube-node' nodes!"; return 2; }
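Both the hosts cleanup in the del-etcd playbook and the easzctl line above rely on the same section-scoped sed delete: restrict the delete command to the lines between two group headers, then drop only the line that starts with the target IP (the trailing [^0-9] keeps 192.168.1.2 from also matching 192.168.1.20, but it does require at least one character after the IP). A self-contained demonstration with GNU sed against a hypothetical hosts fragment:

  # build a throw-away hosts fragment (hypothetical layout)
  printf '%s\n' '[kube-master]' '192.168.1.2 NODE_NAME=master1' \
    '[kube-node]' '192.168.1.2 NODE_NAME=node1' '192.168.1.20 NODE_NAME=node2' \
    '[harbor]' > /tmp/hosts.demo
  # delete 192.168.1.2 only inside the [kube-node]..[harbor] range
  sed -i '/^\[kube-node/,/^\[harbor/{/^192.168.1.2[^0-9]/d}' /tmp/hosts.demo
  cat /tmp/hosts.demo   # the [kube-master] entry and 192.168.1.20 are left untouched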