Adjust and fix the scripts for deleting master/node/etcd nodes

pull/646/head
gjmzj 2019-07-24 10:19:31 +08:00
parent 0d44da6c4f
commit 2830a3587b
6 changed files with 58 additions and 72 deletions

View File

@@ -0,0 +1,3 @@
+- hosts: "{{ NODE_TO_CLEAN }}"
+  roles:
+  - clean
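The new entry playbook above applies the existing clean role to a single host passed in as NODE_TO_CLEAN; the del-etcd/del-node/del-master playbooks below call it through a nested ansible-playbook run. A minimal standalone invocation, assuming it is run from the repo root and using the placeholder IP from the usage comments (the DEL_* extra-vars are the same switches the del-* playbooks pass):

    ansible-playbook roles/clean/clean_node.yml \
        -e NODE_TO_CLEAN=1.1.1.1 \
        -e DEL_NODE=yes -e DEL_LB=yes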

View File

@@ -0,0 +1,4 @@
+# to create CA, kubeconfig, kube-proxy.kubeconfig etc.
+- hosts: localhost
+  roles:
+  - deploy
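Wrapping the deploy role in its own playbook lets tooling re-run just the CA/kubeconfig generation on the ansible control host (hosts: localhost). The del-master change to easzctl at the bottom of this commit uses exactly such a tag-scoped run:

    ansible-playbook roles/deploy/deploy.yml -t create_kctl_cfg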

View File

@@ -1,5 +1,5 @@
-# WARNING: this playbook will clean the specified etcd node
-# USAGE: `easzctl del-etcd 1.1.1.1`
+# WARNING: this playbook will clean the etcd node {{ NODE_TO_DEL }}
+# USAGE: easzctl del-etcd 1.1.1.1
 - hosts: localhost
   vars_prompt:
@@ -8,16 +8,15 @@
     private: no
     confirm: yes
   tasks:
-  - name: fail info1
-    fail: msg="{{ ETCD_TO_DEL }} is NOT a member of etcd cluster!"
+  # step0: run prechecks
+  - fail: msg="{{ ETCD_TO_DEL }} is NOT a member of etcd cluster!"
     when: "ETCD_TO_DEL not in groups['etcd']"
-  - name: fail info2
-    fail: msg="you CAN NOT delete the last member of etcd cluster!"
+  - fail: msg="you CAN NOT delete the last member of etcd cluster!"
     when: "groups['etcd']|length < 2"
   - block:
-    # the next few tasks locate the first healthy node in the etcd cluster
+    # step1: find a healthy member in the etcd cluster
     - name: set NODE_IPS of the etcd cluster
       set_fact: NODE_IPS="{% for host in groups['etcd'] %}{{ host }} {% endfor %}"
@@ -40,7 +39,7 @@
     - debug: var="RUNNING_NODE.stdout"
-    # the following operations must run on a healthy etcd node
+    # step2: remove jobs run on the healthy member if needed
     - name: get ID of etcd node to delete
      shell: "ETCDCTL_API=3 {{ bin_dir }}/etcdctl member list|grep {{ ETCD_TO_DEL }}:2380|cut -d',' -f1"
      register: ETCD_ID
@@ -56,16 +55,16 @@
      delegate_to: "{{ RUNNING_NODE.stdout }}"
      when: "ETCD_ID.stdout != ''"
-    #- name: remove data of the deleted etcd node if possible
-    #  file: name=/var/lib/etcd state=absent
-    #  delegate_to: "{{ ETCD_TO_DEL }}"
-    #  ignore_errors: true
-    - name: remove data of the deleted etcd node if possible
-      shell: "ssh {{ ETCD_TO_DEL }} rm -rf /var/lib/etcd"
-      ignore_errors: true
+    - name: clean etcd {{ NODE_TO_DEL }} if possible
+      shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
+            -e NODE_TO_CLEAN={{ NODE_TO_DEL }} \
+            -e DEL_ETCD=yes >> /tmp/ansible-playbook.log 2>&1 \
+            || echo 'data not cleaned on {{ ETCD_TO_DEL }}'"
+      register: CLEAN_STATUS
+    - debug: var="CLEAN_STATUS.stdout"
-    # deleting a node entry inside the [etcd] group: lineinfile does not handle this well, so use sed
+    # lineinfile is inadequate to delete lines between some specific line range
     - name: remove the etcd's node entry in hosts
      shell: 'sed -i "/^\[etcd/,/^\[kube-master/ {/^{{ ETCD_TO_DEL }}[^0-9]/d}" {{ base_dir }}/hosts'
      args:
@@ -73,6 +72,4 @@
   - name: reconfig and restart the etcd cluster
     shell: "ansible-playbook {{ base_dir }}/02.etcd.yml > /tmp/ansible-playbook.log 2>&1"
-    # only delete when the preconditions hold
     when: "groups['etcd']|length > 1 and ETCD_TO_DEL in groups['etcd']"

View File

@@ -1,34 +1,25 @@
-# WARNING: this playbook will clean the specified kube-node
-# USAGE: `easzctl del-node 1.1.1.1`
-- hosts: "{{ NODE_TO_DEL }}"
+# WARNING: this playbook will clean the node {{ NODE_TO_DEL }}
+# USAGE: easzctl del-node 1.1.1.1
+- hosts: localhost
   tasks:
-  - name: fail info1
-    fail: msg="you CAN NOT delete the last member of kube-master!"
+  - fail: msg="you CAN NOT delete the last member of kube-master!"
     when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"
-  - name: run kubectl drain to evict pods
+  - name: run kubectl drain @{{ NODE_TO_DEL }}
     shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
-    connection: local
     ignore_errors: true
-- hosts: "{{ NODE_TO_DEL }}"
-  vars:
-    DEL_NODE: "yes"
-    DEL_LB: "yes"
-    DEL_CHRONY: "yes"
-    DEL_ENV: "yes"
-  roles:
-  - clean
-  tasks:
-  - name: run kubectl delete node
-    shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
-    connection: local
+  - name: clean node {{ NODE_TO_DEL }} if possible
+    shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
+          -e NODE_TO_CLEAN={{ NODE_TO_DEL }} \
+          -e DEL_NODE=yes \
+          -e DEL_LB=yes >> /tmp/ansible-playbook.log 2>&1 \
+          || echo 'data not cleaned on {{ NODE_TO_DEL }}'"
+    register: CLEAN_STATUS
+  - debug: var="CLEAN_STATUS.stdout"
-  # remove the node entry in ansible hosts
-  # - name: rm {{ NODE_TO_DEL }} in ansible hosts
-  #   lineinfile:
-  #     dest: "{{ base_dir }}/hosts"
-  #     state: absent
-  #     regexp: '^{{ NODE_TO_DEL }}$|^{{ NODE_TO_DEL }}[^0-9]'
-  #   connection: local
+  - name: run kubectl delete node {{ NODE_TO_DEL }}
+    shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
+    ignore_errors: true
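Since the play now targets localhost, drain, cleanup, and node deletion are all driven from the ansible control host and no longer require a working connection to the node being removed (ignore_errors and the `|| echo` fallback cover an unreachable host). A sketch of the expected flow, with the placeholder IP from the header comment:

    easzctl del-node 1.1.1.1
    kubectl get node    # the drained node should be gone from the list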

View File

@@ -1,35 +1,26 @@
-# WARNING: this playbook will clean the specified kube-master node
-# USAGE: `easzctl del-master 1.1.1.1`
-- hosts: "{{ NODE_TO_DEL }}"
+# WARNING: this playbook will clean the kube-master node {{ NODE_TO_DEL }}
+# USAGE: easzctl del-master 1.1.1.1
+- hosts: localhost
   tasks:
-  - name: fail info1
-    fail: msg="you CAN NOT delete the last member of kube-master!"
+  - fail: msg="you CAN NOT delete the last member of kube-master!"
     when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"
-  - name: run kubectl drain to evict pods
+  - name: run kubectl drain @{{ NODE_TO_DEL }}
     shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
-    connection: local
     ignore_errors: true
-- hosts: "{{ NODE_TO_DEL }}"
-  vars:
-    DEL_MASTER: "yes"
-    DEL_NODE: "yes"
-    DEL_LB: "yes"
-    DEL_CHRONY: "yes"
-    DEL_ENV: "yes"
-  roles:
-  - clean
-  tasks:
-  - name: run kubectl delete node
-    shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
-    delegate_to: "{{ groups['kube-node'][0] }}"
+  - name: clean node {{ NODE_TO_DEL }} if possible
+    shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
+          -e NODE_TO_CLEAN={{ NODE_TO_DEL }} \
+          -e DEL_MASTER=yes \
+          -e DEL_NODE=yes \
+          -e DEL_LB=yes >> /tmp/ansible-playbook.log 2>&1 \
+          || echo 'data not cleaned on {{ NODE_TO_DEL }}'"
+    register: CLEAN_STATUS
+  - debug: var="CLEAN_STATUS.stdout"
-  # remove the node entry in ansible hosts
-  # - name: rm {{ NODE_TO_DEL }} in ansible hosts
-  #   lineinfile:
-  #     dest: "{{ base_dir }}/hosts"
-  #     state: absent
-  #     regexp: '^{{ NODE_TO_DEL }}$|^{{ NODE_TO_DEL }}[^0-9]'
-  #   connection: local
+  - name: run kubectl delete node {{ NODE_TO_DEL }}
+    shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
+    ignore_errors: true
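Deleting a master follows the same localhost-driven sequence, with DEL_MASTER=yes added to the nested cleanup; easzctl then regenerates the kubeconfig and restarts haproxy as shown in the next diff. For example:

    easzctl del-master 1.1.1.1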

View File

@@ -178,7 +178,7 @@ function del-master() {
   sed -i '/^\[kube-node/,/^\[harbor/{/^'"$1"'[^0-9]/d}' $BASEPATH/hosts
   # reconfig kubeconfig in ansible manage node
-  ansible-playbook $BASEPATH/01.prepare.yml -t create_kctl_cfg
+  ansible-playbook $BASEPATH/roles/deploy/deploy.yml -t create_kctl_cfg
   # reconfigure and restart the haproxy service on 'kube-node' nodes
   ansible-playbook $BASEPATH/05.kube-node.yml -t restart_lb -e MASTER_CHG=yes || { echo "[ERROR] Failed to restart the haproxy service on 'kube-node' nodes!"; return 2; }
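The sed expression restricts the delete to the inventory section between the [kube-node] and [harbor] headers, and the trailing [^0-9] keeps a shorter IP from matching a longer one (removing 1.1.1.1 must not also drop 1.1.1.11). A rough standalone sketch of the same pattern (the hosts file path is illustrative):

    # delete the entry for 1.1.1.1, but only inside the [kube-node] section
    sed -i '/^\[kube-node/,/^\[harbor/{/^1\.1\.1\.1[^0-9]/d}' ./hosts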