Adjust and fix the scripts for deleting master/node/etcd nodes

pull/646/head
gjmzj 2019-07-24 10:19:31 +08:00
parent 0d44da6c4f
commit 2830a3587b
6 changed files with 58 additions and 72 deletions

@@ -0,0 +1,3 @@
- hosts: "{{ NODE_TO_CLEAN }}"
roles:
- clean
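The new roles/clean/clean_node.yml above is a thin entry playbook: it applies the existing clean role to whichever host is passed in as NODE_TO_CLEAN, so the del-etcd/del-node/del-master playbooks further down can shell out to it instead of embedding the role themselves. A minimal invocation sketch, assuming it is run from the repo root (so ansible.cfg and the roles path are picked up) and that 192.168.1.11 is a hypothetical node already listed in the hosts file:

  # clean a single node; the DEL_* switches select which parts get cleaned
  ansible-playbook roles/clean/clean_node.yml \
    -e NODE_TO_CLEAN=192.168.1.11 \
    -e DEL_NODE=yes -e DEL_LB=yes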

@@ -0,0 +1,4 @@
# to create CA, kubeconfig, kube-proxy.kubeconfig etc.
- hosts: localhost
roles:
- deploy
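The second new file, roles/deploy/deploy.yml, wraps the deploy role in a localhost-only play, presumably so that the CA and the various kubeconfig files can be regenerated without re-running the whole 01.prepare.yml. The easzctl change at the end of this diff drives it with the create_kctl_cfg tag; a hedged sketch of that call, assuming the repo root as working directory:

  # regenerate kubectl config on the deploy machine only
  ansible-playbook roles/deploy/deploy.yml -t create_kctl_cfg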

@@ -1,5 +1,5 @@
# WARNING: this script will clean the specified etcd node
# USAGE: `easzctl del-etcd 1.1.1.1`
# WARNING: this playbook will clean the etcd {{ ETCD_TO_DEL }}
# USAGE: easzctl del-etcd 1.1.1.1
- hosts: localhost
vars_prompt:
@ -8,16 +8,15 @@
private: no
confirm: yes
tasks:
- name: fail info1
fail: msg="{{ ETCD_TO_DEL }} is NOT a member of etcd cluster!"
# step0: run prechecks
- fail: msg="{{ ETCD_TO_DEL }} is NOT a member of etcd cluster!"
when: "ETCD_TO_DEL not in groups['etcd']"
- name: fail info2
fail: msg="you CAN NOT delete the last member of etcd cluster!"
- fail: msg="you CAN NOT delete the last member of etcd cluster!"
when: "groups['etcd']|length < 2"
- block:
# the following tasks look for the first healthy node in the etcd cluster
# step1: find a healthy member in the etcd cluster
- name: set NODE_IPS of the etcd cluster
set_fact: NODE_IPS="{% for host in groups['etcd'] %}{{ host }} {% endfor %}"
@@ -40,7 +39,7 @@
- debug: var="RUNNING_NODE.stdout"
# operations can only be performed on a healthy etcd node
# step2: remove jobs run on the healthy member if needed
- name: get ID of etcd node to delete
shell: "ETCDCTL_API=3 {{ bin_dir }}/etcdctl member list|grep {{ ETCD_TO_DEL }}:2380|cut -d',' -f1"
register: ETCD_ID
@ -56,16 +55,16 @@
delegate_to: "{{ RUNNING_NODE.stdout }}"
when: "ETCD_ID.stdout != ''"
#- name: remove data of the deleted etcd node if possible
# file: name=/var/lib/etcd state=absent
# delegate_to: "{{ ETCD_TO_DEL }}"
# ignore_errors: true
- name: clean etcd {{ ETCD_TO_DEL }} if possible
shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
-e NODE_TO_CLEAN={{ ETCD_TO_DEL }} \
-e DEL_ETCD=yes >> /tmp/ansible-playbook.log 2>&1 \
|| echo 'data not cleaned on {{ ETCD_TO_DEL }}'"
register: CLEAN_STATUS
- name: remove data of the deleted etcd node if possible
shell: "ssh {{ ETCD_TO_DEL }} rm -rf /var/lib/etcd"
ignore_errors: true
- debug: var="CLEAN_STATUS.stdout"
# lineinfile does not work well for deleting the node entry within the [etcd] group, so sed has to be used
# lineinfile is inadequate for deleting lines within a specific line range
- name: remove the etcd's node entry in hosts
shell: 'sed -i "/^\[etcd/,/^\[kube-master/ {/^{{ ETCD_TO_DEL }}[^0-9]/d}" {{ base_dir }}/hosts'
args:
@@ -73,6 +72,4 @@
- name: reconfig and restart the etcd cluster
shell: "ansible-playbook {{ base_dir }}/02.etcd.yml > /tmp/ansible-playbook.log 2>&1"
# only delete when the conditions are met
when: "groups['etcd']|length > 1 and ETCD_TO_DEL in groups['etcd']"

@@ -1,34 +1,25 @@
# WARNING: this script will clean the specified kube-node node
# USAGE: `easzctl del-node 1.1.1.1`
# WARNING: this playbook will clean the node {{ NODE_TO_DEL }}
# USAGE: easzctl del-node 1.1.1.1
- hosts: "{{ NODE_TO_DEL }}"
- hosts: localhost
tasks:
- name: fail info1
fail: msg="you CAN NOT delete the last member of kube-master!"
- fail: msg="you CAN NOT delete the last member of kube-master!"
when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"
- name: run kubectl drain to migrate pods
- name: run kubectl drain @{{ NODE_TO_DEL }}
shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
connection: local
ignore_errors: true
- hosts: "{{ NODE_TO_DEL }}"
vars:
DEL_NODE: "yes"
DEL_LB: "yes"
DEL_CHRONY: "yes"
DEL_ENV: "yes"
roles:
- clean
tasks:
- name: run kubectl delete node
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
connection: local
- name: clean node {{ NODE_TO_DEL }} if possible
shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
-e NODE_TO_CLEAN={{ NODE_TO_DEL }} \
-e DEL_NODE=yes \
-e DEL_LB=yes >> /tmp/ansible-playbook.log 2>&1 \
|| echo 'data not cleaned on {{ NODE_TO_DEL }}'"
register: CLEAN_STATUS
# remove the node entry from the ansible hosts file
# - name: rm {{ NODE_TO_DEL }} in ansible hosts
# lineinfile:
# dest: "{{ base_dir }}/hosts"
# state: absent
# regexp: '^{{ NODE_TO_DEL }}$|^{{ NODE_TO_DEL }}[^0-9]'
# connection: local
- debug: var="CLEAN_STATUS.stdout"
- name: run kubectl delete node {{ NODE_TO_DEL }}
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
ignore_errors: true
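After this rework the del-node playbook runs entirely from localhost: drain the node, shell out to clean_node.yml, then delete the node object. With the templates resolved for a hypothetical node 192.168.1.13, the sequence amounts to roughly:

  # evict pods, ignoring DaemonSets and local emptyDir data
  kubectl drain 192.168.1.13 --ignore-daemonsets --delete-local-data
  # clean services and runtime files on the node itself
  ansible-playbook roles/clean/clean_node.yml -e NODE_TO_CLEAN=192.168.1.13 -e DEL_NODE=yes -e DEL_LB=yes
  # finally remove the node object from the cluster
  kubectl delete node 192.168.1.13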

@@ -1,35 +1,26 @@
# WARNING: this script will clean the specified kube-master node
# USAGE: `easzctl del-master 1.1.1.1`
# WARNING: this playbook will clean the kube-master node {{ NODE_TO_DEL }}
# USAGE: easzctl del-master 1.1.1.1
- hosts: "{{ NODE_TO_DEL }}"
- hosts: localhost
tasks:
- name: fail info1
fail: msg="you CAN NOT delete the last member of kube-master!"
- fail: msg="you CAN NOT delete the last member of kube-master!"
when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"
- name: run kubectl drain to migrate pods
- name: run kubectl drain @{{ NODE_TO_DEL }}
shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
connection: local
ignore_errors: true
- hosts: "{{ NODE_TO_DEL }}"
vars:
DEL_MASTER: "yes"
DEL_NODE: "yes"
DEL_LB: "yes"
DEL_CHRONY: "yes"
DEL_ENV: "yes"
roles:
- clean
tasks:
- name: run kubectl delete node
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
delegate_to: "{{ groups['kube-node'][0] }}"
- name: clean node {{ NODE_TO_DEL }} if possible
shell: "ansible-playbook {{ base_dir }}/roles/clean/clean_node.yml \
-e NODE_TO_CLEAN={{ NODE_TO_DEL }} \
-e DEL_MASTER=yes \
-e DEL_NODE=yes \
-e DEL_LB=yes >> /tmp/ansible-playbook.log 2>&1 \
|| echo 'data not cleaned on {{ NODE_TO_DEL }}'"
register: CLEAN_STATUS
# remove the node entry from the ansible hosts file
# - name: rm {{ NODE_TO_DEL }} in ansible hosts
# lineinfile:
# dest: "{{ base_dir }}/hosts"
# state: absent
# regexp: '^{{ NODE_TO_DEL }}$|^{{ NODE_TO_DEL }}[^0-9]'
# connection: local
- debug: var="CLEAN_STATUS.stdout"
- name: run kubectl delete node {{ NODE_TO_DEL }}
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
ignore_errors: true
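del-master follows the same drain/clean/delete flow, with the extra -e DEL_MASTER=yes switch, and no longer delegates kubectl delete node to the first kube-node host. A quick post-removal check from the deploy machine, assuming kubectl is on the PATH, might look like:

  # confirm the removed master is gone and the API is still reachable
  kubectl get node -o wide
  kubectl cluster-info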

@@ -178,7 +178,7 @@ function del-master() {
sed -i '/^\[kube-node/,/^\[harbor/{/^'"$1"'[^0-9]/d}' $BASEPATH/hosts
# reconfigure kubeconfig on the ansible deploy node
ansible-playbook $BASEPATH/01.prepare.yml -t create_kctl_cfg
ansible-playbook $BASEPATH/roles/deploy/deploy.yml -t create_kctl_cfg
# reconfigure and restart the haproxy service on 'kube-node' nodes
ansible-playbook $BASEPATH/05.kube-node.yml -t restart_lb -e MASTER_CHG=yes || { echo "[ERROR] Failed to restart the haproxy service on 'kube-node' nodes!"; return 2; }
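Both the hosts cleanup in the del-etcd playbook and the easzctl line above rely on the same section-scoped sed delete: restrict the delete command to the lines between two group headers, then drop only the line that starts with the target IP (the trailing [^0-9] keeps 192.168.1.2 from also matching 192.168.1.20, but it does require at least one character after the IP). A self-contained demonstration with GNU sed against a hypothetical hosts fragment:

  # build a throw-away hosts fragment (hypothetical layout)
  printf '%s\n' '[kube-master]' '192.168.1.2 NODE_NAME=master1' \
    '[kube-node]' '192.168.1.2 NODE_NAME=node1' '192.168.1.20 NODE_NAME=node2' \
    '[harbor]' > /tmp/hosts.demo
  # delete 192.168.1.2 only inside the [kube-node]..[harbor] range
  sed -i '/^\[kube-node/,/^\[harbor/{/^192.168.1.2[^0-9]/d}' /tmp/hosts.demo
  cat /tmp/hosts.demo   # the [kube-master] entry and 192.168.1.20 are left untouched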