kubeasz/tools/clean_one_node.yml

215 lines
6.7 KiB
YAML
Raw Normal View History

# 警告此脚本将清理单个node节点使用请详细参阅 docs/op/clean_one_node.md
# 请三思后运行此脚本特别的如果有pod使用了本地存储类型请自行判断重要性
# 使用:
2019-02-19 16:38:10 +08:00
# 1. 执行 ansible-playbook /etc/ansible/tools/clean_one_node.yml
# 2. 按照提示输入待删除节点
- hosts: deploy
2019-02-19 16:38:10 +08:00
vars_prompt:
- name: "NODE_TO_DEL"
prompt: "which node is about to be deleted?(e.g 192.168.1.1)"
private: no
confirm: yes
tasks:
- name: fail info1
fail: msg="you CAN NOT delete the last member of etcd cluster!"
when: "groups['etcd']|length < 2 and NODE_TO_DEL in groups['etcd']"
- name: fail info2
fail: msg="you CAN NOT delete the last member of kube-master!"
when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"
- name: 执行kubectl drain(节点可能是kube-node节点)
2019-02-19 16:38:10 +08:00
shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
ignore_errors: true
2019-02-19 16:38:10 +08:00
- block:
# 清理 kube-node 相关服务
- name: stop and disable kube-node service
service: name={{ item }} state=stopped enabled=no
with_items:
- kubelet
- kube-proxy
ignore_errors: true
- name: umount kubelet 挂载的目录
shell: "mount | grep '/var/lib/kubelet'| awk '{print $3}'|xargs umount"
args:
warn: false
ignore_errors: true
- name: 清理目录和文件
file: name={{ item }} state=absent
with_items:
- "/var/lib/kubelet/"
- "/var/lib/kube-proxy/"
- "/etc/systemd/system/kubelet.service"
- "/etc/systemd/system/kube-proxy.service"
- "/opt/kube/kube-system/"
# 清理 kube-master 相关
- name: stop and disable kube-master service
service: name={{ item }} state=stopped enabled=no
with_items:
- kube-apiserver
- kube-controller-manager
- kube-scheduler
ignore_errors: true
- name: 清理目录和文件
file: name={{ item }} state=absent
with_items:
- "/var/run/kubernetes"
- "/etc/systemd/system/kube-apiserver.service"
- "/etc/systemd/system/kube-controller-manager.service"
- "/etc/systemd/system/kube-scheduler.service"
# 清理集群docker服务、网络相关
- name: 清理kube-router相关
shell: "{{ bin_dir }}/docker run --privileged --net=host cloudnativelabs/kube-router --cleanup-config"
ignore_errors: true
when: "CLUSTER_NETWORK == 'kube-router'"
- name: stop and disable docker service
service:
name: docker
state: stopped
enabled: no
ignore_errors: true
# 因为calico-kube-controller使用了host网络相当于使用了docker -net=host需要
# 卸载 /var/run/docker/netns/default
- name: 卸载docker 相关fs1
mount: path=/var/run/docker/netns/default state=unmounted
- name: 卸载docker 相关fs2
mount: path=/var/lib/docker/overlay state=unmounted
- name: 清理目录和文件
file: name={{ item }} state=absent
with_items:
- "/etc/cni/"
- "/root/.kube/"
- "/run/flannel/"
- "/etc/calico/"
- "/var/run/calico/"
- "/var/lib/calico/"
- "/var/log/calico/"
- "/etc/cilium/"
- "/var/run/cilium/"
- "/sys/fs/bpf/tc/"
- "/var/lib/cni/"
- "/var/lib/docker/"
- "/var/lib/kube-router/"
- "/var/run/docker/"
- "/etc/systemd/system/calico-node.service"
- "/etc/systemd/system/docker.service"
- "/etc/systemd/system/docker.service.requires/"
- "/etc/systemd/system/docker.service.d/"
- "/opt/kube/kube-system/"
- "/etc/bash_completion.d/docker"
ignore_errors: true
- name: 清理 iptables
shell: "iptables -F && iptables -X \
&& iptables -F -t nat && iptables -X -t nat \
&& iptables -F -t raw && iptables -X -t raw \
&& iptables -F -t mangle && iptables -X -t mangle"
- name: 清理网络
shell: "ip link del docker0; \
ip link del tunl0; \
ip link del flannel.1; \
ip link del cni0; \
ip link del mynet0; \
ip link del kube-bridge; \
ip link del dummy0; \
ip link del kube-ipvs0; \
ip link del cilium_net; \
ip link del cilium_vxlan; \
systemctl restart networking; \
systemctl restart network"
ignore_errors: true
- name: 清理calico残留路由
shell: "for rt in `ip route|grep bird|sed 's/blackhole//'|awk '{print $1}'`;do ip route del $rt;done;"
when: "CLUSTER_NETWORK == 'calico'"
ignore_errors: true
# 清理etcd 集群相关
- name: stop and disable etcd service
service:
name: etcd
state: stopped
enabled: no
ignore_errors: true
- name: 清理目录和文件
file: name={{ item }} state=absent
with_items:
- "/var/lib/etcd"
- "/etc/etcd/"
- "/backup/k8s"
- "/etc/systemd/system/etcd.service"
# 清理负载均衡相关
- name: stop keepalived service
shell: systemctl disable keepalived && systemctl stop keepalived
ignore_errors: true
- name: stop haproxy service
shell: systemctl disable haproxy && systemctl stop haproxy
ignore_errors: true
- name: 清理LB 配置文件目录
file: name={{ item }} state=absent
with_items:
- "/etc/haproxy"
- "/etc/keepalived"
# 清理其他
- name: stop and disable chrony in Ubuntu
service: name=chrony state=stopped enabled=no
ignore_errors: true
tags: rm_ntp
when: ansible_distribution == "Ubuntu" or ansible_distribution == "Debian"
- name: stop and disable chronyd in CentOS/RedHat
service: name=chronyd state=stopped enabled=no
ignore_errors: true
tags: rm_ntp
when: ansible_distribution == "CentOS" or ansible_distribution == "RedHat"
- name: 清理证书目录和文件
file: name={{ item }} state=absent
with_items:
- "/etc/kubernetes/"
- "{{ ca_dir }}"
- "/root/.kube/"
- "/etc/docker/"
- name: 清理自动生成的PATH
lineinfile:
dest: ~/.bashrc
state: absent
regexp: '{{ item }}'
with_items:
- 'kubeasz'
- 'helm'
- 'kubectl completion'
delegate_to: "{{ NODE_TO_DEL }}"
run_once: true
# 执行kubectl delete(节点可能是kube-node节点)
- name: 执行kubectl delete(节点可能是kube-node节点)
shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
2018-08-28 21:53:43 +08:00
ignore_errors: true
2019-02-19 16:38:10 +08:00
# 删除 ansible hosts 中节点配置
- name: rm {{ NODE_TO_DEL }} in ansible hosts
lineinfile:
2019-02-19 16:38:10 +08:00
dest: "{{ base_dir }}/hosts"
state: absent
2019-02-19 16:38:10 +08:00
regexp: '{{ NODE_TO_DEL }}'
connection: local