# kubespray/roles/reset/tasks/main.yml (423 lines, 11 KiB, YAML)

---
# Stop the kubelet, cri-dockerd and etcd units. Failures are suppressed
# (failed_when: false), so a unit that cannot be stopped does not abort the run.
- name: Reset | stop services
  tags: [services]
  ansible.builtin.service:
    name: "{{ item }}"
    state: stopped
  loop:
    - kubelet.service
    - cri-dockerd.service
    - cri-dockerd.socket
    - etcd.service
    - etcd-events.service
  failed_when: false
# Delete unit files and drop-ins from /etc/systemd/system. The result is
# registered as services_removed so a later task can decide whether a
# systemd daemon-reload is needed.
- name: Reset | remove services
  tags: [services, containerd, crio]
  ansible.builtin.file:
    path: "/etc/systemd/system/{{ item }}"
    state: absent
  loop:
    - kubelet.service
    - cri-dockerd.service
    - cri-dockerd.socket
    - calico-node.service
    - containerd.service.d/http-proxy.conf
    - crio.service.d/http-proxy.conf
    - k8s-certs-renew.service
    - k8s-certs-renew.timer
    - etcd.service
    - etcd-events.service
  register: services_removed
# When Docker is the container manager, hand cleanup over to the `reset`
# tasks file of the container-engine/docker role.
- name: Reset | Remove Docker
  tags: [docker]
  when: container_manager == 'docker'
  ansible.builtin.include_role:
    name: container-engine/docker
    tasks_from: reset
# Reload systemd only if the unit-file removal task reported a change.
- name: Reset | systemctl daemon-reload  # noqa no-handler
  when:
    - services_removed.changed
  ansible.builtin.systemd:
    daemon_reload: true
# Record whether the crictl binary exists in bin_dir; later tasks in this
# file gate on crictl.stat.exists. Attribute, checksum and MIME collection
# are switched off.
- name: Reset | check if crictl is present
  ansible.builtin.stat:
    path: "{{ bin_dir }}/crictl"
    get_attributes: false
    get_checksum: false
    get_mime: false
  register: crictl
# Stop every container listed by `crictl ps -q`, passing the configured
# grace period. Retried up to 5 times, 5 seconds apart; errors are ignored.
- name: Reset | stop all cri containers
  tags:
    - crio
    - containerd
  when:
    - crictl.stat.exists
    - container_manager in ["crio", "containerd"]
    - ansible_facts.services['containerd.service'] is defined or ansible_facts.services['cri-o.service'] is defined
  ansible.builtin.shell:
    cmd: "set -o pipefail && {{ bin_dir }}/crictl ps -q | xargs -r {{ bin_dir }}/crictl -t 60s stop -t {{ cri_stop_containers_grace_period }}"
    # bash is required for `set -o pipefail`
    executable: /bin/bash
  register: remove_all_cri_containers
  until: remove_all_cri_containers.rc == 0
  retries: 5
  delay: 5
  ignore_errors: true  # noqa ignore-errors
# Force-remove all CRI containers with `crictl rm -a -f`. Runs only when
# deploy_container_engine is set; retried up to 5 times, errors are ignored.
- name: Reset | force remove all cri containers
  tags:
    - crio
    - containerd
  when:
    - crictl.stat.exists
    - container_manager in ["crio", "containerd"]
    - deploy_container_engine
    - ansible_facts.services['containerd.service'] is defined or ansible_facts.services['cri-o.service'] is defined
  ansible.builtin.command:
    cmd: "{{ bin_dir }}/crictl rm -a -f"
  register: remove_all_cri_containers
  until: remove_all_cri_containers.rc == 0
  retries: 5
  delay: 5
  ignore_errors: true  # noqa ignore-errors
# Stop the crio service and disable it at boot; failures are suppressed.
- name: Reset | stop and disable crio service
  tags:
    - crio
  when: container_manager == "crio"
  ansible.builtin.service:
    name: crio
    enabled: false
    state: stopped
  failed_when: false
# Run `crio wipe -f` when CRI-O is the container manager; a failing
# command is not treated as a task failure.
- name: Reset | forcefully wipe CRI-O's container and image storage
  tags:
    - crio
  when: container_manager == "crio"
  ansible.builtin.command:
    cmd: "crio wipe -f"
  failed_when: false
# Stop every pod sandbox listed by `crictl pods -q` (containerd only).
# Retried up to 5 times, 5 seconds apart; errors are ignored.
- name: Reset | stop all cri pods
  tags:
    - containerd
  when:
    - crictl.stat.exists
    - container_manager == "containerd"
    - ansible_facts.services['containerd.service'] is defined or ansible_facts.services['cri-o.service'] is defined
  ansible.builtin.shell:
    cmd: "set -o pipefail && {{ bin_dir }}/crictl pods -q | xargs -r {{ bin_dir }}/crictl -t 60s stopp"
    # bash is required for `set -o pipefail`
    executable: /bin/bash
  register: remove_all_cri_containers
  until: remove_all_cri_containers.rc == 0
  retries: 5
  delay: 5
  ignore_errors: true  # noqa ignore-errors
# Force-remove all pod sandboxes. If `crictl rmp` still fails after its
# retries, the rescue branch deletes the network namespaces reported by
# `ip netns list` and then tries the removal once more.
- name: Reset | force remove all cri pods
  block:
    - name: Reset | force remove all cri pods
      ansible.builtin.command: "{{ bin_dir }}/crictl rmp -a -f"
      register: remove_all_cri_containers
      retries: 5
      until: remove_all_cri_containers.rc == 0
      delay: 5
      tags: [containerd]
      when:
        - crictl.stat.exists
        - container_manager == "containerd"
        - ansible_facts.services['containerd.service'] is defined or ansible_facts.services['cri-o.service'] is defined
  rescue:
    - name: Reset | force remove all cri pods (rescue)
      # `xargs -r` skips `ip netns delete` when no namespace is listed.
      # Without it the delete runs with no argument, fails, and the `&&`
      # prevents the crictl retry from ever running.
      ansible.builtin.shell: "ip netns list | cut -d' ' -f 1 | xargs -r -n1 ip netns delete && {{ bin_dir }}/crictl rmp -a -f"
      ignore_errors: true  # noqa ignore-errors
      changed_when: true
# containerd-only cleanup: stop the service (failures suppressed), then
# delete its unit file from /etc/systemd/system.
- name: Reset | remove containerd
  when: container_manager == 'containerd'
  block:
    - name: Reset | stop containerd service
      tags: [services]
      ansible.builtin.service:
        name: containerd
        state: stopped
      failed_when: false

    - name: Reset | remove containerd service
      tags: [services]
      ansible.builtin.file:
        path: /etc/systemd/system/containerd.service
        state: absent
      register: services_removed
# List mount points below /var/lib/kubelet/ in reverse order (`tac`).
# Runs even in check mode and never reports failed or changed; the output
# is registered as mounted_dirs.
- name: Reset | gather mounted kubelet dirs
  tags: [mounts]
  ansible.builtin.shell:
    cmd: "set -o pipefail && mount | grep /var/lib/kubelet/ | awk '{print $3}' | tac"
    # bash is required for `set -o pipefail`
    executable: /bin/bash
  register: mounted_dirs
  check_mode: false
  changed_when: false
  failed_when: false
# Force-unmount each path collected in mounted_dirs. Every unmount is
# retried up to 4 times, 5 seconds apart.
- name: Reset | unmount kubelet dirs
  ansible.builtin.command:
    # argv passes the path as a single argument, with no word-splitting.
    argv:
      - umount
      - -f
      - "{{ item }}"
  loop: "{{ mounted_dirs.stdout_lines | default([]) }}"
  register: umount_dir
  # Explicit boolean test; the previous bare `mounted_dirs` evaluated a
  # registered result dict, which is not a boolean conditional.
  when: mounted_dirs.stdout_lines | default([]) | length > 0
  retries: 4
  until: umount_dir.rc == 0
  delay: 5
  tags:
    - mounts
# Flush the filter, nat, mangle and raw tables when flush_iptables is true.
- name: Flush iptables
  tags: [iptables]
  when: flush_iptables | bool
  ansible.builtin.iptables:
    table: "{{ item }}"
    flush: true
  loop:
    - filter
    - nat
    - mangle
    - raw
# Flush the IPv6 filter, nat, mangle and raw tables. Runs only when both
# flush_iptables and enable_dual_stack_networks are true.
- name: Flush ip6tables
  ansible.builtin.iptables:
    table: "{{ item }}"
    flush: true
    ip_version: ipv6
  with_items:
    - filter
    - nat
    - mangle
    - raw
  when:
    - flush_iptables | bool
    # Cast like flush_iptables so a string such as "false" (e.g. passed
    # with -e) is not treated as truthy.
    - enable_dual_stack_networks | bool
  tags:
    - ip6tables
# Run `ipvsadm -C` on k8s_cluster hosts when kube-proxy runs in ipvs mode;
# errors are ignored.
- name: Clear IPVS virtual server table
  when:
    - kube_proxy_mode == 'ipvs'
    - inventory_hostname in groups['k8s_cluster']
  ansible.builtin.command:
    cmd: "ipvsadm -C"
  ignore_errors: true  # noqa ignore-errors
# Probe /sys/class/net for the kube-ipvs0 device; the result is registered
# as kube_ipvs0.
- name: Reset | check kube-ipvs0 network device
  ansible.builtin.stat:
    path: /sys/class/net/kube-ipvs0
    get_attributes: false
    get_checksum: false
    get_mime: false
  register: kube_ipvs0
# Delete the kube-ipvs0 link if it exists and kube-proxy runs in ipvs mode.
- name: Reset | Remove kube-ipvs0
  when:
    - kube_proxy_mode == 'ipvs'
    - kube_ipvs0.stat.exists
  ansible.builtin.command:
    cmd: "ip link del kube-ipvs0"
# Probe /sys/class/net for the nodelocaldns device; the result is
# registered as nodelocaldns_device.
- name: Reset | check nodelocaldns network device
  ansible.builtin.stat:
    path: /sys/class/net/nodelocaldns
    get_attributes: false
    get_checksum: false
    get_mime: false
  register: nodelocaldns_device
# Delete the nodelocaldns link if it exists and enable_nodelocaldns is true
# (treated as false when undefined).
- name: Reset | Remove nodelocaldns
  when:
    - enable_nodelocaldns | default(false) | bool
    - nodelocaldns_device.stat.exists
  ansible.builtin.command:
    cmd: "ip link del nodelocaldns"
# Record whether /var/lib/kubelet exists; the two immutable-flag tasks that
# follow are gated on this result.
- name: Reset | Check whether /var/lib/kubelet directory exists
  ansible.builtin.stat:
    path: /var/lib/kubelet
    get_attributes: false
    get_checksum: false
    get_mime: false
  register: var_lib_kubelet_directory
# Recursively list file attributes below /var/lib/kubelet/ with `lsattr`.
# Read-only (never reports changed); output is hidden from logs and
# registered for the follow-up task.
- name: Reset | Find files/dirs with immutable flag in /var/lib/kubelet
  when: var_lib_kubelet_directory.stat.exists
  become: true
  ansible.builtin.command:
    cmd: lsattr -laR /var/lib/kubelet/
  register: var_lib_kubelet_files_dirs_w_attrs
  no_log: true
  changed_when: false
# For each lsattr output line containing "Immutable", clear the `i`
# attribute on the path named in the first space-separated field.
- name: Reset | Remove immutable flag from files/dirs in /var/lib/kubelet
  ansible.builtin.file:
    path: "{{ filedir_path }}"
    state: touch
    attributes: "-i"
    # Quoted so the mode does not depend on YAML 1.1 implicit octal parsing.
    mode: "0644"
  loop: "{{ var_lib_kubelet_files_dirs_w_attrs.stdout_lines | select('search', 'Immutable') | list }}"
  loop_control:
    loop_var: file_dir_line
    label: "{{ filedir_path }}"
  vars:
    filedir_path: "{{ file_dir_line.split(' ')[0] }}"
  when: var_lib_kubelet_directory.stat.exists
# Remove the listed configuration, data, log, binary and shell-completion
# paths. Errors on individual paths are ignored so the remaining entries
# are still processed.
- name: Reset | delete some files and directories
  tags: [files]
  ansible.builtin.file:
    path: "{{ item }}"
    state: absent
  ignore_errors: true  # noqa ignore-errors
  with_items:
    - "{{ kube_config_dir }}"
    - /var/lib/kubelet
    - "{{ containerd_storage_dir }}"
    - "{{ ansible_env.HOME | default('/root') }}/.kube"
    - "{{ ansible_env.HOME | default('/root') }}/.helm"
    - "{{ ansible_env.HOME | default('/root') }}/.config/helm"
    - "{{ ansible_env.HOME | default('/root') }}/.cache/helm"
    - "{{ ansible_env.HOME | default('/root') }}/.local/share/helm"
    - "{{ etcd_data_dir }}"
    - "{{ etcd_events_data_dir }}"
    - "{{ etcd_config_dir }}"
    - /var/log/calico
    - /var/log/openvswitch
    - /var/log/ovn
    - /var/log/kube-ovn
    - /etc/cni
    - /etc/nerdctl
    - "{{ nginx_config_dir }}"
    - /etc/dnsmasq.d
    - /etc/dnsmasq.conf
    - /etc/dnsmasq.d-available
    - /etc/systemd/resolved.conf.d/kubespray.conf
    - /etc/etcd.env
    - /etc/calico
    - /etc/NetworkManager/conf.d/calico.conf
    - /etc/NetworkManager/conf.d/k8s.conf
    - /etc/weave.env
    - /opt/cni
    - /etc/dhcp/dhclient.d/zdnsupdate.sh
    - /etc/dhcp/dhclient-exit-hooks.d/zdnsupdate
    - /run/flannel
    - /etc/flannel
    - /run/kubernetes
    - /usr/local/share/ca-certificates/etcd-ca.crt
    - /usr/local/share/ca-certificates/kube-ca.crt
    - /etc/ssl/certs/etcd-ca.pem
    - /etc/ssl/certs/kube-ca.pem
    - /etc/pki/ca-trust/source/anchors/etcd-ca.crt
    - /etc/pki/ca-trust/source/anchors/kube-ca.crt
    - /var/log/pods/
    - "{{ bin_dir }}/kubelet"
    - "{{ bin_dir }}/cri-dockerd"
    - "{{ bin_dir }}/etcd-scripts"
    - "{{ bin_dir }}/etcd"
    - "{{ bin_dir }}/etcd-events"
    - "{{ bin_dir }}/etcdctl"
    - "{{ bin_dir }}/etcdctl.sh"
    - "{{ bin_dir }}/kubernetes-scripts"
    - "{{ bin_dir }}/kubectl"
    - "{{ bin_dir }}/kubeadm"
    - "{{ bin_dir }}/helm"
    - "{{ bin_dir }}/calicoctl"
    - "{{ bin_dir }}/calicoctl.sh"
    - "{{ bin_dir }}/calico-upgrade"
    - "{{ bin_dir }}/weave"
    - "{{ bin_dir }}/crictl"
    - "{{ bin_dir }}/nerdctl"
    - "{{ bin_dir }}/netctl"
    - "{{ bin_dir }}/k8s-certs-renew.sh"
    - /var/lib/cni
    - /etc/openvswitch
    - /run/openvswitch
    - /var/lib/kube-router
    - /var/lib/calico
    - /etc/cilium
    - /run/calico
    - /etc/bash_completion.d/kubectl.sh
    - /etc/bash_completion.d/crictl
    - /etc/bash_completion.d/nerdctl
    - /etc/bash_completion.d/krew
    - /etc/bash_completion.d/krew.sh
    - "{{ krew_root_dir }}"
    - /etc/modules-load.d/kube_proxy-ipvs.conf
    - /etc/modules-load.d/kubespray-br_netfilter.conf
    - /etc/modules-load.d/kubespray-kata-containers.conf
    - /usr/libexec/kubernetes
    - /etc/origin/openvswitch
    - /etc/origin/ovn
    - "{{ sysctl_file_path }}"
    - /etc/crictl.yaml
# Delete the listed binaries from containerd_bin_dir when containerd is the
# container manager; errors are ignored.
- name: Reset | remove containerd binary files
  tags: [files]
  when: container_manager == 'containerd'
  ansible.builtin.file:
    path: "{{ containerd_bin_dir }}/{{ item }}"
    state: absent
  loop:
    - containerd
    - containerd-shim
    - containerd-shim-runc-v1
    - containerd-shim-runc-v2
    - containerd-stress
    - crictl
    - critest
    - ctd-decoder
    - ctr
    - runc
  ignore_errors: true  # noqa ignore-errors
# Remove the block delimited by "# Ansible entries ..." markers from both
# dhclient.conf locations; failures are suppressed.
- name: Reset | remove dns settings from dhclient.conf
  tags: [files, dns]
  ansible.builtin.blockinfile:
    path: "{{ item }}"
    marker: "# Ansible entries {mark}"
    state: absent
  loop:
    - /etc/dhclient.conf
    - /etc/dhcp/dhclient.conf
  failed_when: false
# Remove the block delimited by "# Ansible inventory hosts ..." markers
# from /etc/hosts.
- name: Reset | remove host entries from /etc/hosts
  tags: [files, dns]
  ansible.builtin.blockinfile:
    path: "/etc/hosts"
    marker: "# Ansible inventory hosts {mark}"
    state: absent
# Run the `reset` tasks of the active network plugin role, but only for the
# plugins named in the condition below.
- name: Reset | include file with reset tasks specific to the network_plugin if exists
  tags: [network]
  when:
    - kube_network_plugin in ['flannel', 'cilium', 'kube-router', 'calico']
  ansible.builtin.include_role:
    name: "network_plugin/{{ kube_network_plugin }}"
    tasks_from: reset
# Restart the configured network service when reset_restart_network is
# true; skipped when ansible_os_family is one of the Flatcar values below.
- name: Reset | Restart network
  tags: [services, network]
  when:
    - ansible_os_family not in ["Flatcar", "Flatcar Container Linux by Kinvolk"]
    - reset_restart_network | bool
  ansible.builtin.service:
    name: "{{ reset_restart_network_service_name }}"
    state: restarted