From 682c8a59c23d6de88f6333696e54332b0692a1ad Mon Sep 17 00:00:00 2001 From: Cristian Calin <6627509+cristicalin@users.noreply.github.com> Date: Fri, 10 Dec 2021 00:09:06 +0200 Subject: [PATCH] containerd: change default resolvconf_mode to host_resolvconf (#8247) * containerd: change default resolvconf_mode to host_resolvconf * Wait for kube-apiserver to come back after pod refresh * Handle resolv.conf gracefully * Retain currently configured DNS entries to ensure we don't break the resolvers * Suse uses wickedd for network management so no dhcp hooks * Molecule: increase ansible timeout * CI: Increase ansible timeout to 120s for Packet jobs --- .gitlab-ci/packet.yml | 1 + .../group_vars/k8s_cluster/k8s-cluster.yml | 2 +- roles/adduser/molecule/default/molecule.yml | 4 ++ .../molecule/default/molecule.yml | 4 ++ .../molecule/default/molecule.yml | 4 ++ .../containerd/molecule/default/molecule.yml | 1 + .../cri-o/molecule/default/molecule.yml | 1 + .../docker/molecule/default/molecule.yml | 1 + .../gvisor/molecule/default/molecule.yml | 1 + .../molecule/default/molecule.yml | 1 + roles/kubernetes/preinstall/handlers/main.yml | 16 ++++++++ .../preinstall/tasks/0040-set_facts.yml | 37 ++++++++++++++++++- .../preinstall/tasks/0060-resolvconf.yml | 7 ++-- .../preinstall/tasks/0100-dhclient-hooks.yml | 2 +- roles/kubespray-defaults/defaults/main.yaml | 2 +- ...packet_centos7-docker-weave-upgrade-ha.yml | 1 + tests/files/packet_centos8-docker.yml | 1 + tests/files/packet_debian10-docker.yml | 1 + tests/files/packet_debian11-docker.yml | 1 + tests/files/packet_fedora34-docker-weave.yml | 1 + .../packet_ubuntu16-docker-weave-sep.yml | 1 + tests/files/packet_ubuntu18-docker.yml | 1 + tests/files/packet_ubuntu20-docker.yml | 1 + 23 files changed, 83 insertions(+), 9 deletions(-) diff --git a/.gitlab-ci/packet.yml b/.gitlab-ci/packet.yml index de02cd989..41a2a4fe4 100644 --- a/.gitlab-ci/packet.yml +++ b/.gitlab-ci/packet.yml @@ -2,6 +2,7 @@ .packet: extends: .testcases variables: + ANSIBLE_TIMEOUT: "120" CI_PLATFORM: packet SSH_USER: kubespray tags: diff --git a/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml b/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml index 062059206..90b47b86c 100644 --- a/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml +++ b/inventory/sample/group_vars/k8s_cluster/k8s-cluster.yml @@ -192,7 +192,7 @@ coredns_k8s_external_zone: k8s_external.local enable_coredns_k8s_endpoint_pod_names: false # Can be docker_dns, host_resolvconf or none -resolvconf_mode: docker_dns +resolvconf_mode: host_resolvconf # Deploy netchecker app to verify DNS resolve as an HTTP service deploy_netchecker: false # Ip address of the kubernetes skydns service diff --git a/roles/adduser/molecule/default/molecule.yml b/roles/adduser/molecule/default/molecule.yml index 4bb5dce30..80ebdad72 100644 --- a/roles/adduser/molecule/default/molecule.yml +++ b/roles/adduser/molecule/default/molecule.yml @@ -15,6 +15,10 @@ platforms: memory: 512 provisioner: name: ansible + config_options: + defaults: + callback_whitelist: profile_tasks + timeout: 120 lint: name: ansible-lint verifier: diff --git a/roles/bastion-ssh-config/molecule/default/molecule.yml b/roles/bastion-ssh-config/molecule/default/molecule.yml index 1d84db76c..c0c29ae92 100644 --- a/roles/bastion-ssh-config/molecule/default/molecule.yml +++ b/roles/bastion-ssh-config/molecule/default/molecule.yml @@ -15,6 +15,10 @@ platforms: memory: 512 provisioner: name: ansible + config_options: + defaults: + callback_whitelist: profile_tasks + timeout: 120 lint: name: ansible-lint inventory: diff --git a/roles/bootstrap-os/molecule/default/molecule.yml b/roles/bootstrap-os/molecule/default/molecule.yml index 081d929e8..0bb61eff6 100644 --- a/roles/bootstrap-os/molecule/default/molecule.yml +++ b/roles/bootstrap-os/molecule/default/molecule.yml @@ -35,6 +35,10 @@ platforms: memory: 512 provisioner: name: ansible + config_options: + defaults: + callback_whitelist: profile_tasks + timeout: 120 lint: name: ansible-lint inventory: diff --git a/roles/container-engine/containerd/molecule/default/molecule.yml b/roles/container-engine/containerd/molecule/default/molecule.yml index fb2cb9f9d..ebe3595b6 100644 --- a/roles/container-engine/containerd/molecule/default/molecule.yml +++ b/roles/container-engine/containerd/molecule/default/molecule.yml @@ -46,6 +46,7 @@ provisioner: config_options: defaults: callback_whitelist: profile_tasks + timeout: 120 lint: name: ansible-lint options: diff --git a/roles/container-engine/cri-o/molecule/default/molecule.yml b/roles/container-engine/cri-o/molecule/default/molecule.yml index 2ca990c1d..56e6abd61 100644 --- a/roles/container-engine/cri-o/molecule/default/molecule.yml +++ b/roles/container-engine/cri-o/molecule/default/molecule.yml @@ -38,6 +38,7 @@ provisioner: config_options: defaults: callback_whitelist: profile_tasks + timeout: 120 lint: name: ansible-lint options: diff --git a/roles/container-engine/docker/molecule/default/molecule.yml b/roles/container-engine/docker/molecule/default/molecule.yml index eaf6fae50..c30366215 100644 --- a/roles/container-engine/docker/molecule/default/molecule.yml +++ b/roles/container-engine/docker/molecule/default/molecule.yml @@ -18,6 +18,7 @@ provisioner: config_options: defaults: callback_whitelist: profile_tasks + timeout: 120 lint: name: ansible-lint options: diff --git a/roles/container-engine/gvisor/molecule/default/molecule.yml b/roles/container-engine/gvisor/molecule/default/molecule.yml index fc4ec0276..657dc2862 100644 --- a/roles/container-engine/gvisor/molecule/default/molecule.yml +++ b/roles/container-engine/gvisor/molecule/default/molecule.yml @@ -30,6 +30,7 @@ provisioner: config_options: defaults: callback_whitelist: profile_tasks + timeout: 120 lint: name: ansible-lint options: diff --git a/roles/container-engine/kata-containers/molecule/default/molecule.yml b/roles/container-engine/kata-containers/molecule/default/molecule.yml index 164a47083..bb9812054 100644 --- a/roles/container-engine/kata-containers/molecule/default/molecule.yml +++ b/roles/container-engine/kata-containers/molecule/default/molecule.yml @@ -30,6 +30,7 @@ provisioner: config_options: defaults: callback_whitelist: profile_tasks + timeout: 120 lint: name: ansible-lint options: diff --git a/roles/kubernetes/preinstall/handlers/main.yml b/roles/kubernetes/preinstall/handlers/main.yml index 54a5f6a67..667465b6f 100644 --- a/roles/kubernetes/preinstall/handlers/main.yml +++ b/roles/kubernetes/preinstall/handlers/main.yml @@ -9,6 +9,7 @@ - Preinstall | restart kube-controller-manager crio/containerd - Preinstall | restart kube-apiserver docker - Preinstall | restart kube-apiserver crio/containerd + - Preinstall | wait for the apiserver to be running when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] and not is_fedora_coreos - name: Preinstall | update resolvconf for Flatcar Container Linux by Kinvolk @@ -101,6 +102,21 @@ - dns_mode != 'none' - resolvconf_mode == 'host_resolvconf' +# When running this as the last phase ensure we wait for kube-apiserver to come up +- name: Preinstall | wait for the apiserver to be running + uri: + url: "{{ kube_apiserver_endpoint }}/healthz" + validate_certs: no + register: result + until: result.status == 200 + retries: 60 + delay: 1 + when: + - dns_late + - inventory_hostname in groups['kube_control_plane'] + - dns_mode != 'none' + - resolvconf_mode == 'host_resolvconf' + - name: Preinstall | Restart systemd-resolved service: name: systemd-resolved diff --git a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml index cf6612b00..1cfd47777 100644 --- a/roles/kubernetes/preinstall/tasks/0040-set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/0040-set_facts.yml @@ -34,6 +34,39 @@ changed_when: false check_mode: no +- name: check existence of /etc/resolvconf/resolv.conf.d + stat: + path: /etc/resolvconf/resolv.conf.d + get_attributes: no + get_checksum: no + get_mime: no + failed_when: false + register: resolvconfd_path + +- name: check status of /etc/resolv.conf + stat: + path: /etc/resolv.conf + follow: no + get_attributes: no + get_checksum: no + get_mime: no + failed_when: false + register: resolvconf_stat + +- block: + + - name: get content of /etc/resolv.conf + slurp: + src: /etc/resolv.conf + register: resolvconf_slurp + + - name: get currently configured nameservers + set_fact: + configured_nameservers: "{{ resolvconf_slurp.content | b64decode | regex_findall('\\s*nameserver\\s*(.*)') | ipaddr }}" + when: resolvconf_slurp.content is defined + + when: resolvconf_stat.stat.exists is defined and resolvconf_stat.stat.exists + - name: check systemd-resolved # noqa 303 Should we use service_facts for this? command: systemctl is-active systemd-resolved @@ -45,7 +78,7 @@ - name: set dns facts set_fact: resolvconf: >- - {%- if resolvconf.rc == 0 -%}true{%- else -%}false{%- endif -%} + {%- if resolvconf.rc == 0 and resolvconfd_path.stat.isdir is defined and resolvconfd_path.stat.isdir -%}true{%- else -%}false{%- endif -%} bogus_domains: |- {% for d in [ 'default.svc.' + dns_domain, 'svc.' + dns_domain ] + searchdomains|default([]) -%} {{ dns_domain }}.{{ d }}./{{ d }}.{{ d }}./com.{{ d }}./ @@ -147,7 +180,7 @@ - name: generate nameservers to resolvconf set_fact: nameserverentries: - nameserver {{ ( ( [nodelocaldns_ip] if enable_nodelocaldns else []) + coredns_server|d([]) + nameservers|d([]) + cloud_resolver|d([])) | unique | join(',nameserver ') }} + nameserver {{ ( ( [nodelocaldns_ip] if enable_nodelocaldns else []) + coredns_server|d([]) + nameservers|d([]) + cloud_resolver|d([]) + configured_nameservers|d([])) | unique | join(',nameserver ') }} supersede_nameserver: supersede domain-name-servers {{ ( coredns_server|d([]) + nameservers|d([]) + cloud_resolver|d([])) | unique | join(', ') }}; diff --git a/roles/kubernetes/preinstall/tasks/0060-resolvconf.yml b/roles/kubernetes/preinstall/tasks/0060-resolvconf.yml index a34d031b1..65b55d7fb 100644 --- a/roles/kubernetes/preinstall/tasks/0060-resolvconf.yml +++ b/roles/kubernetes/preinstall/tasks/0060-resolvconf.yml @@ -16,7 +16,7 @@ state: present insertbefore: BOF create: yes - backup: yes + backup: "{{ not resolvconf_stat.stat.islnk }}" marker: "# Ansible entries {mark}" mode: 0644 notify: Preinstall | propagate resolvconf to k8s components @@ -25,7 +25,7 @@ replace: path: "{{ item[0] }}" regexp: '^{{ item[1] }}[^#]*(?=# Ansible entries BEGIN)' - backup: yes + backup: "{{ not resolvconf_stat.stat.islnk }}" with_nested: - "{{ [resolvconffile, base|default(''), head|default('')] | difference(['']) }}" - [ 'search ', 'nameserver ', 'domain ', 'options ' ] @@ -36,13 +36,12 @@ path: "{{ item[0] }}" regexp: '(# Ansible entries END\n(?:(?!^{{ item[1] }}).*\n)*)(?:^{{ item[1] }}.*\n?)+' replace: '\1' - backup: yes + backup: "{{ not resolvconf_stat.stat.islnk }}" with_nested: - "{{ [resolvconffile, base|default(''), head|default('')] | difference(['']) }}" - [ 'search ', 'nameserver ', 'domain ', 'options ' ] notify: Preinstall | propagate resolvconf to k8s components - - name: get temporary resolveconf cloud init file content command: cat {{ resolvconffile }} register: cloud_config diff --git a/roles/kubernetes/preinstall/tasks/0100-dhclient-hooks.yml b/roles/kubernetes/preinstall/tasks/0100-dhclient-hooks.yml index 28aed0740..f240d0fdf 100644 --- a/roles/kubernetes/preinstall/tasks/0100-dhclient-hooks.yml +++ b/roles/kubernetes/preinstall/tasks/0100-dhclient-hooks.yml @@ -22,7 +22,7 @@ owner: root mode: 0755 notify: Preinstall | propagate resolvconf to k8s components - when: ansible_os_family != "RedHat" + when: ansible_os_family not in [ "RedHat", "Suse" ] - name: Configure dhclient hooks for resolv.conf (RH-only) template: diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml index 4d88c2889..12a28b9af 100644 --- a/roles/kubespray-defaults/defaults/main.yaml +++ b/roles/kubespray-defaults/defaults/main.yaml @@ -106,7 +106,7 @@ nodelocaldns_secondary_skew_seconds: 5 manual_dns_server: "" # Can be docker_dns, host_resolvconf or none -resolvconf_mode: docker_dns +resolvconf_mode: host_resolvconf # Deploy netchecker app to verify DNS resolve as an HTTP service deploy_netchecker: false # Ip address of the kubernetes DNS service (called skydns for historical reasons) diff --git a/tests/files/packet_centos7-docker-weave-upgrade-ha.yml b/tests/files/packet_centos7-docker-weave-upgrade-ha.yml index 92d39306b..265c97dde 100644 --- a/tests/files/packet_centos7-docker-weave-upgrade-ha.yml +++ b/tests/files/packet_centos7-docker-weave-upgrade-ha.yml @@ -10,6 +10,7 @@ kubernetes_audit: true # Docker specific settings: container_manager: docker etcd_deployment_type: docker +resolvconf_mode: docker_dns # Needed to upgrade from 1.16 to 1.17, otherwise upgrade is partial and bug followed upgrade_cluster_setup: true diff --git a/tests/files/packet_centos8-docker.yml b/tests/files/packet_centos8-docker.yml index fb7e9ba49..3d53119cd 100644 --- a/tests/files/packet_centos8-docker.yml +++ b/tests/files/packet_centos8-docker.yml @@ -10,3 +10,4 @@ calico_iptables_backend: "Auto" # Use docker container_manager: docker etcd_deployment_type: docker +resolvconf_mode: docker_dns diff --git a/tests/files/packet_debian10-docker.yml b/tests/files/packet_debian10-docker.yml index a59371968..fc55e7f7f 100644 --- a/tests/files/packet_debian10-docker.yml +++ b/tests/files/packet_debian10-docker.yml @@ -6,3 +6,4 @@ mode: default # Use docker container_manager: docker etcd_deployment_type: docker +resolvconf_mode: docker_dns diff --git a/tests/files/packet_debian11-docker.yml b/tests/files/packet_debian11-docker.yml index 3b93dd06e..69ec8eb25 100644 --- a/tests/files/packet_debian11-docker.yml +++ b/tests/files/packet_debian11-docker.yml @@ -6,3 +6,4 @@ mode: default # Use docker container_manager: docker etcd_deployment_type: docker +resolvconf_mode: docker_dns diff --git a/tests/files/packet_fedora34-docker-weave.yml b/tests/files/packet_fedora34-docker-weave.yml index f9153538f..2fdef725d 100644 --- a/tests/files/packet_fedora34-docker-weave.yml +++ b/tests/files/packet_fedora34-docker-weave.yml @@ -9,3 +9,4 @@ kube_network_plugin: weave # Docker specific settings: container_manager: docker etcd_deployment_type: docker +resolvconf_mode: docker_dns diff --git a/tests/files/packet_ubuntu16-docker-weave-sep.yml b/tests/files/packet_ubuntu16-docker-weave-sep.yml index c49c6307e..9b268e77e 100644 --- a/tests/files/packet_ubuntu16-docker-weave-sep.yml +++ b/tests/files/packet_ubuntu16-docker-weave-sep.yml @@ -10,6 +10,7 @@ auto_renew_certificates: true # Docker specific settings: container_manager: docker etcd_deployment_type: docker +resolvconf_mode: docker_dns # Ubuntu 16 - docker containerd package available stopped at 1.4.6 docker_containerd_version: latest diff --git a/tests/files/packet_ubuntu18-docker.yml b/tests/files/packet_ubuntu18-docker.yml index 74f5fa34d..548ff371e 100644 --- a/tests/files/packet_ubuntu18-docker.yml +++ b/tests/files/packet_ubuntu18-docker.yml @@ -7,3 +7,4 @@ vm_memory: 1600Mi # Use docker container_manager: docker etcd_deployment_type: docker +resolvconf_mode: docker_dns diff --git a/tests/files/packet_ubuntu20-docker.yml b/tests/files/packet_ubuntu20-docker.yml index ca7c3c7e7..4089a6605 100644 --- a/tests/files/packet_ubuntu20-docker.yml +++ b/tests/files/packet_ubuntu20-docker.yml @@ -14,3 +14,4 @@ enable_nodelocaldns: False # Use docker container_manager: docker etcd_deployment_type: docker +resolvconf_mode: docker_dns