From ab0163a3adce4feacb39d8b1619de478a67d9100 Mon Sep 17 00:00:00 2001 From: Maxime Leroy <19607336+maxime1907@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:46:13 +0100 Subject: [PATCH] fix(kubernetes): taint nodes with kubectl (#10705) Signed-off-by: Maxime Leroy <19607336+maxime1907@users.noreply.github.com> --- docs/vars.md | 2 +- playbooks/cluster.yml | 1 + playbooks/scale.yml | 1 + playbooks/upgrade_cluster.yml | 2 ++ roles/kubernetes/node-taint/tasks/main.yml | 35 +++++++++++++++++++ .../node/templates/kubelet.env.v1beta1.j2 | 12 +------ 6 files changed, 41 insertions(+), 12 deletions(-) create mode 100644 roles/kubernetes/node-taint/tasks/main.yml diff --git a/docs/vars.md b/docs/vars.md index 959260e31..9c9f00399 100644 --- a/docs/vars.md +++ b/docs/vars.md @@ -245,7 +245,7 @@ node_labels: label2_name: label2_value ``` -* *node_taints* - Taints applied to nodes via kubelet --register-with-taints parameter. +* *node_taints* - Taints applied to nodes via `kubectl taint node`. For example, taints can be set in the inventory as variables or more widely in group_vars. *node_taints* has to be defined as a list of strings in format `key=value:effect`, e.g.: diff --git a/playbooks/cluster.yml b/playbooks/cluster.yml index a6fd770b9..c433a8c69 100644 --- a/playbooks/cluster.yml +++ b/playbooks/cluster.yml @@ -48,6 +48,7 @@ - { role: kubespray-defaults } - { role: kubernetes/kubeadm, tags: kubeadm} - { role: kubernetes/node-label, tags: node-label } + - { role: kubernetes/node-taint, tags: node-taint } - { role: network_plugin, tags: network } - { role: kubernetes-apps/kubelet-csr-approver, tags: kubelet-csr-approver } diff --git a/playbooks/scale.yml b/playbooks/scale.yml index b8f87f484..171e37832 100644 --- a/playbooks/scale.yml +++ b/playbooks/scale.yml @@ -91,6 +91,7 @@ - { role: kubespray-defaults } - { role: kubernetes/kubeadm, tags: kubeadm } - { role: kubernetes/node-label, tags: node-label } + - { role: kubernetes/node-taint, tags: node-taint } - { role: network_plugin, tags: network } - name: Apply resolv.conf changes now that cluster DNS is up diff --git a/playbooks/upgrade_cluster.yml b/playbooks/upgrade_cluster.yml index a79cf0aa7..3180fec93 100644 --- a/playbooks/upgrade_cluster.yml +++ b/playbooks/upgrade_cluster.yml @@ -55,6 +55,7 @@ - { role: kubernetes/control-plane, tags: master, upgrade_cluster_setup: true } - { role: kubernetes/client, tags: client } - { role: kubernetes/node-label, tags: node-label } + - { role: kubernetes/node-taint, tags: node-taint } - { role: kubernetes-apps/cluster_roles, tags: cluster-roles } - { role: kubernetes-apps, tags: csi-driver } - { role: upgrade/post-upgrade, tags: post-upgrade } @@ -87,6 +88,7 @@ - { role: kubernetes/node, tags: node } - { role: kubernetes/kubeadm, tags: kubeadm } - { role: kubernetes/node-label, tags: node-label } + - { role: kubernetes/node-taint, tags: node-taint } - { role: upgrade/post-upgrade, tags: post-upgrade } - name: Patch Kubernetes for Windows diff --git a/roles/kubernetes/node-taint/tasks/main.yml b/roles/kubernetes/node-taint/tasks/main.yml new file mode 100644 index 000000000..0766dc439 --- /dev/null +++ b/roles/kubernetes/node-taint/tasks/main.yml @@ -0,0 +1,35 @@ +--- +- name: Set role and inventory node taint to empty list + set_fact: + role_node_taints: [] + inventory_node_taints: [] + +- name: Node taint for nvidia GPU nodes + set_fact: + role_node_taints: "{{ role_node_taints + ['nvidia.com/gpu=:NoSchedule'] }}" + when: + - nvidia_gpu_nodes is defined + - nvidia_accelerator_enabled | bool + - inventory_hostname in nvidia_gpu_nodes + +- name: Populate inventory node taint + set_fact: + inventory_node_taints: "{{ inventory_node_taints + ['%s' | format(item)] }}" + loop: "{{ node_taints | d([]) }}" + when: + - node_taints is defined + - node_taints is not string + - node_taints is not mapping + - node_taints is iterable +- debug: # noqa name[missing] + var: role_node_taints +- debug: # noqa name[missing] + var: inventory_node_taints + +- name: Set taint to node + command: >- + {{ kubectl }} taint node {{ kube_override_hostname | default(inventory_hostname) }} {{ (role_node_taints + inventory_node_taints) | join(' ') }} --overwrite=true + delegate_to: "{{ groups['kube_control_plane'][0] }}" + changed_when: false + when: + - (role_node_taints + inventory_node_taints) | length > 0 diff --git a/roles/kubernetes/node/templates/kubelet.env.v1beta1.j2 b/roles/kubernetes/node/templates/kubelet.env.v1beta1.j2 index b8a22fd1e..a5aa369df 100644 --- a/roles/kubernetes/node/templates/kubelet.env.v1beta1.j2 +++ b/roles/kubernetes/node/templates/kubelet.env.v1beta1.j2 @@ -15,17 +15,7 @@ KUBELET_HOSTNAME="--hostname-override={{ kube_override_hostname }}" --runtime-cgroups={{ kubelet_runtime_cgroups }} \ {% endset %} -{# Kubelet node taints for gpu #} -{% if nvidia_gpu_nodes is defined and nvidia_accelerator_enabled|bool %} -{% if inventory_hostname in nvidia_gpu_nodes and node_taints is defined %} -{% set dummy = node_taints.append('nvidia.com/gpu=:NoSchedule') %} -{% elif inventory_hostname in nvidia_gpu_nodes and node_taints is not defined %} -{% set node_taints = [] %} -{% set dummy = node_taints.append('nvidia.com/gpu=:NoSchedule') %} -{% endif %} -{% endif %} - -KUBELET_ARGS="{{ kubelet_args_base }} {% if node_taints|default([]) %}--register-with-taints={{ node_taints | join(',') }} {% endif %} {% if kubelet_custom_flags is string %} {{kubelet_custom_flags}} {% else %}{% for flag in kubelet_custom_flags %} {{flag}} {% endfor %}{% endif %}{% if inventory_hostname in groups['kube_node'] %}{% if kubelet_node_custom_flags is string %} {{kubelet_node_custom_flags}} {% else %}{% for flag in kubelet_node_custom_flags %} {{flag}} {% endfor %}{% endif %}{% endif %}" +KUBELET_ARGS="{{ kubelet_args_base }} {% if kubelet_custom_flags is string %} {{kubelet_custom_flags}} {% else %}{% for flag in kubelet_custom_flags %} {{flag}} {% endfor %}{% endif %}{% if inventory_hostname in groups['kube_node'] %}{% if kubelet_node_custom_flags is string %} {{kubelet_node_custom_flags}} {% else %}{% for flag in kubelet_node_custom_flags %} {{flag}} {% endfor %}{% endif %}{% endif %}" {% if kubelet_flexvolumes_plugins_dir is defined %} KUBELET_VOLUME_PLUGIN="--volume-plugin-dir={{ kubelet_flexvolumes_plugins_dir }}" {% endif %}