diff --git a/roles/container-engine/containerd/templates/containerd.service.j2 b/roles/container-engine/containerd/templates/containerd.service.j2
index 06b229084..cfd51f0a8 100644
--- a/roles/container-engine/containerd/templates/containerd.service.j2
+++ b/roles/container-engine/containerd/templates/containerd.service.j2
@@ -36,10 +36,8 @@ LimitMEMLOCK={{ containerd_limit_mem_lock }}
 # Only systemd 226 and above support this version.
 TasksMax=infinity
 OOMScoreAdjust=-999
-# Set the cgroup slice of the service so that kube reserved takes effect
-{% if kube_reserved is defined and kube_reserved|bool %}
-Slice={{ kube_reserved_cgroups_for_service_slice }}
-{% endif %}
+# Set the cgroup slice of the service to optionally enforce resource limitations
+Slice={{ kube_slice }}
 
 [Install]
 WantedBy=multi-user.target
diff --git a/roles/container-engine/cri-dockerd/templates/cri-dockerd.service.j2 b/roles/container-engine/cri-dockerd/templates/cri-dockerd.service.j2
index ec128150f..468b94893 100644
--- a/roles/container-engine/cri-dockerd/templates/cri-dockerd.service.j2
+++ b/roles/container-engine/cri-dockerd/templates/cri-dockerd.service.j2
@@ -35,10 +35,8 @@ LimitCORE=infinity
 TasksMax=infinity
 Delegate=yes
 KillMode=process
-# Set the cgroup slice of the service so that kube reserved takes effect
-{% if kube_reserved is defined and kube_reserved|bool %}
-Slice={{ kube_reserved_cgroups_for_service_slice }}
-{% endif %}
+# Set the cgroup slice of the service to optionally enforce resource limitations
+Slice={{ kube_slice }}
 
 [Install]
 WantedBy=multi-user.target
diff --git a/roles/container-engine/cri-o/tasks/main.yaml b/roles/container-engine/cri-o/tasks/main.yaml
index bde2e0756..c12c2fbc8 100644
--- a/roles/container-engine/cri-o/tasks/main.yaml
+++ b/roles/container-engine/cri-o/tasks/main.yaml
@@ -90,7 +90,7 @@
     remote_src: true
   notify: Restart crio
 
-- name: Cri-o | configure crio to use kube reserved cgroups
+- name: Cri-o | configure crio to run in the kube slice
   ansible.builtin.copy:
     dest: /etc/systemd/system/crio.service.d/00-slice.conf
     owner: root
@@ -98,11 +98,8 @@
     mode: '0644'
     content: |
       [Service]
-      Slice={{ kube_reserved_cgroups_for_service_slice }}
+      Slice={{ kube_slice }}
   notify: Restart crio
-  when:
-    - kube_reserved is defined and kube_reserved is true
-    - kube_reserved_cgroups_for_service_slice is defined
 
 - name: Cri-o | update the bin dir for crio.service file
   replace:
diff --git a/roles/container-engine/cri-o/templates/crio.conf.j2 b/roles/container-engine/cri-o/templates/crio.conf.j2
index 6f9b84f14..5dd76ca86 100644
--- a/roles/container-engine/cri-o/templates/crio.conf.j2
+++ b/roles/container-engine/cri-o/templates/crio.conf.j2
@@ -114,11 +114,7 @@ conmon = "{{ crio_conmon }}"
 {% if crio_cgroup_manager == "cgroupfs" %}
 conmon_cgroup = "pod"
 {% else %}
-{% if kube_reserved is defined and kube_reserved|bool %}
-conmon_cgroup = "{{ kube_reserved_cgroups_for_service_slice }}"
-{% else %}
-conmon_cgroup = "system.slice"
-{% endif %}
+conmon_cgroup = "{{ kube_slice }}"
 {% endif %}
 
 # Environment variable list for the conmon process, used for passing necessary
diff --git a/roles/container-engine/docker/templates/docker.service.j2 b/roles/container-engine/docker/templates/docker.service.j2
index bc220197d..295a773bd 100644
--- a/roles/container-engine/docker/templates/docker.service.j2
+++ b/roles/container-engine/docker/templates/docker.service.j2
@@ -32,10 +32,8 @@ TimeoutStartSec=1min
 Restart=on-failure
 StartLimitBurst=3
 StartLimitInterval=60s
-# Set the cgroup slice of the service so that kube reserved takes effect
-{% if kube_reserved is defined and kube_reserved|bool %}
-Slice={{ kube_reserved_cgroups_for_service_slice }}
-{% endif %}
+# Set the cgroup slice of the service to optionally enforce resource limitations
+Slice={{ kube_slice }}
 
 [Install]
 WantedBy=multi-user.target
diff --git a/roles/kubernetes/node/defaults/main.yml b/roles/kubernetes/node/defaults/main.yml
index 1df9d6418..83943cb48 100644
--- a/roles/kubernetes/node/defaults/main.yml
+++ b/roles/kubernetes/node/defaults/main.yml
@@ -11,15 +11,6 @@ kube_resolv_conf: "/etc/resolv.conf"
 # Set to empty to avoid cgroup creation
 kubelet_enforce_node_allocatable: "\"\""
 
-# Set runtime and kubelet cgroups when using systemd as cgroup driver (default)
-kube_service_cgroups: "{% if kube_reserved %}{{ kube_reserved_cgroups_for_service_slice }}{% else %}system.slice{% endif %}"
-kubelet_runtime_cgroups: "/{{ kube_service_cgroups }}/{{ container_manager }}.service"
-kubelet_kubelet_cgroups: "/{{ kube_service_cgroups }}/kubelet.service"
-
-# Set runtime and kubelet cgroups when using cgroupfs as cgroup driver
-kubelet_runtime_cgroups_cgroupfs: "/system.slice/{{ container_manager }}.service"
-kubelet_kubelet_cgroups_cgroupfs: "/system.slice/kubelet.service"
-
 # Set systemd service hardening features
 kubelet_systemd_hardening: false
 
@@ -42,6 +33,10 @@ kube_cpu_reserved: "100m"
 kube_ephemeral_storage_reserved: "500Mi"
 kube_pid_reserved: "1000"
 
+# Set slice for host system daemons (sshd, NetworkManager, ...)
+# You probably don't want to change this
+system_slice: system.slice
+
 # Set to true to reserve resources for system daemons
 system_reserved: false
 system_reserved_cgroups_for_service_slice: system.slice
diff --git a/roles/kubernetes/node/tasks/facts.yml b/roles/kubernetes/node/tasks/facts.yml
index 6e8995274..d1a8bc0a2 100644
--- a/roles/kubernetes/node/tasks/facts.yml
+++ b/roles/kubernetes/node/tasks/facts.yml
@@ -39,12 +39,6 @@
     kubelet_cgroup_driver: "{{ kubelet_cgroup_driver_detected }}"
   when: kubelet_cgroup_driver is undefined
 
-- name: Set kubelet_cgroups options when cgroupfs is used
-  set_fact:
-    kubelet_runtime_cgroups: "{{ kubelet_runtime_cgroups_cgroupfs }}"
-    kubelet_kubelet_cgroups: "{{ kubelet_kubelet_cgroups_cgroupfs }}"
-  when: kubelet_cgroup_driver == 'cgroupfs'
-
 - name: Set kubelet_config_extra_args options when cgroupfs is used
   set_fact:
     kubelet_config_extra_args: "{{ kubelet_config_extra_args | combine(kubelet_config_extra_args_cgroupfs) }}"
diff --git a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2 b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
index 0474bf9bb..8164e51ba 100644
--- a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
+++ b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2
@@ -33,7 +33,7 @@ address: {{ kubelet_bind_address }}
 readOnlyPort: {{ kube_read_only_port }}
 healthzPort: {{ kubelet_healthz_port }}
 healthzBindAddress: {{ kubelet_healthz_bind_address }}
-kubeletCgroups: {{ kubelet_kubelet_cgroups }}
+kubeletCgroups: {{ kube_slice_cgroup ~ 'kubelet.service' }}
 clusterDomain: {{ dns_domain }}
 {% if kubelet_protect_kernel_defaults | bool %}
 protectKernelDefaults: true
@@ -63,7 +63,7 @@ clusterDNS:
 {# Node reserved CPU/memory #}
 {% for scope in "kube", "system" %}
 {% if lookup('ansible.builtin.vars', scope + "_reserved") | bool %}
-{{ scope }}ReservedCgroup: {{ lookup('ansible.builtin.vars', scope + '_reserved_cgroups') }}
+{{ scope }}ReservedCgroup: {{ lookup('ansible.builtin.vars', scope + '_slice_cgroup') }}
 {% endif %}
 {{ scope }}Reserved:
 {% for resource in "cpu", "memory", "ephemeral-storage", "pid" %}
diff --git a/roles/kubernetes/node/templates/kubelet.env.v1beta1.j2 b/roles/kubernetes/node/templates/kubelet.env.v1beta1.j2
index 576b9c8fd..ce823902b 100644
--- a/roles/kubernetes/node/templates/kubelet.env.v1beta1.j2
+++ b/roles/kubernetes/node/templates/kubelet.env.v1beta1.j2
@@ -11,7 +11,7 @@ KUBELET_HOSTNAME="--hostname-override={{ kube_override_hostname }}"
 --config={{ kube_config_dir }}/kubelet-config.yaml \
 --kubeconfig={{ kube_config_dir }}/kubelet.conf \
 {# end kubeadm specific settings #}
---runtime-cgroups={{ kubelet_runtime_cgroups }} \
+--runtime-cgroups={{ kube_slice_cgroup ~ container_manager ~ '.service' }} \
 {% endset %}
 
 KUBELET_ARGS="{{ kubelet_args_base }} {{ kubelet_custom_flags | join(' ') }}"
diff --git a/roles/kubernetes/node/templates/kubelet.service.j2 b/roles/kubernetes/node/templates/kubelet.service.j2
index be248c6a2..c1472acb3 100644
--- a/roles/kubernetes/node/templates/kubelet.service.j2
+++ b/roles/kubernetes/node/templates/kubelet.service.j2
@@ -14,9 +14,7 @@ Wants={{ kubelet_dependency }}
 {% endfor %}
 
 [Service]
-{% if kube_reserved|bool %}
-Slice={{ kube_reserved_cgroups_for_service_slice }}
-{% endif %}
+Slice={{ kube_slice }}
 EnvironmentFile=-{{ kube_config_dir }}/kubelet.env
 ExecStart={{ bin_dir }}/kubelet \
 		$KUBE_LOGTOSTDERR \
diff --git a/roles/kubernetes/node/vars/main.yml b/roles/kubernetes/node/vars/main.yml
new file mode 100644
index 000000000..dec0ee622
--- /dev/null
+++ b/roles/kubernetes/node/vars/main.yml
@@ -0,0 +1,8 @@
+---
+# Absolute cgroup paths of the slices, with leading and trailing '/'.
+# systemd nests a slice named 'a-b.slice' inside 'a.slice', so every
+# dash-separated prefix of the name is a parent directory of the slice:
+#   'runtime.slice' maps to '/runtime.slice/'
+#   'kube-reserved.slice' maps to '/kube.slice/kube-reserved.slice/'
+kube_slice_cgroup: "/{% set ancestors = kube_slice.split('-')[:-1] %}{% for i in range(ancestors | length) %}{{ ancestors[:i + 1] | join('-') }}.slice/{% endfor %}{{ kube_slice }}/"
+system_slice_cgroup: "/{% set ancestors = system_slice.split('-')[:-1] %}{% for i in range(ancestors | length) %}{{ ancestors[:i + 1] | join('-') }}.slice/{% endfor %}{{ system_slice }}/"
diff --git a/roles/kubespray-defaults/defaults/main/main.yml b/roles/kubespray-defaults/defaults/main/main.yml
index 7d36da08a..d1f692282 100644
--- a/roles/kubespray-defaults/defaults/main/main.yml
+++ b/roles/kubespray-defaults/defaults/main/main.yml
@@ -23,6 +23,11 @@ kube_version: v1.30.4
 ## The minimum version working
 kube_version_min_required: v1.28.0
 
+# TODO: move this default to a more specific place; it is needed by both the container-engine and kubernetes/node roles
+# Set the systemd slice for kubernetes-related daemons: kubelet and container engine
+# You probably don't want to change this
+kube_slice: runtime.slice
+
 ## Kube Proxy mode One of ['iptables', 'ipvs']
 kube_proxy_mode: ipvs
 