optimize cgroups settings for node reserved (#9209)
* optimize cgroups settings for node reserved * fix * set cgroup slice for multi container engine * set cgroup slice for crio * add reserved cgroups variables to sample files * Compatible with cgroup path for different container managers * add cgroups doc * fix markdown
pull/9632/head
parent
744c81d451
commit
1c4db6132d
|
@ -0,0 +1,72 @@
|
|||
# cgroups
|
||||
|
||||
To avoid resource contention between containers and to limit their impact on the host, the kubelet relies on cgroups to restrict the resource usage of containers.
|
||||
|
||||
## Enforcing Node Allocatable
|
||||
|
||||
You can use `kubelet_enforce_node_allocatable` to set node allocatable enforcement.
|
||||
|
||||
```yaml
|
||||
# A comma separated list of levels of node allocatable enforcement to be enforced by kubelet.
|
||||
kubelet_enforce_node_allocatable: "pods"
|
||||
# kubelet_enforce_node_allocatable: "pods,kube-reserved"
|
||||
# kubelet_enforce_node_allocatable: "pods,kube-reserved,system-reserved"
|
||||
```
|
||||
|
||||
Note that to enforce kube-reserved or system-reserved, `kube_reserved_cgroups` or `system_reserved_cgroups` needs to be specified respectively.
|
||||
|
||||
Here is an example:
|
||||
|
||||
```yaml
|
||||
kubelet_enforce_node_allocatable: "pods,kube-reserved,system-reserved"
|
||||
|
||||
# Reserve this space for kube resources
|
||||
# Set to true to reserve resources for kube daemons
|
||||
kube_reserved: true
|
||||
kube_reserved_cgroups_for_service_slice: kube.slice
|
||||
kube_reserved_cgroups: "/{{ kube_reserved_cgroups_for_service_slice }}"
|
||||
kube_memory_reserved: 256Mi
|
||||
kube_cpu_reserved: 100m
|
||||
# kube_ephemeral_storage_reserved: 2Gi
|
||||
# kube_pid_reserved: "1000"
|
||||
# Reservation for master hosts
|
||||
kube_master_memory_reserved: 512Mi
|
||||
kube_master_cpu_reserved: 200m
|
||||
# kube_master_ephemeral_storage_reserved: 2Gi
|
||||
# kube_master_pid_reserved: "1000"
|
||||
|
||||
# Set to true to reserve resources for system daemons
|
||||
system_reserved: true
|
||||
system_reserved_cgroups_for_service_slice: system.slice
|
||||
system_reserved_cgroups: "/{{ system_reserved_cgroups_for_service_slice }}"
|
||||
system_memory_reserved: 512Mi
|
||||
system_cpu_reserved: 500m
|
||||
# system_ephemeral_storage_reserved: 2Gi
|
||||
# system_pid_reserved: "1000"
|
||||
# Reservation for master hosts
|
||||
system_master_memory_reserved: 256Mi
|
||||
system_master_cpu_reserved: 250m
|
||||
# system_master_ephemeral_storage_reserved: 2Gi
|
||||
# system_master_pid_reserved: "1000"
|
||||
```
|
||||
|
||||
After the setup, the cgroups hierarchy is as follows:
|
||||
|
||||
```bash
|
||||
/ (Cgroups Root)
|
||||
├── kubepods.slice
|
||||
│ ├── ...
|
||||
│ ├── kubepods-besteffort.slice
|
||||
│ ├── kubepods-burstable.slice
|
||||
│ └── ...
|
||||
├── kube.slice
|
||||
│ ├── ...
|
||||
│ ├── {{container_manager}}.service
|
||||
│ ├── kubelet.service
|
||||
│ └── ...
|
||||
├── system.slice
|
||||
│ └── ...
|
||||
└── ...
|
||||
```
|
||||
|
||||
You can learn more in the [official Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/).
|
|
@ -261,9 +261,36 @@ podsecuritypolicy_enabled: false
|
|||
# Acceptable options are 'pods', 'system-reserved', 'kube-reserved' and ''. Default is "".
|
||||
# kubelet_enforce_node_allocatable: pods
|
||||
|
||||
## Set runtime and kubelet cgroups when using systemd as cgroup driver (default)
|
||||
# kubelet_runtime_cgroups: "{{ kube_reserved_cgroups }}/{{ container_manager }}.service"
|
||||
# kubelet_kubelet_cgroups: "{{ kube_reserved_cgroups }}/kubelet.service"
|
||||
|
||||
## Set runtime and kubelet cgroups when using cgroupfs as cgroup driver
|
||||
# kubelet_runtime_cgroups_cgroupfs: "/system.slice/{{ container_manager }}.service"
|
||||
# kubelet_kubelet_cgroups_cgroupfs: "/system.slice/kubelet.service"
|
||||
|
||||
## Optionally reserve resources for kube daemons.
|
||||
# kube_reserved: true
|
||||
## Uncomment to override default values
|
||||
## The following two items need to be set when kube_reserved is true
|
||||
# kube_reserved_cgroups_for_service_slice: kube.slice
|
||||
# kube_reserved_cgroups: "/{{ kube_reserved_cgroups_for_service_slice }}"
|
||||
# kube_memory_reserved: 256Mi
|
||||
# kube_cpu_reserved: 100m
|
||||
# kube_ephemeral_storage_reserved: 2Gi
|
||||
# kube_pid_reserved: "1000"
|
||||
# Reservation for master hosts
|
||||
# kube_master_memory_reserved: 512Mi
|
||||
# kube_master_cpu_reserved: 200m
|
||||
# kube_master_ephemeral_storage_reserved: 2Gi
|
||||
# kube_master_pid_reserved: "1000"
|
||||
|
||||
## Optionally reserve resources for OS system daemons.
|
||||
# system_reserved: true
|
||||
## Uncomment to override default values
|
||||
## The following two items need to be set when system_reserved is true
|
||||
# system_reserved_cgroups_for_service_slice: system.slice
|
||||
# system_reserved_cgroups: "/{{ system_reserved_cgroups_for_service_slice }}"
|
||||
# system_memory_reserved: 512Mi
|
||||
# system_cpu_reserved: 500m
|
||||
# system_ephemeral_storage_reserved: 2Gi
|
||||
|
|
|
@ -36,6 +36,10 @@ LimitMEMLOCK={{ containerd_limit_mem_lock }}
|
|||
# Only systemd 226 and above support this version.
|
||||
TasksMax=infinity
|
||||
OOMScoreAdjust=-999
|
||||
# Set the cgroup slice of the service so that kube reserved takes effect
|
||||
{% if kube_reserved is defined and kube_reserved|bool %}
|
||||
Slice={{ kube_reserved_cgroups_for_service_slice }}
|
||||
{% endif %}
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -35,6 +35,10 @@ LimitCORE=infinity
|
|||
TasksMax=infinity
|
||||
Delegate=yes
|
||||
KillMode=process
|
||||
# Set the cgroup slice of the service so that kube reserved takes effect
|
||||
{% if kube_reserved is defined and kube_reserved|bool %}
|
||||
Slice={{ kube_reserved_cgroups_for_service_slice }}
|
||||
{% endif %}
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -113,8 +113,12 @@ conmon = "{{ crio_conmon }}"
|
|||
{% if crio_cgroup_manager == "cgroupfs" %}
|
||||
conmon_cgroup = "pod"
|
||||
{% else %}
|
||||
{% if kube_reserved is defined and kube_reserved|bool %}
|
||||
# Run conmon in the kube reserved slice so that kube reserved takes effect
conmon_cgroup = "{{ kube_reserved_cgroups_for_service_slice }}"
|
||||
{% else %}
|
||||
conmon_cgroup = "system.slice"
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
# Environment variable list for the conmon process, used for passing necessary
|
||||
# environment variables to conmon or the runtime.
|
||||
|
|
|
@ -42,6 +42,10 @@ TimeoutStartSec=1min
|
|||
Restart=on-failure
|
||||
StartLimitBurst=3
|
||||
StartLimitInterval=60s
|
||||
# Set the cgroup slice of the service so that kube reserved takes effect
|
||||
{% if kube_reserved is defined and kube_reserved|bool %}
|
||||
Slice={{ kube_reserved_cgroups_for_service_slice }}
|
||||
{% endif %}
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -12,11 +12,11 @@ kube_resolv_conf: "/etc/resolv.conf"
|
|||
kubelet_enforce_node_allocatable: "\"\""
|
||||
|
||||
# Set runtime and kubelet cgroups when using systemd as cgroup driver (default)
|
||||
kubelet_runtime_cgroups: "/systemd/system.slice"
|
||||
kubelet_kubelet_cgroups: "/systemd/system.slice"
|
||||
kubelet_runtime_cgroups: "{{ kube_reserved_cgroups }}/{{ container_manager }}.service"
|
||||
kubelet_kubelet_cgroups: "{{ kube_reserved_cgroups }}/kubelet.service"
|
||||
|
||||
# Set runtime and kubelet cgroups when using cgroupfs as cgroup driver
|
||||
kubelet_runtime_cgroups_cgroupfs: "/system.slice/containerd.service"
|
||||
kubelet_runtime_cgroups_cgroupfs: "/system.slice/{{ container_manager }}.service"
|
||||
kubelet_kubelet_cgroups_cgroupfs: "/system.slice/kubelet.service"
|
||||
|
||||
### fail with swap on (default true)
|
||||
|
@ -32,6 +32,10 @@ kubelet_secure_addresses: >-
|
|||
{%- endfor -%}
|
||||
|
||||
# Reserve this space for kube resources
|
||||
# Set to true to reserve resources for kube daemons
|
||||
kube_reserved: false
|
||||
kube_reserved_cgroups_for_service_slice: kube.slice
|
||||
kube_reserved_cgroups: "/{{ kube_reserved_cgroups_for_service_slice }}"
|
||||
kube_memory_reserved: 256Mi
|
||||
kube_cpu_reserved: 100m
|
||||
# kube_ephemeral_storage_reserved: 2Gi
|
||||
|
@ -44,6 +48,8 @@ kube_master_cpu_reserved: 200m
|
|||
|
||||
# Set to true to reserve resources for system daemons
|
||||
system_reserved: false
|
||||
system_reserved_cgroups_for_service_slice: system.slice
|
||||
system_reserved_cgroups: "/{{ system_reserved_cgroups_for_service_slice }}"
|
||||
system_memory_reserved: 512Mi
|
||||
system_cpu_reserved: 500m
|
||||
# system_ephemeral_storage_reserved: 2Gi
|
||||
|
|
|
@ -60,6 +60,8 @@ clusterDNS:
|
|||
- {{ dns_address }}
|
||||
{% endfor %}
|
||||
{# Node reserved CPU/memory #}
|
||||
{% if kube_reserved|bool %}
|
||||
kubeReservedCgroup: {{ kube_reserved_cgroups }}
|
||||
kubeReserved:
|
||||
{% if is_kube_master|bool %}
|
||||
cpu: {{ kube_master_cpu_reserved }}
|
||||
|
@ -80,7 +82,9 @@ kubeReserved:
|
|||
pid: "{{ kube_pid_reserved }}"
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% if system_reserved is defined and system_reserved %}
|
||||
{% endif %}
|
||||
{% if system_reserved|bool %}
|
||||
systemReservedCgroup: {{ system_reserved_cgroups }}
|
||||
systemReserved:
|
||||
{% if is_kube_master|bool %}
|
||||
cpu: {{ system_master_cpu_reserved }}
|
||||
|
|
|
@ -10,6 +10,24 @@ Wants={{ container_manager }}.service
|
|||
|
||||
[Service]
|
||||
EnvironmentFile=-{{ kube_config_dir }}/kubelet.env
|
||||
{% if system_reserved|bool %}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpu/{{ system_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuacct/{{ system_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuset/{{ system_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/hugetlb/{{ system_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/memory/{{ system_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/pids/{{ system_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/systemd/{{ system_reserved_cgroups_for_service_slice }}
|
||||
{% endif %}
|
||||
{% if kube_reserved|bool %}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpu/{{ kube_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuacct/{{ kube_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuset/{{ kube_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/hugetlb/{{ kube_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/memory/{{ kube_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/pids/{{ kube_reserved_cgroups_for_service_slice }}
|
||||
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/systemd/{{ kube_reserved_cgroups_for_service_slice }}
|
||||
{% endif %}
|
||||
ExecStart={{ bin_dir }}/kubelet \
|
||||
$KUBE_LOGTOSTDERR \
|
||||
$KUBE_LOG_LEVEL \
|
||||
|
|
Loading…
Reference in New Issue