optimize cgroups settings for node reserved (#9209)
* optimize cgroups settings for node reserved * fix * set cgroup slice for multi container engine * set cgroup slice for crio * add reserved cgroups variables to sample files * Compatible with cgroup path for different container managers * add cgroups doc * fix markdown
pull/9632/head
parent
744c81d451
commit
1c4db6132d
|
@ -0,0 +1,72 @@
|
||||||
|
# cgroups
|
||||||
|
|
||||||
|
To prevent containers from competing with each other for resources and from starving the host, the kubelet relies on cgroups to limit the resources that containers can use.
|
||||||
|
|
||||||
|
## Enforcing Node Allocatable
|
||||||
|
|
||||||
|
You can use `kubelet_enforce_node_allocatable` to set node allocatable enforcement.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# A comma separated list of levels of node allocatable enforcement to be enforced by kubelet.
|
||||||
|
kubelet_enforce_node_allocatable: "pods"
|
||||||
|
# kubelet_enforce_node_allocatable: "pods,kube-reserved"
|
||||||
|
# kubelet_enforce_node_allocatable: "pods,kube-reserved,system-reserved"
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that to enforce `kube-reserved` or `system-reserved`, `kube_reserved_cgroups` or `system_reserved_cgroups` must be specified, respectively.
|
||||||
|
|
||||||
|
Here is an example:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
kubelet_enforce_node_allocatable: "pods,kube-reserved,system-reserved"
|
||||||
|
|
||||||
|
# Reserve this space for kube resources
|
||||||
|
# Set to true to reserve resources for kube daemons
|
||||||
|
kube_reserved: true
|
||||||
|
kube_reserved_cgroups_for_service_slice: kube.slice
|
||||||
|
kube_reserved_cgroups: "/{{ kube_reserved_cgroups_for_service_slice }}"
|
||||||
|
kube_memory_reserved: 256Mi
|
||||||
|
kube_cpu_reserved: 100m
|
||||||
|
# kube_ephemeral_storage_reserved: 2Gi
|
||||||
|
# kube_pid_reserved: "1000"
|
||||||
|
# Reservation for master hosts
|
||||||
|
kube_master_memory_reserved: 512Mi
|
||||||
|
kube_master_cpu_reserved: 200m
|
||||||
|
# kube_master_ephemeral_storage_reserved: 2Gi
|
||||||
|
# kube_master_pid_reserved: "1000"
|
||||||
|
|
||||||
|
# Set to true to reserve resources for system daemons
|
||||||
|
system_reserved: true
|
||||||
|
system_reserved_cgroups_for_service_slice: system.slice
|
||||||
|
system_reserved_cgroups: "/{{ system_reserved_cgroups_for_service_slice }}"
|
||||||
|
system_memory_reserved: 512Mi
|
||||||
|
system_cpu_reserved: 500m
|
||||||
|
# system_ephemeral_storage_reserved: 2Gi
|
||||||
|
# system_pid_reserved: "1000"
|
||||||
|
# Reservation for master hosts
|
||||||
|
system_master_memory_reserved: 256Mi
|
||||||
|
system_master_cpu_reserved: 250m
|
||||||
|
# system_master_ephemeral_storage_reserved: 2Gi
|
||||||
|
# system_master_pid_reserved: "1000"
|
||||||
|
```
|
||||||
|
|
||||||
|
After the setup, the cgroups hierarchy is as follows:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/ (Cgroups Root)
|
||||||
|
├── kubepods.slice
|
||||||
|
│ ├── ...
|
||||||
|
│ ├── kubepods-besteffort.slice
|
||||||
|
│ ├── kubepods-burstable.slice
|
||||||
|
│ └── ...
|
||||||
|
├── kube.slice
|
||||||
|
│ ├── ...
|
||||||
|
│ ├── {{container_manager}}.service
|
||||||
|
│ ├── kubelet.service
|
||||||
|
│ └── ...
|
||||||
|
├── system.slice
|
||||||
|
│ └── ...
|
||||||
|
└── ...
|
||||||
|
```
|
||||||
|
|
||||||
|
You can learn more in the [official Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/).
|
|
@ -261,9 +261,36 @@ podsecuritypolicy_enabled: false
|
||||||
# Acceptable options are 'pods', 'system-reserved', 'kube-reserved' and ''. Default is "".
|
# Acceptable options are 'pods', 'system-reserved', 'kube-reserved' and ''. Default is "".
|
||||||
# kubelet_enforce_node_allocatable: pods
|
# kubelet_enforce_node_allocatable: pods
|
||||||
|
|
||||||
|
## Set runtime and kubelet cgroups when using systemd as cgroup driver (default)
|
||||||
|
# kubelet_runtime_cgroups: "{{ kube_reserved_cgroups }}/{{ container_manager }}.service"
|
||||||
|
# kubelet_kubelet_cgroups: "{{ kube_reserved_cgroups }}/kubelet.service"
|
||||||
|
|
||||||
|
## Set runtime and kubelet cgroups when using cgroupfs as cgroup driver
|
||||||
|
# kubelet_runtime_cgroups_cgroupfs: "/system.slice/{{ container_manager }}.service"
|
||||||
|
# kubelet_kubelet_cgroups_cgroupfs: "/system.slice/kubelet.service"
|
||||||
|
|
||||||
|
# Optionally reserve this space for kube daemons.
|
||||||
|
# kube_reserved: true
|
||||||
|
## Uncomment to override default values
|
||||||
|
## The following two items need to be set when kube_reserved is true
|
||||||
|
# kube_reserved_cgroups_for_service_slice: kube.slice
|
||||||
|
# kube_reserved_cgroups: "/{{ kube_reserved_cgroups_for_service_slice }}"
|
||||||
|
# kube_memory_reserved: 256Mi
|
||||||
|
# kube_cpu_reserved: 100m
|
||||||
|
# kube_ephemeral_storage_reserved: 2Gi
|
||||||
|
# kube_pid_reserved: "1000"
|
||||||
|
# Reservation for master hosts
|
||||||
|
# kube_master_memory_reserved: 512Mi
|
||||||
|
# kube_master_cpu_reserved: 200m
|
||||||
|
# kube_master_ephemeral_storage_reserved: 2Gi
|
||||||
|
# kube_master_pid_reserved: "1000"
|
||||||
|
|
||||||
## Optionally reserve resources for OS system daemons.
|
## Optionally reserve resources for OS system daemons.
|
||||||
# system_reserved: true
|
# system_reserved: true
|
||||||
## Uncomment to override default values
|
## Uncomment to override default values
|
||||||
|
## The following two items need to be set when system_reserved is true
|
||||||
|
# system_reserved_cgroups_for_service_slice: system.slice
|
||||||
|
# system_reserved_cgroups: "/{{ system_reserved_cgroups_for_service_slice }}"
|
||||||
# system_memory_reserved: 512Mi
|
# system_memory_reserved: 512Mi
|
||||||
# system_cpu_reserved: 500m
|
# system_cpu_reserved: 500m
|
||||||
# system_ephemeral_storage_reserved: 2Gi
|
# system_ephemeral_storage_reserved: 2Gi
|
||||||
|
|
|
@ -36,6 +36,10 @@ LimitMEMLOCK={{ containerd_limit_mem_lock }}
|
||||||
# Only systemd 226 and above support this version.
|
# Only systemd 226 and above support this version.
|
||||||
TasksMax=infinity
|
TasksMax=infinity
|
||||||
OOMScoreAdjust=-999
|
OOMScoreAdjust=-999
|
||||||
|
# Set the cgroup slice of the service so that kube reserved takes effect
|
||||||
|
{% if kube_reserved is defined and kube_reserved|bool %}
|
||||||
|
Slice={{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
|
@ -35,6 +35,10 @@ LimitCORE=infinity
|
||||||
TasksMax=infinity
|
TasksMax=infinity
|
||||||
Delegate=yes
|
Delegate=yes
|
||||||
KillMode=process
|
KillMode=process
|
||||||
|
# Set the cgroup slice of the service so that kube reserved takes effect
|
||||||
|
{% if kube_reserved is defined and kube_reserved|bool %}
|
||||||
|
Slice={{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
|
@ -113,8 +113,12 @@ conmon = "{{ crio_conmon }}"
|
||||||
{% if crio_cgroup_manager == "cgroupfs" %}
|
{% if crio_cgroup_manager == "cgroupfs" %}
|
||||||
conmon_cgroup = "pod"
|
conmon_cgroup = "pod"
|
||||||
{% else %}
|
{% else %}
|
||||||
|
{% if kube_reserved is defined and kube_reserved|bool %}
|
||||||
|
# Place conmon in the kube-reserved slice so that the kube reserved settings take effect.
conmon_cgroup = "{{ kube_reserved_cgroups_for_service_slice }}"
|
||||||
|
{% else %}
|
||||||
conmon_cgroup = "system.slice"
|
conmon_cgroup = "system.slice"
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
# Environment variable list for the conmon process, used for passing necessary
|
# Environment variable list for the conmon process, used for passing necessary
|
||||||
# environment variables to conmon or the runtime.
|
# environment variables to conmon or the runtime.
|
||||||
|
|
|
@ -42,6 +42,10 @@ TimeoutStartSec=1min
|
||||||
Restart=on-failure
|
Restart=on-failure
|
||||||
StartLimitBurst=3
|
StartLimitBurst=3
|
||||||
StartLimitInterval=60s
|
StartLimitInterval=60s
|
||||||
|
# Set the cgroup slice of the service so that kube reserved takes effect
|
||||||
|
{% if kube_reserved is defined and kube_reserved|bool %}
|
||||||
|
Slice={{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
|
@ -12,11 +12,11 @@ kube_resolv_conf: "/etc/resolv.conf"
|
||||||
kubelet_enforce_node_allocatable: "\"\""
|
kubelet_enforce_node_allocatable: "\"\""
|
||||||
|
|
||||||
# Set runtime and kubelet cgroups when using systemd as cgroup driver (default)
|
# Set runtime and kubelet cgroups when using systemd as cgroup driver (default)
|
||||||
kubelet_runtime_cgroups: "/systemd/system.slice"
|
kubelet_runtime_cgroups: "{{ kube_reserved_cgroups }}/{{ container_manager }}.service"
|
||||||
kubelet_kubelet_cgroups: "/systemd/system.slice"
|
kubelet_kubelet_cgroups: "{{ kube_reserved_cgroups }}/kubelet.service"
|
||||||
|
|
||||||
# Set runtime and kubelet cgroups when using cgroupfs as cgroup driver
|
# Set runtime and kubelet cgroups when using cgroupfs as cgroup driver
|
||||||
kubelet_runtime_cgroups_cgroupfs: "/system.slice/containerd.service"
|
kubelet_runtime_cgroups_cgroupfs: "/system.slice/{{ container_manager }}.service"
|
||||||
kubelet_kubelet_cgroups_cgroupfs: "/system.slice/kubelet.service"
|
kubelet_kubelet_cgroups_cgroupfs: "/system.slice/kubelet.service"
|
||||||
|
|
||||||
### fail with swap on (default true)
|
### fail with swap on (default true)
|
||||||
|
@ -32,6 +32,10 @@ kubelet_secure_addresses: >-
|
||||||
{%- endfor -%}
|
{%- endfor -%}
|
||||||
|
|
||||||
# Reserve this space for kube resources
|
# Reserve this space for kube resources
|
||||||
|
# Set to true to reserve resources for kube daemons
|
||||||
|
kube_reserved: false
|
||||||
|
kube_reserved_cgroups_for_service_slice: kube.slice
|
||||||
|
kube_reserved_cgroups: "/{{ kube_reserved_cgroups_for_service_slice }}"
|
||||||
kube_memory_reserved: 256Mi
|
kube_memory_reserved: 256Mi
|
||||||
kube_cpu_reserved: 100m
|
kube_cpu_reserved: 100m
|
||||||
# kube_ephemeral_storage_reserved: 2Gi
|
# kube_ephemeral_storage_reserved: 2Gi
|
||||||
|
@ -44,6 +48,8 @@ kube_master_cpu_reserved: 200m
|
||||||
|
|
||||||
# Set to true to reserve resources for system daemons
|
# Set to true to reserve resources for system daemons
|
||||||
system_reserved: false
|
system_reserved: false
|
||||||
|
system_reserved_cgroups_for_service_slice: system.slice
|
||||||
|
system_reserved_cgroups: "/{{ system_reserved_cgroups_for_service_slice }}"
|
||||||
system_memory_reserved: 512Mi
|
system_memory_reserved: 512Mi
|
||||||
system_cpu_reserved: 500m
|
system_cpu_reserved: 500m
|
||||||
# system_ephemeral_storage_reserved: 2Gi
|
# system_ephemeral_storage_reserved: 2Gi
|
||||||
|
|
|
@ -60,6 +60,8 @@ clusterDNS:
|
||||||
- {{ dns_address }}
|
- {{ dns_address }}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{# Node reserved CPU/memory #}
|
{# Node reserved CPU/memory #}
|
||||||
|
{% if kube_reserved|bool %}
|
||||||
|
kubeReservedCgroup: {{ kube_reserved_cgroups }}
|
||||||
kubeReserved:
|
kubeReserved:
|
||||||
{% if is_kube_master|bool %}
|
{% if is_kube_master|bool %}
|
||||||
cpu: {{ kube_master_cpu_reserved }}
|
cpu: {{ kube_master_cpu_reserved }}
|
||||||
|
@ -80,7 +82,9 @@ kubeReserved:
|
||||||
pid: "{{ kube_pid_reserved }}"
|
pid: "{{ kube_pid_reserved }}"
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if system_reserved is defined and system_reserved %}
|
{% endif %}
|
||||||
|
{% if system_reserved|bool %}
|
||||||
|
systemReservedCgroup: {{ system_reserved_cgroups }}
|
||||||
systemReserved:
|
systemReserved:
|
||||||
{% if is_kube_master|bool %}
|
{% if is_kube_master|bool %}
|
||||||
cpu: {{ system_master_cpu_reserved }}
|
cpu: {{ system_master_cpu_reserved }}
|
||||||
|
|
|
@ -10,6 +10,24 @@ Wants={{ container_manager }}.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
EnvironmentFile=-{{ kube_config_dir }}/kubelet.env
|
EnvironmentFile=-{{ kube_config_dir }}/kubelet.env
|
||||||
|
{% if system_reserved|bool %}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpu/{{ system_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuacct/{{ system_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuset/{{ system_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/hugetlb/{{ system_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/memory/{{ system_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/pids/{{ system_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/systemd/{{ system_reserved_cgroups_for_service_slice }}
|
||||||
|
{% endif %}
|
||||||
|
{% if kube_reserved|bool %}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpu/{{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuacct/{{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/cpuset/{{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/hugetlb/{{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/memory/{{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/pids/{{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
ExecStartPre=/bin/mkdir -p /sys/fs/cgroup/systemd/{{ kube_reserved_cgroups_for_service_slice }}
|
||||||
|
{% endif %}
|
||||||
ExecStart={{ bin_dir }}/kubelet \
|
ExecStart={{ bin_dir }}/kubelet \
|
||||||
$KUBE_LOGTOSTDERR \
|
$KUBE_LOGTOSTDERR \
|
||||||
$KUBE_LOG_LEVEL \
|
$KUBE_LOG_LEVEL \
|
||||||
|
|
Loading…
Reference in New Issue