assert that number of pods on node does not exceed CIDR address range

The maximum number of pods on a given node is set by the --max-pods=k
flag. When the pod CIDR address space is exhausted, no more pods can be
scheduled even if, from the --max-pods perspective, the node still has
capacity.

The special case of pods that run in the host network namespace and
therefore use the node IP is too "soft" to derive a guarantee from, so
such pods are left out of the accounting.

Comparing kubelet_max_pods with kube_network_node_prefix, when the
latter is given, allows us to assert that the pod limit fits within the
CIDR address space.
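
For illustration (the numbers are examples, not part of the commit): a
kube_network_node_prefix of 24 gives each node a /24 pod CIDR, i.e.

    2 ** (32 - 24) - 2 = 254

assignable addresses, which comfortably covers the default
kubelet_max_pods of 110. A /26 prefix would leave only 2 ** 6 - 2 = 62
addresses, and the assertion added below would stop provisioning.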
pull/2786/head
Christopher J. Ruwe 2018-05-15 14:34:03 +00:00
parent 7c93e71801
commit c1bc4615fe
4 changed files with 20 additions and 0 deletions

@@ -71,6 +71,10 @@ kube_apiserver_node_port_range: "30000-32767"
 kubelet_load_modules: false
 
+# Configure the amount of pods able to run on single node
+# default is equal to application default
+kubelet_max_pods: 110
+
 ## Support custom flags to be passed to kubelet
 kubelet_custom_flags: []
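
As a usage sketch (the file path and values are illustrative, not taken
from this commit), an operator overriding the new default in inventory
group vars might write:

    # inventory group vars, e.g. group_vars/k8s-cluster.yml (hypothetical path)
    kubelet_max_pods: 200          # needs a pod CIDR with more than 200 assignable addresses
    kube_network_node_prefix: 24   # /24 per node -> 254 addresses, so the limit fits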

@@ -33,6 +33,7 @@ KUBELET_HOSTNAME="--hostname-override={{ kube_override_hostname }}"
 --pod-infra-container-image={{ pod_infra_image_repo }}:{{ pod_infra_image_tag }} \
 --node-status-update-frequency={{ kubelet_status_update_frequency }} \
 --cgroup-driver={{ kubelet_cgroup_driver|default(kubelet_cgroup_driver_detected) }} \
+--max-pods={{ kubelet_max_pods }} \
 --docker-disable-shared-pid={{ kubelet_disable_shared_pid }} \
 --anonymous-auth=false \
 --read-only-port={{ kube_read_only_port }} \

@@ -28,6 +28,7 @@ KUBELET_HOSTNAME="--hostname-override={{ kube_override_hostname }}"
 {% endif %}
 --cgroup-driver={{ kubelet_cgroup_driver|default(kubelet_cgroup_driver_detected) }} \
 --cgroups-per-qos={{ kubelet_cgroups_per_qos }} \
+--max-pods={{ kubelet_max_pods }} \
 {% if kube_version | version_compare('v1.8', '<') %}
 --experimental-fail-swap-on={{ kubelet_fail_swap_on|default(true)}} \
 {% else %}
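
With the default left in place, both kubelet templates render the same
extra argument (a sketch, assuming kubelet_max_pods stays at 110):

    --max-pods=110 \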

@@ -61,6 +61,20 @@
   ignore_errors: "{{ ignore_assert_errors }}"
   when: inventory_hostname in groups['kube-node']
 
+# This assertion fails on the safe side: one can indeed schedule more pods
+# on a node than the CIDR range has addresses for when additional pods use
+# the host network namespace. The number of such pods cannot be known at
+# provisioning time, so to establish a guarantee, they are factored out.
+# NOTE: this check ignores the IPv6 case.
+- name: Guarantee that enough network address space is available for all pods
+  assert:
+    that: "{{ kubelet_max_pods <= (2 ** (32 - kube_network_node_prefix)) - 2 }}"
+    msg: "Do not schedule more pods on a node than there are pod CIDR addresses available."
+  ignore_errors: "{{ ignore_assert_errors }}"
+  when:
+    - inventory_hostname in groups['kube-node']
+    - kube_network_node_prefix is defined
+
 - name: Stop if ip var does not match local ips
   assert:
     that: ip in ansible_all_ipv4_addresses
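
A minimal sketch of the arithmetic the new assertion performs (the
prefix values are illustrative):

    kube_network_node_prefix: 24  ->  2 ** (32 - 24) - 2 = 254 >= 110  (assertion passes)
    kube_network_node_prefix: 26  ->  2 ** (32 - 26) - 2 =  62 <  110  (assertion fails)

The subtraction of 2 accounts for the network and broadcast addresses
of each node's pod CIDR.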