From b7ae4a2cfde75b7f110c756438c77b2103cc4761 Mon Sep 17 00:00:00 2001 From: Cristian Calin <6627509+cristicalin@users.noreply.github.com> Date: Tue, 9 Nov 2021 20:01:48 +0200 Subject: [PATCH] Kata-Containers: Fix kata-containers runtime (#8068) * Kata-containes: Fix for ubuntu and centos sometimes kata containers fail to start because of access errors to /dev/vhost-vsock and /dev/vhost-net * Kata-containers: use similar testing strategy as gvisor * Kata-Containers: adjust values for 2.2.0 defaults Make CI tests actually pass * Kata-Containers: bump to 2.2.2 to fix sandbox_cgroup_only issue --- .../molecule/default/converge.yml | 1 + .../molecule/default/files/10-mynet.conf | 17 ++ .../molecule/default/files/container.json | 10 + .../molecule/default/files/sandbox.json | 10 + .../molecule/default/prepare.yml | 44 ++++- .../molecule/default/tests/test_default.py | 23 ++- .../kata-containers/tasks/main.yml | 16 ++ .../templates/configuration-qemu.toml.j2 | 175 +++++++++++++++++- roles/download/defaults/main.yml | 8 +- roles/reset/tasks/main.yml | 1 + 10 files changed, 287 insertions(+), 18 deletions(-) create mode 100644 roles/container-engine/kata-containers/molecule/default/files/10-mynet.conf create mode 100644 roles/container-engine/kata-containers/molecule/default/files/container.json create mode 100644 roles/container-engine/kata-containers/molecule/default/files/sandbox.json diff --git a/roles/container-engine/kata-containers/molecule/default/converge.yml b/roles/container-engine/kata-containers/molecule/default/converge.yml index 3456ee6f8..a6fdf812a 100644 --- a/roles/container-engine/kata-containers/molecule/default/converge.yml +++ b/roles/container-engine/kata-containers/molecule/default/converge.yml @@ -4,6 +4,7 @@ become: true vars: kata_containers_enabled: true + container_manager: containerd roles: - role: kubespray-defaults - role: container-engine/containerd diff --git a/roles/container-engine/kata-containers/molecule/default/files/10-mynet.conf 
b/roles/container-engine/kata-containers/molecule/default/files/10-mynet.conf new file mode 100644 index 000000000..f10935b75 --- /dev/null +++ b/roles/container-engine/kata-containers/molecule/default/files/10-mynet.conf @@ -0,0 +1,17 @@ +{ + "cniVersion": "0.2.0", + "name": "mynet", + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "ipMasq": true, + "ipam": { + "type": "host-local", + "subnet": "172.19.0.0/24", + "routes": [ + { + "dst": "0.0.0.0/0" + } + ] + } +} diff --git a/roles/container-engine/kata-containers/molecule/default/files/container.json b/roles/container-engine/kata-containers/molecule/default/files/container.json new file mode 100644 index 000000000..9ada521f4 --- /dev/null +++ b/roles/container-engine/kata-containers/molecule/default/files/container.json @@ -0,0 +1,10 @@ +{ + "metadata": { + "name": "kata1" + }, + "image": { + "image": "docker.io/library/hello-world:latest" + }, + "log_path": "kata1.0.log", + "linux": {} +} diff --git a/roles/container-engine/kata-containers/molecule/default/files/sandbox.json b/roles/container-engine/kata-containers/molecule/default/files/sandbox.json new file mode 100644 index 000000000..326a578be --- /dev/null +++ b/roles/container-engine/kata-containers/molecule/default/files/sandbox.json @@ -0,0 +1,10 @@ +{ + "metadata": { + "name": "kata1", + "namespace": "default", + "attempt": 1, + "uid": "hdishd83djaidwnduwk28bcsb" + }, + "linux": {}, + "log_directory": "/tmp" +} diff --git a/roles/container-engine/kata-containers/molecule/default/prepare.yml b/roles/container-engine/kata-containers/molecule/default/prepare.yml index 1afc51a04..9299a7e2d 100644 --- a/roles/container-engine/kata-containers/molecule/default/prepare.yml +++ b/roles/container-engine/kata-containers/molecule/default/prepare.yml @@ -1,6 +1,48 @@ --- - name: Prepare hosts: all - gather_facts: False + become: true roles: + - role: kubespray-defaults - role: bootstrap-os + - role: adduser + user: "{{ addusers.kube }}" + tasks: + - 
include_tasks: "../../../../download/tasks/download_file.yml" + vars: + download: "{{ download_defaults | combine(downloads.cni) }}" + +- name: Prepare container runtime + hosts: all + become: true + vars: + container_manager: containerd + kube_network_plugin: cni + roles: + - role: kubespray-defaults + - role: network_plugin/cni + - role: container-engine/crictl + tasks: + - name: Copy test container files + copy: + src: "{{ item }}" + dest: "/tmp/{{ item }}" + owner: root + mode: 0644 + with_items: + - container.json + - sandbox.json + - name: Create /etc/cni/net.d directory + file: + path: /etc/cni/net.d + state: directory + owner: kube + mode: 0755 + - name: Setup CNI + copy: + src: "{{ item }}" + dest: "/etc/cni/net.d/{{ item }}" + owner: root + mode: 0644 + with_items: + - 10-mynet.conf diff --git a/roles/container-engine/kata-containers/molecule/default/tests/test_default.py b/roles/container-engine/kata-containers/molecule/default/tests/test_default.py index 15e80825d..b34136d58 100644 --- a/roles/container-engine/kata-containers/molecule/default/tests/test_default.py +++ b/roles/container-engine/kata-containers/molecule/default/tests/test_default.py @@ -14,17 +14,24 @@ def test_run(host): assert "kata-runtime" in cmd.stdout -def test_run_pod(host): - image = "docker.io/library/hello-world:latest" - runtime = "io.containerd.kata-qemu.v2" - - pull_command = "ctr image pull {}".format(image) +def test_run_check(host): + kataruntime = "/opt/kata/bin/kata-runtime" with host.sudo(): - cmd = host.command(pull_command) + cmd = host.command(kataruntime + " check") assert cmd.rc == 0 + assert "System is capable of running" in cmd.stdout - run_command = "ctr run --runtime {} {} kata1".format(runtime, image) + +def test_run_pod(host): + runtime = "kata-qemu" + + run_command = "/usr/local/bin/crictl run --with-pull --runtime {} /tmp/container.json /tmp/sandbox.json".format(runtime) with host.sudo(): cmd = host.command(run_command) assert cmd.rc == 0 - assert "Hello 
from Docker!" in cmd.stdout + + with host.sudo(): + log_f = host.file("/tmp/kata1.0.log") + + assert log_f.exists + assert b"Hello from Docker!" in log_f.content diff --git a/roles/container-engine/kata-containers/tasks/main.yml b/roles/container-engine/kata-containers/tasks/main.yml index 8d99e5255..54bd25d0f 100644 --- a/roles/container-engine/kata-containers/tasks/main.yml +++ b/roles/container-engine/kata-containers/tasks/main.yml @@ -34,3 +34,19 @@ mode: 0755 with_items: - qemu + +- name: kata-containers | Load vhost kernel modules + modprobe: + state: present + name: "{{ item }}" + with_items: + - vhost_vsock + - vhost_net + +- name: kata-containers | Persist vhost kernel modules + copy: + dest: /etc/modules-load.d/kubespray-kata-containers.conf + mode: 0644 + content: | + vhost_vsock + vhost_net diff --git a/roles/container-engine/kata-containers/templates/configuration-qemu.toml.j2 b/roles/container-engine/kata-containers/templates/configuration-qemu.toml.j2 index 334a2d977..f64647bdf 100644 --- a/roles/container-engine/kata-containers/templates/configuration-qemu.toml.j2 +++ b/roles/container-engine/kata-containers/templates/configuration-qemu.toml.j2 @@ -12,10 +12,33 @@ [hypervisor.qemu] path = "/opt/kata/bin/qemu-system-x86_64" +{% if kata_containers_version is version('2.2.0', '>=') %} +kernel = "/opt/kata/share/kata-containers/vmlinux.container" +{% else %} kernel = "/opt/kata/share/kata-containers/vmlinuz.container" +{% endif %} image = "/opt/kata/share/kata-containers/kata-containers.img" machine_type = "q35" +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one. 
+# Default false +# confidential_guest = true + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path" +enable_annotations = [] + +# List of valid annotation values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: ["/opt/kata/bin/qemu-system-x86_64"] +valid_hypervisor_paths = ["/opt/kata/bin/qemu-system-x86_64"] + # Optional space-separated list of options to pass to the guest kernel. # For example, use `kernel_params = "vsyscall=emulate"` if you are having # trouble running pre-2.15 glibc. @@ -37,6 +60,11 @@ firmware = "" # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` machine_accelerators="" +# CPU features +# comma-separated list of cpu features to pass to the cpu +# For example, `cpu_features = "pmu=off,vmx=off"` +cpu_features="pmu=off" + # Default number of vCPUs per SB/VM: # unspecified or 0 --> will be set to 1 # < 0 --> will be set to the actual number of physical cores @@ -58,6 +86,7 @@ default_vcpus = 1 # `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of # vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, # unless you know what are you doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. default_maxvcpus = 0 # Bridges can be used to hot plug devices. 
@@ -103,15 +132,24 @@ default_memory = {{ kata_containers_qemu_default_memory }} disable_block_device_use = false # Shared file system type: -# - virtio-9p (default) -# - virtio-fs +# - virtio-fs (default) +# - virtio-9p +{% if kata_containers_version is version('2.2.0', '>=') %} +shared_fs = "virtio-fs" +{% else %} shared_fs = "virtio-9p" +{% endif %} # Path to vhost-user-fs daemon. -virtio_fs_daemon = "/opt/kata/bin/virtiofsd" +virtio_fs_daemon = "/opt/kata/libexec/kata-qemu/virtiofsd" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: ["/opt/kata/libexec/kata-qemu/virtiofsd"] +valid_virtio_fs_daemon_paths = ["/opt/kata/libexec/kata-qemu/virtiofsd"] # Default size of DAX cache in MiB -virtio_fs_cache_size = 1024 +virtio_fs_cache_size = 0 # Extra args for virtiofsd daemon # @@ -119,7 +157,7 @@ virtio_fs_cache_size = 1024 # ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] # # see `virtiofsd -h` for possible options. -virtio_fs_extra_args = [] +virtio_fs_extra_args = ["--thread-pool-size=1"] # Cache mode: # @@ -189,16 +227,40 @@ enable_vhost_user_store = false # simulated block device nodes for vhost-user devices to live. vhost_user_store_path = "/var/run/kata-containers/vhost-user" +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# Enable IOMMU_PLATFORM, default false +# Enabling this will result in the VM device having iommu_platform=on set +#enable_iommu_platform = true + +# List of valid annotations values for the vhost user store path +# The default if not set is empty (all annotations rejected.) 
+# Your distribution recommends: ["/var/run/kata-containers/vhost-user"] +valid_vhost_user_store_paths = ["/var/run/kata-containers/vhost-user"] + # Enable file based guest memory support. The default is an empty string which # will disable this feature. In the case of virtio-fs, this is enabled # automatically and '/dev/shm' is used as the backing folder. # This option will be ignored if VM templating is enabled. #file_mem_backend = "" +# List of valid annotations values for the file_mem_backend annotation +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: [""] +valid_file_mem_backends = [""] + # Enable swap of vm memory. Default false. # The behaviour is undefined if mem_prealloc is also set to true #enable_swap = true +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = [] + # This option changes the default hypervisor and kernel parameters # to enable debug output where available. This extra output is added # to the proxy logs, but only when proxy debug is also enabled. @@ -257,6 +319,11 @@ enable_debug = {{ kata_containers_qemu_debug }} # all practical purposes. #entropy_source= "/dev/urandom" +# List of valid annotations values for entropy_source +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: ["/dev/urandom","/dev/random",""] +valid_entropy_sources = ["/dev/urandom","/dev/random",""] + # Path to OCI hook binaries in the *guest rootfs*. # This does not affect host-side hooks which must instead be added to # the OCI spec passed to the runtime. @@ -273,6 +340,47 @@ enable_debug = {{ kata_containers_qemu_debug }} # Warnings will be logged if any error is encountered will scanning for hooks, # but it will not abort container execution. #guest_hook_path = "/usr/share/oci/hooks" +# +# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). 
+# In Qemu, we use classful qdiscs HTB(Hierarchical Token Bucket) to discipline traffic. +# Default 0-sized value means unlimited rate. +#rx_rate_limiter_max_rate = 0 +# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchical Token Bucket) and ifb(Intermediate Functional Block) +# to discipline traffic. +# Default 0-sized value means unlimited rate. +#tx_rate_limiter_max_rate = 0 + +# Set where to save the guest memory dump file. +# If set, when a GUEST_PANICKED event occurs, +# guest memory will be dumped to the host filesystem under guest_memory_dump_path. +# This directory will be created automatically if it does not exist. +# +# The dumped file(also called vmcore) can be processed with crash or gdb. +# +# WARNING: +# Dumping the guest’s memory can take a very long time depending on the amount of guest memory +# and can use a lot of disk space. +#guest_memory_dump_path="/var/crash/kata" + +# Whether to enable paging for the guest memory dump. +# Basically, if you want to use "gdb" rather than "crash", +# or need the guest-virtual addresses in the ELF vmcore, +# then you should enable paging. +# +# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details +#guest_memory_dump_paging=false + +# Enable swap in the guest. Default false. +# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is bigger than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If neither swap_in_bytes nor memory_limit_in_bytes is set, the size should +# be default_memory. +#enable_guest_swap = true [factory] # VM templating support. 
Once enabled, new VMs are created from template @@ -381,6 +489,16 @@ enable_debug = {{ kata_containers_qemu_debug }} # kernel_modules=[] +# Enable debug console. + +# If enabled, a user can connect to the guest OS running inside the hypervisor +# through the "kata-runtime exec <sandbox-id>" command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 30) +#dial_timeout = 30 [netmon] # If enabled, the network monitoring process gets started when the @@ -433,6 +551,16 @@ disable_guest_seccomp=true # (default: disabled) #enable_tracing = true +# Set the full URL to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + # If enabled, the runtime will not create a network namespace for shim and hypervisor processes. # This option may have some potential impacts to your host. It should only be used when you know what you're doing. # `disable_new_netns` conflicts with `enable_netmon` @@ -451,9 +579,46 @@ disable_guest_seccomp=true # See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType sandbox_cgroup_only={{ kata_containers_qemu_sandbox_cgroup_only }} +# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path. +# This is only valid if filesystem sharing is utilized. The provided path(s) will be bind-mounted into the shared fs directory. +# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` +# These will not be exposed to the container workloads, and are only provided for potential guest services. +sandbox_bind_mounts=[] + # Enabled experimental feature list, format: ["a", "b"]. 
# Experimental features are features not stable enough for production, # they may break compatibility, and are prepared for a big version bump. # Supported experimental features: # (default: []) experimental=[] + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +#service_offload = true + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. through a special command or +# a local file) or remotely (usually after the guest is remotely attested). +# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. 
+# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index 6d19e2324..0bab159cf 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -53,7 +53,7 @@ kubeadm_version: "{{ kube_version }}" etcd_version: v3.5.0 crun_version: 1.3 runc_version: v1.0.2 -kata_containers_version: 2.2.0 +kata_containers_version: 2.2.2 gvisor_version: 20210921 # gcr and kubernetes image repo define @@ -463,15 +463,15 @@ kata_containers_binary_checksums: arm: 2.0.4: 0 2.1.1: 0 - 2.2.0: 0 + 2.2.2: 0 amd64: 2.0.4: 022a60c2d92a5ab9a5eb83d5a95154a2d06fdc2206b2a473d902ccc86766371a 2.1.1: a83591d968cd0f1adfb5025d7aa33ca1385d4b1165ff10d74602302fc3c0373f - 2.2.0: 50163e2a430e96447117f7169a4ed5a8bdea09267d62a39221d5b8b3b3f88c0e + 2.2.2: 2e3ac77b8abd4d839cf16780b57aee8f3d6e1f19489edd7d6d8069ea3cc3c18a arm64: 2.0.4: 0 2.1.1: 0 - 2.2.0: 0 + 2.2.2: 0 gvisor_runsc_binary_checksums: arm: diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml index 6941f2150..101be1c88 100644 --- a/roles/reset/tasks/main.yml +++ b/roles/reset/tasks/main.yml @@ -326,6 +326,7 @@ - "{{ krew_root_dir | default('/usr/local/krew') }}" - /etc/modules-load.d/kube_proxy-ipvs.conf - /etc/modules-load.d/kubespray-br_netfilter.conf + - /etc/modules-load.d/kubespray-kata-containers.conf - /usr/libexec/kubernetes ignore_errors: true # noqa ignore-errors tags: