From 6dad31994ee21449ca0478ed29244418a633c88e Mon Sep 17 00:00:00 2001
From: "jin.gjm"
Date: Fri, 17 Jun 2022 16:29:23 +0800
Subject: [PATCH] feat: add network-check addon

---
Reviewer notes (placed after the separator, so not part of the commit message):

* example/config.yml gains two settings: network_check_enabled and
  network_check_schedule (the cron expression used by every CronJob below).
* roles/cluster-addon/tasks/main.yml imports network_check.yml only when
  network_check_enabled is truthy and CLUSTER_NETWORK is not "cilium".
* network_check.yml creates {{ cluster_dir }}/yml/network-check, renders the
  two templates into it, then runs kubectl apply for the namespace first and
  for the checks second (into namespace network-test). The block is marked
  run_once with a local connection.
* network-check.yaml.j2 defines nine CronJobs (test01..test09) that curl the
  targets defined at the end of the file: Deployments echo-server (port 8080)
  and echo-server-host (hostNetwork, PORT 40001, pod affinity to echo-server),
  and Services echo-server (NodePort 8080/31314), echo-server-headless and
  echo-server-host-headless.
* Task names and the debug message are runtime strings and are left as-is.

 example/config.yml                            |   4 +
 roles/cluster-addon/tasks/main.yml            |   3 +
 roles/cluster-addon/tasks/network_check.yml   |  21 +
 .../templates/network-check/namespace.yaml.j2 |   9 +
 .../network-check/network-check.yaml.j2       | 416 ++++++++++++++++++
 5 files changed, 453 insertions(+)
 create mode 100644 roles/cluster-addon/tasks/network_check.yml
 create mode 100644 roles/cluster-addon/templates/network-check/namespace.yaml.j2
 create mode 100644 roles/cluster-addon/templates/network-check/network-check.yaml.j2

diff --git a/example/config.yml b/example/config.yml
index b46dc07..35003cf 100644
--- a/example/config.yml
+++ b/example/config.yml
@@ -188,6 +188,10 @@ nfs_storage_class: "managed-nfs-storage"
 nfs_server: "192.168.1.10"
 nfs_path: "/data/nfs"
 
+# network-check: automatic installation
+network_check_enabled: "true"
+network_check_schedule: "*/5 * * * *"
+
 ############################
 # role:harbor
 ############################
diff --git a/roles/cluster-addon/tasks/main.yml b/roles/cluster-addon/tasks/main.yml
index 915c20e..025f31b 100644
--- a/roles/cluster-addon/tasks/main.yml
+++ b/roles/cluster-addon/tasks/main.yml
@@ -33,3 +33,6 @@
 
 - import_tasks: cilium_connectivity_check.yml
   when: 'CLUSTER_NETWORK == "cilium" and cilium_connectivity_check|bool'
+
+- import_tasks: network_check.yml
+  when: 'network_check_enabled|bool and CLUSTER_NETWORK != "cilium"'
diff --git a/roles/cluster-addon/tasks/network_check.yml b/roles/cluster-addon/tasks/network_check.yml
new file mode 100644
index 0000000..1765e5f
--- /dev/null
+++ b/roles/cluster-addon/tasks/network_check.yml
@@ -0,0 +1,21 @@
+- block:
+    - name: 准备 network-check 配置目录
+      file: name={{ cluster_dir }}/yml/network-check state=directory
+
+    - name: 准备部署文件
+      template: src=network-check/{{ item }}.j2 dest={{ cluster_dir }}/yml/network-check/{{ item }}
+      with_items:
+        - "network-check.yaml"
+        - "namespace.yaml"
+
+    - name: 创建测试namespace
+      shell: "{{ base_dir }}/bin/kubectl apply -f {{ cluster_dir }}/yml/network-check/namespace.yaml"
+
+    - name: 创建完整测试network-check
+      shell: "{{ base_dir }}/bin/kubectl apply -n network-test -f {{ cluster_dir }}/yml/network-check/network-check.yaml"
+
+    - debug:
+        msg: "[重要]: 请查看命名空间network-test下所有pod,如果均为Completed状态,且没有重启数增长,说明网络连接测试正常。 \
+             如果有Pending状态,部分测试需要多节点集群才能完成,如果希望禁用网络测试执行(kubectl delete ns network-test)"
+  run_once: true
+  connection: local
diff --git a/roles/cluster-addon/templates/network-check/namespace.yaml.j2 b/roles/cluster-addon/templates/network-check/namespace.yaml.j2
new file mode 100644
index 0000000..8228b09
--- /dev/null
+++ b/roles/cluster-addon/templates/network-check/namespace.yaml.j2
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  labels:
+    kubernetes.io/metadata.name: network-test
+  name: network-test
+spec:
+  finalizers:
+    - kubernetes
diff --git a/roles/cluster-addon/templates/network-check/network-check.yaml.j2 b/roles/cluster-addon/templates/network-check/network-check.yaml.j2
new file mode 100644
index 0000000..2942015
--- /dev/null
+++ b/roles/cluster-addon/templates/network-check/network-check.yaml.j2
@@ -0,0 +1,416 @@
+# network-check: CronJobs test01-test09 curl the echo-server targets defined at the end of this file.
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test01-pod-to-container
+  labels:
+    name: test01-pod-to-container
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+            - name: test01-pod-to-container
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 echo-server:8080/public"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test02-pod-to-node-nodeport
+  labels:
+    name: test02-pod-to-node-nodeport
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+            - name: test02-pod-to-node-nodeport
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 echo-server-host-headless:31314/public"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test03-pod-to-multi-node-clusterip
+  labels:
+    name: test03-pod-to-multi-node-clusterip
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          affinity:
+            podAntiAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+                - labelSelector:
+                    matchExpressions:
+                      - key: name
+                        operator: In
+                        values:
+                          - echo-server
+                  topologyKey: kubernetes.io/hostname
+          containers:
+            - name: test03-pod-to-multi-node-clusterip
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 echo-server:8080/public"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test04-pod-to-multi-node-headless
+  labels:
+    name: test04-pod-to-multi-node-headless
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          affinity:
+            podAntiAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+                - labelSelector:
+                    matchExpressions:
+                      - key: name
+                        operator: In
+                        values:
+                          - echo-server
+                  topologyKey: kubernetes.io/hostname
+          containers:
+            - name: test04-pod-to-multi-node-headless
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 echo-server-headless:8080/public"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test05-pod-to-multi-node-nodeport
+  labels:
+    name: test05-pod-to-multi-node-nodeport
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          affinity:
+            podAntiAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+                - labelSelector:
+                    matchExpressions:
+                      - key: name
+                        operator: In
+                        values:
+                          - echo-server
+                  topologyKey: kubernetes.io/hostname
+          containers:
+            - name: test05-pod-to-multi-node-nodeport
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 echo-server-host-headless:31314/public"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test06-pod-to-external-1111
+  labels:
+    name: test06-pod-to-external-1111
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+            - name: test06-pod-to-external-1111
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 1.1.1.1"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test07-pod-to-external-fqdn-baidu
+  labels:
+    name: test07-pod-to-external-fqdn-baidu
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+            - name: test07-pod-to-external-fqdn-baidu
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 www.baidu.com"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test08-host-to-multi-node-clusterip
+  labels:
+    name: test08-host-to-multi-node-clusterip
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          affinity:
+            podAntiAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+                - labelSelector:
+                    matchExpressions:
+                      - key: name
+                        operator: In
+                        values:
+                          - echo-server
+                  topologyKey: kubernetes.io/hostname
+          dnsPolicy: ClusterFirstWithHostNet
+          hostNetwork: true
+          containers:
+            - name: test08-host-to-multi-node-clusterip
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 echo-server:8080/public"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: test09-host-to-multi-node-headless
+  labels:
+    name: test09-host-to-multi-node-headless
+    grp: ops-tasks
+    job: network-check
+spec:
+  concurrencyPolicy: Forbid
+  failedJobsHistoryLimit: 1
+  schedule: "{{ network_check_schedule }}"
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          affinity:
+            podAntiAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+                - labelSelector:
+                    matchExpressions:
+                      - key: name
+                        operator: In
+                        values:
+                          - echo-server
+                  topologyKey: kubernetes.io/hostname
+          dnsPolicy: ClusterFirstWithHostNet
+          hostNetwork: true
+          containers:
+            - name: test09-host-to-multi-node-headless
+              image: quay.io/cilium/alpine-curl:v1.3.0
+              imagePullPolicy: IfNotPresent
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 echo-server-headless:8080/public"]
+          restartPolicy: OnFailure
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: echo-server
+  labels:
+    name: echo-server
+    grp: ops-tasks
+    job: network-check
+spec:
+  selector:
+    matchLabels:
+      name: echo-server
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        name: echo-server
+    spec:
+      hostNetwork: false
+      containers:
+        - name: echo-server-container
+          env:
+            - name: PORT
+              value: "8080"
+          ports:
+            - containerPort: 8080
+          image: quay.io/cilium/json-mock:v1.3.0
+          imagePullPolicy: IfNotPresent
+          readinessProbe:
+            timeoutSeconds: 7
+            exec:
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 -o /dev/null localhost:8080"]
+          livenessProbe:
+            timeoutSeconds: 7
+            exec:
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 -o /dev/null localhost:8080"]
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: echo-server-host
+  labels:
+    name: echo-server-host
+    grp: ops-tasks
+    job: network-check
+spec:
+  selector:
+    matchLabels:
+      name: echo-server-host
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        name: echo-server-host
+    spec:
+      hostNetwork: true
+      affinity:
+        podAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: name
+                    operator: In
+                    values:
+                      - echo-server
+              topologyKey: kubernetes.io/hostname
+      containers:
+        - name: echo-server-host-container
+          env:
+            - name: PORT
+              value: "40001"
+          ports: []
+          image: quay.io/cilium/json-mock:v1.3.0
+          imagePullPolicy: IfNotPresent
+          readinessProbe:
+            timeoutSeconds: 7
+            exec:
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 -o /dev/null localhost:40001"]
+          livenessProbe:
+            timeoutSeconds: 7
+            exec:
+              command: ["sh","-c","curl -sS --fail --connect-timeout 5 -o /dev/null localhost:40001"]
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: echo-server
+  labels:
+    name: echo-server
+    grp: ops-tasks
+    job: network-check
+spec:
+  ports:
+    - name: http
+      port: 8080
+      nodePort: 31314
+  type: NodePort
+  selector:
+    name: echo-server
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: echo-server-headless
+  labels:
+    name: echo-server-headless
+    grp: ops-tasks
+    job: network-check
+spec:
+  ports:
+    - name: http
+      port: 8080
+  type: ClusterIP
+  selector:
+    name: echo-server
+  clusterIP: None
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: echo-server-host-headless
+  labels:
+    name: echo-server-host-headless
+    grp: ops-tasks
+    job: network-check
+spec:
+  ports: []
+  type: ClusterIP
+  selector:
+    name: echo-server-host
+  clusterIP: None