From 57356f6b0db80e2ae1f11de889c2927fcc5263e3 Mon Sep 17 00:00:00 2001 From: MengxinLiu Date: Tue, 22 Dec 2020 11:06:26 +0800 Subject: [PATCH] feat: upgrade kube-ovn to 1.5.3 --- roles/clean/tasks/clean_node.yml | 4 + roles/kube-ovn/defaults/main.yml | 2 +- roles/kube-ovn/templates/crd.yaml.j2 | 251 +++++++++++++++----- roles/kube-ovn/templates/kube-ovn.yaml.j2 | 269 ++++++++++++++-------- roles/kube-ovn/templates/kubectl-ko.j2 | 150 ++++++++++-- roles/kube-ovn/templates/ovn.yaml.j2 | 181 +++++++++++---- 6 files changed, 646 insertions(+), 211 deletions(-) diff --git a/roles/clean/tasks/clean_node.yml b/roles/clean/tasks/clean_node.yml index 6352ef9..62f2e15 100644 --- a/roles/clean/tasks/clean_node.yml +++ b/roles/clean/tasks/clean_node.yml @@ -104,6 +104,10 @@ - "/etc/origin/openvswitch/" - "/etc/openvswitch/" - "/var/log/openvswitch/" + - "/var/run/ovn/" + - "/etc/origin/ovn/" + - "/etc/ovn/" + - "/var/log/ovn/" # - name: cleanup networks1 # shell: "ip link del tunl0; \ diff --git a/roles/kube-ovn/defaults/main.yml b/roles/kube-ovn/defaults/main.yml index c2d5333..f134a5d 100644 --- a/roles/kube-ovn/defaults/main.yml +++ b/roles/kube-ovn/defaults/main.yml @@ -7,4 +7,4 @@ kube_ovn_node_switch_cidr: 100.64.0.0/16 kube_ovn_enable_mirror: true # 离线镜像tar包 -kube_ovn_offline: "kube_ovn_0.9.1.tar" +kube_ovn_offline: "kube_ovn_1.5.3.tar" diff --git a/roles/kube-ovn/templates/crd.yaml.j2 b/roles/kube-ovn/templates/crd.yaml.j2 index efee4f0..9d43022 100644 --- a/roles/kube-ovn/templates/crd.yaml.j2 +++ b/roles/kube-ovn/templates/crd.yaml.j2 @@ -1,10 +1,59 @@ -apiVersion: apiextensions.k8s.io/v1beta1 +apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: ips.kubeovn.io spec: group: kubeovn.io - version: v1 + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: IP + type: string + jsonPath: .spec.ipAddress + - name: Mac + type: string + jsonPath: .spec.macAddress + - name: Node + type: string + jsonPath: 
.spec.nodeName + - name: Subnet + type: string + jsonPath: .spec.subnet + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + podName: + type: string + namespace: + type: string + subnet: + type: string + attachSubnets: + type: array + items: + type: string + nodeName: + type: string + ipAddress: + type: string + attachIps: + type: array + items: + type: string + macAddress: + type: string + attachMacs: + type: array + items: + type: string + containerID: + type: string scope: Cluster names: plural: ips @@ -12,27 +61,116 @@ spec: kind: IP shortNames: - ip - additionalPrinterColumns: - - name: IP - type: string - JSONPath: .spec.ipAddress - - name: Mac - type: string - JSONPath: .spec.macAddress - - name: Node - type: string - JSONPath: .spec.nodeName - - name: Subnet - type: string - JSONPath: .spec.subnet --- -apiVersion: apiextensions.k8s.io/v1beta1 +apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: subnets.kubeovn.io spec: group: kubeovn.io - version: v1 + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - name: Provider + type: string + jsonPath: .spec.provider + - name: Protocol + type: string + jsonPath: .spec.protocol + - name: CIDR + type: string + jsonPath: .spec.cidrBlock + - name: Private + type: boolean + jsonPath: .spec.private + - name: NAT + type: boolean + jsonPath: .spec.natOutgoing + - name: Default + type: boolean + jsonPath: .spec.default + - name: GatewayType + type: string + jsonPath: .spec.gatewayType + - name: Used + type: number + jsonPath: .status.usingIPs + - name: Available + type: number + jsonPath: .status.availableIPs + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + availableIPs: + type: number + usingIPs: + type: number + activateGateway: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + 
status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + default: + type: boolean + protocol: + type: string + cidrBlock: + type: string + namespaces: + type: array + items: + type: string + gateway: + type: string + provider: + type: string + excludeIps: + type: array + items: + type: string + gatewayType: + type: string + allowSubnets: + type: array + items: + type: string + gatewayNode: + type: string + natOutgoing: + type: boolean + private: + type: boolean + vlan: + type: string + underlayGateway: + type: boolean + disableInterConnection: + type: boolean scope: Cluster names: plural: subnets @@ -40,40 +178,47 @@ spec: kind: Subnet shortNames: - subnet - subresources: - status: {} - additionalPrinterColumns: - - name: Protocol - type: string - JSONPath: .spec.protocol - - name: CIDR - type: string - JSONPath: .spec.cidrBlock - - name: Private - type: boolean - JSONPath: .spec.private - - name: NAT - type: boolean - JSONPath: .spec.natOutgoing - - name: Default - type: boolean - JSONPath: .spec.default - - name: GatewayType - type: string - JSONPath: .spec.gatewayType - - name: Used - type: integer - JSONPath: .status.usingIPs - - name: Available - type: integer - JSONPath: .status.availableIPs - validation: - openAPIV3Schema: - properties: - spec: - required: ["cidrBlock"] + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vlans.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object properties: - cidrBlock: - type: "string" - gateway: - type: "string" \ No newline at end of file + spec: + type: object + properties: + vlanId: + type: integer + providerInterfaceName: + type: string + logicalInterfaceName: + type: string + subnet: + type: string + additionalPrinterColumns: + - name: VlanID + type: string + jsonPath: 
.spec.vlanId + - name: ProviderInterfaceName + type: string + jsonPath: .spec.providerInterfaceName + - name: Subnet + type: string + jsonPath: .spec.subnet + scope: Cluster + names: + plural: vlans + singular: vlan + kind: Vlan + shortNames: + - vlan diff --git a/roles/kube-ovn/templates/kube-ovn.yaml.j2 b/roles/kube-ovn/templates/kube-ovn.yaml.j2 index aab0b5d..dfa0b45 100644 --- a/roles/kube-ovn/templates/kube-ovn.yaml.j2 +++ b/roles/kube-ovn/templates/kube-ovn.yaml.j2 @@ -3,7 +3,7 @@ kind: Deployment apiVersion: apps/v1 metadata: name: kube-ovn-controller - namespace: kube-ovn + namespace: kube-system annotations: kubernetes.io/description: | kube-ovn controller @@ -25,8 +25,7 @@ spec: type: infra spec: tolerations: - - operator: Exists - effect: NoSchedule + - operator: Exists affinity: podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -34,19 +33,25 @@ spec: matchLabels: app: kube-ovn-controller topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical serviceAccountName: ovn hostNetwork: true containers: - name: kube-ovn-controller - image: "index.alauda.cn/alaudak8s/kube-ovn-controller:v0.9.1" + image: "kubeovn/kube-ovn:v1.5.3" imagePullPolicy: IfNotPresent command: - - /kube-ovn/start-controller.sh + - /kube-ovn/start-controller.sh args: - - --default-cidr={{ kube_ovn_default_cidr }} - - --default-gateway={{ kube_ovn_default_gateway }} - - --node-switch-cidr={{ kube_ovn_node_switch_cidr }} + - --default-cidr={{ kube_ovn_default_cidr }} + - --default-gateway={{ kube_ovn_default_gateway }} + - --node-switch-cidr={{ kube_ovn_node_switch_cidr }} + - --network-type=geneve + - --default-interface-name= + - --default-vlan-id=100 env: + - name: ENABLE_SSL + value: "false" - name: POD_NAME valueFrom: fieldRef: @@ -59,35 +64,46 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName + volumeMounts: + - mountPath: /var/run/tls + name: kube-ovn-tls readinessProbe: exec: command: - - nc - - -z - - -w3 - - 127.0.0.1 - - "10660" + - 
sh + - /kube-ovn/kube-ovn-controller-healthcheck.sh periodSeconds: 3 + timeoutSeconds: 45 livenessProbe: exec: command: - - nc - - -z - - -w3 - - 127.0.0.1 - - "10660" - initialDelaySeconds: 30 + - sh + - /kube-ovn/kube-ovn-controller-healthcheck.sh + initialDelaySeconds: 300 periodSeconds: 7 failureThreshold: 5 + timeoutSeconds: 45 + resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 1000m + memory: 1Gi nodeSelector: - beta.kubernetes.io/os: "linux" + kubernetes.io/os: "linux" + volumes: + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls --- kind: DaemonSet apiVersion: apps/v1 metadata: name: kube-ovn-cni - namespace: kube-ovn + namespace: kube-system annotations: kubernetes.io/description: | This daemon set launches the kube-ovn cni daemon. @@ -95,8 +111,6 @@ spec: selector: matchLabels: app: kube-ovn-cni - updateStrategy: - type: OnDelete template: metadata: labels: @@ -105,88 +119,115 @@ spec: type: infra spec: tolerations: - - operator: Exists - effect: NoSchedule + - operator: Exists + priorityClassName: system-cluster-critical serviceAccountName: ovn hostNetwork: true hostPID: true initContainers: - - name: install-cni - image: "index.alauda.cn/alaudak8s/kube-ovn-cni:v0.9.1" - imagePullPolicy: IfNotPresent - command: ["/kube-ovn/install-cni.sh"] - securityContext: - runAsUser: 0 - privileged: true - volumeMounts: - - mountPath: /etc/cni/net.d - name: cni-conf - - mountPath: /opt/cni/bin - name: cni-bin + - name: install-cni + image: "kubeovn/kube-ovn:v1.5.3" + imagePullPolicy: IfNotPresent + command: ["/kube-ovn/install-cni.sh"] + securityContext: + runAsUser: 0 + privileged: true + volumeMounts: + - mountPath: /opt/cni/bin + name: cni-bin containers: - - name: cni-server - image: "index.alauda.cn/alaudak8s/kube-ovn-cni:v0.9.1" - imagePullPolicy: IfNotPresent - command: - - sh - - /kube-ovn/start-cniserver.sh - args: - - --enable-mirror={{ kube_ovn_enable_mirror|string|lower }} - - --service-cluster-ip-range={{ 
SERVICE_CIDR }} - securityContext: - runAsUser: 0 - privileged: true - env: - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: KUBE_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - volumeMounts: - - mountPath: /run/openvswitch - name: host-run-ovs - readinessProbe: - exec: - command: - - nc - - -z - - -w3 - - 127.0.0.1 - - "10665" - periodSeconds: 3 - livenessProbe: - exec: - command: - - nc - - -z - - -w3 - - 127.0.0.1 - - "10665" - initialDelaySeconds: 30 - periodSeconds: 7 - failureThreshold: 5 + - name: cni-server + image: "kubeovn/kube-ovn:v1.5.3" + imagePullPolicy: IfNotPresent + command: + - sh + - /kube-ovn/start-cniserver.sh + args: + - --enable-mirror={{ kube_ovn_enable_mirror|string|lower }} + - --service-cluster-ip-range={{ SERVICE_CIDR }} + - --encap-checksum=true + - --iface= + - --network-type=geneve + - --default-interface-name= + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "false" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /run/openvswitch + name: host-run-ovs + - mountPath: /run/ovn + name: host-run-ovn + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer + readinessProbe: + exec: + command: + - nc + - -z + - -w3 + - 127.0.0.1 + - "10665" + periodSeconds: 3 + livenessProbe: + exec: + command: + - nc + - -z + - -w3 + - 127.0.0.1 + - "10665" + initialDelaySeconds: 30 + periodSeconds: 7 + failureThreshold: 5 + resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 1000m + memory: 1Gi nodeSelector: - beta.kubernetes.io/os: "linux" + kubernetes.io/os: "linux" volumes: - name: host-run-ovs hostPath: path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn - name: cni-conf hostPath: path: /etc/cni/net.d - name: cni-bin hostPath: path: 
{{ bin_dir }} + - name: host-ns + hostPath: + path: /var/run/netns --- kind: DaemonSet apiVersion: apps/v1 metadata: name: kube-ovn-pinger - namespace: kube-ovn + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. spec: selector: matchLabels: @@ -202,17 +243,19 @@ spec: spec: tolerations: - operator: Exists - effect: NoSchedule serviceAccountName: ovn hostPID: true containers: - name: pinger - image: "index.alauda.cn/alaudak8s/kube-ovn-pinger:v0.9.1" + image: "kubeovn/kube-ovn:v1.5.3" + command: ["/kube-ovn/kube-ovn-pinger", "--external-address=114.114.114.114", "--external-dns=alauda.cn"] imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 privileged: false env: + - name: ENABLE_SSL + value: "false" - name: POD_IP valueFrom: fieldRef: @@ -237,22 +280,28 @@ spec: name: host-run-ovs - mountPath: /var/run/openvswitch name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn - mountPath: /sys name: host-sys readOnly: true - mountPath: /etc/openvswitch name: host-config-openvswitch - mountPath: /var/log/openvswitch - name: host-log + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /var/run/tls + name: kube-ovn-tls resources: requests: cpu: 100m - memory: 300Mi + memory: 200Mi limits: cpu: 200m memory: 400Mi nodeSelector: - beta.kubernetes.io/os: "linux" + kubernetes.io/os: "linux" volumes: - name: host-modules hostPath: @@ -260,21 +309,31 @@ spec: - name: host-run-ovs hostPath: path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn - name: host-sys hostPath: path: /sys - name: host-config-openvswitch hostPath: path: /etc/origin/openvswitch - - name: host-log + - name: host-log-ovs hostPath: path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls --- kind: Service apiVersion: v1 metadata: name: kube-ovn-pinger - namespace: 
kube-ovn + namespace: kube-system labels: app: kube-ovn-pinger spec: @@ -282,4 +341,32 @@ spec: app: kube-ovn-pinger ports: - port: 8080 - name: http + name: metrics +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-controller + namespace: kube-system + labels: + app: kube-ovn-controller +spec: + selector: + app: kube-ovn-controller + ports: + - port: 10660 + name: metrics +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-cni + namespace: kube-system + labels: + app: kube-ovn-cni +spec: + selector: + app: kube-ovn-cni + ports: + - port: 10665 + name: metrics diff --git a/roles/kube-ovn/templates/kubectl-ko.j2 b/roles/kube-ovn/templates/kubectl-ko.j2 index 054943a..b4d999a 100644 --- a/roles/kube-ovn/templates/kubectl-ko.j2 +++ b/roles/kube-ovn/templates/kubectl-ko.j2 @@ -1,14 +1,16 @@ #!/bin/bash set -euo pipefail -KUBE_OVN_NS=kube-ovn -CENTRAL_POD= +KUBE_OVN_NS=kube-system +OVN_NB_POD= +OVN_SB_POD= showHelp(){ echo "kubectl ko {subcommand} [option...]" echo "Available Subcommands:" echo " nbctl [ovn-nbctl options ...] invoke ovn-nbctl" echo " sbctl [ovn-sbctl options ...] invoke ovn-sbctl" + echo " vsctl {nodeName} [ovs-vsctl options ...] invoke ovs-vsctl on selected node" echo " tcpdump {namespace/podname} [tcpdump options ...] 
capture pod traffic" echo " trace {namespace/podname} {target ip address} {icmp|tcp|udp} [target tcp or udp port] trace ovn microflow of specific packet" echo " diagnose {all|node} [nodename] diagnose connectivity of all nodes or a specific node" @@ -24,7 +26,6 @@ tcpdump(){ hostNetwork=$(kubectl get pod "$podName" -o jsonpath={.spec.hostNetwork}) else nodeName=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.spec.nodeName}) - mac=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/mac_address}) hostNetwork=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.spec.hostNetwork}) fi @@ -33,11 +34,6 @@ tcpdump(){ exit 1 fi - if [ -z "$mac" ] && [ "$hostNetwork" != "true" ]; then - echo "pod mac address not ready" - exit 1 - fi - ovnCni=$(kubectl get pod -n $KUBE_OVN_NS -o wide| grep kube-ovn-cni| grep " $nodeName " | awk '{print $1}') if [ -z "$ovnCni" ]; then echo "kube-ovn-cni not exist on node $nodeName" @@ -48,7 +44,7 @@ tcpdump(){ set -x kubectl exec -it "$ovnCni" -n $KUBE_OVN_NS -- tcpdump -nn "$@" else - nicName=$(kubectl exec -it "$ovnCni" -n $KUBE_OVN_NS -- ovs-vsctl --data=bare --no-heading --columns=name find interface mac_in_use="${mac//:/\\:}" | tr -d '\r') + nicName=$(kubectl exec -it "$ovnCni" -n $KUBE_OVN_NS -- ovs-vsctl --data=bare --no-heading --columns=name find interface external-ids:iface-id="$podName"."$namespace" | tr -d '\r') if [ -z "$nicName" ]; then echo "nic doesn't exist on node $nodeName" exit 1 @@ -71,6 +67,7 @@ trace(){ mac=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/mac_address}) ls=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/logical_switch}) hostNetwork=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.spec.hostNetwork}) + nodeName=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.spec.nodeName}) if [ "$hostNetwork" = "true" ]; then echo 
"Can not trace host network pod" @@ -82,7 +79,7 @@ trace(){ exit 1 fi - gwMac=$(kubectl exec -it $CENTRAL_POD -n $KUBE_OVN_NS -- ovn-nbctl --data=bare --no-heading --columns=mac find logical_router_port name=ovn-cluster-"$ls" | tr -d '\r') + gwMac=$(kubectl exec -it $OVN_NB_POD -n $KUBE_OVN_NS -- ovn-nbctl --data=bare --no-heading --columns=mac find logical_router_port name=ovn-cluster-"$ls" | tr -d '\r') if [ -z "$gwMac" ]; then echo "get gw mac failed" @@ -100,39 +97,111 @@ trace(){ case $type in icmp) set -x - kubectl exec "$CENTRAL_POD" -n $KUBE_OVN_NS -- ovn-trace --ct=new "$ls" "inport == \"$podName.$namespace\" && ip.ttl == 64 && icmp && eth.src == $mac && ip4.src == $podIP && eth.dst == $gwMac && ip4.dst == $dst" + kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -- ovn-trace --ct=new "$ls" "inport == \"$podName.$namespace\" && ip.ttl == 64 && icmp && eth.src == $mac && ip4.src == $podIP && eth.dst == $gwMac && ip4.dst == $dst" ;; tcp|udp) set -x - kubectl exec "$CENTRAL_POD" -n $KUBE_OVN_NS -- ovn-trace --ct=new "$ls" "inport == \"$podName.$namespace\" && ip.ttl == 64 && eth.src == $mac && ip4.src == $podIP && eth.dst == $gwMac && ip4.dst == $dst && $type.src == 10000 && $type.dst == $4" + kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -- ovn-trace --ct=new "$ls" "inport == \"$podName.$namespace\" && ip.ttl == 64 && eth.src == $mac && ip4.src == $podIP && eth.dst == $gwMac && ip4.dst == $dst && $type.src == 10000 && $type.dst == $4" ;; *) echo "type $type not supported" echo "kubectl ko trace {namespace/podname} {target ip address} {icmp|tcp|udp} [target tcp or udp port]" + exit 1 + ;; + esac + + set +x + echo "--------" + echo "Start OVS Tracing" + echo "" + echo "" + + ovsPod=$(kubectl get pod -n $KUBE_OVN_NS -o wide | grep " $nodeName " | grep ovs-ovn | awk '{print $1}') + if [ -z "$ovsPod" ]; then + echo "ovs pod doesn't exist on node $nodeName" + exit 1 + fi + + inPort=$(kubectl exec "$ovsPod" -n $KUBE_OVN_NS -- ovs-vsctl --format=csv --data=bare --no-heading 
--columns=ofport find interface external_id:iface-id="$podName"."$namespace") + case $type in + icmp) + set -x + kubectl exec "$ovsPod" -n $KUBE_OVN_NS -- ovs-appctl ofproto/trace br-int in_port="$inPort",icmp,nw_src="$podIP",nw_dst="$dst",dl_src="$mac",dl_dst="$gwMac" + ;; + tcp|udp) + set -x + kubectl exec "$ovsPod" -n $KUBE_OVN_NS -- ovs-appctl ofproto/trace br-int in_port="$inPort","$type",nw_src="$podIP",nw_dst="$dst",dl_src="$mac",dl_dst="$gwMac","$type"_src=1000,"$type"_dst="$4" + ;; + *) + echo "type $type not supported" + echo "kubectl ko trace {namespace/podname} {target ip address} {icmp|tcp|udp} [target tcp or udp port]" + exit 1 ;; esac } +vsctl(){ + nodeName="$1"; shift + kubectl get no "$nodeName" > /dev/null + ovsPod=$(kubectl get pod -n $KUBE_OVN_NS -o wide | grep " $nodeName " | grep ovs-ovn | awk '{print $1}') + if [ -z "$ovsPod" ]; then + echo "ovs pod doesn't exist on node $nodeName" + exit 1 + fi + kubectl exec "$ovsPod" -n $KUBE_OVN_NS -- ovs-vsctl "$@" +} + diagnose(){ + kubectl get crd subnets.kubeovn.io + kubectl get crd ips.kubeovn.io + kubectl get svc kube-dns -n kube-system + kubectl get svc kubernetes -n default + + kubectl get no -o wide + kubectl ko nbctl show + kubectl ko sbctl show + + checkDaemonSet kube-proxy + checkDeployment ovn-central + checkDeployment kube-ovn-controller + checkDaemonSet kube-ovn-cni + checkDaemonSet ovs-ovn + checkDeployment coredns type="$1" case $type in all) + echo "### kube-ovn-controller recent log" + set +e + kubectl logs -n $KUBE_OVN_NS -l app=kube-ovn-controller --tail=100 | grep E$(date +%m%d) + set -e + echo "" pingers=$(kubectl get pod -n $KUBE_OVN_NS | grep kube-ovn-pinger | awk '{print $1}') for pinger in $pingers do nodeName=$(kubectl get pod "$pinger" -n "$KUBE_OVN_NS" -o jsonpath={.spec.nodeName}) echo "### start to diagnose node $nodeName" + echo "#### ovn-controller log:" + kubectl exec -n $KUBE_OVN_NS -it "$pinger" -- tail /var/log/ovn/ovn-controller.log + echo "" + echo "#### ovs-vsctl 
show results:" + kubectl exec -n $KUBE_OVN_NS -it "$pinger" -- ovs-vsctl show + echo "" + echo "#### pinger diagnose results:" kubectl exec -n $KUBE_OVN_NS -it "$pinger" -- /kube-ovn/kube-ovn-pinger --mode=job echo "### finish diagnose node $nodeName" echo "" done ;; node) - node="$2" - pinger=$(kubectl get pod -n $KUBE_OVN_NS -o wide | grep kube-ovn-pinger | grep " $node " | awk '{print $1}') - echo "### start to diagnose node $node" + nodeName="$2" + kubectl get no "$nodeName" > /dev/null + pinger=$(kubectl get pod -n $KUBE_OVN_NS -o wide | grep kube-ovn-pinger | grep " $nodeName " | awk '{print $1}') + echo "### start to diagnose node $nodeName" + echo "#### ovn-controller log:" + kubectl exec -n $KUBE_OVN_NS -it "$pinger" -- tail /var/log/ovn/ovn-controller.log + echo "" kubectl exec -n $KUBE_OVN_NS -it "$pinger" -- /kube-ovn/kube-ovn-pinger --mode=job - echo "### finish diagnose node $node" + echo "### finish diagnose node $nodeName" echo "" ;; *) @@ -143,12 +212,46 @@ diagnose(){ } getOvnCentralPod(){ - centralPod=$(kubectl get pod -n $KUBE_OVN_NS | grep ovn-central | head -n 1 | awk '{print $1}') - if [ -z "$centralPod" ]; then - echo "ovn-central not exists" + NB_POD=$(kubectl get pod -n $KUBE_OVN_NS -l ovn-nb-leader=true | grep ovn-central | head -n 1 | awk '{print $1}') + if [ -z "$NB_POD" ]; then + echo "nb leader not exists" exit 1 fi - CENTRAL_POD=$centralPod + OVN_NB_POD=$NB_POD + SB_POD=$(kubectl get pod -n $KUBE_OVN_NS -l ovn-sb-leader=true | grep ovn-central | head -n 1 | awk '{print $1}') + if [ -z "$SB_POD" ]; then + echo "sb leader not exists" + exit 1 + fi + OVN_SB_POD=$SB_POD +} + +checkDaemonSet(){ + name="$1" + currentScheduled=$(kubectl get ds -n $KUBE_OVN_NS "$name" -o jsonpath={.status.currentNumberScheduled}) + desiredScheduled=$(kubectl get ds -n $KUBE_OVN_NS "$name" -o jsonpath={.status.desiredNumberScheduled}) + available=$(kubectl get ds -n $KUBE_OVN_NS "$name" -o jsonpath={.status.numberAvailable}) + ready=$(kubectl get ds -n 
$KUBE_OVN_NS "$name" -o jsonpath={.status.numberReady}) + if [ "$currentScheduled" = "$desiredScheduled" ] && [ "$desiredScheduled" = "$available" ] && [ "$available" = "$ready" ]; then + echo "ds $name ready" + else + echo "Error ds $name not ready" + exit 1 + fi +} + +checkDeployment(){ + name="$1" + ready=$(kubectl get deployment -n $KUBE_OVN_NS "$name" -o jsonpath={.status.readyReplicas}) + updated=$(kubectl get deployment -n $KUBE_OVN_NS "$name" -o jsonpath={.status.updatedReplicas}) + desire=$(kubectl get deployment -n $KUBE_OVN_NS "$name" -o jsonpath={.status.replicas}) + available=$(kubectl get deployment -n $KUBE_OVN_NS "$name" -o jsonpath={.status.availableReplicas}) + if [ "$ready" = "$updated" ] && [ "$updated" = "$desire" ] && [ "$desire" = "$available" ]; then + echo "deployment $name ready" + else + echo "Error deployment $name not ready" + exit 1 + fi } if [ $# -lt 1 ]; then @@ -162,10 +265,13 @@ getOvnCentralPod case $subcommand in nbctl) - kubectl exec "$CENTRAL_POD" -n $KUBE_OVN_NS -- ovn-nbctl "$@" + kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -- ovn-nbctl "$@" ;; sbctl) - kubectl exec "$CENTRAL_POD" -n $KUBE_OVN_NS -- ovn-sbctl "$@" + kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -- ovn-sbctl "$@" + ;; + vsctl) + vsctl "$@" ;; tcpdump) tcpdump "$@" diff --git a/roles/kube-ovn/templates/ovn.yaml.j2 b/roles/kube-ovn/templates/ovn.yaml.j2 index e5c6c5f..a1ddcbb 100644 --- a/roles/kube-ovn/templates/ovn.yaml.j2 +++ b/roles/kube-ovn/templates/ovn.yaml.j2 @@ -1,22 +1,44 @@ -apiVersion: v1 -kind: Namespace +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy metadata: name: kube-ovn + annotations: + seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' +spec: + privileged: true + allowPrivilegeEscalation: true + allowedCapabilities: + - '*' + volumes: + - '*' + hostNetwork: true + hostPorts: + - min: 0 + max: 65535 + hostIPC: true + hostPID: true + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'RunAsAny' 
+ fsGroup: + rule: 'RunAsAny' --- + apiVersion: v1 kind: ConfigMap metadata: name: ovn-config - namespace: kube-ovn - + namespace: kube-system --- apiVersion: v1 kind: ServiceAccount metadata: name: ovn - namespace: kube-ovn - + namespace: kube-system --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -25,12 +47,19 @@ metadata: rbac.authorization.k8s.io/system-only: "true" name: system:ovn rules: + - apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - kube-ovn - apiGroups: - "kubeovn.io" resources: - subnets - subnets/status - ips + - vlans + - networks verbs: - "*" - apiGroups: @@ -51,12 +80,14 @@ rules: - "" - networking.k8s.io - apps + - extensions resources: - networkpolicies - services - endpoints - statefulsets - daemonsets + - deployments verbs: - get - list @@ -69,7 +100,6 @@ rules: - create - patch - update - --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -82,14 +112,13 @@ roleRef: subjects: - kind: ServiceAccount name: ovn - namespace: kube-ovn - + namespace: kube-system --- kind: Service apiVersion: v1 metadata: name: ovn-nb - namespace: kube-ovn + namespace: kube-system spec: ports: - name: ovn-nb @@ -99,14 +128,14 @@ spec: type: ClusterIP selector: app: ovn-central + ovn-nb-leader: "true" sessionAffinity: None - --- kind: Service apiVersion: v1 metadata: name: ovn-sb - namespace: kube-ovn + namespace: kube-system spec: ports: - name: ovn-sb @@ -116,14 +145,14 @@ spec: type: ClusterIP selector: app: ovn-central + ovn-sb-leader: "true" sessionAffinity: None - --- kind: Deployment apiVersion: apps/v1 metadata: name: ovn-central - namespace: kube-ovn + namespace: kube-system annotations: kubernetes.io/description: | OVN components: northd, nb and sb. 
@@ -145,8 +174,7 @@ spec: type: infra spec: tolerations: - - operator: Exists - effect: NoSchedule + - operator: Exists affinity: podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -154,73 +182,108 @@ spec: matchLabels: app: ovn-central topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical serviceAccountName: ovn hostNetwork: true containers: - name: ovn-central - image: "index.alauda.cn/alaudak8s/kube-ovn-db:v0.9.1" + image: "kubeovn/kube-ovn:v1.5.3" imagePullPolicy: IfNotPresent + command: ["/kube-ovn/start-db.sh"] + securityContext: + capabilities: + add: ["SYS_NICE"] env: + - name: ENABLE_SSL + value: "false" - name: POD_IP valueFrom: fieldRef: fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace resources: requests: - cpu: 200m - memory: 300Mi + cpu: 500m + memory: 200Mi limits: - cpu: 400m - memory: 800Mi + cpu: 3 + memory: 3Gi volumeMounts: - - mountPath: /run/openvswitch - name: host-run-ovs - mountPath: /var/run/openvswitch name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn - mountPath: /sys name: host-sys readOnly: true - mountPath: /etc/openvswitch name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn - mountPath: /var/log/openvswitch - name: host-log + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /var/run/tls + name: kube-ovn-tls readinessProbe: exec: command: - sh - - /root/ovn-is-leader.sh + - /kube-ovn/ovn-is-leader.sh periodSeconds: 3 + timeoutSeconds: 45 livenessProbe: exec: command: - - sh - - /root/ovn-healthcheck.sh + - sh + - /kube-ovn/ovn-healthcheck.sh initialDelaySeconds: 30 periodSeconds: 7 failureThreshold: 5 + timeoutSeconds: 45 nodeSelector: - beta.kubernetes.io/os: "linux" + kubernetes.io/os: "linux" kube-ovn/role: "master" volumes: - name: host-run-ovs hostPath: path: /run/openvswitch + - name: 
host-run-ovn + hostPath: + path: /run/ovn - name: host-sys hostPath: path: /sys - name: host-config-openvswitch hostPath: path: /etc/origin/openvswitch - - name: host-log + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs hostPath: path: /var/log/openvswitch - + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls --- kind: DaemonSet apiVersion: apps/v1 metadata: name: ovs-ovn - namespace: kube-ovn + namespace: kube-system annotations: kubernetes.io/description: | This daemon set launches the openvswitch daemon. @@ -238,61 +301,78 @@ spec: type: infra spec: tolerations: - - operator: Exists - effect: NoSchedule + - operator: Exists + priorityClassName: system-cluster-critical serviceAccountName: ovn hostNetwork: true hostPID: true containers: - name: openvswitch - image: "index.alauda.cn/alaudak8s/kube-ovn-node:v0.9.1" + image: "kubeovn/kube-ovn:v1.5.3" imagePullPolicy: IfNotPresent + command: ["/kube-ovn/start-ovs.sh"] securityContext: runAsUser: 0 privileged: true env: + - name: ENABLE_SSL + value: "false" - name: POD_IP valueFrom: fieldRef: fieldPath: status.podIP + - name: HW_OFFLOAD + value: "false" + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName volumeMounts: - mountPath: /lib/modules name: host-modules readOnly: true - - mountPath: /run/openvswitch - name: host-run-ovs - mountPath: /var/run/openvswitch name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn - mountPath: /sys name: host-sys readOnly: true - mountPath: /etc/openvswitch name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn - mountPath: /var/log/openvswitch - name: host-log + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /var/run/tls + name: kube-ovn-tls readinessProbe: exec: command: - - sh - - /root/ovs-healthcheck.sh + - sh + - /kube-ovn/ovs-healthcheck.sh periodSeconds: 5 + 
timeoutSeconds: 45 livenessProbe: exec: command: - - sh - - /root/ovs-healthcheck.sh + - sh + - /kube-ovn/ovs-healthcheck.sh initialDelaySeconds: 10 periodSeconds: 5 failureThreshold: 5 + timeoutSeconds: 45 resources: requests: cpu: 200m - memory: 300Mi + memory: 200Mi limits: cpu: 1000m memory: 800Mi nodeSelector: - beta.kubernetes.io/os: "linux" + kubernetes.io/os: "linux" volumes: - name: host-modules hostPath: @@ -300,12 +380,25 @@ spec: - name: host-run-ovs hostPath: path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn - name: host-sys hostPath: path: /sys - name: host-config-openvswitch hostPath: path: /etc/origin/openvswitch - - name: host-log + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs hostPath: path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls