diff --git a/docs/netcheck.md b/docs/netcheck.md
new file mode 100644
index 000000000..408b0fd8c
--- /dev/null
+++ b/docs/netcheck.md
@@ -0,0 +1,41 @@
+Network Checker Application
+===========================
+
+With the ``deploy_netchecker`` var enabled (defaults to false), Kargo deploys a
+Network Checker Application built from the third-party `l23network/mcp-netchecker`
+Docker images. It consists of a server and agents that try to reach the server
+using the usual means of network connectivity available to Kubernetes applications.
+This automatically verifies pod-to-pod connectivity via the cluster IP and checks
+whether DNS resolution is functioning as well.
+
+The agents run the checks periodically and cover both standard and host-network
+pods. The history of performed checks may be found in the agents' application
+logs.
+
+To get the most recent, cluster-wide network connectivity report, run the
+following from any of the cluster nodes:
+```
+curl http://localhost:31081/api/v1/connectivity_check
+```
+Note that Kargo only deploys the application when requested; it does not invoke
+the check itself.
+
+The related application-specific variables are:
+```
+netchecker_port: 31081
+agent_report_interval: 15
+netcheck_namespace: default
+agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
+server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
+```
+
+Note that the application verifies DNS resolution only for FQDNs built from the
+``netcheck_namespace`` and ``dns_domain`` vars, for example
+``netchecker-service.default.cluster.local``. If you deploy the application to a
+non-default namespace, make sure to adjust the ``searchdomains`` var as well, so
+that the resulting search domain records contain that namespace, for example:
+
+```
+search: foospace.cluster.local default.cluster.local ...
+nameserver: ...
+```
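For example, to enable the checker and deploy it to a non-default namespace, the vars above can be overridden in the inventory group vars. A minimal sketch (the `foospace` namespace is only an illustration, and `searchdomains` is assumed here to take a list of extra resolv.conf search domains):

```
deploy_netchecker: true
netcheck_namespace: foospace
netchecker_port: 31081
agent_report_interval: 15
searchdomains:
  - foospace.cluster.local
  - default.cluster.local
```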
diff --git a/inventory/group_vars/all.yml b/inventory/group_vars/all.yml
index 49abb1d03..daf641335 100644
--- a/inventory/group_vars/all.yml
+++ b/inventory/group_vars/all.yml
@@ -35,6 +35,8 @@ kube_users:
 cluster_name: cluster.local
 # Subdomains of DNS domain to be resolved via /etc/resolv.conf
 ndots: 5
+# Deploy the netchecker app to verify DNS resolution (reported via an HTTP service)
+deploy_netchecker: false
 # For some environments, each node has a pubilcally accessible
 # address and an address it should bind services to.  These are
diff --git a/roles/dnsmasq/tasks/resolvconf.yml b/roles/dnsmasq/tasks/resolvconf.yml
index 9be70c7a5..ba367ac48 100644
--- a/roles/dnsmasq/tasks/resolvconf.yml
+++ b/roles/dnsmasq/tasks/resolvconf.yml
@@ -48,7 +48,20 @@
   when: resolvconf.rc == 0
   notify: Dnsmasq | update resolvconf

-- name: Add search domains to resolv.conf
+- name: Remove search and nameserver options from resolvconf cloud init temporary file
+  lineinfile:
+    dest: "{{resolvconffile}}"
+    state: absent
+    regexp: "^{{ item }}.*$"
+    backup: yes
+    follow: yes
+  with_items:
+    - search
+    - nameserver
+  when: ansible_os_family == "CoreOS"
+  notify: Dnsmasq | update resolvconf for CoreOS
+
+- name: Add search domains to resolvconf file
   lineinfile:
     line: "search {{searchentries}}"
     dest: "{{resolvconffile}}"
@@ -66,7 +79,7 @@
       nameserver {{ item }}
       {% endfor %}
     state: present
-    insertafter: "^search.*$"
+    insertafter: "^search default.svc.*$"
     create: yes
     backup: yes
     follow: yes
diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml
index 0f47a097e..057e6bc48 100644
--- a/roles/download/defaults/main.yml
+++ b/roles/download/defaults/main.yml
@@ -58,6 +58,12 @@ hyperkube_image_repo: "quay.io/coreos/hyperkube"
 hyperkube_image_tag: "{{ kube_version }}_coreos.0"
 pod_infra_image_repo: "gcr.io/google_containers/pause-amd64"
 pod_infra_image_tag: "{{ pod_infra_version }}"
+netcheck_tag: v0.1
+netcheck_kubectl_tag: v0.18.0-120-gaeb4ac55ad12b1-dirty
+netcheck_agent_img_repo: "quay.io/l23network/mcp-netchecker-agent"
+netcheck_server_img_repo: "quay.io/l23network/mcp-netchecker-server"
+netcheck_kubectl_img_repo: "gcr.io/google_containers/kubectl"
+
 nginx_image_repo: nginx
 nginx_image_tag: 1.11.4-alpine
 dnsmasq_version: 2.72
@@ -73,6 +79,21 @@ test_image_repo: busybox
 test_image_tag: latest

 downloads:
+  netcheck_server:
+    container: true
+    repo: "{{ netcheck_server_img_repo }}"
+    tag: "{{ netcheck_tag }}"
+    enabled: "{{ deploy_netchecker|bool }}"
+  netcheck_agent:
+    container: true
+    repo: "{{ netcheck_agent_img_repo }}"
+    tag: "{{ netcheck_tag }}"
+    enabled: "{{ deploy_netchecker|bool }}"
+  netcheck_kubectl:
+    container: true
+    repo: "{{ netcheck_kubectl_img_repo }}"
+    tag: "{{ netcheck_kubectl_tag }}"
+    enabled: "{{ deploy_netchecker|bool }}"
   weave:
     dest: weave/bin/weave
     version: "{{weave_version}}"
diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml
index b84d46520..33c6f38a7 100644
--- a/roles/kubernetes-apps/ansible/defaults/main.yml
+++ b/roles/kubernetes-apps/ansible/defaults/main.yml
@@ -1,3 +1,6 @@
+kube_config_dir: /etc/kubernetes
+kube_namespace: kube-system
+
 # Versions
 kubedns_version: 1.7
 kubednsmasq_version: 1.3
@@ -20,5 +23,14 @@ exechealthz_image_tag: "{{ exechealthz_version }}"
 calico_policy_image_repo: "calico/kube-policy-controller"
 calico_policy_image_tag: latest

+# Netchecker
+deploy_netchecker: false
+netchecker_port: 31081
+agent_report_interval: 15
+netcheck_namespace: default
+agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
+server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
+kubectl_image: "gcr.io/google_containers/kubectl:v0.18.0-120-gaeb4ac55ad12b1-dirty"
+
 # SSL
 etcd_cert_dir: "/etc/ssl/etcd/ssl"
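The three `downloads` entries added to `roles/download/defaults/main.yml` above follow the role's existing convention for container images and are gated by the same `deploy_netchecker` flag, so the images are only pulled when the checker is requested. A sketch of that entry shape (the `example_image` name and repository are hypothetical, not part of this change):

```
downloads:
  example_image:                    # hypothetical entry name
    container: true                 # a container image rather than a file download
    repo: "example.org/some/image"  # hypothetical repository
    tag: "v0.1"
    enabled: "{{ deploy_netchecker|bool }}"  # only acted on when the flag is true
```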
diff --git a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml
index 6ad8dd220..02a49f211 100644
--- a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml
+++ b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml
@@ -1,5 +1,5 @@
 - name: Write calico-policy-controller yaml
-  template: src=calico-policy-controller.yml.j2 dest=/etc/kubernetes/calico-policy-controller.yml
+  template: src=calico-policy-controller.yml.j2 dest={{kube_config_dir}}/calico-policy-controller.yml
   when: inventory_hostname == groups['kube-master'][0]

@@ -7,7 +7,7 @@
   kube:
     name: "calico-policy-controller"
     kubectl: "{{bin_dir}}/kubectl"
-    filename: "/etc/kubernetes/calico-policy-controller.yml"
-    namespace: "kube-system"
+    filename: "{{kube_config_dir}}/calico-policy-controller.yml"
+    namespace: "{{kube_namespace}}"
     resource: "rs"
   when: inventory_hostname == groups['kube-master'][0]
diff --git a/roles/kubernetes-apps/ansible/tasks/main.yaml b/roles/kubernetes-apps/ansible/tasks/main.yaml
index 130a17a6f..a65b6b527 100644
--- a/roles/kubernetes-apps/ansible/tasks/main.yaml
+++ b/roles/kubernetes-apps/ansible/tasks/main.yaml
@@ -1,6 +1,6 @@
 ---
 - name: Kubernetes Apps | Lay Down KubeDNS Template
-  template: src={{item.file}} dest=/etc/kubernetes/{{item.file}}
+  template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
   with_items:
     - {file: kubedns-rc.yml, type: rc}
     - {file: kubedns-svc.yml, type: svc}
@@ -10,10 +10,10 @@
 - name: Kubernetes Apps | Start Resources
   kube:
     name: kubedns
-    namespace: kube-system
+    namespace: "{{ kube_namespace }}"
     kubectl: "{{bin_dir}}/kubectl"
     resource: "{{item.item.type}}"
-    filename: /etc/kubernetes/{{item.item.file}}
+    filename: "{{kube_config_dir}}/{{item.item.file}}"
     state: "{{item.changed | ternary('latest','present') }}"
   with_items: "{{ manifests.results }}"
   when: inventory_hostname == groups['kube-master'][0]
@@ -21,3 +21,7 @@

 - include: tasks/calico-policy-controller.yml
   when: ( enable_network_policy is defined and enable_network_policy == True ) or ( kube_network_plugin == 'canal' )
+
+- name: Kubernetes Apps | Netchecker
+  include: tasks/netchecker.yml
+  when: deploy_netchecker
diff --git a/roles/kubernetes-apps/ansible/tasks/netchecker.yml b/roles/kubernetes-apps/ansible/tasks/netchecker.yml
new file mode 100644
index 000000000..c28d921b6
--- /dev/null
+++ b/roles/kubernetes-apps/ansible/tasks/netchecker.yml
@@ -0,0 +1,20 @@
+- name: Kubernetes Apps | Lay Down Netchecker Template
+  template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
+  with_items:
+    - {file: netchecker-agent-ds.yml, type: ds, name: netchecker-agent}
+    - {file: netchecker-agent-hostnet-ds.yml, type: ds, name: netchecker-agent-hostnet}
+    - {file: netchecker-server-pod.yml, type: po, name: netchecker-server}
+    - {file: netchecker-server-svc.yml, type: svc, name: netchecker-service}
+  register: manifests
+  when: inventory_hostname == groups['kube-master'][0]
+
+- name: Kubernetes Apps | Start Netchecker Resources
+  kube:
+    name: "{{item.item.name}}"
+    namespace: "{{netcheck_namespace}}"
+    kubectl: "{{bin_dir}}/kubectl"
+    resource: "{{item.item.type}}"
+    filename: "{{kube_config_dir}}/{{item.item.file}}"
+    state: "{{item.changed | ternary('latest','present') }}"
+  with_items: "{{ manifests.results }}"
+  when: inventory_hostname == groups['kube-master'][0]
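The netchecker task file above reuses the lay-down-then-apply pattern already used for KubeDNS: the `template` task is registered, and each rendered manifest is handed to the repo's `kube` module, where the `ternary` filter forces a re-apply only when the template actually changed. A condensed, commented sketch of how a single item flows through (illustrative only, not part of this change):

```
- name: Lay down one manifest and remember whether it changed
  template: src=netchecker-server-svc.yml dest={{kube_config_dir}}/netchecker-server-svc.yml
  register: manifests   # in the real task this is looped, so manifests.results holds one entry per file

- name: Apply it with the kube module
  kube:
    name: netchecker-service
    namespace: "{{netcheck_namespace}}"
    kubectl: "{{bin_dir}}/kubectl"
    resource: svc
    filename: "{{kube_config_dir}}/netchecker-server-svc.yml"
    # changed template   -> state: latest  (re-apply the resource)
    # unchanged template -> state: present (create only if missing)
    state: "{{ manifests.changed | ternary('latest','present') }}"
```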
 kind: ReplicaSet
 metadata:
   name: calico-policy-controller
-  namespace: kube-system
+  namespace: {{ kube_namespace }}
   labels:
     k8s-app: calico-policy
     kubernetes.io/cluster-service: "true"
diff --git a/roles/kubernetes-apps/ansible/templates/kubedns-rc.yml b/roles/kubernetes-apps/ansible/templates/kubedns-rc.yml
index dc5ab5b34..0fe4d2f58 100644
--- a/roles/kubernetes-apps/ansible/templates/kubedns-rc.yml
+++ b/roles/kubernetes-apps/ansible/templates/kubedns-rc.yml
@@ -2,7 +2,7 @@ apiVersion: v1
 kind: ReplicationController
 metadata:
   name: kubedns
-  namespace: kube-system
+  namespace: {{ kube_namespace }}
   labels:
     k8s-app: kubedns
     version: v19
diff --git a/roles/kubernetes-apps/ansible/templates/kubedns-svc.yml b/roles/kubernetes-apps/ansible/templates/kubedns-svc.yml
index 2e21bc9e6..7f88d0666 100644
--- a/roles/kubernetes-apps/ansible/templates/kubedns-svc.yml
+++ b/roles/kubernetes-apps/ansible/templates/kubedns-svc.yml
@@ -2,7 +2,7 @@ apiVersion: v1
 kind: Service
 metadata:
   name: kubedns
-  namespace: kube-system
+  namespace: {{ kube_namespace }}
   labels:
     k8s-app: kubedns
     kubernetes.io/cluster-service: "true"
diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml b/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml
new file mode 100644
index 000000000..a52329e50
--- /dev/null
+++ b/roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml
@@ -0,0 +1,25 @@
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  labels:
+    app: netchecker-agent
+  name: netchecker-agent
+  namespace: {{ netcheck_namespace }}
+spec:
+  template:
+    metadata:
+      name: netchecker-agent
+      labels:
+        app: netchecker-agent
+    spec:
+      containers:
+        - name: netchecker-agent
+          image: "{{ agent_img }}"
+          env:
+            - name: MY_POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: REPORT_INTERVAL
+              value: '{{ agent_report_interval }}'
+          imagePullPolicy: {{ k8s_image_pull_policy }}
diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml b/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml
new file mode 100644
index 000000000..4fd03e80a
--- /dev/null
+++ b/roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml
@@ -0,0 +1,26 @@
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  labels:
+    app: netchecker-agent-hostnet
+  name: netchecker-agent-hostnet
+  namespace: {{ netcheck_namespace }}
+spec:
+  template:
+    metadata:
+      name: netchecker-agent-hostnet
+      labels:
+        app: netchecker-agent-hostnet
+    spec:
+      hostNetwork: True
+      containers:
+        - name: netchecker-agent
+          image: "{{ agent_img }}"
+          env:
+            - name: MY_POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: REPORT_INTERVAL
+              value: '{{ agent_report_interval }}'
+          imagePullPolicy: {{ k8s_image_pull_policy }}
diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml b/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml
new file mode 100644
index 000000000..6f242bc51
--- /dev/null
+++ b/roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml
@@ -0,0 +1,21 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: netchecker-server
+  labels:
+    app: netchecker-server
+  namespace: {{ netcheck_namespace }}
+spec:
+  containers:
+    - name: netchecker-server
+      image: "{{ server_img }}"
+      env:
+      imagePullPolicy: {{ k8s_image_pull_policy }}
+      ports:
+        - containerPort: 8081
+          hostPort: 8081
+    - name: kubectl-proxy
+      image: "{{ kubectl_image }}"
+      imagePullPolicy: {{ k8s_image_pull_policy }}
+      args:
+        - proxy
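The server pod above pairs the netchecker server with a `kubectl proxy` sidecar. Since containers in a pod share a network namespace, this presumably lets the server reach the Kubernetes API over plain HTTP on localhost (`kubectl proxy` listens on 127.0.0.1:8001 by default) without handling credentials itself. A stripped-down sketch of that sidecar pattern (the pod name and app image are placeholders, not part of this change):

```
apiVersion: v1
kind: Pod
metadata:
  name: api-client-with-proxy          # hypothetical pod
spec:
  containers:
    - name: app
      image: "example.org/app:latest"  # placeholder; talks to http://127.0.0.1:8001
    - name: kubectl-proxy
      image: "{{ kubectl_image }}"
      args:
        - proxy                        # serves the API on localhost:8001 by default
```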
diff --git a/roles/kubernetes-apps/ansible/templates/netchecker-server-svc.yml b/roles/kubernetes-apps/ansible/templates/netchecker-server-svc.yml
new file mode 100644
index 000000000..dc3894676
--- /dev/null
+++ b/roles/kubernetes-apps/ansible/templates/netchecker-server-svc.yml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: netchecker-service
+  namespace: {{ netcheck_namespace }}
+spec:
+  selector:
+    app: netchecker-server
+  ports:
+    -
+      protocol: TCP
+      port: 8081
+      targetPort: 8081
+      nodePort: {{ netchecker_port }}
+  type: NodePort
diff --git a/roles/kubernetes/node/meta/main.yml b/roles/kubernetes/node/meta/main.yml
index 3e1dd5b3e..a65501113 100644
--- a/roles/kubernetes/node/meta/main.yml
+++ b/roles/kubernetes/node/meta/main.yml
@@ -9,6 +9,15 @@ dependencies:
     file: "{{ downloads.nginx }}"
   - role: download
     file: "{{ downloads.testbox }}"
+  - role: download
+    file: "{{ downloads.netcheck_server }}"
+    when: deploy_netchecker
+  - role: download
+    file: "{{ downloads.netcheck_agent }}"
+    when: deploy_netchecker
+  - role: download
+    file: "{{ downloads.netcheck_kubectl }}"
+    when: deploy_netchecker
   - role: download
     file: "{{ downloads.kubednsmasq }}"
     when: not skip_dnsmasq_k8s|default(false)
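With the NodePort service in place, the report endpoint documented in `docs/netcheck.md` can also be queried from a playbook instead of curl. A hypothetical ad-hoc check (not part of this change), assuming the default `netchecker_port` of 31081:

```
---
- hosts: kube-node[0]                  # any cluster node can reach the NodePort
  tasks:
    - name: Fetch the cluster-wide connectivity report
      uri:
        url: "http://localhost:31081/api/v1/connectivity_check"
        return_content: yes
      register: connectivity

    - name: Show the report
      debug:
        var: connectivity.content
```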