Add advanced net check for DNS K8s app

* Add an option to deploy K8s app to test e2e network connectivity
  and cluster DNS resolve via Kubedns for nethost/simple pods
  (defaults to false).
* Parametrize existing k8s apps templates with kube_namespace and
  kube_config_dir instead of hardcode.
* For CoreOS, ensure nameservers from inventory to be put in the
  first place to allow hostnet pods connectivity via short names
  or FQDN and hostnet agents to pass as well, if netchecker
  deployed.

Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
pull/529/head
Bogdan Dobrelya 2016-09-30 17:23:47 +02:00
parent f6c32c3ea3
commit b7692fad09
16 changed files with 220 additions and 11 deletions

41
docs/netcheck.md 100644
View File

@ -0,0 +1,41 @@
Network Checker Application
===========================
With the ``deploy_netchecker`` var enabled (defaults to false), Kargo deploys a
Network Checker Application from the 3rd side `l23network/mcp-netchecker` docker
images. It consists of the server and agents trying to reach the server by usual
for Kubernetes applications network connectivity meanings. Therefore, this
automagically verifies a pod to pod connectivity via the cluster IP and checks
if DNS resolve is functioning as well.
The checks are run by agents on a periodic basis and cover standard and host network
pods as well. The history of performed checks may be found in the agents' application
logs.
To get the most recent and cluster-wide network connectivity report, run from
any of the cluster nodes:
```
curl http://localhost:31081/api/v1/connectivity_check
```
Note that Kargo does not invoke the check but only deploys the application, if
requested.
There are related application specifc variables:
```
netchecker_port: 31081
agent_report_interval: 15
netcheck_namespace: default
agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
```
Note that the application verifies DNS resolve for FQDNs comprising only the
combination of the ``netcheck_namespace.dns_domain`` vars, for example the
``netchecker-service.default.cluster.local``. If you want to deploy the application
to the non default namespace, make sure as well to adjust the ``searchdomains`` var
so the resulting search domain records to contain that namespace, like:
```
search: foospace.cluster.local default.cluster.local ...
nameserver: ...
```

View File

@ -35,6 +35,8 @@ kube_users:
cluster_name: cluster.local
# Subdomains of DNS domain to be resolved via /etc/resolv.conf
ndots: 5
# Deploy netchecker app to verify DNS resolve as an HTTP service
deploy_netchecker: false
# For some environments, each node has a pubilcally accessible
# address and an address it should bind services to. These are

View File

@ -48,7 +48,20 @@
when: resolvconf.rc == 0
notify: Dnsmasq | update resolvconf
- name: Add search domains to resolv.conf
- name: Remove search and nameserver options from resolvconf cloud init temporary file
lineinfile:
dest: "{{resolvconffile}}"
state: absent
regexp: "^{{ item }}.*$"
backup: yes
follow: yes
with_items:
- search
- nameserver
when: ansible_os_family == "CoreOS"
notify: Dnsmasq | update resolvconf for CoreOS
- name: Add search domains to resolvconf file
lineinfile:
line: "search {{searchentries}}"
dest: "{{resolvconffile}}"
@ -66,7 +79,7 @@
nameserver {{ item }}
{% endfor %}
state: present
insertafter: "^search.*$"
insertafter: "^search default.svc.*$"
create: yes
backup: yes
follow: yes

View File

@ -58,6 +58,12 @@ hyperkube_image_repo: "quay.io/coreos/hyperkube"
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
pod_infra_image_repo: "gcr.io/google_containers/pause-amd64"
pod_infra_image_tag: "{{ pod_infra_version }}"
netcheck_tag: v0.1
netcheck_kubectl_tag: v0.18.0-120-gaeb4ac55ad12b1-dirty
netcheck_agent_img_repo: "quay.io/l23network/mcp-netchecker-agent"
netcheck_server_img_repo: "quay.io/l23network/mcp-netchecker-server"
netcheck_kubectl_img_repo: "gcr.io/google_containers/kubectl"
nginx_image_repo: nginx
nginx_image_tag: 1.11.4-alpine
dnsmasq_version: 2.72
@ -73,6 +79,21 @@ test_image_repo: busybox
test_image_tag: latest
downloads:
netcheck_server:
container: true
repo: "{{ netcheck_server_img_repo }}"
tag: "{{ netcheck_tag }}"
enabled: "{{ deploy_netchecker|bool }}"
netcheck_agent:
container: true
repo: "{{ netcheck_agent_img_repo }}"
tag: "{{ netcheck_tag }}"
enabled: "{{ deploy_netchecker|bool }}"
netcheck_kubectl:
container: true
repo: "{{ netcheck_kubectl_img_repo }}"
tag: "{{ netcheck_kubectl_tag }}"
enabled: "{{ deploy_netchecker|bool }}"
weave:
dest: weave/bin/weave
version: "{{weave_version}}"

View File

@ -1,3 +1,6 @@
kube_config_dir: /etc/kubernetes
kube_namespace: kube-system
# Versions
kubedns_version: 1.7
kubednsmasq_version: 1.3
@ -13,5 +16,14 @@ exechealthz_image_tag: "{{ exechealthz_version }}"
calico_policy_image_repo: "calico/kube-policy-controller"
calico_policy_image_tag: latest
# Netchecker
deploy_netchecker: false
netchecker_port: 31081
agent_report_interval: 15
netcheck_namespace: default
agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
kubectl_image: "gcr.io/google_containers/kubectl:v0.18.0-120-gaeb4ac55ad12b1-dirty"
# SSL
etcd_cert_dir: "/etc/ssl/etcd/ssl"

View File

@ -1,5 +1,5 @@
- name: Write calico-policy-controller yaml
template: src=calico-policy-controller.yml.j2 dest=/etc/kubernetes/calico-policy-controller.yml
template: src=calico-policy-controller.yml.j2 dest={{kube_config_dir}}/calico-policy-controller.yml
when: inventory_hostname == groups['kube-master'][0]
@ -7,7 +7,7 @@
kube:
name: "calico-policy-controller"
kubectl: "{{bin_dir}}/kubectl"
filename: "/etc/kubernetes/calico-policy-controller.yml"
namespace: "kube-system"
filename: "{{kube_config_dir}}/calico-policy-controller.yml"
namespace: "{{kube_namespace}}"
resource: "rs"
when: inventory_hostname == groups['kube-master'][0]

View File

@ -1,6 +1,6 @@
---
- name: Kubernetes Apps | Lay Down KubeDNS Template
template: src={{item.file}} dest=/etc/kubernetes/{{item.file}}
template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
with_items:
- {file: kubedns-rc.yml, type: rc}
- {file: kubedns-svc.yml, type: svc}
@ -10,10 +10,10 @@
- name: Kubernetes Apps | Start Resources
kube:
name: kubedns
namespace: kube-system
namespace: "{{ kube_namespace }}"
kubectl: "{{bin_dir}}/kubectl"
resource: "{{item.item.type}}"
filename: /etc/kubernetes/{{item.item.file}}
filename: "{{kube_config_dir}}/{{item.item.file}}"
state: "{{item.changed | ternary('latest','present') }}"
with_items: "{{ manifests.results }}"
when: inventory_hostname == groups['kube-master'][0]
@ -21,3 +21,7 @@
- include: tasks/calico-policy-controller.yml
when: ( enable_network_policy is defined and enable_network_policy == True ) or
( kube_network_plugin == 'canal' )
- name: Kubernetes Apps | Netchecker
include: tasks/netchecker.yml
when: deploy_netchecker

View File

@ -0,0 +1,20 @@
- name: Kubernetes Apps | Lay Down Netchecker Template
template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
with_items:
- {file: netchecker-agent-ds.yml, type: ds, name: netchecker-agent}
- {file: netchecker-agent-hostnet-ds.yml, type: ds, name: netchecker-agent-hostnet}
- {file: netchecker-server-pod.yml, type: po, name: netchecker-server}
- {file: netchecker-server-svc.yml, type: svc, name: netchecker-service}
register: manifests
when: inventory_hostname == groups['kube-master'][0]
- name: Kubernetes Apps | Start Netchecker Resources
kube:
name: "{{item.item.name}}"
namespace: "{{netcheck_namespace}}"
kubectl: "{{bin_dir}}/kubectl"
resource: "{{item.item.type}}"
filename: "{{kube_config_dir}}/{{item.item.file}}"
state: "{{item.changed | ternary('latest','present') }}"
with_items: "{{ manifests.results }}"
when: inventory_hostname == groups['kube-master'][0]

View File

@ -2,7 +2,7 @@ apiVersion: extensions/v1beta1
kind: ReplicaSet
metadata:
name: calico-policy-controller
namespace: kube-system
namespace: {{ kube_namespace }}
labels:
k8s-app: calico-policy
kubernetes.io/cluster-service: "true"

View File

@ -2,7 +2,7 @@ apiVersion: v1
kind: ReplicationController
metadata:
name: kubedns
namespace: kube-system
namespace: {{ kube_namespace }}
labels:
k8s-app: kubedns
version: v19

View File

@ -2,7 +2,7 @@ apiVersion: v1
kind: Service
metadata:
name: kubedns
namespace: kube-system
namespace: {{ kube_namespace }}
labels:
k8s-app: kubedns
kubernetes.io/cluster-service: "true"

View File

@ -0,0 +1,25 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
labels:
app: netchecker-agent
name: netchecker-agent
namespace: {{ netcheck_namespace }}
spec:
template:
metadata:
name: netchecker-agent
labels:
app: netchecker-agent
spec:
containers:
- name: netchecker-agent
image: "{{ agent_img }}"
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: REPORT_INTERVAL
value: '{{ agent_report_interval }}'
imagePullPolicy: {{ k8s_image_pull_policy }}

View File

@ -0,0 +1,26 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
labels:
app: netchecker-agent-hostnet
name: netchecker-agent-hostnet
namespace: {{ netcheck_namespace }}
spec:
template:
metadata:
name: netchecker-agent-hostnet
labels:
app: netchecker-agent-hostnet
spec:
hostNetwork: True
containers:
- name: netchecker-agent
image: "{{ agent_img }}"
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: REPORT_INTERVAL
value: '{{ agent_report_interval }}'
imagePullPolicy: {{ k8s_image_pull_policy }}

View File

@ -0,0 +1,21 @@
apiVersion: v1
kind: Pod
metadata:
name: netchecker-server
labels:
app: netchecker-server
namespace: {{ netcheck_namespace }}
spec:
containers:
- name: netchecker-server
image: "{{ server_img }}"
env:
imagePullPolicy: {{ k8s_image_pull_policy }}
ports:
- containerPort: 8081
hostPort: 8081
- name: kubectl-proxy
image: "{{ kubectl_image }}"
imagePullPolicy: {{ k8s_image_pull_policy }}
args:
- proxy

View File

@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
name: netchecker-service
namespace: {{ netcheck_namespace }}
spec:
selector:
app: netchecker-server
ports:
-
protocol: TCP
port: 8081
targetPort: 8081
nodePort: {{ netchecker_port }}
type: NodePort

View File

@ -9,6 +9,15 @@ dependencies:
file: "{{ downloads.nginx }}"
- role: download
file: "{{ downloads.testbox }}"
- role: download
file: "{{ downloads.netcheck_server }}"
when: deploy_netchecker
- role: download
file: "{{ downloads.netcheck_agent }}"
when: deploy_netchecker
- role: download
file: "{{ downloads.netcheck_kubectl }}"
when: deploy_netchecker
- role: download
file: "{{ downloads.kubednsmasq }}"
when: not skip_dnsmasq_k8s|default(false)