From c402feffbd9d8038f69ea9d56c823db51d18e5ca Mon Sep 17 00:00:00 2001 From: Chad Swenson Date: Fri, 14 Oct 2016 16:46:44 -0500 Subject: [PATCH 01/35] Parameterize several dependency endpoints so that they can be overridden with internal mirrors. Signed-off-by: Chad Swenson --- roles/dnsmasq/defaults/main.yml | 7 +++++++ roles/dnsmasq/templates/dnsmasq-ds.yml | 2 +- roles/download/defaults/main.yml | 7 +++++++ roles/kubernetes-apps/ansible/defaults/main.yml | 12 ++++++++++++ .../kubernetes-apps/ansible/templates/kubedns-rc.yml | 6 +++--- roles/kubernetes/node/meta/main.yml | 2 ++ roles/kubernetes/node/templates/kubelet.j2 | 6 +++--- roles/kubernetes/preinstall/defaults/main.yml | 2 ++ roles/kubernetes/preinstall/tasks/main.yml | 2 +- 9 files changed, 38 insertions(+), 8 deletions(-) create mode 100644 roles/kubernetes-apps/ansible/defaults/main.yml diff --git a/roles/dnsmasq/defaults/main.yml b/roles/dnsmasq/defaults/main.yml index 48b52c121..7a1e77023 100644 --- a/roles/dnsmasq/defaults/main.yml +++ b/roles/dnsmasq/defaults/main.yml @@ -10,3 +10,10 @@ # Max of 2 is allowed here (a 1 is reserved for the dns_server) #nameservers: # - 127.0.0.1 + +# Versions +dnsmasq_version: 2.72 + +# Images +dnsmasq_image_repo: "andyshinn/dnsmasq" +dnsmasq_image_tag: "{{ dnsmasq_version }}" \ No newline at end of file diff --git a/roles/dnsmasq/templates/dnsmasq-ds.yml b/roles/dnsmasq/templates/dnsmasq-ds.yml index f1f622bbd..49223124e 100644 --- a/roles/dnsmasq/templates/dnsmasq-ds.yml +++ b/roles/dnsmasq/templates/dnsmasq-ds.yml @@ -14,7 +14,7 @@ spec: spec: containers: - name: dnsmasq - image: andyshinn/dnsmasq:2.72 + image: "{{ dnsmasq_image_repo }}:{{ dnsmasq_image_tag }}" command: - dnsmasq args: diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index 1ef7b45df..56b21fe06 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -15,6 +15,7 @@ calico_cni_version: v1.4.2 weave_version: v1.6.1 flannel_version: v0.6.2 
flannel_server_helper_version: 0.1 +pod_infra_version: 3.0 # Download URL's etcd_download_url: "https://storage.googleapis.com/kargo/{{etcd_version}}_etcd" @@ -43,6 +44,8 @@ calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" hyperkube_image_repo: "quay.io/coreos/hyperkube" hyperkube_image_tag: "{{ kube_version }}_coreos.0" +pod_infra_image_repo: "gcr.io/google_containers/pause-amd64" +pod_infra_image_tag: "{{ pod_infra_version }}" downloads: calico_cni_plugin: @@ -108,6 +111,10 @@ downloads: repo: "{{ calico_node_image_repo }}" tag: "{{ calico_node_image_tag }}" enabled: "{{ kube_network_plugin == 'calico' }}" + pod_infra: + container: true + repo: "{{ pod_infra_image_repo }}" + tag: "{{ pod_infra_image_tag }}" download: container: "{{ file.container|default('false') }}" diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml new file mode 100644 index 000000000..b1086aa0d --- /dev/null +++ b/roles/kubernetes-apps/ansible/defaults/main.yml @@ -0,0 +1,12 @@ +# Versions +kubedns_version: 1.7 +kubednsmasq_version: 1.3 +exechealthz_version: 1.1 + +# Images +kubedns_image_repo: "gcr.io/google_containers/kubedns-amd64" +kubedns_image_tag: "{{ kubedns_version }}" +kubednsmasq_image_repo: "gcr.io/google_containers/kube-dnsmasq-amd64" +kubednsmasq_image_tag: "{{ kubednsmasq_version }}" +exechealthz_image_repo: "gcr.io/google_containers/exechealthz-amd64" +exechealthz_image_tag: "{{ exechealthz_version }}" \ No newline at end of file diff --git a/roles/kubernetes-apps/ansible/templates/kubedns-rc.yml b/roles/kubernetes-apps/ansible/templates/kubedns-rc.yml index 3d193d1dc..ed38d671d 100644 --- a/roles/kubernetes-apps/ansible/templates/kubedns-rc.yml +++ b/roles/kubernetes-apps/ansible/templates/kubedns-rc.yml @@ -21,7 +21,7 @@ spec: spec: containers: - name: kubedns - image: gcr.io/google_containers/kubedns-amd64:1.7 + image: "{{ kubedns_image_repo }}:{{ kubedns_image_tag }}" resources: # 
TODO: Set memory limits when we've profiled the container for large # clusters, then set request = limit to keep this container in @@ -63,7 +63,7 @@ spec: name: dns-tcp-local protocol: TCP - name: dnsmasq - image: gcr.io/google_containers/kube-dnsmasq-amd64:1.3 + image: "{{ kubednsmasq_image_repo }}:{{ kubednsmasq_image_tag }}" args: - --log-facility=- - --cache-size=1000 @@ -77,7 +77,7 @@ spec: name: dns-tcp protocol: TCP - name: healthz - image: gcr.io/google_containers/exechealthz-amd64:1.1 + image: "{{ exechealthz_image_repo }}:{{ exechealthz_image_tag }}" resources: # keep request = limit to keep this container in guaranteed class limits: diff --git a/roles/kubernetes/node/meta/main.yml b/roles/kubernetes/node/meta/main.yml index b9cbbd9ff..9c52b2d80 100644 --- a/roles/kubernetes/node/meta/main.yml +++ b/roles/kubernetes/node/meta/main.yml @@ -2,4 +2,6 @@ dependencies: - role: download file: "{{ downloads.hyperkube }}" + - role: download + file: "{{ downloads.pod_infra }}" - role: kubernetes/secrets diff --git a/roles/kubernetes/node/templates/kubelet.j2 b/roles/kubernetes/node/templates/kubelet.j2 index f55feefa9..53f2915d9 100644 --- a/roles/kubernetes/node/templates/kubelet.j2 +++ b/roles/kubernetes/node/templates/kubelet.j2 @@ -20,11 +20,11 @@ KUBELET_REGISTER_NODE="--register-node=false" {% endif %} # location of the api-server {% if dns_setup|bool and skip_dnsmasq|bool %} -KUBELET_ARGS="--cluster_dns={{ skydns_server }} --cluster_domain={{ dns_domain }} --kubeconfig={{ kube_config_dir}}/node-kubeconfig.yaml --config={{ kube_manifest_dir }} --resolv-conf={{ kube_resolv_conf }}" +KUBELET_ARGS="--cluster_dns={{ skydns_server }} --cluster_domain={{ dns_domain }} --kubeconfig={{ kube_config_dir}}/node-kubeconfig.yaml --config={{ kube_manifest_dir }} --resolv-conf={{ kube_resolv_conf }} --pod-infra-container-image={{ pod_infra_image_repo }}:{{ pod_infra_image_tag }}" {% elif dns_setup|bool %} -KUBELET_ARGS="--cluster_dns={{ dns_server }} --cluster_domain={{ 
dns_domain }} --kubeconfig={{ kube_config_dir}}/node-kubeconfig.yaml --config={{ kube_manifest_dir }} --resolv-conf={{ kube_resolv_conf }}" +KUBELET_ARGS="--cluster_dns={{ dns_server }} --cluster_domain={{ dns_domain }} --kubeconfig={{ kube_config_dir}}/node-kubeconfig.yaml --config={{ kube_manifest_dir }} --resolv-conf={{ kube_resolv_conf }} --pod-infra-container-image={{ pod_infra_image_repo }}:{{ pod_infra_image_tag }}" {% else %} -KUBELET_ARGS="--kubeconfig={{ kube_config_dir}}/kubelet.kubeconfig --config={{ kube_manifest_dir }}" +KUBELET_ARGS="--kubeconfig={{ kube_config_dir}}/kubelet.kubeconfig --config={{ kube_manifest_dir }} --pod-infra-container-image={{ pod_infra_image_repo }}:{{ pod_infra_image_tag }}" {% endif %} {% if kube_network_plugin is defined and kube_network_plugin in ["calico", "weave"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --network-plugin-dir=/etc/cni/net.d" diff --git a/roles/kubernetes/preinstall/defaults/main.yml b/roles/kubernetes/preinstall/defaults/main.yml index 5d1b2cd2e..61cad7467 100644 --- a/roles/kubernetes/preinstall/defaults/main.yml +++ b/roles/kubernetes/preinstall/defaults/main.yml @@ -21,6 +21,8 @@ kube_log_dir: "/var/log/kubernetes" # pods on startup kube_manifest_dir: "{{ kube_config_dir }}/manifests" +epel_rpm_download_url: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm" + # change to 0.0.0.0 to enable insecure access from anywhere (not recommended) kube_apiserver_insecure_bind_address: 127.0.0.1 diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml index 8c2aecec5..49e69a907 100644 --- a/roles/kubernetes/preinstall/tasks/main.yml +++ b/roles/kubernetes/preinstall/tasks/main.yml @@ -91,7 +91,7 @@ changed_when: False - name: Install epel-release on RedHat/CentOS - shell: rpm -qa | grep epel-release || rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm + shell: rpm -qa | grep epel-release || rpm -ivh {{ 
epel_rpm_download_url }} when: ansible_distribution in ["CentOS","RedHat"] and ansible_distribution_major_version >= 7 changed_when: False From 91a101c8551dbb90e8bde2add77a33dbd4d68707 Mon Sep 17 00:00:00 2001 From: Smana Date: Tue, 18 Oct 2016 12:52:35 +0200 Subject: [PATCH 02/35] upgrade to k8s v1.4.3 --- README.md | 2 +- roles/download/defaults/main.yml | 2 +- roles/uploads/defaults/main.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d3ed06b4a..f050bb462 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Supported Linux distributions Versions -------------- -[kubernetes](https://github.com/kubernetes/kubernetes/releases) v1.4.0
+[kubernetes](https://github.com/kubernetes/kubernetes/releases) v1.4.3
[etcd](https://github.com/coreos/etcd/releases) v3.0.1
[flanneld](https://github.com/coreos/flannel/releases) v0.6.2
[calicoctl](https://github.com/projectcalico/calico-docker/releases) v0.22.0
diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index 3feef7554..ffba5488a 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -5,7 +5,7 @@ local_release_dir: /tmp download_run_once: False # Versions -kube_version: v1.4.0 +kube_version: v1.4.3 etcd_version: v3.0.6 #TODO(mattymo): Move calico versions to roles/network_plugins/calico/defaults diff --git a/roles/uploads/defaults/main.yml b/roles/uploads/defaults/main.yml index fb07e4c2e..0774d324c 100644 --- a/roles/uploads/defaults/main.yml +++ b/roles/uploads/defaults/main.yml @@ -2,7 +2,7 @@ local_release_dir: /tmp # Versions -kube_version: v1.4.0 +kube_version: v1.4.3 etcd_version: v3.0.6 calico_version: v0.22.0 From 7d6fc1d6802885b23a4156c1dd1083c75d077e54 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Tue, 18 Oct 2016 13:44:45 +0200 Subject: [PATCH 03/35] Update roadmap for the kubeadm LCM track Signed-off-by: Bogdan Dobrelya --- docs/roadmap.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/roadmap.md b/docs/roadmap.md index 298750493..4396a9b07 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -26,13 +26,14 @@ Kargo's roadmap - single test with the Ansible version n-1 per day - Test idempotency on on single OS but for all network plugins/container engines - single test on AWS per day -- test different achitectures : +- test different achitectures : - 3 instances, 3 are members of the etcd cluster, 2 of them acting as master and node, 1 as node - 5 instances, 3 are etcd and nodes, 2 are masters only - 7 instances, 3 etcd only, 2 masters, 2 nodes - test scale up cluster: +1 etcd, +1 master, +1 node ### Lifecycle +- Adopt the kubeadm tool by delegating CM tasks it is capable to accomplish well [#553](https://github.com/kubespray/kargo/issues/553) - Drain worker node when upgrading k8s components in a worker node. 
[#154](https://github.com/kubespray/kargo/issues/154) - Drain worker node when shutting down/deleting an instance @@ -56,7 +57,7 @@ While waiting for the issue [kubernetes/kubernetes#18174](https://github.com/kub ### Kargo API - Perform all actions through an **API** - Store inventories / configurations of mulltiple clusters -- make sure that state of cluster is completely saved in no more than one config file beyond hosts inventory +- make sure that state of cluster is completely saved in no more than one config file beyond hosts inventory ### Addons (with kpm) Include optionals deployments to init the cluster: @@ -65,7 +66,7 @@ Include optionals deployments to init the cluster: - **Prometheus** ##### Others - + ##### Dashboards: - kubernetes-dashboard - Fabric8 From 6113a3f3500feba47cd19f178a8d537a6bd75d78 Mon Sep 17 00:00:00 2001 From: Smana Date: Tue, 18 Oct 2016 13:51:36 +0200 Subject: [PATCH 04/35] update roadmap, kubeadm adoption --- docs/roadmap.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/roadmap.md b/docs/roadmap.md index 298750493..95419f00d 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -1,6 +1,10 @@ Kargo's roadmap ================= +### Kubeadm +- Propose kubeadm as an option in order to setup the kubernetes cluster. +That would probably improve deployment speed and certs management [#553](https://github.com/kubespray/kargo/issues/553) + ### Self deployment (pull-mode) [#320](https://github.com/kubespray/kargo/issues/320) - the playbook would install and configure docker/rkt and the etcd cluster - the following data would be inserted into etcd: certs,tokens,users,inventory,group_vars. From e6902d8eccb0a8790d18ba8edf3c634dc30a36d6 Mon Sep 17 00:00:00 2001 From: Chad Swenson Date: Thu, 20 Oct 2016 14:56:52 -0500 Subject: [PATCH 05/35] Use absolute path for etcdctl Small fix. 
The shell module won't automatically resolve the path to the etcdctl binary, so I prefixed it with {{ bin_dir }}/ --- roles/etcd/tasks/configure.yml | 4 ++-- roles/etcd/tasks/set_cluster_health.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml index 56b01da3f..514a79d73 100644 --- a/roles/etcd/tasks/configure.yml +++ b/roles/etcd/tasks/configure.yml @@ -1,6 +1,6 @@ --- - name: Configure | Check if member is in cluster - shell: "etcdctl --no-sync --peers={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}" + shell: "{{ bin_dir }}/etcdctl --no-sync --peers={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}" register: etcd_member_in_cluster ignore_errors: true changed_when: false @@ -8,7 +8,7 @@ - name: Configure | Add member to the cluster if it is not there when: is_etcd_master and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0 - shell: "etcdctl --peers={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}" + shell: "{{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} member add {{ etcd_member_name }} {{ etcd_peer_url }}" - name: Configure | Copy etcd.service systemd file template: diff --git a/roles/etcd/tasks/set_cluster_health.yml b/roles/etcd/tasks/set_cluster_health.yml index be0d938dd..1a27e4dcf 100644 --- a/roles/etcd/tasks/set_cluster_health.yml +++ b/roles/etcd/tasks/set_cluster_health.yml @@ -1,6 +1,6 @@ --- - name: Configure | Check if cluster is healthy - shell: "etcdctl --peers={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'" + shell: "{{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'" register: etcd_cluster_is_healthy ignore_errors: true changed_when: false From 4b7347f1cdd6730b79fe7113661e43db5a8bbb2a Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 21 Oct 2016 12:59:50 +0300 
Subject: [PATCH 06/35] fix dnsmasq template cloud_provider lookup --- roles/dnsmasq/templates/01-kube-dns.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/dnsmasq/templates/01-kube-dns.conf.j2 b/roles/dnsmasq/templates/01-kube-dns.conf.j2 index aed68a6fe..4d73eebdb 100644 --- a/roles/dnsmasq/templates/01-kube-dns.conf.j2 +++ b/roles/dnsmasq/templates/01-kube-dns.conf.j2 @@ -13,7 +13,7 @@ server=/{{ dns_domain }}/{{ skydns_server }} {% for srv in upstream_dns_servers %} server={{ srv }} {% endfor %} -{% elif cloud_provider == "gce" %} +{% elif cloud_provider is defined and cloud_provider == "gce" %} server=169.254.169.254 {% else %} server=8.8.8.8 From 65d2a3b0e55604d2caaf5a38f49a0b8d742775b0 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 21 Oct 2016 14:39:58 +0300 Subject: [PATCH 07/35] Use only native cachable hostvars for etcd set_facts --- roles/etcd/templates/etcd-proxy.j2 | 2 +- roles/etcd/templates/etcd.j2 | 2 +- roles/kubernetes/preinstall/tasks/set_facts.yml | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/roles/etcd/templates/etcd-proxy.j2 b/roles/etcd/templates/etcd-proxy.j2 index 90d6f6470..0a1492a37 100644 --- a/roles/etcd/templates/etcd-proxy.j2 +++ b/roles/etcd/templates/etcd-proxy.j2 @@ -2,4 +2,4 @@ ETCD_DATA_DIR=/var/lib/etcd-proxy ETCD_PROXY=on ETCD_LISTEN_CLIENT_URLS={{ etcd_access_endpoint }} ETCD_NAME={{ etcd_proxy_member_name | default("etcd-proxy") }} -ETCD_INITIAL_CLUSTER={% for host in groups['etcd'] %}etcd{{ loop.index|string }}={{ hostvars[host]['etcd_peer_url'] }}{% if not loop.last %},{% endif %}{% endfor %} +ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} diff --git a/roles/etcd/templates/etcd.j2 b/roles/etcd/templates/etcd.j2 index 1f7385939..b82116612 100644 --- a/roles/etcd/templates/etcd.j2 +++ b/roles/etcd/templates/etcd.j2 @@ -13,4 +13,4 @@ ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd ETCD_LISTEN_PEER_URLS=http://{{ etcd_address }}:2380 ETCD_NAME={{ etcd_member_name }} 
ETCD_PROXY=off -ETCD_INITIAL_CLUSTER={% for host in groups['etcd'] %}etcd{{ loop.index|string }}={{ hostvars[host]['etcd_peer_url'] }}{% if not loop.last %},{% endif %}{% endfor %} +ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index 19f08df78..37bd6a33c 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -29,8 +29,7 @@ - set_fact: etcd_endpoint="http://{{ etcd_authority }}" - set_fact: etcd_access_addresses: |- - {% for item in groups['etcd'] -%} - http://{{ hostvars[item].etcd_access_address }}:2379{% if not loop.last %},{% endif %} + http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2379{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}" - set_fact: @@ -38,6 +37,11 @@ {% for host in groups['etcd'] %} {% if inventory_hostname == host %}{{"etcd"+loop.index|string }}{% endif %} {% endfor %} +- set_fact: + etcd_peer_addresses: |- + {% for item in groups['etcd'] -%} + http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} + {%- endfor %} - set_fact: etcd_proxy_member_name: |- {% for host in groups['k8s-cluster'] %} From 0e9d1e09e3c9533ad14e4e98e5f48713af66ff38 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 21 Oct 2016 14:43:41 +0300 Subject: [PATCH 08/35] Sync master tokens only with those in play_hosts --- roles/kubernetes/secrets/tasks/check-tokens.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/kubernetes/secrets/tasks/check-tokens.yml b/roles/kubernetes/secrets/tasks/check-tokens.yml index 1ecaa7006..14cfbb124 100644 --- 
a/roles/kubernetes/secrets/tasks/check-tokens.yml +++ b/roles/kubernetes/secrets/tasks/check-tokens.yml @@ -27,7 +27,7 @@ sync_tokens: true when: >- {%- set tokens = {'sync': False} -%} - {%- for server in groups['kube-master'] + {%- for server in groups['kube-master'] | intersect(play_hosts) if (not hostvars[server].known_tokens.stat.exists) or (hostvars[server].known_tokens.stat.checksum != known_tokens_master.stat.checksum|default('')) -%} {%- set _ = tokens.update({'sync': True}) -%} From 11f1f71b3b9aaa0e6794b66c7da5f27deb94307d Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 21 Oct 2016 14:59:27 +0300 Subject: [PATCH 09/35] dynamically calculate etcd peer names --- roles/kubernetes/preinstall/tasks/set_facts.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index 37bd6a33c..2dd947dda 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -29,6 +29,7 @@ - set_fact: etcd_endpoint="http://{{ etcd_authority }}" - set_fact: etcd_access_addresses: |- + {% for item in groups['etcd'] -%} http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2379{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}" @@ -40,7 +41,7 @@ - set_fact: etcd_peer_addresses: |- {% for item in groups['etcd'] -%} - http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} + {{ "etcd"+loop.index|string }}=http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: 
etcd_proxy_member_name: |- From c59c3a1bcfeee654449f75b5675106b886cb63fd Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Mon, 24 Oct 2016 15:11:52 +0200 Subject: [PATCH 10/35] Fix idempotency/recurrence of download and preinstall * Don't push containers if not changed * Do preinstall role only once and redistribute defaults to corresponding roles Signed-off-by: Bogdan Dobrelya --- cluster.yml | 7 ++++--- roles/download/tasks/main.yml | 17 ++++++++++++++--- roles/kubernetes/master/defaults/main.yml | 18 ++++++++++++++++++ roles/kubernetes/node/defaults/main.yml | 18 ++++++++++++++++++ roles/kubernetes/preinstall/defaults/main.yml | 3 --- roles/kubernetes/secrets/defaults/main.yml | 13 +++++++++++++ 6 files changed, 67 insertions(+), 9 deletions(-) diff --git a/cluster.yml b/cluster.yml index 208382354..295bb668a 100644 --- a/cluster.yml +++ b/cluster.yml @@ -10,21 +10,22 @@ - hosts: all gather_facts: true -- hosts: etcd:!k8s-cluster +- hosts: all roles: - { role: kubernetes/preinstall, tags: preinstall } + +- hosts: etcd:!k8s-cluster + roles: - { role: etcd, tags: etcd } - hosts: k8s-cluster roles: - - { role: kubernetes/preinstall, tags: preinstall } - { role: etcd, tags: etcd } - { role: kubernetes/node, tags: node } - { role: network_plugin, tags: network } - hosts: kube-master roles: - - { role: kubernetes/preinstall, tags: preinstall } - { role: kubernetes/master, tags: master } - hosts: k8s-cluster diff --git a/roles/download/tasks/main.yml b/roles/download/tasks/main.yml index b1b307e88..e715f380d 100644 --- a/roles/download/tasks/main.yml +++ b/roles/download/tasks/main.yml @@ -61,11 +61,22 @@ - set_fact: fname: "{{local_release_dir}}/containers/{{download.repo|regex_replace('/|\0|:', '_')}}:{{download.tag|regex_replace('/|\0|:', '_')}}.tar" +- name: "Set default value for 'container_changed' to false" + set_fact: + container_changed: false + +- name: "Update the 'container_changed' fact" + set_fact: + container_changed: "{{ not 'up to date' in 
pull_task_result.stdout }}" + when: "{{ download.enabled|bool and download.container|bool }}" + delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else inventory_hostname }}" + run_once: "{{ download_run_once|bool }}" + - name: Download | save container images shell: docker save "{{ download.repo }}:{{ download.tag }}" > "{{ fname }}" delegate_to: "{{groups['kube-master'][0]}}" run_once: true - when: ansible_os_family != "CoreOS" and download_run_once|bool and download.enabled|bool and download.container|bool + when: ansible_os_family != "CoreOS" and download_run_once|bool and download.enabled|bool and download.container|bool and container_changed|bool - name: Download | get container images synchronize: @@ -76,8 +87,8 @@ until: get_task|success retries: 4 delay: "{{ retry_stagger | random + 3 }}" - when: ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] and download_run_once|bool and download.enabled|bool and download.container|bool + when: ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] and download_run_once|bool and download.enabled|bool and download.container|bool and container_changed|bool - name: Download | load container images shell: docker load < "{{ fname }}" - when: ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] and download_run_once|bool and download.enabled|bool and download.container|bool + when: ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] and download_run_once|bool and download.enabled|bool and download.container|bool and container_changed|bool diff --git a/roles/kubernetes/master/defaults/main.yml b/roles/kubernetes/master/defaults/main.yml index d0be14d64..ee32ccf57 100644 --- a/roles/kubernetes/master/defaults/main.yml +++ b/roles/kubernetes/master/defaults/main.yml @@ -10,3 +10,21 @@ kube_users_dir: "{{ kube_config_dir }}/users" # An experimental dev/test only dynamic volumes provisioner, # for 
PetSets. Works for kube>=v1.3 only. kube_hostpath_dynamic_provisioner: "false" + +# This is where you can drop yaml/json files and the kubelet will run those +# pods on startup +kube_manifest_dir: "{{ kube_config_dir }}/manifests" + +# This directory is where all the additional config stuff goes +# that Kubernetes normally puts in /srv/kubernetes. +# This puts them in a sane location. +# Editing this value will almost surely break something. Don't +# change it. Things like the systemd scripts are hard coded to +# look in here. Don't do it. +kube_config_dir: /etc/kubernetes + +# change to 0.0.0.0 to enable insecure access from anywhere (not recommended) +kube_apiserver_insecure_bind_address: 127.0.0.1 + +# Logging directory (sysvinit systems) +kube_log_dir: "/var/log/kubernetes" diff --git a/roles/kubernetes/node/defaults/main.yml b/roles/kubernetes/node/defaults/main.yml index ed85a3a53..8c4ce38a5 100644 --- a/roles/kubernetes/node/defaults/main.yml +++ b/roles/kubernetes/node/defaults/main.yml @@ -1,6 +1,13 @@ # This is where all the cert scripts and certs will be located kube_cert_dir: "{{ kube_config_dir }}/ssl" +# change to 0.0.0.0 to enable insecure access from anywhere (not recommended) +kube_apiserver_insecure_bind_address: 127.0.0.1 + +# This is where you can drop yaml/json files and the kubelet will run those +# pods on startup +kube_manifest_dir: "{{ kube_config_dir }}/manifests" + dns_domain: "{{ cluster_name }}" # resolv.conf to base dns config @@ -15,5 +22,16 @@ kube_proxy_masquerade_all: true # - extensions/v1beta1/daemonsets=true # - extensions/v1beta1/deployments=true +# Logging directory (sysvinit systems) +kube_log_dir: "/var/log/kubernetes" + +# This directory is where all the additional config stuff goes +# that Kubernetes normally puts in /srv/kubernetes. +# This puts them in a sane location. +# Editing this value will almost surely break something. Don't +# change it. Things like the systemd scripts are hard coded to +# look in here. 
Don't do it. +kube_config_dir: /etc/kubernetes + nginx_image_repo: nginx nginx_image_tag: 1.11.4-alpine diff --git a/roles/kubernetes/preinstall/defaults/main.yml b/roles/kubernetes/preinstall/defaults/main.yml index 61cad7467..3eae9757d 100644 --- a/roles/kubernetes/preinstall/defaults/main.yml +++ b/roles/kubernetes/preinstall/defaults/main.yml @@ -23,9 +23,6 @@ kube_manifest_dir: "{{ kube_config_dir }}/manifests" epel_rpm_download_url: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm" -# change to 0.0.0.0 to enable insecure access from anywhere (not recommended) -kube_apiserver_insecure_bind_address: 127.0.0.1 - common_required_pkgs: - python-httplib2 - openssl diff --git a/roles/kubernetes/secrets/defaults/main.yml b/roles/kubernetes/secrets/defaults/main.yml index a5b88d7ac..c6011a9bf 100644 --- a/roles/kubernetes/secrets/defaults/main.yml +++ b/roles/kubernetes/secrets/defaults/main.yml @@ -6,3 +6,16 @@ kube_token_dir: "{{ kube_config_dir }}/tokens" # This is where to save basic auth file kube_users_dir: "{{ kube_config_dir }}/users" + +# This directory is where all the additional config stuff goes +# that Kubernetes normally puts in /srv/kubernetes. +# This puts them in a sane location. +# Editing this value will almost surely break something. Don't +# change it. Things like the systemd scripts are hard coded to +# look in here. Don't do it. +kube_config_dir: /etc/kubernetes + +# This directory is where all the additional scripts go +# that Kubernetes normally puts in /srv/kubernetes. 
+# This puts them in a sane location +kube_script_dir: "{{ bin_dir }}/kubernetes-scripts" From 93f7a2689637cf74eb04c15c1e695c3374ec1553 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Tue, 25 Oct 2016 12:23:49 +0200 Subject: [PATCH 11/35] Enable smart facts cache for CI jobs Signed-off-by: Bogdan Dobrelya --- .travis.yml | 4 ++-- ansible.cfg | 5 ++++- tests/ansible.cfg | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 998a0aa31..13604765e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -110,7 +110,7 @@ before_script: - cp tests/ansible.cfg . # - "echo $HOME/.local/bin/ansible-playbook -i inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e '{\"cloud_provider\": true}' $LOG_LEVEL -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} setup-kubernetes/cluster.yml" ## Configure ansible deployment logs to be collected as an artifact. Enable when GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scritps/configure-logs.yaml +# - $HOME/.local/bin/ansible-playbook -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/configure-logs.yaml script: - > @@ -134,7 +134,7 @@ script: ## Ping the between 2 pod - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL ## Collect env info, enable it once GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scritps/collect-info.yaml +# - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml after_script: - 
> diff --git a/ansible.cfg b/ansible.cfg index 2be6f4d02..f0e4ef652 100644 --- a/ansible.cfg +++ b/ansible.cfg @@ -1,4 +1,7 @@ [ssh_connection] pipelining=True -[defaults] +[defaults] host_key_checking=False +gathering = smart +fact_caching = jsonfile +fact_caching_connection = /tmp diff --git a/tests/ansible.cfg b/tests/ansible.cfg index 2be6f4d02..f0e4ef652 100644 --- a/tests/ansible.cfg +++ b/tests/ansible.cfg @@ -1,4 +1,7 @@ [ssh_connection] pipelining=True -[defaults] +[defaults] host_key_checking=False +gathering = smart +fact_caching = jsonfile +fact_caching_connection = /tmp From c7b00caeaac992e8dbb623994515dd9affe1b1b3 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 21 Oct 2016 14:21:46 +0300 Subject: [PATCH 12/35] Use tar+register instead of copy/slurp for distributing tokens and certs Related bug: https://github.com/ansible/ansible/issues/15405 Uses tar and register because synchronize module cannot sudo on the remote side correctly and copy is too slow. This patch dramatically cuts down the number of tasks to process for cert synchronization. 
--- roles/kubernetes/secrets/tasks/gen_certs.yml | 33 +++++++++---------- roles/kubernetes/secrets/tasks/gen_tokens.yml | 17 ++++------ 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/roles/kubernetes/secrets/tasks/gen_certs.yml b/roles/kubernetes/secrets/tasks/gen_certs.yml index 6057c0676..bec1d9f16 100644 --- a/roles/kubernetes/secrets/tasks/gen_certs.yml +++ b/roles/kubernetes/secrets/tasks/gen_certs.yml @@ -27,31 +27,30 @@ master_certs: ['ca-key.pem', 'admin.pem', 'admin-key.pem', 'apiserver-key.pem', 'apiserver.pem'] node_certs: ['ca.pem', 'node.pem', 'node-key.pem'] -- name: Gen_certs | Get the certs from first master - slurp: - src: "{{ kube_cert_dir }}/{{ item }}" +- name: Gen_certs | Gather master certs + shell: "tar cfz - -C {{ kube_cert_dir }} {{ master_certs|join(' ') }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: master_cert_data delegate_to: "{{groups['kube-master'][0]}}" - register: slurp_certs - with_items: '{{ master_certs + node_certs }}' - when: sync_certs|default(false) run_once: true - notify: set secret_changed + when: sync_certs|default(false) + +- name: Gen_certs | Gather node certs + shell: "tar cfz - -C {{ kube_cert_dir }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: node_cert_data + delegate_to: "{{groups['kube-master'][0]}}" + run_once: true + when: sync_certs|default(false) - name: Gen_certs | Copy certs on masters - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_certs.results}}' + shell: "echo '{{master_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ kube_cert_dir }}" + changed_when: false when: inventory_hostname in groups['kube-master'] and sync_certs|default(false) and inventory_hostname != groups['kube-master'][0] - name: Gen_certs | Copy certs on nodes - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_certs.results}}' - when: item.item in node_certs and - inventory_hostname in 
groups['kube-node'] and sync_certs|default(false) and + shell: "echo '{{node_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ kube_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['kube-node'] and sync_certs|default(false) and inventory_hostname != groups['kube-master'][0] - name: Gen_certs | check certificate permissions diff --git a/roles/kubernetes/secrets/tasks/gen_tokens.yml b/roles/kubernetes/secrets/tasks/gen_tokens.yml index 796657f65..dbe35811b 100644 --- a/roles/kubernetes/secrets/tasks/gen_tokens.yml +++ b/roles/kubernetes/secrets/tasks/gen_tokens.yml @@ -43,20 +43,15 @@ delegate_to: "{{groups['kube-master'][0]}}" when: sync_tokens|default(false) -- name: Gen_tokens | Get the tokens from first master - slurp: - src: "{{ item }}" - register: slurp_tokens - with_items: '{{tokens_list.stdout_lines}}' - run_once: true +- name: Gen_tokens | Gather tokens + shell: "tar cfz - {{ tokens_list.stdout_lines | join(' ') }} | base64 --wrap=0" + register: tokens_data delegate_to: "{{groups['kube-master'][0]}}" + run_once: true when: sync_tokens|default(false) - notify: set secret_changed - name: Gen_tokens | Copy tokens on masters - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_tokens.results}}' + shell: "echo '{{ tokens_data.stdout|quote }}' | base64 -d | tar xz -C /" + changed_when: false when: inventory_hostname in groups['kube-master'] and sync_tokens|default(false) and inventory_hostname != groups['kube-master'][0] From 2778ac61a432b016cc19f0671dae82f2e2d08fee Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Wed, 26 Oct 2016 17:56:15 +0300 Subject: [PATCH 13/35] Add new var skip_dnsmasq_k8s If skip_dnsmasq is set, it will still not set up dnsmasq k8s pod. This enables independent setup of resolvconf section before kubelet is up. 
--- roles/dnsmasq/defaults/main.yml | 8 +++++++- roles/dnsmasq/tasks/main.yml | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/roles/dnsmasq/defaults/main.yml b/roles/dnsmasq/defaults/main.yml index 7a1e77023..89ab02ab8 100644 --- a/roles/dnsmasq/defaults/main.yml +++ b/roles/dnsmasq/defaults/main.yml @@ -16,4 +16,10 @@ dnsmasq_version: 2.72 # Images dnsmasq_image_repo: "andyshinn/dnsmasq" -dnsmasq_image_tag: "{{ dnsmasq_version }}" \ No newline at end of file +dnsmasq_image_tag: "{{ dnsmasq_version }}" + +# Skip dnsmasq setup +skip_dnsmasq: false + +# Skip setting up dnsmasq daemonset +skip_dnsmasq_k8s: "{{ skip_dnsmasq }}" diff --git a/roles/dnsmasq/tasks/main.yml b/roles/dnsmasq/tasks/main.yml index 46c1604f6..6b271a1e2 100644 --- a/roles/dnsmasq/tasks/main.yml +++ b/roles/dnsmasq/tasks/main.yml @@ -1,5 +1,5 @@ --- - include: dnsmasq.yml - when: "{{ not skip_dnsmasq|bool }}" + when: "{{ not skip_dnsmasq_k8s|bool }}" - include: resolvconf.yml From 03e162b342072b8229dc61ce569dcdd9e87848ae Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 28 Oct 2016 11:16:11 +0400 Subject: [PATCH 14/35] Update OWNERS --- OWNERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/OWNERS b/OWNERS index 583a0314b..6ecbee5c9 100644 --- a/OWNERS +++ b/OWNERS @@ -4,3 +4,6 @@ owners: - Smana - ant31 + - bogdando + - mattymo + - rsmitty From 50f77cca1d011e75c5b330cca3114ce54c13f502 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Fri, 28 Oct 2016 14:56:48 +0200 Subject: [PATCH 15/35] Add CI test layouts * Drop Wily from test matrix * Replace the Wily cases dropped with extra cases to test separate roles deployment Signed-off-by: Bogdan Dobrelya --- .travis.yml | 34 ++++++++++++++++++++-------- tests/cloud_playbooks/create-gce.yml | 2 +- tests/cloud_playbooks/delete-gce.yml | 2 +- tests/templates/inventory-gce.j2 | 11 +++++++++ 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 13604765e..863374c26 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -11,80 +11,92 @@ env: CONTAINER_ENGINE=docker PRIVATE_KEY=$GCE_PRIVATE_KEY ANSIBLE_KEEP_REMOTE_FILES=1 + CLUSTER_MODE=default matrix: # Debian Jessie - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=europe-west1-b + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=us-central1-c + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=us-east1-d + CLUSTER_MODE=default # Centos 7 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=asia-east1-c - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=europe-west1-b - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=us-central1-c + CLUSTER_MODE=default # Redhat 7 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=us-east1-d - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=asia-east1-c - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=europe-west1-b + CLUSTER_MODE=default # Ubuntu 16.04 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-central1-c + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-east1-d + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=asia-east1-c + CLUSTER_MODE=default - # Ubuntu 15.10 + # Extra cases for separated roles - >- KUBE_NETWORK_PLUGIN=flannel - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=europe-west1-b + CLUSTER_MODE=separate - >- KUBE_NETWORK_PLUGIN=calico - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-central1-a + CLUSTER_MODE=separate - >- KUBE_NETWORK_PLUGIN=weave - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=debian-8-kubespray 
CLOUD_REGION=us-east1-d + CLUSTER_MODE=separate before_install: @@ -92,7 +104,8 @@ before_install: - pip install --user boto -U - pip install --user ansible - pip install --user netaddr - - pip install --user apache-libcloud + # W/A https://github.com/ansible/ansible-modules-core/issues/5196#issuecomment-253766186 + - pip install --user apache-libcloud==0.20.1 cache: - directories: @@ -114,7 +127,8 @@ before_script: script: - > - $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/create-gce.yml -i tests/local_inventory/hosts -c local $LOG_LEVEL + $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/create-gce.yml -i tests/local_inventory/hosts.cfg -c local $LOG_LEVEL + -e mode=${CLUSTER_MODE} -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} -e gce_project_id=${GCE_PROJECT_ID} diff --git a/tests/cloud_playbooks/create-gce.yml b/tests/cloud_playbooks/create-gce.yml index 840cf2e7c..b2c3e3020 100644 --- a/tests/cloud_playbooks/create-gce.yml +++ b/tests/cloud_playbooks/create-gce.yml @@ -1,6 +1,6 @@ --- - hosts: localhost - sudo: False + become: false gather_facts: no vars: cloud_machine_type: g1-small diff --git a/tests/cloud_playbooks/delete-gce.yml b/tests/cloud_playbooks/delete-gce.yml index d42c6cc91..54902fb6f 100644 --- a/tests/cloud_playbooks/delete-gce.yml +++ b/tests/cloud_playbooks/delete-gce.yml @@ -1,6 +1,6 @@ --- - hosts: localhost - sudo: False + become: false gather_facts: no vars: cloud_machine_type: f1-micro diff --git a/tests/templates/inventory-gce.j2 b/tests/templates/inventory-gce.j2 index 72ad469de..418910771 100644 --- a/tests/templates/inventory-gce.j2 +++ b/tests/templates/inventory-gce.j2 @@ -2,6 +2,16 @@ node1 ansible_ssh_host={{gce.instance_data[0].public_ip}} node2 ansible_ssh_host={{gce.instance_data[1].public_ip}} node3 ansible_ssh_host={{gce.instance_data[2].public_ip}} +{% if mode is defined and mode == "separate" %} +[kube-master] +node1 + +[kube-node] +node2 + +[etcd] +node3 +{% else %} [kube-master] node1 
node2 @@ -14,6 +24,7 @@ node3 [etcd] node1 node2 +{% endif %} [k8s-cluster:children] kube-node From 9d7142f476e50b34f0f7425adb8da8faf3430fe9 Mon Sep 17 00:00:00 2001 From: Alexander Kanevskiy Date: Fri, 28 Oct 2016 23:26:25 +0300 Subject: [PATCH 16/35] Vagrantfile: use Ubuntu 16.04 LTS Use recent supported version of Ubuntu for local development setup with Vagrant. --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index 44f80db8c..4aa9b9180 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -16,7 +16,7 @@ $vm_cpus = 1 $shared_folders = {} $forwarded_ports = {} $subnet = "172.17.8" -$box = "bento/ubuntu-14.04" +$box = "bento/ubuntu-16.04" host_vars = {} From 9f976e568df027b350fd85b26af045984fedba28 Mon Sep 17 00:00:00 2001 From: Alexander Kanevskiy Date: Sat, 29 Oct 2016 00:32:56 +0300 Subject: [PATCH 17/35] Vagrantfile: setup proxy inside virtual machines In corporate networks, it is good to pre-configure proxy variables. --- Vagrantfile | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Vagrantfile b/Vagrantfile index 44f80db8c..0fca78bd6 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -38,6 +38,13 @@ if ! 
File.exist?(File.join(File.dirname($inventory), "hosts")) end end +if Vagrant.has_plugin?("vagrant-proxyconf") + $no_proxy = ENV['NO_PROXY'] || ENV['no_proxy'] || "127.0.0.1,localhost" + (1..$num_instances).each do |i| + $no_proxy += ",#{$subnet}.#{i+100}" + end +end + Vagrant.configure("2") do |config| # always use Vagrants insecure key config.ssh.insert_key = false @@ -52,6 +59,12 @@ Vagrant.configure("2") do |config| config.vm.define vm_name = "%s-%02d" % [$instance_name_prefix, i] do |config| config.vm.hostname = vm_name + if Vagrant.has_plugin?("vagrant-proxyconf") + config.proxy.http = ENV['HTTP_PROXY'] || ENV['http_proxy'] || "" + config.proxy.https = ENV['HTTPS_PROXY'] || ENV['https_proxy'] || "" + config.proxy.no_proxy = $no_proxy + end + if $expose_docker_tcp config.vm.network "forwarded_port", guest: 2375, host: ($expose_docker_tcp + i - 1), auto_correct: true end From 2ca6819cdf9473dd9ddf6ba7efa2e73a37d8d787 Mon Sep 17 00:00:00 2001 From: Jan Jungnickel Date: Mon, 31 Oct 2016 10:15:10 +0100 Subject: [PATCH 18/35] Reload docker.socket after installing flannel on coreos Workaround for #569 --- roles/docker/handlers/main.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml index 6f54f33d5..5a9037688 100644 --- a/roles/docker/handlers/main.yml +++ b/roles/docker/handlers/main.yml @@ -4,6 +4,7 @@ notify: - Docker | reload systemd - Docker | reload docker + - Docker | reload docker.socket - Docker | pause while Docker restarts - Docker | wait for docker @@ -16,6 +17,12 @@ name: docker state: restarted +- name: Docker | reload docker.socket + service: + name: docker.socket + state: restarted + when: ansible_os_family == 'CoreOS' + - name: Docker | pause while Docker restarts pause: seconds=10 prompt="Waiting for docker restart" From f9355ea14d08c0f2b2842a7da2da74a11b2b5139 Mon Sep 17 00:00:00 2001 From: Jan Jungnickel Date: Tue, 1 Nov 2016 13:08:21 +0100 Subject: [PATCH 19/35] Swap order 
in which we reload docker/socket --- roles/docker/handlers/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml index 5a9037688..04d761796 100644 --- a/roles/docker/handlers/main.yml +++ b/roles/docker/handlers/main.yml @@ -3,8 +3,8 @@ command: /bin/true notify: - Docker | reload systemd - - Docker | reload docker - Docker | reload docker.socket + - Docker | reload docker - Docker | pause while Docker restarts - Docker | wait for docker From 3b2554217b1bf73126bfc2251e70e4679664ff48 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Thu, 3 Nov 2016 13:57:33 +0100 Subject: [PATCH 20/35] Upload logs to GCS after failure Delete configure logs script as not needed Rework collect info script defaults Signed-off-by: Bogdan Dobrelya --- .travis.yml | 14 +++++--- scripts/collect-info.yaml | 23 ++++++------- scripts/configure-logs.yaml | 39 ----------------------- tests/cloud_playbooks/upload-logs-gcs.yml | 28 ++++++++++++++++ 4 files changed, 48 insertions(+), 56 deletions(-) delete mode 100644 scripts/configure-logs.yaml create mode 100644 tests/cloud_playbooks/upload-logs-gcs.yml diff --git a/.travis.yml b/.travis.yml index 863374c26..42c32ae5b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,8 @@ env: TEST_ID=$TRAVIS_JOB_NUMBER CONTAINER_ENGINE=docker PRIVATE_KEY=$GCE_PRIVATE_KEY + GS_ACCESS_KEY_ID=$GS_KEY + GS_SECRET_ACCESS_KEY=$GS_SECRET ANSIBLE_KEEP_REMOTE_FILES=1 CLUSTER_MODE=default matrix: @@ -122,8 +124,6 @@ before_script: - $HOME/.local/bin/ansible-playbook --version - cp tests/ansible.cfg . # - "echo $HOME/.local/bin/ansible-playbook -i inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e '{\"cloud_provider\": true}' $LOG_LEVEL -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} setup-kubernetes/cluster.yml" - ## Configure ansible deployment logs to be collected as an artifact. 
Enable when GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/configure-logs.yaml script: - > @@ -147,8 +147,14 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/020_check-create-pod.yml $LOG_LEVEL ## Ping the between 2 pod - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL - ## Collect env info, enable it once GCS configured, see https://docs.travis-ci.com/user/deployment/gcs -# - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml + +after_failure: + - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml >/dev/null + - > + $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local + -e test_id=${TEST_ID} + -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} + >/dev/null after_script: - > diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 67d4c8b35..877b5bf36 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -6,16 +6,10 @@ vars: debug: false commands: - - name: git_info - cmd: find . 
-type d -name .git -execdir sh -c 'gen-gitinfos.sh global|head -12' \; - name: timedate_info cmd: timedatectl status - - name: space_info - cmd: df -h - name: kernel_info cmd: uname -r - - name: distro_info - cmd: cat /etc/issue.net - name: docker_info cmd: docker info - name: ip_info @@ -24,23 +18,26 @@ cmd: ip ro - name: proc_info cmd: ps auxf | grep -v ]$ - - name: systemctl_info - cmd: systemctl status - name: systemctl_failed_info cmd: systemctl --state=failed --no-pager - name: k8s_info cmd: kubectl get all --all-namespaces -o wide - name: errors_info cmd: journalctl -p err --utc --no-pager + - name: etcd_info + cmd: etcdctl --debug cluster-health logs: - - /var/log/ansible.log - - /var/log/ansible/ansible.log - /var/log/syslog - /var/log/daemon.log - /var/log/kern.log - - inventory/inventory.ini - - cluster.yml + - /var/log/dpkg.log + - /var/log/apt/history.log + - /var/log/yum.log + - /var/log/calico/bird/current + - /var/log/calico/bird6/current + - /var/log/calico/felix/current + - /var/log/calico/confd/current tasks: - name: Storing commands output @@ -50,7 +47,7 @@ with_items: "{{commands}}" - debug: var=item - with_items: output.results + with_items: "{{output.results}}" when: debug - name: Fetch results diff --git a/scripts/configure-logs.yaml b/scripts/configure-logs.yaml deleted file mode 100644 index d093e9279..000000000 --- a/scripts/configure-logs.yaml +++ /dev/null @@ -1,39 +0,0 @@ ---- -- hosts: localhost - become: true - gather_facts: no - - vars: - log_path: /var/log/ansible/ - conf_file: /etc/ansible/ansible.cfg - human_readable_plugin: false - callback_plugin_path: /usr/share/ansible/plugins/callback - - tasks: - - name: LOGS | ensure log path - file: path="{{log_path}}" state=directory owner={{ansible_ssh_user}} - - - name: LOGS | ensure plugin path - file: path="{{callback_plugin_path}}" state=directory owner={{ansible_ssh_user}} - when: human_readable_plugin - - - name: LOGS | get plugin - git: 
repo=https://gist.github.com/cd706de198c85a8255f6.git dest=/tmp/cd706de198c85a8255f6 - when: human_readable_plugin - - - name: LOGS | install plugin - copy: src=/tmp/cd706de198c85a8255f6/human_log.py dest="{{callback_plugin_path}}" - when: human_readable_plugin - - - name: LOGS | config - lineinfile: - line: "log_path={{log_path}}/ansible.log" - regexp: "^#log_path|^log_path" - dest: "{{conf_file}}" - - - name: LOGS | callback plugin - lineinfile: - line: "callback_plugins={{callback_plugin_path}}" - regexp: "^#callback_plugins|^callback_plugins" - dest: "{{conf_file}}" - when: human_readable_plugin diff --git a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml new file mode 100644 index 000000000..7a7a022c9 --- /dev/null +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -0,0 +1,28 @@ +--- +- hosts: localhost + become: false + gather_facts: no + + vars: + expire: 72000 + + tasks: + - name: replace_test_id + set_fact: + test_name: "{{ test_id | regex_replace('\\.', '-') }}" + + - name: Create a bucket + gc_storage: + bucket: "{{ test_name }}" + mode: create + expiration: "{{ expire }}" + permission: private + + - name: Upload collected diagnostic info + gc_storage: + bucket: "{{ test_name }}" + mode: put + permission: private + expiration: "{{ expire }}" + object: "build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" + src: logs.tar.gz From 8f20d90f8814cb9be0624321101419bd8fa291b6 Mon Sep 17 00:00:00 2001 From: Spencer Smith Date: Fri, 4 Nov 2016 12:54:35 -0400 Subject: [PATCH 21/35] update admission controllers for > 1.4 --- .../master/templates/manifests/kube-apiserver.manifest.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index ddd6f2085..4100e8a34 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ 
b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -16,7 +16,7 @@ spec: - --etcd-quorum-read=true - --insecure-bind-address={{ kube_apiserver_insecure_bind_address }} - --apiserver-count={{ kube_apiserver_count }} - - --admission-control=NamespaceLifecycle,NamespaceExists,LimitRanger,ServiceAccount,ResourceQuota + - --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota - --service-cluster-ip-range={{ kube_service_addresses }} - --client-ca-file={{ kube_cert_dir }}/ca.pem - --basic-auth-file={{ kube_users_dir }}/known_users.csv From a6bc284abd7e3909f57ff34288e6cc5f5b1c7985 Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Sat, 5 Nov 2016 09:12:40 +0000 Subject: [PATCH 22/35] adds ability to have hosts with no floating ips on terraform/openstack --- contrib/terraform/openstack/README.md | 37 ++++++++++++---- .../openstack/ansible_bastion_template.txt | 1 + .../terraform/openstack/group_vars/all.yml | 44 +++++++++++-------- contrib/terraform/openstack/kubespray.tf | 44 +++++++++++++++++++ contrib/terraform/openstack/variables.tf | 8 ++++ 5 files changed, 107 insertions(+), 27 deletions(-) create mode 100644 contrib/terraform/openstack/ansible_bastion_template.txt diff --git a/contrib/terraform/openstack/README.md b/contrib/terraform/openstack/README.md index ec611a499..2840bde9c 100644 --- a/contrib/terraform/openstack/README.md +++ b/contrib/terraform/openstack/README.md @@ -5,14 +5,13 @@ Openstack. ## Status -This will install a Kubernetes cluster on an Openstack Cloud. It is tested on a -OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and -should work on most modern installs of OpenStack that support the basic +This will install a Kubernetes cluster on an Openstack Cloud. It has been tested on a +OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and on OpenStack at [EMBL-EBI's](http://www.ebi.ac.uk/) [EMBASSY Cloud](http://www.embassycloud.org/). 
This should work on most modern installs of OpenStack that support the basic services. There are some assumptions made to try and ensure it will work on your openstack cluster. -* floating-ips are used for access +* floating-ips are used for access, but you can have masters and nodes that don't use floating-ips if needed. You currently need at least 1 floating ip, which we suggest assigning to a master. * you already have a suitable OS image in glance * you already have both an internal network and a floating-ip pool created * you have security-groups enabled @@ -24,16 +23,14 @@ There are some assumptions made to try and ensure it will work on your openstack ## Terraform -Terraform will be used to provision all of the OpenStack resources required to -run Docker Swarm. It is also used to deploy and provision the software +Terraform will be used to provision all of the OpenStack resources. It is also used to deploy and provision the software requirements. ### Prep #### OpenStack -Ensure your OpenStack credentials are loaded in environment variables. This is -how I do it: +Ensure your OpenStack credentials are loaded in environment variables. This can be done by downloading a credentials .rc file from your OpenStack dashboard and sourcing it: ``` $ source ~/.stackrc @@ -46,7 +43,7 @@ differences between OpenStack installs the Terraform does not attempt to create these for you. By default Terraform will expect that your networks are called `internal` and -`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. +`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. This can be done in a new variables file or through environment variables. A full list of variables you can change can be found at [variables.tf](variables.tf).
@@ -76,8 +73,21 @@ $ echo Setting up Terraform creds && \ export TF_VAR_auth_url=${OS_AUTH_URL} ``` +If you want to provision master or node VMs that don't use floating ips, put the relevant variables in a `my-terraform-vars.tfvars` file, for example: + +``` +number_of_k8s_masters = "1" +number_of_k8s_masters_no_floating_ip = "2" +number_of_k8s_nodes_no_floating_ip = "1" +number_of_k8s_nodes = "0" +``` +This will provision one VM as master using a floating ip, two additional masters using no floating ips (these will only have private ips inside your tenancy) and one VM as node, again without a floating ip. + + + # Provision a Kubernetes Cluster on OpenStack +If not using a tfvars file for your setup, then execute: ``` terraform apply -state=contrib/terraform/openstack/terraform.tfstate contrib/terraform/openstack openstack_compute_secgroup_v2.k8s_master: Creating... @@ -96,6 +106,13 @@ use the `terraform show` command. State path: contrib/terraform/openstack/terraform.tfstate ``` +Alternatively, if you wrote your terraform variables in a file `my-terraform-vars.tfvars`, your command would look like: +``` +terraform apply -state=contrib/terraform/openstack/terraform.tfstate -var-file=my-terraform-vars.tfvars contrib/terraform/openstack +``` + +If you choose to add masters or nodes without floating ips (only internal ips on your OpenStack tenancy), this script will also create a file `contrib/terraform/openstack/group_vars/k8s-cluster.yml` with an ssh command for ansible to be able to access your machines by tunneling through the first floating ip used. If you want to handle the ssh tunneling to these machines manually, please delete or move that file. If you want to use this, just leave it there, as ansible will pick it up automatically. + Make sure you can connect to the hosts: ``` @@ -114,6 +131,8 @@ example-k8s-master-1 | SUCCESS => { } ``` +If you are deploying a system that needs bootstrapping, like CoreOS, these might have a state `FAILED` due to CoreOS not having python.
As long as the state is not `UNREACHABLE`, this is fine. + if it fails try to connect manually via SSH ... it could be somthing as simple as a stale host key. Deploy kubernetes: diff --git a/contrib/terraform/openstack/ansible_bastion_template.txt b/contrib/terraform/openstack/ansible_bastion_template.txt new file mode 100644 index 000000000..cdf012066 --- /dev/null +++ b/contrib/terraform/openstack/ansible_bastion_template.txt @@ -0,0 +1 @@ +ansible_ssh_common_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -W %h:%p -q USER@BASTION_ADDRESS"' diff --git a/contrib/terraform/openstack/group_vars/all.yml b/contrib/terraform/openstack/group_vars/all.yml index b73fb66b2..8b0cd2bcd 100644 --- a/contrib/terraform/openstack/group_vars/all.yml +++ b/contrib/terraform/openstack/group_vars/all.yml @@ -1,9 +1,14 @@ +# Valid bootstrap options (required): xenial, coreos, none +bootstrap_os: "none" + # Directory where the binaries will be installed bin_dir: /usr/local/bin # Where the binaries will be downloaded. # Note: ensure that you've enough disk space (about 1G) local_release_dir: "/tmp/releases" +# Random shifts for retrying failed ops like pushing/downloading +retry_stagger: 5 # Uncomment this line for CoreOS only. # Directory where python binary is installed @@ -28,6 +33,8 @@ kube_users: # Kubernetes cluster name, also will be used as DNS domain cluster_name: cluster.local +# Subdomains of DNS domain to be resolved via /etc/resolv.conf +ndots: 5 # For some environments, each node has a pubilcally accessible # address and an address it should bind services to. These are @@ -51,6 +58,16 @@ cluster_name: cluster.local # but don't know about that address themselves. # access_ip: 1.1.1.1 +# Etcd access modes: +# Enable multiaccess to configure clients to access all of the etcd members directly +# as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. 
+# This may be the case if clients support and loadbalance multiple etcd servers natively. +etcd_multiaccess: false + +# Assume there are no internal loadbalancers for apiservers exist and listen on +# kube_apiserver_port (default 443) +loadbalancer_apiserver_localhost: true + # Choose network plugin (calico, weave or flannel) kube_network_plugin: flannel @@ -89,10 +106,12 @@ kube_apiserver_insecure_port: 8080 # (http) # You still must manually configure all your containers to use this DNS server, # Kubernetes won't do this for you (yet). +# Do not install additional dnsmasq +skip_dnsmasq: false # Upstream dns servers used by dnsmasq -upstream_dns_servers: - - 8.8.8.8 - - 8.8.4.4 +#upstream_dns_servers: +# - 8.8.8.8 +# - 8.8.4.4 # # # Use dns server : https://github.com/ansibl8s/k8s-skydns/blob/master/skydns-README.md dns_setup: true @@ -109,21 +128,6 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') # like you would do when using nova-client before starting the playbook. 
# cloud_provider: -# For multi masters architecture: -# kube-proxy doesn't support multiple apiservers for the time being so you'll need to configure your own loadbalancer -# This domain name will be inserted into the /etc/hosts file of all servers -# configuration example with haproxy : -# listen kubernetes-apiserver-https -# bind 10.99.0.21:8383 -# option ssl-hello-chk -# mode tcp -# timeout client 3h -# timeout server 3h -# server master1 10.99.0.26:443 -# server master2 10.99.0.27:443 -# balance roundrobin -# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" - ## Set these proxy values in order to update docker daemon to use proxies # http_proxy: "" # https_proxy: "" @@ -134,3 +138,7 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') ## An obvious use case is allowing insecure-registry access ## to self hosted registries like so: docker_options: "--insecure-registry={{ kube_service_addresses }}" + +# default packages to install within the cluster +kpm_packages: [] +# - name: kube-system/grafana diff --git a/contrib/terraform/openstack/kubespray.tf b/contrib/terraform/openstack/kubespray.tf index 27217d08b..ba526b3e0 100644 --- a/contrib/terraform/openstack/kubespray.tf +++ b/contrib/terraform/openstack/kubespray.tf @@ -70,6 +70,28 @@ resource "openstack_compute_instance_v2" "k8s_master" { ssh_user = "${var.ssh_user}" kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" } + +} + + +resource "openstack_compute_instance_v2" "k8s_master_no_floating_ip" { + name = "${var.cluster_name}-k8s-master-nf-${count.index+1}" + count = "${var.number_of_k8s_masters_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_master}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = [ "${openstack_compute_secgroup_v2.k8s_master.name}", + "${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + 
kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } } resource "openstack_compute_instance_v2" "k8s_node" { @@ -89,6 +111,28 @@ resource "openstack_compute_instance_v2" "k8s_node" { } } +resource "openstack_compute_instance_v2" "k8s_node_no_floating_ip" { + name = "${var.cluster_name}-k8s-node-nf-${count.index+1}" + count = "${var.number_of_k8s_nodes_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_node}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = ["${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + kubespray_groups = "kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } +} + + + + #output "msg" { # value = "Your hosts are ready to go!\nYour ssh hosts are: ${join(", ", openstack_networking_floatingip_v2.k8s_master.*.address )}" #} diff --git a/contrib/terraform/openstack/variables.tf b/contrib/terraform/openstack/variables.tf index 6c1fc767d..8be38aed5 100644 --- a/contrib/terraform/openstack/variables.tf +++ b/contrib/terraform/openstack/variables.tf @@ -6,10 +6,18 @@ variable "number_of_k8s_masters" { default = 2 } +variable "number_of_k8s_masters_no_floating_ip" { + default = 2 +} + variable "number_of_k8s_nodes" { default = 1 } +variable "number_of_k8s_nodes_no_floating_ip" { + default = 1 +} + variable "public_key_path" { description = "The path of the ssh pub key" 
default = "~/.ssh/id_rsa.pub" From 39b8336f3fff9b43d1bd35c52085cd8196f342cd Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Fri, 4 Nov 2016 17:53:34 +0100 Subject: [PATCH 23/35] Fix upload logs Signed-off-by: Bogdan Dobrelya --- .travis.yml | 3 ++- tests/cloud_playbooks/upload-logs-gcs.yml | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 42c32ae5b..ac922fd80 100644 --- a/.travis.yml +++ b/.travis.yml @@ -154,7 +154,8 @@ after_failure: $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} - >/dev/null + -e gs_key=${GS_ACCESS_KEY_ID} + -e gs_skey=${GS_SECRET_ACCESS_KEY} after_script: - > diff --git a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml index 7a7a022c9..ba31ab56f 100644 --- a/tests/cloud_playbooks/upload-logs-gcs.yml +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -17,6 +17,8 @@ mode: create expiration: "{{ expire }}" permission: private + gs_access_key: gs_key + gs_secret_key: gs_skey - name: Upload collected diagnostic info gc_storage: @@ -26,3 +28,16 @@ expiration: "{{ expire }}" object: "build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" src: logs.tar.gz + gs_access_key: gs_key + gs_secret_key: gs_skey + + - name: Get a link + gc_storage: + bucket: "{{ test_name }}" + object: "build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" + mode: get_url + register: url + gs_access_key: gs_key + gs_secret_key: gs_skey + + - debug: msg="Download URL {{get_url}}" From f106bf5bc47742a34318dadb8f5681be5ff7a63f Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Wed, 26 Oct 2016 17:56:15 +0300 Subject: [PATCH 24/35] adds ability to have hosts with no floating ips on terraform/openstack (+8 squashed commits) Squashed commits: [f9355ea] Swap order in which we reload docker/socket [2ca6819] Reload docker.socket after installing 
flannel on coreos Workaround for #569 [9f976e5] Vagrantfile: setup proxy inside virtual machines In corporate networks, it is good to pre-configure proxy variables. [9d7142f] Vagrantfile: use Ubuntu 16.04 LTS Use recent supported version of Ubuntu for local development setup with Vagrant. [50f77cc] Add CI test layouts * Drop Wily from test matrix * Replace the Wily cases dropped with extra cases to test separate roles deployment Signed-off-by: Bogdan Dobrelya [03e162b] Update OWNERS [c7b00ca] Use tar+register instead of copy/slurp for distributing tokens and certs Related bug: https://github.com/ansible/ansible/issues/15405 Uses tar and register because synchronize module cannot sudo on the remote side correctly and copy is too slow. This patch dramatically cuts down the number of tasks to process for cert synchronization. [2778ac6] Add new var skip_dnsmasq_k8s If skip_dnsmasq is set, it will still not set up dnsmasq k8s pod. This enables independent setup of resolvconf section before kubelet is up. 
--- .travis.yml | 34 +++++++++----- OWNERS | 3 ++ Vagrantfile | 15 ++++++- contrib/terraform/openstack/README.md | 37 ++++++++++++---- .../openstack/ansible_bastion_template.txt | 1 + .../terraform/openstack/group_vars/all.yml | 44 +++++++++++-------- contrib/terraform/openstack/kubespray.tf | 44 +++++++++++++++++++ contrib/terraform/openstack/variables.tf | 8 ++++ roles/dnsmasq/defaults/main.yml | 8 +++- roles/dnsmasq/tasks/main.yml | 2 +- roles/docker/handlers/main.yml | 7 +++ roles/kubernetes/secrets/tasks/gen_certs.yml | 33 +++++++------- roles/kubernetes/secrets/tasks/gen_tokens.yml | 17 +++---- tests/cloud_playbooks/create-gce.yml | 2 +- tests/cloud_playbooks/delete-gce.yml | 2 +- tests/templates/inventory-gce.j2 | 11 +++++ 16 files changed, 198 insertions(+), 70 deletions(-) create mode 100644 contrib/terraform/openstack/ansible_bastion_template.txt diff --git a/.travis.yml b/.travis.yml index 13604765e..863374c26 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,80 +11,92 @@ env: CONTAINER_ENGINE=docker PRIVATE_KEY=$GCE_PRIVATE_KEY ANSIBLE_KEEP_REMOTE_FILES=1 + CLUSTER_MODE=default matrix: # Debian Jessie - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=europe-west1-b + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=us-central1-c + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=us-east1-d + CLUSTER_MODE=default # Centos 7 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=asia-east1-c - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=europe-west1-b - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=centos-7-sudo CLOUD_REGION=us-central1-c + CLUSTER_MODE=default # Redhat 7 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=us-east1-d - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=rhel-7-sudo 
CLOUD_REGION=asia-east1-c - + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=europe-west1-b + CLUSTER_MODE=default # Ubuntu 16.04 - >- KUBE_NETWORK_PLUGIN=flannel CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-central1-c + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=calico CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-east1-d + CLUSTER_MODE=default - >- KUBE_NETWORK_PLUGIN=weave CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=asia-east1-c + CLUSTER_MODE=default - # Ubuntu 15.10 + # Extra cases for separated roles - >- KUBE_NETWORK_PLUGIN=flannel - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=rhel-7-sudo CLOUD_REGION=europe-west1-b + CLUSTER_MODE=separate - >- KUBE_NETWORK_PLUGIN=calico - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=ubuntu-1604-xenial CLOUD_REGION=us-central1-a + CLUSTER_MODE=separate - >- KUBE_NETWORK_PLUGIN=weave - CLOUD_IMAGE=ubuntu-1510-wily + CLOUD_IMAGE=debian-8-kubespray CLOUD_REGION=us-east1-d + CLUSTER_MODE=separate before_install: @@ -92,7 +104,8 @@ before_install: - pip install --user boto -U - pip install --user ansible - pip install --user netaddr - - pip install --user apache-libcloud + # W/A https://github.com/ansible/ansible-modules-core/issues/5196#issuecomment-253766186 + - pip install --user apache-libcloud==0.20.1 cache: - directories: @@ -114,7 +127,8 @@ before_script: script: - > - $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/create-gce.yml -i tests/local_inventory/hosts -c local $LOG_LEVEL + $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/create-gce.yml -i tests/local_inventory/hosts.cfg -c local $LOG_LEVEL + -e mode=${CLUSTER_MODE} -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} -e gce_project_id=${GCE_PROJECT_ID} diff --git a/OWNERS b/OWNERS index 583a0314b..6ecbee5c9 100644 --- a/OWNERS +++ b/OWNERS @@ -4,3 +4,6 @@ owners: - Smana - ant31 + - bogdando + - mattymo + - rsmitty diff --git a/Vagrantfile b/Vagrantfile index 44f80db8c..73f812bdf 100644 
--- a/Vagrantfile +++ b/Vagrantfile @@ -16,7 +16,7 @@ $vm_cpus = 1 $shared_folders = {} $forwarded_ports = {} $subnet = "172.17.8" -$box = "bento/ubuntu-14.04" +$box = "bento/ubuntu-16.04" host_vars = {} @@ -38,6 +38,13 @@ if ! File.exist?(File.join(File.dirname($inventory), "hosts")) end end +if Vagrant.has_plugin?("vagrant-proxyconf") + $no_proxy = ENV['NO_PROXY'] || ENV['no_proxy'] || "127.0.0.1,localhost" + (1..$num_instances).each do |i| + $no_proxy += ",#{$subnet}.#{i+100}" + end +end + Vagrant.configure("2") do |config| # always use Vagrants insecure key config.ssh.insert_key = false @@ -52,6 +59,12 @@ Vagrant.configure("2") do |config| config.vm.define vm_name = "%s-%02d" % [$instance_name_prefix, i] do |config| config.vm.hostname = vm_name + if Vagrant.has_plugin?("vagrant-proxyconf") + config.proxy.http = ENV['HTTP_PROXY'] || ENV['http_proxy'] || "" + config.proxy.https = ENV['HTTPS_PROXY'] || ENV['https_proxy'] || "" + config.proxy.no_proxy = $no_proxy + end + if $expose_docker_tcp config.vm.network "forwarded_port", guest: 2375, host: ($expose_docker_tcp + i - 1), auto_correct: true end diff --git a/contrib/terraform/openstack/README.md b/contrib/terraform/openstack/README.md index ec611a499..2840bde9c 100644 --- a/contrib/terraform/openstack/README.md +++ b/contrib/terraform/openstack/README.md @@ -5,14 +5,13 @@ Openstack. ## Status -This will install a Kubernetes cluster on an Openstack Cloud. It is tested on a -OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and -should work on most modern installs of OpenStack that support the basic +This will install a Kubernetes cluster on an Openstack Cloud. It has been tested on a +OpenStack Cloud provided by [BlueBox](https://www.blueboxcloud.com/) and on OpenStack at [EMBL-EBI's](http://www.ebi.ac.uk/) [EMBASSY Cloud](http://www.embassycloud.org/). This should work on most modern installs of OpenStack that support the basic services. 
There are some assumptions made to try and ensure it will work on your openstack cluster. -* floating-ips are used for access +* floating-ips are used for access, but you can have masters and nodes that don't use floating-ips if needed. You need currently at least 1 floating ip, which we would suggest is used on a master. * you already have a suitable OS image in glance * you already have both an internal network and a floating-ip pool created * you have security-groups enabled @@ -24,16 +23,14 @@ There are some assumptions made to try and ensure it will work on your openstack ## Terraform -Terraform will be used to provision all of the OpenStack resources required to -run Docker Swarm. It is also used to deploy and provision the software +Terraform will be used to provision all of the OpenStack resources. It is also used to deploy and provision the software requirements. ### Prep #### OpenStack -Ensure your OpenStack credentials are loaded in environment variables. This is -how I do it: +Ensure your OpenStack credentials are loaded in environment variables. This can be done by downloading a credentials .rc file from your OpenStack dashboard and sourcing it: ``` $ source ~/.stackrc @@ -46,7 +43,7 @@ differences between OpenStack installs the Terraform does not attempt to create these for you. By default Terraform will expect that your networks are called `internal` and -`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. +`external`. You can change this by altering the Terraform variables `network_name` and `floatingip_pool`. This can be done on a new variables file or through environment variables. A full list of variables you can change can be found at [variables.tf](variables.tf). 
@@ -76,8 +73,21 @@ $ echo Setting up Terraform creds && \ export TF_VAR_auth_url=${OS_AUTH_URL} ``` +If you want to provision master or node VMs that don't use floating ips, put the settings in a `my-terraform-vars.tfvars` file, for example: + +``` +number_of_k8s_masters = "1" +number_of_k8s_masters_no_floating_ip = "2" +number_of_k8s_nodes_no_floating_ip = "1" +number_of_k8s_nodes = "0" +``` +This will provision one VM as master using a floating ip, two additional masters using no floating ips (these will only have private ips inside your tenancy) and one VM as node, again without a floating ip. + + + # Provision a Kubernetes Cluster on OpenStack +If not using a tfvars file for your setup, then execute: ``` terraform apply -state=contrib/terraform/openstack/terraform.tfstate contrib/terraform/openstack openstack_compute_secgroup_v2.k8s_master: Creating... @@ -96,6 +106,13 @@ use the `terraform show` command. State path: contrib/terraform/openstack/terraform.tfstate ``` +Alternatively, if you wrote your terraform variables in a file `my-terraform-vars.tfvars`, your command would look like: +``` +terraform apply -state=contrib/terraform/openstack/terraform.tfstate -var-file=my-terraform-vars.tfvars contrib/terraform/openstack +``` + +If you choose to add masters or nodes without floating ips (only internal ips on your OpenStack tenancy), this script will also create a file `contrib/terraform/openstack/group_vars/k8s-cluster.yml` with an ssh command for ansible to be able to access your machines by tunneling through the first floating ip used. If you want to handle the ssh tunneling to these machines manually, please delete or move that file. If you want to use this, just leave it there, as ansible will pick it up automatically. + Make sure you can connect to the hosts: ``` @@ -114,6 +131,8 @@ example-k8s-master-1 | SUCCESS => { } ``` +If you are deploying a system that needs bootstrapping, like CoreOS, these might have a state `FAILED` due to CoreOS not having python. 
As long as the state is not `UNREACHABLE`, this is fine. + if it fails try to connect manually via SSH ... it could be somthing as simple as a stale host key. Deploy kubernetes: diff --git a/contrib/terraform/openstack/ansible_bastion_template.txt b/contrib/terraform/openstack/ansible_bastion_template.txt new file mode 100644 index 000000000..cdf012066 --- /dev/null +++ b/contrib/terraform/openstack/ansible_bastion_template.txt @@ -0,0 +1 @@ +ansible_ssh_common_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -W %h:%p -q USER@BASTION_ADDRESS"' diff --git a/contrib/terraform/openstack/group_vars/all.yml b/contrib/terraform/openstack/group_vars/all.yml index b73fb66b2..8b0cd2bcd 100644 --- a/contrib/terraform/openstack/group_vars/all.yml +++ b/contrib/terraform/openstack/group_vars/all.yml @@ -1,9 +1,14 @@ +# Valid bootstrap options (required): xenial, coreos, none +bootstrap_os: "none" + # Directory where the binaries will be installed bin_dir: /usr/local/bin # Where the binaries will be downloaded. # Note: ensure that you've enough disk space (about 1G) local_release_dir: "/tmp/releases" +# Random shifts for retrying failed ops like pushing/downloading +retry_stagger: 5 # Uncomment this line for CoreOS only. # Directory where python binary is installed @@ -28,6 +33,8 @@ kube_users: # Kubernetes cluster name, also will be used as DNS domain cluster_name: cluster.local +# Subdomains of DNS domain to be resolved via /etc/resolv.conf +ndots: 5 # For some environments, each node has a pubilcally accessible # address and an address it should bind services to. These are @@ -51,6 +58,16 @@ cluster_name: cluster.local # but don't know about that address themselves. # access_ip: 1.1.1.1 +# Etcd access modes: +# Enable multiaccess to configure clients to access all of the etcd members directly +# as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. 
+# This may be the case if clients support and loadbalance multiple etcd servers natively. +etcd_multiaccess: false + +# Assume there are no internal loadbalancers for apiservers exist and listen on +# kube_apiserver_port (default 443) +loadbalancer_apiserver_localhost: true + # Choose network plugin (calico, weave or flannel) kube_network_plugin: flannel @@ -89,10 +106,12 @@ kube_apiserver_insecure_port: 8080 # (http) # You still must manually configure all your containers to use this DNS server, # Kubernetes won't do this for you (yet). +# Do not install additional dnsmasq +skip_dnsmasq: false # Upstream dns servers used by dnsmasq -upstream_dns_servers: - - 8.8.8.8 - - 8.8.4.4 +#upstream_dns_servers: +# - 8.8.8.8 +# - 8.8.4.4 # # # Use dns server : https://github.com/ansibl8s/k8s-skydns/blob/master/skydns-README.md dns_setup: true @@ -109,21 +128,6 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') # like you would do when using nova-client before starting the playbook. 
# cloud_provider: -# For multi masters architecture: -# kube-proxy doesn't support multiple apiservers for the time being so you'll need to configure your own loadbalancer -# This domain name will be inserted into the /etc/hosts file of all servers -# configuration example with haproxy : -# listen kubernetes-apiserver-https -# bind 10.99.0.21:8383 -# option ssl-hello-chk -# mode tcp -# timeout client 3h -# timeout server 3h -# server master1 10.99.0.26:443 -# server master2 10.99.0.27:443 -# balance roundrobin -# apiserver_loadbalancer_domain_name: "lb-apiserver.kubernetes.local" - ## Set these proxy values in order to update docker daemon to use proxies # http_proxy: "" # https_proxy: "" @@ -134,3 +138,7 @@ dns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(2)|ipaddr('address') ## An obvious use case is allowing insecure-registry access ## to self hosted registries like so: docker_options: "--insecure-registry={{ kube_service_addresses }}" + +# default packages to install within the cluster +kpm_packages: [] +# - name: kube-system/grafana diff --git a/contrib/terraform/openstack/kubespray.tf b/contrib/terraform/openstack/kubespray.tf index 27217d08b..ba526b3e0 100644 --- a/contrib/terraform/openstack/kubespray.tf +++ b/contrib/terraform/openstack/kubespray.tf @@ -70,6 +70,28 @@ resource "openstack_compute_instance_v2" "k8s_master" { ssh_user = "${var.ssh_user}" kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" } + +} + + +resource "openstack_compute_instance_v2" "k8s_master_no_floating_ip" { + name = "${var.cluster_name}-k8s-master-nf-${count.index+1}" + count = "${var.number_of_k8s_masters_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_master}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = [ "${openstack_compute_secgroup_v2.k8s_master.name}", + "${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + 
kubespray_groups = "etcd,kube-master,kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } } resource "openstack_compute_instance_v2" "k8s_node" { @@ -89,6 +111,28 @@ resource "openstack_compute_instance_v2" "k8s_node" { } } +resource "openstack_compute_instance_v2" "k8s_node_no_floating_ip" { + name = "${var.cluster_name}-k8s-node-nf-${count.index+1}" + count = "${var.number_of_k8s_nodes_no_floating_ip}" + image_name = "${var.image}" + flavor_id = "${var.flavor_k8s_node}" + key_pair = "${openstack_compute_keypair_v2.k8s.name}" + network { + name = "${var.network_name}" + } + security_groups = ["${openstack_compute_secgroup_v2.k8s.name}" ] + metadata = { + ssh_user = "${var.ssh_user}" + kubespray_groups = "kube-node,k8s-cluster" + } + provisioner "local-exec" { + command = "sed s/USER/${var.ssh_user}/ contrib/terraform/openstack/ansible_bastion_template.txt | sed s/BASTION_ADDRESS/${element(openstack_networking_floatingip_v2.k8s_master.*.address, 0)}/ > contrib/terraform/openstack/group_vars/k8s-cluster.yml" + } +} + + + + #output "msg" { # value = "Your hosts are ready to go!\nYour ssh hosts are: ${join(", ", openstack_networking_floatingip_v2.k8s_master.*.address )}" #} diff --git a/contrib/terraform/openstack/variables.tf b/contrib/terraform/openstack/variables.tf index 6c1fc767d..8be38aed5 100644 --- a/contrib/terraform/openstack/variables.tf +++ b/contrib/terraform/openstack/variables.tf @@ -6,10 +6,18 @@ variable "number_of_k8s_masters" { default = 2 } +variable "number_of_k8s_masters_no_floating_ip" { + default = 2 +} + variable "number_of_k8s_nodes" { default = 1 } +variable "number_of_k8s_nodes_no_floating_ip" { + default = 1 +} + variable "public_key_path" { description = "The path of the ssh pub key" 
default = "~/.ssh/id_rsa.pub" diff --git a/roles/dnsmasq/defaults/main.yml b/roles/dnsmasq/defaults/main.yml index 7a1e77023..89ab02ab8 100644 --- a/roles/dnsmasq/defaults/main.yml +++ b/roles/dnsmasq/defaults/main.yml @@ -16,4 +16,10 @@ dnsmasq_version: 2.72 # Images dnsmasq_image_repo: "andyshinn/dnsmasq" -dnsmasq_image_tag: "{{ dnsmasq_version }}" \ No newline at end of file +dnsmasq_image_tag: "{{ dnsmasq_version }}" + +# Skip dnsmasq setup +skip_dnsmasq: false + +# Skip setting up dnsmasq daemonset +skip_dnsmasq_k8s: "{{ skip_dnsmasq }}" diff --git a/roles/dnsmasq/tasks/main.yml b/roles/dnsmasq/tasks/main.yml index 46c1604f6..6b271a1e2 100644 --- a/roles/dnsmasq/tasks/main.yml +++ b/roles/dnsmasq/tasks/main.yml @@ -1,5 +1,5 @@ --- - include: dnsmasq.yml - when: "{{ not skip_dnsmasq|bool }}" + when: "{{ not skip_dnsmasq_k8s|bool }}" - include: resolvconf.yml diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml index 6f54f33d5..04d761796 100644 --- a/roles/docker/handlers/main.yml +++ b/roles/docker/handlers/main.yml @@ -3,6 +3,7 @@ command: /bin/true notify: - Docker | reload systemd + - Docker | reload docker.socket - Docker | reload docker - Docker | pause while Docker restarts - Docker | wait for docker @@ -16,6 +17,12 @@ name: docker state: restarted +- name: Docker | reload docker.socket + service: + name: docker.socket + state: restarted + when: ansible_os_family == 'CoreOS' + - name: Docker | pause while Docker restarts pause: seconds=10 prompt="Waiting for docker restart" diff --git a/roles/kubernetes/secrets/tasks/gen_certs.yml b/roles/kubernetes/secrets/tasks/gen_certs.yml index 6057c0676..bec1d9f16 100644 --- a/roles/kubernetes/secrets/tasks/gen_certs.yml +++ b/roles/kubernetes/secrets/tasks/gen_certs.yml @@ -27,31 +27,30 @@ master_certs: ['ca-key.pem', 'admin.pem', 'admin-key.pem', 'apiserver-key.pem', 'apiserver.pem'] node_certs: ['ca.pem', 'node.pem', 'node-key.pem'] -- name: Gen_certs | Get the certs from first master - 
slurp: - src: "{{ kube_cert_dir }}/{{ item }}" +- name: Gen_certs | Gather master certs + shell: "tar cfz - -C {{ kube_cert_dir }} {{ master_certs|join(' ') }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: master_cert_data delegate_to: "{{groups['kube-master'][0]}}" - register: slurp_certs - with_items: '{{ master_certs + node_certs }}' - when: sync_certs|default(false) run_once: true - notify: set secret_changed + when: sync_certs|default(false) + +- name: Gen_certs | Gather node certs + shell: "tar cfz - -C {{ kube_cert_dir }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: node_cert_data + delegate_to: "{{groups['kube-master'][0]}}" + run_once: true + when: sync_certs|default(false) - name: Gen_certs | Copy certs on masters - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_certs.results}}' + shell: "echo '{{master_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ kube_cert_dir }}" + changed_when: false when: inventory_hostname in groups['kube-master'] and sync_certs|default(false) and inventory_hostname != groups['kube-master'][0] - name: Gen_certs | Copy certs on nodes - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_certs.results}}' - when: item.item in node_certs and - inventory_hostname in groups['kube-node'] and sync_certs|default(false) and + shell: "echo '{{node_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ kube_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['kube-node'] and sync_certs|default(false) and inventory_hostname != groups['kube-master'][0] - name: Gen_certs | check certificate permissions diff --git a/roles/kubernetes/secrets/tasks/gen_tokens.yml b/roles/kubernetes/secrets/tasks/gen_tokens.yml index 796657f65..dbe35811b 100644 --- a/roles/kubernetes/secrets/tasks/gen_tokens.yml +++ b/roles/kubernetes/secrets/tasks/gen_tokens.yml @@ -43,20 +43,15 @@ delegate_to: 
"{{groups['kube-master'][0]}}" when: sync_tokens|default(false) -- name: Gen_tokens | Get the tokens from first master - slurp: - src: "{{ item }}" - register: slurp_tokens - with_items: '{{tokens_list.stdout_lines}}' - run_once: true +- name: Gen_tokens | Gather tokens + shell: "tar cfz - {{ tokens_list.stdout_lines | join(' ') }} | base64 --wrap=0" + register: tokens_data delegate_to: "{{groups['kube-master'][0]}}" + run_once: true when: sync_tokens|default(false) - notify: set secret_changed - name: Gen_tokens | Copy tokens on masters - copy: - content: "{{ item.content|b64decode }}" - dest: "{{ item.source }}" - with_items: '{{slurp_tokens.results}}' + shell: "echo '{{ tokens_data.stdout|quote }}' | base64 -d | tar xz -C /" + changed_when: false when: inventory_hostname in groups['kube-master'] and sync_tokens|default(false) and inventory_hostname != groups['kube-master'][0] diff --git a/tests/cloud_playbooks/create-gce.yml b/tests/cloud_playbooks/create-gce.yml index 840cf2e7c..b2c3e3020 100644 --- a/tests/cloud_playbooks/create-gce.yml +++ b/tests/cloud_playbooks/create-gce.yml @@ -1,6 +1,6 @@ --- - hosts: localhost - sudo: False + become: false gather_facts: no vars: cloud_machine_type: g1-small diff --git a/tests/cloud_playbooks/delete-gce.yml b/tests/cloud_playbooks/delete-gce.yml index d42c6cc91..54902fb6f 100644 --- a/tests/cloud_playbooks/delete-gce.yml +++ b/tests/cloud_playbooks/delete-gce.yml @@ -1,6 +1,6 @@ --- - hosts: localhost - sudo: False + become: false gather_facts: no vars: cloud_machine_type: f1-micro diff --git a/tests/templates/inventory-gce.j2 b/tests/templates/inventory-gce.j2 index 72ad469de..418910771 100644 --- a/tests/templates/inventory-gce.j2 +++ b/tests/templates/inventory-gce.j2 @@ -2,6 +2,16 @@ node1 ansible_ssh_host={{gce.instance_data[0].public_ip}} node2 ansible_ssh_host={{gce.instance_data[1].public_ip}} node3 ansible_ssh_host={{gce.instance_data[2].public_ip}} +{% if mode is defined and mode == "separate" %} +[kube-master] 
+node1 + +[kube-node] +node2 + +[etcd] +node3 +{% else %} [kube-master] node1 node2 @@ -14,6 +24,7 @@ node3 [etcd] node1 node2 +{% endif %} [k8s-cluster:children] kube-node From 309240cd6fb7168828354027d58557e2e059b36c Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Thu, 3 Nov 2016 16:57:11 +0100 Subject: [PATCH 25/35] Adding support for canal network plugin This patch provides support for Canal network plugin installation as a self-hosted app, see the following link for details: https://github.com/tigera/canal/tree/master/k8s-install --- cluster.yml | 3 + roles/download/defaults/main.yml | 17 ++- roles/kubernetes-apps/ansible/tasks/main.yaml | 1 - .../{ansible => lib}/library/kube.py | 0 .../network_plugin/canal/tasks/main.yaml | 8 ++ .../network_plugin/meta/main.yml | 4 + roles/kubernetes/node/tasks/main.yml | 7 ++ .../node/templates/cni-canal.conf.j2 | 17 +++ roles/kubernetes/node/templates/kubelet.j2 | 2 +- roles/kubernetes/preinstall/tasks/main.yml | 2 +- roles/network_plugin/canal/defaults/main.yml | 11 ++ roles/network_plugin/canal/meta/main.yml | 12 ++ roles/network_plugin/canal/tasks/main.yml | 27 +++++ .../canal/templates/canal-node.yml.j2 | 112 ++++++++++++++++++ .../canal/templates/network.json.j2 | 1 + roles/network_plugin/meta/main.yml | 2 + 16 files changed, 218 insertions(+), 8 deletions(-) rename roles/kubernetes-apps/{ansible => lib}/library/kube.py (100%) create mode 100644 roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml create mode 100644 roles/kubernetes-apps/network_plugin/meta/main.yml create mode 100644 roles/kubernetes/node/templates/cni-canal.conf.j2 create mode 100644 roles/network_plugin/canal/defaults/main.yml create mode 100644 roles/network_plugin/canal/meta/main.yml create mode 100644 roles/network_plugin/canal/tasks/main.yml create mode 100644 roles/network_plugin/canal/templates/canal-node.yml.j2 create mode 100644 roles/network_plugin/canal/templates/network.json.j2 diff --git a/cluster.yml b/cluster.yml index 
295bb668a..12c090169 100644 --- a/cluster.yml +++ b/cluster.yml @@ -27,6 +27,8 @@ - hosts: kube-master roles: - { role: kubernetes/master, tags: master } + - { role: kubernetes-apps/lib, tags: apps } + - { role: kubernetes-apps/network_plugin, tags: network } - hosts: k8s-cluster roles: @@ -34,4 +36,5 @@ - hosts: kube-master[0] roles: + - { role: kubernetes-apps/lib, tags: apps } - { role: kubernetes-apps, tags: apps } diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index cbe053fa0..caf37c444 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -42,6 +42,8 @@ calicoctl_image_repo: "calico/ctl" calicoctl_image_tag: "{{ calico_version }}" calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" +calico_cni_image_repo: "calico/cni" +calico_cni_image_tag: "{{ calico_cni_version }}" hyperkube_image_repo: "quay.io/coreos/hyperkube" hyperkube_image_tag: "{{ kube_version }}_coreos.0" pod_infra_image_repo: "gcr.io/google_containers/pause-amd64" @@ -56,7 +58,7 @@ downloads: url: "{{ calico_cni_download_url }}" owner: "root" mode: "0755" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" calico_cni_plugin_ipam: dest: calico/bin/calico-ipam version: "{{calico_cni_version}}" @@ -95,22 +97,27 @@ downloads: container: true repo: "{{ flannel_image_repo }}" tag: "{{ flannel_image_tag }}" - enabled: "{{ kube_network_plugin == 'flannel' }}" + enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 'canal' }}" flannel_server_helper: container: true repo: "{{ flannel_server_helper_image_repo }}" tag: "{{ flannel_server_helper_image_tag }}" - enabled: "{{ kube_network_plugin == 'flannel' }}" + enabled: "{{ kube_network_plugin == 'flannel' or kube_network_plugin == 'canal' }}" calicoctl: container: true repo: "{{ calicoctl_image_repo }}" tag: "{{ calicoctl_image_tag }}" - enabled: "{{ 
kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" calico_node: container: true repo: "{{ calico_node_image_repo }}" tag: "{{ calico_node_image_tag }}" - enabled: "{{ kube_network_plugin == 'calico' }}" + enabled: "{{ kube_network_plugin == 'calico' or kube_network_plugin == 'canal' }}" + calico_cni: + container: true + repo: "{{ calico_cni_image_repo }}" + tag: "{{ calico_cni_image_tag }}" + enabled: "{{ kube_network_plugin == 'canal' }}" pod_infra: container: true repo: "{{ pod_infra_image_repo }}" diff --git a/roles/kubernetes-apps/ansible/tasks/main.yaml b/roles/kubernetes-apps/ansible/tasks/main.yaml index f31eb442b..ccbca3c80 100644 --- a/roles/kubernetes-apps/ansible/tasks/main.yaml +++ b/roles/kubernetes-apps/ansible/tasks/main.yaml @@ -18,6 +18,5 @@ with_items: "{{ manifests.results }}" when: inventory_hostname == groups['kube-master'][0] - - include: tasks/calico-policy-controller.yml when: enable_network_policy is defined and enable_network_policy == True diff --git a/roles/kubernetes-apps/ansible/library/kube.py b/roles/kubernetes-apps/lib/library/kube.py similarity index 100% rename from roles/kubernetes-apps/ansible/library/kube.py rename to roles/kubernetes-apps/lib/library/kube.py diff --git a/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml new file mode 100644 index 000000000..46105126b --- /dev/null +++ b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml @@ -0,0 +1,8 @@ +- name: Start flannel and calico-node + run_once: true + kube: + name: "canal-node" + kubectl: "{{bin_dir}}/kubectl" + filename: "/etc/kubernetes/canal-node.yaml" + resource: "ds" + namespace: "kube-system" diff --git a/roles/kubernetes-apps/network_plugin/meta/main.yml b/roles/kubernetes-apps/network_plugin/meta/main.yml new file mode 100644 index 000000000..14a59e5c5 --- /dev/null +++ 
b/roles/kubernetes-apps/network_plugin/meta/main.yml @@ -0,0 +1,4 @@ +--- +dependencies: + - role: kubernetes-apps/network_plugin/canal + when: kube_network_plugin == 'canal' diff --git a/roles/kubernetes/node/tasks/main.yml b/roles/kubernetes/node/tasks/main.yml index a8cb6ce5a..700f7eb75 100644 --- a/roles/kubernetes/node/tasks/main.yml +++ b/roles/kubernetes/node/tasks/main.yml @@ -11,6 +11,13 @@ owner: kube when: kube_network_plugin == "calico" +- name: Write Canal cni config + template: + src: "cni-canal.conf.j2" + dest: "/etc/cni/net.d/10-canal.conf" + owner: kube + when: kube_network_plugin == "canal" + - name: Write kubelet config file template: src=kubelet.j2 dest={{ kube_config_dir }}/kubelet.env backup=yes notify: diff --git a/roles/kubernetes/node/templates/cni-canal.conf.j2 b/roles/kubernetes/node/templates/cni-canal.conf.j2 new file mode 100644 index 000000000..db3e9231c --- /dev/null +++ b/roles/kubernetes/node/templates/cni-canal.conf.j2 @@ -0,0 +1,17 @@ +{ + "name": "canal-k8s-network", + "type": "flannel", + "delegate": { + "type": "calico", + "etcd_endpoints": "{{ etcd_access_endpoint }}", + "log_level": "info", + {% if enable_network_policy is defined and enable_network_policy == True %} + "policy": { + "type": "k8s" + }, + {% endif %} + "kubernetes": { + "kubeconfig": "{{ kube_config_dir }}/node-kubeconfig.yaml" + } + } +} diff --git a/roles/kubernetes/node/templates/kubelet.j2 b/roles/kubernetes/node/templates/kubelet.j2 index 53f2915d9..46678691a 100644 --- a/roles/kubernetes/node/templates/kubelet.j2 +++ b/roles/kubernetes/node/templates/kubelet.j2 @@ -26,7 +26,7 @@ KUBELET_ARGS="--cluster_dns={{ dns_server }} --cluster_domain={{ dns_domain }} - {% else %} KUBELET_ARGS="--kubeconfig={{ kube_config_dir}}/kubelet.kubeconfig --config={{ kube_manifest_dir }} --pod-infra-container-image={{ pod_infra_image_repo }}:{{ pod_infra_image_tag }}" {% endif %} -{% if kube_network_plugin is defined and kube_network_plugin in ["calico", "weave"] %} +{% if 
kube_network_plugin is defined and kube_network_plugin in ["calico", "weave", "canal"] %} KUBELET_NETWORK_PLUGIN="--network-plugin=cni --network-plugin-dir=/etc/cni/net.d" {% elif kube_network_plugin is defined and kube_network_plugin == "weave" %} DOCKER_SOCKET="--docker-endpoint=unix:/var/run/weave/weave.sock" diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml index 49e69a907..5c6520ed3 100644 --- a/roles/kubernetes/preinstall/tasks/main.yml +++ b/roles/kubernetes/preinstall/tasks/main.yml @@ -74,7 +74,7 @@ with_items: - "/etc/cni/net.d" - "/opt/cni/bin" - when: kube_network_plugin in ["calico", "weave"] and "{{ inventory_hostname in groups['k8s-cluster'] }}" + when: kube_network_plugin in ["calico", "weave", "canal"] and "{{ inventory_hostname in groups['k8s-cluster'] }}" - name: Update package management cache (YUM) yum: update_cache=yes name='*' diff --git a/roles/network_plugin/canal/defaults/main.yml b/roles/network_plugin/canal/defaults/main.yml new file mode 100644 index 000000000..24f7c789b --- /dev/null +++ b/roles/network_plugin/canal/defaults/main.yml @@ -0,0 +1,11 @@ +# The interface used by canal for host <-> host communication. +# If left blank, then the interface is chosen using the node's +# default route. +canal_iface: "" + +# Whether or not to masquerade traffic to destinations not within +# the pod network. 
+canal_masquerade: "true" + +# Log-level +canal_log_level: "info" diff --git a/roles/network_plugin/canal/meta/main.yml b/roles/network_plugin/canal/meta/main.yml new file mode 100644 index 000000000..5b8d38d37 --- /dev/null +++ b/roles/network_plugin/canal/meta/main.yml @@ -0,0 +1,12 @@ +--- +dependencies: + - role: download + file: "{{ downloads.flannel_server_helper }}" + - role: download + file: "{{ downloads.flannel }}" + - role: download + file: "{{ downloads.calico_node }}" + - role: download + file: "{{ downloads.calicoctl }}" + - role: download + file: "{{ downloads.calico_cni }}" diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml new file mode 100644 index 000000000..eb833bf0a --- /dev/null +++ b/roles/network_plugin/canal/tasks/main.yml @@ -0,0 +1,27 @@ +--- +- name: Canal | Write flannel configuration + template: + src: network.json.j2 + dest: /etc/flannel-network.json + backup: yes + +- name: Canal | Write canal configuration + template: + src: canal-node.yml.j2 + dest: /etc/kubernetes/canal-node.yaml + +- name: Canal | Copy cni plugins from hyperkube + command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false + +- name: Canal | Copy cni plugins from calico/cni + command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'" + register: cni_task_result + until: cni_task_result.rc == 0 + retries: 4 + delay: "{{ retry_stagger | random + 3 }}" + changed_when: false diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 b/roles/network_plugin/canal/templates/canal-node.yml.j2 new file mode 100644 index 000000000..0b4ea24e4 --- /dev/null +++ 
b/roles/network_plugin/canal/templates/canal-node.yml.j2 @@ -0,0 +1,112 @@ +--- +kind: DaemonSet +apiVersion: extensions/v1beta1 +metadata: + name: canal-node + labels: + k8s-app: canal-node +spec: + selector: + matchLabels: + k8s-app: canal-node + template: + metadata: + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + scheduler.alpha.kubernetes.io/tolerations: '[{"key":"CriticalAddonsOnly", "operator":"Exists"}]' + labels: + k8s-app: canal-node + spec: + hostNetwork: true + volumes: + # Used by flannel-server-helper + - name: "networkconfig" + hostPath: + path: "/etc/flannel-network.json" + # Used by calico/node. + - name: lib-modules + hostPath: + path: /lib/modules + - name: var-run-calico + hostPath: + path: /var/run/calico + # Used to install CNI. + - name: cni-bin-dir + hostPath: + path: /opt/cni/bin + - name: cni-net-dir + hostPath: + path: /etc/cni/net.d + # Used by flannel daemon. + - name: run-flannel + hostPath: + path: /run/flannel + - name: resolv + hostPath: + path: /etc/resolv.conf + containers: + - name: "flannel-server-helper" + image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" + args: + - "--network-config=/etc/flannel-network.json" + - "--etcd-prefix=/{{ cluster_name }}/network" + - "--etcd-server={{ etcd_endpoint }}" + volumeMounts: + - name: "networkconfig" + mountPath: "/etc/flannel-network.json" + imagePullPolicy: "Always" + # Runs the flannel daemon to enable vxlan networking between + # container hosts. + - name: flannel + image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" + env: + # The location of the etcd cluster. + - name: FLANNELD_ETCD_ENDPOINTS + value: "{{ etcd_access_endpoint }}" + # The interface flannel should run on. + - name: FLANNELD_IFACE + value: "{{ canal_iface }}" + # Perform masquerade on traffic leaving the pod cidr. + - name: FLANNELD_IP_MASQ + value: "{{ canal_masquerade }}" + # Write the subnet.env file to the mounted directory. 
+ - name: FLANNELD_SUBNET_FILE + value: "/run/flannel/subnet.env" + command: + - "/bin/sh" + - "-c" + - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network {% if canal_iface %}-iface {{ canal_iface }}{% endif %}" + ports: + - hostPort: 10253 + containerPort: 10253 + securityContext: + privileged: true + volumeMounts: + - name: "resolv" + mountPath: "/etc/resolv.conf" + - name: "run-flannel" + mountPath: "/run/flannel" + # Runs calico/node container on each Kubernetes node. This + # container programs network policy and local routes on each + # host. + - name: calico-node + image: "{{ calico_node_image_repo }}:{{ calico_node_image_tag }}" + env: + # The location of the etcd cluster. + - name: ETCD_ENDPOINTS + value: "{{ etcd_access_endpoint }}" + # Disable Calico BGP. Calico is simply enforcing policy. + - name: CALICO_NETWORKING + value: "false" + # Disable file logging so `kubectl logs` works. + - name: CALICO_DISABLE_FILE_LOGGING + value: "true" + securityContext: + privileged: true + volumeMounts: + - mountPath: /lib/modules + name: lib-modules + readOnly: true + - mountPath: /var/run/calico + name: var-run-calico + readOnly: false diff --git a/roles/network_plugin/canal/templates/network.json.j2 b/roles/network_plugin/canal/templates/network.json.j2 new file mode 100644 index 000000000..cbbec3841 --- /dev/null +++ b/roles/network_plugin/canal/templates/network.json.j2 @@ -0,0 +1 @@ +{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } } diff --git a/roles/network_plugin/meta/main.yml b/roles/network_plugin/meta/main.yml index 736262ab0..8596c9d70 100644 --- a/roles/network_plugin/meta/main.yml +++ b/roles/network_plugin/meta/main.yml @@ -6,3 +6,5 @@ dependencies: when: kube_network_plugin == 'flannel' - role: network_plugin/weave when: kube_network_plugin == 'weave' + - role: network_plugin/canal + when: kube_network_plugin == 
'canal' From 60a217766fee54f68fae58dc9a69069c7571c625 Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Tue, 8 Nov 2016 12:13:20 +0100 Subject: [PATCH 26/35] Add ConfigMap for basic configuration options Container settings moved from daemonset yaml to a separate configmap. --- roles/kubernetes-apps/ansible/tasks/main.yaml | 3 +- .../network_plugin/canal/tasks/main.yaml | 9 ++++ .../node/templates/cni-canal.conf.j2 | 8 ++-- roles/network_plugin/canal/tasks/main.yml | 7 ++- .../canal/templates/canal-config.yml.j2 | 22 +++++++++ .../canal/templates/canal-node.yml.j2 | 48 ++++++++++++++++--- 6 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 roles/network_plugin/canal/templates/canal-config.yml.j2 diff --git a/roles/kubernetes-apps/ansible/tasks/main.yaml b/roles/kubernetes-apps/ansible/tasks/main.yaml index ccbca3c80..130a17a6f 100644 --- a/roles/kubernetes-apps/ansible/tasks/main.yaml +++ b/roles/kubernetes-apps/ansible/tasks/main.yaml @@ -19,4 +19,5 @@ when: inventory_hostname == groups['kube-master'][0] - include: tasks/calico-policy-controller.yml - when: enable_network_policy is defined and enable_network_policy == True + when: ( enable_network_policy is defined and enable_network_policy == True ) or + ( kube_network_plugin == 'canal' ) diff --git a/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml index 46105126b..c6bcd6992 100644 --- a/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml +++ b/roles/kubernetes-apps/network_plugin/canal/tasks/main.yaml @@ -1,3 +1,12 @@ +- name: Create canal ConfigMap + run_once: true + kube: + name: "canal-config" + kubectl: "{{bin_dir}}/kubectl" + filename: "/etc/kubernetes/canal-config.yaml" + resource: "configmap" + namespace: "kube-system" + - name: Start flannel and calico-node run_once: true kube: diff --git a/roles/kubernetes/node/templates/cni-canal.conf.j2 b/roles/kubernetes/node/templates/cni-canal.conf.j2 index 
db3e9231c..b835443c7 100644 --- a/roles/kubernetes/node/templates/cni-canal.conf.j2 +++ b/roles/kubernetes/node/templates/cni-canal.conf.j2 @@ -5,11 +5,9 @@ "type": "calico", "etcd_endpoints": "{{ etcd_access_endpoint }}", "log_level": "info", - {% if enable_network_policy is defined and enable_network_policy == True %} - "policy": { - "type": "k8s" - }, - {% endif %} + "policy": { + "type": "k8s" + }, "kubernetes": { "kubeconfig": "{{ kube_config_dir }}/node-kubeconfig.yaml" } diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml index eb833bf0a..ba83edee8 100644 --- a/roles/network_plugin/canal/tasks/main.yml +++ b/roles/network_plugin/canal/tasks/main.yml @@ -5,7 +5,12 @@ dest: /etc/flannel-network.json backup: yes -- name: Canal | Write canal configuration +- name: Canal | Write canal configmap + template: + src: canal-config.yml.j2 + dest: /etc/kubernetes/canal-config.yaml + +- name: Canal | Write canal node configuration template: src: canal-node.yml.j2 dest: /etc/kubernetes/canal-node.yaml diff --git a/roles/network_plugin/canal/templates/canal-config.yml.j2 b/roles/network_plugin/canal/templates/canal-config.yml.j2 new file mode 100644 index 000000000..34f3faedb --- /dev/null +++ b/roles/network_plugin/canal/templates/canal-config.yml.j2 @@ -0,0 +1,22 @@ +# This ConfigMap can be used to configure a self-hosted Canal installation. +# See `canal.yaml` for an example of a Canal deployment which uses +# the config in this ConfigMap. +kind: ConfigMap +apiVersion: v1 +metadata: + name: canal-config +data: + # Configure this with the location of your etcd cluster. + etcd_endpoints: "{{ etcd_access_endpoint }}" + + # The interface used by canal for host <-> host communication. + # If left blank, then the interface is chosing using the node's + # default route. + flanneld_iface: "{{ canal_iface }}" + + # Whether or not to masquerade traffic to destinations not within + # the pod network. 
+ masquerade: "{{ canal_masquerade }}" + + # Cluster name for Flannel etcd path + cluster_name: "{{ cluster_name }}" diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 b/roles/network_plugin/canal/templates/canal-node.yml.j2 index 0b4ea24e4..bdeae6cfd 100644 --- a/roles/network_plugin/canal/templates/canal-node.yml.j2 +++ b/roles/network_plugin/canal/templates/canal-node.yml.j2 @@ -47,10 +47,23 @@ spec: containers: - name: "flannel-server-helper" image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" + env: + # Cluster name + - name: CLUSTER_NAME + valueFrom: + configMapKeyRef: + name: canal-config + key: cluster_name + # The location of the etcd cluster. + - name: FLANNELD_ETCD_ENDPOINTS + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints args: - "--network-config=/etc/flannel-network.json" - - "--etcd-prefix=/{{ cluster_name }}/network" - - "--etcd-server={{ etcd_endpoint }}" + - "--etcd-prefix=/$(CLUSTER_NAME)/network" + - "--etcd-server=$(FLANNELD_ETCD_ENDPOINTS)" volumeMounts: - name: "networkconfig" mountPath: "/etc/flannel-network.json" @@ -60,22 +73,40 @@ spec: - name: flannel image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" env: + # Cluster name + - name: CLUSTER_NAME + valueFrom: + configMapKeyRef: + name: canal-config + key: cluster_name # The location of the etcd cluster. - name: FLANNELD_ETCD_ENDPOINTS - value: "{{ etcd_access_endpoint }}" + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints # The interface flannel should run on. - name: FLANNELD_IFACE - value: "{{ canal_iface }}" + valueFrom: + configMapKeyRef: + name: canal-config + key: flanneld_iface # Perform masquerade on traffic leaving the pod cidr. 
- name: FLANNELD_IP_MASQ - value: "{{ canal_masquerade }}" + valueFrom: + configMapKeyRef: + name: canal-config + key: masquerade + # Set etcd-prefix + - name: DOCKER_OPT_ETCD_PREFIX + value: "-etcd-prefix=/$(CLUSTER_NAME)/network" # Write the subnet.env file to the mounted directory. - name: FLANNELD_SUBNET_FILE value: "/run/flannel/subnet.env" command: - "/bin/sh" - "-c" - - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network {% if canal_iface %}-iface {{ canal_iface }}{% endif %}" + - "/opt/bin/flanneld -etcd-prefix /$(CLUSTER_NAME)/network" ports: - hostPort: 10253 containerPort: 10253 @@ -94,7 +125,10 @@ spec: env: # The location of the etcd cluster. - name: ETCD_ENDPOINTS - value: "{{ etcd_access_endpoint }}" + valueFrom: + configMapKeyRef: + name: canal-config + key: etcd_endpoints # Disable Calico BGP. Calico is simply enforcing policy. - name: CALICO_NETWORKING value: "false" From 4ece73d4327c72b836ff57aa489d48887ec7996a Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Tue, 8 Nov 2016 12:20:55 +0100 Subject: [PATCH 27/35] Fix idempotency of calico-policy-controller rs We need to specify kube resource type and name in order to avoid playbook errors related to k8s resource duplication. 
--- .../ansible/tasks/calico-policy-controller.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml index f4ac65aeb..6ad8dd220 100644 --- a/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml +++ b/roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml @@ -5,6 +5,9 @@ - name: Start of Calico policy controller kube: + name: "calico-policy-controller" kubectl: "{{bin_dir}}/kubectl" - filename: /etc/kubernetes/calico-policy-controller.yml + filename: "/etc/kubernetes/calico-policy-controller.yml" + namespace: "kube-system" + resource: "rs" when: inventory_hostname == groups['kube-master'][0] From d197130148c65c286175b192e2a2a81bae502e13 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Mon, 7 Nov 2016 16:00:49 +0100 Subject: [PATCH 28/35] Fix uploading CI logs to GCS * Use gsutil to configure the logs bucket's lifecycle, which is not in the gc_storage module yet. (See https://cloud.google.com/storage/docs/gsutil_install). * Generate unique bucket names extended with the build's OS type info as well. * Ignore boto related errors for the gc_storage module. 
* Use no_log when needed to suppress noise/secrets in output Signed-off-by: Bogdan Dobrelya --- .travis.yml | 5 +- scripts/collect-info.yaml | 1 + tests/cloud_playbooks/files/gcs_life.json | 9 +++ tests/cloud_playbooks/upload-logs-gcs.yml | 68 +++++++++++++++-------- 4 files changed, 60 insertions(+), 23 deletions(-) create mode 100644 tests/cloud_playbooks/files/gcs_life.json diff --git a/.travis.yml b/.travis.yml index ac922fd80..3bbb46d1c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -149,13 +149,16 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL after_failure: - - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml >/dev/null + - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml - > $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} -e gs_key=${GS_ACCESS_KEY_ID} -e gs_skey=${GS_SECRET_ACCESS_KEY} + -e ostype=${CLOUD_IMAGE} + -e commit=${TRAVIS_COMMIT} + -e pr=${TRAVIS_PULL_REQUEST} after_script: - > diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 877b5bf36..685b8b787 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -45,6 +45,7 @@ register: output ignore_errors: true with_items: "{{commands}}" + no_log: True - debug: var=item with_items: "{{output.results}}" diff --git a/tests/cloud_playbooks/files/gcs_life.json b/tests/cloud_playbooks/files/gcs_life.json new file mode 100644 index 000000000..eaab30b4f --- /dev/null +++ b/tests/cloud_playbooks/files/gcs_life.json @@ -0,0 +1,9 @@ +{ + "rule": + [ + { + "action": {"type": "Delete"}, + 
"condition": {"age": 2} + } + ] +} diff --git a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml index ba31ab56f..12013798d 100644 --- a/tests/cloud_playbooks/upload-logs-gcs.yml +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -3,41 +3,65 @@ become: false gather_facts: no - vars: - expire: 72000 - tasks: + - name: Generate uniq bucket name prefix + shell: date +%s | sha256sum | base64 | head -c 32 + register: out + - name: replace_test_id set_fact: - test_name: "{{ test_id | regex_replace('\\.', '-') }}" + test_name: "kargo-{{ commit }}-{{ pr }}-{{ out.stdout|lower }}-{{ test_id | regex_replace('\\.', '-') }}" - name: Create a bucket gc_storage: bucket: "{{ test_name }}" mode: create - expiration: "{{ expire }}" permission: private - gs_access_key: gs_key - gs_secret_key: gs_skey + gs_access_key: "{{ gs_key }}" + gs_secret_key: "{{ gs_skey }}" + no_log: True + + - name: Download gsutil cp installer + get_url: + url: https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash + dest: /tmp/gcp-installer.sh + + - name: Get gsutil tool + script: /tmp/gcp-installer.sh + environment: + CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + no_log: True + + - name: Create a lifecycle template for the bucket + file: src=gcs_life.json path=/tmp/gcs_life.json + + - name: Hack the boto config for GCS access keys + lineinfile: + dest: .boto + line: "gs_access_key_id = {{ gs_key }}" + regexp: "^#gs_access_key_id = .*$" + no_log: True + + - name: Hack the boto config for GCS secret access keys + lineinfile: + dest: .boto + line: "gs_secret_access_key = {{ gs_skey }}" + regexp: "^#gs_secret_access_key = .*$" + no_log: True + + - name: Apply the lifecycle rules + shell: bash google-cloud-sdk/bin/gsutil lifecycle set /tmp/gcs_life.json gs://{{ test_name }} + environment: + BOTO_CONFIG: .boto - name: Upload collected diagnostic info gc_storage: bucket: "{{ test_name }}" mode: put permission: private - expiration: "{{ expire }}" - object: 
"build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" + object: "build-{{ ostype }}-{{ kube_network_plugin }}-logs.tar.gz" src: logs.tar.gz - gs_access_key: gs_key - gs_secret_key: gs_skey - - - name: Get a link - gc_storage: - bucket: "{{ test_name }}" - object: "build-{{ test_name }}-{{ kube_network_plugin }}-logs.tar.gz" - mode: get_url - register: url - gs_access_key: gs_key - gs_secret_key: gs_skey - - - debug: msg="Download URL {{get_url}}" + headers: '{"Content-Encoding": "x-gzip"}' + gs_access_key: "{{ gs_key }}" + gs_secret_key: "{{ gs_skey }}" + ignore_errors: true From 95b460ae9489b38fa681597352ab2602b503c4e2 Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Wed, 9 Nov 2016 13:31:12 +0300 Subject: [PATCH 29/35] Remove etcd-proxy from all nodes and use etcd multiaccess --- docs/ha-mode.md | 6 +- inventory/group_vars/all.yml | 2 +- roles/etcd/handlers/main.yml | 17 +-- roles/etcd/tasks/configure.yml | 16 --- roles/etcd/tasks/main.yml | 19 +-- roles/etcd/tasks/refresh_config.yml | 7 - .../templates/deb-etcd-proxy-docker.initd.j2 | 120 ------------------ .../templates/deb-etcd-proxy-host.initd.j2 | 110 ---------------- .../templates/etcd-proxy-docker.service.j2 | 28 ---- .../etcd/templates/etcd-proxy-host.service.j2 | 19 --- roles/etcd/templates/etcd-proxy.j2 | 5 - roles/etcd/templates/etcd.j2 | 4 - .../templates/calico-policy-controller.yml.j2 | 11 +- roles/kubernetes/master/tasks/pre-upgrade.yml | 9 -- .../manifests/kube-apiserver.manifest.j2 | 1 + .../node/templates/cni-calico.conf.j2 | 1 + roles/kubernetes/preinstall/defaults/main.yml | 3 + .../kubernetes/preinstall/tasks/set_facts.yml | 9 +- roles/network_plugin/calico/tasks/main.yml | 35 +++-- .../calico/templates/calico-node.service.j2 | 4 +- .../calico/templates/calicoctl-container.j2 | 3 +- .../calico/templates/network-environment.j2 | 4 +- .../flannel/templates/flannel-pod.yml | 2 +- 23 files changed, 55 insertions(+), 380 deletions(-) delete mode 100644 
roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 delete mode 100644 roles/etcd/templates/deb-etcd-proxy-host.initd.j2 delete mode 100644 roles/etcd/templates/etcd-proxy-docker.service.j2 delete mode 100644 roles/etcd/templates/etcd-proxy-host.service.j2 delete mode 100644 roles/etcd/templates/etcd-proxy.j2 diff --git a/docs/ha-mode.md b/docs/ha-mode.md index 792c18a19..8ec5c93a1 100644 --- a/docs/ha-mode.md +++ b/docs/ha-mode.md @@ -5,10 +5,6 @@ The following components require a highly available endpoints: * etcd cluster, * kube-apiserver service instances. -The former provides the -[etcd-proxy](https://coreos.com/etcd/docs/latest/proxy.html) service to access -the cluster members in HA fashion. - The latter relies on a 3rd side reverse proxies, like Nginx or HAProxy, to achieve the same goal. @@ -57,7 +53,7 @@ type. The following diagram shows how traffic to the apiserver is directed. A user may opt to use an external loadbalancer (LB) instead. An external LB provides access for external clients, while the internal LB accepts client -connections only to the localhost, similarly to the etcd-proxy HA endpoints. +connections only to the localhost. Given a frontend `VIP` address and `IP1, IP2` addresses of backends, here is an example configuration for a HAProxy service acting as an external LB: ``` diff --git a/inventory/group_vars/all.yml b/inventory/group_vars/all.yml index cbf2e63a2..f72276ae6 100644 --- a/inventory/group_vars/all.yml +++ b/inventory/group_vars/all.yml @@ -62,7 +62,7 @@ ndots: 5 # Enable multiaccess to configure clients to access all of the etcd members directly # as the "http://hostX:port, http://hostY:port, ..." and ignore the proxy loadbalancers. # This may be the case if clients support and loadbalance multiple etcd servers natively. 
-etcd_multiaccess: false +etcd_multiaccess: true # Assume there are no internal loadbalancers for apiservers exist and listen on # kube_apiserver_port (default 443) diff --git a/roles/etcd/handlers/main.yml b/roles/etcd/handlers/main.yml index 693754a06..8104ff1a8 100644 --- a/roles/etcd/handlers/main.yml +++ b/roles/etcd/handlers/main.yml @@ -6,21 +6,14 @@ - reload etcd - wait for etcd up -- name: restart etcd-proxy - command: /bin/true - notify: - - etcd | reload systemd - - reload etcd-proxy - - wait for etcd up - - name: etcd | reload systemd command: systemctl daemon-reload when: ansible_service_mgr == "systemd" - name: wait for etcd up - uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" + uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" register: result - until: result.status == 200 + until: result.status is defined and result.status == 200 retries: 10 delay: 5 @@ -29,9 +22,3 @@ name: etcd state: restarted when: is_etcd_master - -- name: reload etcd-proxy - service: - name: etcd-proxy - state: restarted - when: is_etcd_proxy diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml index 514a79d73..a2ef38f2c 100644 --- a/roles/etcd/tasks/configure.yml +++ b/roles/etcd/tasks/configure.yml @@ -26,19 +26,3 @@ mode: 0755 when: ansible_service_mgr in ["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_master notify: restart etcd - -- name: Configure | Copy etcd-proxy.service systemd file - template: - src: "etcd-proxy-{{ etcd_deployment_type }}.service.j2" - dest: /etc/systemd/system/etcd-proxy.service - backup: yes - when: ansible_service_mgr == "systemd" and is_etcd_proxy - notify: restart etcd-proxy -- name: Configure | Write etcd-proxy initd script - template: - src: "deb-etcd-proxy-{{ etcd_deployment_type }}.initd.j2" - dest: /etc/init.d/etcd-proxy - owner: root - mode: 0755 - when: ansible_service_mgr in 
["sysvinit","upstart"] and ansible_os_family == "Debian" and is_etcd_proxy - notify: restart etcd-proxy diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 3ecaa00e6..88dfe59d8 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -1,8 +1,13 @@ --- +- include: pre_upgrade.yml - include: install.yml + when: is_etcd_master - include: set_cluster_health.yml + when: is_etcd_master - include: configure.yml + when: is_etcd_master - include: refresh_config.yml + when: is_etcd_master - name: Ensure etcd is running service: @@ -11,23 +16,11 @@ enabled: yes when: is_etcd_master -- name: Ensure etcd-proxy is running - service: - name: etcd-proxy - state: started - enabled: yes - when: is_etcd_proxy - - name: Restart etcd if binary changed command: /bin/true notify: restart etcd when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_master -- name: Restart etcd-proxy if binary changed - command: /bin/true - notify: restart etcd-proxy - when: etcd_deployment_type == "host" and etcd_copy.stdout_lines and is_etcd_proxy - # Reload systemd before starting service - meta: flush_handlers @@ -35,4 +28,6 @@ # initial state of the cluster is in `existing` # state insted of `new`. 
- include: set_cluster_health.yml + when: is_etcd_master - include: refresh_config.yml + when: is_etcd_master diff --git a/roles/etcd/tasks/refresh_config.yml b/roles/etcd/tasks/refresh_config.yml index 178466153..80a03a7d6 100644 --- a/roles/etcd/tasks/refresh_config.yml +++ b/roles/etcd/tasks/refresh_config.yml @@ -5,10 +5,3 @@ dest: /etc/etcd.env notify: restart etcd when: is_etcd_master - -- name: Refresh config | Create etcd-proxy config file - template: - src: etcd-proxy.j2 - dest: /etc/etcd-proxy.env - notify: restart etcd-proxy - when: is_etcd_proxy diff --git a/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 b/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 deleted file mode 100644 index ad0338a09..000000000 --- a/roles/etcd/templates/deb-etcd-proxy-docker.initd.j2 +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/sh -set -a - -### BEGIN INIT INFO -# Provides: etcd-proxy -# Required-Start: $local_fs $network $syslog -# Required-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: etcd-proxy -# Description: -# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery -### END INIT INFO - -PATH=/sbin:/usr/sbin:/bin/:/usr/bin -DESC="etcd-proxy" -NAME=etcd-proxy -DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker -DAEMON_EXEC=`basename $DAEMON` -DAEMON_ARGS="run --restart=always --env-file=/etc/etcd-proxy.env \ ---net=host \ ---stop-signal=SIGKILL \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ ---name={{ etcd_proxy_member_name | default("etcd-proxy") }} \ -{{ etcd_image_repo }}:{{ etcd_image_tag }} \ -{% if etcd_after_v3 %} -{{ etcd_container_bin_dir }}etcd -{% endif %}" - - -SCRIPTNAME=/etc/init.d/$NAME -DAEMON_USER=root -STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}" -PID=/var/run/etcd-proxy.pid - -# Exit if the binary is not present -[ -x "$DAEMON" ] || exit 0 - -# Define LSB log_* functions. 
-# Depend on lsb-base (>= 3.2-14) to ensure that this file is present -# and status_of_proc is working. -. /lib/lsb/init-functions - -do_status() -{ - status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $? -} - -# Function that starts the daemon/service -# -do_start() -{ - {{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} &>/dev/null || true - sleep 1 - start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ - $DAEMON_ARGS \ - || return 2 -} - -# -# Function that stops the daemon/service -# -do_stop() -{ - start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $DAEMON_EXEC - RETVAL="$?" - - sleep 1 - return "$RETVAL" -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - do_start - case "$?" in - 0|1) log_end_msg 0 || exit 0 ;; - 2) log_end_msg 1 || exit 1 ;; - esac - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if do_stop; then - log_end_msg 0 - else - log_failure_msg "Can't stop etcd-proxy" - log_end_msg 1 - fi - ;; - status) - if do_status; then - log_end_msg 0 - else - log_failure_msg "etcd-proxy is not running" - log_end_msg 1 - fi - ;; - - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - if do_stop; then - if do_start; then - log_end_msg 0 - exit 0 - else - rc="$?" - fi - else - rc="$?" 
- fi - log_failure_msg "Can't restart etcd-proxy" - log_end_msg ${rc} - ;; - *) - echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 - exit 3 - ;; -esac - diff --git a/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 b/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 deleted file mode 100644 index d0858bb2f..000000000 --- a/roles/etcd/templates/deb-etcd-proxy-host.initd.j2 +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/sh -set -a - -### BEGIN INIT INFO -# Provides: etcd-proxy -# Required-Start: $local_fs $network $syslog -# Required-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: etcd-proxy -# Description: -# etcd-proxy is a proxy for etcd: distributed, consistent key-value store for shared configuration and service discovery -### END INIT INFO - -PATH=/sbin:/usr/sbin:/bin:/usr/bin -DESC="etcd-proxy" -NAME=etcd-proxy -DAEMON={{ bin_dir }}/etcd -DAEMON_ARGS="" -SCRIPTNAME=/etc/init.d/$NAME -DAEMON_USER=etcd -STOP_SCHEDULE="${STOP_SCHEDULE:-QUIT/5/TERM/5/KILL/5}" -PID=/var/run/etcd-proxy.pid - -# Exit if the binary is not present -[ -x "$DAEMON" ] || exit 0 - -# Read configuration variable file if it is present -[ -f /etc/etcd-proxy.env ] && . /etc/etcd-proxy.env - -# Define LSB log_* functions. -# Depend on lsb-base (>= 3.2-14) to ensure that this file is present -# and status_of_proc is working. -. /lib/lsb/init-functions - -do_status() -{ - status_of_proc -p $PID "$DAEMON" "$NAME" && exit 0 || exit $? -} - -# Function that starts the daemon/service -# -do_start() -{ - start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \ - $DAEMON_ARGS \ - || return 2 -} - -# -# Function that stops the daemon/service -# -do_stop() -{ - start-stop-daemon --stop --quiet --retry=$STOP_SCHEDULE --pidfile $PID --name $NAME - RETVAL="$?" - - sleep 1 - return "$RETVAL" -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - do_start - case "$?" 
in - 0|1) log_end_msg 0 || exit 0 ;; - 2) log_end_msg 1 || exit 1 ;; - esac - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if do_stop; then - log_end_msg 0 - else - log_failure_msg "Can't stop etcd-proxy" - log_end_msg 1 - fi - ;; - status) - if do_status; then - log_end_msg 0 - else - log_failure_msg "etcd-proxy is not running" - log_end_msg 1 - fi - ;; - - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - if do_stop; then - if do_start; then - log_end_msg 0 - exit 0 - else - rc="$?" - fi - else - rc="$?" - fi - log_failure_msg "Can't restart etcd-proxy" - log_end_msg ${rc} - ;; - *) - echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 - exit 3 - ;; -esac - diff --git a/roles/etcd/templates/etcd-proxy-docker.service.j2 b/roles/etcd/templates/etcd-proxy-docker.service.j2 deleted file mode 100644 index bf70f0e7f..000000000 --- a/roles/etcd/templates/etcd-proxy-docker.service.j2 +++ /dev/null @@ -1,28 +0,0 @@ -[Unit] -Description=etcd-proxy docker wrapper -Wants=docker.socket -After=docker.service - -[Service] -User=root -PermissionsStartOnly=true -ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always \ ---env-file=/etc/etcd-proxy.env \ -{# TODO(mattymo): Allow docker IP binding and disable in envfile - -p 2380:2380 -p 2379:2379 #} ---net=host \ ---stop-signal=SIGKILL \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ ---name={{ etcd_proxy_member_name | default("etcd-proxy") }} \ -{{ etcd_image_repo }}:{{ etcd_image_tag }} \ -{% if etcd_after_v3 %} -{{ etcd_container_bin_dir }}etcd -{% endif %} -ExecStartPre=-{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_proxy_member_name | default("etcd-proxy") }} -ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_proxy_member_name | default("etcd-proxy") }} -ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_proxy_member_name | default("etcd-proxy") }} -Restart=always -RestartSec=15s - 
-[Install] -WantedBy=multi-user.target diff --git a/roles/etcd/templates/etcd-proxy-host.service.j2 b/roles/etcd/templates/etcd-proxy-host.service.j2 deleted file mode 100644 index 4ea5f7bc9..000000000 --- a/roles/etcd/templates/etcd-proxy-host.service.j2 +++ /dev/null @@ -1,19 +0,0 @@ -[Unit] -Description=etcd-proxy -After=network.target - -[Service] -Type=notify -User=etcd -PermissionsStartOnly=true -EnvironmentFile=/etc/etcd-proxy.env -ExecStart={{ bin_dir }}/etcd -ExecStartPre=/bin/mkdir -p /var/lib/etcd-proxy -ExecStartPre=/bin/chown -R etcd: /var/lib/etcd-proxy -NotifyAccess=all -Restart=always -RestartSec=10s -LimitNOFILE=40000 - -[Install] -WantedBy=multi-user.target diff --git a/roles/etcd/templates/etcd-proxy.j2 b/roles/etcd/templates/etcd-proxy.j2 deleted file mode 100644 index 0a1492a37..000000000 --- a/roles/etcd/templates/etcd-proxy.j2 +++ /dev/null @@ -1,5 +0,0 @@ -ETCD_DATA_DIR=/var/lib/etcd-proxy -ETCD_PROXY=on -ETCD_LISTEN_CLIENT_URLS={{ etcd_access_endpoint }} -ETCD_NAME={{ etcd_proxy_member_name | default("etcd-proxy") }} -ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} diff --git a/roles/etcd/templates/etcd.j2 b/roles/etcd/templates/etcd.j2 index b82116612..c6dc4c28b 100644 --- a/roles/etcd/templates/etcd.j2 +++ b/roles/etcd/templates/etcd.j2 @@ -3,11 +3,7 @@ ETCD_ADVERTISE_CLIENT_URLS={{ etcd_client_url }} ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_peer_url }} ETCD_INITIAL_CLUSTER_STATE={% if etcd_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %} -{% if not is_etcd_proxy %} ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379,http://127.0.0.1:2379 -{% else %} -ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379 -{% endif %} ETCD_ELECTION_TIMEOUT=10000 ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd ETCD_LISTEN_PEER_URLS=http://{{ etcd_address }}:2380 diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 index 7c0a21cfa..ff69b5ec6 
100644 --- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 @@ -26,7 +26,7 @@ spec: image: calico/kube-policy-controller:latest env: - name: ETCD_ENDPOINTS - value: "{{ etcd_endpoint }}" + value: "{{ etcd_access_endpoint }}" # Location of the Kubernetes API - this shouldn't need to be # changed so long as it is used in conjunction with # CONFIGURE_ETC_HOSTS="true". @@ -38,3 +38,12 @@ spec: # This removes the need for KubeDNS to resolve the Service. - name: CONFIGURE_ETC_HOSTS value: "true" + volumeMounts: + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true + volumes: + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs + diff --git a/roles/kubernetes/master/tasks/pre-upgrade.yml b/roles/kubernetes/master/tasks/pre-upgrade.yml index 3b9f26de1..239c46be9 100644 --- a/roles/kubernetes/master/tasks/pre-upgrade.yml +++ b/roles/kubernetes/master/tasks/pre-upgrade.yml @@ -14,12 +14,3 @@ name: kube-apiserver state: stopped when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) - -- name: "Pre-upgrade | remove kube-apiserver service definition" - file: - path: "{{ item }}" - state: absent - when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) - with_items: - - /etc/systemd/system/kube-apiserver.service - - /etc/init.d/kube-apiserver diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index 4100e8a34..bcf9f22d4 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -62,3 +62,4 @@ spec: - hostPath: path: /var/log/ name: logfile + diff --git a/roles/kubernetes/node/templates/cni-calico.conf.j2 
b/roles/kubernetes/node/templates/cni-calico.conf.j2 index 4615cdabd..a6558deaa 100644 --- a/roles/kubernetes/node/templates/cni-calico.conf.j2 +++ b/roles/kubernetes/node/templates/cni-calico.conf.j2 @@ -1,6 +1,7 @@ { "name": "calico-k8s-network", "type": "calico", + "etcd_endpoints": "{{ etcd_access_endpoint }}", "log_level": "info", "ipam": { "type": "calico-ipam" diff --git a/roles/kubernetes/preinstall/defaults/main.yml b/roles/kubernetes/preinstall/defaults/main.yml index 3eae9757d..c02a32e29 100644 --- a/roles/kubernetes/preinstall/defaults/main.yml +++ b/roles/kubernetes/preinstall/defaults/main.yml @@ -45,3 +45,6 @@ openstack_username: "{{ lookup('env','OS_USERNAME') }}" openstack_password: "{{ lookup('env','OS_PASSWORD') }}" openstack_region: "{{ lookup('env','OS_REGION_NAME') }}" openstack_tenant_id: "{{ lookup('env','OS_TENANT_ID') }}" + +# All clients access each node individually, instead of using a load balancer. +etcd_multiaccess: true diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index 2dd947dda..d51bcbed4 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -30,7 +30,7 @@ - set_fact: etcd_access_addresses: |- {% for item in groups['etcd'] -%} - http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2379{% if not loop.last %},{% endif %} + http://{{ item }}:2379{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}" - set_fact: @@ -43,13 +43,6 @@ {% for item in groups['etcd'] -%} {{ "etcd"+loop.index|string }}=http://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} {%- endfor %} -- set_fact: - etcd_proxy_member_name: |- - {% 
for host in groups['k8s-cluster'] %} - {% if inventory_hostname == host %}{{"etcd-proxy"+loop.index|string }}{% endif %} - {% endfor %} -- set_fact: - is_etcd_proxy: "{{ inventory_hostname in groups['k8s-cluster'] }}" - set_fact: is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}" - set_fact: diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index 46f729883..ce43d5224 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -43,17 +43,21 @@ - name: Calico | wait for etcd uri: url=http://localhost:2379/health register: result - until: result.status == 200 + until: result.status == 200 or result.status == 401 retries: 10 delay: 5 - when: inventory_hostname in groups['kube-master'] + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Calico | Check if calico network pool has already been configured - uri: - url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool" - return_content: yes - status_code: 200,404 + command: |- + curl \ + --cacert {{ etcd_cert_dir }}/ca.pem \ + --cert {{ etcd_cert_dir}}/admin.pem \ + --key {{ etcd_cert_dir }}/admin-key.pem \ + https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool register: calico_conf + delegate_to: "{{groups['etcd'][0]}}" run_once: true - name: Calico | Define ipip pool argument @@ -79,21 +83,25 @@ environment: NO_DEFAULT_POOLS: true run_once: true - when: calico_conf.status == 404 or "nodes" not in calico_conf.content + when: '"Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout' - name: Calico | Get calico configuration from etcd - uri: - url: "{{ etcd_endpoint }}/v2/keys/calico/v1/ipam/v4/pool" - return_content: yes - register: calico_pools + command: |- + curl http://localhost:2379/v2/keys/calico/v1/ipam/v4/pool + register: calico_pools_raw + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + +- set_fact: + calico_pools: "{{ calico_pools_raw.stdout | from_json }}" 
run_once: true - name: Calico | Check if calico pool is properly configured fail: msg: 'Only one network pool must be configured and it must be the subnet {{ kube_pods_subnet }}. Please erase calico configuration and run the playbook again ("etcdctl rm --recursive /calico/v1/ipam/v4/pool")' - when: ( calico_pools.json['node']['nodes'] | length > 1 ) or - ( not calico_pools.json['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") ) + when: ( calico_pools['node']['nodes'] | length > 1 ) or + ( not calico_pools['node']['nodes'][0]['key'] | search(".*{{ kube_pods_subnet | ipaddr('network') }}.*") ) run_once: true - name: Calico | Write /etc/network-environment @@ -131,4 +139,3 @@ shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ item.router_id }} as {{ item.as }}" with_items: peers when: peer_with_router|default(false) and inventory_hostname in groups['kube-node'] - diff --git a/roles/network_plugin/calico/templates/calico-node.service.j2 b/roles/network_plugin/calico/templates/calico-node.service.j2 index a7f7e4bab..2a7775fd4 100644 --- a/roles/network_plugin/calico/templates/calico-node.service.j2 +++ b/roles/network_plugin/calico/templates/calico-node.service.j2 @@ -1,8 +1,8 @@ [Unit] Description=Calico per-node agent Documentation=https://github.com/projectcalico/calico-docker -After=docker.service docker.socket etcd-proxy.service -Wants=docker.socket etcd-proxy.service +After=docker.service docker.socket +Wants=docker.socket [Service] User=root diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2 index 466f1df93..c8ac759de 100644 --- a/roles/network_plugin/calico/templates/calicoctl-container.j2 +++ b/roles/network_plugin/calico/templates/calicoctl-container.j2 @@ -1,6 +1,7 @@ #!/bin/bash /usr/bin/docker run --privileged --rm \ ---net=host --pid=host -e ETCD_AUTHORITY={{ etcd_authority }} \ +--net=host --pid=host \ +-e ETCD_ENDPOINTS={{ 
etcd_access_endpoint }} \ -v /usr/bin/docker:/usr/bin/docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v /var/run/calico:/var/run/calico \ diff --git a/roles/network_plugin/calico/templates/network-environment.j2 b/roles/network_plugin/calico/templates/network-environment.j2 index 086803d1b..0da2db904 100644 --- a/roles/network_plugin/calico/templates/network-environment.j2 +++ b/roles/network_plugin/calico/templates/network-environment.j2 @@ -3,7 +3,7 @@ DEFAULT_IPV4={{ip | default(ansible_default_ipv4.address) }} # The Kubernetes master IP -KUBERNETES_MASTER={{ first_kube_master }} +KUBERNETES_MASTER={{ kube_apiserver_endpoint }} # IP and port of etcd instance used by Calico -ETCD_AUTHORITY={{ etcd_authority }} +ETCD_ENDPOINTS={{ etcd_access_endpoint }} diff --git a/roles/network_plugin/flannel/templates/flannel-pod.yml b/roles/network_plugin/flannel/templates/flannel-pod.yml index 15523bdde..70353f11a 100644 --- a/roles/network_plugin/flannel/templates/flannel-pod.yml +++ b/roles/network_plugin/flannel/templates/flannel-pod.yml @@ -21,7 +21,7 @@ args: - "--network-config=/etc/flannel-network.json" - "--etcd-prefix=/{{ cluster_name }}/network" - - "--etcd-server={{ etcd_endpoint }}" + - "--etcd-endpoints={{ etcd_access_endpoint }}" volumeMounts: - name: "networkconfig" mountPath: "/etc/flannel-network.json" From a32cd85eb7f0fa4c70ead3670fbc73d9e541e26a Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Wed, 9 Nov 2016 13:44:41 +0300 Subject: [PATCH 30/35] Add etcd TLS support --- roles/download/defaults/main.yml | 5 +- roles/etcd/defaults/main.yml | 6 + roles/etcd/files/make-ssl-etcd.sh | 80 +++++++++++++ roles/etcd/handlers/main.yml | 7 +- roles/etcd/tasks/check_certs.yml | 36 ++++++ roles/etcd/tasks/gen_certs.yml | 111 ++++++++++++++++++ roles/etcd/tasks/main.yml | 2 + roles/etcd/tasks/pre_upgrade.yml | 34 ++++++ roles/etcd/templates/deb-etcd-docker.initd.j2 | 3 +- roles/etcd/templates/etcd-docker.service.j2 | 3 +- roles/etcd/templates/etcd.j2 | 13 
+- roles/etcd/templates/openssl.conf.j2 | 39 ++++++ .../templates/calico-policy-controller.yml.j2 | 6 + roles/kubernetes/master/defaults/main.yml | 6 + .../manifests/kube-apiserver.manifest.j2 | 10 +- .../node/templates/cni-calico.conf.j2 | 3 + .../kubernetes/preinstall/tasks/set_facts.yml | 10 +- roles/network_plugin/calico/defaults/main.yml | 3 + roles/network_plugin/calico/tasks/main.yml | 26 +++- .../calico/templates/calicoctl-container.j2 | 4 + .../calico/templates/network-environment.j2 | 3 + roles/network_plugin/flannel/tasks/main.yml | 12 +- .../flannel/templates/flannel-pod.yml | 18 +-- .../flannel/templates/network.json | 1 - roles/uploads/defaults/main.yml | 2 +- 25 files changed, 408 insertions(+), 35 deletions(-) create mode 100755 roles/etcd/files/make-ssl-etcd.sh create mode 100644 roles/etcd/tasks/check_certs.yml create mode 100644 roles/etcd/tasks/gen_certs.yml create mode 100644 roles/etcd/tasks/pre_upgrade.yml create mode 100644 roles/etcd/templates/openssl.conf.j2 delete mode 100644 roles/network_plugin/flannel/templates/network.json diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index cbe053fa0..8f33ebd15 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -10,7 +10,7 @@ kube_version: v1.4.3 etcd_version: v3.0.6 #TODO(mattymo): Move calico versions to roles/network_plugins/calico/defaults # after migration to container download -calico_version: v0.22.0 +calico_version: v1.0.0-beta calico_cni_version: v1.4.2 weave_version: v1.6.1 flannel_version: v0.6.2 @@ -39,7 +39,8 @@ flannel_server_helper_image_tag: "{{ flannel_server_helper_version }}" flannel_image_repo: "quay.io/coreos/flannel" flannel_image_tag: "{{ flannel_version }}" calicoctl_image_repo: "calico/ctl" -calicoctl_image_tag: "{{ calico_version }}" +# TODO(mattymo): v1.0.0-beta has different syntax. 
Needs work to upgrade +calicoctl_image_tag: "v0.22.0" calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" hyperkube_image_repo: "quay.io/coreos/hyperkube" diff --git a/roles/etcd/defaults/main.yml b/roles/etcd/defaults/main.yml index 02234a2fe..2df4ba165 100644 --- a/roles/etcd/defaults/main.yml +++ b/roles/etcd/defaults/main.yml @@ -1,2 +1,8 @@ --- etcd_bin_dir: "{{ local_release_dir }}/etcd/etcd-{{ etcd_version }}-linux-amd64/" + +etcd_config_dir: /etc/ssl/etcd +etcd_cert_dir: "{{ etcd_config_dir }}/ssl" +etcd_cert_group: root + +etcd_script_dir: "{{ bin_dir }}/etcd-scripts" diff --git a/roles/etcd/files/make-ssl-etcd.sh b/roles/etcd/files/make-ssl-etcd.sh new file mode 100755 index 000000000..4c7db9430 --- /dev/null +++ b/roles/etcd/files/make-ssl-etcd.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# Author: Smana smainklh@gmail.com +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -o errexit +set -o pipefail + +usage() +{ + cat << EOF +Create self signed certificates + +Usage : $(basename $0) -f [-d ] + -h | --help : Show this message + -f | --config : Openssl configuration file + -d | --ssldir : Directory where the certificates will be installed + + ex : + $(basename $0) -f openssl.conf -d /srv/ssl +EOF +} + +# Options parsing +while (($#)); do + case "$1" in + -h | --help) usage; exit 0;; + -f | --config) CONFIG=${2}; shift 2;; + -d | --ssldir) SSLDIR="${2}"; shift 2;; + *) + usage + echo "ERROR : Unknown option" + exit 3 + ;; + esac +done + +if [ -z ${CONFIG} ]; then + echo "ERROR: the openssl configuration file is missing. option -f" + exit 1 +fi +if [ -z ${SSLDIR} ]; then + SSLDIR="/etc/ssl/etcd" +fi + +tmpdir=$(mktemp -d /tmp/etcd_cacert.XXXXXX) +trap 'rm -rf "${tmpdir}"' EXIT +cd "${tmpdir}" + +mkdir -p "${SSLDIR}" + +# Root CA +openssl genrsa -out ca-key.pem 2048 > /dev/null 2>&1 +openssl req -x509 -new -nodes -key ca-key.pem -days 10000 -out ca.pem -subj "/CN=etcd-ca" > /dev/null 2>&1 + +# ETCD member +openssl genrsa -out member-key.pem 2048 > /dev/null 2>&1 +openssl req -new -key member-key.pem -out member.csr -subj "/CN=etcd-member" -config ${CONFIG} > /dev/null 2>&1 +openssl x509 -req -in member.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out member.pem -days 365 -extensions ssl_client -extfile ${CONFIG} > /dev/null 2>&1 + +# Nodes and Admin +for i in node admin; do + openssl genrsa -out ${i}-key.pem 2048 > /dev/null 2>&1 + openssl req -new -key ${i}-key.pem -out ${i}.csr -subj "/CN=kube-${i}" > /dev/null 2>&1 + openssl x509 -req -in ${i}.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out ${i}.pem -days 365 -extensions ssl_client -extfile ${CONFIG} > /dev/null 2>&1 +done + +# Install certs +mv *.pem ${SSLDIR}/ diff --git a/roles/etcd/handlers/main.yml b/roles/etcd/handlers/main.yml index 8104ff1a8..badf0bd79 100644 --- a/roles/etcd/handlers/main.yml +++ b/roles/etcd/handlers/main.yml @@ -11,7 +11,7 @@ when: 
ansible_service_mgr == "systemd" - name: wait for etcd up - uri: url="http://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" + uri: url="https://{% if is_etcd_master %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health" validate_certs=no register: result until: result.status is defined and result.status == 200 retries: 10 @@ -22,3 +22,8 @@ name: etcd state: restarted when: is_etcd_master + +- name: set etcd_secret_changed + set_fact: + etcd_secret_changed: true + diff --git a/roles/etcd/tasks/check_certs.yml b/roles/etcd/tasks/check_certs.yml new file mode 100644 index 000000000..03a875517 --- /dev/null +++ b/roles/etcd/tasks/check_certs.yml @@ -0,0 +1,36 @@ +--- +- name: "Check_certs | check if the certs have already been generated on first master" + stat: + path: "{{ etcd_cert_dir }}/ca.pem" + delegate_to: "{{groups['etcd'][0]}}" + register: etcdcert_master + run_once: true + +- name: "Check_certs | Set default value for 'sync_certs' and 'gen_certs' to false" + set_fact: + sync_certs: false + gen_certs: false + +- name: "Check_certs | Set 'sync_certs' and 'gen_certs' to true" + set_fact: + gen_certs: true + when: not etcdcert_master.stat.exists + run_once: true + +- name: "Check certs | check if a cert already exists" + stat: + path: "{{ etcd_cert_dir }}/ca.pem" + register: etcdcert + +- name: "Check_certs | Set 'sync_certs' to true" + set_fact: + sync_certs: true + when: >- + {%- set certs = {'sync': False} -%} + {%- for server in play_hosts + if (not hostvars[server].etcdcert.stat.exists|default(False)) or + (hostvars[server].etcdcert.stat.checksum|default('') != etcdcert_master.stat.checksum|default('')) -%} + {%- set _ = certs.update({'sync': True}) -%} + {%- endfor -%} + {{ certs.sync }} + run_once: true diff --git a/roles/etcd/tasks/gen_certs.yml b/roles/etcd/tasks/gen_certs.yml new file mode 100644 index 000000000..e0aad58a2 --- /dev/null +++ b/roles/etcd/tasks/gen_certs.yml @@ -0,0 +1,111 @@ +--- + +- name: 
Gen_certs | create etcd script dir + file: + path: "{{ etcd_script_dir }}" + state: directory + owner: root + when: inventory_hostname == groups['etcd'][0] + +- name: Gen_certs | create etcd cert dir + file: + path={{ etcd_cert_dir }} + group={{ etcd_cert_group }} + state=directory + owner=root + recurse=yes + +- name: Gen_certs | write openssl config + template: + src: "openssl.conf.j2" + dest: "{{ etcd_config_dir }}/openssl.conf" + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + +- name: Gen_certs | copy certs generation script + copy: + src: "make-ssl-etcd.sh" + dest: "{{ etcd_script_dir }}/make-ssl-etcd.sh" + mode: 0700 + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + +- name: Gen_certs | run cert generation script + command: "{{ etcd_script_dir }}/make-ssl-etcd.sh -f {{ etcd_config_dir }}/openssl.conf -d {{ etcd_cert_dir }}" + run_once: yes + delegate_to: "{{groups['etcd'][0]}}" + when: gen_certs|default(false) + notify: set etcd_secret_changed + +- set_fact: + master_certs: ['ca-key.pem', 'admin.pem', 'admin-key.pem', 'member.pem', 'member-key.pem'] + node_certs: ['ca.pem', 'node.pem', 'node-key.pem'] + +- name: Gen_certs | Gather etcd master certs + shell: "tar cfz - -C {{ etcd_cert_dir }} {{ master_certs|join(' ') }} {{ node_certs|join(' ') }}| base64 --wrap=0" + register: etcd_master_cert_data + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + when: sync_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Gather etcd node certs + shell: "tar cfz - -C {{ etcd_cert_dir }} {{ node_certs|join(' ') }} | base64 --wrap=0" + register: etcd_node_cert_data + delegate_to: "{{groups['etcd'][0]}}" + run_once: true + when: sync_certs|default(false) + notify: set etcd_secret_changed + +- name: Gen_certs | Copy certs on masters + shell: "echo '{{etcd_master_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ etcd_cert_dir }}" + changed_when: false + when: 
inventory_hostname in groups['etcd'] and sync_certs|default(false) and + inventory_hostname != groups['etcd'][0] + +- name: Gen_certs | Copy certs on nodes + shell: "echo '{{etcd_node_cert_data.stdout|quote}}' | base64 -d | tar xz -C {{ etcd_cert_dir }}" + changed_when: false + when: inventory_hostname in groups['k8s-cluster'] and sync_certs|default(false) and + inventory_hostname not in groups['etcd'] + +- name: Gen_certs | check certificate permissions + file: + path={{ etcd_cert_dir }} + group={{ etcd_cert_group }} + state=directory + owner=kube + recurse=yes + +- name: Gen_certs | set permissions on keys + shell: chmod 0600 {{ etcd_cert_dir}}/*key.pem + when: inventory_hostname in groups['etcd'] + changed_when: false + +- name: Gen_certs | target ca-certificates directory + set_fact: + ca_cert_dir: |- + {% if ansible_os_family == "Debian" -%} + /usr/local/share/ca-certificates + {%- elif ansible_os_family == "RedHat" -%} + /etc/pki/ca-trust/source/anchors + {%- elif ansible_os_family == "CoreOS" -%} + /etc/ssl/certs + {%- endif %} + +- name: Gen_certs | add CA to trusted CA dir + copy: + src: "{{ etcd_cert_dir }}/ca.pem" + dest: "{{ ca_cert_dir }}/etcd-ca.crt" + remote_src: true + register: etcd_ca_cert + +- name: Gen_certs | update ca-certificates (Debian/Ubuntu/CoreOS) + command: update-ca-certificates + when: etcd_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] + +- name: Gen_certs | update ca-certificatesa (RedHat) + command: update-ca-trust extract + when: etcd_ca_cert.changed and ansible_os_family == "RedHat" diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 88dfe59d8..15be1a769 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -1,5 +1,7 @@ --- - include: pre_upgrade.yml +- include: check_certs.yml +- include: gen_certs.yml - include: install.yml when: is_etcd_master - include: set_cluster_health.yml diff --git a/roles/etcd/tasks/pre_upgrade.yml b/roles/etcd/tasks/pre_upgrade.yml new file mode 
100644 index 000000000..d1962ea92 --- /dev/null +++ b/roles/etcd/tasks/pre_upgrade.yml @@ -0,0 +1,34 @@ +- name: "Pre-upgrade | check for etcd-proxy unit file" + stat: + path: /etc/systemd/system/etcd-proxy.service + register: kube_apiserver_service_file + +- name: "Pre-upgrade | check for etcd-proxy init script" + stat: + path: /etc/init.d/etcd-proxy + register: kube_apiserver_init_script + +- name: "Pre-upgrade | stop etcd-proxy if service defined" + service: + name: etcd-proxy + state: stopped + when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + +- name: "Pre-upgrade | remove etcd-proxy service definition" + file: + path: "{{ item }}" + state: absent + when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + with_items: + - /etc/systemd/system/etcd-proxy.service + - /etc/init.d/etcd-proxy + +- name: "Pre-upgrade | find etcd-proxy container" + command: docker ps -aq --filter "name=etcd-proxy*" + register: etcd_proxy_container + ignore_errors: true + +- name: "Pre-upgrade | remove etcd-proxy if it exists" + command: "docker rm -f {{item}}" + with_items: "{{etcd_proxy_container.stdout_lines}}" + diff --git a/roles/etcd/templates/deb-etcd-docker.initd.j2 b/roles/etcd/templates/deb-etcd-docker.initd.j2 index a83aae184..4457b37b9 100644 --- a/roles/etcd/templates/deb-etcd-docker.initd.j2 +++ b/roles/etcd/templates/deb-etcd-docker.initd.j2 @@ -19,8 +19,9 @@ DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker DAEMON_EXEC=`basename $DAEMON` DAEMON_ARGS="run --restart=always --env-file=/etc/etcd.env \ --net=host \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ +-v /etc/ssl/certs:/etc/ssl/certs:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ +-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ {% if etcd_after_v3 %} diff --git 
a/roles/etcd/templates/etcd-docker.service.j2 b/roles/etcd/templates/etcd-docker.service.j2 index a37759fec..ff40b5b59 100644 --- a/roles/etcd/templates/etcd-docker.service.j2 +++ b/roles/etcd/templates/etcd-docker.service.j2 @@ -11,7 +11,8 @@ ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=always {# TODO(mattymo): Allow docker IP binding and disable in envfile -p 2380:2380 -p 2379:2379 #} --net=host \ --v /usr/share/ca-certificates/:/etc/ssl/certs:ro \ +-v /etc/ssl/certs:/etc/ssl/certs:ro \ +-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \ -v /var/lib/etcd:/var/lib/etcd:rw \ --name={{ etcd_member_name | default("etcd") }} \ {{ etcd_image_repo }}:{{ etcd_image_tag }} \ diff --git a/roles/etcd/templates/etcd.j2 b/roles/etcd/templates/etcd.j2 index c6dc4c28b..0b7e1eb9f 100644 --- a/roles/etcd/templates/etcd.j2 +++ b/roles/etcd/templates/etcd.j2 @@ -3,10 +3,19 @@ ETCD_ADVERTISE_CLIENT_URLS={{ etcd_client_url }} ETCD_INITIAL_ADVERTISE_PEER_URLS={{ etcd_peer_url }} ETCD_INITIAL_CLUSTER_STATE={% if etcd_cluster_is_healthy.rc != 0 | bool %}new{% else %}existing{% endif %} -ETCD_LISTEN_CLIENT_URLS=http://{{ etcd_address }}:2379,http://127.0.0.1:2379 +ETCD_LISTEN_CLIENT_URLS=https://{{ etcd_address }}:2379,https://127.0.0.1:2379 ETCD_ELECTION_TIMEOUT=10000 ETCD_INITIAL_CLUSTER_TOKEN=k8s_etcd -ETCD_LISTEN_PEER_URLS=http://{{ etcd_address }}:2380 +ETCD_LISTEN_PEER_URLS=https://{{ etcd_address }}:2380 ETCD_NAME={{ etcd_member_name }} ETCD_PROXY=off ETCD_INITIAL_CLUSTER={{ etcd_peer_addresses }} + +# TLS settings +ETCD_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem +ETCD_CERT_FILE={{ etcd_cert_dir }}/node.pem +ETCD_KEY_FILE={{ etcd_cert_dir }}/node-key.pem +ETCD_PEER_TRUSTED_CA_FILE={{ etcd_cert_dir }}/ca.pem +ETCD_PEER_CERT_FILE={{ etcd_cert_dir }}/member.pem +ETCD_PEER_KEY_FILE={{ etcd_cert_dir }}/member-key.pem +ETCD_PEER_CLIENT_CERT_AUTH=true diff --git a/roles/etcd/templates/openssl.conf.j2 b/roles/etcd/templates/openssl.conf.j2 new file mode 100644 
index 000000000..3ea328289 --- /dev/null +++ b/roles/etcd/templates/openssl.conf.j2 @@ -0,0 +1,39 @@ +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name + +[req_distinguished_name] + +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +subjectAltName = @alt_names + +[ ssl_client ] +extendedKeyUsage = clientAuth, serverAuth +basicConstraints = CA:FALSE +subjectKeyIdentifier=hash +authorityKeyIdentifier=keyid,issuer +subjectAltName = @alt_names + +[ v3_ca ] +basicConstraints = CA:TRUE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +subjectAltName = @alt_names +authorityKeyIdentifier=keyid:always,issuer + +[alt_names] +DNS.1 = localhost +{% for host in groups['etcd'] %} +DNS.{{ 1 + loop.index }} = {{ host }} +{% endfor %} +{% if loadbalancer_apiserver is defined and apiserver_loadbalancer_domain_name is defined %} +{% set idx = groups['etcd'] | length | int + 1 %} +DNS.{{ idx | string }} = {{ apiserver_loadbalancer_domain_name }} +{% endif %} +{% for host in groups['etcd'] %} +IP.{{ 2 * loop.index - 1 }} = {{ hostvars[host]['access_ip'] | default(hostvars[host]['ansible_default_ipv4']['address']) }} +IP.{{ 2 * loop.index }} = {{ hostvars[host]['ip'] | default(hostvars[host]['ansible_default_ipv4']['address']) }} +{% endfor %} +{% set idx = groups['etcd'] | length | int * 2 + 1 %} +IP.{{ idx }} = 127.0.0.1 diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 index ff69b5ec6..5e0586e16 100644 --- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 @@ -27,6 +27,12 @@ spec: env: - name: ETCD_ENDPOINTS value: "{{ etcd_access_endpoint }}" + - name: ETCD_CA_CERT_FILE + value: "{{ etcd_cert_dir }}/ca.pem" + - name: ETCD_CERT_FILE + value: "{{ etcd_cert_dir }}/node.pem" + - name: 
ETCD_KEY_FILE + value: "{{ etcd_cert_dir }}/node-key.pem" # Location of the Kubernetes API - this shouldn't need to be # changed so long as it is used in conjunction with # CONFIGURE_ETC_HOSTS="true". diff --git a/roles/kubernetes/master/defaults/main.yml b/roles/kubernetes/master/defaults/main.yml index ee32ccf57..269ed3714 100644 --- a/roles/kubernetes/master/defaults/main.yml +++ b/roles/kubernetes/master/defaults/main.yml @@ -28,3 +28,9 @@ kube_apiserver_insecure_bind_address: 127.0.0.1 # Logging directory (sysvinit systems) kube_log_dir: "/var/log/kubernetes" + +# ETCD cert dir for connecting apiserver to etcd +etcd_config_dir: /etc/ssl/etcd +etcd_cert_dir: "{{ etcd_config_dir }}/ssl" + + diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index bcf9f22d4..192e6021b 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -14,6 +14,9 @@ spec: - --advertise-address={{ ip | default(ansible_default_ipv4.address) }} - --etcd-servers={{ etcd_access_endpoint }} - --etcd-quorum-read=true + - --etcd-cafile={{ etcd_cert_dir }}/ca.pem + - --etcd-certfile={{ etcd_cert_dir }}/node.pem + - --etcd-keyfile={{ etcd_cert_dir }}/node-key.pem - --insecure-bind-address={{ kube_apiserver_insecure_bind_address }} - --apiserver-count={{ kube_apiserver_count }} - --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota @@ -50,6 +53,9 @@ spec: - mountPath: /etc/ssl/certs name: ssl-certs-host readOnly: true + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true - mountPath: /var/log/ name: logfile volumes: @@ -59,7 +65,9 @@ spec: - hostPath: path: /etc/ssl/certs/ name: ssl-certs-host + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs - hostPath: path: /var/log/ name: logfile - diff --git 
a/roles/kubernetes/node/templates/cni-calico.conf.j2 b/roles/kubernetes/node/templates/cni-calico.conf.j2 index a6558deaa..4e9752ef4 100644 --- a/roles/kubernetes/node/templates/cni-calico.conf.j2 +++ b/roles/kubernetes/node/templates/cni-calico.conf.j2 @@ -2,6 +2,9 @@ "name": "calico-k8s-network", "type": "calico", "etcd_endpoints": "{{ etcd_access_endpoint }}", + "etcd_cert_file": "{{ etcd_cert_dir }}/node.pem", + "etcd_key_file": "{{ etcd_cert_dir }}/node-key.pem", + "etcd_ca_cert_file": "{{ etcd_cert_dir }}/ca.pem", "log_level": "info", "ipam": { "type": "calico-ipam" diff --git a/roles/kubernetes/preinstall/tasks/set_facts.yml b/roles/kubernetes/preinstall/tasks/set_facts.yml index d51bcbed4..aec296c6e 100644 --- a/roles/kubernetes/preinstall/tasks/set_facts.yml +++ b/roles/kubernetes/preinstall/tasks/set_facts.yml @@ -23,14 +23,14 @@ - set_fact: etcd_address="{{ ip | default(ansible_default_ipv4['address']) }}" - set_fact: etcd_access_address="{{ access_ip | default(etcd_address) }}" -- set_fact: etcd_peer_url="http://{{ etcd_access_address }}:2380" -- set_fact: etcd_client_url="http://{{ etcd_access_address }}:2379" +- set_fact: etcd_peer_url="https://{{ etcd_access_address }}:2380" +- set_fact: etcd_client_url="https://{{ etcd_access_address }}:2379" - set_fact: etcd_authority="127.0.0.1:2379" -- set_fact: etcd_endpoint="http://{{ etcd_authority }}" +- set_fact: etcd_endpoint="https://{{ etcd_authority }}" - set_fact: etcd_access_addresses: |- {% for item in groups['etcd'] -%} - http://{{ item }}:2379{% if not loop.last %},{% endif %} + https://{{ item }}:2379{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: etcd_access_endpoint="{% if etcd_multiaccess %}{{ etcd_access_addresses }}{% else %}{{ etcd_endpoint }}{% endif %}" - set_fact: @@ -41,7 +41,7 @@ - set_fact: etcd_peer_addresses: |- {% for item in groups['etcd'] -%} - {{ "etcd"+loop.index|string }}=http://{{ hostvars[item].access_ip | default(hostvars[item].ip | 
default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} + {{ "etcd"+loop.index|string }}=https://{{ hostvars[item].access_ip | default(hostvars[item].ip | default(hostvars[item].ansible_default_ipv4['address'])) }}:2380{% if not loop.last %},{% endif %} {%- endfor %} - set_fact: is_etcd_master: "{{ inventory_hostname in groups['etcd'] }}" diff --git a/roles/network_plugin/calico/defaults/main.yml b/roles/network_plugin/calico/defaults/main.yml index aec7a5e15..7b608ab7e 100644 --- a/roles/network_plugin/calico/defaults/main.yml +++ b/roles/network_plugin/calico/defaults/main.yml @@ -8,3 +8,6 @@ ipip: false # Set to true if you want your calico cni binaries to overwrite the # ones from hyperkube while leaving other cni plugins intact. overwrite_hyperkube_cni: true + +calico_cert_dir: /etc/calico/certs +etcd_cert_dir: /etc/ssl/etcd/ssl diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index ce43d5224..60a728ba0 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -12,6 +12,24 @@ - meta: flush_handlers +- name: Calico | Create calico certs directory + file: + dest: "{{ calico_cert_dir }}" + state: directory + mode: 0750 + owner: root + group: root + +- name: Calico | Link etcd certificates for calico-node + file: + src: "{{ etcd_cert_dir }}/{{ item.s }}" + dest: "{{ calico_cert_dir }}/{{ item.d }}" + state: hard + with_items: + - {s: "ca.pem", d: "ca_cert.crt"} + - {s: "node.pem", d: "cert.crt"} + - {s: "node-key.pem", d: "key.pem"} + - name: Calico | Install calicoctl container script template: src: calicoctl-container.j2 @@ -41,7 +59,7 @@ when: "{{ overwrite_hyperkube_cni|bool }}" - name: Calico | wait for etcd - uri: url=http://localhost:2379/health + uri: url=https://localhost:2379/health validate_certs=no register: result until: result.status == 200 or result.status == 401 retries: 10 @@ -87,7 +105,11 @@ - name: 
Calico | Get calico configuration from etcd command: |- - curl http://localhost:2379/v2/keys/calico/v1/ipam/v4/pool + curl \ + --cacert {{ etcd_cert_dir }}/ca.pem \ + --cert {{ etcd_cert_dir}}/admin.pem \ + --key {{ etcd_cert_dir }}/admin-key.pem \ + https://localhost:2379/v2/keys/calico/v1/ipam/v4/pool register: calico_pools_raw delegate_to: "{{groups['etcd'][0]}}" run_once: true diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2 index c8ac759de..9d47c73ca 100644 --- a/roles/network_plugin/calico/templates/calicoctl-container.j2 +++ b/roles/network_plugin/calico/templates/calicoctl-container.j2 @@ -2,8 +2,12 @@ /usr/bin/docker run --privileged --rm \ --net=host --pid=host \ -e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \ +-e ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt \ +-e ETCD_CERT_FILE=/etc/calico/certs/cert.crt \ +-e ETCD_KEY_FILE=/etc/calico/certs/key.pem \ -v /usr/bin/docker:/usr/bin/docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v /var/run/calico:/var/run/calico \ +-v /etc/calico/certs:/etc/calico/certs:ro \ {{ calicoctl_image_repo }}:{{ calicoctl_image_tag}} \ $@ diff --git a/roles/network_plugin/calico/templates/network-environment.j2 b/roles/network_plugin/calico/templates/network-environment.j2 index 0da2db904..8fd13d36c 100644 --- a/roles/network_plugin/calico/templates/network-environment.j2 +++ b/roles/network_plugin/calico/templates/network-environment.j2 @@ -7,3 +7,6 @@ KUBERNETES_MASTER={{ kube_apiserver_endpoint }} # IP and port of etcd instance used by Calico ETCD_ENDPOINTS={{ etcd_access_endpoint }} +ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt +ETCD_CERT_FILE=/etc/calico/certs/cert.crt +ETCD_KEY_FILE=/etc/calico/certs/key.pem diff --git a/roles/network_plugin/flannel/tasks/main.yml b/roles/network_plugin/flannel/tasks/main.yml index a6fa183ef..8581d2ce7 100644 --- a/roles/network_plugin/flannel/tasks/main.yml +++ 
b/roles/network_plugin/flannel/tasks/main.yml @@ -1,9 +1,11 @@ --- -- name: Flannel | Write flannel configuration - template: - src: network.json - dest: /etc/flannel-network.json - backup: yes +- name: Flannel | Set Flannel etcd configuration + command: |- + {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \ + set /{{ cluster_name }}/network/config \ + '{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } }' + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Flannel | Create flannel pod manifest template: diff --git a/roles/network_plugin/flannel/templates/flannel-pod.yml b/roles/network_plugin/flannel/templates/flannel-pod.yml index 70353f11a..02c41e18b 100644 --- a/roles/network_plugin/flannel/templates/flannel-pod.yml +++ b/roles/network_plugin/flannel/templates/flannel-pod.yml @@ -12,26 +12,16 @@ - name: "subnetenv" hostPath: path: "/run/flannel" - - name: "networkconfig" + - name: "etcd-certs" hostPath: - path: "/etc/flannel-network.json" + path: "{{ etcd_cert_dir }}" containers: - - name: "flannel-server-helper" - image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" - args: - - "--network-config=/etc/flannel-network.json" - - "--etcd-prefix=/{{ cluster_name }}/network" - - "--etcd-endpoints={{ etcd_access_endpoint }}" - volumeMounts: - - name: "networkconfig" - mountPath: "/etc/flannel-network.json" - imagePullPolicy: "Always" - name: "flannel-container" image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}" command: - "/bin/sh" - "-c" - - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network {% if flannel_interface is defined %}-iface {{ flannel_interface }}{% endif %} {% if flannel_public_ip is defined %}-public-ip {{ flannel_public_ip }}{% endif %}" + - "/opt/bin/flanneld -etcd-endpoints {{ etcd_access_endpoint }} -etcd-prefix /{{ cluster_name }}/network -etcd-cafile 
{{ etcd_cert_dir }}/ca.pem -etcd-certfile {{ etcd_cert_dir }}/node.pem -etcd-keyfile {{ etcd_cert_dir }}/node-key.pem {% if flannel_interface is defined %}-iface {{ flannel_interface }}{% endif %} {% if flannel_public_ip is defined %}-public-ip {{ flannel_public_ip }}{% endif %}" ports: - hostPort: 10253 containerPort: 10253 @@ -41,6 +31,8 @@ volumeMounts: - name: "subnetenv" mountPath: "/run/flannel" + - name: "etcd-certs" + mountPath: "{{ etcd_cert_dir }}" securityContext: privileged: true hostNetwork: true diff --git a/roles/network_plugin/flannel/templates/network.json b/roles/network_plugin/flannel/templates/network.json deleted file mode 100644 index cbbec3841..000000000 --- a/roles/network_plugin/flannel/templates/network.json +++ /dev/null @@ -1 +0,0 @@ -{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } } diff --git a/roles/uploads/defaults/main.yml b/roles/uploads/defaults/main.yml index 0774d324c..7b5797881 100644 --- a/roles/uploads/defaults/main.yml +++ b/roles/uploads/defaults/main.yml @@ -5,7 +5,7 @@ local_release_dir: /tmp kube_version: v1.4.3 etcd_version: v3.0.6 -calico_version: v0.22.0 +calico_version: v0.23.0 calico_cni_version: v1.4.2 weave_version: v1.6.1 From fe16fecd8f09dbf09a30300cdc1420755ec34cae Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Thu, 10 Nov 2016 12:49:47 +0300 Subject: [PATCH 31/35] Fix canal's calico networking config for ETCD TLS Also fixes kube-apiserver upgrade that was erroneously deleted in a previous commit. 
--- roles/kubernetes/master/tasks/pre-upgrade.yml | 9 +++++++ roles/network_plugin/canal/tasks/main.yml | 12 +++++---- .../canal/templates/canal-node.yml.j2 | 27 ------------------- .../canal/templates/network.json.j2 | 1 - 4 files changed, 16 insertions(+), 33 deletions(-) delete mode 100644 roles/network_plugin/canal/templates/network.json.j2 diff --git a/roles/kubernetes/master/tasks/pre-upgrade.yml b/roles/kubernetes/master/tasks/pre-upgrade.yml index 239c46be9..3b9f26de1 100644 --- a/roles/kubernetes/master/tasks/pre-upgrade.yml +++ b/roles/kubernetes/master/tasks/pre-upgrade.yml @@ -14,3 +14,12 @@ name: kube-apiserver state: stopped when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + +- name: "Pre-upgrade | remove kube-apiserver service definition" + file: + path: "{{ item }}" + state: absent + when: (kube_apiserver_service_file.stat.exists|default(False) or kube_apiserver_init_script.stat.exists|default(False)) + with_items: + - /etc/systemd/system/kube-apiserver.service + - /etc/init.d/kube-apiserver diff --git a/roles/network_plugin/canal/tasks/main.yml b/roles/network_plugin/canal/tasks/main.yml index ba83edee8..e88cfad7e 100644 --- a/roles/network_plugin/canal/tasks/main.yml +++ b/roles/network_plugin/canal/tasks/main.yml @@ -1,9 +1,11 @@ --- -- name: Canal | Write flannel configuration - template: - src: network.json.j2 - dest: /etc/flannel-network.json - backup: yes +- name: Canal | Set Flannel etcd configuration + command: |- + {{ bin_dir }}/etcdctl --peers={{ etcd_access_addresses }} \ + set /{{ cluster_name }}/network/config \ + '{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } }' + delegate_to: "{{groups['etcd'][0]}}" + run_once: true - name: Canal | Write canal configmap template: diff --git a/roles/network_plugin/canal/templates/canal-node.yml.j2 
b/roles/network_plugin/canal/templates/canal-node.yml.j2 index bdeae6cfd..ef6793f30 100644 --- a/roles/network_plugin/canal/templates/canal-node.yml.j2 +++ b/roles/network_plugin/canal/templates/canal-node.yml.j2 @@ -19,10 +19,6 @@ spec: spec: hostNetwork: true volumes: - # Used by flannel-server-helper - - name: "networkconfig" - hostPath: - path: "/etc/flannel-network.json" # Used by calico/node. - name: lib-modules hostPath: @@ -45,29 +41,6 @@ spec: hostPath: path: /etc/resolv.conf containers: - - name: "flannel-server-helper" - image: "{{ flannel_server_helper_image_repo }}:{{ flannel_server_helper_image_tag }}" - env: - # Cluster name - - name: CLUSTER_NAME - valueFrom: - configMapKeyRef: - name: canal-config - key: cluster_name - # The location of the etcd cluster. - - name: FLANNELD_ETCD_ENDPOINTS - valueFrom: - configMapKeyRef: - name: canal-config - key: etcd_endpoints - args: - - "--network-config=/etc/flannel-network.json" - - "--etcd-prefix=/$(CLUSTER_NAME)/network" - - "--etcd-server=$(FLANNELD_ETCD_ENDPOINTS)" - volumeMounts: - - name: "networkconfig" - mountPath: "/etc/flannel-network.json" - imagePullPolicy: "Always" # Runs the flannel daemon to enable vxlan networking between # container hosts. - name: flannel diff --git a/roles/network_plugin/canal/templates/network.json.j2 b/roles/network_plugin/canal/templates/network.json.j2 deleted file mode 100644 index cbbec3841..000000000 --- a/roles/network_plugin/canal/templates/network.json.j2 +++ /dev/null @@ -1 +0,0 @@ -{ "Network": "{{ kube_pods_subnet }}", "SubnetLen": {{ kube_network_node_prefix }}, "Backend": { "Type": "{{ flannel_backend_type }}" } } From 251800eb16777cf5c6b2edcf7362330352bb5a39 Mon Sep 17 00:00:00 2001 From: Aleksandr Didenko Date: Thu, 10 Nov 2016 13:13:03 +0100 Subject: [PATCH 32/35] Fix policy controller 'etcd_cert_dir' variable is missing from 'kubernetes-apps/ansible' role which breaks Calico policy controller deployment. Also fixing calico-policy-controller.yml. 
--- roles/kubernetes-apps/ansible/defaults/main.yml | 5 ++++- .../templates/calico-policy-controller.yml.j2 | 17 ++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/roles/kubernetes-apps/ansible/defaults/main.yml b/roles/kubernetes-apps/ansible/defaults/main.yml index b1086aa0d..d39d146fd 100644 --- a/roles/kubernetes-apps/ansible/defaults/main.yml +++ b/roles/kubernetes-apps/ansible/defaults/main.yml @@ -9,4 +9,7 @@ kubedns_image_tag: "{{ kubedns_version }}" kubednsmasq_image_repo: "gcr.io/google_containers/kube-dnsmasq-amd64" kubednsmasq_image_tag: "{{ kubednsmasq_version }}" exechealthz_image_repo: "gcr.io/google_containers/exechealthz-amd64" -exechealthz_image_tag: "{{ exechealthz_version }}" \ No newline at end of file +exechealthz_image_tag: "{{ exechealthz_version }}" + +# SSL +etcd_cert_dir: "/etc/ssl/etcd/ssl" diff --git a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 index 5e0586e16..698710b95 100644 --- a/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 +++ b/roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2 @@ -44,12 +44,11 @@ spec: # This removes the need for KubeDNS to resolve the Service. 
- name: CONFIGURE_ETC_HOSTS value: "true" - volumeMounts: - - mountPath: {{ etcd_cert_dir }} - name: etcd-certs - readOnly: true - volumes: - - hostPath: - path: {{ etcd_cert_dir }} - name: etcd-certs - + volumeMounts: + - mountPath: {{ etcd_cert_dir }} + name: etcd-certs + readOnly: true + volumes: + - hostPath: + path: {{ etcd_cert_dir }} + name: etcd-certs From cf7c60029bd2ff5bfa37aa89e19fc88ace68b0d8 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Wed, 9 Nov 2016 14:15:27 +0100 Subject: [PATCH 33/35] Label k8s apps, adjust collect/upload info steps - Drop debugs from collect-info playbook - Drop sudo from collect-info step and add target dir var (required for travis jobs) - Label all k8s apps, including static manifests - Add logs for K8s apps to be collected as well - Fix upload to GCS as a public-read tarball Signed-off-by: Bogdan Dobrelya --- .travis.yml | 11 ++-- .../manifests/kube-apiserver.manifest.j2 | 2 + .../kube-controller-manager.manifest.j2 | 2 + .../manifests/kube-scheduler.manifest.j2 | 2 + .../manifests/kube-proxy.manifest.j2 | 2 + .../manifests/nginx-proxy.manifest.j2 | 2 + scripts/collect-info.yaml | 49 ++++++++++++--- tests/cloud_playbooks/templates/boto.j2 | 11 ++++ .../gcs_life.json.j2} | 2 +- tests/cloud_playbooks/upload-logs-gcs.yml | 62 +++++++++++-------- 10 files changed, 106 insertions(+), 39 deletions(-) create mode 100644 tests/cloud_playbooks/templates/boto.j2 rename tests/cloud_playbooks/{files/gcs_life.json => templates/gcs_life.json.j2} (59%) diff --git a/.travis.yml b/.travis.yml index 3bbb46d1c..e2a9f9f07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -103,11 +103,11 @@ env: before_install: # Install Ansible. 
- - pip install --user boto -U - pip install --user ansible - pip install --user netaddr # W/A https://github.com/ansible/ansible-modules-core/issues/5196#issuecomment-253766186 - pip install --user apache-libcloud==0.20.1 + - pip install --user boto==2.9.0 -U cache: - directories: @@ -149,16 +149,19 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL after_failure: - - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml + - > + $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER + -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e dir=$HOME + scripts/collect-info.yaml - > $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local - -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} + -e gce_project_id=${GCE_PROJECT_ID} -e gs_key=${GS_ACCESS_KEY_ID} -e gs_skey=${GS_SECRET_ACCESS_KEY} -e ostype=${CLOUD_IMAGE} -e commit=${TRAVIS_COMMIT} - -e pr=${TRAVIS_PULL_REQUEST} + -e dir=${HOME} after_script: - > diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index 4100e8a34..a6718f9e5 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-apiserver namespace: kube-system + labels: + k8s-app: kube-apiserver spec: hostNetwork: true containers: diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 index 3a9e1ef1b..a528f361e 100644 --- 
a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-controller-manager namespace: kube-system + labels: + k8s-app: kube-controller spec: hostNetwork: true containers: diff --git a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 index 024ddbfaa..15a705937 100644 --- a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-scheduler namespace: kube-system + labels: + k8s-app: kube-scheduler spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 index 7abffe053..86d1e6f9e 100644 --- a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-proxy namespace: kube-system + labels: + k8s-app: kube-proxy spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 index 50e054268..8e5dfcc11 100644 --- a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: nginx-proxy namespace: kube-system + labels: + k8s-app: kube-nginx spec: hostNetwork: true containers: diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 685b8b787..0ba47866e 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -1,10 +1,9 @@ --- - hosts: all - become: true + become: false 
gather_facts: no vars: - debug: false commands: - name: timedate_info cmd: timedatectl status @@ -26,6 +25,37 @@ cmd: journalctl -p err --utc --no-pager - name: etcd_info cmd: etcdctl --debug cluster-health + - name: weave_info + cmd: weave report | jq "." + - name: weave_logs + cmd: docker logs weave > weave.log + - name: kubedns_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kubedns -o name`; + do kubectl logs $i --namespace kube-system kubedns > kubedns.log; done" + - name: apiserver_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-apiserver -o name`; + do kubectl logs $i --namespace kube-system > kube-apiserver.log; done" + - name: controller_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-controller -o name`; + do kubectl logs $i --namespace kube-system > kube-controller.log; done" + - name: scheduler_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-scheduler -o name`; + do kubectl logs $i --namespace kube-system > kube-scheduler.log; done" + - name: proxy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-proxy -o name`; + do kubectl logs $i --namespace kube-system > kube-proxy.log; done" + - name: nginx_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-nginx -o name`; + do kubectl logs $i --namespace kube-system > kube-nginx.log; done" + - name: flannel_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l app=flannel -o name`; + do kubectl logs $i --namespace kube-system flannel-container > flannel.log; done" + - name: canal_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=canal-node -o name`; + do kubectl logs $i --namespace kube-system flannel > flannel.log; done" + - name: calico_policy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=calico-policy -o name`; + do kubectl logs $i --namespace kube-system 
calico-policy-controller > calico-policy-controller.log; done" logs: - /var/log/syslog @@ -38,6 +68,15 @@ - /var/log/calico/bird6/current - /var/log/calico/felix/current - /var/log/calico/confd/current + - weave.log + - kubedns.log + - kube-apiserver.log + - kube-controller.log + - kube-scheduler.log + - kube-proxy.log + - kube-nginx.log + - flannel.log + - calico-policy-controller.log tasks: - name: Storing commands output @@ -47,10 +86,6 @@ with_items: "{{commands}}" no_log: True - - debug: var=item - with_items: "{{output.results}}" - when: debug - - name: Fetch results fetch: src={{ item.name }} dest=/tmp/collect-info/commands with_items: "{{commands}}" @@ -60,7 +95,7 @@ with_items: "{{logs}}" - name: Pack results and logs - local_action: shell GZIP=-9 tar --remove-files -cvzf logs.tar.gz -C /tmp collect-info + local_action: shell GZIP=-9 tar --remove-files -cvzf {{dir|default(".")}}/logs.tar.gz -C /tmp collect-info run_once: true - name: Clean up collected command outputs diff --git a/tests/cloud_playbooks/templates/boto.j2 b/tests/cloud_playbooks/templates/boto.j2 new file mode 100644 index 000000000..660f1a0a3 --- /dev/null +++ b/tests/cloud_playbooks/templates/boto.j2 @@ -0,0 +1,11 @@ +[Credentials] +gs_access_key_id = {{ gs_key }} +gs_secret_access_key = {{ gs_skey }} +[Boto] +https_validate_certificates = True +[GoogleCompute] +[GSUtil] +default_project_id = {{ gce_project_id }} +content_language = en +default_api_version = 2 +[OAuth2] diff --git a/tests/cloud_playbooks/files/gcs_life.json b/tests/cloud_playbooks/templates/gcs_life.json.j2 similarity index 59% rename from tests/cloud_playbooks/files/gcs_life.json rename to tests/cloud_playbooks/templates/gcs_life.json.j2 index eaab30b4f..a666c8fef 100644 --- a/tests/cloud_playbooks/files/gcs_life.json +++ b/tests/cloud_playbooks/templates/gcs_life.json.j2 @@ -3,7 +3,7 @@ [ { "action": {"type": "Delete"}, - "condition": {"age": 2} + "condition": {"age": {{expire_days}}} } ] } diff --git 
a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml index 12013798d..80d651ba4 100644 --- a/tests/cloud_playbooks/upload-logs-gcs.yml +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -3,65 +3,73 @@ become: false gather_facts: no + vars: + expire_days: 2 + tasks: - name: Generate uniq bucket name prefix - shell: date +%s | sha256sum | base64 | head -c 32 + shell: date +%Y%m%d register: out - name: replace_test_id set_fact: - test_name: "kargo-{{ commit }}-{{ pr }}-{{ out.stdout|lower }}-{{ test_id | regex_replace('\\.', '-') }}" + test_name: "kargo-ci-{{ out.stdout }}" + + - set_fact: + file_name: "{{ostype}}-{{kube_network_plugin}}-{{commit}}-logs.tar.gz" - name: Create a bucket gc_storage: bucket: "{{ test_name }}" mode: create - permission: private + permission: public-read gs_access_key: "{{ gs_key }}" gs_secret_key: "{{ gs_skey }}" no_log: True + - name: Create a lifecycle template for the bucket + template: + src: gcs_life.json.j2 + dest: "{{dir}}/gcs_life.json" + + - name: Create a boto config to access GCS + template: + src: boto.j2 + dest: "{{dir}}/.boto" + no_log: True + - name: Download gsutil cp installer get_url: url: https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash - dest: /tmp/gcp-installer.sh + dest: "{{dir}}/gcp-installer.sh" - name: Get gsutil tool - script: /tmp/gcp-installer.sh + script: "{{dir}}/gcp-installer.sh" environment: CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + CLOUDSDK_INSTALL_DIR: "{{dir}}" no_log: True - - - name: Create a lifecycle template for the bucket - file: src=gcs_life.json path=/tmp/gcs_life.json - - - name: Hack the boto config for GCS access keys - lineinfile: - dest: .boto - line: "gs_access_key_id = {{ gs_key }}" - regexp: "^#gs_access_key_id = .*$" - no_log: True - - - name: Hack the boto config for GCS secret access keys - lineinfile: - dest: .boto - line: "gs_secret_access_key = {{ gs_skey }}" - regexp: "^#gs_secret_access_key = .*$" - no_log: True + 
ignore_errors: true - name: Apply the lifecycle rules - shell: bash google-cloud-sdk/bin/gsutil lifecycle set /tmp/gcs_life.json gs://{{ test_name }} + command: "{{dir}}/google-cloud-sdk/bin/gsutil lifecycle set {{dir}}/gcs_life.json gs://{{test_name}}" environment: - BOTO_CONFIG: .boto + BOTO_CONFIG: "{{dir}}/.boto" + no_log: True - name: Upload collected diagnostic info gc_storage: bucket: "{{ test_name }}" mode: put - permission: private - object: "build-{{ ostype }}-{{ kube_network_plugin }}-logs.tar.gz" - src: logs.tar.gz + permission: public-read + object: "{{ file_name }}" + src: "{{dir}}/logs.tar.gz" headers: '{"Content-Encoding": "x-gzip"}' gs_access_key: "{{ gs_key }}" gs_secret_key: "{{ gs_skey }}" + expiration: "{{expire_days * 36000|int}}" ignore_errors: true + no_log: True + + - debug: + msg: "A public url https://storage.googleapis.com/{{test_name}}/{{file_name}}" From c58bd33af7d7d236cb8056099a7a33a1c6bc7c8f Mon Sep 17 00:00:00 2001 From: Artem Panchenko Date: Mon, 7 Nov 2016 22:37:12 +0200 Subject: [PATCH 34/35] Support new version of 'calicoctl' (>=v1.0.0) Since version 'v1.0.0-beta' calicoctl is written in Go and its API differs from old Python based utility. Added support of both old and new version of the utility. --- docs/calico.md | 24 +++++++ roles/download/defaults/main.yml | 3 +- roles/network_plugin/calico/tasks/main.yml | 66 +++++++++++++++---- .../calico/templates/calico-node.service.j2 | 12 +++- .../calico/templates/calicoctl-container.j2 | 2 +- .../calico/templates/deb-calico.initd.j2 | 11 +++- .../calico/templates/rh-calico.initd.j2 | 13 +++- 7 files changed, 113 insertions(+), 18 deletions(-) diff --git a/docs/calico.md b/docs/calico.md index 50744f63f..a8bffc0db 100644 --- a/docs/calico.md +++ b/docs/calico.md @@ -10,18 +10,42 @@ docker ps | grep calico The **calicoctl** command allows to check the status of the network workloads. 
* Check the status of Calico nodes +``` +calicoctl node status +``` + +or for versions prior *v1.0.0*: + ``` calicoctl status ``` * Show the configured network subnet for containers +``` + calicoctl get ippool -o wide +``` + +or for versions prior *v1.0.0*: + ``` calicoctl pool show ``` * Show the workloads (ip addresses of containers and their located) +``` +calicoctl get workloadEndpoint -o wide +``` + +and + +``` +calicoctl get hostEndpoint -o wide +``` + +or for versions prior *v1.0.0*: + ``` calicoctl endpoint show --detail ``` diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index bf8f8e7c2..1ea220fd1 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -39,7 +39,8 @@ flannel_server_helper_image_tag: "{{ flannel_server_helper_version }}" flannel_image_repo: "quay.io/coreos/flannel" flannel_image_tag: "{{ flannel_version }}" calicoctl_image_repo: "calico/ctl" -# TODO(mattymo): v1.0.0-beta has different syntax. Needs work to upgrade +# TODO(apanchenko): v1.0.0-beta can't execute `node run` from Docker container +# for details see https://github.com/projectcalico/calico-containers/issues/1291 calicoctl_image_tag: "v0.22.0" calico_node_image_repo: "calico/node" calico_node_image_tag: "{{ calico_version }}" diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index 60a728ba0..6563a1f65 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -78,30 +78,54 @@ delegate_to: "{{groups['etcd'][0]}}" run_once: true -- name: Calico | Define ipip pool argument +- name: Calico | Check calicoctl version + run_once: true + set_fact: + legacy_calicoctl: "{{ calicoctl_image_tag | version_compare('v1.0.0', '<') }}" + +- name: Calico | Configure calico network pool + shell: > + echo '{ + "kind": "ipPool", + "spec": {"disabled": false, "ipip": {"enabled": {{ cloud_provider is defined or ipip }}}, + "nat-outgoing": {{ 
nat_outgoing|default(false) and not peer_with_router|default(false) }}}, + "apiVersion": "v1", + "metadata": {"cidr": "{{ kube_pods_subnet }}"} + }' + | {{ bin_dir }}/calicoctl create -f - + environment: + NO_DEFAULT_POOLS: true + run_once: true + when: (not legacy_calicoctl and + ("Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout)) + +- name: Calico (old) | Define ipip pool argument run_once: true set_fact: ipip_arg: "--ipip" - when: cloud_provider is defined or ipip|default(false) + when: (legacy_calicoctl and + (cloud_provider is defined or ipip)) -- name: Calico | Define nat-outgoing pool argument +- name: Calico (old) | Define nat-outgoing pool argument run_once: true set_fact: nat_arg: "--nat-outgoing" - when: nat_outgoing|default(false) and not peer_with_router|default(false) + when: (legacy_calicoctl and + nat_outgoing|default(false) and not peer_with_router|default(false)) -- name: Calico | Define calico pool task name +- name: Calico (old) | Define calico pool task name run_once: true set_fact: pool_task_name: "with options {{ ipip_arg|default('') }} {{ nat_arg|default('') }}" - when: ipip_arg|default(false) or nat_arg|default(false) + when: (legacy_calicoctl and (ipip_arg|default(false) or nat_arg|default(false))) -- name: Calico | Configure calico network pool {{ pool_task_name|default('') }} +- name: Calico (old) | Configure calico network pool {{ pool_task_name|default('') }} command: "{{ bin_dir}}/calicoctl pool add {{ kube_pods_subnet }} {{ ipip_arg|default('') }} {{ nat_arg|default('') }}" environment: NO_DEFAULT_POOLS: true run_once: true - when: '"Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout' + when: (legacy_calicoctl and + ("Key not found" in calico_conf.stdout or "nodes" not in calico_conf.stdout)) - name: Calico | Get calico configuration from etcd command: |- @@ -154,10 +178,30 @@ enabled: yes - name: Calico | Disable node mesh - shell: "{{ bin_dir }}/calicoctl bgp node-mesh off" - when: 
peer_with_router|default(false) and inventory_hostname in groups['kube-node'] + shell: "{{ bin_dir }}/calicoctl config set nodeToNodeMesh off" + when: (not legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) - name: Calico | Configure peering with router(s) + shell: > + echo '{ + "kind": "bgppeer", + "spec": {"asNumber": {{ item.as }}}, + "apiVersion": "v1", + "metadata": {"node": "{{ inventory_hostname }}", "scope": "node", "peerIP": "{{ item.router_id }}"} + }' + | {{ bin_dir }}/calicoctl create -f - + with_items: peers + when: (not legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) + +- name: Calico (old) | Disable node mesh + shell: "{{ bin_dir }}/calicoctl bgp node-mesh off" + when: (legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) + +- name: Calico (old) | Configure peering with router(s) shell: "{{ bin_dir }}/calicoctl node bgp peer add {{ item.router_id }} as {{ item.as }}" with_items: peers - when: peer_with_router|default(false) and inventory_hostname in groups['kube-node'] + when: (legacy_calicoctl and + peer_with_router|default(false) and inventory_hostname in groups['kube-node']) diff --git a/roles/network_plugin/calico/templates/calico-node.service.j2 b/roles/network_plugin/calico/templates/calico-node.service.j2 index 2a7775fd4..87a51fac8 100644 --- a/roles/network_plugin/calico/templates/calico-node.service.j2 +++ b/roles/network_plugin/calico/templates/calico-node.service.j2 @@ -7,11 +7,19 @@ Wants=docker.socket [Service] User=root PermissionsStartOnly=true +{% if legacy_calicoctl %} {% if inventory_hostname in groups['kube-node'] and peer_with_router|default(false)%} ExecStart={{ bin_dir }}/calicoctl node --ip={{ip | default(ansible_default_ipv4.address) }} --as={{ local_as }} --detach=false --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} -{% else %} +{% else %} ExecStart={{ bin_dir 
}}/calicoctl node --ip={{ip | default(ansible_default_ipv4.address) }} --detach=false --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} -{% endif %} +{% endif %} +{% else %} +{% if inventory_hostname in groups['kube-node'] and peer_with_router|default(false)%} +ExecStart={{ bin_dir }}/calicoctl node run --ip={{ip | default(ansible_default_ipv4.address) }} --as={{ local_as }} --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} +{% else %} +ExecStart={{ bin_dir }}/calicoctl node run --ip={{ip | default(ansible_default_ipv4.address) }} --node-image={{ calico_node_image_repo }}:{{ calico_node_image_tag }} +{% endif %} +{% endif %} Restart=always RestartSec=10s diff --git a/roles/network_plugin/calico/templates/calicoctl-container.j2 b/roles/network_plugin/calico/templates/calicoctl-container.j2 index 9d47c73ca..7be30928a 100644 --- a/roles/network_plugin/calico/templates/calicoctl-container.j2 +++ b/roles/network_plugin/calico/templates/calicoctl-container.j2 @@ -1,5 +1,5 @@ #!/bin/bash -/usr/bin/docker run --privileged --rm \ +/usr/bin/docker run -i --privileged --rm \ --net=host --pid=host \ -e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \ -e ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt \ diff --git a/roles/network_plugin/calico/templates/deb-calico.initd.j2 b/roles/network_plugin/calico/templates/deb-calico.initd.j2 index ddbc22959..e155cae9c 100644 --- a/roles/network_plugin/calico/templates/deb-calico.initd.j2 +++ b/roles/network_plugin/calico/templates/deb-calico.initd.j2 @@ -37,7 +37,7 @@ DAEMON_USER=root do_status() { - if [ $($DOCKER ps | awk '{ print $2 }' | grep calico/node | wc -l) -eq 1 ]; then + if [ $($DOCKER ps --format "{% raw %}{{.Image}}{% endraw %}" | grep -cw 'calico/node') -eq 1 ]; then return 0 else return 1 @@ -51,7 +51,11 @@ do_start() do_status retval=$?
if [ $retval -ne 0 ]; then +{% if legacy_calicoctl %} ${DAEMON} node --ip=${DEFAULT_IPV4} >>/dev/null && return 0 || return 2 +{% else %} + ${DAEMON} node run --ip=${DEFAULT_IPV4} >>/dev/null && return 0 || return 2 +{% endif %} else return 1 fi @@ -62,7 +66,12 @@ do_start() # do_stop() { +{% if legacy_calicoctl %} ${DAEMON} node stop >> /dev/null || ${DAEMON} node stop --force >> /dev/null +{% else %} + echo "Current version of ${DAEMON} doesn't support 'node stop' command!" + return 1 +{% endif %} } diff --git a/roles/network_plugin/calico/templates/rh-calico.initd.j2 b/roles/network_plugin/calico/templates/rh-calico.initd.j2 index 6fb870652..7fea72521 100644 --- a/roles/network_plugin/calico/templates/rh-calico.initd.j2 +++ b/roles/network_plugin/calico/templates/rh-calico.initd.j2 @@ -31,7 +31,7 @@ logfile="/var/log/$prog" do_status() { - if [ $($dockerexec ps | awk '{ print $2 }' | grep calico/node | wc -l) -ne 1 ]; then + if [ $($dockerexec ps --format "{% raw %}{{.Image}}{% endraw %}" | grep -cw 'calico/node') -ne 1 ]; then return 1 fi } @@ -53,7 +53,11 @@ do_start() { if [ $retval -ne 0 ]; then printf "Starting $prog:\t" echo "\n$(date)\n" >> $logfile - $exec node --ip=${DEFAULT_IPV4} &>>$logfile +{% if legacy_calicoctl %} + $exec node --ip=${DEFAULT_IPV4} &>>$logfile +{% else %} + $exec node run --ip=${DEFAULT_IPV4} &>>$logfile +{% endif %} success echo else @@ -65,7 +69,12 @@ do_start() { do_stop() { echo -n $"Stopping $prog: " +{% if legacy_calicoctl %} $exec node stop >> /dev/null || $exec node stop --force >> /dev/null +{% else %} + echo "Current version of ${exec} doesn't support 'node stop' command!" + return 1 +{% endif %} retval=$?
echo return $retval From 46ee9faca91e7a4312ce1d31eb31b2e5e22d8e9c Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 11 Nov 2016 18:39:22 +0300 Subject: [PATCH 35/35] Fix ca certificate loading on CoreOS --- roles/etcd/tasks/gen_certs.yml | 15 ++++++++------- roles/kubernetes/secrets/tasks/gen_certs.yml | 14 +++++++------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/roles/etcd/tasks/gen_certs.yml b/roles/etcd/tasks/gen_certs.yml index e0aad58a2..8d1d34b74 100644 --- a/roles/etcd/tasks/gen_certs.yml +++ b/roles/etcd/tasks/gen_certs.yml @@ -84,21 +84,21 @@ when: inventory_hostname in groups['etcd'] changed_when: false -- name: Gen_certs | target ca-certificates directory +- name: Gen_certs | target ca-certificate store file set_fact: - ca_cert_dir: |- + ca_cert_path: |- {% if ansible_os_family == "Debian" -%} - /usr/local/share/ca-certificates + /usr/local/share/ca-certificates/etcd-ca.crt {%- elif ansible_os_family == "RedHat" -%} - /etc/pki/ca-trust/source/anchors + /etc/pki/ca-trust/source/anchors/etcd-ca.crt {%- elif ansible_os_family == "CoreOS" -%} - /etc/ssl/certs + /etc/ssl/certs/etcd-ca.pem {%- endif %} - name: Gen_certs | add CA to trusted CA dir copy: src: "{{ etcd_cert_dir }}/ca.pem" - dest: "{{ ca_cert_dir }}/etcd-ca.crt" + dest: "{{ ca_cert_path }}" remote_src: true register: etcd_ca_cert @@ -106,6 +106,7 @@ command: update-ca-certificates when: etcd_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] -- name: Gen_certs | update ca-certificatesa (RedHat) +- name: Gen_certs | update ca-certificates (RedHat) command: update-ca-trust extract when: etcd_ca_cert.changed and ansible_os_family == "RedHat" + diff --git a/roles/kubernetes/secrets/tasks/gen_certs.yml b/roles/kubernetes/secrets/tasks/gen_certs.yml index bec1d9f16..28ae04892 100644 --- a/roles/kubernetes/secrets/tasks/gen_certs.yml +++ b/roles/kubernetes/secrets/tasks/gen_certs.yml @@ -65,21 +65,21 @@ when: inventory_hostname in groups['kube-master'] 
changed_when: false -- name: Gen_certs | target ca-certificates directory +- name: Gen_certs | target ca-certificates path set_fact: - ca_cert_dir: |- + ca_cert_path: |- {% if ansible_os_family == "Debian" -%} - /usr/local/share/ca-certificates + /usr/local/share/ca-certificates/kube-ca.crt {%- elif ansible_os_family == "RedHat" -%} - /etc/pki/ca-trust/source/anchors + /etc/pki/ca-trust/source/anchors/kube-ca.crt {%- elif ansible_os_family == "CoreOS" -%} - /etc/ssl/certs + /etc/ssl/certs/kube-ca.pem {%- endif %} - name: Gen_certs | add CA to trusted CA dir copy: src: "{{ kube_cert_dir }}/ca.pem" - dest: "{{ ca_cert_dir }}/kube-ca.crt" + dest: "{{ ca_cert_path }}" remote_src: true register: kube_ca_cert @@ -87,7 +87,7 @@ command: update-ca-certificates when: kube_ca_cert.changed and ansible_os_family in ["Debian", "CoreOS"] -- name: Gen_certs | update ca-certificatesa (RedHat) +- name: Gen_certs | update ca-certificates (RedHat) command: update-ca-trust extract when: kube_ca_cert.changed and ansible_os_family == "RedHat"