Merge pull request #488 from bogdando/issue/480

Distribute container images across nodes
pull/417/merge
Smaine Kahlouch 2016-09-16 09:15:10 +02:00 committed by GitHub
commit 7760f75ae0
19 changed files with 153 additions and 47 deletions

View File

@ -0,0 +1,19 @@
Large deployments of K8s
========================
For large-scale deployments, consider the following configuration changes:
* Tune [ansible settings](http://docs.ansible.com/ansible/intro_configuration.html)
for `forks` and `timeout` vars to fit large numbers of nodes being deployed.
* Override the containers' `foo_image_repo` vars to point to an intranet registry.
* Set ``download_run_once: true`` to download binaries and container
images only once and then push them to the nodes in batches.
* Adjust the `retry_stagger` global var as appropriate. It should keep the
load on the delegate (the first K8s master node) sane when retrying failed
push or download operations.
For example, when deploying 200 nodes, you may want to run ansible with
``--forks=50`` and ``--timeout=600``, and define ``retry_stagger: 60``.

View File

@ -7,6 +7,8 @@ bin_dir: /usr/local/bin
# Where the binaries will be downloaded.
# Note: ensure that you've enough disk space (about 1G)
local_release_dir: "/tmp/releases"
# Random shifts for retrying failed ops like pushing/downloading:
# each retry is delayed by "(retry_stagger | random) + 3"
retry_stagger: 5
# Uncomment this line for CoreOS only.
# Directory where python binary is installed

View File

@ -30,7 +30,7 @@
register: keyserver_task_result
until: keyserver_task_result|success
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
with_items: "{{ docker_repo_key_info.repo_keys }}"
when: ansible_os_family != "CoreOS"
@ -58,7 +58,7 @@
register: docker_task_result
until: docker_task_result|success
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
with_items: "{{ docker_package_info.pkgs }}"
when: (ansible_os_family != "CoreOS") and (docker_package_info.pkgs|length > 0)

View File

@ -13,6 +13,8 @@ etcd_version: v3.0.6
calico_version: v0.20.0
calico_cni_version: v1.3.1
weave_version: v1.6.1
flannel_version: 0.5.5
flannel_server_helper_version: 0.1
# Download URL's
etcd_download_url: "https://storage.googleapis.com/kargo/{{etcd_version}}_etcd"
@ -26,6 +28,22 @@ calico_cni_ipam_checksum: "3df6951a30749c279229e7e318e74ac4e41263996125be65257db
weave_checksum: "9bf9d6e5a839e7bcbb28cc00c7acae9d09284faa3e7a3720ca9c2b9e93c68580"
etcd_checksum: "385afd518f93e3005510b7aaa04d38ee4a39f06f5152cd33bb86d4f0c94c7485"
# Containers
# Possible values: host, docker
etcd_deployment_type: "docker"
etcd_image_repo: "quay.io/coreos/etcd"
etcd_image_tag: "{{ etcd_version }}"
flannel_server_helper_image_repo: "gcr.io/google_containers/flannel-server-helper"
flannel_server_helper_image_tag: "{{ flannel_server_helper_version }}"
flannel_image_repo: "quay.io/coreos/flannel"
flannel_image_tag: "{{ flannel_version }}"
calicoctl_image_repo: "calico/ctl"
calicoctl_image_tag: "{{ calico_version }}"
calico_node_image_repo: "calico/node"
calico_node_image_tag: "{{ calico_version }}"
hyperkube_image_repo: "quay.io/coreos/hyperkube"
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
downloads:
calico_cni_plugin:
dest: calico/bin/calico
@ -35,6 +53,7 @@ downloads:
url: "{{ calico_cni_download_url }}"
owner: "root"
mode: "0755"
enabled: "{{ kube_network_plugin == 'calico' }}"
calico_cni_plugin_ipam:
dest: calico/bin/calico-ipam
version: "{{calico_cni_version}}"
@ -43,6 +62,7 @@ downloads:
url: "{{ calico_cni_ipam_download_url }}"
owner: "root"
mode: "0755"
enabled: "{{ kube_network_plugin == 'calico' }}"
weave:
dest: weave/bin/weave
version: "{{weave_version}}"
@ -51,6 +71,7 @@ downloads:
sha256: "{{ weave_checksum }}"
owner: "root"
mode: "0755"
enabled: "{{ kube_network_plugin == 'weave' }}"
etcd:
version: "{{etcd_version}}"
dest: "etcd/etcd-{{ etcd_version }}-linux-amd64.tar.gz"
@ -60,10 +81,38 @@ downloads:
unarchive: true
owner: "etcd"
mode: "0755"
nothing:
enabled: false
container: "{{ etcd_deployment_type == 'docker' }}"
repo: "{{ etcd_image_repo }}"
tag: "{{ etcd_image_tag }}"
hyperkube:
container: true
repo: "{{ hyperkube_image_repo }}"
tag: "{{ hyperkube_image_tag }}"
flannel:
container: true
repo: "{{ flannel_image_repo }}"
tag: "{{ flannel_image_tag }}"
enabled: "{{ kube_network_plugin == 'flannel' }}"
flannel_server_helper:
container: true
repo: "{{ flannel_server_helper_image_repo }}"
tag: "{{ flannel_server_helper_image_tag }}"
enabled: "{{ kube_network_plugin == 'flannel' }}"
calicoctl:
container: true
repo: "{{ calicoctl_image_repo }}"
tag: "{{ calicoctl_image_tag }}"
enabled: "{{ kube_network_plugin == 'calico' }}"
calico_node:
container: true
repo: "{{ calico_node_image_repo }}"
tag: "{{ calico_node_image_tag }}"
enabled: "{{ kube_network_plugin == 'calico' }}"
download:
container: "{{ file.container|default('false') }}"
repo: "{{ file.repo|default(None) }}"
tag: "{{ file.tag|default(None) }}"
enabled: "{{ file.enabled|default('true') }}"
dest: "{{ file.dest|default(None) }}"
version: "{{ file.version|default(None) }}"

View File

@ -4,11 +4,12 @@
- name: downloading...
debug:
msg: "{{ download.url }}"
when: "{{ download.enabled|bool }}"
when: "{{ download.enabled|bool and not download.container|bool }}"
- name: Create dest directories
file: path={{local_release_dir}}/{{download.dest|dirname}} state=directory recurse=yes
when: "{{ download.enabled|bool }}"
when: "{{ download.enabled|bool and not download.container|bool }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
- name: Download items
@ -18,7 +19,12 @@
sha256sum: "{{download.sha256 | default(omit)}}"
owner: "{{ download.owner|default(omit) }}"
mode: "{{ download.mode|default(omit) }}"
when: "{{ download.enabled|bool }}"
register: get_url_result
until: "'OK' in get_url_result.msg or 'file already exists' in get_url_result.msg"
retries: 4
delay: "{{ retry_stagger | random + 3 }}"
when: "{{ download.enabled|bool and not download.container|bool }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
- name: Extract archives
@ -28,7 +34,8 @@
owner: "{{ download.owner|default(omit) }}"
mode: "{{ download.mode|default(omit) }}"
copy: no
when: "{{ download.enabled|bool }} and ({{download.unarchive is defined and download.unarchive == True}})"
when: "{{ download.enabled|bool and not download.container|bool and download.unarchive is defined and download.unarchive == True }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
- name: Fix permissions
@ -37,5 +44,50 @@
path: "{{local_release_dir}}/{{download.dest}}"
owner: "{{ download.owner|default(omit) }}"
mode: "{{ download.mode|default(omit) }}"
when: "{{ download.enabled|bool }} and ({{download.unarchive is not defined or download.unarchive == False}})"
when: "{{ download.enabled|bool and not download.container|bool and (download.unarchive is not defined or download.unarchive == False) }}"
delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
run_once: "{{ download_run_once|bool }}"
# Print the image reference (repo:tag) that is about to be pulled.
# Runs only for items that are enabled and flagged as container images.
- name: pulling...
  debug:
    msg: "{{ download.repo }}:{{ download.tag }}"
  # `when` already takes a raw Jinja2 expression, so no "{{ }}" wrapper.
  when: download.enabled|bool and download.container|bool
# Make sure the directory holding the saved/loaded image tarballs exists.
# The task has no delegate_to/run_once, so it runs on each host of the play.
- name: Create dest directory for saved/loaded container images
  file:
    path: "{{local_release_dir}}/containers"
    state: directory
    recurse: true
  # `when` already takes a raw Jinja2 expression, so no "{{ }}" wrapper.
  when: download.enabled|bool and download.container|bool
# NOTE(bogdando): pulling through the docker CLI brings no docker-py
# dependency to the nodes.
# The pull is retried up to 4 times until it exits with rc 0; each retry is
# delayed by "(retry_stagger | random) + 3". When download_run_once is set,
# the pull is delegated to the first kube-master and executed only once.
- name: Download containers
  command: "/usr/bin/docker pull {{ download.repo }}:{{ download.tag }}"
  register: pull_task_result
  until: pull_task_result.rc == 0
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"
  # `when` already takes a raw Jinja2 expression, so no "{{ }}" wrapper.
  when: download.enabled|bool and download.container|bool
  delegate_to: "{{ groups['kube-master'][0] if download_run_once|bool else omit }}"
  run_once: "{{ download_run_once|bool }}"
# Compute the tarball path used to save/ship/load the image:
#   <local_release_dir>/containers/<repo>:<tag>.tar
# where "/", NUL and ":" inside the repo and inside the tag are replaced
# with "_". The fact is set unconditionally, i.e. for every download item.
- name: Download | set container image tarball name
  set_fact:
    fname: "{{local_release_dir}}/containers/{{download.repo|regex_replace('/|\0|:', '_')}}:{{download.tag|regex_replace('/|\0|:', '_')}}.tar"
# Export the pulled image to the tarball at `fname`, once, on the first
# kube-master.
# Guarded by download.enabled and download.container: without these checks
# the task also ran for binary (non-container) and disabled download items,
# for which repo/tag are not set and `docker save` cannot succeed.
- name: Download | save container images
  shell: docker save "{{ download.repo }}:{{ download.tag }}" > "{{ fname }}"
  delegate_to: "{{groups['kube-master'][0]}}"
  run_once: true
  when: >-
    ansible_os_family != "CoreOS" and download_run_once|bool and
    download.enabled|bool and download.container|bool
# Push the saved image tarball to every host except the first kube-master.
# Retried up to 4 times; each retry is delayed by
# "(retry_stagger | random) + 3".
# Guarded by download.enabled and download.container so that binary
# (non-container) and disabled download items are skipped.
# NOTE(review): there is no delegate_to here, so synchronize reads `src`
# from the host the play is run from, while the tarball is saved on the
# first kube-master — confirm the intended source host.
- name: Download | get container images
  synchronize:
    src: "{{ fname }}"
    dest: "{{local_release_dir}}/containers"
    mode: push
  register: get_task
  until: get_task|success
  retries: 4
  delay: "{{ retry_stagger | random + 3 }}"
  when: >-
    ansible_os_family != "CoreOS" and
    inventory_hostname != groups['kube-master'][0] and
    download_run_once|bool and
    download.enabled|bool and download.container|bool
# Import the image tarball into the local docker daemon on every host
# except the first kube-master (which pulled the image itself).
# Guarded by download.enabled and download.container so that binary
# (non-container) and disabled download items are skipped.
- name: Download | load container images
  shell: docker load < "{{ fname }}"
  when: >-
    ansible_os_family != "CoreOS" and
    inventory_hostname != groups['kube-master'][0] and
    download_run_once|bool and
    download.enabled|bool and download.container|bool

View File

@ -1,10 +1,2 @@
---
etcd_version: v3.0.6
etcd_bin_dir: "{{ local_release_dir }}/etcd/etcd-{{ etcd_version }}-linux-amd64/"
# Possible values: host, docker
etcd_deployment_type: "docker"
etcd_image_repo: "quay.io/coreos/etcd"
etcd_image_tag: "{{ etcd_version }}"

View File

@ -3,8 +3,7 @@ dependencies:
- role: adduser
user: "{{ addusers.etcd }}"
when: ansible_os_family != 'CoreOS'
- role: download
file: "{{ downloads.etcd }}"
when: etcd_deployment_type == "host"
- role: docker
when: (ansible_os_family != "CoreOS" and etcd_deployment_type == "docker" or inventory_hostname in groups['k8s-cluster'])
- role: download
file: "{{ downloads.etcd }}"

View File

@ -20,7 +20,7 @@
register: etcd_task_result
until: etcd_task_result.rc == 0
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
changed_when: false
#Plan B: looks nicer, but requires docker-py on all hosts:

View File

@ -10,6 +10,3 @@ kube_users_dir: "{{ kube_config_dir }}/users"
# An experimental dev/test only dynamic volumes provisioner,
# for PetSets. Works for kube>=v1.3 only.
kube_hostpath_dynamic_provisioner: "false"
hyperkube_image_repo: "quay.io/coreos/hyperkube"
hyperkube_image_tag: "{{ kube_version }}_coreos.0"

View File

@ -1,4 +1,4 @@
---
dependencies:
- role: download # For kube_version variable
file: "{{ downloads.nothing }}"
- role: download
file: "{{ downloads.hyperkube }}"

View File

@ -12,7 +12,7 @@
register: kube_task_result
until: kube_task_result.rc == 0
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
changed_when: false
- name: Write kube-apiserver manifest

View File

@ -8,9 +8,6 @@ kube_resolv_conf: "/etc/resolv.conf"
kube_proxy_mode: iptables
hyperkube_image_repo: "quay.io/coreos/hyperkube"
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
# IP address of the DNS server.
# Kubernetes will create a pod with several containers, serving as the DNS
# server and expose it under this IP address. The IP address must be from

View File

@ -1,5 +1,5 @@
---
dependencies:
- role: download #For kube_version
file: "{{ downloads.nothing }}"
- role: download
file: "{{ downloads.hyperkube }}"
- role: kubernetes/secrets

View File

@ -104,7 +104,7 @@
register: pkgs_task_result
until: pkgs_task_result|success
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
with_items: "{{required_pkgs | default([]) | union(common_required_pkgs|default([]))}}"
when: ansible_os_family != "CoreOS"

View File

@ -7,9 +7,3 @@ ipip: false
# cloud_provider can only be set to 'gce' or 'aws'
# cloud_provider:
calicoctl_image_repo: calico/ctl
calicoctl_image_tag: "{{ calico_version }}"
calico_node_image_repo: calico/node
calico_node_image_tag: "{{ calico_version }}"

View File

@ -4,3 +4,9 @@ dependencies:
file: "{{ downloads.calico_cni_plugin }}"
- role: download
file: "{{ downloads.calico_cni_plugin_ipam }}"
- role: download
file: "{{ downloads.calico_node }}"
- role: download
file: "{{ downloads.calicoctl }}"
- role: download
file: "{{ downloads.hyperkube }}"

View File

@ -48,7 +48,7 @@
register: cni_task_result
until: cni_task_result.rc == 0
retries: 4
delay: "{{ 20 | random + 3 }}"
delay: "{{ retry_stagger | random + 3 }}"
changed_when: false
when: use_hyperkube_cni

View File

@ -10,10 +10,3 @@ flannel_public_ip: "{{ access_ip|default(ip|default(ansible_default_ipv4.address
# You can choose what type of flannel backend to use
# please refer to flannel's docs : https://github.com/coreos/flannel/blob/master/README.md
flannel_backend_type: "vxlan"
flannel_server_helper_image_repo: "gcr.io/google_containers/flannel-server-helper"
flannel_server_helper_image_tag: "0.1"
flannel_image_repo: "quay.io/coreos/flannel"
flannel_image_tag: "0.5.5"

View File

@ -0,0 +1,6 @@
---
# Role dependencies: run the download role once per flannel image,
# passing the matching entry of the `downloads` dict as `file`.
dependencies:
  # flannel-server-helper image
  - role: download
    file: "{{ downloads.flannel_server_helper }}"
  # flannel image
  - role: download
    file: "{{ downloads.flannel }}"