From 017a813621695ff891e472fbb123ee9571be2c80 Mon Sep 17 00:00:00 2001 From: Spencer Smith Date: Fri, 9 Dec 2016 10:44:52 -0800 Subject: [PATCH 1/5] first cut of an upgrade process --- roles/upgrade/post-upgrade/tasks/main.yml | 5 ++++ roles/upgrade/pre-upgrade/tasks/main.yml | 12 +++++++++ upgrade-cluster.yml | 33 +++++++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 roles/upgrade/post-upgrade/tasks/main.yml create mode 100644 roles/upgrade/pre-upgrade/tasks/main.yml create mode 100644 upgrade-cluster.yml diff --git a/roles/upgrade/post-upgrade/tasks/main.yml b/roles/upgrade/post-upgrade/tasks/main.yml new file mode 100644 index 000000000..b576b0947 --- /dev/null +++ b/roles/upgrade/post-upgrade/tasks/main.yml @@ -0,0 +1,5 @@ +--- + +- name: Uncordon node + command: kubectl uncordon {{ ansible_hostname }} + delegate_to: "{{ groups['kube-master'][0] }}" diff --git a/roles/upgrade/pre-upgrade/tasks/main.yml b/roles/upgrade/pre-upgrade/tasks/main.yml new file mode 100644 index 000000000..25f665a86 --- /dev/null +++ b/roles/upgrade/pre-upgrade/tasks/main.yml @@ -0,0 +1,12 @@ +--- + +- name: Cordon node + command: kubectl cordon {{ ansible_hostname }} + delegate_to: "{{ groups['kube-master'][0] }}" + +- name: Drain node + command: kubectl drain --force --ignore-daemonsets --delete-local-data {{ ansible_hostname }} + delegate_to: "{{ groups['kube-master'][0] }}" + +- name: Sleep for grace period for draining + pause: seconds=30 \ No newline at end of file diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml new file mode 100644 index 000000000..d7089fc49 --- /dev/null +++ b/upgrade-cluster.yml @@ -0,0 +1,33 @@ +--- +- hosts: all + any_errors_fatal: true + gather_facts: true + +- hosts: all:!network-storage + any_errors_fatal: true + roles: + - { role: kubernetes/preinstall, tags: preinstall } + +- hosts: etcd:!k8s-cluster + any_errors_fatal: true + serial: 1 + roles: + - { role: etcd, tags: etcd } + +- hosts: kube-node + any_errors_fatal: true + 
serial: 1 + roles: + - { role: etcd, tags: etcd } + - { role: upgrade/pre-upgrade, tags: upgrade/pre-upgrade } + - { role: kubernetes/node, tags: node } + - { role: network_plugin, tags: network } + - { role: upgrade/post-upgrade, tags: upgrade/post-upgrade } + +- hosts: kube-master + any_errors_fatal: true + serial: 1 + roles: + - { role: etcd, tags: etcd } + - { role: kubernetes/node, tags: node } + - { role: kubernetes/master, tags: master } \ No newline at end of file From fbaef7e60f2c462d543a3462e7fe905224b1bcc7 Mon Sep 17 00:00:00 2001 From: Spencer Smith Date: Tue, 13 Dec 2016 13:43:31 -0500 Subject: [PATCH 2/5] specify grace period for draining --- roles/upgrade/pre-upgrade/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/upgrade/pre-upgrade/tasks/main.yml b/roles/upgrade/pre-upgrade/tasks/main.yml index 25f665a86..390e9e559 100644 --- a/roles/upgrade/pre-upgrade/tasks/main.yml +++ b/roles/upgrade/pre-upgrade/tasks/main.yml @@ -5,8 +5,8 @@ delegate_to: "{{ groups['kube-master'][0] }}" - name: Drain node - command: kubectl drain --force --ignore-daemonsets --delete-local-data {{ ansible_hostname }} + command: kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }} delegate_to: "{{ groups['kube-master'][0] }}" - name: Sleep for grace period for draining - pause: seconds=30 \ No newline at end of file + pause: seconds=30 From 97ebbb96724e973c9d0127314f0d64f496beafac Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Tue, 14 Feb 2017 19:08:44 +0300 Subject: [PATCH 3/5] Add graceful upgrade process Based on #718 introduced by rsmitty. Includes all roles and all options to support deployment of new hosts in case they were added to inventory. Main difference here is that master role is evaluated first so that master components get upgraded first. 
Fixes #694 --- .gitlab-ci.yml | 15 +++-- docs/upgrades.md | 16 ++++- roles/upgrade/post-upgrade/tasks/main.yml | 2 +- roles/upgrade/pre-upgrade/tasks/main.yml | 4 +- upgrade-cluster.yml | 81 ++++++++++++++++++++--- 5 files changed, 96 insertions(+), 22 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2d281cd72..305b69575 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -101,8 +101,8 @@ before_script: # Check out latest tag if testing upgrade # Uncomment when gitlab kargo repo has tags - #- test "${UPGRADE_TEST}" = "true" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) - - test "${UPGRADE_TEST}" = "true" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0 + #- test "${UPGRADE_TEST}" != "false" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) + - test "${UPGRADE_TEST}" != "false" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0 # Create cluster @@ -127,9 +127,10 @@ before_script: cluster.yml # Repeat deployment if testing upgrade - #FIXME(mattymo): repeat "Create cluster" above without duplicating code - > - if [ "${UPGRADE_TEST}" = "true" ]; then + if [ "${UPGRADE_TEST}" != "false" ]; then + test "${UPGRADE_TEST}" == "basic" && PLAYBOOK="cluster.yml"; + test "${UPGRADE_TEST}" == "graceful" && PLAYBOOK="upgrade-cluster.yml"; pip install ansible==2.2.1.0; git checkout "${CI_BUILD_REF}"; ansible-playbook -i inventory/inventory.ini -b --become-user=root --private-key=${HOME}/.ssh/id_rsa -u $SSH_USER @@ -149,7 +150,7 @@ before_script: -e resolvconf_mode=${RESOLVCONF_MODE} -e weave_cpu_requests=${WEAVE_CPU_LIMIT} -e weave_cpu_limit=${WEAVE_CPU_LIMIT} - cluster.yml; + $PLAYBOOK; fi # Tests Cases @@ -253,7 +254,7 @@ before_script: KUBE_NETWORK_PLUGIN: canal CLOUD_IMAGE: debian-8-kubespray CLOUD_REGION: us-east1-b - UPGRADE_TEST: "true" + UPGRADE_TEST: "basic" CLUSTER_MODE: ha 
.rhel7_weave_variables: &rhel7_weave_variables @@ -261,7 +262,7 @@ before_script: KUBE_NETWORK_PLUGIN: weave CLOUD_IMAGE: rhel-7 CLOUD_REGION: europe-west1-b - UPGRADE_TEST: "true" + UPGRADE_TEST: "graceful" CLUSTER_MODE: default .centos7_flannel_variables: &centos7_flannel_variables diff --git a/docs/upgrades.md b/docs/upgrades.md index 4a25bd622..9a57f43ac 100644 --- a/docs/upgrades.md +++ b/docs/upgrades.md @@ -18,7 +18,7 @@ versions. Here are all version vars for each component: * flannel_version * kubedns_version -#### Example +#### Unsafe upgrade example If you wanted to upgrade just kube_version from v1.4.3 to v1.4.6, you could deploy the following way: @@ -33,6 +33,20 @@ And then repeat with v1.4.6 as kube_version: ansible-playbook cluster.yml -i inventory/inventory.cfg -e kube_version=v1.4.6 ``` +#### Graceful upgrade + +Kargo also supports cordon, drain and uncordoning of nodes when performing +a cluster upgrade. There is a separate playbook used for this purpose. It is +important to note that upgrade-cluster.yml can only be used for upgrading an +existing cluster. That means there must be at least 1 kube-master already +deployed. 
+ ``` +git fetch origin +git checkout origin/master +ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg + ``` + #### Upgrade order As mentioned above, components are upgraded in the order in which they were diff --git a/roles/upgrade/post-upgrade/tasks/main.yml b/roles/upgrade/post-upgrade/tasks/main.yml index b576b0947..d8243d04d 100644 --- a/roles/upgrade/post-upgrade/tasks/main.yml +++ b/roles/upgrade/post-upgrade/tasks/main.yml @@ -1,5 +1,5 @@ --- - name: Uncordon node - command: kubectl uncordon {{ ansible_hostname }} + command: "{{ bin_dir }}/kubectl uncordon {{ ansible_hostname }}" delegate_to: "{{ groups['kube-master'][0] }}" diff --git a/roles/upgrade/pre-upgrade/tasks/main.yml b/roles/upgrade/pre-upgrade/tasks/main.yml index 390e9e559..90b535d2e 100644 --- a/roles/upgrade/pre-upgrade/tasks/main.yml +++ b/roles/upgrade/pre-upgrade/tasks/main.yml @@ -1,11 +1,11 @@ --- - name: Cordon node - command: kubectl cordon {{ ansible_hostname }} + command: "{{ bin_dir }}/kubectl cordon {{ ansible_hostname }}" delegate_to: "{{ groups['kube-master'][0] }}" - name: Drain node - command: kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }} + command: "{{ bin_dir }}/kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}" delegate_to: "{{ groups['kube-master'][0] }}" - name: Sleep for grace period for draining diff --git a/upgrade-cluster.yml b/upgrade-cluster.yml index d7089fc49..1be9c9cab 100644 --- a/upgrade-cluster.yml +++ b/upgrade-cluster.yml @@ -1,33 +1,92 @@ --- -- hosts: all +- hosts: localhost + gather_facts: False + roles: + - bastion-ssh-config + tags: [localhost, bastion] + +- hosts: k8s-cluster:etcd:calico-rr any_errors_fatal: true + gather_facts: false + vars: + # Need to disable pipelining for bootstrap-os as some systems have requiretty in sudoers set, which makes pipelining + # fail. 
bootstrap-os fixes this on these systems, so in later plays it can be enabled. + ansible_ssh_pipelining: false + roles: + - bootstrap-os + tags: + - bootstrap-os + +- hosts: k8s-cluster:etcd:calico-rr + any_errors_fatal: true + vars: + ansible_ssh_pipelining: true gather_facts: true -- hosts: all:!network-storage +- hosts: k8s-cluster:etcd:calico-rr any_errors_fatal: true roles: + - { role: kernel-upgrade, tags: kernel-upgrade, when: kernel_upgrade is defined and kernel_upgrade } - { role: kubernetes/preinstall, tags: preinstall } + - { role: docker, tags: docker } + - role: rkt + tags: rkt + when: "'rkt' in [etcd_deployment_type, kubelet_deployment_type, vault_deployment_type]" + +- hosts: etcd:k8s-cluster:vault + any_errors_fatal: true + roles: + - { role: vault, tags: vault, vault_bootstrap: true, when: "cert_management == 'vault'" } - hosts: etcd:!k8s-cluster any_errors_fatal: true - serial: 1 roles: - { role: etcd, tags: etcd } -- hosts: kube-node +- hosts: k8s-cluster any_errors_fatal: true - serial: 1 roles: - { role: etcd, tags: etcd } - - { role: upgrade/pre-upgrade, tags: upgrade/pre-upgrade } - - { role: kubernetes/node, tags: node } - - { role: network_plugin, tags: network } - - { role: upgrade/post-upgrade, tags: upgrade/post-upgrade } +- hosts: etcd:k8s-cluster:vault + any_errors_fatal: true + roles: + - { role: vault, tags: vault, when: "cert_management == 'vault'"} + +#Handle upgrades to master components first to maintain backwards compat. 
- hosts: kube-master any_errors_fatal: true serial: 1 roles: - - { role: etcd, tags: etcd } + - { role: upgrade/pre-upgrade, tags: pre-upgrade } - { role: kubernetes/node, tags: node } - - { role: kubernetes/master, tags: master } \ No newline at end of file + - { role: kubernetes/master, tags: master } + - { role: network_plugin, tags: network } + - { role: upgrade/post-upgrade, tags: post-upgrade } + +#Finally handle worker upgrades, based on given batch size +- hosts: kube-node:!kube-master + any_errors_fatal: true + serial: "{{ serial | default('20%') }}" + roles: + - { role: upgrade/pre-upgrade, tags: pre-upgrade } + - { role: kubernetes/node, tags: node } + - { role: network_plugin, tags: network } + - { role: upgrade/post-upgrade, tags: post-upgrade } + - { role: kubernetes-apps/network_plugin, tags: network } + +- hosts: calico-rr + any_errors_fatal: true + roles: + - { role: network_plugin/calico/rr, tags: network } + +- hosts: k8s-cluster + any_errors_fatal: true + roles: + - { role: dnsmasq, when: "dns_mode == 'dnsmasq_kubedns'", tags: dnsmasq } + - { role: kubernetes/preinstall, when: "dns_mode != 'none' and resolvconf_mode == 'host_resolvconf'", tags: resolvconf } + +- hosts: kube-master[0] + any_errors_fatal: true + roles: + - { role: kubernetes-apps, tags: apps } From 617edda9ba2a27c40f14e167dd4ca447a9f73cda Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Thu, 16 Feb 2017 18:12:54 +0300 Subject: [PATCH 4/5] Adjust weave daemonset for serial deployment --- .gitlab-ci.yml | 5 +++-- .../kubernetes-apps/network_plugin/weave/tasks/main.yml | 9 +++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 305b69575..a112245f0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -262,7 +262,6 @@ before_script: KUBE_NETWORK_PLUGIN: weave CLOUD_IMAGE: rhel-7 CLOUD_REGION: europe-west1-b - UPGRADE_TEST: "graceful" CLUSTER_MODE: default .centos7_flannel_variables: &centos7_flannel_variables @@ -278,6 +277,7 @@ 
before_script: CLOUD_IMAGE: debian-8-kubespray CLOUD_REGION: us-central1-b CLUSTER_MODE: default + UPGRADE_TEST: "graceful" .coreos_canal_variables: &coreos_canal_variables # stage: deploy-gce-part2 @@ -328,6 +328,7 @@ before_script: CLUSTER_MODE: separate ETCD_DEPLOYMENT: rkt KUBELET_DEPLOYMENT: rkt + UPGRADE_TEST: "graceful" .ubuntu_vault_sep_variables: &ubuntu_vault_sep_variables # stage: deploy-gce-part1 @@ -540,7 +541,7 @@ coreos-alpha-weave-ha: except: ['triggers'] only: ['master', /^pr-.*$/] -ubuntu-rkt-sep: +ubuntu-rkt-sep-upgrade: stage: deploy-gce-part1 <<: *job <<: *gce diff --git a/roles/kubernetes-apps/network_plugin/weave/tasks/main.yml b/roles/kubernetes-apps/network_plugin/weave/tasks/main.yml index 1c216fd92..93be1602b 100644 --- a/roles/kubernetes-apps/network_plugin/weave/tasks/main.yml +++ b/roles/kubernetes-apps/network_plugin/weave/tasks/main.yml @@ -1,6 +1,5 @@ #FIXME: remove if kubernetes/features#124 is implemented - name: Weave | Purge old weave daemonset - run_once: true kube: name: "weave-net" kubectl: "{{ bin_dir }}/kubectl" @@ -12,7 +11,6 @@ - name: Weave | Start Resources - run_once: true kube: name: "weave-net" kubectl: "{{ bin_dir }}/kubectl" @@ -21,17 +19,16 @@ namespace: "{{system_namespace}}" state: "{{ item | ternary('latest','present') }}" with_items: "{{ weave_manifest.changed }}" - delegate_to: "{{groups['kube-master'][0]}}" + when: inventory_hostname == groups['kube-master'][0] - name: "Weave | wait for weave to become available" uri: url: http://127.0.0.1:6784/status return_content: yes - run_once: true register: weave_status - retries: 12 + retries: 180 delay: 10 until: "{{ weave_status.status == 200 and 'Status: ready' in weave_status.content }}" - delegate_to: "{{groups['kube-master'][0]}}" + when: inventory_hostname == groups['kube-master'][0] From a510e7b8f3f2e57c1a16f41e318fcd619d7200ed Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Fri, 17 Feb 2017 14:54:00 +0300 Subject: [PATCH 5/5] Use gce hostname as 
inventory name Calico does not allow renaming hosts --- .gitlab-ci.yml | 3 +-- tests/templates/inventory-gce.j2 | 39 +++++++++++++++++--------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a112245f0..60c76f206 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -328,7 +328,6 @@ before_script: CLUSTER_MODE: separate ETCD_DEPLOYMENT: rkt KUBELET_DEPLOYMENT: rkt - UPGRADE_TEST: "graceful" .ubuntu_vault_sep_variables: &ubuntu_vault_sep_variables # stage: deploy-gce-part1 @@ -445,7 +444,7 @@ rhel7-weave-triggers: when: on_success only: ['triggers'] -debian8-calico: +debian8-calico-upgrade: stage: deploy-gce-part2 <<: *job <<: *gce diff --git a/tests/templates/inventory-gce.j2 b/tests/templates/inventory-gce.j2 index 015bdb6a4..f5326229c 100644 --- a/tests/templates/inventory-gce.j2 +++ b/tests/templates/inventory-gce.j2 @@ -1,48 +1,51 @@ -node1 ansible_ssh_host={{gce.instance_data[0].public_ip}} -node2 ansible_ssh_host={{gce.instance_data[1].public_ip}} +{% set node1 = gce.instance_data[0].name %} +{% set node2 = gce.instance_data[1].name %} +{{node1}} ansible_ssh_host={{gce.instance_data[0].public_ip}} +{{node2}} ansible_ssh_host={{gce.instance_data[1].public_ip}} {% if mode is defined and mode in ["separate", "ha"] %} -node3 ansible_ssh_host={{gce.instance_data[2].public_ip}} +{% set node3 = gce.instance_data[2].name %} +{{node3}} ansible_ssh_host={{gce.instance_data[2].public_ip}} {% endif %} {% if mode is defined and mode == "separate" %} [kube-master] -node1 +{{node1}} [kube-node] -node2 +{{node2}} [etcd] -node3 +{{node3}} [vault] -node3 +{{node3}} {% elif mode is defined and mode == "ha" %} [kube-master] -node1 -node2 +{{node1}} +{{node2}} [kube-node] -node3 +{{node3}} [etcd] -node2 -node3 +{{node2}} +{{node3}} [vault] -node2 -node3 +{{node2}} +{{node3}} {% else %} [kube-master] -node1 +{{node1}} [kube-node] -node2 +{{node2}} [etcd] -node1 +{{node1}} [vault] -node1 +{{node1}} {% endif %} 
[k8s-cluster:children]