From 1069b05e68732bc54c8b4c68e991439f79d07978 Mon Sep 17 00:00:00 2001 From: forselli-stratio <41321889+forselli-stratio@users.noreply.github.com> Date: Mon, 7 Jun 2021 14:02:40 +0200 Subject: [PATCH] Improve scale flow and documentation (#7610) * Improve scale flow * Add confirmation prompt again --- docs/nodes.md | 12 +++++++++-- remove-node.yml | 21 +++++++++---------- .../remove-etcd-node/tasks/main.yml | 16 ++++++++------ 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/docs/nodes.md b/docs/nodes.md index 1e506f3c6..8bf58f9ed 100644 --- a/docs/nodes.md +++ b/docs/nodes.md @@ -106,7 +106,7 @@ You need to make sure there are always an odd number of etcd nodes in the cluste Update the inventory and run `cluster.yml` passing `--limit=etcd,kube_control_plane -e ignore_assert_errors=yes`. If the node you want to add as an etcd node is already a worker or master node in your cluster, you have to remove him first using `remove-node.yml`. -Run `upgrade-cluster.yml` also passing `--limit=etcd,kube_control_plane -e ignore_assert_errors=yes`. This is necessary to update all etcd configuration in the cluster. +Run `upgrade-cluster.yml` also passing `--limit=etcd,kube_control_plane -e ignore_assert_errors=yes`. This is necessary to update all etcd configuration in the cluster. At this point, you will have an even number of nodes. Everything should still be working, and you should only have problems if the cluster decides to elect a new etcd leader before you remove a node. @@ -115,6 +115,10 @@ Even so, running applications should continue to be available. If you add multiple ectd nodes with one run, you might want to append `-e etcd_retries=10` to increase the amount of retries between each ectd node join. Otherwise the etcd cluster might still be processing the first join and fail on subsequent nodes. `etcd_retries=10` might work to join 3 new nodes. 
+### 2) Add the new node to apiserver config + +On every master node, edit `/etc/kubernetes/manifests/kube-apiserver.yaml`. Make sure the new etcd nodes are present in the apiserver command line parameter `--etcd-servers=...`. + ## Removing an etcd node ### 1) Remove an old etcd node @@ -130,6 +134,10 @@ Remove `NODE_NAME` from your inventory file. Run `cluster.yml` to regenerate the configuration files on all remaining nodes. -### 4) Shutdown the old instance +### 4) Remove the old etcd node from apiserver config + +On every master node, edit `/etc/kubernetes/manifests/kube-apiserver.yaml`. Make sure only active etcd nodes are still present in the apiserver command line parameter `--etcd-servers=...`. + +### 5) Shutdown the old instance That's it. diff --git a/remove-node.yml b/remove-node.yml index ddf56614e..39203ad53 100644 --- a/remove-node.yml +++ b/remove-node.yml @@ -8,24 +8,25 @@ - hosts: "{{ node | default('etcd:k8s_cluster:calico_rr') }}" gather_facts: no environment: "{{ proxy_disable_env }}" - vars_prompt: - name: "delete_nodes_confirmation" - prompt: "Are you sure you want to delete nodes state? Type 'yes' to delete nodes." - default: "no" - private: no + tasks: + - name: Confirm Execution + pause: + prompt: "Are you sure you want to delete nodes state? Type 'yes' to delete nodes."
+ register: pause_result + run_once: True + when: + - not (skip_confirmation | default(false) | bool) - pre_tasks: - - name: check confirmation + - name: Fail if user does not confirm deletion fail: msg: "Delete nodes confirmation failed" - when: delete_nodes_confirmation != "yes" + when: pause_result.user_input | default('yes') != 'yes' - hosts: kube_control_plane[0] gather_facts: no environment: "{{ proxy_disable_env }}" roles: - { role: kubespray-defaults } - - { role: bootstrap-os, tags: bootstrap-os } - { role: remove-node/pre-remove, tags: pre-remove } - hosts: "{{ node | default('kube_node') }}" @@ -33,7 +34,6 @@ environment: "{{ proxy_disable_env }}" roles: - { role: kubespray-defaults, when: reset_nodes|default(True)|bool } - - { role: bootstrap-os, tags: bootstrap-os, when: reset_nodes|default(True)|bool } - { role: remove-node/remove-etcd-node } - { role: reset, tags: reset, when: reset_nodes|default(True)|bool } @@ -43,5 +43,4 @@ environment: "{{ proxy_disable_env }}" roles: - { role: kubespray-defaults, when: reset_nodes|default(True)|bool } - - { role: bootstrap-os, tags: bootstrap-os, when: reset_nodes|default(True)|bool } - { role: remove-node/post-remove, tags: post-remove } diff --git a/roles/remove-node/remove-etcd-node/tasks/main.yml b/roles/remove-node/remove-etcd-node/tasks/main.yml index 4d739729d..5c800044f 100644 --- a/roles/remove-node/remove-etcd-node/tasks/main.yml +++ b/roles/remove-node/remove-etcd-node/tasks/main.yml @@ -34,10 +34,12 @@ - facts environment: ETCDCTL_API: 3 - ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem" - ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem" + ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}.pem" + ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}-key.pem" ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem" - ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}" + ETCDCTL_ENDPOINTS: "https://{{ 
hostvars[groups['etcd']|first]['etcd_access_address'] | + default(hostvars[groups['etcd']|first]['ip']) | + default(hostvars[groups['etcd']|first]['fallback_ips'][groups['etcd']|first]) }}:2379" delegate_to: "{{ groups['etcd']|first }}" when: inventory_hostname in groups['etcd'] @@ -50,10 +52,12 @@ - facts environment: ETCDCTL_API: 3 - ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem" - ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem" + ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}.pem" + ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}-key.pem" ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem" - ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}" + ETCDCTL_ENDPOINTS: "https://{{ hostvars[groups['etcd']|first]['etcd_access_address'] | + default(hostvars[groups['etcd']|first]['ip']) | + default(hostvars[groups['etcd']|first]['fallback_ips'][groups['etcd']|first]) }}:2379" delegate_to: "{{ groups['etcd']|first }}" when: - inventory_hostname in groups['etcd']