From 3311e0a296ce544816a3b1ba1f5bd7706f9871b0 Mon Sep 17 00:00:00 2001
From: Arthur Outhenin-Chalandre
Date: Mon, 26 Jun 2023 09:57:08 +0200
Subject: [PATCH] tests: cleanup stale packet namespace automatically (#10245)

* tests: cleanup stale packet namespace automatically

Cancelled jobs on GitLab can produce stale VMs as the delete playbook
will never be executed. This commit allows removing old VMs by getting
all the namespaces created from the same branch with an older pipeline id.

Signed-off-by: Arthur Outhenin-Chalandre

* tests: cleanup stale packet namespace after 2 hours

This ensures that we don't have any packet namespace remaining for more
than 2 hours. All the jobs usually complete within 30 minutes to 1 hour,
so 2 hours is enough to detect a stale namespace.

Signed-off-by: Arthur Outhenin-Chalandre

* tests: ignore vm cleanup failure

Signed-off-by: Arthur Outhenin-Chalandre

* tests: use pipeline_id var instead of fetching namespace for cleanup packet vm

Signed-off-by: Arthur Outhenin-Chalandre

---------

Signed-off-by: Arthur Outhenin-Chalandre
---
Review note (not part of the commit message): the three new YAML files
gained explanatory comments and two small fixes (`now(utc=true)` so the age
check compares UTC with UTC, and `gather_facts: false`). Hunk lengths and the
diffstat below were updated accordingly; the `index` blob ids of those three
new files were NOT regenerated.

 .gitlab-ci/packet.yml                         |  8 ++++++++
 tests/Makefile                                |  8 ++++++++
 tests/cloud_playbooks/cleanup-packet.yml      |  8 ++++++++
 .../roles/cleanup-packet-ci/tasks/main.yml    | 20 ++++++++++++++++++++
 .../roles/packet-ci/tasks/cleanup-old-vms.yml | 19 +++++++++++++++++++
 .../roles/packet-ci/tasks/create-vms.yml      |  4 +++-
 .../roles/packet-ci/tasks/main.yml            |  2 ++
 7 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 tests/cloud_playbooks/cleanup-packet.yml
 create mode 100644 tests/cloud_playbooks/roles/cleanup-packet-ci/tasks/main.yml
 create mode 100644 tests/cloud_playbooks/roles/packet-ci/tasks/cleanup-old-vms.yml

diff --git a/.gitlab-ci/packet.yml b/.gitlab-ci/packet.yml
index edf8ebcdb..b6246b6fa 100644
--- a/.gitlab-ci/packet.yml
+++ b/.gitlab-ci/packet.yml
@@ -23,6 +23,14 @@
   allow_failure: true
   extends: .packet
 
+packet_cleanup_old:
+  stage: deploy-part1
+  extends: .packet_periodic
+  script:
+    - cd tests
+    - make cleanup-packet
+  after_script: []
+
 # The ubuntu20-calico-aio jobs are meant as early stages to prevent running the full CI if something is horribly broken
 packet_ubuntu20-calico-aio:
   stage: deploy-part1
diff --git a/tests/Makefile b/tests/Makefile
index 787449e5b..c9f561eee 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -64,6 +64,8 @@ create-packet: init-packet
 		$(ANSIBLE_LOG_LEVEL) \
 		-e @"files/${CI_JOB_NAME}.yml" \
 		-e test_id=$(TEST_ID) \
+		-e branch="$(CI_COMMIT_BRANCH)" \
+		-e pipeline_id="$(CI_PIPELINE_ID)" \
 		-e inventory_path=$(INVENTORY)
 
 delete-packet:
@@ -71,8 +73,14 @@ delete-packet:
 		$(ANSIBLE_LOG_LEVEL) \
 		-e @"files/${CI_JOB_NAME}.yml" \
 		-e test_id=$(TEST_ID) \
+		-e branch="$(CI_COMMIT_BRANCH)" \
+		-e pipeline_id="$(CI_PIPELINE_ID)" \
 		-e inventory_path=$(INVENTORY)
 
+cleanup-packet:
+	ansible-playbook cloud_playbooks/cleanup-packet.yml -c local \
+		$(ANSIBLE_LOG_LEVEL)
+
 create-vagrant:
 	vagrant up
 	find / -name vagrant_ansible_inventory
diff --git a/tests/cloud_playbooks/cleanup-packet.yml b/tests/cloud_playbooks/cleanup-packet.yml
new file mode 100644
index 000000000..b709d6d0d
--- /dev/null
+++ b/tests/cloud_playbooks/cleanup-packet.yml
@@ -0,0 +1,8 @@
+---
+
+# Run by `make cleanup-packet` to apply the cleanup-packet-ci role locally.
+- hosts: localhost
+  gather_facts: false
+  become: true
+  roles:
+    - { role: cleanup-packet-ci }
diff --git a/tests/cloud_playbooks/roles/cleanup-packet-ci/tasks/main.yml b/tests/cloud_playbooks/roles/cleanup-packet-ci/tasks/main.yml
new file mode 100644
index 000000000..9256b2d54
--- /dev/null
+++ b/tests/cloud_playbooks/roles/cleanup-packet-ci/tasks/main.yml
@@ -0,0 +1,20 @@
+---
+
+# Delete every namespace labelled cijobs=true that is at least 2 hours old.
+- name: Fetch a list of namespaces
+  kubernetes.core.k8s_info:
+    api_version: v1
+    kind: Namespace
+    label_selectors:
+      - cijobs = true
+  register: namespaces
+
+- name: Delete stale namespaces for more than 2 hours
+  command: "kubectl delete namespace {{ item.metadata.name }}"
+  # Best effort: a failed deletion must not fail the cleanup job.
+  failed_when: false
+  loop: "{{ namespaces.resources }}"
+  when:
+    # creationTimestamp is in UTC ("Z" suffix), so compare it against UTC now;
+    # 7200 seconds is the 2 hour staleness threshold.
+    - (now(utc=true) - (item.metadata.creationTimestamp | to_datetime("%Y-%m-%dT%H:%M:%SZ"))).total_seconds() >= 7200
diff --git a/tests/cloud_playbooks/roles/packet-ci/tasks/cleanup-old-vms.yml b/tests/cloud_playbooks/roles/packet-ci/tasks/cleanup-old-vms.yml
new file mode 100644
index 000000000..cf81e81b5
--- /dev/null
+++ b/tests/cloud_playbooks/roles/packet-ci/tasks/cleanup-old-vms.yml
@@ -0,0 +1,19 @@
+---
+
+# Delete the namespaces created for the same branch by an older pipeline.
+- name: Fetch a list of namespaces
+  kubernetes.core.k8s_info:
+    api_version: v1
+    kind: Namespace
+    label_selectors:
+      - cijobs = true
+      - branch = {{ branch }}
+  register: namespaces
+
+- name: Delete older namespaces
+  command: "kubectl delete namespace {{ item.metadata.name }}"
+  # Best effort: a failed deletion must not fail the job.
+  failed_when: false
+  loop: "{{ namespaces.resources }}"
+  when:
+    - (item.metadata.labels.pipeline_id | int) < (pipeline_id | int)
diff --git a/tests/cloud_playbooks/roles/packet-ci/tasks/create-vms.yml b/tests/cloud_playbooks/roles/packet-ci/tasks/create-vms.yml
index 4f0a66844..8ccf5adc5 100644
--- a/tests/cloud_playbooks/roles/packet-ci/tasks/create-vms.yml
+++ b/tests/cloud_playbooks/roles/packet-ci/tasks/create-vms.yml
@@ -1,7 +1,9 @@
 ---
 
 - name: "Create CI namespace {{ test_name }} for test vms"
-  command: "kubectl create namespace {{ test_name }}"
+  shell: |-
+    kubectl create namespace {{ test_name }} &&
+    kubectl label namespace {{ test_name }} cijobs=true branch="{{ branch }}" pipeline_id="{{ pipeline_id }}"
   changed_when: false
 
 - name: "Create temp dir /tmp/{{ test_name }} for CI files"
diff --git a/tests/cloud_playbooks/roles/packet-ci/tasks/main.yml b/tests/cloud_playbooks/roles/packet-ci/tasks/main.yml
index bf4e974e3..9d8e105db 100644
--- a/tests/cloud_playbooks/roles/packet-ci/tasks/main.yml
+++ b/tests/cloud_playbooks/roles/packet-ci/tasks/main.yml
@@ -7,6 +7,8 @@
   set_fact:
     vm_count: "{%- if mode in ['separate', 'separate-scale', 'ha', 'ha-scale', 'ha-recover', 'ha-recover-noquorum'] -%}{{ 3|int }}{%- elif mode == 'aio' -%}{{ 1|int }}{%- else -%}{{ 2|int }}{%- endif -%}"
 
+- import_tasks: cleanup-old-vms.yml
+
 - import_tasks: create-vms.yml
   when:
     - not vm_cleanup