kubespray/scripts/collect-info.yaml

154 lines
6.0 KiB
YAML
Raw Normal View History

---
- name: Collect debug info
hosts: all
become: true
gather_facts: no
vars:
docker_bin_dir: /usr/bin
bin_dir: /usr/local/bin
ansible_ssh_pipelining: true
etcd_cert_dir: /etc/ssl/etcd/ssl
kube_network_plugin: calico
archive_dirname: collect-info
commands:
- name: timedate_info
cmd: timedatectl status
- name: kernel_info
cmd: uname -r
- name: docker_info
cmd: "{{ docker_bin_dir }}/docker info"
- name: ip_info
cmd: ip -4 -o a
- name: route_info
cmd: ip ro
- name: proc_info
cmd: ps auxf | grep -v ]$
- name: systemctl_failed_info
cmd: systemctl --state=failed --no-pager
- name: k8s_info
cmd: "{{ bin_dir }}/kubectl get all --all-namespaces -o wide"
- name: errors_info
cmd: journalctl -p err --no-pager
- name: etcd_info
2020-07-20 22:26:51 +08:00
cmd: "{{ bin_dir }}/etcdctl endpoint --cluster health"
- name: calico_info
cmd: "{{ bin_dir }}/calicoctl node status"
when: '{{ kube_network_plugin == "calico" }}'
- name: calico_workload_info
cmd: "{{ bin_dir }}/calicoctl get workloadEndpoint -o wide"
when: '{{ kube_network_plugin == "calico" }}'
- name: calico_pool_info
cmd: "{{ bin_dir }}/calicoctl get ippool -o wide"
when: '{{ kube_network_plugin == "calico" }}'
- name: weave_info
cmd: weave report
when: '{{ kube_network_plugin == "weave" }}'
- name: weave_logs
cmd: "{{ docker_bin_dir }}/docker logs weave"
when: '{{ kube_network_plugin == "weave" }}'
- name: kube_describe_all
cmd: "{{ bin_dir }}/kubectl describe all --all-namespaces"
- name: kube_describe_nodes
cmd: "{{ bin_dir }}/kubectl describe nodes"
- name: kubelet_logs
cmd: journalctl -u kubelet --no-pager
- name: coredns_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l k8s-app=coredns -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system; done"
- name: apiserver_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l component=kube-apiserver -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system; done"
- name: controller_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l component=kube-controller-manager -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system; done"
- name: scheduler_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l component=kube-scheduler -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system; done"
- name: proxy_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l k8s-app=kube-proxy -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system; done"
- name: nginx_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l k8s-app=kube-nginx -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system; done"
- name: flannel_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l app=flannel -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system flannel-container; done"
when: '{{ kube_network_plugin == "flannel" }}'
- name: canal_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l k8s-app=canal-node -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system flannel; done"
when: '{{ kube_network_plugin == "canal" }}'
- name: calico_policy_logs
cmd: "for i in `{{ bin_dir }}/kubectl get pods -n kube-system -l k8s-app=calico-kube-controllers -o jsonpath={.items..metadata.name}`;
do {{ bin_dir }}/kubectl logs ${i} -n kube-system ; done"
when: '{{ kube_network_plugin in ["canal", "calico"] }}'
- name: helm_show_releases_history
cmd: "for i in `{{ bin_dir }}/helm list -q`; do {{ bin_dir }}/helm history ${i} --col-width=0; done"
when: "{{ helm_enabled | default(true) }}"
logs:
- /var/log/syslog
- /var/log/daemon.log
- /var/log/kern.log
- /var/log/dpkg.log
- /var/log/apt/history.log
- /var/log/yum.log
- /var/log/messages
- /var/log/dmesg
environment:
Upgrade ansible (#10190) * project: update all dependencies including ansible Upgrade to ansible 7.x and ansible-core 2.14.x. There seems to be issue with ansible 8/ansible-core 2.15 so we remain on those versions for now. It's quite a big bump already anyway. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * tests: install aws galaxy collection Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * ansible-lint: disable various rules after ansible upgrade Temporarily disable a bunch of linting action following ansible upgrade. Those should be taken care of separately. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve deprecated-module ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve no-free-form ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[meta] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[playbook] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[tasks] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-file-permissions ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-shell-pipe ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: remove deprecated warn args Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use fqcn for non builtin tasks Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve syntax-check[missing-file] for contrib playbook Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use arithmetic inside jinja to fix ansible 6 upgrade Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> --------- Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch>
2023-06-26 18:15:45 +08:00
ETCDCTL_API: "3"
2020-07-20 22:26:51 +08:00
ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
tasks:
- name: Set etcd_access_addresses
2019-05-16 15:27:43 +08:00
set_fact:
etcd_access_addresses: |-
{% for item in groups['etcd'] -%}
https://{{ item }}:2379{% if not loop.last %},{% endif %}
{%- endfor %}
when: "'etcd' in groups"
2020-08-05 16:56:28 +08:00
- name: Storing commands output
shell: "{{ item.cmd }} &> {{ item.name }}"
failed_when: false
with_items: "{{ commands }}"
when: item.when | default(True)
no_log: True
- name: Fetch results
Upgrade ansible (#10190) * project: update all dependencies including ansible Upgrade to ansible 7.x and ansible-core 2.14.x. There seems to be issue with ansible 8/ansible-core 2.15 so we remain on those versions for now. It's quite a big bump already anyway. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * tests: install aws galaxy collection Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * ansible-lint: disable various rules after ansible upgrade Temporarily disable a bunch of linting action following ansible upgrade. Those should be taken care of separately. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve deprecated-module ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve no-free-form ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[meta] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[playbook] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[tasks] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-file-permissions ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-shell-pipe ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: remove deprecated warn args Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use fqcn for non builtin tasks Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve syntax-check[missing-file] for contrib playbook Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use arithmetic inside jinja to fix ansible 6 upgrade Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> --------- Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch>
2023-06-26 18:15:45 +08:00
fetch:
src: "{{ item.name }}"
dest: "/tmp/{{ archive_dirname }}/commands"
with_items: "{{ commands }}"
when: item.when | default(True)
failed_when: false
- name: Fetch logs
Upgrade ansible (#10190) * project: update all dependencies including ansible Upgrade to ansible 7.x and ansible-core 2.14.x. There seems to be issue with ansible 8/ansible-core 2.15 so we remain on those versions for now. It's quite a big bump already anyway. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * tests: install aws galaxy collection Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * ansible-lint: disable various rules after ansible upgrade Temporarily disable a bunch of linting action following ansible upgrade. Those should be taken care of separately. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve deprecated-module ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve no-free-form ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[meta] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[playbook] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[tasks] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-file-permissions ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-shell-pipe ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: remove deprecated warn args Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use fqcn for non builtin tasks Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve syntax-check[missing-file] for contrib playbook Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use arithmetic inside jinja to fix ansible 6 upgrade Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> --------- Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch>
2023-06-26 18:15:45 +08:00
fetch:
src: "{{ item }}"
dest: "/tmp/{{ archive_dirname }}/logs"
with_items: "{{ logs }}"
failed_when: false
- name: Pack results and logs
Upgrade ansible (#10190) * project: update all dependencies including ansible Upgrade to ansible 7.x and ansible-core 2.14.x. There seems to be issue with ansible 8/ansible-core 2.15 so we remain on those versions for now. It's quite a big bump already anyway. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * tests: install aws galaxy collection Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * ansible-lint: disable various rules after ansible upgrade Temporarily disable a bunch of linting action following ansible upgrade. Those should be taken care of separately. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve deprecated-module ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve no-free-form ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[meta] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[playbook] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[tasks] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-file-permissions ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-shell-pipe ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: remove deprecated warn args Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use fqcn for non builtin tasks Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve syntax-check[missing-file] for contrib playbook Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use arithmetic inside jinja to fix ansible 6 upgrade Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> --------- Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch>
2023-06-26 18:15:45 +08:00
community.general.archive:
path: "/tmp/{{ archive_dirname }}"
dest: "{{ dir | default('.') }}/logs.tar.gz"
2018-03-13 02:59:22 +08:00
remove: true
mode: 0640
2018-03-13 02:59:22 +08:00
delegate_to: localhost
connection: local
2018-03-13 02:59:22 +08:00
become: false
run_once: true
- name: Clean up collected command outputs
Upgrade ansible (#10190) * project: update all dependencies including ansible Upgrade to ansible 7.x and ansible-core 2.14.x. There seems to be issue with ansible 8/ansible-core 2.15 so we remain on those versions for now. It's quite a big bump already anyway. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * tests: install aws galaxy collection Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * ansible-lint: disable various rules after ansible upgrade Temporarily disable a bunch of linting action following ansible upgrade. Those should be taken care of separately. Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve deprecated-module ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve no-free-form ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[meta] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[playbook] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve schema[tasks] ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-file-permissions ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve risky-shell-pipe ansible-lint error Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: remove deprecated warn args Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use fqcn for non builtin tasks Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: resolve syntax-check[missing-file] for contrib playbook Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> * project: use arithmetic inside jinja to fix ansible 6 upgrade Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch> --------- Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@proton.ch>
2023-06-26 18:15:45 +08:00
file:
path: "{{ item.name }}"
state: absent
with_items: "{{ commands }}"