From fc598d6eb487eceba2065299effbdd9940d0e17e Mon Sep 17 00:00:00 2001
From: MichaelFisher1997
Date: Sun, 1 Mar 2026 22:59:11 +0000
Subject: [PATCH] chore: add deep CSI diagnostics for smoke test failures

When the CSI smoke test fails (PVC wait or job wait returned non-zero),
collect additional diagnostics:

- the name of the first hcloud-csi-controller pod, its `kubectl describe`
  output, and the current and previous logs of each controller container
- the csi.hetzner.cloud CSIDriver object and the CSINode list

Every collection task is best-effort (failed_when: false). kubectl stderr
is redirected into stdout inside the shell loops so error text is kept in
the registered output, and the summary uses default(..., true) so an empty
stdout falls back to stderr or 'n/a' instead of printing nothing.
---
 ansible/roles/csi/tasks/main.yml | 66 ++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/ansible/roles/csi/tasks/main.yml b/ansible/roles/csi/tasks/main.yml
index 9e54dda..caca0db 100644
--- a/ansible/roles/csi/tasks/main.yml
+++ b/ansible/roles/csi/tasks/main.yml
@@ -286,6 +286,60 @@
     - csi_smoke_test_enabled | bool
     - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
 
+# Look up the first hcloud-csi-controller pod; later tasks reuse the registered name.
+- name: Get CSI controller pod name on smoke failure
+  command: kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[0].metadata.name}'
+  register: csi_controller_pod_name
+  failed_when: false
+  changed_when: false
+  when:
+    - csi_smoke_test_enabled | bool
+    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
+
+# Skipped when the pod lookup returned an empty name.
+- name: Describe CSI controller pod on smoke failure
+  command: kubectl -n kube-system describe pod {{ csi_controller_pod_name.stdout }}
+  register: csi_controller_pod_smoke_describe
+  failed_when: false
+  changed_when: false
+  when:
+    - csi_smoke_test_enabled | bool
+    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
+    - csi_controller_pod_name.stdout | length > 0
+
+# kubectl stderr is folded into stdout (2>&1) so errors such as a missing
+# previous container appear in the registered output next to the logs.
+- name: Show CSI controller container logs on smoke failure
+  shell: |
+    pod="{{ csi_controller_pod_name.stdout }}"
+    for container in hcloud-csi-driver csi-provisioner csi-attacher csi-resizer liveness-probe; do
+      echo "===== ${container}: current ====="
+      kubectl -n kube-system logs "$pod" -c "$container" --tail={{ csi_failure_log_tail_lines }} 2>&1 || true
+      echo "===== ${container}: previous ====="
+      kubectl -n kube-system logs "$pod" -c "$container" --previous --tail={{ csi_failure_log_tail_lines }} 2>&1 || true
+    done
+  register: csi_controller_container_logs
+  failed_when: false
+  changed_when: false
+  when:
+    - csi_smoke_test_enabled | bool
+    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
+    - csi_controller_pod_name.stdout | length > 0
+
+# Dump the CSIDriver and CSINode objects; 2>&1 keeps kubectl errors in stdout.
+- name: Show CSI driver and node driver objects on smoke failure
+  shell: |
+    echo "===== CSIDriver ====="
+    kubectl get csidriver csi.hetzner.cloud -o yaml 2>&1 || true
+    echo "===== CSINode ====="
+    kubectl get csinode -o wide 2>&1 || true
+  register: csi_driver_objects
+  failed_when: false
+  changed_when: false
+  when:
+    - csi_smoke_test_enabled | bool
+    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
+
 - name: Show CSI smoke pod describe on failure
   shell: |
     pod="$(kubectl -n kube-system get pods -l job-name={{ csi_smoke_test_job_name }} -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
@@ -326,6 +380,18 @@
       Storage classes:
       {{ csi_storageclasses.stdout | default(csi_storageclasses.stderr | default('n/a')) }}
 
+      CSI controller pod:
+      {{ csi_controller_pod_name.stdout | default('n/a', true) }}
+
+      CSI controller pod describe:
+      {{ csi_controller_pod_smoke_describe.stdout | default(csi_controller_pod_smoke_describe.stderr | default('n/a', true), true) }}
+
+      CSI controller container logs:
+      {{ csi_controller_container_logs.stdout | default(csi_controller_container_logs.stderr | default('n/a', true), true) }}
+
+      CSI driver objects:
+      {{ csi_driver_objects.stdout | default(csi_driver_objects.stderr | default('n/a', true), true) }}
+
       Pod describe:
       {{ csi_smoke_pod_describe.stdout | default('n/a') }}
 