From 4cefd7df40c80501b37377092aa2f8be34351832 Mon Sep 17 00:00:00 2001
From: MichaelFisher1997
Date: Sun, 1 Mar 2026 18:42:03 +0000
Subject: [PATCH] fix: surface CSI rollout diagnostics before failing

Let the CSI rollout wait tasks finish without aborting the play so the
diagnostic tasks that follow them can run, then fail explicitly with the
collected output.

The wait tasks use ignore_errors rather than failed_when: false. With
until/retries, Ansible marks the result as failed once the retries are
exhausted, after failed_when has been evaluated, so failed_when: false
would still abort the play before any diagnostics were gathered.

---
Review note: context-line indentation was reconstructed assuming the
usual 2-space Ansible layout, and the index hash below predates the
ignore_errors change; regenerate with git format-patch before applying.

 ansible/roles/csi/tasks/main.yml | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/ansible/roles/csi/tasks/main.yml b/ansible/roles/csi/tasks/main.yml
index 0884901..57c7ab3 100644
--- a/ansible/roles/csi/tasks/main.yml
+++ b/ansible/roles/csi/tasks/main.yml
@@ -19,6 +19,7 @@
   until: csi_controller_rollout.rc == 0
   retries: "{{ csi_rollout_retries | int }}"
   delay: "{{ csi_rollout_delay_seconds | int }}"
+  ignore_errors: true
   changed_when: false
 
 - name: Show CSI controller status on failure
@@ -26,28 +27,35 @@
   register: csi_controller_deploy_status
   changed_when: false
   failed_when: false
-  when: csi_controller_rollout is failed
+  when: csi_controller_rollout.rc != 0
 
 - name: Show CSI controller pods on failure
   command: kubectl -n kube-system get pods -l app=hcloud-csi-controller -o wide
   register: csi_controller_pods_status
   changed_when: false
   failed_when: false
-  when: csi_controller_rollout is failed
+  when: csi_controller_rollout.rc != 0
+
+- name: Describe CSI controller deployment on failure
+  command: kubectl -n kube-system describe deployment hcloud-csi-controller
+  register: csi_controller_deploy_describe
+  changed_when: false
+  failed_when: false
+  when: csi_controller_rollout.rc != 0
 
 - name: Show CSI driver logs on failure
   command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail={{ csi_failure_log_tail_lines }}
   register: csi_driver_logs
   changed_when: false
   failed_when: false
-  when: csi_controller_rollout is failed
+  when: csi_controller_rollout.rc != 0
 
 - name: Show recent kube-system events on failure
   command: kubectl -n kube-system get events --sort-by=.lastTimestamp
   register: csi_recent_events
   changed_when: false
   failed_when: false
-  when: csi_controller_rollout is failed
+  when: csi_controller_rollout.rc != 0
 
 - name: Fail with CSI controller diagnostics
   fail:
@@ -59,12 +67,15 @@
       Pods status:
       {{ csi_controller_pods_status.stdout | default('n/a') }}
 
+      Deployment describe:
+      {{ csi_controller_deploy_describe.stdout | default('n/a') }}
+
       hcloud-csi-driver logs:
       {{ csi_driver_logs.stdout | default('n/a') }}
 
       Recent kube-system events:
       {{ csi_recent_events.stdout | default('n/a') }}
-  when: csi_controller_rollout is failed
+  when: csi_controller_rollout.rc != 0
 
 - name: Wait for CSI node daemonset rollout
   command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
@@ -72,4 +83,10 @@
   until: csi_node_rollout.rc == 0
   retries: "{{ csi_rollout_retries | int }}"
   delay: "{{ csi_rollout_delay_seconds | int }}"
+  ignore_errors: true
   changed_when: false
+
+- name: Fail when CSI node daemonset rollout does not complete
+  fail:
+    msg: "CSI node daemonset rollout failed: {{ csi_node_rollout.stdout | default('') }} {{ csi_node_rollout.stderr | default('') }}"
+  when: csi_node_rollout.rc != 0