From 6593adbea6889aff45e6efb74420f15b8818f2ad Mon Sep 17 00:00:00 2001
From: MichaelFisher1997
Date: Sun, 1 Mar 2026 18:28:17 +0000
Subject: [PATCH] fix: make CSI rollout checks configurable and faster

---
Review notes (text between the "---" marker and the first "diff --git" line
is not part of the commit message):

- Purpose: both `kubectl rollout status` waits now take their --timeout,
  retries and delay from csi_rollout_timeout_seconds, csi_rollout_retries and
  csi_rollout_delay_seconds (new role defaults 30 / 8 / 5) instead of the
  hard-coded 30s / 30 retries / 10s delay. The driver-log tail length comes
  from csi_failure_log_tail_lines (default 120).
- A new diagnostic task collects kube-system events, and the final `fail`
  message prints them under "Recent kube-system events:".
- Review fix: `ignore_errors: true` is added to the controller wait. Every
  diagnostic task after it is gated on `csi_controller_rollout is failed`,
  and the wait task as shown has no error override, so an exhausted
  until/retries loop would stop the host at the wait task and the
  diagnostics would never run. The closing `fail` task still aborts the
  play. NOTE(review): confirm no play- or include-level ignore_errors
  already covers this.
- The daemonset wait is left failing hard; no diagnostics depend on it.
- The line structure of this patch was restored from a whitespace-collapsed
  copy; YAML indentation follows the usual 2-space task layout and should be
  checked against the tree. The commit id in the "From" line and the blob
  ids on the tasks "index" line predate the ignore_errors addition.

 ansible/roles/csi/defaults/main.yml |  4 ++++
 ansible/roles/csi/tasks/main.yml    | 25 ++++++++++++++++++-------
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/ansible/roles/csi/defaults/main.yml b/ansible/roles/csi/defaults/main.yml
index bae3050..77fa931 100644
--- a/ansible/roles/csi/defaults/main.yml
+++ b/ansible/roles/csi/defaults/main.yml
@@ -2,3 +2,7 @@
 hcloud_token: ""
 cluster_name: "k8s-cluster"
 csi_manifest_url: "https://raw.githubusercontent.com/hetznercloud/csi-driver/v2.12.0/deploy/kubernetes/hcloud-csi.yml"
+csi_rollout_timeout_seconds: 30
+csi_rollout_retries: 8
+csi_rollout_delay_seconds: 5
+csi_failure_log_tail_lines: 120
diff --git a/ansible/roles/csi/tasks/main.yml b/ansible/roles/csi/tasks/main.yml
index 430c3e8..0884901 100644
--- a/ansible/roles/csi/tasks/main.yml
+++ b/ansible/roles/csi/tasks/main.yml
@@ -14,11 +14,12 @@
   changed_when: true
 
 - name: Wait for CSI controller rollout
-  command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout=30s
+  command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
   register: csi_controller_rollout
   until: csi_controller_rollout.rc == 0
-  retries: 30
-  delay: 10
+  retries: "{{ csi_rollout_retries | int }}"
+  delay: "{{ csi_rollout_delay_seconds | int }}"
+  ignore_errors: true
   changed_when: false
 
 - name: Show CSI controller status on failure
@@ -36,12 +37,19 @@
   when: csi_controller_rollout is failed
 
 - name: Show CSI driver logs on failure
-  command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail=120
+  command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail={{ csi_failure_log_tail_lines }}
   register: csi_driver_logs
   changed_when: false
   failed_when: false
   when: csi_controller_rollout is failed
 
+- name: Show recent kube-system events on failure
+  command: kubectl -n kube-system get events --sort-by=.lastTimestamp
+  register: csi_recent_events
+  changed_when: false
+  failed_when: false
+  when: csi_controller_rollout is failed
+
 - name: Fail with CSI controller diagnostics
   fail:
     msg: |
@@ -54,12 +62,15 @@
 
       hcloud-csi-driver logs:
       {{ csi_driver_logs.stdout | default('n/a') }}
+
+      Recent kube-system events:
+      {{ csi_recent_events.stdout | default('n/a') }}
   when: csi_controller_rollout is failed
 
 - name: Wait for CSI node daemonset rollout
-  command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout=30s
+  command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
   register: csi_node_rollout
   until: csi_node_rollout.rc == 0
-  retries: 30
-  delay: 10
+  retries: "{{ csi_rollout_retries | int }}"
+  delay: "{{ csi_rollout_delay_seconds | int }}"
   changed_when: false