fix: make CSI rollout checks configurable and faster
Some checks failed
Deploy Cluster / Terraform (push) Successful in 43s
Deploy Cluster / Ansible (push) Failing after 8m19s

This commit is contained in:
2026-03-01 18:28:17 +00:00
parent 712f4e8f82
commit 6593adbea6
2 changed files with 21 additions and 7 deletions

View File

@@ -2,3 +2,7 @@
hcloud_token: ""
cluster_name: "k8s-cluster"
csi_manifest_url: "https://raw.githubusercontent.com/hetznercloud/csi-driver/v2.12.0/deploy/kubernetes/hcloud-csi.yml"
+csi_rollout_timeout_seconds: 30
+csi_rollout_retries: 8
+csi_rollout_delay_seconds: 5
+csi_failure_log_tail_lines: 120

View File

@@ -14,11 +14,11 @@
changed_when: true
- name: Wait for CSI controller rollout
-command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout=30s
+command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
register: csi_controller_rollout
until: csi_controller_rollout.rc == 0
-retries: 30
-delay: 10
+retries: "{{ csi_rollout_retries | int }}"
+delay: "{{ csi_rollout_delay_seconds | int }}"
changed_when: false
- name: Show CSI controller status on failure
@@ -36,12 +36,19 @@
when: csi_controller_rollout is failed
- name: Show CSI driver logs on failure
-command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail=120
+command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail={{ csi_failure_log_tail_lines }}
register: csi_driver_logs
changed_when: false
failed_when: false
when: csi_controller_rollout is failed
+- name: Show recent kube-system events on failure
+command: kubectl -n kube-system get events --sort-by=.lastTimestamp
+register: csi_recent_events
+changed_when: false
+failed_when: false
+when: csi_controller_rollout is failed
- name: Fail with CSI controller diagnostics
fail:
msg: |
@@ -54,12 +61,15 @@
hcloud-csi-driver logs:
{{ csi_driver_logs.stdout | default('n/a') }}
+Recent kube-system events:
+{{ csi_recent_events.stdout | default('n/a') }}
when: csi_controller_rollout is failed
- name: Wait for CSI node daemonset rollout
-command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout=30s
+command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
register: csi_node_rollout
until: csi_node_rollout.rc == 0
-retries: 30
-delay: 10
+retries: "{{ csi_rollout_retries | int }}"
+delay: "{{ csi_rollout_delay_seconds | int }}"
changed_when: false