fix: make CSI rollout checks configurable and faster
This commit is contained in:
@@ -2,3 +2,7 @@
|
||||
# Presumably the Hetzner Cloud API token; empty by default, so it has to be
# supplied by the caller. Its consumer is not visible here — TODO confirm.
hcloud_token: ""
# Cluster name default (consumer not visible here).
cluster_name: "k8s-cluster"
# Hetzner Cloud CSI driver manifest, pinned to the v2.12.0 tag.
csi_manifest_url: "https://raw.githubusercontent.com/hetznercloud/csi-driver/v2.12.0/deploy/kubernetes/hcloud-csi.yml"
# Passed as `--timeout=<n>s` to each `kubectl rollout status` attempt.
csi_rollout_timeout_seconds: 30
# `retries:` value of the rollout wait tasks' `until` loop.
csi_rollout_retries: 8
# `delay:` value (seconds between attempts) of the rollout wait tasks.
csi_rollout_delay_seconds: 5
# Passed as `--tail=<n>` when fetching hcloud-csi-driver logs after a failed
# controller rollout.
csi_failure_log_tail_lines: 120
|
||||
|
||||
@@ -14,11 +14,11 @@
|
||||
changed_when: true
|
||||
|
||||
# Wait until the hcloud-csi-controller Deployment in kube-system has rolled out.
# Each attempt is bounded by kubectl's own --timeout; the `until` loop repeats
# the attempt according to csi_rollout_retries / csi_rollout_delay_seconds.
# The result is registered because the diagnostic tasks are gated on
# `csi_controller_rollout is failed`.
# NOTE(review): no ignore_errors/failed_when is visible on this task. Unless an
# enclosing block supplies one, exhausting the retries fails the host before
# those diagnostic tasks can run — confirm.
- name: Wait for CSI controller rollout
  command: >-
    kubectl rollout status deployment/hcloud-csi-controller
    -n kube-system
    --timeout={{ csi_rollout_timeout_seconds }}s
  register: csi_controller_rollout
  until: csi_controller_rollout.rc == 0
  retries: "{{ csi_rollout_retries | int }}"
  delay: "{{ csi_rollout_delay_seconds | int }}"
  # Read-only status check: never report "changed".
  changed_when: false
|
||||
|
||||
- name: Show CSI controller status on failure
|
||||
@@ -36,12 +36,19 @@
|
||||
when: csi_controller_rollout is failed
|
||||
|
||||
# Diagnostic step, run only when the controller rollout wait failed.
# Captures the last csi_failure_log_tail_lines lines of the hcloud-csi-driver
# container log; the output is registered as csi_driver_logs, whose stdout is
# interpolated into the final failure message.
- name: Show CSI driver logs on failure
  command: >-
    kubectl -n kube-system logs deployment/hcloud-csi-controller
    -c hcloud-csi-driver
    --tail={{ csi_failure_log_tail_lines }}
  register: csi_driver_logs
  # Read-only command: never report "changed".
  changed_when: false
  # Best effort: a failing log fetch must not itself fail the play.
  failed_when: false
  when: csi_controller_rollout is failed
|
||||
|
||||
# Diagnostic step, run only when the controller rollout wait failed.
# Lists kube-system events sorted by .lastTimestamp; the output is registered
# as csi_recent_events, whose stdout is interpolated into the final failure
# message.
- name: Show recent kube-system events on failure
  command: kubectl -n kube-system get events --sort-by=.lastTimestamp
  register: csi_recent_events
  # Read-only command: never report "changed".
  changed_when: false
  # Best effort: a failing event query must not itself fail the play.
  failed_when: false
  when: csi_controller_rollout is failed
|
||||
|
||||
- name: Fail with CSI controller diagnostics
|
||||
fail:
|
||||
msg: |
|
||||
@@ -54,12 +61,15 @@
|
||||
|
||||
hcloud-csi-driver logs:
|
||||
{{ csi_driver_logs.stdout | default('n/a') }}
|
||||
|
||||
Recent kube-system events:
|
||||
{{ csi_recent_events.stdout | default('n/a') }}
|
||||
when: csi_controller_rollout is failed
|
||||
|
||||
# Wait until the hcloud-csi-node DaemonSet in kube-system has rolled out.
# Each attempt is bounded by kubectl's own --timeout; the `until` loop repeats
# the attempt according to csi_rollout_retries / csi_rollout_delay_seconds.
# The result is registered as csi_node_rollout (no consumer is visible here).
- name: Wait for CSI node daemonset rollout
  command: >-
    kubectl rollout status daemonset/hcloud-csi-node
    -n kube-system
    --timeout={{ csi_rollout_timeout_seconds }}s
  register: csi_node_rollout
  until: csi_node_rollout.rc == 0
  retries: "{{ csi_rollout_retries | int }}"
  delay: "{{ csi_rollout_delay_seconds | int }}"
  # Read-only status check: never report "changed".
  changed_when: false
|
||||
|
||||
Reference in New Issue
Block a user