fix: make CSI rollout checks configurable and faster
This commit is contained in:
@@ -2,3 +2,7 @@
|
|||||||
hcloud_token: ""
|
hcloud_token: ""
|
||||||
cluster_name: "k8s-cluster"
|
cluster_name: "k8s-cluster"
|
||||||
csi_manifest_url: "https://raw.githubusercontent.com/hetznercloud/csi-driver/v2.12.0/deploy/kubernetes/hcloud-csi.yml"
|
csi_manifest_url: "https://raw.githubusercontent.com/hetznercloud/csi-driver/v2.12.0/deploy/kubernetes/hcloud-csi.yml"
|
||||||
|
csi_rollout_timeout_seconds: 30
|
||||||
|
csi_rollout_retries: 8
|
||||||
|
csi_rollout_delay_seconds: 5
|
||||||
|
csi_failure_log_tail_lines: 120
|
||||||
|
|||||||
@@ -14,11 +14,11 @@
|
|||||||
changed_when: true
|
changed_when: true
|
||||||
|
|
||||||
- name: Wait for CSI controller rollout
|
- name: Wait for CSI controller rollout
|
||||||
command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout=30s
|
command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
|
||||||
register: csi_controller_rollout
|
register: csi_controller_rollout
|
||||||
until: csi_controller_rollout.rc == 0
|
until: csi_controller_rollout.rc == 0
|
||||||
retries: 30
|
retries: "{{ csi_rollout_retries | int }}"
|
||||||
delay: 10
|
delay: "{{ csi_rollout_delay_seconds | int }}"
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
||||||
- name: Show CSI controller status on failure
|
- name: Show CSI controller status on failure
|
||||||
@@ -36,12 +36,19 @@
|
|||||||
when: csi_controller_rollout is failed
|
when: csi_controller_rollout is failed
|
||||||
|
|
||||||
- name: Show CSI driver logs on failure
|
- name: Show CSI driver logs on failure
|
||||||
command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail=120
|
command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail={{ csi_failure_log_tail_lines }}
|
||||||
register: csi_driver_logs
|
register: csi_driver_logs
|
||||||
changed_when: false
|
changed_when: false
|
||||||
failed_when: false
|
failed_when: false
|
||||||
when: csi_controller_rollout is failed
|
when: csi_controller_rollout is failed
|
||||||
|
|
||||||
|
- name: Show recent kube-system events on failure
|
||||||
|
command: kubectl -n kube-system get events --sort-by=.lastTimestamp
|
||||||
|
register: csi_recent_events
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when: csi_controller_rollout is failed
|
||||||
|
|
||||||
- name: Fail with CSI controller diagnostics
|
- name: Fail with CSI controller diagnostics
|
||||||
fail:
|
fail:
|
||||||
msg: |
|
msg: |
|
||||||
@@ -54,12 +61,15 @@
|
|||||||
|
|
||||||
hcloud-csi-driver logs:
|
hcloud-csi-driver logs:
|
||||||
{{ csi_driver_logs.stdout | default('n/a') }}
|
{{ csi_driver_logs.stdout | default('n/a') }}
|
||||||
|
|
||||||
|
Recent kube-system events:
|
||||||
|
{{ csi_recent_events.stdout | default('n/a') }}
|
||||||
when: csi_controller_rollout is failed
|
when: csi_controller_rollout is failed
|
||||||
|
|
||||||
- name: Wait for CSI node daemonset rollout
|
- name: Wait for CSI node daemonset rollout
|
||||||
command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout=30s
|
command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
|
||||||
register: csi_node_rollout
|
register: csi_node_rollout
|
||||||
until: csi_node_rollout.rc == 0
|
until: csi_node_rollout.rc == 0
|
||||||
retries: 30
|
retries: "{{ csi_rollout_retries | int }}"
|
||||||
delay: 10
|
delay: "{{ csi_rollout_delay_seconds | int }}"
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|||||||
Reference in New Issue
Block a user