fix: bound CSI rollout wait time and add failure diagnostics
This commit is contained in:
@@ -14,15 +14,50 @@
|
||||
changed_when: true
|
||||
|
||||
# Wait for the hcloud-csi-controller Deployment to finish rolling out.
# Each attempt is capped by kubectl's --timeout=30s and is followed by a 10s
# delay, so with 30 retries the total wait is bounded at roughly 20 minutes
# instead of blocking indefinitely.
- name: Wait for CSI controller rollout
  command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout=30s
  register: csi_controller_rollout
  until: csi_controller_rollout.rc == 0
  retries: 30
  delay: 10
  # Read-only status query; never report a change.
  changed_when: false
  # Without this, exhausting the retries stops the play right here and the
  # follow-up tasks guarded by `csi_controller_rollout is failed` would never
  # run. The registered result is still marked as failed, so the diagnostics
  # are collected and the explicit `fail` task is what aborts the play.
  ignore_errors: true
|
||||
|
||||
# Diagnostic step: capture the Deployment summary for the failure report.
# Only runs when the rollout wait was registered as failed.
- name: Show CSI controller status on failure
  when: csi_controller_rollout is failed
  command: >-
    kubectl -n kube-system
    get deployment hcloud-csi-controller
    -o wide
  register: csi_controller_deploy_status
  # Best effort: a failing kubectl call must not mask the original error,
  # and a read-only query never counts as a change.
  failed_when: false
  changed_when: false
|
||||
|
||||
# Diagnostic step: list the pods selected by the app=hcloud-csi-controller
# label for the failure report. Only runs when the rollout wait was
# registered as failed.
- name: Show CSI controller pods on failure
  when: csi_controller_rollout is failed
  command: >-
    kubectl -n kube-system
    get pods -l app=hcloud-csi-controller
    -o wide
  register: csi_controller_pods_status
  # Best effort: a failing kubectl call must not mask the original error,
  # and a read-only query never counts as a change.
  failed_when: false
  changed_when: false
|
||||
|
||||
# Diagnostic step: grab the last 120 log lines of the hcloud-csi-driver
# container for the failure report. Only runs when the rollout wait was
# registered as failed.
- name: Show CSI driver logs on failure
  when: csi_controller_rollout is failed
  command: >-
    kubectl -n kube-system
    logs deployment/hcloud-csi-controller
    -c hcloud-csi-driver
    --tail=120
  register: csi_driver_logs
  # Best effort: a failing kubectl call must not mask the original error,
  # and a read-only query never counts as a change.
  failed_when: false
  changed_when: false
|
||||
|
||||
# Abort the play with a single report that bundles the output registered by
# the preceding diagnostic tasks. Each section falls back to 'n/a' when the
# corresponding result has no stdout attribute.
- name: Fail with CSI controller diagnostics
  when: csi_controller_rollout is failed
  fail:
    msg: |
      CSI controller rollout failed.
      Deployment status:
      {{ csi_controller_deploy_status.stdout | default('n/a') }}

      Pods status:
      {{ csi_controller_pods_status.stdout | default('n/a') }}

      hcloud-csi-driver logs:
      {{ csi_driver_logs.stdout | default('n/a') }}
|
||||
|
||||
- name: Wait for CSI node daemonset rollout
|
||||
command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system
|
||||
command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout=30s
|
||||
register: csi_node_rollout
|
||||
until: csi_node_rollout.rc == 0
|
||||
retries: 30
|
||||
|
||||
Reference in New Issue
Block a user