fix: surface CSI rollout diagnostics before failing
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
until: csi_controller_rollout.rc == 0
|
||||
retries: "{{ csi_rollout_retries | int }}"
|
||||
delay: "{{ csi_rollout_delay_seconds | int }}"
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
|
||||
- name: Show CSI controller status on failure
|
||||
@@ -26,28 +27,35 @@
|
||||
register: csi_controller_deploy_status
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: csi_controller_rollout is failed
|
||||
when: csi_controller_rollout.rc != 0
|
||||
|
||||
- name: Show CSI controller pods on failure
|
||||
command: kubectl -n kube-system get pods -l app=hcloud-csi-controller -o wide
|
||||
register: csi_controller_pods_status
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: csi_controller_rollout is failed
|
||||
when: csi_controller_rollout.rc != 0
|
||||
|
||||
- name: Describe CSI controller deployment on failure
|
||||
command: kubectl -n kube-system describe deployment hcloud-csi-controller
|
||||
register: csi_controller_deploy_describe
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: csi_controller_rollout.rc != 0
|
||||
|
||||
- name: Show CSI driver logs on failure
|
||||
command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail={{ csi_failure_log_tail_lines }}
|
||||
register: csi_driver_logs
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: csi_controller_rollout is failed
|
||||
when: csi_controller_rollout.rc != 0
|
||||
|
||||
- name: Show recent kube-system events on failure
|
||||
command: kubectl -n kube-system get events --sort-by=.lastTimestamp
|
||||
register: csi_recent_events
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: csi_controller_rollout is failed
|
||||
when: csi_controller_rollout.rc != 0
|
||||
|
||||
- name: Fail with CSI controller diagnostics
|
||||
fail:
|
||||
@@ -59,12 +67,15 @@
|
||||
Pods status:
|
||||
{{ csi_controller_pods_status.stdout | default('n/a') }}
|
||||
|
||||
Deployment describe:
|
||||
{{ csi_controller_deploy_describe.stdout | default('n/a') }}
|
||||
|
||||
hcloud-csi-driver logs:
|
||||
{{ csi_driver_logs.stdout | default('n/a') }}
|
||||
|
||||
Recent kube-system events:
|
||||
{{ csi_recent_events.stdout | default('n/a') }}
|
||||
when: csi_controller_rollout is failed
|
||||
when: csi_controller_rollout.rc != 0
|
||||
|
||||
- name: Wait for CSI node daemonset rollout
|
||||
command: kubectl rollout status daemonset/hcloud-csi-node -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
|
||||
@@ -72,4 +83,10 @@
|
||||
until: csi_node_rollout.rc == 0
|
||||
retries: "{{ csi_rollout_retries | int }}"
|
||||
delay: "{{ csi_rollout_delay_seconds | int }}"
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
|
||||
- name: Fail when CSI node daemonset rollout does not complete
|
||||
fail:
|
||||
msg: "CSI node daemonset rollout failed: {{ csi_node_rollout.stdout | default('') }} {{ csi_node_rollout.stderr | default('') }}"
|
||||
when: csi_node_rollout.rc != 0
|
||||
|
||||
Reference in New Issue
Block a user