fix: restart CSI controller and capture previous crash logs
This commit is contained in:
@@ -13,6 +13,15 @@
|
||||
command: kubectl apply -f {{ csi_manifest_url }}
|
||||
changed_when: true
|
||||
|
||||
# Trigger a rolling restart of the hcloud CSI controller deployment so that
# freshly started pods read the current secret. A restart always modifies the
# deployment, hence the task is unconditionally reported as changed.
- name: Restart CSI controller to pick up current secret
  command:
    argv:
      - kubectl
      - -n
      - kube-system
      - rollout
      - restart
      - deployment/hcloud-csi-controller
  changed_when: true
|
||||
|
||||
# Short, best-effort wait (30 seconds) on the controller rollout. The outcome
# is deliberately ignored (failed_when: false) and nothing is registered, so
# this task can neither fail the play nor influence later conditions.
- name: Wait for CSI controller deployment generation
  command:
    argv:
      - kubectl
      - -n
      - kube-system
      - rollout
      - status
      - deployment/hcloud-csi-controller
      - --timeout=30s
  changed_when: false
  failed_when: false
|
||||
|
||||
- name: Wait for CSI controller rollout
|
||||
command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
|
||||
register: csi_controller_rollout
|
||||
@@ -43,6 +52,17 @@
|
||||
failed_when: false
|
||||
when: csi_controller_rollout.rc != 0
|
||||
|
||||
# Collect `kubectl describe` output for the CSI controller pods when the
# controller rollout check returned a non-zero exit code.
#
# All pods matching the label are described rather than only the first one in
# the list: after a rollout restart an old and a new pod may both match, and
# the first list entry is not guaranteed to be the failing pod.
#
# Diagnostic only: the task never reports a change and never fails the play.
# When no pod matches, stdout stays empty.
- name: Describe CSI controller pod on failure
  command: kubectl -n kube-system describe pods -l app=hcloud-csi-controller
  register: csi_controller_pod_describe
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0
|
||||
|
||||
- name: Show CSI driver logs on failure
|
||||
command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail={{ csi_failure_log_tail_lines }}
|
||||
register: csi_driver_logs
|
||||
@@ -70,9 +90,18 @@
|
||||
Deployment describe:
|
||||
{{ csi_controller_deploy_describe.stdout | default('n/a') }}
|
||||
|
||||
Pod describe:
|
||||
{{ csi_controller_pod_describe.stdout | default('n/a') }}
|
||||
|
||||
hcloud-csi-driver logs:
|
||||
{{ csi_driver_logs.stdout | default('n/a') }}
|
||||
|
||||
hcloud-csi-driver previous logs:
|
||||
{{ csi_driver_previous_logs.stdout | default('n/a') }}
|
||||
|
||||
Sidecar previous logs:
|
||||
{{ csi_sidecar_previous_logs.stdout | default('n/a') }}
|
||||
|
||||
Recent kube-system events:
|
||||
{{ csi_recent_events.stdout | default('n/a') }}
|
||||
when: csi_controller_rollout.rc != 0
|
||||
@@ -90,3 +119,27 @@
|
||||
fail:
|
||||
msg: "CSI node daemonset rollout failed: {{ csi_node_rollout.stdout | default('') }} {{ csi_node_rollout.stderr | default('') }}"
|
||||
when: csi_node_rollout.rc != 0
|
||||
# Capture the logs of the previous (crashed) hcloud-csi-driver container when
# the controller rollout check returned a non-zero exit code.
#
# Every pod matching the label is inspected rather than only the first one in
# the list: after a rollout restart an old and a new pod may both match, and
# only the pod whose container restarted has `--previous` logs.
#
# Diagnostic only: errors from kubectl (for example "previous terminated
# container not found") are tolerated and the task never fails the play.
#
# NOTE(review): the failure report that prints csi_driver_previous_logs.stdout
# appears earlier in this view than this task. Confirm this task runs before
# that report, otherwise the report only shows the 'n/a' default.
- name: Show CSI driver previous logs on failure
  shell: |
    pods="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)"
    for pod in $pods; do
      echo "===== $pod ====="
      kubectl -n kube-system logs "$pod" -c hcloud-csi-driver --previous --tail={{ csi_failure_log_tail_lines }} || true
    done
  register: csi_driver_previous_logs
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0
|
||||
|
||||
# Capture the logs of the previous (crashed) sidecar containers
# (csi-attacher, csi-resizer, csi-provisioner) when the controller rollout
# check returned a non-zero exit code.
#
# Every pod matching the label is inspected rather than only the first one in
# the list: after a rollout restart an old and a new pod may both match, and
# only the pod whose containers restarted has `--previous` logs.
#
# Diagnostic only: a missing previous container is tolerated (`|| true`) and
# the task never fails the play.
#
# NOTE(review): the failure report that prints csi_sidecar_previous_logs.stdout
# appears earlier in this view than this task. Confirm this task runs before
# that report, otherwise the report only shows the 'n/a' default.
- name: Show sidecar previous logs on failure
  shell: |
    pods="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)"
    for pod in $pods; do
      for container in csi-attacher csi-resizer csi-provisioner; do
        echo "===== $pod / $container ====="
        kubectl -n kube-system logs "$pod" -c "$container" --previous --tail={{ csi_failure_log_tail_lines }} || true
      done
    done
  register: csi_sidecar_previous_logs
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0
|
||||
|
||||
Reference in New Issue
Block a user