From b9011bd4fdfac4d7528f50501c6e112c05811681 Mon Sep 17 00:00:00 2001
From: MichaelFisher1997
Date: Sun, 1 Mar 2026 19:26:17 +0000
Subject: [PATCH] fix: restart CSI controller and capture previous crash logs

---
Notes (ignored by git am; not part of the commit message):

* The "previous logs" tasks are inserted ahead of the failure message that
  prints csi_driver_previous_logs and csi_sidecar_previous_logs. They were
  previously appended after the node daemonset failure check at the end of
  the file, so their registers were not yet set when that message was
  rendered and it could only show the 'n/a' fallback.
* Line breaks and YAML indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Verify the context lines against
  ansible/roles/csi/tasks/main.yml before applying. The post-image blob id
  on the index line predates the reorder.

 ansible/roles/csi/tasks/main.yml | 56 ++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/ansible/roles/csi/tasks/main.yml b/ansible/roles/csi/tasks/main.yml
index 57c7ab3..6faa437 100644
--- a/ansible/roles/csi/tasks/main.yml
+++ b/ansible/roles/csi/tasks/main.yml
@@ -13,6 +13,15 @@
   command: kubectl apply -f {{ csi_manifest_url }}
   changed_when: true
 
+- name: Restart CSI controller to pick up current secret
+  command: kubectl -n kube-system rollout restart deployment/hcloud-csi-controller
+  changed_when: true
+
+- name: Wait for CSI controller deployment generation
+  command: kubectl -n kube-system rollout status deployment/hcloud-csi-controller --timeout=30s
+  failed_when: false
+  changed_when: false
+
 - name: Wait for CSI controller rollout
   command: kubectl rollout status deployment/hcloud-csi-controller -n kube-system --timeout={{ csi_rollout_timeout_seconds }}s
   register: csi_controller_rollout
@@ -43,6 +52,44 @@
   failed_when: false
   when: csi_controller_rollout.rc != 0
 
+- name: Describe CSI controller pod on failure
+  shell: |
+    pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
+    if [ -n "$pod" ]; then
+      kubectl -n kube-system describe pod "$pod"
+    fi
+  register: csi_controller_pod_describe
+  changed_when: false
+  failed_when: false
+  when: csi_controller_rollout.rc != 0
+
+# The two "previous logs" tasks must run before the failure message that
+# prints csi_driver_previous_logs and csi_sidecar_previous_logs.
+- name: Show CSI driver previous logs on failure
+  shell: |
+    pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
+    if [ -n "$pod" ]; then
+      kubectl -n kube-system logs "$pod" -c hcloud-csi-driver --previous --tail={{ csi_failure_log_tail_lines }}
+    fi
+  register: csi_driver_previous_logs
+  changed_when: false
+  failed_when: false
+  when: csi_controller_rollout.rc != 0
+
+- name: Show sidecar previous logs on failure
+  shell: |
+    pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
+    if [ -n "$pod" ]; then
+      for container in csi-attacher csi-resizer csi-provisioner; do
+        echo "===== $container ====="
+        kubectl -n kube-system logs "$pod" -c "$container" --previous --tail={{ csi_failure_log_tail_lines }} || true
+      done
+    fi
+  register: csi_sidecar_previous_logs
+  changed_when: false
+  failed_when: false
+  when: csi_controller_rollout.rc != 0
+
 - name: Show CSI driver logs on failure
   command: kubectl -n kube-system logs deployment/hcloud-csi-controller -c hcloud-csi-driver --tail={{ csi_failure_log_tail_lines }}
   register: csi_driver_logs
@@ -70,9 +117,18 @@
       Deployment describe:
       {{ csi_controller_deploy_describe.stdout | default('n/a') }}
 
+      Pod describe:
+      {{ csi_controller_pod_describe.stdout | default('n/a') }}
+
       hcloud-csi-driver logs:
       {{ csi_driver_logs.stdout | default('n/a') }}
 
+      hcloud-csi-driver previous logs:
+      {{ csi_driver_previous_logs.stdout | default('n/a') }}
+
+      Sidecar previous logs:
+      {{ csi_sidecar_previous_logs.stdout | default('n/a') }}
+
       Recent kube-system events:
       {{ csi_recent_events.stdout | default('n/a') }}
   when: csi_controller_rollout.rc != 0