fix: improve CSI smoke test waits and failure diagnostics
Some checks failed
Deploy Cluster / Terraform (push) Successful in 42s
Deploy Cluster / Ansible (push) Failing after 8m19s

This commit is contained in:
2026-03-01 20:08:27 +00:00
parent d8bc0b7512
commit d146811f79
2 changed files with 61 additions and 9 deletions

View File

@@ -9,3 +9,5 @@ csi_failure_log_tail_lines: 120
# Master switch for the CSI smoke test; the smoke-test tasks gate on
# `csi_smoke_test_enabled | bool`.
csi_smoke_test_enabled: true
# StorageClass name and requested volume size for the smoke test.
# NOTE(review): presumably consumed by the smoke PVC manifest, which is not
# visible here — confirm against the task that creates the PVC.
csi_smoke_test_storage_class: "hcloud-volumes"
csi_smoke_test_size: "1Gi"
# Seconds handed to `kubectl wait --timeout=<n>s` while waiting for the smoke
# PVC to reach phase Bound.
csi_smoke_test_pvc_timeout_seconds: 300
# Seconds handed to `kubectl wait --timeout=<n>s` while waiting for the smoke
# Job to report condition complete.
csi_smoke_test_job_timeout_seconds: 300

View File

@@ -187,14 +187,14 @@
when: csi_smoke_test_enabled | bool
# Blocks until the smoke-test PVC in kube-system reports phase Bound or the
# kubectl timeout elapses. `failed_when: false` keeps a timeout from aborting
# the play at this point; the result is registered as csi_smoke_pvc_wait and
# later diagnostic tasks branch on csi_smoke_pvc_wait.rc.
# NOTE(review): two `command:` keys are visible below (a fixed 120s timeout and
# a templated one). This looks like old/new diff lines rendered together —
# confirm only one remains in the real task file, since duplicate keys are
# silently last-wins in most YAML parsers.
- name: Wait for CSI smoke PVC to bind
command: kubectl -n kube-system wait --for=jsonpath={.status.phase}=Bound pvc/csi-smoke-pvc --timeout=120s
command: kubectl -n kube-system wait --for=jsonpath='{.status.phase}'=Bound pvc/csi-smoke-pvc --timeout={{ csi_smoke_test_pvc_timeout_seconds }}s
register: csi_smoke_pvc_wait
failed_when: false
changed_when: false
when: csi_smoke_test_enabled | bool
- name: Wait for CSI smoke Job completion
command: kubectl -n kube-system wait --for=condition=complete job/csi-smoke-job --timeout=180s
command: kubectl -n kube-system wait --for=condition=complete job/csi-smoke-job --timeout={{ csi_smoke_test_job_timeout_seconds }}s
register: csi_smoke_job_wait
failed_when: false
changed_when: false
@@ -209,9 +209,45 @@
changed_when: false
when: csi_smoke_test_enabled | bool
- name: Show CSI smoke resources on failure
command: kubectl -n kube-system get pvc csi-smoke-pvc job csi-smoke-job pod -l job-name=csi-smoke-job -o wide
register: csi_smoke_status
# Diagnostic step: captures a wide listing of the smoke-test PVC into
# csi_smoke_pvc_status. Runs only when the smoke test is enabled and either
# wait step returned non-zero (a missing Job-wait rc counts as failure).
# Never fails or reports a change itself, so it cannot mask the real error.
- name: Show CSI smoke PVC on failure
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
  command:
    cmd: kubectl -n kube-system get pvc csi-smoke-pvc -o wide
  register: csi_smoke_pvc_status
  changed_when: false
  failed_when: false
# Diagnostic step: captures a wide listing of the smoke-test Job into
# csi_smoke_job_status. Gated on the smoke test being enabled and on either
# wait step having returned non-zero (an undefined Job-wait rc is treated as 1).
# kubectl errors are tolerated and the task is never reported as changed.
- name: Show CSI smoke Job on failure
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
  command:
    cmd: kubectl -n kube-system get job csi-smoke-job -o wide
  register: csi_smoke_job_status
  changed_when: false
  failed_when: false
# Diagnostic step: lists (wide) the pods labelled job-name=csi-smoke-job and
# stores the output in csi_smoke_pod_status. Executes only when the smoke test
# is enabled and at least one wait step returned non-zero (an undefined
# Job-wait rc defaults to 1). Best-effort: never fails, never marks a change.
- name: Show CSI smoke pods on failure
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
  command:
    cmd: kubectl -n kube-system get pod -l job-name=csi-smoke-job -o wide
  register: csi_smoke_pod_status
  changed_when: false
  failed_when: false
# Diagnostic step: runs `kubectl describe` on the smoke-test PVC and registers
# the result as csi_smoke_pvc_describe. Same gate as the other failure
# diagnostics: smoke test enabled and a non-zero rc from the PVC wait or the
# Job wait (an undefined Job-wait rc defaults to 1). Errors from kubectl are
# ignored and the task never reports a change.
- name: Describe CSI smoke PVC on failure
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
  command:
    cmd: kubectl -n kube-system describe pvc csi-smoke-pvc
  register: csi_smoke_pvc_describe
  changed_when: false
  failed_when: false
- name: Show storage classes on failure
command: kubectl get storageclass
register: csi_storageclasses
failed_when: false
changed_when: false
when:
@@ -236,13 +272,27 @@
msg: |
CSI smoke test failed.
PVC wait:
{{ csi_smoke_pvc_wait.stdout | default(csi_smoke_pvc_wait.stderr) | default('n/a') }}
stdout: {{ csi_smoke_pvc_wait.stdout | default('') }}
stderr: {{ csi_smoke_pvc_wait.stderr | default('') }}
Job wait:
{{ csi_smoke_job_wait.stdout | default(csi_smoke_job_wait.stderr | default('n/a')) }}
stdout: {{ csi_smoke_job_wait.stdout | default('') }}
stderr: {{ csi_smoke_job_wait.stderr | default('') }}
Resources:
{{ csi_smoke_status.stdout | default('n/a') }}
PVC:
{{ csi_smoke_pvc_status.stdout | default(csi_smoke_pvc_status.stderr | default('n/a')) }}
Job:
{{ csi_smoke_job_status.stdout | default(csi_smoke_job_status.stderr | default('n/a')) }}
Pod list:
{{ csi_smoke_pod_status.stdout | default(csi_smoke_pod_status.stderr | default('n/a')) }}
PVC describe:
{{ csi_smoke_pvc_describe.stdout | default(csi_smoke_pvc_describe.stderr | default('n/a')) }}
Storage classes:
{{ csi_storageclasses.stdout | default(csi_storageclasses.stderr | default('n/a')) }}
Pod describe:
{{ csi_smoke_pod_describe.stdout | default('n/a') }}