---
# Deploys the Hetzner Cloud CSI driver with kubectl, waits for the controller
# Deployment and the node DaemonSet to roll out, and optionally runs a smoke
# test (StorageClass + PVC + Job) that writes to and reads from a fresh volume.
#
# Variables read by these tasks (defined outside this file):
#   hcloud_token, cluster_name, csi_manifest_url,
#   csi_rollout_timeout_seconds, csi_rollout_retries, csi_rollout_delay_seconds,
#   csi_failure_log_tail_lines,
#   csi_smoke_test_enabled, csi_smoke_test_storage_class, csi_smoke_test_size,
#   csi_smoke_test_pvc_timeout_seconds, csi_smoke_test_job_timeout_seconds

# --- Secret and manifest -----------------------------------------------------

# `create --dry-run=client -o yaml | kubectl apply` makes the secret creation
# repeatable: it creates the secret or updates it in place.
# The `quote` filter shell-escapes the values so that a token or cluster name
# containing quotes or other shell metacharacters cannot break the command.
- name: Create Hetzner CSI secret
  shell: |
    kubectl -n kube-system create secret generic hcloud \
      --from-literal=token={{ hcloud_token | quote }} \
      --from-literal=network={{ (cluster_name ~ '-network') | quote }} \
      --dry-run=client -o yaml | kubectl apply -f -
  no_log: true
  when: hcloud_token is defined
  changed_when: true

- name: Deploy Hetzner CSI
  command: kubectl apply -f {{ csi_manifest_url }}
  changed_when: true

- name: Restart CSI controller to pick up current secret
  command: kubectl -n kube-system rollout restart deployment/hcloud-csi-controller
  changed_when: true

# --- Controller rollout ------------------------------------------------------

# Best-effort first wait with a short timeout; its result is deliberately
# ignored, the task below is the one that decides success or failure.
- name: Wait for CSI controller deployment generation
  command: >-
    kubectl -n kube-system rollout status deployment/hcloud-csi-controller
    --timeout=30s
  failed_when: false
  changed_when: false

# `failed_when: false` only affects the individual attempts. Once the `until`
# retries are exhausted Ansible marks the task as failed regardless, which
# would stop the host here and skip every diagnostic task below.
# `ignore_errors: true` lets the play continue so the diagnostics are collected
# and the dedicated `fail` task reports them.
- name: Wait for CSI controller rollout
  command: >-
    kubectl rollout status deployment/hcloud-csi-controller -n kube-system
    --timeout={{ csi_rollout_timeout_seconds }}s
  register: csi_controller_rollout
  until: csi_controller_rollout.rc == 0
  retries: "{{ csi_rollout_retries | int }}"
  delay: "{{ csi_rollout_delay_seconds | int }}"
  failed_when: false
  ignore_errors: true
  changed_when: false

# The following tasks only run when the controller rollout did not succeed.
# Each one is best-effort (`failed_when: false`) and registers its output for
# the failure message assembled in "Fail with CSI controller diagnostics".
- name: Show CSI controller status on failure
  command: kubectl -n kube-system get deployment hcloud-csi-controller -o wide
  register: csi_controller_deploy_status
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0

- name: Show CSI controller pods on failure
  command: kubectl -n kube-system get pods -l app=hcloud-csi-controller -o wide
  register: csi_controller_pods_status
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0

- name: Describe CSI controller deployment on failure
  command: kubectl -n kube-system describe deployment hcloud-csi-controller
  register: csi_controller_deploy_describe
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0

# Looks up the first pod matching the controller label; does nothing when no
# pod exists.
- name: Describe CSI controller pod on failure
  shell: |
    pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller \
      -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
    if [ -n "$pod" ]; then
      kubectl -n kube-system describe pod "$pod"
    fi
  register: csi_controller_pod_describe
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0

- name: Show CSI driver logs on failure
  command: >-
    kubectl -n kube-system logs deployment/hcloud-csi-controller
    -c hcloud-csi-driver --tail={{ csi_failure_log_tail_lines }}
  register: csi_driver_logs
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0

# `--previous` returns the logs of the prior container instance, which is
# where the reason for a crash loop is found.
- name: Show CSI driver previous logs on failure
  shell: |
    pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller \
      -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
    if [ -n "$pod" ]; then
      kubectl -n kube-system logs "$pod" -c hcloud-csi-driver --previous \
        --tail={{ csi_failure_log_tail_lines }}
    fi
  register: csi_driver_previous_logs
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0

# `|| true` keeps the loop going when a sidecar has no previous instance.
- name: Show sidecar previous logs on failure
  shell: |
    pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller \
      -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
    if [ -n "$pod" ]; then
      for container in csi-attacher csi-resizer csi-provisioner; do
        echo "===== $container ====="
        kubectl -n kube-system logs "$pod" -c "$container" --previous \
          --tail={{ csi_failure_log_tail_lines }} || true
      done
    fi
  register: csi_sidecar_previous_logs
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0

- name: Show recent kube-system events on failure
  command: kubectl -n kube-system get events --sort-by=.lastTimestamp
  register: csi_recent_events
  changed_when: false
  failed_when: false
  when: csi_controller_rollout.rc != 0

# Stops the play and prints everything collected above in one message.
- name: Fail with CSI controller diagnostics
  fail:
    msg: |
      CSI controller rollout failed.

      Deployment status:
      {{ csi_controller_deploy_status.stdout | default('n/a') }}

      Pods status:
      {{ csi_controller_pods_status.stdout | default('n/a') }}

      Deployment describe:
      {{ csi_controller_deploy_describe.stdout | default('n/a') }}

      Pod describe:
      {{ csi_controller_pod_describe.stdout | default('n/a') }}

      hcloud-csi-driver logs:
      {{ csi_driver_logs.stdout | default('n/a') }}

      hcloud-csi-driver previous logs:
      {{ csi_driver_previous_logs.stdout | default('n/a') }}

      Sidecar previous logs:
      {{ csi_sidecar_previous_logs.stdout | default('n/a') }}

      Recent kube-system events:
      {{ csi_recent_events.stdout | default('n/a') }}
  when: csi_controller_rollout.rc != 0

# --- Node DaemonSet rollout --------------------------------------------------

# Same retry pattern as the controller rollout: `ignore_errors: true` is
# required so that exhausted retries reach the explicit `fail` task below
# instead of aborting on this task.
- name: Wait for CSI node daemonset rollout
  command: >-
    kubectl rollout status daemonset/hcloud-csi-node -n kube-system
    --timeout={{ csi_rollout_timeout_seconds }}s
  register: csi_node_rollout
  until: csi_node_rollout.rc == 0
  retries: "{{ csi_rollout_retries | int }}"
  delay: "{{ csi_rollout_delay_seconds | int }}"
  failed_when: false
  ignore_errors: true
  changed_when: false

- name: Fail when CSI node daemonset rollout does not complete
  fail:
    msg: "CSI node daemonset rollout failed: {{ csi_node_rollout.stdout | default('') }} {{ csi_node_rollout.stderr | default('') }}"
  when: csi_node_rollout.rc != 0

# --- Smoke test --------------------------------------------------------------

# Removes leftovers of an earlier (possibly failed) run so the apply below
# starts from a clean state.
- name: Cleanup stale CSI smoke test resources before apply
  shell: |
    kubectl -n kube-system delete job csi-smoke-job pvc csi-smoke-pvc --ignore-not-found --wait=true
    kubectl delete storageclass {{ csi_smoke_test_storage_class }} --ignore-not-found
  failed_when: false
  changed_when: false
  when: csi_smoke_test_enabled | bool

# Creates a StorageClass, a PVC using it, and a one-shot Job that writes a
# file to the volume and reads it back. The quoted heredoc delimiter ('EOF')
# prevents the shell from expanding anything inside the manifest; Jinja
# expressions are rendered by Ansible before the shell runs.
- name: Apply CSI smoke test resources
  shell: |
    kubectl apply -f - <<'EOF'
    apiVersion: storage.k8s.io/v1
    kind: StorageClass
    metadata:
      name: {{ csi_smoke_test_storage_class }}
    provisioner: csi.hetzner.cloud
    reclaimPolicy: Delete
    volumeBindingMode: Immediate
    allowVolumeExpansion: true
    ---
    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: csi-smoke-pvc
      namespace: kube-system
    spec:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: {{ csi_smoke_test_size }}
      storageClassName: {{ csi_smoke_test_storage_class }}
    ---
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: csi-smoke-job
      namespace: kube-system
    spec:
      backoffLimit: 0
      template:
        spec:
          restartPolicy: Never
          containers:
            - name: write-and-read
              image: busybox:1.36
              command: ["/bin/sh", "-c", "echo csi-ok > /data/health && cat /data/health"]
              volumeMounts:
                - name: data
                  mountPath: /data
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: csi-smoke-pvc
    EOF
  changed_when: true
  when: csi_smoke_test_enabled | bool

# The wait results are only registered here; the decision to fail is made in
# "Fail when CSI smoke test fails" after diagnostics have been collected.
- name: Wait for CSI smoke PVC to bind
  command: >-
    kubectl -n kube-system wait --for=jsonpath='{.status.phase}'=Bound
    pvc/csi-smoke-pvc --timeout={{ csi_smoke_test_pvc_timeout_seconds }}s
  register: csi_smoke_pvc_wait
  failed_when: false
  changed_when: false
  when: csi_smoke_test_enabled | bool

# Skipped when the PVC never bound; the Job could not start in that case.
- name: Wait for CSI smoke Job completion
  command: >-
    kubectl -n kube-system wait --for=condition=complete job/csi-smoke-job
    --timeout={{ csi_smoke_test_job_timeout_seconds }}s
  register: csi_smoke_job_wait
  failed_when: false
  changed_when: false
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc == 0

- name: Show CSI smoke job logs
  command: kubectl -n kube-system logs job/csi-smoke-job
  register: csi_smoke_job_logs
  failed_when: false
  changed_when: false
  when: csi_smoke_test_enabled | bool

# Failure condition shared by the tasks below: the PVC wait failed, or the Job
# wait failed. When the Job wait was skipped its result has no `rc`, so
# `default(1)` counts that as a failure.
- name: Show CSI smoke PVC on failure
  command: kubectl -n kube-system get pvc csi-smoke-pvc -o wide
  register: csi_smoke_pvc_status
  failed_when: false
  changed_when: false
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0

- name: Show CSI smoke Job on failure
  command: kubectl -n kube-system get job csi-smoke-job -o wide
  register: csi_smoke_job_status
  failed_when: false
  changed_when: false
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0

- name: Show CSI smoke pods on failure
  command: kubectl -n kube-system get pod -l job-name=csi-smoke-job -o wide
  register: csi_smoke_pod_status
  failed_when: false
  changed_when: false
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0

- name: Describe CSI smoke PVC on failure
  command: kubectl -n kube-system describe pvc csi-smoke-pvc
  register: csi_smoke_pvc_describe
  failed_when: false
  changed_when: false
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0

- name: Show storage classes on failure
  command: kubectl get storageclass
  register: csi_storageclasses
  failed_when: false
  changed_when: false
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0

- name: Show CSI smoke pod describe on failure
  shell: |
    pod="$(kubectl -n kube-system get pods -l job-name=csi-smoke-job \
      -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
    if [ -n "$pod" ]; then
      kubectl -n kube-system describe pod "$pod"
    fi
  register: csi_smoke_pod_describe
  failed_when: false
  changed_when: false
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0

# Stops the play and prints the wait results plus all collected diagnostics.
- name: Fail when CSI smoke test fails
  fail:
    msg: |
      CSI smoke test failed.

      PVC wait:
      stdout: {{ csi_smoke_pvc_wait.stdout | default('') }}
      stderr: {{ csi_smoke_pvc_wait.stderr | default('') }}

      Job wait:
      stdout: {{ csi_smoke_job_wait.stdout | default('') }}
      stderr: {{ csi_smoke_job_wait.stderr | default('') }}

      PVC:
      {{ csi_smoke_pvc_status.stdout | default(csi_smoke_pvc_status.stderr | default('n/a')) }}

      Job:
      {{ csi_smoke_job_status.stdout | default(csi_smoke_job_status.stderr | default('n/a')) }}

      Pod list:
      {{ csi_smoke_pod_status.stdout | default(csi_smoke_pod_status.stderr | default('n/a')) }}

      PVC describe:
      {{ csi_smoke_pvc_describe.stdout | default(csi_smoke_pvc_describe.stderr | default('n/a')) }}

      Storage classes:
      {{ csi_storageclasses.stdout | default(csi_storageclasses.stderr | default('n/a')) }}

      Pod describe:
      {{ csi_smoke_pod_describe.stdout | default('n/a') }}

      Job logs:
      {{ csi_smoke_job_logs.stdout | default('n/a') }}
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0

# NOTE(review): this task comes after the `fail` task above, so it only runs
# when the smoke test passed. Resources left behind by a failed run are removed
# by "Cleanup stale CSI smoke test resources before apply" on the next run -
# confirm that leaving them in place until then is intended.
- name: Cleanup CSI smoke test resources
  shell: |
    kubectl -n kube-system delete job csi-smoke-job pvc csi-smoke-pvc --ignore-not-found
    kubectl delete storageclass {{ csi_smoke_test_storage_class }} --ignore-not-found
  failed_when: false
  changed_when: false
  when: csi_smoke_test_enabled | bool