feat: add CSI smoke test and default Traefik LB location
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
---
|
---
|
||||||
hcloud_token: ""
|
hcloud_token: ""
|
||||||
cluster_name: "k8s-cluster"
|
cluster_name: "k8s-cluster"
|
||||||
|
# Value written to the load-balancer.hetzner.cloud/location annotation on the kube-system/traefik service.
hcloud_lb_location: "nbg1"
|
||||||
|
|||||||
@@ -49,6 +49,30 @@
|
|||||||
delay: 10
|
delay: 10
|
||||||
when: ccm_workload_kind.stdout == "daemonset"
|
when: ccm_workload_kind.stdout == "daemonset"
|
||||||
|
|
||||||
|
# Annotate kube-system/traefik with
# load-balancer.hetzner.cloud/location=<hcloud_lb_location>.
# A non-zero exit code is tolerated here (failed_when: false); the result is
# kept in traefik_annotation so it can be inspected via traefik_annotation.rc.
- name: Set default Hetzner load balancer location for Traefik service
  command:
    # argv avoids re-splitting the rendered variable on whitespace.
    argv:
      - kubectl
      - -n
      - kube-system
      - annotate
      - service
      - traefik
      - "load-balancer.hetzner.cloud/location={{ hcloud_lb_location }}"
      - --overwrite
  register: traefik_annotation
  # Only report a change when kubectl actually succeeded; a failed annotate
  # call modified nothing.
  changed_when: traefik_annotation.rc == 0
  failed_when: false
|
||||||
|
|
||||||
|
# Diagnostic only: when the annotate command exited non-zero, capture the
# Traefik service manifest into traefik_service_dump. This task never fails
# and never reports a change.
- name: Show Traefik service when annotation patch fails
  when: traefik_annotation.rc != 0
  command:
    argv:
      - kubectl
      - -n
      - kube-system
      - get
      - service
      - traefik
      - -o
      - yaml
  register: traefik_service_dump
  failed_when: false
  changed_when: false
|
||||||
|
|
||||||
|
# Abort the play when the annotate command exited non-zero, and include the
# command output plus the captured service manifest in the error message.
- name: Fail when Traefik load balancer annotation cannot be set
  fail:
    # default(..., true) also falls back on EMPTY strings; command results
    # always define stderr/stdout, so the plain default() never triggered.
    msg: |
      Failed to set Hetzner load balancer location annotation on kube-system/traefik service.
      Command output:
      {{ traefik_annotation.stderr | default(traefik_annotation.stdout | default('n/a', true), true) }}

      Service dump:
      {{ traefik_service_dump.stdout | default('n/a', true) }}
  when: traefik_annotation.rc != 0
|
||||||
|
|
||||||
- name: Show CCM namespace objects when workload missing
|
- name: Show CCM namespace objects when workload missing
|
||||||
command: kubectl -n kube-system get all | grep hcloud-cloud-controller-manager || true
|
command: kubectl -n kube-system get all | grep hcloud-cloud-controller-manager || true
|
||||||
register: ccm_ns_objects
|
register: ccm_ns_objects
|
||||||
|
|||||||
@@ -6,3 +6,6 @@ csi_rollout_timeout_seconds: 30
|
|||||||
csi_rollout_retries: 8
|
csi_rollout_retries: 8
|
||||||
csi_rollout_delay_seconds: 5
|
csi_rollout_delay_seconds: 5
|
||||||
csi_failure_log_tail_lines: 120
|
csi_failure_log_tail_lines: 120
|
||||||
|
# Gate for every CSI smoke-test task (each uses `when: csi_smoke_test_enabled | bool`).
csi_smoke_test_enabled: true
# Rendered into storageClassName of the smoke-test PersistentVolumeClaim.
csi_smoke_test_storage_class: 'hcloud-volumes'
# Rendered into resources.requests.storage of the smoke-test PersistentVolumeClaim.
csi_smoke_test_size: '1Gi'
|
||||||
|
|||||||
@@ -70,6 +70,31 @@
|
|||||||
failed_when: false
|
failed_when: false
|
||||||
when: csi_controller_rollout.rc != 0
|
when: csi_controller_rollout.rc != 0
|
||||||
|
|
||||||
|
# Diagnostics for a failed controller rollout (csi_controller_rollout.rc != 0):
# look up the first pod labelled app=hcloud-csi-controller and, if one exists,
# fetch the last csi_failure_log_tail_lines lines of the PREVIOUS instance of
# its hcloud-csi-driver container. Best effort: never fails, never "changed".
- name: Show CSI driver previous logs on failure
  when: csi_controller_rollout.rc != 0
  shell:
    cmd: |
      pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
      if [ -n "$pod" ]; then
        kubectl -n kube-system logs "$pod" -c hcloud-csi-driver --previous --tail={{ csi_failure_log_tail_lines }}
      fi
  register: csi_driver_previous_logs
  failed_when: false
  changed_when: false
|
||||||
|
|
||||||
|
# Diagnostics for a failed controller rollout (csi_controller_rollout.rc != 0):
# for the first pod labelled app=hcloud-csi-controller, print a header and the
# last csi_failure_log_tail_lines lines of the PREVIOUS instance of each
# sidecar container (csi-attacher, csi-resizer, csi-provisioner). A missing
# previous instance is ignored per container via `|| true`.
- name: Show sidecar previous logs on failure
  when: csi_controller_rollout.rc != 0
  shell:
    cmd: |
      pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
      if [ -n "$pod" ]; then
        for container in csi-attacher csi-resizer csi-provisioner; do
          echo "===== $container ====="
          kubectl -n kube-system logs "$pod" -c "$container" --previous --tail={{ csi_failure_log_tail_lines }} || true
        done
      fi
  register: csi_sidecar_previous_logs
  failed_when: false
  changed_when: false
|
||||||
|
|
||||||
- name: Show recent kube-system events on failure
|
- name: Show recent kube-system events on failure
|
||||||
command: kubectl -n kube-system get events --sort-by=.lastTimestamp
|
command: kubectl -n kube-system get events --sort-by=.lastTimestamp
|
||||||
register: csi_recent_events
|
register: csi_recent_events
|
||||||
@@ -119,27 +144,117 @@
|
|||||||
fail:
|
fail:
|
||||||
msg: "CSI node daemonset rollout failed: {{ csi_node_rollout.stdout | default('') }} {{ csi_node_rollout.stderr | default('') }}"
|
msg: "CSI node daemonset rollout failed: {{ csi_node_rollout.stdout | default('') }} {{ csi_node_rollout.stderr | default('') }}"
|
||||||
when: csi_node_rollout.rc != 0
|
when: csi_node_rollout.rc != 0
|
||||||
- name: Show CSI driver previous logs on failure
|
|
||||||
shell: |
|
|
||||||
pod="$(kubectl -n kube-system get pods -l app=hcloud-csi-controller -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
|
|
||||||
if [ -n "$pod" ]; then
|
|
||||||
kubectl -n kube-system logs "$pod" -c hcloud-csi-driver --previous --tail={{ csi_failure_log_tail_lines }}
|
|
||||||
fi
|
|
||||||
register: csi_driver_previous_logs
|
|
||||||
changed_when: false
|
|
||||||
failed_when: false
|
|
||||||
when: csi_controller_rollout.rc != 0
|
|
||||||
|
|
||||||
- name: Show sidecar previous logs on failure
|
# Create the CSI smoke-test objects in kube-system:
#   * PVC csi-smoke-pvc (ReadWriteOnce, size csi_smoke_test_size,
#     storage class csi_smoke_test_storage_class)
#   * Job csi-smoke-job (busybox) that writes "csi-ok" to /data/health on the
#     mounted claim and reads it back.
# Skipped entirely unless csi_smoke_test_enabled is truthy.
- name: Apply CSI smoke test resources
  shell: |
    set -e
    # Remove leftovers from an earlier run first. Otherwise `kubectl apply`
    # leaves an already finished (or failed) Job untouched and the wait tasks
    # would evaluate that stale Job instead of a fresh test.
    kubectl -n kube-system delete job/csi-smoke-job pvc/csi-smoke-pvc --ignore-not-found --timeout=120s
    kubectl apply -f - <<'EOF'
    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: csi-smoke-pvc
      namespace: kube-system
    spec:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: "{{ csi_smoke_test_size }}"
      storageClassName: "{{ csi_smoke_test_storage_class }}"
    ---
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: csi-smoke-job
      namespace: kube-system
    spec:
      backoffLimit: 0
      template:
        spec:
          restartPolicy: Never
          containers:
            - name: write-and-read
              image: busybox:1.36
              command: ["/bin/sh", "-c", "echo csi-ok > /data/health && cat /data/health"]
              volumeMounts:
                - name: data
                  mountPath: /data
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: csi-smoke-pvc
    EOF
  changed_when: true
  when: csi_smoke_test_enabled | bool
|
||||||
|
|
||||||
|
# Block for up to 120s until pvc/csi-smoke-pvc reports status.phase == Bound.
# The outcome is only recorded (csi_smoke_pvc_wait.rc); this task itself
# never fails and never reports a change.
- name: Wait for CSI smoke PVC to bind
  when: csi_smoke_test_enabled | bool
  command:
    argv:
      - kubectl
      - -n
      - kube-system
      - wait
      - '--for=jsonpath={.status.phase}=Bound'
      - pvc/csi-smoke-pvc
      - --timeout=120s
  register: csi_smoke_pvc_wait
  changed_when: false
  failed_when: false
|
||||||
|
|
||||||
|
# Block for up to 180s until job/csi-smoke-job has condition "complete".
# Runs only when the smoke test is enabled AND the PVC wait succeeded; the
# outcome is only recorded in csi_smoke_job_wait.
- name: Wait for CSI smoke Job completion
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc == 0
  command:
    argv:
      - kubectl
      - -n
      - kube-system
      - wait
      - --for=condition=complete
      - job/csi-smoke-job
      - --timeout=180s
  register: csi_smoke_job_wait
  changed_when: false
  failed_when: false
|
||||||
|
|
||||||
|
# Capture the logs of job/csi-smoke-job into csi_smoke_job_logs whenever the
# smoke test is enabled. Best effort: never fails, never "changed".
- name: Show CSI smoke job logs
  when: csi_smoke_test_enabled | bool
  command:
    argv:
      - kubectl
      - -n
      - kube-system
      - logs
      - job/csi-smoke-job
  register: csi_smoke_job_logs
  changed_when: false
  failed_when: false
|
||||||
|
|
||||||
|
# Diagnostics when the PVC wait or the Job wait did not succeed: list the
# smoke-test PVC, Job and the Job's pods (wide output) into csi_smoke_status.
# Best effort: never fails, never "changed".
- name: Show CSI smoke resources on failure
  # Two separate calls are required: kubectl only accepts several resource
  # kinds by name in TYPE/NAME form, and a label selector cannot be combined
  # with explicit names. stderr is merged so lookup errors show up in stdout.
  shell: |
    kubectl -n kube-system get pvc/csi-smoke-pvc job/csi-smoke-job -o wide 2>&1
    kubectl -n kube-system get pods -l job-name=csi-smoke-job -o wide 2>&1
  register: csi_smoke_status
  failed_when: false
  changed_when: false
  when:
    - csi_smoke_test_enabled | bool
    # csi_smoke_job_wait has no rc when that task was skipped; treat as failure.
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
|
||||||
|
|
||||||
|
# Diagnostics when the PVC wait or the Job wait did not succeed: describe the
# first pod labelled job-name=csi-smoke-job, if any, into
# csi_smoke_pod_describe. Best effort: never fails, never "changed".
- name: Show CSI smoke pod describe on failure
  when:
    - csi_smoke_test_enabled | bool
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
  shell:
    cmd: |
      pod="$(kubectl -n kube-system get pods -l job-name=csi-smoke-job -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)"
      if [ -n "$pod" ]; then
        kubectl -n kube-system describe pod "$pod"
      fi
  register: csi_smoke_pod_describe
  changed_when: false
  failed_when: false
|
||||||
|
|
||||||
|
# Abort the play when the PVC wait or the Job wait did not succeed, and put
# all collected diagnostics into the error message.
- name: Fail when CSI smoke test fails
  fail:
    # default(..., true) falls back on EMPTY values as well as undefined ones.
    # Command results always define stdout (often empty on error), so the
    # plain default() never reached stderr and the kubectl error was lost.
    msg: |
      CSI smoke test failed.
      PVC wait:
      {{ csi_smoke_pvc_wait.stdout | default(csi_smoke_pvc_wait.stderr | default('n/a', true), true) }}

      Job wait:
      {{ csi_smoke_job_wait.stdout | default(csi_smoke_job_wait.stderr | default('n/a', true), true) }}

      Resources:
      {{ csi_smoke_status.stdout | default('n/a', true) }}

      Pod describe:
      {{ csi_smoke_pod_describe.stdout | default('n/a', true) }}

      Job logs:
      {{ csi_smoke_job_logs.stdout | default('n/a', true) }}
  when:
    - csi_smoke_test_enabled | bool
    # csi_smoke_job_wait has no rc when that task was skipped; treat as failure.
    - csi_smoke_pvc_wait.rc != 0 or (csi_smoke_job_wait.rc | default(1)) != 0
|
||||||
|
|
||||||
|
# Delete the smoke-test Job and PVC again. Best effort: never fails and is
# not reported as a change.
# NOTE(review): this task is listed after "Fail when CSI smoke test fails",
# so it does not run when the smoke test failed — consider block/always if
# cleanup is also wanted in that case.
- name: Cleanup CSI smoke test resources
  command:
    argv:
      - kubectl
      - -n
      - kube-system
      - delete
      # TYPE/NAME form is required to address two kinds in one call; with
      # "job NAME pvc NAME" every argument after "job" is read as a Job name
      # and the PVC is silently left behind.
      - job/csi-smoke-job
      - pvc/csi-smoke-pvc
      - --ignore-not-found
  failed_when: false
  changed_when: false
  when: csi_smoke_test_enabled | bool
|
||||||
|
|||||||
Reference in New Issue
Block a user