fix: run health checks with script module
Deploy Cluster / Terraform (push) Successful in 32s
Deploy Cluster / Ansible (push) Successful in 33m48s

This commit is contained in:
2026-05-01 20:30:23 +00:00
parent 0333344a0a
commit 4b9c07b536
+39 -35
View File
@@ -1039,41 +1039,45 @@ jobs:
working-directory: ansible
run: |
set -euo pipefail
# Run the cluster health checks on the first control-plane host through the
# Ansible shell module. ansible_shell_executable is set to /bin/bash and the
# script relies on `set -o pipefail`. The whole script is passed as one
# single-quoted argument, so it must not contain single quotes itself.
#
# Checks, in order: node and Flux resource listing, readiness of each Flux
# kustomization and of all HelmReleases, flash-nfs being the only default
# StorageClass, and no pod outside Running/Completed (a few helper pods are
# excluded by namespace and name prefix).
#
# The "local-path is not default" check is written as an explicit if/exit:
# errexit ignores a pipeline whose status is inverted with "!", so a bare
# negated pipeline would never fail the script.
ansible -i inventory.ini 'control_plane[0]' -m shell -a '
set -euo pipefail
kubectl get nodes -o wide
kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories
kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets-store --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-proxyclass --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=600s
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
if kubectl get storageclass | grep -E "^local-path.*\\(default\\)"; then
  echo "local-path is still marked as the default StorageClass" >&2
  exit 1
fi
unhealthy_pods=$(mktemp)
kubectl get pods -A --no-headers \
| grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
| grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
| grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \
| grep -Ev "^cattle-turtles-system[[:space:]]+cluster-api-operator-resources-cleanup-" \
| grep -Ev "^kube-system[[:space:]]+helm-install-" \
| tee "${unhealthy_pods}" || true
test ! -s "${unhealthy_pods}"
kubectl -n kube-system get pods -o wide
kubectl -n tailscale-system get pods -o wide
kubectl -n external-secrets get pods -o wide
' -e ansible_shell_executable=/bin/bash
# Write the cluster health checks to a local temporary script and run it on
# the first control-plane host with the Ansible script module.
health_script="$(mktemp)"
# Remove the local temporary script when this step exits, on success or failure.
trap 'rm -f "${health_script}"' EXIT
cat >"${health_script}" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail

# Show node and Flux resource state for the job log.
kubectl get nodes -o wide
kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories

# Block until each Flux kustomization reports Ready; any timeout fails the run.
kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets-store --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-proxyclass --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=600s

# Make flash-nfs the only default StorageClass, then verify both sides.
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
# errexit ignores a pipeline whose status is inverted with "!", so the
# negative check has to fail explicitly.
if kubectl get storageclass | grep -E "^local-path.*\\(default\\)"; then
  echo "local-path is still marked as the default StorageClass" >&2
  exit 1
fi

# Collect every pod line that is not Running/Completed, ignoring the helper
# pods excluded below by namespace and name prefix.
unhealthy_pods=$(mktemp)
# Do not leave the scratch file behind on the control-plane host.
trap 'rm -f "${unhealthy_pods}"' EXIT
# grep -v exits non-zero when nothing is left, which is the healthy case, so
# the pipeline status is ignored and the file content decides the outcome.
# NOTE(review): `|| true` also hides a failing `kubectl get pods`.
kubectl get pods -A --no-headers \
| grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
| grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
| grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \
| grep -Ev "^cattle-turtles-system[[:space:]]+cluster-api-operator-resources-cleanup-" \
| grep -Ev "^kube-system[[:space:]]+helm-install-" \
| tee "${unhealthy_pods}" || true
test ! -s "${unhealthy_pods}"

# Final pod listings for the job log.
kubectl -n kube-system get pods -o wide
kubectl -n tailscale-system get pods -o wide
kubectl -n external-secrets get pods -o wide
EOF
chmod +x "${health_script}"
ansible -i inventory.ini 'control_plane[0]' -m script -a "${health_script}"
env:
ANSIBLE_HOST_KEY_CHECKING: "False"