fix: run health checks with script module
Deploy Cluster / Terraform (push) Successful in 32s
Deploy Cluster / Ansible (push) Successful in 33m48s

This commit is contained in:
2026-05-01 20:30:23 +00:00
parent 0333344a0a
commit 4b9c07b536
+39 -35
View File
@@ -1039,41 +1039,45 @@ jobs:
working-directory: ansible working-directory: ansible
run: | run: |
set -euo pipefail set -euo pipefail
ansible -i inventory.ini 'control_plane[0]' -m shell -a ' health_script="$(mktemp)"
set -euo pipefail cat >"${health_script}" <<'EOF'
kubectl get nodes -o wide #!/usr/bin/env bash
kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories set -euo pipefail
kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=300s kubectl get nodes -o wide
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=300s kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets-store --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets-store --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-proxyclass --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-proxyclass --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-secrets --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-secrets --timeout=300s
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=600s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=300s
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=600s
kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)" kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
! kubectl get storageclass | grep -E "^local-path.*\\(default\\)" kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
unhealthy_pods=$(mktemp) kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
kubectl get pods -A --no-headers \ ! kubectl get storageclass | grep -E "^local-path.*\\(default\\)"
| grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \ unhealthy_pods=$(mktemp)
| grep -Ev "^cattle-system[[:space:]]+helm-operation-" \ kubectl get pods -A --no-headers \
| grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \ | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
| grep -Ev "^cattle-turtles-system[[:space:]]+cluster-api-operator-resources-cleanup-" \ | grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
| grep -Ev "^kube-system[[:space:]]+helm-install-" \ | grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \
| tee "${unhealthy_pods}" || true | grep -Ev "^cattle-turtles-system[[:space:]]+cluster-api-operator-resources-cleanup-" \
test ! -s "${unhealthy_pods}" | grep -Ev "^kube-system[[:space:]]+helm-install-" \
kubectl -n kube-system get pods -o wide | tee "${unhealthy_pods}" || true
kubectl -n tailscale-system get pods -o wide test ! -s "${unhealthy_pods}"
kubectl -n external-secrets get pods -o wide kubectl -n kube-system get pods -o wide
' -e ansible_shell_executable=/bin/bash kubectl -n tailscale-system get pods -o wide
kubectl -n external-secrets get pods -o wide
EOF
chmod +x "${health_script}"
ansible -i inventory.ini 'control_plane[0]' -m script -a "${health_script}"
env: env:
ANSIBLE_HOST_KEY_CHECKING: "False" ANSIBLE_HOST_KEY_CHECKING: "False"