fix: retry health checks after rancher webhook startup
Deploy Cluster / Terraform (push) Waiting to run
Deploy Cluster / Ansible (push) Blocked by required conditions

This commit is contained in:
2026-05-03 22:54:18 +00:00
parent 55c7a4576c
commit 10e4390eb3
+59 -13
View File
@@ -1137,21 +1137,67 @@ jobs:
cat >"${health_script}" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
# Waits for the rancher-webhook Service in cattle-system to have at least one
# endpoint address, but only when that Service exists.
#
# Arguments: none
# Outputs:   rollout progress from kubectl; on failure, the Service
#            description and the Endpoints object as YAML.
# Returns:   0 when svc/rancher-webhook cannot be fetched (treated as absent)
#            or when an endpoint IP is found; 1 when the Deployment rollout
#            does not complete or no endpoint IP appears within the polling
#            window.
wait_for_rancher_webhook_if_present() {
  local elapsed=0
  local endpoint_ip

  if ! kubectl -n cattle-system get svc/rancher-webhook >/dev/null 2>&1; then
    return 0
  fi

  # The rollout result is checked explicitly instead of relying on errexit:
  # bash ignores `set -e` while a function runs as part of a condition, and an
  # errexit abort would skip the diagnostics below.
  if kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=300s; then
    # Poll every 10s, for up to 300s of accumulated sleep time.
    while (( elapsed < 300 )); do
      # A lookup error (for example no subsets yet) is treated as "no endpoint yet".
      endpoint_ip="$(kubectl -n cattle-system get endpoints/rancher-webhook -o jsonpath='{.subsets[0].addresses[0].ip}' 2>/dev/null || true)"
      if [[ -n "${endpoint_ip}" ]]; then
        return 0
      fi
      sleep 10
      elapsed=$((elapsed + 10))
    done
  fi

  # Best-effort diagnostics for the job log; their own failures are ignored.
  kubectl -n cattle-system describe svc/rancher-webhook || true
  kubectl -n cattle-system get endpoints/rancher-webhook -o yaml || true
  return 1
}
# Waits for a Flux Kustomization in flux-system to become Ready, retrying once
# when the Ready condition message mentions the Rancher webhook.
#
# Arguments:
#   $1 - Kustomization name
#   $2 - value passed to `kubectl wait --timeout` (e.g. 300s); used for both
#        the first wait and the retry
# Outputs:   kubectl output; a notice before the retry; on failure, the
#            `kubectl describe` of the Kustomization.
# Returns:   0 when the Kustomization is Ready (first try or retry), 1 otherwise.
wait_for_kustomization_ready() {
  local name="$1"
  local timeout="$2"
  local message

  if kubectl -n flux-system wait --for=condition=Ready "kustomization/${name}" --timeout="${timeout}"; then
    return 0
  fi

  # Best-effort read of the Ready condition message; a lookup error yields "".
  message="$(kubectl -n flux-system get "kustomization/${name}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].message}' 2>/dev/null || true)"

  if [[ "${message}" == *rancher-webhook* ]]; then
    echo "Kustomization ${name} is blocked by Rancher webhook admission; waiting for webhook endpoints and retrying"
    # Every retry step is checked explicitly. Bash ignores `set -e` while this
    # function runs as part of a condition, so an unchecked failure here would
    # otherwise be reported as success. Because the status is tested, errexit
    # is also suspended inside the webhook helper, which must signal failure
    # through its return code.
    if wait_for_rancher_webhook_if_present \
      && kubectl -n flux-system annotate "kustomization/${name}" reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite >/dev/null \
      && kubectl -n flux-system wait --for=condition=Ready "kustomization/${name}" --timeout="${timeout}"; then
      return 0
    fi
  fi

  # Shared failure path: print diagnostics for the job log, then fail.
  kubectl -n flux-system describe "kustomization/${name}" || true
  return 1
}
# Print a cluster and Flux overview into the job log before waiting.
kubectl get nodes -o wide
kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories

# Wait for each Kustomization through wait_for_kustomization_ready only.
# A direct `kubectl wait` ahead of these calls would abort the script under
# `set -e` before the wrapper could run, and would wait on every object twice.
wait_for_kustomization_ready infrastructure 300s
wait_for_kustomization_ready addon-cert-manager 300s
wait_for_kustomization_ready addon-external-secrets 300s
wait_for_kustomization_ready addon-external-secrets-store 300s
wait_for_kustomization_ready addon-nfs-storage 300s
wait_for_kustomization_ready addon-tailscale-operator 300s
wait_for_kustomization_ready addon-tailscale-proxyclass 300s
wait_for_kustomization_ready addon-rancher-secrets 300s
wait_for_kustomization_ready addon-rancher 900s
wait_for_kustomization_ready addon-rancher-config 300s
wait_for_kustomization_ready addon-observability-secrets 300s
wait_for_kustomization_ready addon-observability 300s
wait_for_kustomization_ready addon-observability-content 300s

# All HelmReleases in flux-system must be Ready as well.
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=600s

# Set the is-default-class annotation to false on local-path and true on flash-nfs.
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite