# Post-deploy cluster health check (body of the generated health script).
#
# Waits for each Flux Kustomization to report Ready, then for every
# HelmRelease, and finally makes flash-nfs the default StorageClass instead of
# local-path.
#
# The enclosing script enables `set -euo pipefail` before this point. The
# functions below still check every critical kubectl call explicitly, because
# `set -e` is suppressed whenever a function is invoked from a condition
# (`if`, `&&`, `||`) and would otherwise let failures slip through silently.

#######################################
# Wait until the Rancher admission webhook is backed by at least one endpoint.
# Does nothing when the rancher-webhook Service does not exist.
# Arguments: none
# Outputs:   kubectl rollout progress; on failure, a description of the
#            Service and a YAML dump of its Endpoints object.
# Returns:   0 if the Service is absent or an endpoint IP was found,
#            1 if the rollout failed or no endpoint appeared within 300s.
#######################################
wait_for_rancher_webhook_if_present() {
  local -r deadline=300
  local -r interval=10
  local elapsed=0
  local endpoint_ip

  # Clusters without Rancher have no webhook to wait for.
  if ! kubectl -n cattle-system get svc/rancher-webhook >/dev/null 2>&1; then
    return 0
  fi

  # Only poll for endpoints when the rollout itself succeeded; a failed
  # rollout falls through to the diagnostics below instead of aborting
  # without any context.
  if kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=300s; then
    while (( elapsed < deadline )); do
      # Lookup errors are expected while the Endpoints object is still
      # empty, so they are deliberately swallowed and treated as "not yet".
      endpoint_ip="$(kubectl -n cattle-system get endpoints/rancher-webhook \
        -o jsonpath='{.subsets[0].addresses[0].ip}' 2>/dev/null || true)"
      if [[ -n "${endpoint_ip}" ]]; then
        return 0
      fi
      sleep "${interval}"
      elapsed=$(( elapsed + interval ))
    done
  fi

  # Best-effort diagnostics; never let them mask the real failure.
  kubectl -n cattle-system describe svc/rancher-webhook || true
  kubectl -n cattle-system get endpoints/rancher-webhook -o yaml || true
  return 1
}

#######################################
# Wait for a Flux Kustomization to become Ready, retrying once when the
# failure message points at the Rancher admission webhook.
# Arguments: $1 - Kustomization name in the flux-system namespace
#            $2 - kubectl timeout such as 300s (optional, defaults to 300s)
# Outputs:   kubectl output; a notice before the retry; on failure, a
#            description of the Kustomization.
# Returns:   0 once the Kustomization is Ready, 1 otherwise.
#######################################
wait_for_kustomization_ready() {
  local name="$1"
  local timeout="${2:-300s}"
  local message

  if kubectl -n flux-system wait --for=condition=Ready "kustomization/${name}" --timeout="${timeout}"; then
    return 0
  fi

  message="$(kubectl -n flux-system get "kustomization/${name}" \
    -o jsonpath='{.status.conditions[?(@.type=="Ready")].message}' 2>/dev/null || true)"

  # Substring match done in-shell: piping into `grep -q` under `pipefail` can
  # report failure on a match if grep exits before the writer has finished.
  if [[ "${message}" == *rancher-webhook* ]]; then
    printf '%s\n' "Kustomization ${name} is blocked by Rancher webhook admission; waiting for webhook endpoints and retrying"
    # Changing the requestedAt annotation asks Flux to reconcile again now.
    # Every step must succeed for the retry to count as success; any failure
    # drops through to the diagnostics and a non-zero return.
    if wait_for_rancher_webhook_if_present \
      && kubectl -n flux-system annotate "kustomization/${name}" reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite >/dev/null \
      && kubectl -n flux-system wait --for=condition=Ready "kustomization/${name}" --timeout="${timeout}"; then
      return 0
    fi
  fi

  kubectl -n flux-system describe "kustomization/${name}" || true
  return 1
}

kubectl get nodes -o wide
kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories

# Kustomizations to wait for, in order, as "name:timeout".
readonly -a kustomization_waits=(
  "infrastructure:300s"
  "addon-cert-manager:300s"
  "addon-external-secrets:300s"
  "addon-external-secrets-store:300s"
  "addon-nfs-storage:300s"
  "addon-tailscale-operator:300s"
  "addon-tailscale-proxyclass:300s"
  "addon-rancher-secrets:300s"
  "addon-rancher:900s"
  "addon-rancher-config:300s"
  "addon-observability-secrets:300s"
  "addon-observability:300s"
  "addon-observability-content:300s"
)

for kustomization_wait in "${kustomization_waits[@]}"; do
  wait_for_kustomization_ready "${kustomization_wait%%:*}" "${kustomization_wait##*:}"
done

kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=600s
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite