fix: wait for Rancher Flux resources before rollout
Deploy Cluster / Terraform (push) Successful in 27s
Deploy Cluster / Ansible (push) Failing after 39m43s

This commit is contained in:
2026-04-25 00:59:16 +00:00
parent cafa2fa0b3
commit 5523feb563
+50
View File
@@ -387,13 +387,63 @@ jobs:
KUBECONFIG: outputs/kubeconfig
run: |
set -euo pipefail
#######################################
# Poll the cluster until a resource exists.
# Arguments:
#   $1 - namespace to look in; pass "" for cluster-scoped resources
#   $2 - resource reference accepted by `kubectl get` (e.g. kind/name)
#   $3 - timeout in seconds
# Outputs:
#   On timeout: a message (plus the last kubectl error, if any) on stderr and
#   a listing of the Flux objects in flux-system for diagnosis.
# Returns:
#   0 once the resource exists. On timeout it calls `exit 1`, which ends the
#   whole script, not just this function.
#######################################
wait_for_resource() {
  local namespace="$1"
  local resource="$2"
  local timeout_seconds="$3"
  local -r poll_seconds=10
  local elapsed=0
  local last_error=""

  # `2>&1 >/dev/null` keeps only kubectl's stderr: the object listing is
  # discarded, but the reason for the most recent failure is remembered so a
  # timeout caused by auth/RBAC/API errors is not mistaken for "not created yet".
  until
    if [ -n "${namespace}" ]; then
      last_error="$(kubectl -n "${namespace}" get "${resource}" 2>&1 >/dev/null)"
    else
      last_error="$(kubectl get "${resource}" 2>&1 >/dev/null)"
    fi
  do
    if [ "${elapsed}" -ge "${timeout_seconds}" ]; then
      printf 'Timed out waiting for %s to exist\n' "${resource}" >&2
      if [ -n "${last_error}" ]; then
        printf 'Last kubectl error: %s\n' "${last_error}" >&2
      fi
      # Best-effort diagnostics; must never mask the timeout itself.
      kubectl -n flux-system get kustomizations,helmrepositories,helmcharts,helmreleases || true
      exit 1
    fi
    sleep "${poll_seconds}"
    elapsed=$((elapsed + poll_seconds))
  done
}
#######################################
# Stamp a Flux HelmRelease in flux-system with reconcile annotations.
# All three annotations (requestedAt, resetAt, forceAt) receive the same
# epoch-seconds token, overwriting any previous values.
# NOTE(review): per the Flux docs, resetAt/forceAt only take effect when their
# value matches requestedAt, hence the shared token — confirm against the
# helm-controller version in use.
# Arguments:
#   $1 - HelmRelease name
# Returns:
#   exit status of `kubectl annotate`
#######################################
reconcile_helmrelease() {
  local release_name="$1"
  local token
  token="$(date +%s)"

  local -a annotations=(
    "reconcile.fluxcd.io/requestedAt=${token}"
    "reconcile.fluxcd.io/resetAt=${token}"
    "reconcile.fluxcd.io/forceAt=${token}"
  )

  kubectl -n flux-system annotate "helmrelease/${release_name}" "${annotations[@]}" --overwrite
}
echo "Waiting for Rancher..."
# kubectl annotate / wait are not guaranteed to tolerate a missing object, so
# first wait for each Flux object to be created, then request a reconcile.
wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher 600
kubectl -n flux-system annotate kustomization/addon-rancher reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite
wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher 600
reconcile_helmrelease rancher
kubectl -n flux-system wait --for=condition=Ready helmrelease/rancher --timeout=1800s
# Cluster-scoped lookup: empty namespace argument.
wait_for_resource "" namespace/cattle-system 600
kubectl -n cattle-system rollout status deployment/cattle-system-rancher --timeout=900s
kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=900s
kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=900s
kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=900s
echo "Waiting for rancher-backup operator..."
wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher-backup 600
kubectl -n flux-system annotate kustomization/addon-rancher-backup reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite
# Both HelmReleases must exist before either is annotated; the CRD release is
# annotated and awaited first, then the operator release.
wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher-backup-crd 600
wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher-backup 600
reconcile_helmrelease rancher-backup-crd
reconcile_helmrelease rancher-backup
kubectl -n flux-system wait --for=condition=Ready helmrelease/rancher-backup-crd --timeout=1200s
kubectl -n flux-system wait --for=condition=Ready helmrelease/rancher-backup --timeout=1200s
wait_for_resource "" namespace/cattle-resources-system 600
kubectl -n cattle-resources-system rollout status deployment/rancher-backup --timeout=900s
- name: Restore Rancher from latest B2 backup