fix: wait on Rancher and storage runtime objects during bootstrap
Flux can leave HelmRelease and Kustomization conditions stale after transient chart-fetch or image-pull failures, even when the underlying workloads recover. Switch the deploy workflow to wait on the concrete runtime resources we care about instead: the NFS provisioner Deployment and StorageClass, the Rancher and rancher-webhook Deployments, the cert-manager Issuer and Certificate, and the rancher-backup Deployment.
This commit is contained in:
@@ -249,10 +249,9 @@ jobs:
|
|||||||
reconcile.fluxcd.io/resetAt="$TS" \
|
reconcile.fluxcd.io/resetAt="$TS" \
|
||||||
reconcile.fluxcd.io/forceAt="$TS" \
|
reconcile.fluxcd.io/forceAt="$TS" \
|
||||||
--overwrite || true
|
--overwrite || true
|
||||||
kubectl -n flux-system annotate kustomization/addon-nfs-storage reconcile.fluxcd.io/requestedAt="$TS" --overwrite || true
|
|
||||||
kubectl -n flux-system wait --for=condition=Ready helmrelease/nfs-subdir-external-provisioner --timeout=600s
|
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=600s
|
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
|
||||||
|
kubectl -n kube-system rollout status deployment/kube-system-nfs-subdir-external-provisioner --timeout=600s
|
||||||
|
kubectl get storageclass flash-nfs
|
||||||
|
|
||||||
- name: Wait for Rancher and backup operator
|
- name: Wait for Rancher and backup operator
|
||||||
env:
|
env:
|
||||||
@@ -265,13 +264,20 @@ jobs:
|
|||||||
reconcile.fluxcd.io/resetAt="$TS" \
|
reconcile.fluxcd.io/resetAt="$TS" \
|
||||||
reconcile.fluxcd.io/forceAt="$TS" \
|
reconcile.fluxcd.io/forceAt="$TS" \
|
||||||
--overwrite || true
|
--overwrite || true
|
||||||
|
kubectl -n flux-system annotate helmrelease/rancher-backup \
|
||||||
|
reconcile.fluxcd.io/requestedAt="$TS" \
|
||||||
|
reconcile.fluxcd.io/resetAt="$TS" \
|
||||||
|
reconcile.fluxcd.io/forceAt="$TS" \
|
||||||
|
--overwrite || true
|
||||||
|
|
||||||
echo "Waiting for Rancher..."
|
echo "Waiting for Rancher..."
|
||||||
kubectl -n flux-system wait --for=condition=Ready helmrelease/rancher --timeout=900s
|
kubectl -n cattle-system rollout status deployment/cattle-system-rancher --timeout=900s
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s
|
kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=900s
|
||||||
|
kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=900s
|
||||||
|
kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=900s
|
||||||
|
|
||||||
echo "Waiting for rancher-backup operator..."
|
echo "Waiting for rancher-backup operator..."
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=600s || true
|
kubectl -n cattle-resources-system rollout status deployment/rancher-backup --timeout=900s
|
||||||
|
|
||||||
- name: Restore Rancher from latest B2 backup
|
- name: Restore Rancher from latest B2 backup
|
||||||
env:
|
env:
|
||||||
|
|||||||
Reference in New Issue
Block a user