fix: cap long flux reconcile waits
This commit is contained in:
+74
-14
@@ -268,6 +268,24 @@ jobs:
|
||||
return 0
|
||||
fi
|
||||
|
||||
ready="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
|
||||
healthy="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Healthy")].status}' 2>/dev/null || true)"
|
||||
reconciling="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Reconciling")].status}' 2>/dev/null || true)"
|
||||
generation="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.metadata.generation}' 2>/dev/null || true)"
|
||||
observed_generation="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.observedGeneration}' 2>/dev/null || true)"
|
||||
|
||||
if [[ "${resource}" == helmrelease/* ]] && [ "${reconciling}" = "True" ]; then
|
||||
echo "${resource} is actively reconciling; continuing without waiting for reconcile token ${reconcile_at}"
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [ "${ready}" = "True" ] && [ "${observed_generation}" = "${generation}" ]; then
|
||||
if [ -z "${healthy}" ] || [ "${healthy}" = "True" ]; then
|
||||
echo "${resource} is already Ready; continuing without waiting for reconcile token ${reconcile_at}"
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
elapsed=$((elapsed + 5))
|
||||
done
|
||||
@@ -640,6 +658,24 @@ jobs:
|
||||
return 0
|
||||
fi
|
||||
|
||||
ready="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
|
||||
healthy="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Healthy")].status}' 2>/dev/null || true)"
|
||||
reconciling="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Reconciling")].status}' 2>/dev/null || true)"
|
||||
generation="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.metadata.generation}' 2>/dev/null || true)"
|
||||
observed_generation="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.observedGeneration}' 2>/dev/null || true)"
|
||||
|
||||
if [[ "${resource}" == helmrelease/* ]] && [ "${reconciling}" = "True" ]; then
|
||||
echo "${resource} is actively reconciling; continuing without waiting for reconcile token ${reconcile_at}"
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [ "${ready}" = "True" ] && [ "${observed_generation}" = "${generation}" ]; then
|
||||
if [ -z "${healthy}" ] || [ "${healthy}" = "True" ]; then
|
||||
echo "${resource} is already Ready; continuing without waiting for reconcile token ${reconcile_at}"
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
elapsed=$((elapsed + 5))
|
||||
done
|
||||
@@ -911,20 +947,20 @@ jobs:
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=900s
|
||||
wait_for_rancher_bootstrap_secrets 900
|
||||
wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher 600
|
||||
reconcile_flux_resource flux-system kustomization/addon-rancher 1800
|
||||
reconcile_flux_resource flux-system kustomization/addon-rancher 600
|
||||
wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher 600
|
||||
reconcile_helmrelease rancher 300
|
||||
wait_for_helmchart_ready flux-system-rancher rancher 180s 5
|
||||
wait_for_helmrelease_ready rancher cattle-system 900
|
||||
wait_for_helmchart_ready flux-system-rancher rancher 120s 3
|
||||
wait_for_helmrelease_ready rancher cattle-system 600
|
||||
wait_for_resource "" namespace/cattle-system 600
|
||||
wait_for_resource cattle-system deployment/cattle-system-rancher 600
|
||||
kubectl -n cattle-system rollout status deployment/cattle-system-rancher --timeout=900s
|
||||
wait_for_resource cattle-system deployment/rancher-webhook 900
|
||||
kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=900s
|
||||
wait_for_resource cattle-system issuer/cattle-system-rancher 900
|
||||
wait_for_resource cattle-system certificate/tls-rancher-ingress 900
|
||||
kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=900s
|
||||
kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=900s
|
||||
kubectl -n cattle-system rollout status deployment/cattle-system-rancher --timeout=600s
|
||||
wait_for_resource cattle-system deployment/rancher-webhook 600
|
||||
kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=600s
|
||||
wait_for_resource cattle-system issuer/cattle-system-rancher 600
|
||||
wait_for_resource cattle-system certificate/tls-rancher-ingress 600
|
||||
kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=600s
|
||||
kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=600s
|
||||
|
||||
- name: Reconcile observability stack
|
||||
env:
|
||||
@@ -1051,16 +1087,40 @@ jobs:
|
||||
wait_for_ocirepository_ready_or_cached() {
|
||||
local repository="$1"
|
||||
local timeout="$2"
|
||||
local attempts="${3:-6}"
|
||||
local attempts="${3:-3}"
|
||||
local artifact_storage
|
||||
local attempt
|
||||
local ready
|
||||
|
||||
for attempt in $(seq 1 "${attempts}"); do
|
||||
reconcile_flux_resource "ocirepository/${repository}" 300
|
||||
if ! kubectl get --raw=/readyz --request-timeout=10s >/dev/null 2>&1; then
|
||||
echo "Kubernetes API is not ready while waiting for OCIRepository ${repository}; failing fast" >&2
|
||||
kubectl -n kube-system get pods -o wide || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ready="$(kubectl -n flux-system get "ocirepository/${repository}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
|
||||
if [ "${ready}" = "True" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
artifact_storage="$(kubectl -n flux-system get "ocirepository/${repository}" -o jsonpath='{.status.conditions[?(@.type=="ArtifactInStorage")].status}' 2>/dev/null || true)"
|
||||
if [ "${artifact_storage}" = "True" ]; then
|
||||
echo "OCIRepository ${repository} is not currently Ready; continuing with cached artifact" >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
reconcile_flux_resource "ocirepository/${repository}" 120
|
||||
if kubectl -n flux-system wait --for=condition=Ready "ocirepository/${repository}" --timeout="${timeout}"; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
if ! kubectl get --raw=/readyz --request-timeout=10s >/dev/null 2>&1; then
|
||||
echo "Kubernetes API became unavailable while waiting for OCIRepository ${repository}; failing fast" >&2
|
||||
kubectl -n kube-system get pods -o wide || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
artifact_storage="$(kubectl -n flux-system get "ocirepository/${repository}" -o jsonpath='{.status.conditions[?(@.type=="ArtifactInStorage")].status}' 2>/dev/null || true)"
|
||||
if [ "${artifact_storage}" = "True" ]; then
|
||||
echo "OCIRepository ${repository} is not currently Ready; continuing with cached artifact" >&2
|
||||
@@ -1117,8 +1177,8 @@ jobs:
|
||||
wait_for_flux_ready kustomization/addon-observability 300s
|
||||
wait_for_resource flux-system ocirepository.source.toolkit.fluxcd.io/loki 300
|
||||
wait_for_resource flux-system ocirepository.source.toolkit.fluxcd.io/promtail 300
|
||||
wait_for_ocirepository_ready_or_cached loki 300s
|
||||
wait_for_ocirepository_ready_or_cached promtail 300s
|
||||
wait_for_ocirepository_ready_or_cached loki 90s 3
|
||||
wait_for_ocirepository_ready_or_cached promtail 90s 3
|
||||
for release in kube-prometheus-stack loki promtail; do
|
||||
wait_for_resource flux-system "helmrelease.helm.toolkit.fluxcd.io/${release}" 300
|
||||
request_helmrelease_reconcile "${release}"
|
||||
|
||||
Reference in New Issue
Block a user