fix: cap long flux reconcile waits
This commit is contained in:
+74
-14
@@ -268,6 +268,24 @@ jobs:
|
|||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
ready="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
|
||||||
|
healthy="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Healthy")].status}' 2>/dev/null || true)"
|
||||||
|
reconciling="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Reconciling")].status}' 2>/dev/null || true)"
|
||||||
|
generation="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.metadata.generation}' 2>/dev/null || true)"
|
||||||
|
observed_generation="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.observedGeneration}' 2>/dev/null || true)"
|
||||||
|
|
||||||
|
if [[ "${resource}" == helmrelease/* ]] && [ "${reconciling}" = "True" ]; then
|
||||||
|
echo "${resource} is actively reconciling; continuing without waiting for reconcile token ${reconcile_at}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "${ready}" = "True" ] && [ "${observed_generation}" = "${generation}" ]; then
|
||||||
|
if [ -z "${healthy}" ] || [ "${healthy}" = "True" ]; then
|
||||||
|
echo "${resource} is already Ready; continuing without waiting for reconcile token ${reconcile_at}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
sleep 5
|
sleep 5
|
||||||
elapsed=$((elapsed + 5))
|
elapsed=$((elapsed + 5))
|
||||||
done
|
done
|
||||||
@@ -640,6 +658,24 @@ jobs:
|
|||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
ready="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
|
||||||
|
healthy="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Healthy")].status}' 2>/dev/null || true)"
|
||||||
|
reconciling="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.conditions[?(@.type=="Reconciling")].status}' 2>/dev/null || true)"
|
||||||
|
generation="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.metadata.generation}' 2>/dev/null || true)"
|
||||||
|
observed_generation="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.observedGeneration}' 2>/dev/null || true)"
|
||||||
|
|
||||||
|
if [[ "${resource}" == helmrelease/* ]] && [ "${reconciling}" = "True" ]; then
|
||||||
|
echo "${resource} is actively reconciling; continuing without waiting for reconcile token ${reconcile_at}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "${ready}" = "True" ] && [ "${observed_generation}" = "${generation}" ]; then
|
||||||
|
if [ -z "${healthy}" ] || [ "${healthy}" = "True" ]; then
|
||||||
|
echo "${resource} is already Ready; continuing without waiting for reconcile token ${reconcile_at}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
sleep 5
|
sleep 5
|
||||||
elapsed=$((elapsed + 5))
|
elapsed=$((elapsed + 5))
|
||||||
done
|
done
|
||||||
@@ -911,20 +947,20 @@ jobs:
|
|||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=900s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=900s
|
||||||
wait_for_rancher_bootstrap_secrets 900
|
wait_for_rancher_bootstrap_secrets 900
|
||||||
wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher 600
|
wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher 600
|
||||||
reconcile_flux_resource flux-system kustomization/addon-rancher 1800
|
reconcile_flux_resource flux-system kustomization/addon-rancher 600
|
||||||
wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher 600
|
wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher 600
|
||||||
reconcile_helmrelease rancher 300
|
reconcile_helmrelease rancher 300
|
||||||
wait_for_helmchart_ready flux-system-rancher rancher 180s 5
|
wait_for_helmchart_ready flux-system-rancher rancher 120s 3
|
||||||
wait_for_helmrelease_ready rancher cattle-system 900
|
wait_for_helmrelease_ready rancher cattle-system 600
|
||||||
wait_for_resource "" namespace/cattle-system 600
|
wait_for_resource "" namespace/cattle-system 600
|
||||||
wait_for_resource cattle-system deployment/cattle-system-rancher 600
|
wait_for_resource cattle-system deployment/cattle-system-rancher 600
|
||||||
kubectl -n cattle-system rollout status deployment/cattle-system-rancher --timeout=900s
|
kubectl -n cattle-system rollout status deployment/cattle-system-rancher --timeout=600s
|
||||||
wait_for_resource cattle-system deployment/rancher-webhook 900
|
wait_for_resource cattle-system deployment/rancher-webhook 600
|
||||||
kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=900s
|
kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=600s
|
||||||
wait_for_resource cattle-system issuer/cattle-system-rancher 900
|
wait_for_resource cattle-system issuer/cattle-system-rancher 600
|
||||||
wait_for_resource cattle-system certificate/tls-rancher-ingress 900
|
wait_for_resource cattle-system certificate/tls-rancher-ingress 600
|
||||||
kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=900s
|
kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=600s
|
||||||
kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=900s
|
kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=600s
|
||||||
|
|
||||||
- name: Reconcile observability stack
|
- name: Reconcile observability stack
|
||||||
env:
|
env:
|
||||||
@@ -1051,16 +1087,40 @@ jobs:
|
|||||||
wait_for_ocirepository_ready_or_cached() {
|
wait_for_ocirepository_ready_or_cached() {
|
||||||
local repository="$1"
|
local repository="$1"
|
||||||
local timeout="$2"
|
local timeout="$2"
|
||||||
local attempts="${3:-6}"
|
local attempts="${3:-3}"
|
||||||
local artifact_storage
|
local artifact_storage
|
||||||
local attempt
|
local attempt
|
||||||
|
local ready
|
||||||
|
|
||||||
for attempt in $(seq 1 "${attempts}"); do
|
for attempt in $(seq 1 "${attempts}"); do
|
||||||
reconcile_flux_resource "ocirepository/${repository}" 300
|
if ! kubectl get --raw=/readyz --request-timeout=10s >/dev/null 2>&1; then
|
||||||
|
echo "Kubernetes API is not ready while waiting for OCIRepository ${repository}; failing fast" >&2
|
||||||
|
kubectl -n kube-system get pods -o wide || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
ready="$(kubectl -n flux-system get "ocirepository/${repository}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
|
||||||
|
if [ "${ready}" = "True" ]; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
artifact_storage="$(kubectl -n flux-system get "ocirepository/${repository}" -o jsonpath='{.status.conditions[?(@.type=="ArtifactInStorage")].status}' 2>/dev/null || true)"
|
||||||
|
if [ "${artifact_storage}" = "True" ]; then
|
||||||
|
echo "OCIRepository ${repository} is not currently Ready; continuing with cached artifact" >&2
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
reconcile_flux_resource "ocirepository/${repository}" 120
|
||||||
if kubectl -n flux-system wait --for=condition=Ready "ocirepository/${repository}" --timeout="${timeout}"; then
|
if kubectl -n flux-system wait --for=condition=Ready "ocirepository/${repository}" --timeout="${timeout}"; then
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if ! kubectl get --raw=/readyz --request-timeout=10s >/dev/null 2>&1; then
|
||||||
|
echo "Kubernetes API became unavailable while waiting for OCIRepository ${repository}; failing fast" >&2
|
||||||
|
kubectl -n kube-system get pods -o wide || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
artifact_storage="$(kubectl -n flux-system get "ocirepository/${repository}" -o jsonpath='{.status.conditions[?(@.type=="ArtifactInStorage")].status}' 2>/dev/null || true)"
|
artifact_storage="$(kubectl -n flux-system get "ocirepository/${repository}" -o jsonpath='{.status.conditions[?(@.type=="ArtifactInStorage")].status}' 2>/dev/null || true)"
|
||||||
if [ "${artifact_storage}" = "True" ]; then
|
if [ "${artifact_storage}" = "True" ]; then
|
||||||
echo "OCIRepository ${repository} is not currently Ready; continuing with cached artifact" >&2
|
echo "OCIRepository ${repository} is not currently Ready; continuing with cached artifact" >&2
|
||||||
@@ -1117,8 +1177,8 @@ jobs:
|
|||||||
wait_for_flux_ready kustomization/addon-observability 300s
|
wait_for_flux_ready kustomization/addon-observability 300s
|
||||||
wait_for_resource flux-system ocirepository.source.toolkit.fluxcd.io/loki 300
|
wait_for_resource flux-system ocirepository.source.toolkit.fluxcd.io/loki 300
|
||||||
wait_for_resource flux-system ocirepository.source.toolkit.fluxcd.io/promtail 300
|
wait_for_resource flux-system ocirepository.source.toolkit.fluxcd.io/promtail 300
|
||||||
wait_for_ocirepository_ready_or_cached loki 300s
|
wait_for_ocirepository_ready_or_cached loki 90s 3
|
||||||
wait_for_ocirepository_ready_or_cached promtail 300s
|
wait_for_ocirepository_ready_or_cached promtail 90s 3
|
||||||
for release in kube-prometheus-stack loki promtail; do
|
for release in kube-prometheus-stack loki promtail; do
|
||||||
wait_for_resource flux-system "helmrelease.helm.toolkit.fluxcd.io/${release}" 300
|
wait_for_resource flux-system "helmrelease.helm.toolkit.fluxcd.io/${release}" 300
|
||||||
request_helmrelease_reconcile "${release}"
|
request_helmrelease_reconcile "${release}"
|
||||||
|
|||||||
Reference in New Issue
Block a user