fix: pre-pull Flux controllers before bootstrap rollout
Deploy Cluster / Terraform (push) Successful in 28s
Deploy Cluster / Ansible (push) Failing after 16m39s

This commit is contained in:
2026-04-23 20:36:57 +00:00
parent 12675417bd
commit 31e95eb227
+46 -4
View File
@@ -191,15 +191,57 @@ jobs:
- name: Rewrite kubeconfig for runner-reachable API - name: Rewrite kubeconfig for runner-reachable API
working-directory: terraform working-directory: terraform
run: | run: |
set -euo pipefail
PRIMARY_IP=$(terraform output -raw primary_control_plane_ip) PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
sed -i "s#https://k8s-cluster-cp-1\.[^:]*:6443#https://${PRIMARY_IP}:6443#g" ../outputs/kubeconfig sed -i "s#https://k8s-cluster-cp-1\.[^:]*:6443#https://${PRIMARY_IP}:6443#g" ../outputs/kubeconfig
- name: Pre-pull Flux controller images on primary control plane
  working-directory: terraform
  run: |
    set -euo pipefail
    PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
    # The quoted heredoc delimiter ('EOF') keeps the runner's shell from
    # expanding anything below; the script is streamed verbatim to the remote
    # bash over ssh's stdin.
    ssh -o StrictHostKeyChecking=no "ubuntu@${PRIMARY_IP}" 'bash -s' <<'EOF'
    set -euo pipefail

    # Retry policy, defined once so the loop bound and the give-up check
    # cannot drift apart.
    max_attempts=12
    pull_timeout=180s
    retry_delay=20

    images=(
      ghcr.io/fluxcd/source-controller:v1.8.0
      ghcr.io/fluxcd/kustomize-controller:v1.8.1
      ghcr.io/fluxcd/helm-controller:v1.5.1
      ghcr.io/fluxcd/notification-controller:v1.8.1
    )

    # Pull one image with ctr (namespace k8s.io), retrying on failure.
    #   $1 - fully qualified image reference
    # Exits the remote script with status 1 once max_attempts is exhausted.
    pull_with_retry() {
      local image="$1"
      local attempt
      for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        # stdin is redirected from /dev/null so the pull cannot consume the
        # rest of this script, which bash is still reading from stdin.
        if timeout "${pull_timeout}" sudo /usr/local/bin/ctr -n k8s.io images pull "${image}" </dev/null; then
          return 0
        fi
        if [ "${attempt}" -eq "${max_attempts}" ]; then
          echo "Failed to pre-pull ${image} after ${attempt} attempts" >&2
          exit 1
        fi
        # Back off before the next attempt (no sleep after the final failure).
        sleep "${retry_delay}"
      done
    }

    for image in "${images[@]}"; do
      pull_with_retry "${image}"
    done
    EOF
- name: Bootstrap Flux source and reconciliation graph - name: Bootstrap Flux source and reconciliation graph
env: env:
KUBECONFIG: outputs/kubeconfig KUBECONFIG: outputs/kubeconfig
FLUX_GIT_HOST: 64.176.189.59 FLUX_GIT_HOST: 64.176.189.59
FLUX_GIT_PORT: "2222" FLUX_GIT_PORT: "2222"
run: | run: |
set -euo pipefail
# Wait for a deployment in the flux-system namespace to finish rolling out.
#   $1 - deployment name; also used as the value of the "app" label selector
#        when describing the deployment's pods.
# Returns 0 when the rollout completes within 900s. Otherwise prints pod and
# deployment diagnostics and exits the whole script with status 1.
flux_rollout_status() {
  local deployment="$1"
  if kubectl -n flux-system rollout status "deployment/${deployment}" --timeout=900s; then
    return 0
  fi
  # Diagnostics are best-effort: when the calling script runs under `set -e`,
  # a failing kubectl call here would otherwise abort the script before the
  # remaining diagnostics are printed.
  kubectl -n flux-system get pods -o wide || true
  kubectl -n flux-system describe deployment "${deployment}" || true
  kubectl -n flux-system describe pods -l "app=${deployment}" || true
  exit 1
}
kubectl create namespace flux-system --dry-run=client -o yaml | kubectl apply -f - kubectl create namespace flux-system --dry-run=client -o yaml | kubectl apply -f -
ssh-keyscan -p "${FLUX_GIT_PORT}" "${FLUX_GIT_HOST}" > /tmp/flux_known_hosts ssh-keyscan -p "${FLUX_GIT_PORT}" "${FLUX_GIT_HOST}" > /tmp/flux_known_hosts
kubectl -n flux-system create secret generic flux-system \ kubectl -n flux-system create secret generic flux-system \
@@ -220,15 +262,15 @@ jobs:
kubectl -n flux-system patch deployment kustomize-controller --type='merge' -p="$PATCH" kubectl -n flux-system patch deployment kustomize-controller --type='merge' -p="$PATCH"
kubectl -n flux-system patch deployment helm-controller --type='merge' -p="$PATCH" kubectl -n flux-system patch deployment helm-controller --type='merge' -p="$PATCH"
kubectl -n flux-system patch deployment notification-controller --type='merge' -p="$PATCH" kubectl -n flux-system patch deployment notification-controller --type='merge' -p="$PATCH"
kubectl -n flux-system rollout status deployment/source-controller --timeout=600s flux_rollout_status source-controller
kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=600s flux_rollout_status kustomize-controller
kubectl -n flux-system rollout status deployment/helm-controller --timeout=600s flux_rollout_status helm-controller
kubectl -n flux-system wait --for=condition=Ready gitrepository/platform --timeout=300s kubectl -n flux-system wait --for=condition=Ready gitrepository/platform --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=600s kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=600s
# Wait for ESO CRDs and deployment directly instead of Flux Kustomization status # Wait for ESO CRDs and deployment directly instead of Flux Kustomization status
kubectl wait --for=condition=established --timeout=600s crd/clustersecretstores.external-secrets.io kubectl wait --for=condition=established --timeout=600s crd/clustersecretstores.external-secrets.io
kubectl wait --for=condition=established --timeout=600s crd/externalsecrets.external-secrets.io kubectl wait --for=condition=established --timeout=600s crd/externalsecrets.external-secrets.io
kubectl -n external-secrets rollout status deployment/external-secrets-external-secrets --timeout=600s kubectl -n external-secrets rollout status deployment/external-secrets --timeout=600s
# Create Doppler ClusterSecretStore now that ESO CRDs are available # Create Doppler ClusterSecretStore now that ESO CRDs are available
kubectl apply -f - <<'EOF' kubectl apply -f - <<'EOF'
apiVersion: external-secrets.io/v1 apiVersion: external-secrets.io/v1