diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml
index 5f8e620..74648d1 100644
--- a/.gitea/workflows/deploy.yml
+++ b/.gitea/workflows/deploy.yml
@@ -191,15 +191,64 @@ jobs:
       - name: Rewrite kubeconfig for runner-reachable API
         working-directory: terraform
         run: |
+          set -euo pipefail
           PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
           sed -i "s#https://k8s-cluster-cp-1\.[^:]*:6443#https://${PRIMARY_IP}:6443#g" ../outputs/kubeconfig
 
+      - name: Pre-pull Flux controller images on primary control plane
+        working-directory: terraform
+        run: |
+          set -euo pipefail
+          PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
+          # NOTE(review): host key checking is disabled for this connection;
+          # confirm that is acceptable for the runner environment.
+          ssh -o StrictHostKeyChecking=no "ubuntu@${PRIMARY_IP}" 'bash -s' <<'EOF'
+          set -euo pipefail
+          images=(
+            ghcr.io/fluxcd/source-controller:v1.8.0
+            ghcr.io/fluxcd/kustomize-controller:v1.8.1
+            ghcr.io/fluxcd/helm-controller:v1.5.1
+            ghcr.io/fluxcd/notification-controller:v1.8.1
+          )
+
+          # Try each pull up to 12 times, 20s apart; every attempt is capped at 180s.
+          for image in "${images[@]}"; do
+            for attempt in $(seq 1 12); do
+              if timeout 180s sudo /usr/local/bin/ctr -n k8s.io images pull "${image}"; then
+                break
+              fi
+
+              if [ "${attempt}" -eq 12 ]; then
+                echo "Failed to pre-pull ${image} after ${attempt} attempts" >&2
+                exit 1
+              fi
+
+              sleep 20
+            done
+          done
+          EOF
+
       - name: Bootstrap Flux source and reconciliation graph
         env:
           KUBECONFIG: outputs/kubeconfig
           FLUX_GIT_HOST: 64.176.189.59
           FLUX_GIT_PORT: "2222"
         run: |
+          set -euo pipefail
+          # Wait for a flux-system deployment rollout ($1 = deployment name); on
+          # failure dump pod and deployment details, then fail the step.
+          flux_rollout_status() {
+            local deployment="$1"
+            if ! kubectl -n flux-system rollout status "deployment/${deployment}" --timeout=900s; then
+              # Diagnostics are best-effort: errexit is active in this branch, so
+              # without `|| true` one failing kubectl would skip the remaining output.
+              kubectl -n flux-system get pods -o wide || true
+              kubectl -n flux-system describe deployment "${deployment}" || true
+              kubectl -n flux-system describe pods -l "app=${deployment}" || true
+              exit 1
+            fi
+          }
+
           kubectl create namespace flux-system --dry-run=client -o yaml | kubectl apply -f -
           ssh-keyscan -p "${FLUX_GIT_PORT}" "${FLUX_GIT_HOST}" > /tmp/flux_known_hosts
           kubectl -n flux-system create secret generic flux-system \
@@ -220,15 +269,15 @@ jobs:
           kubectl -n flux-system patch deployment kustomize-controller --type='merge' -p="$PATCH"
           kubectl -n flux-system patch deployment helm-controller --type='merge' -p="$PATCH"
           kubectl -n flux-system patch deployment notification-controller --type='merge' -p="$PATCH"
-          kubectl -n flux-system rollout status deployment/source-controller --timeout=600s
-          kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=600s
-          kubectl -n flux-system rollout status deployment/helm-controller --timeout=600s
+          flux_rollout_status source-controller
+          flux_rollout_status kustomize-controller
+          flux_rollout_status helm-controller
           kubectl -n flux-system wait --for=condition=Ready gitrepository/platform --timeout=300s
           kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=600s
           # Wait for ESO CRDs and deployment directly instead of Flux Kustomization status
           kubectl wait --for=condition=established --timeout=600s crd/clustersecretstores.external-secrets.io
           kubectl wait --for=condition=established --timeout=600s crd/externalsecrets.external-secrets.io
-          kubectl -n external-secrets rollout status deployment/external-secrets-external-secrets --timeout=600s
+          kubectl -n external-secrets rollout status deployment/external-secrets --timeout=600s
           # Create Doppler ClusterSecretStore now that ESO CRDs are available
           kubectl apply -f - <<'EOF'
           apiVersion: external-secrets.io/v1