diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml
index 210cee5..7be0b12 100644
--- a/.gitea/workflows/deploy.yml
+++ b/.gitea/workflows/deploy.yml
@@ -389,6 +389,40 @@ jobs:
           echo "Failed to prepare kube-vip image archive on runner" >&2
           exit 1
 
+      - name: Prepare bootstrap image archives
+        run: |
+          set -euo pipefail
+          archive_name() {
+            printf '%s' "$1" | tr '/:' '__'
+          }
+
+          prepare_image_archive() {
+            local image="$1"
+            local archive="outputs/bootstrap-image-archives/$(archive_name "${image}").tar"
+
+            mkdir -p outputs/bootstrap-image-archives
+            for attempt in 1 2 3; do
+              if skopeo copy "docker://${image}" "docker-archive:${archive}:${image}"; then
+                return 0
+              fi
+              sleep 10
+            done
+
+            echo "Failed to prepare bootstrap image archive for ${image}" >&2
+            return 1
+          }
+
+          for image in \
+            ghcr.io/fluxcd/source-controller:v1.8.0 \
+            ghcr.io/fluxcd/kustomize-controller:v1.8.1 \
+            ghcr.io/fluxcd/helm-controller:v1.5.1 \
+            ghcr.io/fluxcd/notification-controller:v1.8.1 \
+            oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 \
+            ghcr.io/tailscale/k8s-operator:v1.96.5 \
+            ghcr.io/tailscale/tailscale:v1.96.5; do
+            prepare_image_archive "${image}"
+          done
+
       - name: Run Ansible Playbook
         working-directory: ansible
         run: |
@@ -481,6 +515,26 @@ jobs:
             fi
           }
 
+          import_required_image() {
+            local image="$1"
+            local host_ip="$2"
+            local archive_name
+            local archive_path
+            archive_name="$(printf '%s' "${image}" | tr '/:' '__').tar"
+            archive_path="outputs/bootstrap-image-archives/${archive_name}"
+
+            if [ ! -s "${archive_path}" ]; then
+              echo "Missing required bootstrap image archive ${archive_path} for ${image}" >&2
+              exit 1
+            fi
+
+            echo "Importing ${image} archive on ${host_ip}"
+            scp -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 \
+              "${archive_path}" "ubuntu@${host_ip}:/tmp/${archive_name}"
+            ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \
+              "sudo k3s ctr -n k8s.io images import '/tmp/${archive_name}' && sudo k3s crictl inspecti '${image}' >/dev/null"
+          }
+
           eso_diagnostics() {
             kubectl -n flux-system get kustomizations,ocirepositories,helmrepositories,helmcharts,helmreleases || true
             kubectl -n flux-system describe kustomization addon-external-secrets || true
@@ -608,14 +662,12 @@ jobs:
             --from-file=known_hosts=/tmp/flux_known_hosts \
             --dry-run=client -o yaml | kubectl apply -f -
           PRIMARY_CP_IP=$(python3 -c 'import json; print(json.load(open("outputs/terraform_outputs.json"))["primary_control_plane_ip"]["value"])')
-          FLUX_IMAGE_PULL_ATTEMPTS=45
-          FLUX_IMAGE_PULL_SLEEP=10
           for image in \
             ghcr.io/fluxcd/source-controller:v1.8.0 \
             ghcr.io/fluxcd/kustomize-controller:v1.8.1 \
             ghcr.io/fluxcd/helm-controller:v1.5.1 \
             ghcr.io/fluxcd/notification-controller:v1.8.1; do
-            pull_required_image "${image}" "${PRIMARY_CP_IP}" "${FLUX_IMAGE_PULL_ATTEMPTS}" "${FLUX_IMAGE_PULL_SLEEP}" "Failed to pre-pull required Flux image"
+            import_required_image "${image}" "${PRIMARY_CP_IP}"
           done
           # Apply CRDs and controllers first
           kubectl apply -f clusters/prod/flux-system/gotk-components.yaml
@@ -640,7 +692,7 @@ jobs:
           # Wait directly on the ESO Helm objects; Kustomization readiness hides useful failure details.
           wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-external-secrets 600
           kubectl -n flux-system annotate kustomization/addon-external-secrets reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite
-          pull_required_image oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 "${PRIMARY_CP_IP}" 45 10 "Failed to pre-pull required External Secrets image"
+          import_required_image oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 "${PRIMARY_CP_IP}"
           wait_for_flux_oci_helm_release external-secrets external-secrets external-secrets 600s 600
           wait_for_resource "" crd/clustersecretstores.external-secrets.io 900
           wait_for_resource "" crd/externalsecrets.external-secrets.io 900
@@ -668,8 +720,8 @@ jobs:
             namespace: external-secrets
           EOF
           # Wait for the storage layer and private access components
-          pull_required_image ghcr.io/tailscale/k8s-operator:v1.96.5 "${PRIMARY_CP_IP}" 45 10 "Failed to pre-pull required Tailscale operator image"
-          pull_required_image ghcr.io/tailscale/tailscale:v1.96.5 "${PRIMARY_CP_IP}" 45 10 "Failed to pre-pull required Tailscale proxy image"
+          import_required_image ghcr.io/tailscale/k8s-operator:v1.96.5 "${PRIMARY_CP_IP}"
+          import_required_image ghcr.io/tailscale/tailscale:v1.96.5 "${PRIMARY_CP_IP}"
           wait_for_flux_helm_release tailscale flux-system-tailscale-operator tailscale-operator tailscale-system 600s 900s 900
           kubectl -n tailscale-system rollout status deployment/operator --timeout=600s
           wait_for_flux_helm_release nfs-subdir-external-provisioner flux-system-nfs-subdir-external-provisioner nfs-subdir-external-provisioner kube-system 600s 600s 600