fix: remove runner image archive path
Deploy Cluster / Terraform (push) Successful in 4m16s
Deploy Cluster / Ansible (push) Failing after 13m57s

This commit is contained in:
2026-05-02 00:41:25 +00:00
parent df3d49c0d4
commit 17182f84a9
7 changed files with 7 additions and 270 deletions
-135
View File
@@ -176,80 +176,10 @@ jobs:
- name: Install Ansible Collections
run: ansible-galaxy collection install -r ansible/requirements.yml
- name: Install skopeo
run: |
apt-get update
apt-get install -y skopeo
- name: Generate Ansible Inventory
working-directory: ansible
run: python3 generate_inventory.py
# Pre-pull the kube-vip image onto the runner as a docker-archive tarball so
# bootstrap can seed nodes without reaching ghcr.io from inside the cluster.
- name: Prepare kube-vip image archive
run: |
set -euo pipefail
mkdir -p outputs
# Registry pulls are flaky on CI runners; retry the copy up to 3 times.
for attempt in 1 2 3; do
if skopeo copy \
docker://ghcr.io/kube-vip/kube-vip:v1.1.2 \
docker-archive:outputs/kube-vip-bootstrap.tar:ghcr.io/kube-vip/kube-vip:v1.1.2; then
# Archive written successfully; end the step with success.
exit 0
fi
# Back off before the next attempt.
sleep 10
done
echo "Failed to prepare kube-vip image archive on runner" >&2
exit 1
- name: Prepare bootstrap image archives
run: |
set -euo pipefail
archive_name() {
  # Map an image reference to a filesystem-safe archive stem: every '/'
  # and ':' becomes '_' (e.g. "repo/img:v1" -> "repo_img_v1").
  local stem="$1"
  stem="${stem//\//_}"
  stem="${stem//:/_}"
  printf '%s' "${stem}"
}
prepare_image_archive() {
  # Pull one image into outputs/bootstrap-image-archives/<name>.tar,
  # retrying up to 3 times with a 10s backoff between attempts.
  #   $1 - fully-qualified image reference (registry/name:tag)
  # Returns 0 on success, 1 after exhausting all attempts.
  local ref="$1"
  local tarball
  tarball="outputs/bootstrap-image-archives/$(archive_name "${ref}").tar"
  mkdir -p outputs/bootstrap-image-archives
  local try=1
  while [ "${try}" -le 3 ]; do
    if skopeo copy "docker://${ref}" "docker-archive:${tarball}:${ref}"; then
      return 0
    fi
    sleep 10
    try=$((try + 1))
  done
  echo "Failed to prepare bootstrap image archive for ${ref}" >&2
  return 1
}
# Stage every bootstrap-critical image as a local docker-archive tarball so
# cluster nodes can be seeded without registry access. Keep this list in
# sync with the images imported later by import_required_image.
for image in \
ghcr.io/fluxcd/source-controller:v1.8.0 \
ghcr.io/fluxcd/kustomize-controller:v1.8.1 \
ghcr.io/fluxcd/helm-controller:v1.5.1 \
ghcr.io/fluxcd/notification-controller:v1.8.1 \
docker.io/rancher/mirrored-coredns-coredns:1.14.2 \
docker.io/rancher/mirrored-metrics-server:v0.8.1 \
docker.io/rancher/local-path-provisioner:v0.0.35 \
docker.io/rancher/mirrored-library-traefik:3.6.10 \
docker.io/rancher/klipper-helm:v0.9.14-build20260309 \
oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 \
ghcr.io/tailscale/k8s-operator:v1.96.5 \
ghcr.io/tailscale/tailscale:v1.96.5 \
registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2 \
docker.io/rancher/mirrored-pause:3.6 \
docker.io/rancher/rancher:v2.13.3 \
docker.io/rancher/rancher-webhook:v0.9.3 \
docker.io/rancher/system-upgrade-controller:v0.17.0 \
docker.io/rancher/shell:v0.6.2 \
quay.io/jetstack/cert-manager-controller:v1.17.2 \
quay.io/jetstack/cert-manager-cainjector:v1.17.2 \
quay.io/jetstack/cert-manager-webhook:v1.17.2 \
quay.io/jetstack/cert-manager-startupapicheck:v1.17.2 \
docker.io/library/busybox:1.31.1; do
# Retries internally; under 'set -e' a final failure aborts the step.
prepare_image_archive "${image}"
done
- name: Run Ansible Playbook
working-directory: ansible
run: |
@@ -389,49 +319,6 @@ jobs:
fi
}
import_required_image() {
# Ensure a required container image is present in the k3s containerd store
# on one host, importing it from the runner-side archive when missing.
#   $1 - fully-qualified image reference (registry/name:tag)
#   $2 - node IP to import onto
# Exits the whole script if the archive was never staged on the runner.
local image="$1"
local host_ip="$2"
local archive_name
local archive_path
# Archive naming must match the "Prepare bootstrap image archives" step:
# '/' and ':' are both mapped to '_'.
archive_name="$(printf '%s' "${image}" | tr '/:' '__').tar"
archive_path="outputs/bootstrap-image-archives/${archive_name}"
# -s: fail fast on a missing OR zero-byte archive.
if [ ! -s "${archive_path}" ]; then
echo "Missing required bootstrap image archive ${archive_path} for ${image}" >&2
exit 1
fi
# Skip the transfer entirely when the node already has the image.
if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \
"sudo k3s crictl inspecti '${image}' >/dev/null 2>&1"; then
return 0
fi
echo "Importing ${image} archive on ${host_ip}"
# Copy the tarball over; keepalives + timeout guard against stalled links.
timeout 180s scp -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 \
"${archive_path}" "ubuntu@${host_ip}:/tmp/${archive_name}"
# Import remotely with up to 5 attempts; on final failure dump k3s
# status and recent journal lines to aid debugging, then fail.
timeout 300s ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 "ubuntu@${host_ip}" \
"set -euo pipefail; \
if sudo k3s crictl inspecti '${image}' >/dev/null 2>&1; then exit 0; fi; \
for attempt in 1 2 3 4 5; do \
echo 'Importing ${image} archive with ctr'; \
if sudo k3s ctr -n k8s.io images import '/tmp/${archive_name}' && sudo k3s crictl inspecti '${image}' >/dev/null; then exit 0; fi; \
sleep 10; \
done; \
sudo systemctl status k3s --no-pager -l || true; \
sudo journalctl -u k3s -n 80 --no-pager || true; \
exit 1"
}
import_required_image_on_all_nodes() {
  # Fan the archive import out to every cluster node.
  #   $1 - fully-qualified image reference (registry/name:tag)
  # ALL_NODE_IPS is a space-separated IP list; the unquoted expansion
  # below intentionally relies on word splitting to iterate it.
  local img="$1"
  local node
  for node in ${ALL_NODE_IPS}; do
    import_required_image "${img}" "${node}"
  done
}
eso_diagnostics() {
kubectl -n flux-system get kustomizations,ocirepositories,helmrepositories,helmcharts,helmreleases || true
kubectl -n flux-system describe kustomization addon-external-secrets || true
@@ -558,23 +445,6 @@ jobs:
--from-file=identity="$HOME/.ssh/id_ed25519" \
--from-file=known_hosts=/tmp/flux_known_hosts \
--dry-run=client -o yaml | kubectl apply -f -
# Node addressing comes from the Terraform output file produced earlier.
PRIMARY_CP_IP=$(python3 -c 'import json; print(json.load(open("outputs/terraform_outputs.json"))["primary_control_plane_ip"]["value"])')
ALL_NODE_IPS=$(python3 -c 'import json; outputs = json.load(open("outputs/terraform_outputs.json")); print(" ".join(outputs["control_plane_ips"]["value"] + outputs["worker_ips"]["value"]))')
# Flux controllers are only scheduled on the primary control plane, so a
# single-node import is sufficient here.
for image in \
ghcr.io/fluxcd/source-controller:v1.8.0 \
ghcr.io/fluxcd/kustomize-controller:v1.8.1 \
ghcr.io/fluxcd/helm-controller:v1.5.1 \
ghcr.io/fluxcd/notification-controller:v1.8.1; do
import_required_image "${image}" "${PRIMARY_CP_IP}"
done
# The pause image and cert-manager pods can land on any node, so these
# must be imported cluster-wide.
for image in \
docker.io/rancher/mirrored-pause:3.6 \
quay.io/jetstack/cert-manager-controller:v1.17.2 \
quay.io/jetstack/cert-manager-cainjector:v1.17.2 \
quay.io/jetstack/cert-manager-webhook:v1.17.2 \
quay.io/jetstack/cert-manager-startupapicheck:v1.17.2; do
import_required_image_on_all_nodes "${image}"
done
# Apply CRDs and controllers first
kubectl apply -f clusters/prod/flux-system/gotk-components.yaml
# Wait for CRDs to be established
@@ -600,7 +470,6 @@ jobs:
# Wait directly on the ESO Helm objects; Kustomization readiness hides useful failure details.
wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-external-secrets 600
reconcile_flux_resource flux-system kustomization/addon-external-secrets 900
import_required_image oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 "${PRIMARY_CP_IP}"
wait_for_flux_oci_helm_release external-secrets external-secrets external-secrets 600s 600
wait_for_resource "" crd/clustersecretstores.external-secrets.io 900
wait_for_resource "" crd/externalsecrets.external-secrets.io 900
@@ -615,8 +484,6 @@ jobs:
reconcile_flux_resource flux-system kustomization/addon-external-secrets-store 600
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets-store --timeout=600s
# Wait for the storage layer and private access components
import_required_image ghcr.io/tailscale/k8s-operator:v1.96.5 "${PRIMARY_CP_IP}"
import_required_image ghcr.io/tailscale/tailscale:v1.96.5 "${PRIMARY_CP_IP}"
reconcile_flux_resource flux-system kustomization/addon-tailscale-operator 900
if ! kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s; then
kubectl -n flux-system describe kustomization/addon-tailscale-operator || true
@@ -627,14 +494,12 @@ jobs:
wait_for_helmrelease_ready tailscale-operator tailscale-system 900
kubectl wait --for=condition=Established crd/proxyclasses.tailscale.com --timeout=600s
kubectl -n tailscale-system rollout status deployment/operator --timeout=600s
import_required_image registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2 "${PRIMARY_CP_IP}"
reconcile_flux_resource flux-system kustomization/addon-nfs-storage 600
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s
kubectl -n kube-system rollout status deployment/nfs-subdir-external-provisioner --timeout=300s
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
kubectl get storageclass flash-nfs
import_required_image docker.io/library/busybox:1.31.1 "${PRIMARY_CP_IP}"
kubectl -n kube-system delete pod/nfs-smoke pvc/nfs-smoke --ignore-not-found=true
kubectl apply -f - <<'EOF'
apiVersion: v1