fix: reduce rerun bootstrap pre-pull delays
Deploy Cluster / Terraform (push) Successful in 28s
Deploy Cluster / Ansible (push) Failing after 39m26s

This commit is contained in:
2026-04-24 12:09:34 +00:00
parent 3f52bad854
commit 347ca041ba
3 changed files with 33 additions and 37 deletions
-30
View File
@@ -195,36 +195,6 @@ jobs:
PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
sed -i "s#https://k8s-cluster-cp-1\.[^:]*:6443#https://${PRIMARY_IP}:6443#g" ../outputs/kubeconfig
# Workflow step: pre-pull the four Flux controller images on the primary
# control plane node before Flux is bootstrapped.
#
# It reads the node address from the `primary_control_plane_ip` Terraform
# output, opens an SSH session as `ubuntu` (host key checking disabled) and
# feeds the heredoc below to a remote `bash -s`.
#
# Remote script behaviour: for each image, run `ctr -n k8s.io images pull`
# under a 180s timeout, up to 12 attempts with a 20s sleep between failed
# attempts. If the 12th attempt fails, the script prints an error to stderr
# and exits 1, which fails this step.
#
# NOTE(review): the hunk header above (`@@ -195,36 +195,6 @@`, "-30") indicates
# this hunk only removes lines, so this step is presumably the part deleted by
# the commit - confirm against the repository before relying on it.
# NOTE(review): indentation was lost in this view; the lines from `set -euo
# pipefail` through `EOF` belong to the `run: |` block scalar.
- name: Pre-pull Flux controller images on primary control plane
working-directory: terraform
run: |
set -euo pipefail
PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
ssh -o StrictHostKeyChecking=no "ubuntu@${PRIMARY_IP}" 'bash -s' <<'EOF'
set -euo pipefail
images=(
ghcr.io/fluxcd/source-controller:v1.8.0
ghcr.io/fluxcd/kustomize-controller:v1.8.1
ghcr.io/fluxcd/helm-controller:v1.5.1
ghcr.io/fluxcd/notification-controller:v1.8.1
)
for image in "${images[@]}"; do
for attempt in $(seq 1 12); do
if timeout 180s sudo /usr/local/bin/ctr -n k8s.io images pull "${image}"; then
break
fi
if [ "${attempt}" -eq 12 ]; then
echo "Failed to pre-pull ${image} after ${attempt} attempts" >&2
exit 1
fi
sleep 20
done
done
EOF
- name: Bootstrap Flux source and reconciliation graph
env:
KUBECONFIG: outputs/kubeconfig
+30 -5
View File
@@ -1,12 +1,37 @@
---
# Ansible task: pull every image listed in `kube_vip_prepull_images` into the
# containerd `k8s.io` namespace, one loop iteration per image.
#
# The bash script first checks `ctr images ls -q` for an exact match of the
# image reference and exits 0 with "already present" when it is found.
# Otherwise it tries `ctr images pull` up to 3 times (120s timeout per try,
# 10s sleep after each failed try), printing "pulled image" and exiting 0 on
# the first success, or exiting 1 once all tries have failed.
#
# Per-item results are registered in `kube_vip_image_pull`.
#
# NOTE(review): this view is a flattened diff (file header says "+30 -5") with
# indentation and +/- markers lost. The task therefore shows both `command:`
# and `shell:`, and two `changed_when:` keys. Presumably `command:`,
# `retries:`, `delay:`, `until:` and `changed_when: true` are the five removed
# lines and the rest is new - confirm against the repository.
- name: Pre-pull kube-vip bootstrap images into containerd
command: /usr/local/bin/ctr -n k8s.io images pull {{ item }}
shell: |
if /usr/local/bin/ctr -n k8s.io images ls -q | grep -Fx -- "{{ item }}" >/dev/null; then
echo "already present"
exit 0
fi
for attempt in 1 2 3; do
if timeout 120s /usr/local/bin/ctr -n k8s.io images pull "{{ item }}"; then
echo "pulled image"
exit 0
fi
sleep 10
done
exit 1
args:
executable: /bin/bash
register: kube_vip_image_pull
# NOTE(review): task-level retry (12 tries, 15s apart, until rc == 0);
# presumably superseded by the 3-attempt loop inside the script - TODO confirm.
retries: 12
delay: 15
until: kube_vip_image_pull.rc == 0
loop: "{{ kube_vip_prepull_images }}"
changed_when: true
# Report "changed" only when the script actually pulled the image, i.e. its
# stdout contains the "pulled image" marker (not "already present").
changed_when: "'pulled image' in kube_vip_image_pull.stdout"
# Never mark the task as failed, whatever the script's exit code; non-zero
# results stay available in the registered variable.
failed_when: false
# Walk the per-image results registered in `kube_vip_image_pull` (an empty
# list when the variable has no `results`) and print a debug message for each
# result that carries a non-zero `rc`. The message names the image and includes
# the captured stderr, or 'no stderr' when none was recorded.
- name: Report kube-vip images that did not pre-pull after retries
  # List entries are ANDed and evaluated in order, so `item.rc` is only
  # compared once it is known to be defined.
  when:
    - item.rc is defined
    - item.rc != 0
  loop: "{{ kube_vip_image_pull.results | default([]) }}"
  loop_control:
    # Show only the image reference in loop output, not the whole result.
    label: "{{ item.item }}"
  debug:
    msg: >-
      Best-effort kube-vip image pre-pull did not complete
      for {{ item.item }} after 3 attempt(s):
      {{ item.stderr | default('no stderr') }}
- name: Render kube-vip control plane manifest
template:
+3 -2
View File
@@ -110,11 +110,12 @@
- bootstrap-image-prepull
# Play: run the `rancher-image-prepull` role with privilege escalation
# (`become: true`).
#
# NOTE(review): this view is a flattened diff (file header says "+3 -2") with
# indentation and +/- markers lost, so old and new lines appear together.
# Presumably `hosts: cluster` and the bare `- rancher-image-prepull` entry are
# the two removed lines, and `hosts: workers` plus the `- role:` / `when:`
# entry are the three added lines - confirm against the repository.
- name: Pre-pull Rancher bootstrap images
hosts: cluster
hosts: workers
become: true
roles:
- rancher-image-prepull
# Role entry gated on `rancher_image_prepull_enabled`; the role is skipped
# when that variable is undefined, because the default is false.
- role: rancher-image-prepull
when: rancher_image_prepull_enabled | default(false) | bool
- name: Deploy observability stack
hosts: control_plane[0]