fix: pre-pull Rancher images and reset Rancher release during bootstrap
Deploy Cluster / Terraform (push) Successful in 28s
Deploy Cluster / Ansible (push) Failing after 27m30s

Rancher installs were stalling on transient Docker Hub TLS handshake timeouts
for rancher shell, webhook, and system-upgrade-controller images. Pre-pull the
required images onto all nodes after k3s comes up, extend the Rancher HelmRelease
timeout, and reset/force the Rancher HelmRelease before waiting on addon-rancher
so bootstrap can recover from stale failed remediation state.
This commit is contained in:
2026-04-22 11:00:54 +00:00
parent 8372d562ad
commit 9c0523e880
5 changed files with 32 additions and 2 deletions
+9 -2
View File
@@ -259,9 +259,16 @@ jobs:
KUBECONFIG: outputs/kubeconfig
run: |
set -euo pipefail
TS=$(date --iso-8601=seconds)
kubectl -n flux-system annotate helmrelease/rancher \
reconcile.fluxcd.io/requestedAt="$TS" \
reconcile.fluxcd.io/resetAt="$TS" \
reconcile.fluxcd.io/forceAt="$TS" \
--overwrite || true
echo "Waiting for Rancher..."
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=600s
kubectl -n flux-system wait --for=condition=Ready helmrelease/rancher -n flux-system --timeout=300s
kubectl -n flux-system wait --for=condition=Ready helmrelease/rancher --timeout=900s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s
echo "Waiting for rancher-backup operator..."
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=600s || true
@@ -0,0 +1,6 @@
---
# Container images fetched onto the nodes ahead of the Rancher install; the
# "Pre-pull Rancher images into containerd" task loops over this list.
# Each entry is a fully qualified reference (registry/repository:tag).
# NOTE(review): the tags presumably have to match what the deployed Rancher
# chart requests, otherwise the pre-pull does not help — confirm when bumping
# the Rancher version.
rancher_images_to_prepull:
  - docker.io/rancher/rancher:v2.13.3
  - docker.io/rancher/rancher-webhook:v0.9.3
  - docker.io/rancher/system-upgrade-controller:v0.17.0
  - docker.io/rancher/shell:v0.6.2
@@ -0,0 +1,9 @@
---
# Pull every image in rancher_images_to_prepull into containerd's "k8s.io"
# namespace with ctr, so the images are already on the node before Rancher is
# installed. This works around transient Docker Hub TLS handshake timeouts
# that were stalling the Rancher install.
- name: Pre-pull Rancher images into containerd
  # argv hands each argument to ctr verbatim, so the templated image reference
  # is always exactly one argument and is never word-split the way a free-form
  # command string would be.
  command:
    argv:
      - /usr/local/bin/ctr
      - "-n"
      - k8s.io
      - images
      - pull
      - "{{ item }}"
  register: rancher_image_pull
  # Retry while ctr exits non-zero, waiting 15 seconds between attempts.
  retries: 5
  delay: 15
  until: rancher_image_pull.rc == 0
  loop: "{{ rancher_images_to_prepull }}"
  # Always reported as changed: the task does not check whether the image was
  # already present on the node.
  changed_when: true
+7
View File
@@ -93,6 +93,13 @@
roles:
- k3s-agent
# Applies the rancher-image-prepull role to every host in the "cluster" group,
# with privilege escalation. It sits after the play that applies the k3s-agent
# role and before the observability deployment.
- name: Pre-pull Rancher bootstrap images
  hosts: cluster
  become: true
  roles:
    - rancher-image-prepull
- name: Deploy observability stack
hosts: control_plane[0]
become: true
@@ -5,6 +5,7 @@ metadata:
namespace: flux-system
spec:
interval: 10m
timeout: 15m
targetNamespace: cattle-system
chart:
spec: