From 1156dc0203f20f7d1afc76ebefcc2508a752a339 Mon Sep 17 00:00:00 2001
From: MichaelFisher1997
Date: Thu, 23 Apr 2026 03:55:52 +0000
Subject: [PATCH] fix: pre-pull kube-vip images before waiting for VIP

The primary control plane was stalling because kubelet still had to pull
both the Rancher pause image and the kube-vip image before the DaemonSet
pod could become Ready. Pre-pull those images into containerd, extend
the readiness wait, and emit pod diagnostics if kube-vip still does not
come up.
---
 .../roles/kube-vip-deploy/defaults/main.yml  |  3 ++
 ansible/roles/kube-vip-deploy/tasks/main.yml | 38 ++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/kube-vip-deploy/defaults/main.yml b/ansible/roles/kube-vip-deploy/defaults/main.yml
index d70b311..4b6120a 100644
--- a/ansible/roles/kube-vip-deploy/defaults/main.yml
+++ b/ansible/roles/kube-vip-deploy/defaults/main.yml
@@ -2,3 +2,6 @@
 kube_vip_version: v1.1.2
 kube_vip_interface: "{{ ansible_default_ipv4.interface | default('eth0') }}"
 kube_vip_address: "{{ kube_api_endpoint }}"
+kube_vip_prepull_images:
+  - docker.io/rancher/mirrored-pause:3.6
+  - ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}
diff --git a/ansible/roles/kube-vip-deploy/tasks/main.yml b/ansible/roles/kube-vip-deploy/tasks/main.yml
index 14c5e8d..a7f0974 100644
--- a/ansible/roles/kube-vip-deploy/tasks/main.yml
+++ b/ansible/roles/kube-vip-deploy/tasks/main.yml
@@ -1,4 +1,13 @@
 ---
+- name: Pre-pull kube-vip bootstrap images into containerd
+  command: /usr/local/bin/ctr -n k8s.io images pull {{ item }}
+  register: kube_vip_image_pull
+  retries: 12
+  delay: 15
+  until: kube_vip_image_pull.rc == 0
+  loop: "{{ kube_vip_prepull_images }}"
+  changed_when: true
+
 - name: Render kube-vip control plane manifest
   template:
     src: kube-vip-control-plane.yaml.j2
@@ -18,9 +27,36 @@
   register: kube_vip_pod_ready
   changed_when: false
   until: kube_vip_pod_ready.stdout == "True"
-  retries: 18
+  retries: 30
   delay: 10
 
+- name: Show kube-vip pod status on failure
+  command: kubectl -n kube-system get pods -l app.kubernetes.io/name=kube-vip -o wide
+  register: kube_vip_pods
+  changed_when: false
+  failed_when: false
+  when: kube_vip_pod_ready is failed
+
+- name: Describe kube-vip pod on failure
+  shell: >-
+    kubectl -n kube-system describe pod
+    $(kubectl -n kube-system get pods -l app.kubernetes.io/name=kube-vip --field-selector spec.nodeName={{ inventory_hostname }} -o jsonpath='{.items[0].metadata.name}')
+  register: kube_vip_pod_describe
+  changed_when: false
+  failed_when: false
+  when: kube_vip_pod_ready is failed
+
+- name: Fail with kube-vip diagnostics
+  fail:
+    msg: |
+      kube-vip failed to become ready on {{ inventory_hostname }}.
+      Pods:
+      {{ kube_vip_pods.stdout | default('n/a') }}
+
+      Describe:
+      {{ kube_vip_pod_describe.stdout | default('n/a') }}
+  when: kube_vip_pod_ready is failed
+
 - name: Wait for API VIP on 6443
   wait_for:
     host: "{{ kube_vip_address }}"