fix: gate workers on kube-vip reachability
This commit is contained in:
@@ -108,6 +108,41 @@
|
||||
hosts: control_plane[0]
|
||||
become: true
|
||||
tasks:
|
||||
# Waits (up to kubectl's own 300s timeout) for the kube-vip DaemonSet rollout
# in kube-system to complete. The task itself never fails or reports a change;
# the outcome is captured in `kube_vip_rollout` so follow-up tasks can inspect
# its return code.
- name: Wait for kube-vip DaemonSet across control planes
  command:
    argv:
      - kubectl
      - "-n"
      - kube-system
      - rollout
      - status
      - daemonset/kube-vip
      - "--timeout=300s"
  register: kube_vip_rollout
  # Read-only probe: never "changed", and a non-zero exit is evaluated later.
  failed_when: false
  changed_when: false
|
||||
|
||||
# Diagnostic only: lists the kube-vip pods (wide output) when the rollout wait
# returned a non-zero exit code. Output is stored in `kube_vip_pods_after_join`.
- name: Show kube-vip pod status on rollout failure
  command: kubectl -n kube-system get pods -l app.kubernetes.io/name=kube-vip -o wide
  register: kube_vip_pods_after_join
  changed_when: false
  # Best-effort: a failure to collect diagnostics must not mask the real error.
  failed_when: false
  # `rc` is absent when the rollout task was skipped (e.g. --check); treat that
  # as "no failure" instead of erroring on an undefined attribute.
  when: (kube_vip_rollout.rc | default(0)) != 0
|
||||
|
||||
# Diagnostic only: captures `kubectl describe` output for the kube-vip pods when
# the rollout wait returned a non-zero exit code. Output is stored in
# `kube_vip_describe_after_join`.
- name: Describe kube-vip pods on rollout failure
  command: kubectl -n kube-system describe pods -l app.kubernetes.io/name=kube-vip
  register: kube_vip_describe_after_join
  changed_when: false
  # Best-effort: a failure to collect diagnostics must not mask the real error.
  failed_when: false
  # `rc` is absent when the rollout task was skipped (e.g. --check); treat that
  # as "no failure" instead of erroring on an undefined attribute.
  when: (kube_vip_rollout.rc | default(0)) != 0
|
||||
|
||||
# Aborts the play when the kube-vip rollout wait returned a non-zero exit code,
# embedding the rollout output plus whatever pod diagnostics were registered
# (each falls back to a placeholder when its variable has no stdout).
- name: Fail when kube-vip is not healthy on all control planes
  fail:
    msg: |
      kube-vip DaemonSet did not become healthy after secondary control planes joined.
      Rollout:
      {{ kube_vip_rollout.stdout | default('') }}
      {{ kube_vip_rollout.stderr | default('') }}

      Pods:
      {{ kube_vip_pods_after_join.stdout | default('n/a') }}

      Describe:
      {{ kube_vip_describe_after_join.stdout | default('n/a') }}
  # `rc` is absent when the rollout task was skipped (e.g. --check); treat that
  # as "no failure" instead of erroring on an undefined attribute.
  when: (kube_vip_rollout.rc | default(0)) != 0
|
||||
|
||||
- name: Wait for control plane node readiness
|
||||
command: kubectl wait --for=condition=Ready node/{{ item }} --timeout=30s
|
||||
register: control_plane_ready
|
||||
@@ -125,6 +160,49 @@
|
||||
delay: 10
|
||||
changed_when: false
|
||||
|
||||
# Play: on every worker, confirm that TCP port 6443 on `kube_api_endpoint` is
# reachable. If it is not, gather local network diagnostics and fail with them
# in the error message.
- name: Verify worker reachability to Kubernetes API VIP
  hosts: workers
  become: true
  tasks:
    # Probe the VIP for up to 180 seconds. The failure is ignored here so the
    # diagnostics task below can still run; the explicit `fail` task at the end
    # of the play is what stops the host.
    - name: Wait for Kubernetes API VIP from worker
      wait_for:
        host: "{{ kube_api_endpoint }}"
        port: 6443
        state: started
        timeout: 180
      register: worker_vip_wait
      # `ignore_errors` (rather than `failed_when: false`) keeps the failed
      # status on the registered result, so `is failed` works below and a
      # skipped run (e.g. --check) is not mistaken for a timeout.
      ignore_errors: true

    # Best-effort snapshot of addressing, routing and neighbour state plus a
    # short direct TCP probe. No `set -e`: one failing command must not
    # suppress the remaining sections.
    - name: Collect worker network diagnostics when VIP is unreachable
      shell: |
        set -uo pipefail
        echo "== ip addr =="
        ip addr
        echo "== ip route =="
        ip route
        echo "== ip neigh =="
        ip neigh || true
        echo "== vip route =="
        ip route get {{ kube_api_endpoint | quote }} || true
        echo "== tcp probe =="
        timeout 5 bash -c '</dev/tcp/"$1"/6443' _ {{ kube_api_endpoint | quote }} && echo connected || echo failed
      args:
        executable: /bin/bash
      register: worker_vip_diagnostics
      changed_when: false
      failed_when: false
      when: worker_vip_wait is failed

    # Stop this worker with an actionable message when the VIP probe failed.
    - name: Fail when worker cannot reach Kubernetes API VIP
      fail:
        msg: |
          Worker {{ inventory_hostname }} cannot reach Kubernetes API VIP {{ kube_api_endpoint }}:6443.
          This blocks k3s agent join and points to kube-vip/L2/routing reachability, not agent install.

          Diagnostics:
          {{ worker_vip_diagnostics.stdout | default('n/a') }}
      when: worker_vip_wait is failed
|
||||
|
||||
- name: Setup workers
|
||||
hosts: workers
|
||||
become: true
|
||||
|
||||
Reference in New Issue
Block a user