fix: point Cilium directly at API server and print rollout diagnostics
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 18s

Set Cilium's k8sServiceHost/k8sServicePort to the primary control-plane API endpoint so that Cilium does not depend on in-cluster service routing during early bootstrap. Also print the Cilium DaemonSet, pod, and log diagnostics when the rollout fails or times out.
This commit is contained in:
2026-03-05 01:21:21 +00:00
parent ca54c44fa4
commit e06b2c692e

View File

@@ -343,6 +343,8 @@ class Controller:
"sudo KUBECONFIG=/etc/kubernetes/admin.conf " "sudo KUBECONFIG=/etc/kubernetes/admin.conf "
"helm upgrade --install cilium cilium/cilium " "helm upgrade --install cilium cilium/cilium "
"--namespace kube-system " "--namespace kube-system "
f"--set k8sServiceHost={shlex.quote(self.primary_ip)} "
"--set k8sServicePort=6443 "
f"--set kubeProxyReplacement={shlex.quote(self.cilium_kpr)}" f"--set kubeProxyReplacement={shlex.quote(self.cilium_kpr)}"
), ),
) )
@@ -410,21 +412,24 @@ class Controller:
) )
except Exception: except Exception:
self.log("Cilium rollout failed; collecting diagnostics") self.log("Cilium rollout failed; collecting diagnostics")
self.remote( proc = self.remote(
self.primary_ip, self.primary_ip,
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get ds cilium -o wide || true", "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get ds cilium -o wide || true",
check=False, check=False,
) )
self.remote( print(proc.stdout)
proc = self.remote(
self.primary_ip, self.primary_ip,
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o wide || true", "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o wide || true",
check=False, check=False,
) )
self.remote( print(proc.stdout)
proc = self.remote(
self.primary_ip, self.primary_ip,
"for p in $(sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o name 2>/dev/null); do sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system logs --tail=120 $p || true; done", "for p in $(sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o name 2>/dev/null); do sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system logs --tail=120 $p || true; done",
check=False, check=False,
) )
print(proc.stdout)
raise raise
self.remote( self.remote(
self.primary_ip, self.primary_ip,