From e06b2c692e9f0b97b41959b3a030a88b1d149ce9 Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Thu, 5 Mar 2026 01:21:21 +0000 Subject: [PATCH] fix: point Cilium directly at API server and print rollout diagnostics Set Cilium k8sServiceHost/k8sServicePort to the primary control-plane API endpoint to avoid in-cluster service routing dependency during early bootstrap. Also print cilium daemonset/pod/log diagnostics when rollout times out. --- nixos/kubeadm/bootstrap/controller.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/nixos/kubeadm/bootstrap/controller.py b/nixos/kubeadm/bootstrap/controller.py index 99cf082..c7eb17d 100755 --- a/nixos/kubeadm/bootstrap/controller.py +++ b/nixos/kubeadm/bootstrap/controller.py @@ -343,6 +343,8 @@ class Controller: "sudo KUBECONFIG=/etc/kubernetes/admin.conf " "helm upgrade --install cilium cilium/cilium " "--namespace kube-system " + f"--set k8sServiceHost={shlex.quote(self.primary_ip)} " + "--set k8sServicePort=6443 " f"--set kubeProxyReplacement={shlex.quote(self.cilium_kpr)}" ), ) @@ -410,21 +412,24 @@ class Controller: ) except Exception: self.log("Cilium rollout failed; collecting diagnostics") - self.remote( + proc = self.remote( self.primary_ip, "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get ds cilium -o wide || true", check=False, ) - self.remote( + print(proc.stdout) + proc = self.remote( self.primary_ip, "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o wide || true", check=False, ) - self.remote( + print(proc.stdout) + proc = self.remote( self.primary_ip, "for p in $(sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o name 2>/dev/null); do sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system logs --tail=120 $p || true; done", check=False, ) + print(proc.stdout) raise self.remote( self.primary_ip,