From a70de061b05fd44d54625c6a1e725c575fd17333 Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Wed, 4 Mar 2026 22:26:43 +0000 Subject: [PATCH] fix: wait for Cilium and node readiness before marking bootstrap success Update verification stage to block on cilium daemonset rollout and all nodes reaching Ready. This prevents workflows from reporting success while the cluster is still NotReady immediately after join. --- nixos/kubeadm/bootstrap/controller.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nixos/kubeadm/bootstrap/controller.py b/nixos/kubeadm/bootstrap/controller.py index 5368c4c..3707ff7 100755 --- a/nixos/kubeadm/bootstrap/controller.py +++ b/nixos/kubeadm/bootstrap/controller.py @@ -397,6 +397,14 @@ class Controller: self.log("Verification already complete") return self.log("Final node verification") + self.remote( + self.primary_ip, + "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system rollout status ds/cilium --timeout=10m", + ) + self.remote( + self.primary_ip, + "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf wait --for=condition=Ready nodes --all --timeout=10m", + ) proc = self.remote(self.primary_ip, "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get nodes -o wide") print(proc.stdout) self.mark_done("verified")