From ba6cf42c043591aee5b851bfb540bd695cc3ff1d Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Wed, 4 Mar 2026 18:37:50 +0000 Subject: [PATCH] fix: restart kubelet during CRISocket recovery and add registration diagnostics When kubeadm init fails at upload-config/kubelet due to a missing node object, explicitly restart kubelet to ensure bootstrap flags are loaded before waiting for node registration. Add kubelet flag dump and focused registration log output to surface auth/cert errors. --- nixos/kubeadm/modules/k8s-common.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nixos/kubeadm/modules/k8s-common.nix b/nixos/kubeadm/modules/k8s-common.nix index d88383f..29c378e 100644 --- a/nixos/kubeadm/modules/k8s-common.nix +++ b/nixos/kubeadm/modules/k8s-common.nix @@ -209,6 +209,12 @@ in --ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then echo "==> kubeadm hit CRISocket race; waiting for node registration" + echo "==> forcing kubelet restart to pick bootstrap flags" + systemctl daemon-reload || true + systemctl restart kubelet || true + sleep 3 + echo "==> kubelet bootstrap flags" + cat /var/lib/kubelet/kubeadm-flags.env || true registered=0 for i in $(seq 1 60); do if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then @@ -222,6 +228,8 @@ in if [ "$registered" -ne 1 ]; then echo "==> node $node_name did not register after kubeadm init failure" KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true + echo "==> kubelet logs (registration hints)" + journalctl -u kubelet --no-pager -n 120 | grep -Ei "register|node|bootstrap|certificate|forbidden|unauthorized|refused|x509" || true exit 1 fi else