fix: restart kubelet during CRISocket recovery and add registration diagnostics
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 16s
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 16s
When kubeadm init fails at the upload-config/kubelet phase due to a missing node object, explicitly restart kubelet to ensure bootstrap flags are loaded before waiting for node registration. Add a kubelet flag dump and focused registration log output to surface auth/cert errors.
This commit is contained in:
@@ -209,6 +209,12 @@ in
|
|||||||
--ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then
|
--ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then
|
||||||
if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then
|
if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then
|
||||||
echo "==> kubeadm hit CRISocket race; waiting for node registration"
|
echo "==> kubeadm hit CRISocket race; waiting for node registration"
|
||||||
|
echo "==> forcing kubelet restart to pick bootstrap flags"
|
||||||
|
systemctl daemon-reload || true
|
||||||
|
systemctl restart kubelet || true
|
||||||
|
sleep 3
|
||||||
|
echo "==> kubelet bootstrap flags"
|
||||||
|
cat /var/lib/kubelet/kubeadm-flags.env || true
|
||||||
registered=0
|
registered=0
|
||||||
for i in $(seq 1 60); do
|
for i in $(seq 1 60); do
|
||||||
if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then
|
if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then
|
||||||
@@ -222,6 +228,8 @@ in
|
|||||||
if [ "$registered" -ne 1 ]; then
|
if [ "$registered" -ne 1 ]; then
|
||||||
echo "==> node $node_name did not register after kubeadm init failure"
|
echo "==> node $node_name did not register after kubeadm init failure"
|
||||||
KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true
|
KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true
|
||||||
|
echo "==> kubelet logs (registration hints)"
|
||||||
|
journalctl -u kubelet --no-pager -n 120 | grep -Ei "register|node|bootstrap|certificate|forbidden|unauthorized|refused|x509" || true
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
|
|||||||
Reference in New Issue
Block a user