Compare commits

...

4 Commits

Author SHA1 Message Date
c034f7975c Merge pull request 'stage' (#112) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 28m53s
Reviewed-on: #112
2026-03-04 18:51:53 +00:00
90ef0ec33f Merge branch 'master' into stage
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 17s
2026-03-04 18:42:22 +00:00
ba6cf42c04 fix: restart kubelet during CRISocket recovery and add registration diagnostics
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 16s
When kubeadm init fails at upload-config/kubelet due missing node object, explicitly restart kubelet to ensure bootstrap flags are loaded before waiting for node registration. Add kubelet flag dump and focused registration log output to surface auth/cert errors.
2026-03-04 18:37:50 +00:00
3cd0c70727 fix: stop overriding kubelet config in kubeadm init
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 17s
Remove custom KubeletConfiguration from init config so kubeadm uses default kubelet authn/authz settings and bootstrap registration path. This avoids the standalone-style kubelet behavior where the node never appears in the API.
2026-03-04 18:35:34 +00:00

View File

@@ -175,14 +175,6 @@ in
podSubnet: "KUBEADM_POD_SUBNET" podSubnet: "KUBEADM_POD_SUBNET"
serviceSubnet: "KUBEADM_SERVICE_SUBNET" serviceSubnet: "KUBEADM_SERVICE_SUBNET"
dnsDomain: "KUBEADM_DNS_DOMAIN" dnsDomain: "KUBEADM_DNS_DOMAIN"
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
authentication:
webhook:
enabled: false
authorization:
mode: AlwaysAllow
KUBEADMCONFIG KUBEADMCONFIG
sed -i "s|KUBEADM_ENDPOINT|$vip:6443|g" /tmp/kubeadm/init-config.yaml sed -i "s|KUBEADM_ENDPOINT|$vip:6443|g" /tmp/kubeadm/init-config.yaml
@@ -217,6 +209,12 @@ in
--ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then --ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then
if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then
echo "==> kubeadm hit CRISocket race; waiting for node registration" echo "==> kubeadm hit CRISocket race; waiting for node registration"
echo "==> forcing kubelet restart to pick bootstrap flags"
systemctl daemon-reload || true
systemctl restart kubelet || true
sleep 3
echo "==> kubelet bootstrap flags"
cat /var/lib/kubelet/kubeadm-flags.env || true
registered=0 registered=0
for i in $(seq 1 60); do for i in $(seq 1 60); do
if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then
@@ -230,6 +228,8 @@ in
if [ "$registered" -ne 1 ]; then if [ "$registered" -ne 1 ]; then
echo "==> node $node_name did not register after kubeadm init failure" echo "==> node $node_name did not register after kubeadm init failure"
KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true
echo "==> kubelet logs (registration hints)"
journalctl -u kubelet --no-pager -n 120 | grep -Ei "register|node|bootstrap|certificate|forbidden|unauthorized|refused|x509" || true
exit 1 exit 1
fi fi
else else