Merge pull request 'fix: recover from kubeadm CRISocket node-registration race' (#111) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 18m6s
Reviewed-on: #111
This commit was merged in pull request #111.
This commit is contained in:
@@ -210,27 +210,47 @@ in
 echo "==> kube-vip manifest kubeconfig mount"
 grep -E 'mountPath:|path:' /etc/kubernetes/manifests/kube-vip.yaml | grep -E 'kubernetes/(admin|super-admin)\.conf' || true

-env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init \
+KUBEADM_INIT_LOG=/tmp/kubeadm-init.log
+if ! env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init \
 --config /tmp/kubeadm/init-config.yaml \
 --upload-certs \
---ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 || {
-echo "==> kubeadm init failed, checking pod status:"
-crictl pods || true
-crictl ps -a || true
-echo "==> kube-vip containers:"
-crictl ps -a --name kube-vip || true
-echo "==> kube-vip logs:"
-for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
-echo "--- kube-vip container $container_id ---"
-crictl logs "$container_id" 2>/dev/null || true
-crictl inspect "$container_id" 2>/dev/null | jq -r '.status | "exitCode=\(.exitCode) reason=\(.reason // "") message=\(.message // "")"' || true
-done
-echo "==> Checking if VIP is bound:"
-ip -4 addr show | grep "$vip" || echo "VIP NOT BOUND"
-echo "==> kubelet logs:"
-journalctl -xeu kubelet --no-pager -n 50
-exit 1
-}
+--ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then
+if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then
+echo "==> kubeadm hit CRISocket race; waiting for node registration"
+registered=0
+for i in $(seq 1 60); do
+if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then
+echo "==> node $node_name registered; uploading kubelet config"
+env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init phase upload-config kubelet --config /tmp/kubeadm/init-config.yaml
+registered=1
+break
+fi
+sleep 2
+done
+if [ "$registered" -ne 1 ]; then
+echo "==> node $node_name did not register after kubeadm init failure"
+KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true
+exit 1
+fi
+else
+echo "==> kubeadm init failed, checking pod status:"
+crictl pods || true
+crictl ps -a || true
+echo "==> kube-vip containers:"
+crictl ps -a --name kube-vip || true
+echo "==> kube-vip logs:"
+for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
+echo "--- kube-vip container $container_id ---"
+crictl logs "$container_id" 2>/dev/null || true
+crictl inspect "$container_id" 2>/dev/null | jq -r '.status | "exitCode=\(.exitCode) reason=\(.reason // "") message=\(.message // "")"' || true
+done
+echo "==> Checking if VIP is bound:"
+ip -4 addr show | grep "$vip" || echo "VIP NOT BOUND"
+echo "==> kubelet logs:"
+journalctl -xeu kubelet --no-pager -n 50
+exit 1
+fi
+fi

 echo "==> Waiting for kube-vip to claim VIP $vip"
 for i in $(seq 1 90); do
Reference in New Issue
Block a user