Merge pull request 'fix: recover from kubeadm CRISocket node-registration race' (#111) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 18m6s
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 18m6s
Reviewed-on: #111
This commit was merged in pull request #111.
This commit is contained in:
@@ -210,27 +210,47 @@ in
|
|||||||
# Show which kubeconfig (admin.conf or super-admin.conf) the kube-vip static
# pod manifest mounts. Purely informational, so a non-match must not fail.
echo "==> kube-vip manifest kubeconfig mount"
grep -E 'mountPath:|path:' /etc/kubernetes/manifests/kube-vip.yaml | grep -E 'kubernetes/(admin|super-admin)\.conf' || true

# kubeadm output is copied here so a failure can be classified below.
KUBEADM_INIT_LOG=/tmp/kubeadm-init.log

# Run kubeadm init with a scrubbed environment, streaming its combined output
# to the console and to the log file.
# The pipeline runs in a subshell with pipefail enabled so that the condition
# reflects kubeadm's exit status rather than tee's; without pipefail a failed
# init would go unnoticed and neither branch below would ever run. Scoping the
# option to the subshell leaves the rest of the script's shell options alone.
# NOTE(review): brace-style parameter expansions are deliberately avoided here
# because this snippet appears to be embedded in a Nix string - confirm.
if ! (
  set -o pipefail
  env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init \
    --config /tmp/kubeadm/init-config.yaml \
    --upload-certs \
    --ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"
); then
  if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then
    # Recoverable case: kubeadm failed while writing the CRISocket annotation
    # and admin.conf already exists. Poll for the node object (60 attempts,
    # 2 seconds apart) and re-run the kubelet config upload once it appears.
    # NOTE(review): only the upload-config/kubelet phase is re-run here;
    # confirm that any later init phases are handled elsewhere.
    echo "==> kubeadm hit CRISocket race; waiting for node registration"
    registered=0
    for i in $(seq 1 60); do
      if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then
        echo "==> node $node_name registered; uploading kubelet config"
        # Only treat the recovery as successful if the upload itself worked.
        env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init phase upload-config kubelet --config /tmp/kubeadm/init-config.yaml || {
          echo "==> kubelet config upload failed for node $node_name"
          exit 1
        }
        registered=1
        break
      fi
      sleep 2
    done
    if [ "$registered" -ne 1 ]; then
      echo "==> node $node_name did not register after kubeadm init failure"
      KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true
      exit 1
    fi
  else
    # Any other failure: dump best-effort diagnostics, then abort.
    echo "==> kubeadm init failed, checking pod status:"
    crictl pods || true
    crictl ps -a || true
    echo "==> kube-vip containers:"
    crictl ps -a --name kube-vip || true
    echo "==> kube-vip logs:"
    for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
      echo "--- kube-vip container $container_id ---"
      crictl logs "$container_id" 2>/dev/null || true
      crictl inspect "$container_id" 2>/dev/null | jq -r '.status | "exitCode=\(.exitCode) reason=\(.reason // "") message=\(.message // "")"' || true
    done
    echo "==> Checking if VIP is bound:"
    ip -4 addr show | grep "$vip" || echo "VIP NOT BOUND"
    echo "==> kubelet logs:"
    journalctl -xeu kubelet --no-pager -n 50
    exit 1
  fi
fi
|
||||||
|
|
||||||
echo "==> Waiting for kube-vip to claim VIP $vip"
|
echo "==> Waiting for kube-vip to claim VIP $vip"
|
||||||
for i in $(seq 1 90); do
|
for i in $(seq 1 90); do
|
||||||
|
|||||||
Reference in New Issue
Block a user