Files
TerraHome/nixos/kubeadm/modules/k8s-common.nix
MichaelFisher1997 ba6cf42c04
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 16s
fix: restart kubelet during CRISocket recovery and add registration diagnostics
When kubeadm init fails at upload-config/kubelet due to a missing node object, explicitly restart kubelet to ensure bootstrap flags are loaded before waiting for node registration. Add a kubelet flag dump and focused registration log output to surface auth/cert errors.
2026-03-04 18:37:50 +00:00

408 lines
14 KiB
Nix

{ config, lib, pkgs, ... }:
let
  # Prefer the pinned kubernetes_1_31 package when the package set provides
  # one; otherwise fall back to the default pkgs.kubernetes.
  pinnedK8s = lib.attrByPath [ "kubernetes_1_31" ] pkgs.kubernetes pkgs;
  # kube-vip image run as a static pod on every control-plane node.
  kubeVipImage = "ghcr.io/kube-vip/kube-vip:v0.8.9";
in
{
  # Shared knobs for every kubeadm-managed node in this cluster.
  options.terrahome.kubeadm = {
    k8sMinor = lib.mkOption {
      type = lib.types.str;
      default = "1.31";
      description = "Kubernetes minor version this module targets.";
    };
    controlPlaneInterface = lib.mkOption {
      type = lib.types.str;
      default = "eth0";
      description = "Network interface kube-vip binds the control-plane VIP to; helper scripts fall back to the default-route interface when this one is absent.";
    };
    controlPlaneVipSuffix = lib.mkOption {
      type = lib.types.int;
      default = 250;
      description = "Last octet of the control-plane VIP within the node's local subnet.";
    };
    podSubnet = lib.mkOption {
      type = lib.types.str;
      default = "10.244.0.0/16";
      description = "CIDR allocated to pod IPs (kubeadm networking.podSubnet).";
    };
    serviceSubnet = lib.mkOption {
      type = lib.types.str;
      default = "10.96.0.0/12";
      description = "CIDR allocated to ClusterIP services (kubeadm networking.serviceSubnet).";
    };
    clusterDomain = lib.mkOption {
      type = lib.types.str;
      default = "cluster.local";
      description = "Cluster DNS domain (kubeadm networking.dnsDomain).";
    };
  };
config = {
# Kernel prerequisites for Kubernetes container networking: overlayfs for
# containerd snapshots, br_netfilter so bridged pod traffic traverses
# iptables, and IP forwarding for pod-to-pod routing.
boot.kernelModules = [ "overlay" "br_netfilter" ];
boot.kernel.sysctl = {
"net.ipv4.ip_forward" = 1;
"net.bridge.bridge-nf-call-iptables" = 1;
"net.bridge.bridge-nf-call-ip6tables" = 1;
};
# containerd is the CRI runtime; runc uses the systemd cgroup driver to
# match the kubelet's cgroup management.
virtualisation.containerd.enable = true;
virtualisation.containerd.settings = {
plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options.SystemdCgroup = true;
};
# kubelet requires swap to be disabled; force-override any host defaults.
swapDevices = lib.mkForce [ ];
services.openssh.enable = true;
services.openssh.settings = {
PasswordAuthentication = false;
KbdInteractiveAuthentication = false;
};
users.users.micqdf = {
isNormalUser = true;
extraGroups = [ "wheel" ];
};
security.sudo.wheelNeedsPassword = false;
nix.settings.trusted-users = [ "root" "micqdf" ];
# Keep the store lean on cluster nodes: daily GC plus store optimisation.
nix.gc = {
automatic = true;
dates = "daily";
options = "--delete-older-than 3d";
};
nix.settings.auto-optimise-store = true;
environment.variables = {
KUBECONFIG = "/etc/kubernetes/admin.conf";
KUBE_VIP_IMAGE = kubeVipImage;
};
# Cluster tooling available on every node, followed by the helper scripts
# defined below.
environment.systemPackages = (with pkgs; [
containerd
cri-tools
cni-plugins
pinnedK8s
kubernetes-helm
conntrack-tools
socat
ethtool
ipvsadm
iproute2
iptables
ebtables
jq
curl
vim
gawk
]) ++ [
(pkgs.writeShellScriptBin "th-kubeadm-init" ''
# Bootstrap the FIRST control-plane node:
#   1. derive a VIP from this node's subnet and the configured suffix,
#   2. generate a kube-vip static pod manifest,
#   3. run kubeadm init behind that VIP,
#   4. recover from a known CRISocket/node-registration race if it occurs.
set -euo pipefail
# Proxy variables would break kubeadm's connections to the API server.
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY
# Interface comes from config.terrahome.kubeadm.controlPlaneInterface; if
# absent on this host, fall back to the default route's device.
iface="${config.terrahome.kubeadm.controlPlaneInterface}"
if ! ip link show "$iface" >/dev/null 2>&1; then
iface="$(ip -o -4 route show to default | awk 'NR==1 {print $5}')"
fi
if [ -z "''${iface:-}" ]; then
echo "Could not determine network interface for kube-vip"
exit 1
fi
suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
pod_subnet="${config.terrahome.kubeadm.podSubnet}"
service_subnet="${config.terrahome.kubeadm.serviceSubnet}"
domain="${config.terrahome.kubeadm.clusterDomain}"
node_name="${config.networking.hostName}"
local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
if [ -z "''${local_ip_cidr:-}" ]; then
echo "Could not determine IPv4 CIDR on interface $iface"
exit 1
fi
# NOTE(review): the VIP keeps the first three octets of the local address,
# which assumes a /24-shaped subnet -- confirm for other prefix lengths.
subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
vip="$subnet_prefix.$suffix"
echo "Using control-plane endpoint: $vip:6443"
echo "Using kube-vip interface: $iface"
echo "Using kubeadm node name: $node_name"
hostname "$node_name" || true
# Wipe state from any previous kubeadm run so init starts clean.
rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env
systemctl unmask kubelet || true
systemctl stop kubelet || true
systemctl reset-failed kubelet || true
# env -i keeps stray environment (proxies etc.) away from kubeadm.
env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm reset -f || true
rm -f /etc/kubernetes/kubelet.conf /etc/kubernetes/bootstrap-kubelet.conf
rm -f /var/lib/kubelet/kubeconfig /var/lib/kubelet/instance-config.yaml
rm -rf /var/lib/kubelet/pki
systemctl daemon-reload
systemctl unmask kubelet || true
systemctl enable kubelet || true
echo "==> Ensuring containerd is running"
systemctl start containerd || true
sleep 2
if ! systemctl is-active containerd; then
echo "ERROR: containerd not running"
journalctl -xeu containerd --no-pager -n 30
exit 1
fi
mkdir -p /etc/kubernetes/manifests
mkdir -p /tmp/kubeadm
# Write the kubeadm config with placeholder tokens and substitute below.
# The heredoc delimiter is quoted, so nothing expands inside it.
cat > /tmp/kubeadm/init-config.yaml << 'KUBEADMCONFIG'
apiVersion: kubeadm.k8s.io/v1beta4
kind: InitConfiguration
nodeRegistration:
name: "KUBEADM_NODE_NAME"
criSocket: unix:///run/containerd/containerd.sock
kubeletExtraArgs:
- name: hostname-override
value: "KUBEADM_NODE_NAME"
---
apiVersion: kubeadm.k8s.io/v1beta4
kind: ClusterConfiguration
controlPlaneEndpoint: "KUBEADM_ENDPOINT"
networking:
podSubnet: "KUBEADM_POD_SUBNET"
serviceSubnet: "KUBEADM_SERVICE_SUBNET"
dnsDomain: "KUBEADM_DNS_DOMAIN"
KUBEADMCONFIG
sed -i "s|KUBEADM_ENDPOINT|$vip:6443|g" /tmp/kubeadm/init-config.yaml
sed -i "s|KUBEADM_POD_SUBNET|$pod_subnet|g" /tmp/kubeadm/init-config.yaml
sed -i "s|KUBEADM_SERVICE_SUBNET|$service_subnet|g" /tmp/kubeadm/init-config.yaml
sed -i "s|KUBEADM_DNS_DOMAIN|$domain|g" /tmp/kubeadm/init-config.yaml
sed -i "s|KUBEADM_NODE_NAME|$node_name|g" /tmp/kubeadm/init-config.yaml
echo "==> Pre-pulling kubeadm images"
env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm config images pull --config /tmp/kubeadm/init-config.yaml || true
echo "==> Creating kube-vip static pod manifest"
# Run the kube-vip binary once under containerd so it emits its own static
# pod manifest for the chosen interface and VIP (ARP mode, control plane).
ctr image pull "${kubeVipImage}"
ctr run --rm --net-host "${kubeVipImage}" kube-vip-manifest /kube-vip manifest pod \
--log 4 \
--interface "$iface" \
--address "$vip" \
--controlplane \
--arp \
> /etc/kubernetes/manifests/kube-vip.yaml
# kube-vip bootstrap workaround for Kubernetes >=1.29.
# During early kubeadm phases, super-admin.conf is available before admin.conf is fully usable.
sed -i 's#path: /etc/kubernetes/admin.conf#path: /etc/kubernetes/super-admin.conf#' /etc/kubernetes/manifests/kube-vip.yaml || true
echo "==> kube-vip manifest kubeconfig mount"
grep -E 'mountPath:|path:' /etc/kubernetes/manifests/kube-vip.yaml | grep -E 'kubernetes/(admin|super-admin)\.conf' || true
# Capture the full init log so the recovery path can inspect the failure.
KUBEADM_INIT_LOG=/tmp/kubeadm-init.log
if ! env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init \
--config /tmp/kubeadm/init-config.yaml \
--upload-certs \
--ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then
# Recovery path: kubeadm may fail writing the CRISocket annotation when the
# kubelet has not registered the node object yet, even though the control
# plane itself came up (admin.conf exists). Restart the kubelet so it loads
# the bootstrap flags kubeadm wrote, wait for the node to register, then
# re-run the skipped upload-config phase.
if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then
echo "==> kubeadm hit CRISocket race; waiting for node registration"
echo "==> forcing kubelet restart to pick bootstrap flags"
systemctl daemon-reload || true
systemctl restart kubelet || true
sleep 3
echo "==> kubelet bootstrap flags"
cat /var/lib/kubelet/kubeadm-flags.env || true
registered=0
# Poll up to ~2 minutes (60 x 2s) for the node object to appear.
for i in $(seq 1 60); do
if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then
echo "==> node $node_name registered; uploading kubelet config"
env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init phase upload-config kubelet --config /tmp/kubeadm/init-config.yaml
registered=1
break
fi
sleep 2
done
if [ "$registered" -ne 1 ]; then
echo "==> node $node_name did not register after kubeadm init failure"
KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true
echo "==> kubelet logs (registration hints)"
journalctl -u kubelet --no-pager -n 120 | grep -Ei "register|node|bootstrap|certificate|forbidden|unauthorized|refused|x509" || true
exit 1
fi
else
# Any other init failure: dump runtime/pod/kube-vip state for diagnosis.
echo "==> kubeadm init failed, checking pod status:"
crictl pods || true
crictl ps -a || true
echo "==> kube-vip containers:"
crictl ps -a --name kube-vip || true
echo "==> kube-vip logs:"
for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
echo "--- kube-vip container $container_id ---"
crictl logs "$container_id" 2>/dev/null || true
crictl inspect "$container_id" 2>/dev/null | jq -r '.status | "exitCode=\(.exitCode) reason=\(.reason // "") message=\(.message // "")"' || true
done
echo "==> Checking if VIP is bound:"
ip -4 addr show | grep "$vip" || echo "VIP NOT BOUND"
echo "==> kubelet logs:"
journalctl -xeu kubelet --no-pager -n 50
exit 1
fi
fi
echo "==> Waiting for kube-vip to claim VIP $vip"
# Up to 3 minutes (90 x 2s) for kube-vip to bind the VIP on this host.
for i in $(seq 1 90); do
if ip -4 addr show | grep -q "$vip"; then
echo "==> VIP $vip is bound"
break
fi
if [ "$i" -eq 90 ]; then
echo "==> ERROR: VIP not bound after 3 minutes"
crictl ps -a --name kube-vip || true
for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
echo "--- kube-vip container $container_id ---"
crictl logs "$container_id" 2>/dev/null || true
done
exit 1
fi
sleep 2
done
echo "==> Waiting for API server to be ready"
# Up to 2 minutes (60 x 2s) for /healthz through the VIP to report ok.
for i in $(seq 1 60); do
if curl -sk "https://$vip:6443/healthz" 2>/dev/null | grep -q "ok"; then
echo "==> API server is healthy"
break
fi
if [ "$i" -eq 60 ]; then
echo "==> ERROR: API server not healthy after 2 minutes"
crictl pods || true
crictl ps -a || true
exit 1
fi
sleep 2
done
# Switch kube-vip to normal admin.conf after bootstrap finishes.
sed -i 's#path: /etc/kubernetes/super-admin.conf#path: /etc/kubernetes/admin.conf#' /etc/kubernetes/manifests/kube-vip.yaml || true
mkdir -p /root/.kube
cp /etc/kubernetes/admin.conf /root/.kube/config
chmod 600 /root/.kube/config
echo
echo "Next: install Cilium, then generate join commands:"
echo " kubeadm token create --print-join-command"
echo " kubeadm token create --print-join-command --certificate-key <key>"
'')
(pkgs.writeShellScriptBin "th-kubeadm-join-control-plane" ''
# Join this host as an ADDITIONAL control-plane node. Expects the complete
# "kubeadm join ... --control-plane --certificate-key ..." command as one
# quoted argument; installs a kube-vip static pod before running it.
set -euo pipefail
# Proxy variables would break kubeadm's connections to the API server.
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY
if [ "$#" -lt 1 ]; then
echo "Usage: th-kubeadm-join-control-plane '<kubeadm join ... --control-plane --certificate-key ...>'"
exit 1
fi
# Resolve the interface and VIP exactly as th-kubeadm-init does.
iface="${config.terrahome.kubeadm.controlPlaneInterface}"
if ! ip link show "$iface" >/dev/null 2>&1; then
iface="$(ip -o -4 route show to default | awk 'NR==1 {print $5}')"
fi
if [ -z "''${iface:-}" ]; then
echo "Could not determine network interface for kube-vip"
exit 1
fi
suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
if [ -z "''${local_ip_cidr:-}" ]; then
echo "Could not determine IPv4 CIDR on interface $iface"
exit 1
fi
# NOTE(review): assumes a /24-shaped subnet (keeps the first three octets).
subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
vip="$subnet_prefix.$suffix"
mkdir -p /etc/kubernetes/manifests
# Generate the kube-vip manifest; unlike the bootstrap node, this passes
# the leaderElection flag (see kube-vip docs for its VIP-failover behavior).
ctr image pull "${kubeVipImage}"
ctr run --rm --net-host "${kubeVipImage}" kube-vip /kube-vip manifest pod \
--log 4 \
--interface "$iface" \
--address "$vip" \
--controlplane \
--arp \
--leaderElection \
> /etc/kubernetes/manifests/kube-vip.yaml
# Clear stale kubelet state and put the unit in a clean, enabled, stopped
# state, then run the caller-supplied join command.
rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env
systemctl unmask kubelet || true
systemctl stop kubelet || true
systemctl enable kubelet || true
systemctl reset-failed kubelet || true
systemctl daemon-reload
eval "$1"
'')
(pkgs.writeShellScriptBin "th-kubeadm-join-worker" ''
# Join this host to the cluster as a worker node. Takes the complete
# "kubeadm join ..." command as a single quoted argument.
set -euo pipefail
# Proxy variables would break kubeadm's connection to the control plane.
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY
[ "$#" -ge 1 ] || { echo "Usage: th-kubeadm-join-worker '<kubeadm join ...>'"; exit 1; }
# Remove stale kubelet state so kubeadm join regenerates it.
rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env
# Put the kubelet unit in a clean, enabled, stopped state before joining.
for op in unmask stop enable reset-failed; do
systemctl "$op" kubelet || true
done
systemctl daemon-reload
# Execute the caller-supplied join command.
eval "$1"
'')
(pkgs.writeShellScriptBin "th-kubeadm-status" ''
# Quick node health summary: runtime, kubelet, and CRI endpoint reachability.
set -euo pipefail
systemctl is-active containerd || true
systemctl is-active kubelet || true
if crictl info >/dev/null; then
echo "crictl: ok"
else
echo "crictl: not-ready"
fi
'')
];
# Custom kubelet unit mirroring the upstream kubeadm systemd drop-in: the
# helper scripts above expect kubeadm to write kubeadm-flags.env and
# config.yaml at init/join time, and the unit only starts once config.yaml
# exists.
systemd.services.kubelet = {
description = "Kubernetes Kubelet";
wantedBy = [ "multi-user.target" ];
wants = [ "network-online.target" ];
after = [ "containerd.service" "network-online.target" ];
serviceConfig = {
# Defaults; values from the EnvironmentFile entries below override these
# when the files exist.
Environment = [
"KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
"KUBELET_KUBEADM_ARGS="
"KUBELET_EXTRA_ARGS="
];
# The leading "-" makes a missing file non-fatal.
EnvironmentFile = [
"-/var/lib/kubelet/kubeadm-flags.env"
"-/etc/default/kubelet"
];
# "\$" defers expansion to systemd so each variable word-splits into
# individual kubelet flags at start time.
ExecStart = "${pinnedK8s}/bin/kubelet \$KUBELET_CONFIG_ARGS \$KUBELET_KUBEADM_ARGS \$KUBELET_EXTRA_ARGS";
Restart = "on-failure";
RestartSec = "10";
};
unitConfig = {
# Do not start until kubeadm has generated the kubelet configuration.
ConditionPathExists = "/var/lib/kubelet/config.yaml";
};
};
# Pre-create the directories kubeadm and the kubelet expect.
systemd.tmpfiles.rules = [
"d /etc/kubernetes 0755 root root -"
"d /etc/kubernetes/manifests 0755 root root -"
"d /var/lib/kubelet 0755 root root -"
"d /var/lib/kubelet/pki 0755 root root -"
];
};
}