All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 16s
When kubeadm init fails at upload-config/kubelet due to a missing node object, explicitly restart the kubelet to ensure bootstrap flags are loaded before waiting for node registration. Add a kubelet flag dump and focused registration log output to surface auth/cert errors.
408 lines
14 KiB
Nix
408 lines
14 KiB
Nix
{ config, lib, pkgs, ... }:

let
  # Derive the nixpkgs attribute name (e.g. "kubernetes_1_31") from the
  # declared k8sMinor option instead of hard-coding it, falling back to the
  # unpinned `pkgs.kubernetes` when no versioned attribute exists in this
  # nixpkgs checkout. The option default ("1.31") keeps prior behavior.
  k8sAttr = "kubernetes_${lib.replaceStrings [ "." ] [ "_" ] config.terrahome.kubeadm.k8sMinor}";
  pinnedK8s = lib.attrByPath [ k8sAttr ] pkgs.kubernetes pkgs;

  # kube-vip provides the floating control-plane VIP; pinned for reproducibility.
  kubeVipImage = "ghcr.io/kube-vip/kube-vip:v0.8.9";
in
{
  options.terrahome.kubeadm = {
    # Kubernetes minor release this cluster targets.
    k8sMinor = lib.mkOption {
      type = lib.types.str;
      default = "1.31";
      description = "Kubernetes minor release this cluster is pinned to (e.g. \"1.31\").";
    };

    # Helper scripts fall back to the default-route interface when this
    # one is absent on the machine.
    controlPlaneInterface = lib.mkOption {
      type = lib.types.str;
      default = "eth0";
      description = "Preferred network interface for kube-vip to bind the control-plane VIP.";
    };

    controlPlaneVipSuffix = lib.mkOption {
      type = lib.types.int;
      default = 250;
      description = "Host octet of the control-plane VIP; appended to the first three octets of the node's primary IPv4 address (assumes a /24 network).";
    };

    podSubnet = lib.mkOption {
      type = lib.types.str;
      default = "10.244.0.0/16";
      description = "CIDR for the cluster pod network (kubeadm ClusterConfiguration networking.podSubnet).";
    };

    serviceSubnet = lib.mkOption {
      type = lib.types.str;
      default = "10.96.0.0/12";
      description = "CIDR for ClusterIP services (kubeadm ClusterConfiguration networking.serviceSubnet).";
    };

    clusterDomain = lib.mkOption {
      type = lib.types.str;
      default = "cluster.local";
      description = "Cluster DNS domain (kubeadm ClusterConfiguration networking.dnsDomain).";
    };
  };
config = {
  # Kernel modules Kubernetes needs: overlay for containerd's overlayfs
  # snapshotter, br_netfilter so bridged pod traffic is visible to iptables.
  boot.kernelModules = [ "overlay" "br_netfilter" ];

  boot.kernel.sysctl = {
    # Nodes must forward pod traffic between interfaces.
    "net.ipv4.ip_forward" = 1;
    # Pass bridged IPv4/IPv6 traffic through ip(6)tables (kube-proxy/CNI).
    "net.bridge.bridge-nf-call-iptables" = 1;
    "net.bridge.bridge-nf-call-ip6tables" = 1;
  };

  # containerd as the CRI runtime; SystemdCgroup=true matches the
  # kubelet's systemd cgroup driver.
  virtualisation.containerd.enable = true;
  virtualisation.containerd.settings = {
    plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options.SystemdCgroup = true;
  };

  # kubelet refuses to run with swap enabled; force it off even if another
  # module declares swap devices.
  swapDevices = lib.mkForce [ ];

  # Key-only SSH for remote administration.
  services.openssh.enable = true;
  services.openssh.settings = {
    PasswordAuthentication = false;
    KbdInteractiveAuthentication = false;
  };

  users.users.micqdf = {
    isNormalUser = true;
    extraGroups = [ "wheel" ];
  };

  # Passwordless sudo for wheel; NOTE(review): convenient for bootstrap
  # automation — consider tightening once the cluster is stable.
  security.sudo.wheelNeedsPassword = false;

  nix.settings.trusted-users = [ "root" "micqdf" ];
  # Aggressive GC keeps cluster nodes from filling up with old generations.
  nix.gc = {
    automatic = true;
    dates = "daily";
    options = "--delete-older-than 3d";
  };
  nix.settings.auto-optimise-store = true;

  environment.variables = {
    # Let root shells talk to the cluster without extra setup.
    KUBECONFIG = "/etc/kubernetes/admin.conf";
    KUBE_VIP_IMAGE = kubeVipImage;
  };
environment.systemPackages = (with pkgs; [
  # Container runtime and CRI tooling.
  containerd
  cri-tools
  cni-plugins
  # Pinned Kubernetes CLI/daemon bundle (kubeadm, kubectl, kubelet) + helm.
  pinnedK8s
  kubernetes-helm
  # Runtime dependencies of kube-proxy / CNI plumbing.
  conntrack-tools
  socat
  ethtool
  ipvsadm
  iproute2
  iptables
  ebtables
  # Debugging conveniences used by the helper scripts below.
  jq
  curl
  vim
  gawk
]) ++ [
# Bootstrap the first control-plane node: derive the VIP, generate the
# kube-vip static pod, run kubeadm init, and recover from the known
# CRISocket-annotation race when the control plane otherwise came up.
(pkgs.writeShellScriptBin "th-kubeadm-init" ''
  set -euo pipefail

  # Proxies break kubeadm's in-cluster connectivity checks.
  unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY

  # Pick the configured interface, falling back to the default-route NIC.
  iface="${config.terrahome.kubeadm.controlPlaneInterface}"
  if ! ip link show "$iface" >/dev/null 2>&1; then
    iface="$(ip -o -4 route show to default | awk 'NR==1 {print $5}')"
  fi

  if [ -z "''${iface:-}" ]; then
    echo "Could not determine network interface for kube-vip"
    exit 1
  fi

  suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
  pod_subnet="${config.terrahome.kubeadm.podSubnet}"
  service_subnet="${config.terrahome.kubeadm.serviceSubnet}"
  domain="${config.terrahome.kubeadm.clusterDomain}"
  node_name="${config.networking.hostName}"

  local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
  if [ -z "''${local_ip_cidr:-}" ]; then
    echo "Could not determine IPv4 CIDR on interface $iface"
    exit 1
  fi

  # VIP = first three octets of the node address + configured suffix
  # (assumes a /24 network).
  subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
  vip="$subnet_prefix.$suffix"

  echo "Using control-plane endpoint: $vip:6443"
  echo "Using kube-vip interface: $iface"
  echo "Using kubeadm node name: $node_name"

  hostname "$node_name" || true

  # Wipe kubelet/kubeadm state from any previous attempt so init starts clean.
  rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env

  systemctl unmask kubelet || true
  systemctl stop kubelet || true
  systemctl reset-failed kubelet || true
  # env -i: run kubeadm with a minimal environment so stray variables
  # cannot leak into generated configs.
  env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm reset -f || true
  rm -f /etc/kubernetes/kubelet.conf /etc/kubernetes/bootstrap-kubelet.conf
  rm -f /var/lib/kubelet/kubeconfig /var/lib/kubelet/instance-config.yaml
  rm -rf /var/lib/kubelet/pki

  systemctl daemon-reload
  systemctl unmask kubelet || true
  systemctl enable kubelet || true

  echo "==> Ensuring containerd is running"
  systemctl start containerd || true
  sleep 2
  if ! systemctl is-active --quiet containerd; then
    echo "ERROR: containerd not running"
    journalctl -xeu containerd --no-pager -n 30
    exit 1
  fi

  mkdir -p /etc/kubernetes/manifests
  mkdir -p /tmp/kubeadm
  # Quoted heredoc: placeholders are substituted by sed below, not the shell.
  cat > /tmp/kubeadm/init-config.yaml << 'KUBEADMCONFIG'
  apiVersion: kubeadm.k8s.io/v1beta4
  kind: InitConfiguration
  nodeRegistration:
    name: "KUBEADM_NODE_NAME"
    criSocket: unix:///run/containerd/containerd.sock
    kubeletExtraArgs:
      - name: hostname-override
        value: "KUBEADM_NODE_NAME"
  ---
  apiVersion: kubeadm.k8s.io/v1beta4
  kind: ClusterConfiguration
  controlPlaneEndpoint: "KUBEADM_ENDPOINT"
  networking:
    podSubnet: "KUBEADM_POD_SUBNET"
    serviceSubnet: "KUBEADM_SERVICE_SUBNET"
    dnsDomain: "KUBEADM_DNS_DOMAIN"
  KUBEADMCONFIG

  # Fill in the placeholders computed above.
  sed -i "s|KUBEADM_ENDPOINT|$vip:6443|g" /tmp/kubeadm/init-config.yaml
  sed -i "s|KUBEADM_POD_SUBNET|$pod_subnet|g" /tmp/kubeadm/init-config.yaml
  sed -i "s|KUBEADM_SERVICE_SUBNET|$service_subnet|g" /tmp/kubeadm/init-config.yaml
  sed -i "s|KUBEADM_DNS_DOMAIN|$domain|g" /tmp/kubeadm/init-config.yaml
  sed -i "s|KUBEADM_NODE_NAME|$node_name|g" /tmp/kubeadm/init-config.yaml

  echo "==> Pre-pulling kubeadm images"
  env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm config images pull --config /tmp/kubeadm/init-config.yaml || true

  echo "==> Creating kube-vip static pod manifest"
  ctr image pull "${kubeVipImage}"
  ctr run --rm --net-host "${kubeVipImage}" kube-vip-manifest /kube-vip manifest pod \
    --log 4 \
    --interface "$iface" \
    --address "$vip" \
    --controlplane \
    --arp \
    > /etc/kubernetes/manifests/kube-vip.yaml

  # kube-vip bootstrap workaround for Kubernetes >=1.29.
  # During early kubeadm phases, super-admin.conf is available before admin.conf is fully usable.
  sed -i 's#path: /etc/kubernetes/admin.conf#path: /etc/kubernetes/super-admin.conf#' /etc/kubernetes/manifests/kube-vip.yaml || true
  echo "==> kube-vip manifest kubeconfig mount"
  grep -E 'mountPath:|path:' /etc/kubernetes/manifests/kube-vip.yaml | grep -E 'kubernetes/(admin|super-admin)\.conf' || true

  KUBEADM_INIT_LOG=/tmp/kubeadm-init.log
  if ! env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init \
    --config /tmp/kubeadm/init-config.yaml \
    --upload-certs \
    --ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then
    # kubeadm can fail at its final step (writing the CRISocket annotation)
    # because the kubelet has not registered the node object yet. If the
    # control plane itself came up (admin.conf exists), recover by waiting
    # for registration instead of bailing out.
    if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then
      echo "==> kubeadm hit CRISocket race; waiting for node registration"
      echo "==> forcing kubelet restart to pick bootstrap flags"
      systemctl daemon-reload || true
      systemctl restart kubelet || true
      sleep 3
      echo "==> kubelet bootstrap flags"
      cat /var/lib/kubelet/kubeadm-flags.env || true
      registered=0
      for i in $(seq 1 60); do
        if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then
          echo "==> node $node_name registered; uploading kubelet config"
          env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init phase upload-config kubelet --config /tmp/kubeadm/init-config.yaml
          registered=1
          break
        fi
        sleep 2
      done
      if [ "$registered" -ne 1 ]; then
        echo "==> node $node_name did not register after kubeadm init failure"
        KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true
        echo "==> kubelet logs (registration hints)"
        journalctl -u kubelet --no-pager -n 120 | grep -Ei "register|node|bootstrap|certificate|forbidden|unauthorized|refused|x509" || true
        exit 1
      fi
    else
      echo "==> kubeadm init failed, checking pod status:"
      crictl pods || true
      crictl ps -a || true
      echo "==> kube-vip containers:"
      crictl ps -a --name kube-vip || true
      echo "==> kube-vip logs:"
      for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
        echo "--- kube-vip container $container_id ---"
        crictl logs "$container_id" 2>/dev/null || true
        crictl inspect "$container_id" 2>/dev/null | jq -r '.status | "exitCode=\(.exitCode) reason=\(.reason // "") message=\(.message // "")"' || true
      done
      echo "==> Checking if VIP is bound:"
      # -F anchors the address literally so dots are not regex wildcards.
      ip -4 addr show | grep -F "inet $vip/" || echo "VIP NOT BOUND"
      echo "==> kubelet logs:"
      journalctl -xeu kubelet --no-pager -n 50
      exit 1
    fi
  fi

  echo "==> Waiting for kube-vip to claim VIP $vip"
  for i in $(seq 1 90); do
    # Match "inet <vip>/" literally so dots are not wildcards and a longer
    # address (e.g. x.y.z.2501) cannot produce a false positive.
    if ip -4 addr show | grep -qF "inet $vip/"; then
      echo "==> VIP $vip is bound"
      break
    fi
    if [ "$i" -eq 90 ]; then
      echo "==> ERROR: VIP not bound after 3 minutes"
      crictl ps -a --name kube-vip || true
      for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
        echo "--- kube-vip container $container_id ---"
        crictl logs "$container_id" 2>/dev/null || true
      done
      exit 1
    fi
    sleep 2
  done

  echo "==> Waiting for API server to be ready"
  for i in $(seq 1 60); do
    if curl -sk "https://$vip:6443/healthz" 2>/dev/null | grep -q "ok"; then
      echo "==> API server is healthy"
      break
    fi
    if [ "$i" -eq 60 ]; then
      echo "==> ERROR: API server not healthy after 2 minutes"
      crictl pods || true
      crictl ps -a || true
      exit 1
    fi
    sleep 2
  done

  # Switch kube-vip to normal admin.conf after bootstrap finishes.
  sed -i 's#path: /etc/kubernetes/super-admin.conf#path: /etc/kubernetes/admin.conf#' /etc/kubernetes/manifests/kube-vip.yaml || true

  mkdir -p /root/.kube
  cp /etc/kubernetes/admin.conf /root/.kube/config
  chmod 600 /root/.kube/config

  echo
  echo "Next: install Cilium, then generate join commands:"
  echo " kubeadm token create --print-join-command"
  echo " kubeadm token create --print-join-command --certificate-key <key>"
'')
# Join an additional control-plane node: derive the same VIP as the init
# node, pre-create the kube-vip static pod, then run the supplied join command.
(pkgs.writeShellScriptBin "th-kubeadm-join-control-plane" ''
  set -euo pipefail
  # Proxies break kubeadm's connectivity checks.
  unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY
  # Expects the full control-plane join command (as printed by
  # `kubeadm token create --print-join-command --certificate-key ...`)
  # as a single quoted argument.
  if [ "$#" -lt 1 ]; then
    echo "Usage: th-kubeadm-join-control-plane '<kubeadm join ... --control-plane --certificate-key ...>'"
    exit 1
  fi

  # Same interface fallback logic as th-kubeadm-init.
  iface="${config.terrahome.kubeadm.controlPlaneInterface}"
  if ! ip link show "$iface" >/dev/null 2>&1; then
    iface="$(ip -o -4 route show to default | awk 'NR==1 {print $5}')"
  fi

  if [ -z "''${iface:-}" ]; then
    echo "Could not determine network interface for kube-vip"
    exit 1
  fi

  suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
  local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
  if [ -z "''${local_ip_cidr:-}" ]; then
    echo "Could not determine IPv4 CIDR on interface $iface"
    exit 1
  fi

  # Same /24-based VIP derivation as th-kubeadm-init.
  subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
  vip="$subnet_prefix.$suffix"

  # Pre-create the kube-vip static pod so this node can also serve the VIP.
  mkdir -p /etc/kubernetes/manifests
  ctr image pull "${kubeVipImage}"
  ctr run --rm --net-host "${kubeVipImage}" kube-vip /kube-vip manifest pod \
    --log 4 \
    --interface "$iface" \
    --address "$vip" \
    --controlplane \
    --arp \
    --leaderElection \
    > /etc/kubernetes/manifests/kube-vip.yaml

  # Clear stale kubelet state; kubeadm join regenerates both files.
  rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env

  systemctl unmask kubelet || true
  systemctl stop kubelet || true
  systemctl enable kubelet || true
  systemctl reset-failed kubelet || true
  systemctl daemon-reload
  # NOTE(review): $1 is trusted operator input; eval runs the join command verbatim.
  eval "$1"
'')
# Join a worker node: reset stale kubelet state, then run the supplied
# `kubeadm join` command (no kube-vip needed on workers).
(pkgs.writeShellScriptBin "th-kubeadm-join-worker" ''
  set -euo pipefail
  # Proxies break kubeadm's connectivity checks.
  unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY
  # Expects the full worker join command as a single quoted argument.
  if [ "$#" -lt 1 ]; then
    echo "Usage: th-kubeadm-join-worker '<kubeadm join ...>'"
    exit 1
  fi

  # Clear stale kubelet state; kubeadm join regenerates both files.
  rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env

  systemctl unmask kubelet || true
  systemctl stop kubelet || true
  systemctl enable kubelet || true
  systemctl reset-failed kubelet || true
  systemctl daemon-reload
  # NOTE(review): $1 is trusted operator input; eval runs the join command verbatim.
  eval "$1"
'')
# Quick node health summary: runtime/kubelet unit state and CRI socket
# reachability via crictl.
(pkgs.writeShellScriptBin "th-kubeadm-status" ''
  set -euo pipefail
  systemctl is-active containerd || true
  systemctl is-active kubelet || true
  crictl info >/dev/null && echo "crictl: ok" || echo "crictl: not-ready"
'')
];
# kubeadm-style kubelet unit: kubeadm writes /var/lib/kubelet/config.yaml
# and kubeadm-flags.env at init/join time; until then the unit stays inert.
systemd.services.kubelet = {
  description = "Kubernetes Kubelet";
  wantedBy = [ "multi-user.target" ];
  wants = [ "network-online.target" ];
  after = [ "containerd.service" "network-online.target" ];
  serviceConfig = {
    Environment = [
      "KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
      # Empty defaults; overridden by the EnvironmentFile entries below
      # once kubeadm has generated them.
      "KUBELET_KUBEADM_ARGS="
      "KUBELET_EXTRA_ARGS="
    ];
    # Leading "-" marks the files optional: missing files are not an error.
    EnvironmentFile = [
      "-/var/lib/kubelet/kubeadm-flags.env"
      "-/etc/default/kubelet"
    ];
    # \$ keeps the literal $VAR in the unit file so systemd expands the
    # variables at start time (not Nix at eval time).
    ExecStart = "${pinnedK8s}/bin/kubelet \$KUBELET_CONFIG_ARGS \$KUBELET_KUBEADM_ARGS \$KUBELET_EXTRA_ARGS";
    Restart = "on-failure";
    RestartSec = "10";
  };
  unitConfig = {
    # Don't start (or crash-loop) before kubeadm has produced the kubelet config.
    ConditionPathExists = "/var/lib/kubelet/config.yaml";
  };
};
# Pre-create the directories kubeadm, kube-vip, and the kubelet unit expect,
# with root-owned 0755 permissions.
systemd.tmpfiles.rules = [
  "d /etc/kubernetes 0755 root root -"
  "d /etc/kubernetes/manifests 0755 root root -"
  "d /var/lib/kubelet 0755 root root -"
  "d /var/lib/kubelet/pki 0755 root root -"
];
};
}