# NixOS module: single-box kubeadm bootstrap with a kube-vip control-plane VIP.
#
# Provides four helper scripts:
#   th-kubeadm-init               - initialise the first control-plane node
#   th-kubeadm-join-control-plane - join an additional control-plane node
#   th-kubeadm-join-worker        - join a worker node
#   th-kubeadm-status             - quick health check of the runtime pieces
{ config, lib, pkgs, ... }:

let
  cfg = config.terrahome.kubeadm;

  # Prefer a version-pinned kubernetes attribute derived from the configured
  # minor version (e.g. k8sMinor = "1.31" -> pkgs.kubernetes_1_31); fall back
  # to the default pkgs.kubernetes when no such attribute exists.
  pinnedK8s =
    lib.attrByPath
      [ "kubernetes_${lib.replaceStrings [ "." ] [ "_" ] cfg.k8sMinor}" ]
      pkgs.kubernetes
      pkgs;

  # kube-vip provides the floating virtual IP used as the HA
  # control-plane endpoint (run as a static pod on control-plane nodes).
  kubeVipImage = "ghcr.io/kube-vip/kube-vip:v0.8.9";
in
{
  options.terrahome.kubeadm = {
    # Kubernetes minor version; selects the matching pinned pkgs attribute.
    k8sMinor = lib.mkOption {
      type = lib.types.str;
      default = "1.31";
    };

    # Interface kube-vip binds the control-plane VIP to. When the interface
    # does not exist, the scripts fall back to the default-route interface.
    controlPlaneInterface = lib.mkOption {
      type = lib.types.str;
      default = "eth0";
    };

    # Last octet of the control-plane VIP; the first three octets are taken
    # from the node's own IPv4 address on the chosen interface (assumes a
    # /24-style addressing layout on the control-plane network).
    controlPlaneVipSuffix = lib.mkOption {
      type = lib.types.int;
      default = 250;
    };

    podSubnet = lib.mkOption {
      type = lib.types.str;
      default = "10.244.0.0/16";
    };

    serviceSubnet = lib.mkOption {
      type = lib.types.str;
      default = "10.96.0.0/12";
    };

    clusterDomain = lib.mkOption {
      type = lib.types.str;
      default = "cluster.local";
    };
  };

  config = {
    # Kernel prerequisites checked by kubeadm preflight.
    boot.kernelModules = [ "overlay" "br_netfilter" ];
    boot.kernel.sysctl = {
      "net.ipv4.ip_forward" = 1;
      "net.bridge.bridge-nf-call-iptables" = 1;
      "net.bridge.bridge-nf-call-ip6tables" = 1;
    };

    # containerd as the CRI runtime, with the systemd cgroup driver that
    # matches kubelet's default cgroupDriver.
    virtualisation.containerd.enable = true;
    virtualisation.containerd.settings = {
      plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options.SystemdCgroup = true;
    };

    # kubelet refuses to start with swap enabled.
    swapDevices = lib.mkForce [ ];

    services.openssh.enable = true;
    services.openssh.settings = {
      PasswordAuthentication = false;
      KbdInteractiveAuthentication = false;
    };

    users.users.micqdf = {
      isNormalUser = true;
      extraGroups = [ "wheel" ];
    };
    security.sudo.wheelNeedsPassword = false;
    nix.settings.trusted-users = [ "root" "micqdf" ];

    # Aggressive GC: cluster nodes rebuild frequently and disk is tight.
    nix.gc = {
      automatic = true;
      dates = "daily";
      options = "--delete-older-than 3d";
    };
    nix.settings.auto-optimise-store = true;

    environment.variables = {
      KUBECONFIG = "/etc/kubernetes/admin.conf";
      KUBE_VIP_IMAGE = kubeVipImage;
    };

    environment.systemPackages =
      (with pkgs; [
        containerd
        cri-tools
        cni-plugins
        pinnedK8s
        kubernetes-helm
        conntrack-tools
        socat
        ethtool
        ipvsadm
        iproute2
        iptables
        ebtables
        jq
        curl
        vim
        gawk
      ])
      ++ [
        # Initialise the first control-plane node: derive the VIP, reset any
        # previous state, write the kube-vip static pod, run kubeadm init and
        # wait for the VIP and API server to come up.
        (pkgs.writeShellScriptBin "th-kubeadm-init" ''
          set -euo pipefail
          # Proxies break kubeadm preflight checks and image pulls.
          unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY

          iface="${cfg.controlPlaneInterface}"
          if ! ip link show "$iface" >/dev/null 2>&1; then
            # Fall back to the interface carrying the default route.
            iface="$(ip -o -4 route show to default | awk 'NR==1 {print $5}')"
          fi
          if [ -z "''${iface:-}" ]; then
            echo "Could not determine network interface for kube-vip"
            exit 1
          fi

          suffix="${toString cfg.controlPlaneVipSuffix}"
          pod_subnet="${cfg.podSubnet}"
          service_subnet="${cfg.serviceSubnet}"
          domain="${cfg.clusterDomain}"
          node_name="${config.networking.hostName}"

          local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
          if [ -z "''${local_ip_cidr:-}" ]; then
            echo "Could not determine IPv4 CIDR on interface $iface"
            exit 1
          fi
          # VIP = first three octets of our own address + configured suffix.
          subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
          vip="$subnet_prefix.$suffix"

          echo "Using control-plane endpoint: $vip:6443"
          echo "Using kube-vip interface: $iface"
          echo "Using kubeadm node name: $node_name"
          hostname "$node_name" || true

          # Wipe any previous kubeadm/kubelet state so re-running is idempotent.
          rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env
          systemctl unmask kubelet || true
          systemctl stop kubelet || true
          systemctl reset-failed kubelet || true
          # env -i: run kubeadm with a minimal environment (no stray proxy vars).
          env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm reset -f || true
          rm -f /etc/kubernetes/kubelet.conf /etc/kubernetes/bootstrap-kubelet.conf
          rm -f /var/lib/kubelet/kubeconfig /var/lib/kubelet/instance-config.yaml
          rm -rf /var/lib/kubelet/pki
          systemctl daemon-reload
          systemctl unmask kubelet || true
          systemctl enable kubelet || true

          echo "==> Ensuring containerd is running"
          systemctl start containerd || true
          sleep 2
          if ! systemctl is-active containerd; then
            echo "ERROR: containerd not running"
            journalctl -xeu containerd --no-pager -n 30
            exit 1
          fi

          mkdir -p /etc/kubernetes/manifests
          mkdir -p /tmp/kubeadm

          # kubeadm v1beta4 config template; the heredoc delimiter is quoted so
          # nothing expands here - placeholders are substituted with sed below.
          cat > /tmp/kubeadm/init-config.yaml << 'KUBEADMCONFIG'
          apiVersion: kubeadm.k8s.io/v1beta4
          kind: InitConfiguration
          nodeRegistration:
            name: "KUBEADM_NODE_NAME"
            criSocket: unix:///run/containerd/containerd.sock
            kubeletExtraArgs:
              - name: hostname-override
                value: "KUBEADM_NODE_NAME"
          ---
          apiVersion: kubeadm.k8s.io/v1beta4
          kind: ClusterConfiguration
          controlPlaneEndpoint: "KUBEADM_ENDPOINT"
          networking:
            podSubnet: "KUBEADM_POD_SUBNET"
            serviceSubnet: "KUBEADM_SERVICE_SUBNET"
            dnsDomain: "KUBEADM_DNS_DOMAIN"
          KUBEADMCONFIG
          sed -i "s|KUBEADM_ENDPOINT|$vip:6443|g" /tmp/kubeadm/init-config.yaml
          sed -i "s|KUBEADM_POD_SUBNET|$pod_subnet|g" /tmp/kubeadm/init-config.yaml
          sed -i "s|KUBEADM_SERVICE_SUBNET|$service_subnet|g" /tmp/kubeadm/init-config.yaml
          sed -i "s|KUBEADM_DNS_DOMAIN|$domain|g" /tmp/kubeadm/init-config.yaml
          sed -i "s|KUBEADM_NODE_NAME|$node_name|g" /tmp/kubeadm/init-config.yaml

          echo "==> Pre-pulling kubeadm images"
          env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm config images pull --config /tmp/kubeadm/init-config.yaml || true

          echo "==> Creating kube-vip static pod manifest"
          ctr image pull "${kubeVipImage}"
          ctr run --rm --net-host "${kubeVipImage}" kube-vip-manifest /kube-vip manifest pod \
            --log 4 \
            --interface "$iface" \
            --address "$vip" \
            --controlplane \
            --arp \
            > /etc/kubernetes/manifests/kube-vip.yaml

          # kube-vip bootstrap workaround for Kubernetes >=1.29.
          # During early kubeadm phases, super-admin.conf is available before
          # admin.conf is fully usable.
          sed -i 's#path: /etc/kubernetes/admin.conf#path: /etc/kubernetes/super-admin.conf#' /etc/kubernetes/manifests/kube-vip.yaml || true
          echo "==> kube-vip manifest kubeconfig mount"
          grep -E 'mountPath:|path:' /etc/kubernetes/manifests/kube-vip.yaml | grep -E 'kubernetes/(admin|super-admin)\.conf' || true

          KUBEADM_INIT_LOG=/tmp/kubeadm-init.log
          if ! env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init \
              --config /tmp/kubeadm/init-config.yaml \
              --upload-certs \
              --ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 2>&1 | tee "$KUBEADM_INIT_LOG"; then
            # kubeadm occasionally loses a race writing the CRISocket annotation
            # even though the control plane itself came up; recover by waiting
            # for the node to register and re-uploading the kubelet config.
            if grep -q "error writing CRISocket for this node: nodes" "$KUBEADM_INIT_LOG" && [ -f /etc/kubernetes/admin.conf ]; then
              echo "==> kubeadm hit CRISocket race; waiting for node registration"
              echo "==> forcing kubelet restart to pick bootstrap flags"
              systemctl daemon-reload || true
              systemctl restart kubelet || true
              sleep 3
              echo "==> kubelet bootstrap flags"
              cat /var/lib/kubelet/kubeadm-flags.env || true
              registered=0
              for i in $(seq 1 60); do
                if KUBECONFIG=/etc/kubernetes/admin.conf kubectl get node "$node_name" >/dev/null 2>&1; then
                  echo "==> node $node_name registered; uploading kubelet config"
                  env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init phase upload-config kubelet --config /tmp/kubeadm/init-config.yaml
                  registered=1
                  break
                fi
                sleep 2
              done
              if [ "$registered" -ne 1 ]; then
                echo "==> node $node_name did not register after kubeadm init failure"
                KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -o wide || true
                echo "==> kubelet logs (registration hints)"
                journalctl -u kubelet --no-pager -n 120 | grep -Ei "register|node|bootstrap|certificate|forbidden|unauthorized|refused|x509" || true
                exit 1
              fi
            else
              # Any other init failure: dump as much diagnostic state as we can.
              echo "==> kubeadm init failed, checking pod status:"
              crictl pods || true
              crictl ps -a || true
              echo "==> kube-vip containers:"
              crictl ps -a --name kube-vip || true
              echo "==> kube-vip logs:"
              for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
                echo "--- kube-vip container $container_id ---"
                crictl logs "$container_id" 2>/dev/null || true
                crictl inspect "$container_id" 2>/dev/null | jq -r '.status | "exitCode=\(.exitCode) reason=\(.reason // "") message=\(.message // "")"' || true
              done
              echo "==> Checking if VIP is bound:"
              ip -4 addr show | grep "$vip" || echo "VIP NOT BOUND"
              echo "==> kubelet logs:"
              journalctl -xeu kubelet --no-pager -n 50
              exit 1
            fi
          fi

          echo "==> Waiting for kube-vip to claim VIP $vip"
          for i in $(seq 1 90); do
            if ip -4 addr show | grep -q "$vip"; then
              echo "==> VIP $vip is bound"
              break
            fi
            if [ "$i" -eq 90 ]; then
              echo "==> ERROR: VIP not bound after 3 minutes"
              crictl ps -a --name kube-vip || true
              for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
                echo "--- kube-vip container $container_id ---"
                crictl logs "$container_id" 2>/dev/null || true
              done
              exit 1
            fi
            sleep 2
          done

          echo "==> Waiting for API server to be ready"
          for i in $(seq 1 60); do
            if curl -sk "https://$vip:6443/healthz" 2>/dev/null | grep -q "ok"; then
              echo "==> API server is healthy"
              break
            fi
            if [ "$i" -eq 60 ]; then
              echo "==> ERROR: API server not healthy after 2 minutes"
              crictl pods || true
              crictl ps -a || true
              exit 1
            fi
            sleep 2
          done

          # Switch kube-vip to normal admin.conf after bootstrap finishes.
          sed -i 's#path: /etc/kubernetes/super-admin.conf#path: /etc/kubernetes/admin.conf#' /etc/kubernetes/manifests/kube-vip.yaml || true

          mkdir -p /root/.kube
          cp /etc/kubernetes/admin.conf /root/.kube/config
          chmod 600 /root/.kube/config

          echo
          echo "Next: install Cilium, then generate join commands:"
          echo "  kubeadm token create --print-join-command"
          echo "  kubeadm token create --print-join-command --certificate-key <cert-key>"
        '')

        # Join an additional control-plane node. Takes the full kubeadm join
        # command (as printed by th-kubeadm-init's hints) as a single quoted
        # argument, prepares kube-vip and a clean kubelet state, then runs it.
        (pkgs.writeShellScriptBin "th-kubeadm-join-control-plane" ''
          set -euo pipefail
          unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY

          if [ "$#" -lt 1 ]; then
            echo "Usage: th-kubeadm-join-control-plane '<kubeadm join command>'"
            exit 1
          fi

          iface="${cfg.controlPlaneInterface}"
          if ! ip link show "$iface" >/dev/null 2>&1; then
            iface="$(ip -o -4 route show to default | awk 'NR==1 {print $5}')"
          fi
          if [ -z "''${iface:-}" ]; then
            echo "Could not determine network interface for kube-vip"
            exit 1
          fi

          suffix="${toString cfg.controlPlaneVipSuffix}"
          local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
          if [ -z "''${local_ip_cidr:-}" ]; then
            echo "Could not determine IPv4 CIDR on interface $iface"
            exit 1
          fi
          # Same VIP derivation as th-kubeadm-init: local /24 prefix + suffix.
          subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
          vip="$subnet_prefix.$suffix"

          mkdir -p /etc/kubernetes/manifests
          ctr image pull "${kubeVipImage}"
          # --leaderElection: multiple control-plane nodes share the VIP.
          ctr run --rm --net-host "${kubeVipImage}" kube-vip /kube-vip manifest pod \
            --log 4 \
            --interface "$iface" \
            --address "$vip" \
            --controlplane \
            --arp \
            --leaderElection \
            > /etc/kubernetes/manifests/kube-vip.yaml

          # Clean kubelet state so the join starts from scratch.
          rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env
          rm -f /etc/kubernetes/kubelet.conf /etc/kubernetes/bootstrap-kubelet.conf
          rm -f /var/lib/kubelet/kubeconfig /var/lib/kubelet/instance-config.yaml
          rm -rf /var/lib/kubelet/pki
          systemctl unmask kubelet || true
          systemctl stop kubelet || true
          systemctl enable kubelet || true
          systemctl reset-failed kubelet || true
          systemctl daemon-reload
          env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm reset -f || true

          # Run the caller-supplied join command.
          eval "$1"
        '')

        # Join a worker node. Takes the kubeadm join command as one quoted
        # argument; only resets kubelet state first (no kube-vip on workers).
        (pkgs.writeShellScriptBin "th-kubeadm-join-worker" ''
          set -euo pipefail
          unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY

          if [ "$#" -lt 1 ]; then
            echo "Usage: th-kubeadm-join-worker '<kubeadm join command>'"
            exit 1
          fi

          rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env
          rm -f /etc/kubernetes/kubelet.conf /etc/kubernetes/bootstrap-kubelet.conf
          rm -f /var/lib/kubelet/kubeconfig /var/lib/kubelet/instance-config.yaml
          rm -rf /var/lib/kubelet/pki
          systemctl unmask kubelet || true
          systemctl stop kubelet || true
          systemctl enable kubelet || true
          systemctl reset-failed kubelet || true
          systemctl daemon-reload
          env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm reset -f || true

          # Run the caller-supplied join command.
          eval "$1"
        '')

        # Quick, non-failing health summary of the node's runtime pieces.
        (pkgs.writeShellScriptBin "th-kubeadm-status" ''
          set -euo pipefail
          systemctl is-active containerd || true
          systemctl is-active kubelet || true
          crictl info >/dev/null && echo "crictl: ok" || echo "crictl: not-ready"
        '')
      ];

    # kubelet unit mirroring kubeadm's upstream 10-kubeadm.conf drop-in:
    # kubeadm writes /var/lib/kubelet/kubeadm-flags.env and the kubeconfigs;
    # the unit only starts once those files exist.
    systemd.services.kubelet = {
      description = "Kubernetes Kubelet";
      wantedBy = [ "multi-user.target" ];
      path = [ pkgs.util-linux ];
      wants = [ "network-online.target" ];
      after = [ "containerd.service" "network-online.target" ];
      serviceConfig = {
        Environment = [
          "KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
          "KUBELET_KUBEADM_ARGS="
          "KUBELET_EXTRA_ARGS="
        ];
        # "-" prefix: a missing file is not an error (written at init/join time).
        EnvironmentFile = [
          "-/var/lib/kubelet/kubeadm-flags.env"
          "-/etc/default/kubelet"
        ];
        ExecStart = "${pinnedK8s}/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf \$KUBELET_CONFIG_ARGS \$KUBELET_KUBEADM_ARGS \$KUBELET_EXTRA_ARGS";
        Restart = "on-failure";
        RestartSec = "10";
      };
      # Gate startup on kubeadm having produced a kubelet config + kubeconfig.
      unitConfig = {
        ConditionPathExists = "/var/lib/kubelet/config.yaml";
        ConditionPathExistsGlob = "/etc/kubernetes/*kubelet.conf";
      };
    };

    # Ensure the directories kubeadm/kubelet expect exist at boot.
    systemd.tmpfiles.rules = [
      "d /etc/kubernetes 0755 root root -"
      "d /etc/kubernetes/manifests 0755 root root -"
      "d /var/lib/kubelet 0755 root root -"
      "d /var/lib/kubelet/pki 0755 root root -"
    ];
  };
}