Files
TerraHome/nixos/kubeadm/modules/k8s-common.nix
MichaelFisher1997 a81799a2b5
Some checks failed
Terraform Plan / Terraform Plan (push) Has been cancelled
fix: stabilize kubeadm bootstrap and reduce Proxmox plan latency
Move kubeadm reset ahead of kube-vip manifest generation, use super-admin.conf during bootstrap for kube-vip, and restore admin.conf after init. Also switch nixos-rebuild to --sudo and make QEMU guest agent optional so Terraform plan can skip slow guest-agent refreshes when it is not installed.
2026-03-02 22:09:10 +00:00

371 lines
12 KiB
Nix

{ config, lib, pkgs, ... }:
let
  # Kubernetes package used for the CLI tools and the kubelet unit: the
  # kubernetes_1_31 attribute when this nixpkgs provides it, otherwise the
  # default pkgs.kubernetes.
  pinnedK8s = pkgs.kubernetes_1_31 or pkgs.kubernetes;

  # kube-vip image used to render the static pod manifest; also exported to
  # login shells as KUBE_VIP_IMAGE.
  kubeVipImage = "ghcr.io/kube-vip/kube-vip:v0.8.9";
in
{
# Tunables consumed by the th-kubeadm-* helper scripts defined in this module.
options.terrahome.kubeadm = {
  k8sMinor = lib.mkOption {
    type = lib.types.str;
    default = "1.31";
    description = ''
      Kubernetes minor version targeted by the cluster. This module does not
      read the value itself; the installed package is chosen separately.
    '';
  };

  controlPlaneInterface = lib.mkOption {
    type = lib.types.str;
    default = "eth0";
    description = ''
      Network interface kube-vip announces the control-plane VIP on. If the
      interface does not exist at run time, the helper scripts fall back to
      the interface of the default IPv4 route.
    '';
  };

  controlPlaneVipSuffix = lib.mkOption {
    # The suffix becomes the last octet of the VIP, so only host octets make
    # sense; anything outside 1-254 would yield an unusable address.
    type = lib.types.ints.between 1 254;
    default = 250;
    description = ''
      Last octet of the control-plane VIP. The VIP is built from the first
      three octets of the node's IPv4 address followed by this value.
    '';
  };

  podSubnet = lib.mkOption {
    type = lib.types.str;
    default = "10.244.0.0/16";
    description = "Pod network CIDR written to networking.podSubnet in the kubeadm ClusterConfiguration.";
  };

  serviceSubnet = lib.mkOption {
    type = lib.types.str;
    default = "10.96.0.0/12";
    description = "Service network CIDR written to networking.serviceSubnet in the kubeadm ClusterConfiguration.";
  };

  clusterDomain = lib.mkOption {
    type = lib.types.str;
    default = "cluster.local";
    description = "Cluster DNS domain written to networking.dnsDomain in the kubeadm ClusterConfiguration.";
  };
};
config = {
# Kernel modules and sysctls for container networking: overlay filesystem,
# bridged traffic visible to iptables/ip6tables, and IPv4 forwarding.
boot = {
  kernelModules = [ "overlay" "br_netfilter" ];
  kernel.sysctl = {
    "net.ipv4.ip_forward" = 1;
    "net.bridge.bridge-nf-call-iptables" = 1;
    "net.bridge.bridge-nf-call-ip6tables" = 1;
  };
};

# containerd as the container runtime, with runc using the systemd cgroup driver.
virtualisation.containerd = {
  enable = true;
  settings.plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options.SystemdCgroup = true;
};

# Force an empty swap list, overriding swap devices declared elsewhere.
swapDevices = lib.mkForce [ ];

# SSH with password and keyboard-interactive logins disabled.
services.openssh = {
  enable = true;
  settings = {
    PasswordAuthentication = false;
    KbdInteractiveAuthentication = false;
  };
};

# Admin account. No SSH keys are declared in this module.
# NOTE(review): presumably authorized keys are provided elsewhere — confirm.
users.users.micqdf = {
  isNormalUser = true;
  extraGroups = [ "wheel" ];
};

# wheel members get passwordless sudo.
security.sudo.wheelNeedsPassword = false;

nix = {
  settings = {
    trusted-users = [ "root" "micqdf" ];
    auto-optimise-store = true;
  };
  # Daily garbage collection of store paths older than three days.
  gc = {
    automatic = true;
    dates = "daily";
    options = "--delete-older-than 3d";
  };
};

# Defaults for interactive shells on the node.
environment.variables = {
  KUBECONFIG = "/etc/kubernetes/admin.conf";
  KUBE_VIP_IMAGE = kubeVipImage;
};
# System-wide packages: the container runtime and Kubernetes tooling, network
# utilities, and general CLI helpers, followed by the th-kubeadm-* helper
# scripts appended in the second list.
environment.systemPackages = (with pkgs; [
# Container runtime, CRI client and CNI plugin binaries.
containerd
cri-tools
cni-plugins
# Kubernetes package selected by the pinnedK8s let-binding, plus Helm.
pinnedK8s
kubernetes-helm
# Networking utilities.
conntrack-tools
socat
ethtool
ipvsadm
iproute2
iptables
ebtables
# General-purpose tools; curl and gawk are invoked by the helper scripts.
jq
curl
vim
gawk
]) ++ [
(pkgs.writeShellScriptBin "th-kubeadm-init" ''
  # th-kubeadm-init: bootstrap the first control-plane node with kubeadm and kube-vip.
  #
  # Stages: derive the control-plane VIP, reset any previous kubeadm state,
  # render the kubeadm config, install the kube-vip static pod manifest, run
  # kubeadm init, then wait for the VIP and the API server before copying
  # admin.conf to /root/.kube/config.
  # Takes no arguments. Exits 1 when a stage fails.
  set -euo pipefail
  unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY

  # Succeeds only when $vip is configured on a local interface. The address
  # column is compared exactly, so 10.0.0.25 never matches 10.0.0.250 and dots
  # are not treated as regex wildcards. awk consumes all input, which keeps
  # the pipeline safe under pipefail.
  vip_is_bound() {
    ip -4 -o addr show | awk -v vip="$vip" '{ split($4, addr, "/"); if (addr[1] == vip) found = 1 } END { exit !found }'
  }

  # Print the logs of every kube-vip container known to the CRI (best effort).
  dump_kube_vip_logs() {
    for container_id in $(crictl ps -a --name kube-vip -q 2>/dev/null); do
      echo "--- kube-vip container $container_id ---"
      crictl logs "$container_id" 2>/dev/null || true
    done
  }

  # Interface for kube-vip: the configured one, else the default-route interface.
  iface="${config.terrahome.kubeadm.controlPlaneInterface}"
  if ! ip link show "$iface" >/dev/null 2>&1; then
    iface="$(ip -o -4 route show to default | awk 'NR==1 {print $5}')"
  fi
  if [ -z "''${iface:-}" ]; then
    echo "Could not determine network interface for kube-vip"
    exit 1
  fi

  suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
  pod_subnet="${config.terrahome.kubeadm.podSubnet}"
  service_subnet="${config.terrahome.kubeadm.serviceSubnet}"
  domain="${config.terrahome.kubeadm.clusterDomain}"

  local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
  if [ -z "''${local_ip_cidr:-}" ]; then
    echo "Could not determine IPv4 CIDR on interface $iface"
    exit 1
  fi

  # VIP = first three octets of the node address + configured suffix.
  subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
  vip="$subnet_prefix.$suffix"
  echo "Using control-plane endpoint: $vip:6443"
  echo "Using kube-vip interface: $iface"

  # Clear state from earlier attempts. The reset runs before the kube-vip
  # manifest is written, so it cannot remove the new manifest.
  rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env
  systemctl unmask kubelet || true
  systemctl stop kubelet || true
  systemctl reset-failed kubelet || true
  env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm reset -f || true
  systemctl daemon-reload
  systemctl unmask kubelet || true

  echo "==> Ensuring containerd is running"
  systemctl start containerd || true
  sleep 2
  if ! systemctl is-active containerd; then
    echo "ERROR: containerd not running"
    journalctl -xeu containerd --no-pager -n 30
    exit 1
  fi

  mkdir -p /etc/kubernetes/manifests
  # NOTE(review): /tmp/kubeadm is a predictable path written as root; consider a
  # root-only location if untrusted local users can exist on the node.
  mkdir -p /tmp/kubeadm

  # The heredoc is quoted, so the KUBEADM_* placeholders are written literally
  # and substituted by the sed calls below.
  # NOTE(review): the KubeletConfiguration disables webhook authentication and
  # sets authorization to AlwaysAllow. Confirm this is intended.
  cat > /tmp/kubeadm/init-config.yaml << 'KUBEADMCONFIG'
  apiVersion: kubeadm.k8s.io/v1beta4
  kind: InitConfiguration
  nodeRegistration:
    criSocket: unix:///run/containerd/containerd.sock
  ---
  apiVersion: kubeadm.k8s.io/v1beta4
  kind: ClusterConfiguration
  controlPlaneEndpoint: "KUBEADM_ENDPOINT"
  networking:
    podSubnet: "KUBEADM_POD_SUBNET"
    serviceSubnet: "KUBEADM_SERVICE_SUBNET"
    dnsDomain: "KUBEADM_DNS_DOMAIN"
  ---
  apiVersion: kubelet.config.k8s.io/v1beta1
  kind: KubeletConfiguration
  authentication:
    webhook:
      enabled: false
  authorization:
    mode: AlwaysAllow
  KUBEADMCONFIG
  sed -i "s|KUBEADM_ENDPOINT|$vip:6443|g" /tmp/kubeadm/init-config.yaml
  sed -i "s|KUBEADM_POD_SUBNET|$pod_subnet|g" /tmp/kubeadm/init-config.yaml
  sed -i "s|KUBEADM_SERVICE_SUBNET|$service_subnet|g" /tmp/kubeadm/init-config.yaml
  sed -i "s|KUBEADM_DNS_DOMAIN|$domain|g" /tmp/kubeadm/init-config.yaml

  echo "==> Pre-pulling kubeadm images"
  env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm config images pull --config /tmp/kubeadm/init-config.yaml || true

  echo "==> Creating kube-vip static pod manifest"
  ctr image pull "${kubeVipImage}"
  # Render into a staging file first: a failed or interrupted ctr run must not
  # leave an empty or truncated manifest in the static pod directory. The
  # container ID carries a random suffix so a leftover container from an
  # interrupted run cannot make this one fail with an ID collision.
  manifest_tmp="$(mktemp /tmp/kubeadm/kube-vip.yaml.XXXXXX)"
  ctr run --rm --net-host "${kubeVipImage}" "kube-vip-manifest-$RANDOM" /kube-vip manifest pod \
    --interface "$iface" \
    --address "$vip" \
    --controlplane \
    --services \
    --arp \
    --leaderElection \
    > "$manifest_tmp"
  if [ ! -s "$manifest_tmp" ]; then
    echo "ERROR: kube-vip produced an empty manifest"
    rm -f "$manifest_tmp"
    exit 1
  fi
  # kube-vip bootstrap workaround for Kubernetes >=1.29.
  # During early kubeadm phases, super-admin.conf is available before admin.conf is fully usable.
  sed -i 's#/etc/kubernetes/admin.conf#/etc/kubernetes/super-admin.conf#g' "$manifest_tmp" || true
  install -m 0644 "$manifest_tmp" /etc/kubernetes/manifests/kube-vip.yaml
  rm -f "$manifest_tmp"

  env -i PATH=/run/current-system/sw/bin:/usr/bin:/bin kubeadm init \
    --config /tmp/kubeadm/init-config.yaml \
    --upload-certs \
    --ignore-preflight-errors=NumCPU,HTTPProxyCIDR,Port-10250 || {
      echo "==> kubeadm init failed, checking pod status:"
      crictl pods || true
      crictl ps -a || true
      echo "==> kube-vip containers:"
      crictl ps -a --name kube-vip || true
      echo "==> kube-vip logs:"
      dump_kube_vip_logs
      echo "==> Checking if VIP is bound:"
      if vip_is_bound; then
        echo "VIP $vip is bound"
      else
        echo "VIP NOT BOUND"
      fi
      echo "==> kubelet logs:"
      journalctl -xeu kubelet --no-pager -n 50
      exit 1
    }

  # Up to 90 polls, 2 seconds apart.
  echo "==> Waiting for kube-vip to claim VIP $vip"
  for i in $(seq 1 90); do
    if vip_is_bound; then
      echo "==> VIP $vip is bound"
      break
    fi
    if [ "$i" -eq 90 ]; then
      echo "==> ERROR: VIP not bound after 3 minutes"
      crictl ps -a --name kube-vip || true
      dump_kube_vip_logs
      exit 1
    fi
    sleep 2
  done

  # Up to 60 polls, 2 seconds apart. --max-time keeps a single stalled request
  # from stretching the wait far beyond the advertised limit.
  echo "==> Waiting for API server to be ready"
  for i in $(seq 1 60); do
    if curl -sk --max-time 5 "https://$vip:6443/healthz" 2>/dev/null | grep -q "ok"; then
      echo "==> API server is healthy"
      break
    fi
    if [ "$i" -eq 60 ]; then
      echo "==> ERROR: API server not healthy after 2 minutes"
      crictl pods || true
      crictl ps -a || true
      exit 1
    fi
    sleep 2
  done

  # Switch kube-vip to normal admin.conf after bootstrap finishes.
  sed -i 's#/etc/kubernetes/super-admin.conf#/etc/kubernetes/admin.conf#g' /etc/kubernetes/manifests/kube-vip.yaml || true

  mkdir -p /root/.kube
  cp /etc/kubernetes/admin.conf /root/.kube/config
  chmod 600 /root/.kube/config

  echo
  echo "Next: install Cilium, then generate join commands:"
  echo " kubeadm token create --print-join-command"
  echo " kubeadm token create --print-join-command --certificate-key <key>"
'')
(pkgs.writeShellScriptBin "th-kubeadm-join-control-plane" ''
  # th-kubeadm-join-control-plane: join this node as an additional control-plane member.
  #
  # Usage: th-kubeadm-join-control-plane '<kubeadm join ... --control-plane --certificate-key ...>'
  # The single argument is the complete join command. The script renders the
  # kube-vip static pod manifest, clears stale kubelet state, installs the
  # manifest and then runs the join command.
  set -euo pipefail
  unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY

  if [ "$#" -lt 1 ]; then
    echo "Usage: th-kubeadm-join-control-plane '<kubeadm join ... --control-plane --certificate-key ...>'"
    exit 1
  fi

  # Interface for kube-vip: the configured one, else the default-route interface.
  iface="${config.terrahome.kubeadm.controlPlaneInterface}"
  if ! ip link show "$iface" >/dev/null 2>&1; then
    iface="$(ip -o -4 route show to default | awk 'NR==1 {print $5}')"
  fi
  if [ -z "''${iface:-}" ]; then
    echo "Could not determine network interface for kube-vip"
    exit 1
  fi

  suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
  local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
  if [ -z "''${local_ip_cidr:-}" ]; then
    echo "Could not determine IPv4 CIDR on interface $iface"
    exit 1
  fi

  # VIP = first three octets of the node address + configured suffix.
  subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
  vip="$subnet_prefix.$suffix"

  mkdir -p /etc/kubernetes/manifests
  ctr image pull "${kubeVipImage}"
  # Render into a staging file: the kubelet may still be running at this point
  # and must never see an empty or half-written static pod manifest. The
  # container ID carries a random suffix so a leftover container from an
  # interrupted run cannot make this one fail with an ID collision.
  manifest_tmp="$(mktemp)"
  ctr run --rm --net-host "${kubeVipImage}" "kube-vip-manifest-$RANDOM" /kube-vip manifest pod \
    --interface "$iface" \
    --address "$vip" \
    --controlplane \
    --services \
    --arp \
    --leaderElection \
    > "$manifest_tmp"
  if [ ! -s "$manifest_tmp" ]; then
    echo "ERROR: kube-vip produced an empty manifest"
    rm -f "$manifest_tmp"
    exit 1
  fi

  # Clear kubelet state left by earlier attempts before joining.
  rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env
  systemctl unmask kubelet || true
  systemctl stop kubelet || true
  systemctl reset-failed kubelet || true
  systemctl daemon-reload

  # Install the manifest only now that the kubelet is stopped.
  install -m 0644 "$manifest_tmp" /etc/kubernetes/manifests/kube-vip.yaml
  rm -f "$manifest_tmp"

  # SECURITY NOTE(review): the argument is executed verbatim as shell code.
  # Only pass a join command produced by "kubeadm token create --print-join-command".
  eval "$1"
'')
(pkgs.writeShellScriptBin "th-kubeadm-join-worker" ''
  # th-kubeadm-join-worker: join this node to the cluster as a worker.
  #
  # Usage: th-kubeadm-join-worker '<kubeadm join ...>'
  # The single argument is the complete join command. Stale kubelet state is
  # removed first, then the command is executed.
  set -euo pipefail
  unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY

  if [ "$#" -lt 1 ]; then
    echo "Usage: th-kubeadm-join-worker '<kubeadm join ...>'"
    exit 1
  fi

  rm -f /var/lib/kubelet/config.yaml /var/lib/kubelet/kubeadm-flags.env

  # Best-effort kubelet cleanup; each step may fail without aborting the join.
  for action in unmask stop reset-failed; do
    systemctl "$action" kubelet || true
  done
  systemctl daemon-reload

  # SECURITY NOTE(review): the argument is executed verbatim as shell code.
  # Only pass a join command produced by "kubeadm token create --print-join-command".
  eval "$1"
'')
(pkgs.writeShellScriptBin "th-kubeadm-status" ''
  # th-kubeadm-status: print the systemd state of containerd and the kubelet,
  # then report whether the CRI endpoint answers crictl.
  set -euo pipefail

  # is-active prints the state; a non-active unit must not abort the report.
  for unit in containerd kubelet; do
    systemctl is-active "$unit" || true
  done

  if crictl info >/dev/null; then
    echo "crictl: ok"
  else
    echo "crictl: not-ready"
  fi
'')
];
# kubelet unit in the shape kubeadm expects: flags come from environment
# variables that kubeadm populates via /var/lib/kubelet/kubeadm-flags.env.
systemd.services.kubelet = {
  description = "Kubernetes Kubelet";

  # Not pulled in by any target; the unit has to be started explicitly.
  # NOTE(review): with an empty wantedBy the kubelet presumably will not come
  # back on its own after a reboot — confirm this is intended.
  wantedBy = lib.mkForce [ ];
  wants = [ "network-online.target" ];
  after = [ "containerd.service" "network-online.target" ];

  # NixOS services start with a minimal PATH. The kubelet executes external
  # tools (mount/umount, iptables, conntrack, ip, ...), so they are provided
  # explicitly here.
  path = with pkgs; [
    util-linux
    iproute2
    iptables
    conntrack-tools
    ethtool
    socat
  ];

  serviceConfig = {
    # Defaults; the optional environment files below override them.
    Environment = [
      "KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
      "KUBELET_KUBEADM_ARGS="
      "KUBELET_EXTRA_ARGS="
    ];
    # A leading "-" makes a missing file non-fatal.
    EnvironmentFile = [
      "-/var/lib/kubelet/kubeadm-flags.env"
      "-/etc/default/kubelet"
    ];
    # "\$" keeps the variable references literal so systemd expands them.
    ExecStart = "${pinnedK8s}/bin/kubelet \$KUBELET_CONFIG_ARGS \$KUBELET_KUBEADM_ARGS \$KUBELET_EXTRA_ARGS";
    Restart = "on-failure";
    RestartSec = "10";
  };

  unitConfig = {
    # Skip starting until the kubelet config file has been written.
    ConditionPathExists = "/var/lib/kubelet/config.yaml";
  };
};
# Directories used by kubeadm and the kubelet, created root-owned with mode 0755.
systemd.tmpfiles.rules =
  map (dir: "d ${dir} 0755 root root -") [
    "/etc/kubernetes"
    "/etc/kubernetes/manifests"
    "/var/lib/kubelet"
    "/var/lib/kubelet/pki"
  ];
};
}