Merge pull request 'stage' (#39) from stage into master
All checks were successful
Terraform Apply / Terraform Apply (push) Successful in 27s

Reviewed-on: #39
This commit was merged in pull request #39.
This commit is contained in:
2026-02-28 16:34:24 +00:00
17 changed files with 753 additions and 102 deletions

View File

@@ -0,0 +1,119 @@
# Manually-dispatched workflow: rebuilds every cluster node with
# nixos-rebuild, then bootstraps Kubernetes via kubeadm.
name: Kubeadm Bootstrap
run-name: ${{ gitea.actor }} requested kubeadm bootstrap

on:
  workflow_dispatch:
    inputs:
      confirm:
        description: "Type BOOTSTRAP to run rebuild + kubeadm bootstrap"
        required: true
        type: string

# Shares one concurrency group with the reset workflow so bootstrap and
# reset can never run concurrently against the same cluster.
concurrency:
  group: kubeadm-bootstrap
  cancel-in-progress: false

jobs:
  bootstrap:
    name: "Rebuild and Bootstrap Cluster"
    runs-on: ubuntu-latest
    steps:
      - name: Validate confirmation phrase
        # Pass the user-supplied input through the environment instead of
        # interpolating it into the script body, so a crafted input cannot
        # inject shell commands.
        env:
          CONFIRM: ${{ inputs.confirm }}
        run: |
          if [ "$CONFIRM" != "BOOTSTRAP" ]; then
            echo "Confirmation failed. You must type BOOTSTRAP."
            exit 1
          fi

      - name: Checkout repository
        uses: https://gitea.com/actions/checkout@v4

      - name: Create SSH key
        # Prefer KUBEADM_SSH_PRIVATE_KEY; fall back to SSH_KEY_PRIVATE.
        run: |
          install -m 0700 -d ~/.ssh
          KEY_CONTENT="$(printf '%s' "${{ secrets.KUBEADM_SSH_PRIVATE_KEY }}")"
          if [ -z "$KEY_CONTENT" ]; then
            KEY_CONTENT="$(printf '%s' "${{ secrets.SSH_KEY_PRIVATE }}")"
          fi
          if [ -z "$KEY_CONTENT" ]; then
            echo "Missing SSH private key secret. Set KUBEADM_SSH_PRIVATE_KEY or SSH_KEY_PRIVATE."
            exit 1
          fi
          printf '%s\n' "$KEY_CONTENT" > ~/.ssh/id_ed25519
          chmod 0600 ~/.ssh/id_ed25519

      - name: Set up Terraform
        uses: hashicorp/setup-terraform@v2
        with:
          terraform_version: 1.6.6
          terraform_wrapper: false

      - name: Build Terraform backend files
        working-directory: terraform
        # tr -d '\r\n' strips accidental newlines pasted into the secrets.
        run: |
          cat > secrets.auto.tfvars << EOF
          pm_api_token_secret = "${{ secrets.PM_API_TOKEN_SECRET }}"
          SSH_KEY_PUBLIC = "$(printf '%s' "${{ secrets.SSH_KEY_PUBLIC }}" | tr -d '\r\n')"
          EOF
          cat > backend.hcl << EOF
          bucket = "${{ secrets.B2_TF_BUCKET }}"
          key = "terraform.tfstate"
          region = "us-east-005"
          endpoints = {
            s3 = "${{ secrets.B2_TF_ENDPOINT }}"
          }
          access_key = "$(printf '%s' "${{ secrets.B2_KEY_ID }}" | tr -d '\r\n')"
          secret_key = "$(printf '%s' "${{ secrets.B2_APPLICATION_KEY }}" | tr -d '\r\n')"
          skip_credentials_validation = true
          skip_metadata_api_check = true
          skip_region_validation = true
          skip_requesting_account_id = true
          use_path_style = true
          EOF

      - name: Terraform init for state read
        working-directory: terraform
        run: terraform init -reconfigure -backend-config=backend.hcl

      - name: Create kubeadm inventory
        # Node IPs come from Terraform state outputs, so no per-node IP
        # secrets are needed.
        run: |
          TF_OUTPUT_JSON="$(terraform -chdir=terraform output -json)"
          CP_1="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-1"])' <<< "$TF_OUTPUT_JSON")"
          CP_2="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-2"])' <<< "$TF_OUTPUT_JSON")"
          CP_3="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-3"])' <<< "$TF_OUTPUT_JSON")"
          WK_1="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-1"])' <<< "$TF_OUTPUT_JSON")"
          WK_2="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-2"])' <<< "$TF_OUTPUT_JSON")"
          WK_3="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-3"])' <<< "$TF_OUTPUT_JSON")"
          SSH_USER="$(printf '%s' "${{ secrets.KUBEADM_SSH_USER }}")"
          if [ -z "$SSH_USER" ]; then
            SSH_USER="micqdf"
          fi
          cat > nixos/kubeadm/scripts/inventory.env << EOF
          SSH_USER=$SSH_USER
          CP_1=$CP_1
          CP_2=$CP_2
          CP_3=$CP_3
          WK_1=$WK_1
          WK_2=$WK_2
          WK_3=$WK_3
          EOF

      - name: Validate nix installation
        run: |
          if [ ! -x /nix/var/nix/profiles/default/bin/nix ]; then
            echo "Nix not found at /nix/var/nix/profiles/default/bin/nix"
            exit 1
          fi

      - name: Run cluster rebuild and bootstrap
        # PATH cannot reliably be overridden via the `env:` map (and
        # `env.PATH` is not populated there); prepend the Nix profile
        # inside the script instead.
        run: |
          export PATH="/nix/var/nix/profiles/default/bin:$PATH"
          ./nixos/kubeadm/scripts/rebuild-and-bootstrap.sh

View File

@@ -0,0 +1,110 @@
# Manually-dispatched workflow: runs `kubeadm reset` on every cluster
# node discovered from Terraform state.
name: Kubeadm Reset
run-name: ${{ gitea.actor }} requested kubeadm reset

on:
  workflow_dispatch:
    inputs:
      confirm:
        description: "Type RESET to run kubeadm reset on all nodes"
        required: true
        type: string

# Deliberately the same concurrency group as the bootstrap workflow, so
# reset and bootstrap never overlap.
concurrency:
  group: kubeadm-bootstrap
  cancel-in-progress: false

jobs:
  reset:
    name: "Reset Cluster Nodes"
    runs-on: ubuntu-latest
    steps:
      - name: Validate confirmation phrase
        # Pass the user-supplied input through the environment instead of
        # interpolating it into the script body, so a crafted input cannot
        # inject shell commands.
        env:
          CONFIRM: ${{ inputs.confirm }}
        run: |
          if [ "$CONFIRM" != "RESET" ]; then
            echo "Confirmation failed. You must type RESET."
            exit 1
          fi

      - name: Checkout repository
        uses: https://gitea.com/actions/checkout@v4

      - name: Create SSH key
        # Prefer KUBEADM_SSH_PRIVATE_KEY; fall back to SSH_KEY_PRIVATE.
        run: |
          install -m 0700 -d ~/.ssh
          KEY_CONTENT="$(printf '%s' "${{ secrets.KUBEADM_SSH_PRIVATE_KEY }}")"
          if [ -z "$KEY_CONTENT" ]; then
            KEY_CONTENT="$(printf '%s' "${{ secrets.SSH_KEY_PRIVATE }}")"
          fi
          if [ -z "$KEY_CONTENT" ]; then
            echo "Missing SSH private key secret. Set KUBEADM_SSH_PRIVATE_KEY or SSH_KEY_PRIVATE."
            exit 1
          fi
          printf '%s\n' "$KEY_CONTENT" > ~/.ssh/id_ed25519
          chmod 0600 ~/.ssh/id_ed25519

      - name: Set up Terraform
        uses: hashicorp/setup-terraform@v2
        with:
          terraform_version: 1.6.6
          terraform_wrapper: false

      - name: Build Terraform backend files
        working-directory: terraform
        # tr -d '\r\n' strips accidental newlines pasted into the secrets.
        run: |
          cat > secrets.auto.tfvars << EOF
          pm_api_token_secret = "${{ secrets.PM_API_TOKEN_SECRET }}"
          SSH_KEY_PUBLIC = "$(printf '%s' "${{ secrets.SSH_KEY_PUBLIC }}" | tr -d '\r\n')"
          EOF
          cat > backend.hcl << EOF
          bucket = "${{ secrets.B2_TF_BUCKET }}"
          key = "terraform.tfstate"
          region = "us-east-005"
          endpoints = {
            s3 = "${{ secrets.B2_TF_ENDPOINT }}"
          }
          access_key = "$(printf '%s' "${{ secrets.B2_KEY_ID }}" | tr -d '\r\n')"
          secret_key = "$(printf '%s' "${{ secrets.B2_APPLICATION_KEY }}" | tr -d '\r\n')"
          skip_credentials_validation = true
          skip_metadata_api_check = true
          skip_region_validation = true
          skip_requesting_account_id = true
          use_path_style = true
          EOF

      - name: Terraform init for state read
        working-directory: terraform
        run: terraform init -reconfigure -backend-config=backend.hcl

      - name: Create kubeadm inventory
        # Node IPs come from Terraform state outputs, so no per-node IP
        # secrets are needed.
        run: |
          TF_OUTPUT_JSON="$(terraform -chdir=terraform output -json)"
          CP_1="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-1"])' <<< "$TF_OUTPUT_JSON")"
          CP_2="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-2"])' <<< "$TF_OUTPUT_JSON")"
          CP_3="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-3"])' <<< "$TF_OUTPUT_JSON")"
          WK_1="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-1"])' <<< "$TF_OUTPUT_JSON")"
          WK_2="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-2"])' <<< "$TF_OUTPUT_JSON")"
          WK_3="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-3"])' <<< "$TF_OUTPUT_JSON")"
          SSH_USER="$(printf '%s' "${{ secrets.KUBEADM_SSH_USER }}")"
          if [ -z "$SSH_USER" ]; then
            SSH_USER="micqdf"
          fi
          cat > nixos/kubeadm/scripts/inventory.env << EOF
          SSH_USER=$SSH_USER
          CP_1=$CP_1
          CP_2=$CP_2
          CP_3=$CP_3
          WK_1=$WK_1
          WK_2=$WK_2
          WK_3=$WK_3
          EOF

      - name: Run cluster reset
        run: |
          ./nixos/kubeadm/scripts/reset-cluster-nodes.sh

View File

@@ -10,12 +10,18 @@ This folder defines role-based NixOS configs for a kubeadm cluster.
## What this provides
- Shared Kubernetes/node prerequisites in `modules/k8s-common.nix`
- Shared cluster defaults in `modules/k8s-cluster-settings.nix`
- Role-specific settings for control planes and workers
- Host configs for each node in `hosts/`
- Generated per-node host configs from `flake.nix` (no duplicated host files)
- Bootstrap helper commands:
- `th-kubeadm-init`
- `th-kubeadm-join-control-plane`
- `th-kubeadm-join-worker`
- `th-kubeadm-status`
## Hardware config files
Each host file optionally imports `hosts/hardware/<host>.nix` if present.
The flake automatically imports `hosts/hardware/<host>.nix` if present.
Copy each node's generated hardware config into this folder:
```bash
@@ -36,7 +42,99 @@ sudo nixos-rebuild switch --flake .#cp-1
For remote target-host workflows, use your preferred deploy wrapper later
(`nixos-rebuild --target-host ...` or deploy-rs/colmena).
## Bootstrap runbook (kubeadm + kube-vip + Cilium)
1. Apply Nix config on all nodes (`cp-*`, then `wk-*`).
2. On `cp-1`, run:
```bash
sudo th-kubeadm-init
```
This infers the control-plane VIP as `<node-subnet>.250` on `eth0`, creates the
kube-vip static pod manifest, and runs `kubeadm init`.
3. Install Cilium from `cp-1`:
```bash
helm repo add cilium https://helm.cilium.io
helm repo update
helm upgrade --install cilium cilium/cilium \
--namespace kube-system \
--set kubeProxyReplacement=true
```
4. Generate join commands on `cp-1`:
```bash
sudo kubeadm token create --print-join-command
sudo kubeadm init phase upload-certs --upload-certs
```
5. Join `cp-2` and `cp-3`:
```bash
sudo th-kubeadm-join-control-plane '<kubeadm join ... --control-plane --certificate-key ...>'
```
6. Join workers:
```bash
sudo th-kubeadm-join-worker '<kubeadm join ...>'
```
7. Validate from a control plane:
```bash
kubectl get nodes -o wide
kubectl -n kube-system get pods -o wide
```
## Repeatable rebuild flow (recommended)
1. Copy and edit inventory:
```bash
cp ./scripts/inventory.example.env ./scripts/inventory.env
$EDITOR ./scripts/inventory.env
```
2. Rebuild all nodes and bootstrap cluster:
```bash
./scripts/rebuild-and-bootstrap.sh
```
3. If you only want to reset Kubernetes state on existing VMs:
```bash
./scripts/reset-cluster-nodes.sh
```
For a full nuke/recreate lifecycle:
- run Terraform destroy/apply for VMs first,
- then run `./scripts/rebuild-and-bootstrap.sh` again.
## Optional Gitea workflow automation
Manual dispatch workflows are available:
- `.gitea/workflows/kubeadm-bootstrap.yml`
- `.gitea/workflows/kubeadm-reset.yml`
Required repository secrets:
- Existing Terraform/backend secrets used by current workflows (`B2_*`, `PM_API_TOKEN_SECRET`, `SSH_KEY_PUBLIC`)
- SSH private key: prefer `KUBEADM_SSH_PRIVATE_KEY`, fallback to existing `SSH_KEY_PRIVATE`
Optional secrets:
- `KUBEADM_SSH_USER` (defaults to `micqdf`)
Node IPs are auto-discovered from Terraform state outputs (`control_plane_vm_ipv4`, `worker_vm_ipv4`), so you do not need per-node IP secrets.
## Notes
- The NixOS modules themselves do not run `kubeadm init/join` automatically; that is handled by the scripts and workflows above.
- It prepares OS/runtime/kernel prerequisites so kubeadm bootstrapping is clean.
- Scripts are intentionally manual-triggered (predictable for homelab bring-up).
- If `.250` on the node subnet is already in use, change `controlPlaneVipSuffix`
in `modules/k8s-cluster-settings.nix` before bootstrap.

27
nixos/kubeadm/flake.lock generated Normal file
View File

@@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1767313136,
"narHash": "sha256-16KkgfdYqjaeRGBaYsNrhPRRENs0qzkQVUooNHtoy2w=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "ac62194c3917d5f474c1a844b6fd6da2db95077d",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-25.05",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

View File

@@ -8,19 +8,70 @@
outputs = { nixpkgs, ... }:
let
system = "x86_64-linux";
mkHost = hostModules:
lib = nixpkgs.lib;
pkgs = nixpkgs.legacyPackages.${system};
nodeNames = [ "cp-1" "cp-2" "cp-3" "wk-1" "wk-2" "wk-3" ];
mkNode = {
name,
role,
extraModules ? [ ],
}:
let
roleModule = if role == "control-plane" then ./modules/k8s-control-plane.nix else ./modules/k8s-worker.nix;
hardwarePath = ./hosts/hardware + "/${name}.nix";
in
nixpkgs.lib.nixosSystem {
inherit system;
modules = hostModules;
modules = [
./modules/k8s-cluster-settings.nix
./modules/k8s-common.nix
roleModule
({ lib, ... }: {
imports = lib.optional (builtins.pathExists hardwarePath) hardwarePath;
networking.hostName = name;
system.stateVersion = "25.05";
boot.loader.grub.devices = lib.mkDefault [ "/dev/sda" ];
fileSystems."/" = lib.mkDefault {
device = "/dev/disk/by-label/nixos";
fsType = "ext4";
};
})
] ++ extraModules;
};
mkNodeByName = name:
mkNode {
inherit name;
role = if lib.hasPrefix "cp-" name then "control-plane" else "worker";
};
mkEvalCheck = name:
let
cfg = mkNode {
inherit name;
role = if lib.hasPrefix "cp-" name then "control-plane" else "worker";
extraModules = [
({ lib, ... }: {
boot.loader.grub.devices = lib.mkDefault [ "/dev/sda" ];
fileSystems."/" = lib.mkDefault {
device = "/dev/disk/by-label/nixos";
fsType = "ext4";
};
})
];
};
in
pkgs.runCommand "eval-${name}" { } ''
cat > "$out" <<'EOF'
host=${cfg.config.networking.hostName}
role=${if lib.hasPrefix "cp-" name then "control-plane" else "worker"}
stateVersion=${cfg.config.system.stateVersion}
EOF
'';
in {
nixosConfigurations = {
cp-1 = mkHost [ ./hosts/cp-1.nix ];
cp-2 = mkHost [ ./hosts/cp-2.nix ];
cp-3 = mkHost [ ./hosts/cp-3.nix ];
wk-1 = mkHost [ ./hosts/wk-1.nix ];
wk-2 = mkHost [ ./hosts/wk-2.nix ];
wk-3 = mkHost [ ./hosts/wk-3.nix ];
};
nixosConfigurations = lib.genAttrs nodeNames mkNodeByName;
checks.${system} = lib.genAttrs nodeNames mkEvalCheck;
};
}

View File

@@ -1,14 +0,0 @@
{ lib, ... }:
{
imports =
[
../modules/k8s-common.nix
../modules/k8s-control-plane.nix
]
++ lib.optional (builtins.pathExists ./hardware/cp-1.nix) ./hardware/cp-1.nix;
networking.hostName = "cp-1";
system.stateVersion = "25.05";
}

View File

@@ -1,14 +0,0 @@
{ lib, ... }:
{
imports =
[
../modules/k8s-common.nix
../modules/k8s-control-plane.nix
]
++ lib.optional (builtins.pathExists ./hardware/cp-2.nix) ./hardware/cp-2.nix;
networking.hostName = "cp-2";
system.stateVersion = "25.05";
}

View File

@@ -1,14 +0,0 @@
{ lib, ... }:
{
imports =
[
../modules/k8s-common.nix
../modules/k8s-control-plane.nix
]
++ lib.optional (builtins.pathExists ./hardware/cp-3.nix) ./hardware/cp-3.nix;
networking.hostName = "cp-3";
system.stateVersion = "25.05";
}

View File

@@ -1,14 +0,0 @@
{ lib, ... }:
{
imports =
[
../modules/k8s-common.nix
../modules/k8s-worker.nix
]
++ lib.optional (builtins.pathExists ./hardware/wk-1.nix) ./hardware/wk-1.nix;
networking.hostName = "wk-1";
system.stateVersion = "25.05";
}

View File

@@ -1,14 +0,0 @@
{ lib, ... }:
{
imports =
[
../modules/k8s-common.nix
../modules/k8s-worker.nix
]
++ lib.optional (builtins.pathExists ./hardware/wk-2.nix) ./hardware/wk-2.nix;
networking.hostName = "wk-2";
system.stateVersion = "25.05";
}

View File

@@ -1,14 +0,0 @@
{ lib, ... }:
{
imports =
[
../modules/k8s-common.nix
../modules/k8s-worker.nix
]
++ lib.optional (builtins.pathExists ./hardware/wk-3.nix) ./hardware/wk-3.nix;
networking.hostName = "wk-3";
system.stateVersion = "25.05";
}

View File

@@ -0,0 +1,12 @@
{ ... }:
{
  # Cluster-wide kubeadm defaults for this homelab cluster. The option
  # declarations for terrahome.kubeadm live in modules/k8s-common.nix;
  # this module only sets their values.
  terrahome.kubeadm = {
    # Kubernetes minor release to pin node tooling against.
    k8sMinor = "1.31";
    # NIC used for the control-plane VIP (kube-vip).
    controlPlaneInterface = "eth0";
    # VIP is <node /24 subnet>.<suffix>; change if .250 is taken.
    controlPlaneVipSuffix = 250;
    podSubnet = "10.244.0.0/16";
    serviceSubnet = "10.96.0.0/12";
    clusterDomain = "cluster.local";
  };
}

View File

@@ -1,6 +1,43 @@
{ pkgs, ... }:
{ config, lib, pkgs, ... }:
let
pinnedK8s = lib.attrByPath [ "kubernetes_1_31" ] pkgs.kubernetes pkgs;
kubeVipImage = "ghcr.io/kube-vip/kube-vip:v0.8.9";
in
{
options.terrahome.kubeadm = {
k8sMinor = lib.mkOption {
type = lib.types.str;
default = "1.31";
};
controlPlaneInterface = lib.mkOption {
type = lib.types.str;
default = "eth0";
};
controlPlaneVipSuffix = lib.mkOption {
type = lib.types.int;
default = 250;
};
podSubnet = lib.mkOption {
type = lib.types.str;
default = "10.244.0.0/16";
};
serviceSubnet = lib.mkOption {
type = lib.types.str;
default = "10.96.0.0/12";
};
clusterDomain = lib.mkOption {
type = lib.types.str;
default = "cluster.local";
};
};
config = {
boot.kernelModules = [ "overlay" "br_netfilter" ];
boot.kernel.sysctl = {
@@ -10,6 +47,11 @@
};
virtualisation.containerd.enable = true;
virtualisation.containerd.settings = {
plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options.SystemdCgroup = true;
};
swapDevices = lib.mkForce [ ];
services.openssh.enable = true;
services.openssh.settings = {
@@ -17,19 +59,144 @@
KbdInteractiveAuthentication = false;
};
environment.systemPackages = with pkgs; [
environment.variables = {
KUBECONFIG = "/etc/kubernetes/admin.conf";
KUBE_VIP_IMAGE = kubeVipImage;
};
environment.systemPackages = (with pkgs; [
containerd
cri-tools
cni-plugins
kubernetes
kubectl
pinnedK8s
kubernetes-helm
conntrack-tools
socat
ethtool
ipvsadm
iproute2
iptables
ebtables
jq
curl
vim
gawk
]) ++ [
(pkgs.writeShellScriptBin "th-kubeadm-init" ''
set -euo pipefail
iface="${config.terrahome.kubeadm.controlPlaneInterface}"
suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
pod_subnet="${config.terrahome.kubeadm.podSubnet}"
service_subnet="${config.terrahome.kubeadm.serviceSubnet}"
domain="${config.terrahome.kubeadm.clusterDomain}"
local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
if [ -z "''${local_ip_cidr:-}" ]; then
echo "Could not determine IPv4 CIDR on interface $iface"
exit 1
fi
subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
vip="$subnet_prefix.$suffix"
echo "Using control-plane endpoint: $vip:6443"
echo "Using kube-vip interface: $iface"
mkdir -p /etc/kubernetes/manifests
ctr image pull "$KUBE_VIP_IMAGE"
ctr run --rm --net-host "$KUBE_VIP_IMAGE" kube-vip /kube-vip manifest pod \
--interface "$iface" \
--address "$vip" \
--controlplane \
--services \
--arp \
--leaderElection \
> /etc/kubernetes/manifests/kube-vip.yaml
kubeadm init \
--control-plane-endpoint "$vip:6443" \
--upload-certs \
--pod-network-cidr "$pod_subnet" \
--service-cidr "$service_subnet" \
--service-dns-domain "$domain"
mkdir -p /root/.kube
cp /etc/kubernetes/admin.conf /root/.kube/config
chmod 600 /root/.kube/config
echo
echo "Next: install Cilium, then generate join commands:"
echo " kubeadm token create --print-join-command"
echo " kubeadm token create --print-join-command --certificate-key <key>"
'')
(pkgs.writeShellScriptBin "th-kubeadm-join-control-plane" ''
set -euo pipefail
if [ "$#" -lt 1 ]; then
echo "Usage: th-kubeadm-join-control-plane '<kubeadm join ... --control-plane --certificate-key ...>'"
exit 1
fi
iface="${config.terrahome.kubeadm.controlPlaneInterface}"
suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
if [ -z "''${local_ip_cidr:-}" ]; then
echo "Could not determine IPv4 CIDR on interface $iface"
exit 1
fi
subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
vip="$subnet_prefix.$suffix"
mkdir -p /etc/kubernetes/manifests
ctr image pull "$KUBE_VIP_IMAGE"
ctr run --rm --net-host "$KUBE_VIP_IMAGE" kube-vip /kube-vip manifest pod \
--interface "$iface" \
--address "$vip" \
--controlplane \
--services \
--arp \
--leaderElection \
> /etc/kubernetes/manifests/kube-vip.yaml
eval "$1"
'')
(pkgs.writeShellScriptBin "th-kubeadm-join-worker" ''
set -euo pipefail
if [ "$#" -lt 1 ]; then
echo "Usage: th-kubeadm-join-worker '<kubeadm join ...>'"
exit 1
fi
eval "$1"
'')
(pkgs.writeShellScriptBin "th-kubeadm-status" ''
set -euo pipefail
systemctl is-active containerd || true
systemctl is-active kubelet || true
crictl info >/dev/null && echo "crictl: ok" || echo "crictl: not-ready"
'')
];
systemd.services.kubelet = {
description = "Kubernetes Kubelet";
wantedBy = [ "multi-user.target" ];
wants = [ "network-online.target" ];
after = [ "containerd.service" "network-online.target" ];
serviceConfig = {
ExecStart = "${pinnedK8s}/bin/kubelet";
Restart = "always";
RestartSec = "10";
};
};
systemd.tmpfiles.rules = [
"d /etc/kubernetes 0755 root root -"
"d /etc/kubernetes/manifests 0755 root root -"
];
};
}

View File

@@ -0,0 +1,11 @@
SSH_USER=micqdf
# Control planes
CP_1=192.168.1.101
CP_2=192.168.1.102
CP_3=192.168.1.103
# Workers
WK_1=192.168.1.111
WK_2=192.168.1.112
WK_3=192.168.1.113

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env bash
# Rebuild every cluster node with nixos-rebuild, then bootstrap a kubeadm
# cluster: kubeadm init on cp-1, install Cilium, and join the remaining
# control planes and workers.
#
# Usage: rebuild-and-bootstrap.sh [inventory-file]
#   inventory-file defaults to scripts/inventory.env and must define
#   CP_1..CP_3 and WK_1..WK_3 (node IPs); SSH_USER and SSH_OPTS optional.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Flake root is one directory above scripts/; overridable via FLAKE_DIR.
FLAKE_DIR="${FLAKE_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"
if [ ! -f "$INVENTORY_FILE" ]; then
echo "Missing inventory file: $INVENTORY_FILE"
echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs."
exit 1
fi
# shellcheck disable=SC1090
source "$INVENTORY_FILE"
SSH_USER="${SSH_USER:-micqdf}"
# Intentionally expanded unquoted below so the options word-split.
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"
# Fail fast if any node IP is missing from the inventory.
required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
if [ -z "${!key:-}" ]; then
echo "Missing required inventory variable: $key"
exit 1
fi
done
# remote <ip> <cmd>: run a command on a node over SSH as $SSH_USER.
remote() {
local host_ip="$1"
local cmd="$2"
ssh $SSH_OPTS "$SSH_USER@$host_ip" "$cmd"
}
# rebuild_node <flake-attr> <ip>: deploy the node's NixOS config from the
# local flake to the remote host.
rebuild_node() {
local node_name="$1"
local node_ip="$2"
echo "==> Rebuilding $node_name on $node_ip"
nixos-rebuild switch \
--flake "$FLAKE_DIR#$node_name" \
--target-host "$SSH_USER@$node_ip" \
--use-remote-sudo
}
# Rebuild all nodes; node name cp-1 maps to inventory variable CP_1, etc.
for node in cp-1 cp-2 cp-3 wk-1 wk-2 wk-3; do
key="${node^^}"
key="${key//-/_}"
rebuild_node "$node" "${!key}"
done
echo "==> Initializing control plane on cp-1"
remote "$CP_1" "sudo th-kubeadm-init"
echo "==> Installing Cilium on cp-1"
remote "$CP_1" "helm repo add cilium https://helm.cilium.io >/dev/null 2>&1 || true"
remote "$CP_1" "helm repo update >/dev/null"
# kube-system already exists on a kubeadm cluster; kept as a no-op guard.
remote "$CP_1" "kubectl create namespace kube-system >/dev/null 2>&1 || true"
remote "$CP_1" "helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true"
echo "==> Building kubeadm join commands"
JOIN_CMD="$(remote "$CP_1" "sudo kubeadm token create --print-join-command")"
# upload-certs prints the certificate key on its last output line.
CERT_KEY="$(remote "$CP_1" "sudo kubeadm init phase upload-certs --upload-certs | tail -n 1")"
CP_JOIN_CMD="$JOIN_CMD --control-plane --certificate-key $CERT_KEY"
# Join commands are base64-encoded locally and decoded on the remote side
# so they survive the extra layer of SSH shell quoting intact.
# NOTE(review): `base64 -w0` is GNU coreutils; BSD/macOS base64 lacks -w —
# confirm this only runs on Linux hosts.
join_control_plane() {
local node_ip="$1"
local encoded
encoded="$(printf '%s' "$CP_JOIN_CMD" | base64 -w0)"
remote "$node_ip" "sudo th-kubeadm-join-control-plane \"\$(echo $encoded | base64 -d)\""
}
join_worker() {
local node_ip="$1"
local encoded
encoded="$(printf '%s' "$JOIN_CMD" | base64 -w0)"
remote "$node_ip" "sudo th-kubeadm-join-worker \"\$(echo $encoded | base64 -d)\""
}
echo "==> Joining remaining control planes"
join_control_plane "$CP_2"
join_control_plane "$CP_3"
echo "==> Joining workers"
join_worker "$WK_1"
join_worker "$WK_2"
join_worker "$WK_3"
echo "==> Final node list"
remote "$CP_1" "kubectl get nodes -o wide"

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Run `kubeadm reset` on every node listed in the inventory and wipe
# Kubernetes/etcd/CNI state, leaving the VMs themselves intact.
#
# Usage: reset-cluster-nodes.sh [inventory-file]
#   inventory-file defaults to scripts/inventory.env and must define
#   CP_1..CP_3 and WK_1..WK_3 (node IPs); SSH_USER and SSH_OPTS optional.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"
if [ ! -f "$INVENTORY_FILE" ]; then
echo "Missing inventory file: $INVENTORY_FILE"
echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs."
exit 1
fi
# shellcheck disable=SC1090
source "$INVENTORY_FILE"
SSH_USER="${SSH_USER:-micqdf}"
# Intentionally expanded unquoted below so the options word-split.
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"
# Fail fast if any node IP is missing from the inventory.
required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
if [ -z "${!key:-}" ]; then
echo "Missing required inventory variable: $key"
exit 1
fi
done
# reset_node <ip>: kubeadm reset plus removal of leftover state dirs
# (kubeadm reset does not clear CNI config or etcd data on its own).
reset_node() {
local node_ip="$1"
echo "==> Resetting $node_ip"
ssh $SSH_OPTS "$SSH_USER@$node_ip" "sudo kubeadm reset -f && sudo systemctl stop kubelet && sudo rm -rf /etc/kubernetes /var/lib/etcd /var/lib/cni /etc/cni/net.d"
}
# Control planes first, then workers.
for key in CP_1 CP_2 CP_3 WK_1 WK_2 WK_3; do
reset_node "${!key}"
done
echo "Cluster components reset on all listed nodes."

View File

@@ -9,6 +9,13 @@ output "control_plane_vm_names" {
value = [for vm in proxmox_vm_qemu.control_planes : vm.name]
}
# Map of control-plane VM name => default IPv4 address, consumed by the
# kubeadm Gitea workflows to build the SSH inventory.
output "control_plane_vm_ipv4" {
value = {
for vm in proxmox_vm_qemu.control_planes :
vm.name => vm.default_ipv4_address
}
}
output "worker_vm_ids" {
value = {
for i in range(var.worker_count) :
@@ -19,3 +26,10 @@ output "worker_vm_ids" {
output "worker_vm_names" {
value = [for vm in proxmox_vm_qemu.workers : vm.name]
}
# Map of worker VM name => default IPv4 address, consumed by the kubeadm
# Gitea workflows to build the SSH inventory.
output "worker_vm_ipv4" {
value = {
for vm in proxmox_vm_qemu.workers :
vm.name => vm.default_ipv4_address
}
}