Compare commits
8 Commits
a42d44bb27
...
c154ff4d15
| Author | SHA1 | Date | |
|---|---|---|---|
| c154ff4d15 | |||
| 8bcc162956 | |||
| b0779c51c0 | |||
| 9fe845b53d | |||
| 885a92f494 | |||
| 91dd20e60e | |||
| abac6300ca | |||
| 7206d8cd41 |
119
.gitea/workflows/kubeadm-bootstrap.yml
Normal file
119
.gitea/workflows/kubeadm-bootstrap.yml
Normal file
@@ -0,0 +1,119 @@
|
||||
# Manual-dispatch workflow: rebuild every cluster node's NixOS config and
# bootstrap a fresh kubeadm cluster. Guarded by a typed confirmation phrase.
name: Kubeadm Bootstrap
run-name: ${{ gitea.actor }} requested kubeadm bootstrap

on:
  workflow_dispatch:
    inputs:
      confirm:
        description: "Type BOOTSTRAP to run rebuild + kubeadm bootstrap"
        required: true
        type: string

# Shares one concurrency group with the reset workflow so bootstrap and reset
# can never run simultaneously; queued runs are kept, not cancelled.
concurrency:
  group: kubeadm-bootstrap
  cancel-in-progress: false

jobs:
  bootstrap:
    name: "Rebuild and Bootstrap Cluster"
    runs-on: ubuntu-latest

    steps:
      # Abort early unless the operator typed the exact phrase.
      - name: Validate confirmation phrase
        run: |
          if [ "${{ inputs.confirm }}" != "BOOTSTRAP" ]; then
            echo "Confirmation failed. You must type BOOTSTRAP."
            exit 1
          fi

      - name: Checkout repository
        uses: https://gitea.com/actions/checkout@v4

      # Install the SSH private key used to reach cluster nodes.
      # Prefers KUBEADM_SSH_PRIVATE_KEY, falls back to SSH_KEY_PRIVATE.
      - name: Create SSH key
        run: |
          install -m 0700 -d ~/.ssh
          KEY_CONTENT="$(printf '%s' "${{ secrets.KUBEADM_SSH_PRIVATE_KEY }}")"
          if [ -z "$KEY_CONTENT" ]; then
            KEY_CONTENT="$(printf '%s' "${{ secrets.SSH_KEY_PRIVATE }}")"
          fi

          if [ -z "$KEY_CONTENT" ]; then
            echo "Missing SSH private key secret. Set KUBEADM_SSH_PRIVATE_KEY or SSH_KEY_PRIVATE."
            exit 1
          fi

          printf '%s\n' "$KEY_CONTENT" > ~/.ssh/id_ed25519
          chmod 0600 ~/.ssh/id_ed25519

      - name: Set up Terraform
        uses: hashicorp/setup-terraform@v2
        with:
          terraform_version: 1.6.6
          terraform_wrapper: false

      # Generate tfvars and the S3-compatible (Backblaze B2) backend config
      # from repository secrets; tr strips stray CR/LF from pasted secrets.
      - name: Build Terraform backend files
        working-directory: terraform
        run: |
          cat > secrets.auto.tfvars << EOF
          pm_api_token_secret = "${{ secrets.PM_API_TOKEN_SECRET }}"
          SSH_KEY_PUBLIC = "$(printf '%s' "${{ secrets.SSH_KEY_PUBLIC }}" | tr -d '\r\n')"
          EOF

          cat > backend.hcl << EOF
          bucket = "${{ secrets.B2_TF_BUCKET }}"
          key = "terraform.tfstate"
          region = "us-east-005"
          endpoints = {
          s3 = "${{ secrets.B2_TF_ENDPOINT }}"
          }
          access_key = "$(printf '%s' "${{ secrets.B2_KEY_ID }}" | tr -d '\r\n')"
          secret_key = "$(printf '%s' "${{ secrets.B2_APPLICATION_KEY }}" | tr -d '\r\n')"
          skip_credentials_validation = true
          skip_metadata_api_check = true
          skip_region_validation = true
          skip_requesting_account_id = true
          use_path_style = true
          EOF

      - name: Terraform init for state read
        working-directory: terraform
        run: terraform init -reconfigure -backend-config=backend.hcl

      # Read node IPs from Terraform state outputs and write the inventory
      # file consumed by the rebuild/bootstrap script — no per-node IP secrets.
      - name: Create kubeadm inventory
        run: |
          TF_OUTPUT_JSON="$(terraform -chdir=terraform output -json)"

          CP_1="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-1"])' <<< "$TF_OUTPUT_JSON")"
          CP_2="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-2"])' <<< "$TF_OUTPUT_JSON")"
          CP_3="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-3"])' <<< "$TF_OUTPUT_JSON")"
          WK_1="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-1"])' <<< "$TF_OUTPUT_JSON")"
          WK_2="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-2"])' <<< "$TF_OUTPUT_JSON")"
          WK_3="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-3"])' <<< "$TF_OUTPUT_JSON")"

          SSH_USER="$(printf '%s' "${{ secrets.KUBEADM_SSH_USER }}")"
          if [ -z "$SSH_USER" ]; then
            SSH_USER="micqdf"
          fi

          cat > nixos/kubeadm/scripts/inventory.env << EOF
          SSH_USER=$SSH_USER
          CP_1=$CP_1
          CP_2=$CP_2
          CP_3=$CP_3
          WK_1=$WK_1
          WK_2=$WK_2
          WK_3=$WK_3
          EOF

      # The bootstrap script needs nix on the runner; fail fast if absent.
      - name: Validate nix installation
        run: |
          if [ ! -x /nix/var/nix/profiles/default/bin/nix ]; then
            echo "Nix not found at /nix/var/nix/profiles/default/bin/nix"
            exit 1
          fi

      - name: Run cluster rebuild and bootstrap
        env:
          # Prepend the nix profile bin dir so nixos-rebuild is resolvable.
          PATH: /nix/var/nix/profiles/default/bin:${{ env.PATH }}
        run: |
          ./nixos/kubeadm/scripts/rebuild-and-bootstrap.sh
|
||||
110
.gitea/workflows/kubeadm-reset.yml
Normal file
110
.gitea/workflows/kubeadm-reset.yml
Normal file
@@ -0,0 +1,110 @@
|
||||
# Manual-dispatch workflow: run kubeadm reset on every cluster node.
# Guarded by a typed confirmation phrase.
name: Kubeadm Reset
run-name: ${{ gitea.actor }} requested kubeadm reset

on:
  workflow_dispatch:
    inputs:
      confirm:
        description: "Type RESET to run kubeadm reset on all nodes"
        required: true
        type: string

# Same concurrency group as the bootstrap workflow so reset and bootstrap
# can never run simultaneously; queued runs are kept, not cancelled.
concurrency:
  group: kubeadm-bootstrap
  cancel-in-progress: false

jobs:
  reset:
    name: "Reset Cluster Nodes"
    runs-on: ubuntu-latest

    steps:
      # Abort early unless the operator typed the exact phrase.
      - name: Validate confirmation phrase
        run: |
          if [ "${{ inputs.confirm }}" != "RESET" ]; then
            echo "Confirmation failed. You must type RESET."
            exit 1
          fi

      - name: Checkout repository
        uses: https://gitea.com/actions/checkout@v4

      # Install the SSH private key used to reach cluster nodes.
      # Prefers KUBEADM_SSH_PRIVATE_KEY, falls back to SSH_KEY_PRIVATE.
      - name: Create SSH key
        run: |
          install -m 0700 -d ~/.ssh
          KEY_CONTENT="$(printf '%s' "${{ secrets.KUBEADM_SSH_PRIVATE_KEY }}")"
          if [ -z "$KEY_CONTENT" ]; then
            KEY_CONTENT="$(printf '%s' "${{ secrets.SSH_KEY_PRIVATE }}")"
          fi

          if [ -z "$KEY_CONTENT" ]; then
            echo "Missing SSH private key secret. Set KUBEADM_SSH_PRIVATE_KEY or SSH_KEY_PRIVATE."
            exit 1
          fi

          printf '%s\n' "$KEY_CONTENT" > ~/.ssh/id_ed25519
          chmod 0600 ~/.ssh/id_ed25519

      - name: Set up Terraform
        uses: hashicorp/setup-terraform@v2
        with:
          terraform_version: 1.6.6
          terraform_wrapper: false

      # Generate tfvars and the S3-compatible (Backblaze B2) backend config
      # from repository secrets; tr strips stray CR/LF from pasted secrets.
      - name: Build Terraform backend files
        working-directory: terraform
        run: |
          cat > secrets.auto.tfvars << EOF
          pm_api_token_secret = "${{ secrets.PM_API_TOKEN_SECRET }}"
          SSH_KEY_PUBLIC = "$(printf '%s' "${{ secrets.SSH_KEY_PUBLIC }}" | tr -d '\r\n')"
          EOF

          cat > backend.hcl << EOF
          bucket = "${{ secrets.B2_TF_BUCKET }}"
          key = "terraform.tfstate"
          region = "us-east-005"
          endpoints = {
          s3 = "${{ secrets.B2_TF_ENDPOINT }}"
          }
          access_key = "$(printf '%s' "${{ secrets.B2_KEY_ID }}" | tr -d '\r\n')"
          secret_key = "$(printf '%s' "${{ secrets.B2_APPLICATION_KEY }}" | tr -d '\r\n')"
          skip_credentials_validation = true
          skip_metadata_api_check = true
          skip_region_validation = true
          skip_requesting_account_id = true
          use_path_style = true
          EOF

      - name: Terraform init for state read
        working-directory: terraform
        run: terraform init -reconfigure -backend-config=backend.hcl

      # Read node IPs from Terraform state outputs and write the inventory
      # file consumed by the reset script — no per-node IP secrets.
      - name: Create kubeadm inventory
        run: |
          TF_OUTPUT_JSON="$(terraform -chdir=terraform output -json)"

          CP_1="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-1"])' <<< "$TF_OUTPUT_JSON")"
          CP_2="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-2"])' <<< "$TF_OUTPUT_JSON")"
          CP_3="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["control_plane_vm_ipv4"]["value"]["cp-3"])' <<< "$TF_OUTPUT_JSON")"
          WK_1="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-1"])' <<< "$TF_OUTPUT_JSON")"
          WK_2="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-2"])' <<< "$TF_OUTPUT_JSON")"
          WK_3="$(python3 -c 'import json,sys; d=json.loads(sys.stdin.read()); print(d["worker_vm_ipv4"]["value"]["wk-3"])' <<< "$TF_OUTPUT_JSON")"

          SSH_USER="$(printf '%s' "${{ secrets.KUBEADM_SSH_USER }}")"
          if [ -z "$SSH_USER" ]; then
            SSH_USER="micqdf"
          fi

          cat > nixos/kubeadm/scripts/inventory.env << EOF
          SSH_USER=$SSH_USER
          CP_1=$CP_1
          CP_2=$CP_2
          CP_3=$CP_3
          WK_1=$WK_1
          WK_2=$WK_2
          WK_3=$WK_3
          EOF

      - name: Run cluster reset
        run: |
          ./nixos/kubeadm/scripts/reset-cluster-nodes.sh
|
||||
@@ -10,12 +10,18 @@ This folder defines role-based NixOS configs for a kubeadm cluster.
|
||||
## What this provides
|
||||
|
||||
- Shared Kubernetes/node prerequisites in `modules/k8s-common.nix`
|
||||
- Shared cluster defaults in `modules/k8s-cluster-settings.nix`
|
||||
- Role-specific settings for control planes and workers
|
||||
- Host configs for each node in `hosts/`
|
||||
- Generated per-node host configs from `flake.nix` (no duplicated host files)
|
||||
- Bootstrap helper commands:
|
||||
- `th-kubeadm-init`
|
||||
- `th-kubeadm-join-control-plane`
|
||||
- `th-kubeadm-join-worker`
|
||||
- `th-kubeadm-status`
|
||||
|
||||
## Hardware config files
|
||||
|
||||
Each host file optionally imports `hosts/hardware/<host>.nix` if present.
|
||||
The flake automatically imports `hosts/hardware/<host>.nix` if present.
|
||||
Copy each node's generated hardware config into this folder:
|
||||
|
||||
```bash
|
||||
@@ -36,7 +42,99 @@ sudo nixos-rebuild switch --flake .#cp-1
|
||||
For remote target-host workflows, use your preferred deploy wrapper later
|
||||
(`nixos-rebuild --target-host ...` or deploy-rs/colmena).
|
||||
|
||||
## Bootstrap runbook (kubeadm + kube-vip + Cilium)
|
||||
|
||||
1. Apply Nix config on all nodes (`cp-*`, then `wk-*`).
|
||||
2. On `cp-1`, run:
|
||||
|
||||
```bash
|
||||
sudo th-kubeadm-init
|
||||
```
|
||||
|
||||
This infers the control-plane VIP as `<node-subnet>.250` on `eth0`, creates the
|
||||
kube-vip static pod manifest, and runs `kubeadm init`.
|
||||
|
||||
3. Install Cilium from `cp-1`:
|
||||
|
||||
```bash
|
||||
helm repo add cilium https://helm.cilium.io
|
||||
helm repo update
|
||||
helm upgrade --install cilium cilium/cilium \
|
||||
--namespace kube-system \
|
||||
--set kubeProxyReplacement=true
|
||||
```
|
||||
|
||||
4. Generate join commands on `cp-1`:
|
||||
|
||||
```bash
|
||||
sudo kubeadm token create --print-join-command
|
||||
sudo kubeadm init phase upload-certs --upload-certs
|
||||
```
|
||||
|
||||
5. Join `cp-2` and `cp-3`:
|
||||
|
||||
```bash
|
||||
sudo th-kubeadm-join-control-plane '<kubeadm join ... --control-plane --certificate-key ...>'
|
||||
```
|
||||
|
||||
6. Join workers:
|
||||
|
||||
```bash
|
||||
sudo th-kubeadm-join-worker '<kubeadm join ...>'
|
||||
```
|
||||
|
||||
7. Validate from a control plane:
|
||||
|
||||
```bash
|
||||
kubectl get nodes -o wide
|
||||
kubectl -n kube-system get pods -o wide
|
||||
```
|
||||
|
||||
## Repeatable rebuild flow (recommended)
|
||||
|
||||
1. Copy and edit inventory:
|
||||
|
||||
```bash
|
||||
cp ./scripts/inventory.example.env ./scripts/inventory.env
|
||||
$EDITOR ./scripts/inventory.env
|
||||
```
|
||||
|
||||
2. Rebuild all nodes and bootstrap cluster:
|
||||
|
||||
```bash
|
||||
./scripts/rebuild-and-bootstrap.sh
|
||||
```
|
||||
|
||||
3. If you only want to reset Kubernetes state on existing VMs:
|
||||
|
||||
```bash
|
||||
./scripts/reset-cluster-nodes.sh
|
||||
```
|
||||
|
||||
For a full nuke/recreate lifecycle:
|
||||
- run Terraform destroy/apply for VMs first,
|
||||
- then run `./scripts/rebuild-and-bootstrap.sh` again.
|
||||
|
||||
## Optional Gitea workflow automation
|
||||
|
||||
Manual dispatch workflows are available:
|
||||
|
||||
- `.gitea/workflows/kubeadm-bootstrap.yml`
|
||||
- `.gitea/workflows/kubeadm-reset.yml`
|
||||
|
||||
Required repository secrets:
|
||||
|
||||
- Existing Terraform/backend secrets used by current workflows (`B2_*`, `PM_API_TOKEN_SECRET`, `SSH_KEY_PUBLIC`)
|
||||
- SSH private key: prefer `KUBEADM_SSH_PRIVATE_KEY`, fallback to existing `SSH_KEY_PRIVATE`
|
||||
|
||||
Optional secrets:
|
||||
|
||||
- `KUBEADM_SSH_USER` (defaults to `micqdf`)
|
||||
|
||||
Node IPs are auto-discovered from Terraform state outputs (`control_plane_vm_ipv4`, `worker_vm_ipv4`), so you do not need per-node IP secrets.
|
||||
|
||||
## Notes
|
||||
|
||||
- This does not run `kubeadm init/join` automatically.
|
||||
- It prepares OS/runtime/kernel prerequisites so kubeadm bootstrapping is clean.
|
||||
- Scripts are intentionally manual-triggered (predictable for homelab bring-up).
|
||||
- If `.250` on the node subnet is already in use, change `controlPlaneVipSuffix`
|
||||
in `modules/k8s-cluster-settings.nix` before bootstrap.
|
||||
|
||||
27
nixos/kubeadm/flake.lock
generated
Normal file
27
nixos/kubeadm/flake.lock
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1767313136,
|
||||
"narHash": "sha256-16KkgfdYqjaeRGBaYsNrhPRRENs0qzkQVUooNHtoy2w=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "ac62194c3917d5f474c1a844b6fd6da2db95077d",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-25.05",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
||||
@@ -8,19 +8,70 @@
|
||||
outputs = { nixpkgs, ... }:
|
||||
let
|
||||
system = "x86_64-linux";
|
||||
mkHost = hostModules:
|
||||
lib = nixpkgs.lib;
|
||||
pkgs = nixpkgs.legacyPackages.${system};
|
||||
nodeNames = [ "cp-1" "cp-2" "cp-3" "wk-1" "wk-2" "wk-3" ];
|
||||
|
||||
mkNode = {
|
||||
name,
|
||||
role,
|
||||
extraModules ? [ ],
|
||||
}:
|
||||
let
|
||||
roleModule = if role == "control-plane" then ./modules/k8s-control-plane.nix else ./modules/k8s-worker.nix;
|
||||
hardwarePath = ./hosts/hardware + "/${name}.nix";
|
||||
in
|
||||
nixpkgs.lib.nixosSystem {
|
||||
inherit system;
|
||||
modules = hostModules;
|
||||
modules = [
|
||||
./modules/k8s-cluster-settings.nix
|
||||
./modules/k8s-common.nix
|
||||
roleModule
|
||||
({ lib, ... }: {
|
||||
imports = lib.optional (builtins.pathExists hardwarePath) hardwarePath;
|
||||
networking.hostName = name;
|
||||
system.stateVersion = "25.05";
|
||||
boot.loader.grub.devices = lib.mkDefault [ "/dev/sda" ];
|
||||
fileSystems."/" = lib.mkDefault {
|
||||
device = "/dev/disk/by-label/nixos";
|
||||
fsType = "ext4";
|
||||
};
|
||||
})
|
||||
] ++ extraModules;
|
||||
};
|
||||
|
||||
mkNodeByName = name:
|
||||
mkNode {
|
||||
inherit name;
|
||||
role = if lib.hasPrefix "cp-" name then "control-plane" else "worker";
|
||||
};
|
||||
|
||||
mkEvalCheck = name:
|
||||
let
|
||||
cfg = mkNode {
|
||||
inherit name;
|
||||
role = if lib.hasPrefix "cp-" name then "control-plane" else "worker";
|
||||
extraModules = [
|
||||
({ lib, ... }: {
|
||||
boot.loader.grub.devices = lib.mkDefault [ "/dev/sda" ];
|
||||
fileSystems."/" = lib.mkDefault {
|
||||
device = "/dev/disk/by-label/nixos";
|
||||
fsType = "ext4";
|
||||
};
|
||||
})
|
||||
];
|
||||
};
|
||||
in
|
||||
pkgs.runCommand "eval-${name}" { } ''
|
||||
cat > "$out" <<'EOF'
|
||||
host=${cfg.config.networking.hostName}
|
||||
role=${if lib.hasPrefix "cp-" name then "control-plane" else "worker"}
|
||||
stateVersion=${cfg.config.system.stateVersion}
|
||||
EOF
|
||||
'';
|
||||
in {
|
||||
nixosConfigurations = {
|
||||
cp-1 = mkHost [ ./hosts/cp-1.nix ];
|
||||
cp-2 = mkHost [ ./hosts/cp-2.nix ];
|
||||
cp-3 = mkHost [ ./hosts/cp-3.nix ];
|
||||
wk-1 = mkHost [ ./hosts/wk-1.nix ];
|
||||
wk-2 = mkHost [ ./hosts/wk-2.nix ];
|
||||
wk-3 = mkHost [ ./hosts/wk-3.nix ];
|
||||
};
|
||||
nixosConfigurations = lib.genAttrs nodeNames mkNodeByName;
|
||||
|
||||
checks.${system} = lib.genAttrs nodeNames mkEvalCheck;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[
|
||||
../modules/k8s-common.nix
|
||||
../modules/k8s-control-plane.nix
|
||||
]
|
||||
++ lib.optional (builtins.pathExists ./hardware/cp-1.nix) ./hardware/cp-1.nix;
|
||||
|
||||
networking.hostName = "cp-1";
|
||||
|
||||
system.stateVersion = "25.05";
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[
|
||||
../modules/k8s-common.nix
|
||||
../modules/k8s-control-plane.nix
|
||||
]
|
||||
++ lib.optional (builtins.pathExists ./hardware/cp-2.nix) ./hardware/cp-2.nix;
|
||||
|
||||
networking.hostName = "cp-2";
|
||||
|
||||
system.stateVersion = "25.05";
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[
|
||||
../modules/k8s-common.nix
|
||||
../modules/k8s-control-plane.nix
|
||||
]
|
||||
++ lib.optional (builtins.pathExists ./hardware/cp-3.nix) ./hardware/cp-3.nix;
|
||||
|
||||
networking.hostName = "cp-3";
|
||||
|
||||
system.stateVersion = "25.05";
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[
|
||||
../modules/k8s-common.nix
|
||||
../modules/k8s-worker.nix
|
||||
]
|
||||
++ lib.optional (builtins.pathExists ./hardware/wk-1.nix) ./hardware/wk-1.nix;
|
||||
|
||||
networking.hostName = "wk-1";
|
||||
|
||||
system.stateVersion = "25.05";
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[
|
||||
../modules/k8s-common.nix
|
||||
../modules/k8s-worker.nix
|
||||
]
|
||||
++ lib.optional (builtins.pathExists ./hardware/wk-2.nix) ./hardware/wk-2.nix;
|
||||
|
||||
networking.hostName = "wk-2";
|
||||
|
||||
system.stateVersion = "25.05";
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
{ lib, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[
|
||||
../modules/k8s-common.nix
|
||||
../modules/k8s-worker.nix
|
||||
]
|
||||
++ lib.optional (builtins.pathExists ./hardware/wk-3.nix) ./hardware/wk-3.nix;
|
||||
|
||||
networking.hostName = "wk-3";
|
||||
|
||||
system.stateVersion = "25.05";
|
||||
}
|
||||
12
nixos/kubeadm/modules/k8s-cluster-settings.nix
Normal file
12
nixos/kubeadm/modules/k8s-cluster-settings.nix
Normal file
@@ -0,0 +1,12 @@
|
||||
{ ... }:
|
||||
|
||||
{
|
||||
terrahome.kubeadm = {
|
||||
k8sMinor = "1.31";
|
||||
controlPlaneInterface = "eth0";
|
||||
controlPlaneVipSuffix = 250;
|
||||
podSubnet = "10.244.0.0/16";
|
||||
serviceSubnet = "10.96.0.0/12";
|
||||
clusterDomain = "cluster.local";
|
||||
};
|
||||
}
|
||||
@@ -1,6 +1,43 @@
|
||||
{ pkgs, ... }:
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
let
|
||||
pinnedK8s = lib.attrByPath [ "kubernetes_1_31" ] pkgs.kubernetes pkgs;
|
||||
kubeVipImage = "ghcr.io/kube-vip/kube-vip:v0.8.9";
|
||||
in
|
||||
{
|
||||
options.terrahome.kubeadm = {
|
||||
k8sMinor = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "1.31";
|
||||
};
|
||||
|
||||
controlPlaneInterface = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "eth0";
|
||||
};
|
||||
|
||||
controlPlaneVipSuffix = lib.mkOption {
|
||||
type = lib.types.int;
|
||||
default = 250;
|
||||
};
|
||||
|
||||
podSubnet = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "10.244.0.0/16";
|
||||
};
|
||||
|
||||
serviceSubnet = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "10.96.0.0/12";
|
||||
};
|
||||
|
||||
clusterDomain = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "cluster.local";
|
||||
};
|
||||
};
|
||||
|
||||
config = {
|
||||
boot.kernelModules = [ "overlay" "br_netfilter" ];
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
@@ -10,6 +47,11 @@
|
||||
};
|
||||
|
||||
virtualisation.containerd.enable = true;
|
||||
virtualisation.containerd.settings = {
|
||||
plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options.SystemdCgroup = true;
|
||||
};
|
||||
|
||||
swapDevices = lib.mkForce [ ];
|
||||
|
||||
services.openssh.enable = true;
|
||||
services.openssh.settings = {
|
||||
@@ -17,19 +59,144 @@
|
||||
KbdInteractiveAuthentication = false;
|
||||
};
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
environment.variables = {
|
||||
KUBECONFIG = "/etc/kubernetes/admin.conf";
|
||||
KUBE_VIP_IMAGE = kubeVipImage;
|
||||
};
|
||||
|
||||
environment.systemPackages = (with pkgs; [
|
||||
containerd
|
||||
cri-tools
|
||||
cni-plugins
|
||||
kubernetes
|
||||
kubectl
|
||||
pinnedK8s
|
||||
kubernetes-helm
|
||||
conntrack-tools
|
||||
socat
|
||||
ethtool
|
||||
ipvsadm
|
||||
iproute2
|
||||
iptables
|
||||
ebtables
|
||||
jq
|
||||
curl
|
||||
vim
|
||||
gawk
|
||||
]) ++ [
|
||||
(pkgs.writeShellScriptBin "th-kubeadm-init" ''
|
||||
set -euo pipefail
|
||||
|
||||
iface="${config.terrahome.kubeadm.controlPlaneInterface}"
|
||||
suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
|
||||
pod_subnet="${config.terrahome.kubeadm.podSubnet}"
|
||||
service_subnet="${config.terrahome.kubeadm.serviceSubnet}"
|
||||
domain="${config.terrahome.kubeadm.clusterDomain}"
|
||||
|
||||
local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
|
||||
if [ -z "''${local_ip_cidr:-}" ]; then
|
||||
echo "Could not determine IPv4 CIDR on interface $iface"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
|
||||
vip="$subnet_prefix.$suffix"
|
||||
|
||||
echo "Using control-plane endpoint: $vip:6443"
|
||||
echo "Using kube-vip interface: $iface"
|
||||
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
ctr image pull "$KUBE_VIP_IMAGE"
|
||||
|
||||
ctr run --rm --net-host "$KUBE_VIP_IMAGE" kube-vip /kube-vip manifest pod \
|
||||
--interface "$iface" \
|
||||
--address "$vip" \
|
||||
--controlplane \
|
||||
--services \
|
||||
--arp \
|
||||
--leaderElection \
|
||||
> /etc/kubernetes/manifests/kube-vip.yaml
|
||||
|
||||
kubeadm init \
|
||||
--control-plane-endpoint "$vip:6443" \
|
||||
--upload-certs \
|
||||
--pod-network-cidr "$pod_subnet" \
|
||||
--service-cidr "$service_subnet" \
|
||||
--service-dns-domain "$domain"
|
||||
|
||||
mkdir -p /root/.kube
|
||||
cp /etc/kubernetes/admin.conf /root/.kube/config
|
||||
chmod 600 /root/.kube/config
|
||||
|
||||
echo
|
||||
echo "Next: install Cilium, then generate join commands:"
|
||||
echo " kubeadm token create --print-join-command"
|
||||
echo " kubeadm token create --print-join-command --certificate-key <key>"
|
||||
'')
|
||||
|
||||
(pkgs.writeShellScriptBin "th-kubeadm-join-control-plane" ''
|
||||
set -euo pipefail
|
||||
if [ "$#" -lt 1 ]; then
|
||||
echo "Usage: th-kubeadm-join-control-plane '<kubeadm join ... --control-plane --certificate-key ...>'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
iface="${config.terrahome.kubeadm.controlPlaneInterface}"
|
||||
suffix="${toString config.terrahome.kubeadm.controlPlaneVipSuffix}"
|
||||
local_ip_cidr=$(ip -4 -o addr show dev "$iface" | awk 'NR==1 {print $4}')
|
||||
if [ -z "''${local_ip_cidr:-}" ]; then
|
||||
echo "Could not determine IPv4 CIDR on interface $iface"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
subnet_prefix=$(echo "$local_ip_cidr" | cut -d/ -f1 | awk -F. '{print $1"."$2"."$3}')
|
||||
vip="$subnet_prefix.$suffix"
|
||||
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
ctr image pull "$KUBE_VIP_IMAGE"
|
||||
ctr run --rm --net-host "$KUBE_VIP_IMAGE" kube-vip /kube-vip manifest pod \
|
||||
--interface "$iface" \
|
||||
--address "$vip" \
|
||||
--controlplane \
|
||||
--services \
|
||||
--arp \
|
||||
--leaderElection \
|
||||
> /etc/kubernetes/manifests/kube-vip.yaml
|
||||
|
||||
eval "$1"
|
||||
'')
|
||||
|
||||
(pkgs.writeShellScriptBin "th-kubeadm-join-worker" ''
|
||||
set -euo pipefail
|
||||
if [ "$#" -lt 1 ]; then
|
||||
echo "Usage: th-kubeadm-join-worker '<kubeadm join ...>'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
eval "$1"
|
||||
'')
|
||||
|
||||
(pkgs.writeShellScriptBin "th-kubeadm-status" ''
|
||||
set -euo pipefail
|
||||
systemctl is-active containerd || true
|
||||
systemctl is-active kubelet || true
|
||||
crictl info >/dev/null && echo "crictl: ok" || echo "crictl: not-ready"
|
||||
'')
|
||||
];
|
||||
|
||||
systemd.services.kubelet = {
|
||||
description = "Kubernetes Kubelet";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
wants = [ "network-online.target" ];
|
||||
after = [ "containerd.service" "network-online.target" ];
|
||||
serviceConfig = {
|
||||
ExecStart = "${pinnedK8s}/bin/kubelet";
|
||||
Restart = "always";
|
||||
RestartSec = "10";
|
||||
};
|
||||
};
|
||||
|
||||
systemd.tmpfiles.rules = [
|
||||
"d /etc/kubernetes 0755 root root -"
|
||||
"d /etc/kubernetes/manifests 0755 root root -"
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
11
nixos/kubeadm/scripts/inventory.example.env
Normal file
11
nixos/kubeadm/scripts/inventory.example.env
Normal file
@@ -0,0 +1,11 @@
|
||||
SSH_USER=micqdf
|
||||
|
||||
# Control planes
|
||||
CP_1=192.168.1.101
|
||||
CP_2=192.168.1.102
|
||||
CP_3=192.168.1.103
|
||||
|
||||
# Workers
|
||||
WK_1=192.168.1.111
|
||||
WK_2=192.168.1.112
|
||||
WK_3=192.168.1.113
|
||||
89
nixos/kubeadm/scripts/rebuild-and-bootstrap.sh
Executable file
89
nixos/kubeadm/scripts/rebuild-and-bootstrap.sh
Executable file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env bash
# Rebuild every cluster node's NixOS config over SSH, then bootstrap a
# kubeadm cluster: init on cp-1, install Cilium, join the remaining
# control planes and the workers.
#
# Usage: rebuild-and-bootstrap.sh [inventory-file]
# Env:   FLAKE_DIR, SSH_USER, SSH_OPTS may be overridden; node IPs come
#        from the inventory file (CP_1..CP_3, WK_1..WK_3).
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FLAKE_DIR="${FLAKE_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"

# Print a message to stderr and abort.
die() { printf '%s\n' "$*" >&2; exit 1; }

if [ ! -f "$INVENTORY_FILE" ]; then
  echo "Missing inventory file: $INVENTORY_FILE" >&2
  echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs." >&2
  exit 1
fi

# shellcheck disable=SC1090
source "$INVENTORY_FILE"

SSH_USER="${SSH_USER:-micqdf}"
# Deliberately a plain string so callers can override with extra flags;
# it is word-split when passed to ssh.
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"

required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
  if [ -z "${!key:-}" ]; then
    die "Missing required inventory variable: $key"
  fi
done

# Run a command on a node as $SSH_USER.
remote() {
  local host_ip="$1"
  local cmd="$2"
  # shellcheck disable=SC2086 -- SSH_OPTS is intentionally word-split
  ssh $SSH_OPTS "$SSH_USER@$host_ip" "$cmd"
}

# base64-encode to a single line. GNU base64 supports -w0 but BSD/busybox
# variants do not, so strip newlines instead for portability.
b64_oneline() {
  printf '%s' "$1" | base64 | tr -d '\n'
}

# Deploy the flake config for one node over SSH.
rebuild_node() {
  local node_name="$1"
  local node_ip="$2"

  echo "==> Rebuilding $node_name on $node_ip"
  nixos-rebuild switch \
    --flake "$FLAKE_DIR#$node_name" \
    --target-host "$SSH_USER@$node_ip" \
    --use-remote-sudo
}

# Map node name (cp-1) to inventory variable (CP_1) and rebuild each node.
for node in cp-1 cp-2 cp-3 wk-1 wk-2 wk-3; do
  key="${node^^}"
  key="${key//-/_}"
  rebuild_node "$node" "${!key}"
done

echo "==> Initializing control plane on cp-1"
remote "$CP_1" "sudo th-kubeadm-init"

echo "==> Installing Cilium on cp-1"
remote "$CP_1" "helm repo add cilium https://helm.cilium.io >/dev/null 2>&1 || true"
remote "$CP_1" "helm repo update >/dev/null"
remote "$CP_1" "kubectl create namespace kube-system >/dev/null 2>&1 || true"
remote "$CP_1" "helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true"

echo "==> Building kubeadm join commands"
JOIN_CMD="$(remote "$CP_1" "sudo kubeadm token create --print-join-command")"
CERT_KEY="$(remote "$CP_1" "sudo kubeadm init phase upload-certs --upload-certs | tail -n 1")"
CP_JOIN_CMD="$JOIN_CMD --control-plane --certificate-key $CERT_KEY"

# Encode each join command once; base64 shields the command text from
# remote-shell quoting on every node it is sent to.
CP_JOIN_B64="$(b64_oneline "$CP_JOIN_CMD")"
WK_JOIN_B64="$(b64_oneline "$JOIN_CMD")"

# Join one additional control-plane node.
join_control_plane() {
  local node_ip="$1"
  remote "$node_ip" "sudo th-kubeadm-join-control-plane \"\$(echo $CP_JOIN_B64 | base64 -d)\""
}

# Join one worker node.
join_worker() {
  local node_ip="$1"
  remote "$node_ip" "sudo th-kubeadm-join-worker \"\$(echo $WK_JOIN_B64 | base64 -d)\""
}

echo "==> Joining remaining control planes"
join_control_plane "$CP_2"
join_control_plane "$CP_3"

echo "==> Joining workers"
join_worker "$WK_1"
join_worker "$WK_2"
join_worker "$WK_3"

echo "==> Final node list"
remote "$CP_1" "kubectl get nodes -o wide"
|
||||
37
nixos/kubeadm/scripts/reset-cluster-nodes.sh
Executable file
37
nixos/kubeadm/scripts/reset-cluster-nodes.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
# Reset Kubernetes state on every inventory node: kubeadm reset plus
# removal of leftover state directories.
#
# Best-effort per node: a node where `kubeadm reset` fails (e.g. it was
# never bootstrapped) still gets its kubelet stopped and state directories
# removed, and the remaining nodes are still processed. The script exits
# non-zero if any node could not be reached/reset.
#
# Usage: reset-cluster-nodes.sh [inventory-file]
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"

if [ ! -f "$INVENTORY_FILE" ]; then
  echo "Missing inventory file: $INVENTORY_FILE" >&2
  echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs." >&2
  exit 1
fi

# shellcheck disable=SC1090
source "$INVENTORY_FILE"

SSH_USER="${SSH_USER:-micqdf}"
# Deliberately a plain string so callers can override with extra flags;
# it is word-split when passed to ssh.
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"

required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
  if [ -z "${!key:-}" ]; then
    echo "Missing required inventory variable: $key" >&2
    exit 1
  fi
done

# Reset one node. kubeadm reset failure is reported but does not skip the
# kubelet stop / directory cleanup (previously the && chain did both skip
# the cleanup and, under set -e, abort the whole script).
reset_node() {
  local node_ip="$1"
  echo "==> Resetting $node_ip"
  # shellcheck disable=SC2086 -- SSH_OPTS is intentionally word-split
  ssh $SSH_OPTS "$SSH_USER@$node_ip" \
    "sudo kubeadm reset -f || echo 'kubeadm reset failed; continuing cleanup' >&2; sudo systemctl stop kubelet || true; sudo rm -rf /etc/kubernetes /var/lib/etcd /var/lib/cni /etc/cni/net.d"
}

failed=()
for key in CP_1 CP_2 CP_3 WK_1 WK_2 WK_3; do
  reset_node "${!key}" || failed+=("${!key}")
done

if (( ${#failed[@]} > 0 )); then
  printf 'Reset failed on: %s\n' "${failed[*]}" >&2
  exit 1
fi

echo "Cluster components reset on all listed nodes."
|
||||
@@ -9,6 +9,13 @@ output "control_plane_vm_names" {
|
||||
value = [for vm in proxmox_vm_qemu.control_planes : vm.name]
|
||||
}
|
||||
|
||||
output "control_plane_vm_ipv4" {
|
||||
value = {
|
||||
for vm in proxmox_vm_qemu.control_planes :
|
||||
vm.name => vm.default_ipv4_address
|
||||
}
|
||||
}
|
||||
|
||||
output "worker_vm_ids" {
|
||||
value = {
|
||||
for i in range(var.worker_count) :
|
||||
@@ -19,3 +26,10 @@ output "worker_vm_ids" {
|
||||
output "worker_vm_names" {
|
||||
value = [for vm in proxmox_vm_qemu.workers : vm.name]
|
||||
}
|
||||
|
||||
output "worker_vm_ipv4" {
|
||||
value = {
|
||||
for vm in proxmox_vm_qemu.workers :
|
||||
vm.name => vm.default_ipv4_address
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user