diff --git a/.gitea/workflows/terraform-destroy.yml b/.gitea/workflows/terraform-destroy.yml index e592710..1c3cb56 100644 --- a/.gitea/workflows/terraform-destroy.yml +++ b/.gitea/workflows/terraform-destroy.yml @@ -15,8 +15,8 @@ on: type: choice options: - all - - alpacas - - llamas + - control-planes + - workers concurrency: group: terraform-global @@ -77,11 +77,11 @@ jobs: all) terraform plan -destroy -out=tfdestroy ;; - alpacas) - terraform plan -destroy -target=proxmox_vm_qemu.alpacas -out=tfdestroy + control-planes) + terraform plan -destroy -target=proxmox_vm_qemu.control_planes -out=tfdestroy ;; - llamas) - terraform plan -destroy -target=proxmox_vm_qemu.llamas -out=tfdestroy + workers) + terraform plan -destroy -target=proxmox_vm_qemu.workers -out=tfdestroy ;; *) echo "Invalid destroy target: ${{ inputs.target }}" diff --git a/nixos/kubeadm/README.md b/nixos/kubeadm/README.md new file mode 100644 index 0000000..e724000 --- /dev/null +++ b/nixos/kubeadm/README.md @@ -0,0 +1,42 @@ +# Kubeadm Cluster Layout (NixOS) + +This folder defines role-based NixOS configs for a kubeadm cluster. + +## Topology + +- Control planes: `cp-1`, `cp-2`, `cp-3` +- Workers: `wk-1`, `wk-2`, `wk-3` + +## What this provides + +- Shared Kubernetes/node prerequisites in `modules/k8s-common.nix` +- Role-specific settings for control planes and workers +- Host configs for each node in `hosts/` + +## Hardware config files + +Each host file optionally imports `hosts/hardware/<node>.nix` if present. +Copy each node's generated hardware config into this folder: + +```bash +sudo nixos-generate-config +sudo cp /etc/nixos/hardware-configuration.nix ./hosts/hardware/cp-1.nix +``` + +Repeat for each node (`cp-2`, `cp-3`, `wk-1`, `wk-2`, `wk-3`). 
+ +## Deploy approach + +Start from one node at a time while experimenting: + +```bash +sudo nixos-rebuild switch --flake .#cp-1 +``` + +For remote target-host workflows, use your preferred deploy wrapper later +(`nixos-rebuild --target-host ...` or deploy-rs/colmena). + +## Notes + +- This does not run `kubeadm init/join` automatically. +- It prepares OS/runtime/kernel prerequisites so kubeadm bootstrapping is clean. diff --git a/nixos/kubeadm/flake.nix b/nixos/kubeadm/flake.nix new file mode 100644 index 0000000..d3dacef --- /dev/null +++ b/nixos/kubeadm/flake.nix @@ -0,0 +1,26 @@ +{ + description = "NixOS kubeadm cluster configs"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; + }; + + outputs = { nixpkgs, ... }: + let + system = "x86_64-linux"; + mkHost = hostModules: + nixpkgs.lib.nixosSystem { + inherit system; + modules = hostModules; + }; + in { + nixosConfigurations = { + cp-1 = mkHost [ ./hosts/cp-1.nix ]; + cp-2 = mkHost [ ./hosts/cp-2.nix ]; + cp-3 = mkHost [ ./hosts/cp-3.nix ]; + wk-1 = mkHost [ ./hosts/wk-1.nix ]; + wk-2 = mkHost [ ./hosts/wk-2.nix ]; + wk-3 = mkHost [ ./hosts/wk-3.nix ]; + }; + }; +} diff --git a/nixos/kubeadm/hosts/cp-1.nix b/nixos/kubeadm/hosts/cp-1.nix new file mode 100644 index 0000000..56af9ba --- /dev/null +++ b/nixos/kubeadm/hosts/cp-1.nix @@ -0,0 +1,14 @@ +{ lib, ... }: + +{ + imports = + [ + ../modules/k8s-common.nix + ../modules/k8s-control-plane.nix + ] + ++ lib.optional (builtins.pathExists ./hardware/cp-1.nix) ./hardware/cp-1.nix; + + networking.hostName = "cp-1"; + + system.stateVersion = "25.05"; +} diff --git a/nixos/kubeadm/hosts/cp-2.nix b/nixos/kubeadm/hosts/cp-2.nix new file mode 100644 index 0000000..a186ca8 --- /dev/null +++ b/nixos/kubeadm/hosts/cp-2.nix @@ -0,0 +1,14 @@ +{ lib, ... 
}: + +{ + imports = + [ + ../modules/k8s-common.nix + ../modules/k8s-control-plane.nix + ] + ++ lib.optional (builtins.pathExists ./hardware/cp-2.nix) ./hardware/cp-2.nix; + + networking.hostName = "cp-2"; + + system.stateVersion = "25.05"; +} diff --git a/nixos/kubeadm/hosts/cp-3.nix b/nixos/kubeadm/hosts/cp-3.nix new file mode 100644 index 0000000..5006fff --- /dev/null +++ b/nixos/kubeadm/hosts/cp-3.nix @@ -0,0 +1,14 @@ +{ lib, ... }: + +{ + imports = + [ + ../modules/k8s-common.nix + ../modules/k8s-control-plane.nix + ] + ++ lib.optional (builtins.pathExists ./hardware/cp-3.nix) ./hardware/cp-3.nix; + + networking.hostName = "cp-3"; + + system.stateVersion = "25.05"; +} diff --git a/nixos/kubeadm/hosts/hardware/.gitkeep b/nixos/kubeadm/hosts/hardware/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/nixos/kubeadm/hosts/wk-1.nix b/nixos/kubeadm/hosts/wk-1.nix new file mode 100644 index 0000000..709643e --- /dev/null +++ b/nixos/kubeadm/hosts/wk-1.nix @@ -0,0 +1,14 @@ +{ lib, ... }: + +{ + imports = + [ + ../modules/k8s-common.nix + ../modules/k8s-worker.nix + ] + ++ lib.optional (builtins.pathExists ./hardware/wk-1.nix) ./hardware/wk-1.nix; + + networking.hostName = "wk-1"; + + system.stateVersion = "25.05"; +} diff --git a/nixos/kubeadm/hosts/wk-2.nix b/nixos/kubeadm/hosts/wk-2.nix new file mode 100644 index 0000000..5a11e90 --- /dev/null +++ b/nixos/kubeadm/hosts/wk-2.nix @@ -0,0 +1,14 @@ +{ lib, ... }: + +{ + imports = + [ + ../modules/k8s-common.nix + ../modules/k8s-worker.nix + ] + ++ lib.optional (builtins.pathExists ./hardware/wk-2.nix) ./hardware/wk-2.nix; + + networking.hostName = "wk-2"; + + system.stateVersion = "25.05"; +} diff --git a/nixos/kubeadm/hosts/wk-3.nix b/nixos/kubeadm/hosts/wk-3.nix new file mode 100644 index 0000000..64d11d5 --- /dev/null +++ b/nixos/kubeadm/hosts/wk-3.nix @@ -0,0 +1,14 @@ +{ lib, ... 
}: + + { + imports = + [ + ../modules/k8s-common.nix + ../modules/k8s-worker.nix + ] + ++ lib.optional (builtins.pathExists ./hardware/wk-3.nix) ./hardware/wk-3.nix; + + networking.hostName = "wk-3"; + + system.stateVersion = "25.05"; +} diff --git a/nixos/kubeadm/modules/k8s-common.nix b/nixos/kubeadm/modules/k8s-common.nix new file mode 100644 index 0000000..2533868 --- /dev/null +++ b/nixos/kubeadm/modules/k8s-common.nix @@ -0,0 +1,35 @@ +{ pkgs, ... }: + +{ + boot.kernelModules = [ "overlay" "br_netfilter" ]; + + boot.kernel.sysctl = { + "net.ipv4.ip_forward" = 1; + "net.bridge.bridge-nf-call-iptables" = 1; + "net.bridge.bridge-nf-call-ip6tables" = 1; + }; + + virtualisation.containerd.enable = true; + + services.openssh.enable = true; + services.openssh.settings = { + PasswordAuthentication = false; + KbdInteractiveAuthentication = false; + }; + + environment.systemPackages = with pkgs; [ + containerd + cri-tools + cni-plugins + kubernetes + kubectl + kubernetes-helm + conntrack-tools + socat + ethtool + ipvsadm + jq + curl + vim + ]; +} diff --git a/nixos/kubeadm/modules/k8s-control-plane.nix b/nixos/kubeadm/modules/k8s-control-plane.nix new file mode 100644 index 0000000..b17d32d --- /dev/null +++ b/nixos/kubeadm/modules/k8s-control-plane.nix @@ -0,0 +1,14 @@ +{ + networking.firewall.allowedTCPPorts = [ + 6443 + 2379 + 2380 + 10250 + 10257 + 10259 + ]; + + networking.firewall.allowedUDPPorts = [ + 8472 + ]; +} diff --git a/nixos/kubeadm/modules/k8s-worker.nix b/nixos/kubeadm/modules/k8s-worker.nix new file mode 100644 index 0000000..1d6215e --- /dev/null +++ b/nixos/kubeadm/modules/k8s-worker.nix @@ -0,0 +1,11 @@ +{ + networking.firewall.allowedTCPPorts = [ + 10250 + ]; + networking.firewall.allowedTCPPortRanges = [ + { from = 30000; to = 32767; } + ]; + networking.firewall.allowedUDPPorts = [ + 8472 + ]; +} diff --git a/terraform/cloud-init.tf b/terraform/cloud-init.tf index 1c6340a..6c5738e 100644 --- a/terraform/cloud-init.tf +++ b/terraform/cloud-init.tf @@ -1,12 +1,33 @@ -data "template_file" "cloud_init_global" {
+data "template_file" "control_plane_cloud_init" { + count = var.control_plane_count template = file("${path.module}/files/cloud_init_global.tpl") vars = { + hostname = "cp-${count.index + 1}" + domain = "home.arpa" SSH_KEY_PUBLIC = var.SSH_KEY_PUBLIC } } -resource "local_file" "cloud_init_global" { - content = data.template_file.cloud_init_global.rendered - filename = "${path.module}/files/rendered/cloud_init_global.yaml" +resource "local_file" "control_plane_cloud_init" { + count = var.control_plane_count + content = data.template_file.control_plane_cloud_init[count.index].rendered + filename = "${path.module}/files/rendered/cp-${count.index + 1}.yaml" +} + +data "template_file" "worker_cloud_init" { + count = var.worker_count + template = file("${path.module}/files/cloud_init_global.tpl") + + vars = { + hostname = "wk-${count.index + 1}" + domain = "home.arpa" + SSH_KEY_PUBLIC = var.SSH_KEY_PUBLIC + } +} + +resource "local_file" "worker_cloud_init" { + count = var.worker_count + content = data.template_file.worker_cloud_init[count.index].rendered + filename = "${path.module}/files/rendered/wk-${count.index + 1}.yaml" } diff --git a/terraform/files/cloud_init_global.tpl b/terraform/files/cloud_init_global.tpl index 2275158..91f161a 100644 --- a/terraform/files/cloud_init_global.tpl +++ b/terraform/files/cloud_init_global.tpl @@ -1,4 +1,5 @@ #cloud-config +hostname: ${hostname} manage_etc_hosts: true resolv_conf: nameservers: @@ -6,6 +7,7 @@ resolv_conf: - 1.1.1.1 preserve_hostname: false +fqdn: ${hostname}.${domain} users: - name: micqdf diff --git a/terraform/main.tf b/terraform/main.tf index 2c5b727..4d43583 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -16,10 +16,10 @@ provider "proxmox" { pm_tls_insecure = true } -resource "proxmox_vm_qemu" "alpacas" { - count = var.alpaca_vm_count - name = "alpaca-${count.index + 1}" - vmid = 500 + count.index + 1 +resource "proxmox_vm_qemu" "control_planes" { + count = var.control_plane_count + name = 
"cp-${count.index + 1}" + vmid = var.control_plane_vmid_start + count.index target_node = var.target_node clone = var.clone_template full_clone = true @@ -27,22 +27,22 @@ resource "proxmox_vm_qemu" "alpacas" { agent = 1 cpu { - sockets = var.sockets - cores = var.cores + sockets = 1 + cores = var.control_plane_cores } - memory = var.memory + memory = var.control_plane_memory_mb scsihw = "virtio-scsi-pci" boot = "order=scsi0" bootdisk = "scsi0" ipconfig0 = "ip=dhcp" - cicustom = "user=local:snippets/cloud_init_global.yaml" + cicustom = "user=local:snippets/cp-${count.index + 1}.yaml" disks { scsi { scsi0 { disk { - size = var.disk_size + size = var.control_plane_disk_size storage = var.storage } } @@ -65,10 +65,10 @@ resource "proxmox_vm_qemu" "alpacas" { } -resource "proxmox_vm_qemu" "llamas" { - count = var.llama_vm_count - name = "llama-${count.index + 1}" - vmid = 600 + count.index + 1 +resource "proxmox_vm_qemu" "workers" { + count = var.worker_count + name = "wk-${count.index + 1}" + vmid = var.worker_vmid_start + count.index target_node = var.target_node clone = var.clone_template full_clone = true @@ -76,21 +76,21 @@ resource "proxmox_vm_qemu" "llamas" { agent = 1 cpu { - sockets = var.sockets - cores = var.cores + sockets = 1 + cores = var.worker_cores[count.index] } - memory = var.memory + memory = var.worker_memory_mb[count.index] scsihw = "virtio-scsi-pci" boot = "order=scsi0" bootdisk = "scsi0" ipconfig0 = "ip=dhcp" - cicustom = "user=local:snippets/cloud_init_global.yaml" + cicustom = "user=local:snippets/wk-${count.index + 1}.yaml" disks { scsi { scsi0 { disk { - size = var.disk_size + size = var.worker_disk_size storage = var.storage } } diff --git a/terraform/outputs.tf b/terraform/outputs.tf index 148cb0f..8ec628f 100644 --- a/terraform/outputs.tf +++ b/terraform/outputs.tf @@ -1,21 +1,21 @@ -output "alpaca_vm_ids" { +output "control_plane_vm_ids" { value = { - for i in range(var.alpaca_vm_count) : - "alpaca-${i + 1}" => 
proxmox_vm_qemu.alpacas[i].vmid + for i in range(var.control_plane_count) : + "cp-${i + 1}" => proxmox_vm_qemu.control_planes[i].vmid } } -output "alpaca_vm_names" { - value = [for vm in proxmox_vm_qemu.alpacas : vm.name] +output "control_plane_vm_names" { + value = [for vm in proxmox_vm_qemu.control_planes : vm.name] } -output "llama_vm_ids" { +output "worker_vm_ids" { value = { - for i in range(var.llama_vm_count) : - "llama-${i + 1}" => proxmox_vm_qemu.llamas[i].vmid + for i in range(var.worker_count) : + "wk-${i + 1}" => proxmox_vm_qemu.workers[i].vmid } } -output "llama_vm_names" { - value = [for vm in proxmox_vm_qemu.llamas : vm.name] +output "worker_vm_names" { + value = [for vm in proxmox_vm_qemu.workers : vm.name] } diff --git a/terraform/terraform.tfvars b/terraform/terraform.tfvars index 1f92a4c..47bf16a 100644 --- a/terraform/terraform.tfvars +++ b/terraform/terraform.tfvars @@ -1,10 +1,19 @@ target_node = "flex" clone_template = "nixos-template" -cores = 1 -memory = 1024 -disk_size = "15G" -sockets = 1 bridge = "vmbr0" storage = "Flash" pm_api_url = "https://100.105.0.115:8006/api2/json" pm_api_token_id = "terraform-prov@pve!mytoken" + +control_plane_count = 3 +worker_count = 3 +control_plane_vmid_start = 701 +worker_vmid_start = 711 + +control_plane_cores = 1 +control_plane_memory_mb = 4096 +control_plane_disk_size = "40G" + +worker_cores = [4, 4, 3] +worker_memory_mb = [12288, 12288, 12288] +worker_disk_size = "60G" diff --git a/terraform/variables.tf b/terraform/variables.tf index e805ebe..8c83b54 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -27,20 +27,74 @@ variable "clone_template" { type = string } -variable "cores" { - type = number +variable "control_plane_count" { + type = number + default = 3 + description = "Number of control plane VMs" } -variable "memory" { - type = number +variable "worker_count" { + type = number + default = 3 + description = "Number of worker VMs" } -variable "disk_size" { - type = string 
+variable "control_plane_vmid_start" { + type = number + default = 701 + description = "Starting VMID for control plane VMs" } -variable "sockets" { - type = number +variable "worker_vmid_start" { + type = number + default = 711 + description = "Starting VMID for worker VMs" +} + +variable "control_plane_cores" { + type = number + default = 1 + description = "vCPU cores per control plane VM" +} + +variable "control_plane_memory_mb" { + type = number + default = 4096 + description = "Memory in MB per control plane VM" +} + +variable "worker_cores" { + type = list(number) + default = [4, 4, 3] + description = "vCPU cores for each worker VM" + + validation { + condition = length(var.worker_cores) == var.worker_count + error_message = "worker_cores list length must equal worker_count." + } +} + +variable "worker_memory_mb" { + type = list(number) + default = [12288, 12288, 12288] + description = "Memory in MB for each worker VM" + + validation { + condition = length(var.worker_memory_mb) == var.worker_count + error_message = "worker_memory_mb list length must equal worker_count." + } +} + +variable "control_plane_disk_size" { + type = string + default = "40G" + description = "Disk size for control plane VMs" +} + +variable "worker_disk_size" { + type = string + default = "60G" + description = "Disk size for worker VMs" } variable "bridge" { @@ -55,18 +109,6 @@ variable "pm_api_url" { type = string } -variable "alpaca_vm_count" { - type = number - default = 1 - description = "How many Alpaca VMs to create" -} - -variable "llama_vm_count" { - type = number - default = 1 - description = "How many Llama VMs to create" -} - variable "SSH_KEY_PUBLIC" { type = string description = "Public SSH key injected via cloud-init"