feat: migrate cluster baseline from Hetzner to Proxmox
Deploy Cluster / Terraform (push) Failing after 52s
Deploy Cluster / Ansible (push) Has been skipped
Deploy Grafana Content / Grafana Content (push) Failing after 1m37s

Replace Hetzner infrastructure and cloud-provider assumptions with Proxmox
VM clones, kube-vip API HA, and NFS-backed storage. Update bootstrap,
Flux addons, CI workflows, and docs to target the new private Proxmox
baseline while preserving the existing Tailscale, Doppler, Flux, Rancher,
and B2 backup flows.
2026-04-22 03:02:13 +00:00
parent 6c6b9d20ca
commit b1dae28aa5
40 changed files with 577 additions and 784 deletions
+4 -20
View File
@@ -12,12 +12,15 @@ on:
env:
TF_VERSION: "1.7.0"
TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
TF_VAR_proxmox_insecure: "true"
jobs:
dashboards:
@@ -51,25 +54,6 @@ jobs:
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"
- name: Detect runner egress IP
run: |
RUNNER_IP=$(curl -fsSL https://api.ipify.org)
echo "RUNNER_CIDR=[\"${RUNNER_IP}/32\"]" >> "$GITHUB_ENV"
echo "Runner egress IP: ${RUNNER_IP}"
- name: Open SSH/API for current runner CIDR
working-directory: terraform
run: |
terraform apply \
-refresh=false \
-target=hcloud_firewall.cluster \
-var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-var="allowed_ssh_ips=${RUNNER_CIDR}" \
-var="allowed_api_ips=${RUNNER_CIDR}" \
-auto-approve
- name: Install Python Dependencies
run: |
apt-get update && apt-get install -y python3-pip
+7 -62
View File
@@ -11,12 +11,15 @@ on:
env:
TF_VERSION: "1.7.0"
TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
TF_VAR_proxmox_insecure: "true"
TS_OAUTH_CLIENT_ID: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}
TS_OAUTH_CLIENT_SECRET: ${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}
@@ -60,40 +63,6 @@ jobs:
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
chmod 644 ~/.ssh/id_ed25519.pub
- name: Install jq
run: |
apt-get update
apt-get install -y jq
- name: Import existing servers into state (if missing)
working-directory: terraform
env:
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
run: |
set -e
ensure_import() {
address="$1"
name="$2"
if terraform state show "$address" >/dev/null 2>&1; then
echo "$address already in state"
return
fi
id=$(curl -sS -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers?name=${name}" | jq -r '.servers[0].id // empty')
if [ -n "$id" ]; then
echo "Importing $address from server $name ($id)"
terraform import "$address" "$id"
else
echo "No existing server found for $name; skipping import"
fi
}
ensure_import 'hcloud_server.control_plane[0]' 'k8s-cluster-cp-1'
ensure_import 'hcloud_server.control_plane[1]' 'k8s-cluster-cp-2'
ensure_import 'hcloud_server.control_plane[2]' 'k8s-cluster-cp-3'
ensure_import 'hcloud_server.workers[0]' 'k8s-cluster-worker-1'
ensure_import 'hcloud_server.workers[1]' 'k8s-cluster-worker-2'
ensure_import 'hcloud_server.workers[2]' 'k8s-cluster-worker-3'
- name: Terraform Plan
id: plan
working-directory: terraform
@@ -187,32 +156,11 @@ jobs:
mkdir -p ../outputs
terraform output -json > ../outputs/terraform_outputs.json
- name: Detect runner egress IP
run: |
RUNNER_IP=$(curl -fsSL https://api.ipify.org)
echo "RUNNER_CIDR=[\"${RUNNER_IP}/32\"]" >> "$GITHUB_ENV"
echo "Runner egress IP: ${RUNNER_IP}"
- name: Open SSH/API for current runner CIDR
working-directory: terraform
run: |
terraform apply \
-target=hcloud_firewall.cluster \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-var="allowed_ssh_ips=${RUNNER_CIDR}" \
-var="allowed_api_ips=${RUNNER_CIDR}" \
-auto-approve
- name: Install Python Dependencies
run: |
apt-get update && apt-get install -y python3-pip
pip3 install --break-system-packages ansible kubernetes jinja2 pyyaml
- name: Note runner connectivity mode
run: |
echo "Using runner public network access with RUNNER_ALLOWED_CIDRS for SSH/API"
- name: Install Ansible Collections
run: ansible-galaxy collection install -r ansible/requirements.yml
@@ -224,7 +172,6 @@ jobs:
working-directory: ansible
run: |
ansible-playbook site.yml \
-e "hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-e "tailscale_auth_key=${{ secrets.TAILSCALE_AUTH_KEY }}" \
-e "tailscale_tailnet=${{ secrets.TAILSCALE_TAILNET }}" \
-e "tailscale_oauth_client_id=${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}" \
@@ -294,9 +241,8 @@ jobs:
key: dopplerToken
namespace: external-secrets
EOF
# Wait for CCM and CSI (Hetzner cloud integration)
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-ccm --timeout=600s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-csi --timeout=600s
# Wait for the storage layer and private access components
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=600s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
- name: Wait for Rancher and backup operator
@@ -397,10 +343,9 @@ jobs:
working-directory: ansible
run: |
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get nodes -o wide"
ansible -i inventory.ini 'control_plane[0]' -m shell -a "kubectl describe nodes | grep -E 'Name:|providerID:'"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n flux-system get gitrepositories,kustomizations,helmreleases"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n kube-system get pods -o wide"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass flash-nfs"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n tailscale-system get pods -o wide"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n external-secrets get pods"
env:
+13 -123
View File
@@ -10,107 +10,22 @@ on:
env:
TF_VERSION: "1.7.0"
TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
B2_ACCOUNT_ID: ${{ secrets.B2_ACCOUNT_ID }}
B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
TF_VAR_proxmox_insecure: "true"
jobs:
pre-destroy-backup:
name: Pre-Destroy Backup
runs-on: ubuntu-latest
if: github.event.inputs.confirm == 'destroy'
environment: destroy
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Terraform
uses: hashicorp/setup-terraform@v3
with:
terraform_version: ${{ env.TF_VERSION }}
- name: Terraform Init
working-directory: terraform
run: |
terraform init \
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-backend-config="region=auto" \
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"
- name: Setup SSH Keys
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
chmod 644 ~/.ssh/id_ed25519.pub
- name: Get Control Plane IP
id: cp_ip
working-directory: terraform
run: |
PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
echo "PRIMARY_IP=${PRIMARY_IP}" >> "$GITHUB_ENV"
- name: Pre-Destroy pg_dump to B2
run: |
set +e
echo "Attempting pre-destroy backup to B2..."
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@${PRIMARY_IP} << 'EOF'
set -e
# Check if kubectl is available and cluster is up
if ! command -v kubectl &> /dev/null; then
echo "kubectl not found, skipping pre-destroy backup"
exit 0
fi
# Check if we can reach the cluster
if ! kubectl cluster-info &> /dev/null; then
echo "Cannot reach cluster, skipping pre-destroy backup"
exit 0
fi
# Check if CNP is deployed
if ! kubectl get namespace cnpg-cluster &> /dev/null; then
echo "CNP namespace not found, skipping pre-destroy backup"
exit 0
fi
# Run backup using the pgdump image directly
BACKUP_FILE="rancher-backup-$(date +%Y%m%d-%H%M%S).sql.gz"
B2_ACCOUNT_ID="$(cat /etc/kubernetes/secret/b2_account_id 2>/dev/null || echo '')"
B2_APPLICATION_KEY="$(cat /etc/kubernetes/secret/b2_application_key 2>/dev/null || echo '')"
if [ -z "$B2_ACCOUNT_ID" ] || [ -z "$B2_APPLICATION_KEY" ]; then
echo "B2 credentials not found in secret, skipping pre-destroy backup"
exit 0
fi
kubectl run pgdump-manual --image=ghcr.io/cloudnative-pg/pgbackrest:latest --restart=Never \
-n cnpg-cluster --dry-run=client -o yaml | \
kubectl apply -f -
echo "Waiting for backup job to complete..."
kubectl wait --for=condition=complete job/pgdump-manual -n cnpg-cluster --timeout=300s || true
kubectl logs job/pgdump-manual -n cnpg-cluster || true
kubectl delete job pgdump-manual -n cnpg-cluster --ignore-not-found=true || true
EOF
echo "Pre-destroy backup step completed (failure is non-fatal)"
destroy:
name: Destroy Cluster
runs-on: ubuntu-latest
if: github.event.inputs.confirm == 'destroy'
environment: destroy
needs: pre-destroy-backup
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -120,6 +35,14 @@ jobs:
with:
terraform_version: ${{ env.TF_VERSION }}
- name: Setup SSH Keys
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
chmod 644 ~/.ssh/id_ed25519.pub
- name: Terraform Init
working-directory: terraform
run: |
@@ -131,19 +54,6 @@ jobs:
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"
- name: Setup SSH Keys
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
chmod 644 ~/.ssh/id_ed25519.pub
- name: Install jq
run: |
apt-get update
apt-get install -y jq
- name: Terraform Destroy
id: destroy
working-directory: terraform
@@ -152,7 +62,6 @@ jobs:
for attempt in 1 2 3; do
echo "Terraform destroy attempt ${attempt}/3"
terraform destroy \
-var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-auto-approve
@@ -164,32 +73,13 @@ jobs:
echo "Terraform destroy failed with exit code ${rc}; retrying in 30s"
sleep 30
terraform refresh \
-var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" || true
fi
done
exit "$rc"
- name: Hetzner destroy diagnostics
- name: Terraform state diagnostics
if: failure() && steps.destroy.outcome == 'failure'
env:
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
run: |
set +e
echo "== Terraform state list =="
terraform -chdir=terraform state list || true
network_id=$(terraform -chdir=terraform state show hcloud_network.cluster 2>/dev/null | awk '/^id *=/ {gsub(/"/, "", $3); print $3; exit}')
if [ -z "$network_id" ]; then
network_id="11988935"
fi
echo "== Hetzner network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/networks/${network_id}" | jq . || true
echo "== Hetzner servers attached to network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers" | jq --argjson id "$network_id" '.servers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true
echo "== Hetzner load balancers attached to network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/load_balancers" | jq --argjson id "$network_id" '.load_balancers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true
+7 -3
View File
@@ -9,7 +9,9 @@ Repository guide for OpenCode sessions in this repo.
## Current Baseline
- HA private cluster: 3 control planes, 3 workers.
- HA private cluster: 3 control planes, 5 workers on Proxmox.
- Proxmox clones come from template `9000` on node `flex`; API VIP is `10.27.27.40` via kube-vip.
- Storage is `nfs-subdir-external-provisioner` backed by `10.27.27.22:/TheFlash/k8s-nfs` with StorageClass `flash-nfs`.
- Tailscale is the private access path for Rancher and shared services.
- Rancher, Grafana, and Prometheus are exposed through Tailscale; Flux UI / Weave GitOps is removed.
- `apps/` is suspended by default.
@@ -20,8 +22,8 @@ Repository guide for OpenCode sessions in this repo.
- Terraform: `terraform -chdir=terraform fmt -recursive`, `terraform -chdir=terraform validate`, `terraform -chdir=terraform plan -var-file=../terraform.tfvars`, `terraform -chdir=terraform apply -var-file=../terraform.tfvars`
- Ansible: `ansible-galaxy collection install -r ansible/requirements.yml`, `cd ansible && python3 generate_inventory.py`, `ansible-playbook -i ansible/inventory.ini ansible/site.yml --syntax-check`, `ansible-playbook ansible/site.yml`
- Flux/Kustomize: `kubectl kustomize infrastructure/addons/<addon>`, `kubectl kustomize clusters/prod/flux-system`
- Kubeconfig refresh: `scripts/refresh-kubeconfig.sh <cp1-public-ip>`
- Tailnet smoke check: `ssh root@<cp1-ip> 'bash -s' < scripts/smoke-check-tailnet-services.sh`
- Kubeconfig refresh: `scripts/refresh-kubeconfig.sh <cp1-ip>`
- Tailnet smoke check: `ssh ubuntu@<cp1-ip> 'bash -s' < scripts/smoke-check-tailnet-services.sh`
## Workflow Rules
@@ -31,12 +33,14 @@ Repository guide for OpenCode sessions in this repo.
- CI deploy order is Terraform -> Ansible -> Flux bootstrap -> Rancher restore -> health checks.
- One object per Kubernetes YAML file; keep filenames kebab-case.
- If `kubectl` points at `localhost:8080` after a rebuild, refresh kubeconfig from the primary control-plane IP.
- Bootstrap assumptions that matter: SSH user is `ubuntu`, NIC is `ens18`, API join endpoint is the kube-vip address.
## Repo-Specific Gotchas
- `rancher-backup` uses a postRenderer to swap the broken hook image to `rancher/kubectl:v1.34.0`; do not put S3 config in HelmRelease values. Put it in the Backup CR.
- Tailscale cleanup only runs before service proxies exist; it removes stale offline `rancher`/`grafana`/`prometheus`/`flux` devices, then must stop so live proxies are not deleted.
- Keep the Tailscale operator on the stable Helm repo `https://pkgs.tailscale.com/helmcharts` at `1.96.5` unless you have a reason to change it.
- The repo no longer uses a cloud controller manager. If you see `providerID` or Hetzner-specific logic, it is stale.
- Current private URLs:
- Rancher: `https://rancher.silverside-gopher.ts.net/`
- Grafana: `http://grafana.silverside-gopher.ts.net/`
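Given the baseline above (kube-vip VIP `10.27.27.40`, `flash-nfs` as the default StorageClass, services exposed only over Tailscale), a minimal sanity sketch could look like the following. This is not the repo's `scripts/smoke-check-tailnet-services.sh`; it assumes `KUBECONFIG` already points at the cluster and the host running it is on the tailnet.

```bash
#!/usr/bin/env bash
# Minimal post-rebuild sanity check (a sketch, not a repo script).
set -euo pipefail

# kube-vip API VIP answers on 6443 (-k: cluster-internal CA; no -f so an
# anonymous 401/403 response still counts as "reachable").
curl -ks --max-time 5 https://10.27.27.40:6443/version >/dev/null
echo "API VIP reachable"

# NFS-backed default StorageClass exists.
kubectl get storageclass flash-nfs

# Private Tailscale endpoints respond.
curl -fsk --max-time 10 -o /dev/null https://rancher.silverside-gopher.ts.net/
curl -fs  --max-time 10 -o /dev/null http://grafana.silverside-gopher.ts.net/
echo "tailnet services reachable"
```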
+41 -56
View File
@@ -1,30 +1,28 @@
# Hetzner Kubernetes Cluster
# Proxmox Kubernetes Cluster
Production-ready Kubernetes cluster on Hetzner Cloud using Terraform and Ansible.
Production-ready private Kubernetes cluster on Proxmox using Terraform, Ansible, and Flux.
## Architecture
| Component | Details |
|-----------|---------|
| **Control Plane** | 3x CX23 (HA) |
| **Workers** | 3x CX33 |
| **Control Plane** | 3x Proxmox VMs (2 vCPU / 4 GiB / 32 GiB) |
| **Workers** | 5x Proxmox VMs (4 vCPU / 8 GiB / 64 GiB) |
| **K8s** | k3s (latest, HA) |
| **Addons** | Hetzner CCM + CSI + Prometheus + Grafana + Loki |
| **Addons** | NFS provisioner + Prometheus + Grafana + Loki + Rancher |
| **Access** | SSH/API and private services restricted to Tailnet |
| **Bootstrap** | Terraform + Ansible + Flux |
## Prerequisites
### 1. Hetzner Cloud API Token
### 1. Proxmox API Token
1. Go to [Hetzner Cloud Console](https://console.hetzner.com/)
2. Select your project (or create a new one)
3. Navigate to **Security** → **API Tokens**
4. Click **Generate API Token**
5. Set description: `k8s-cluster-terraform`
6. Select permissions: **Read & Write**
7. Click **Generate API Token**
8. **Copy the token immediately** - it won't be shown again!
Create an API token for the Proxmox VE user used by Terraform. The repo expects the `bpg/proxmox` provider with:
- endpoint: `https://100.105.0.115:8006/`
- node: `flex`
- clone source: template `9000` (`ubuntu-2404-k8s-template`)
- auth: API token
### 2. Backblaze B2 Bucket (for Terraform State)
@@ -44,7 +42,7 @@ Production-ready Kubernetes cluster on Hetzner Cloud using Terraform and Ansible
### 3. SSH Key Pair
```bash
ssh-keygen -t ed25519 -C "k8s@hetzner" -f ~/.ssh/hetzner_k8s
ssh-keygen -t ed25519 -C "k8s@proxmox" -f ~/.ssh/infra
```
### 4. Local Tools
@@ -71,10 +69,12 @@ cp terraform.tfvars.example terraform.tfvars
Edit `terraform.tfvars`:
```hcl
hcloud_token = "your-hetzner-api-token"
proxmox_endpoint = "https://100.105.0.115:8006/"
proxmox_api_token_id = "terraform-prov@pve!k8s-cluster"
proxmox_api_token_secret = "your-proxmox-token-secret"
ssh_public_key = "~/.ssh/hetzner_k8s.pub"
ssh_private_key = "~/.ssh/hetzner_k8s"
ssh_public_key = "~/.ssh/infra.pub"
ssh_private_key = "~/.ssh/infra"
s3_access_key = "your-backblaze-key-id"
s3_secret_key = "your-backblaze-application-key"
@@ -84,12 +84,7 @@ s3_bucket = "k8s-terraform-state"
tailscale_auth_key = "tskey-auth-..."
tailscale_tailnet = "yourtailnet.ts.net"
restrict_api_ssh_to_tailnet = true
tailnet_cidr = "100.64.0.0/10"
enable_nodeport_public = false
allowed_ssh_ips = []
allowed_api_ips = []
kube_api_vip = "10.27.27.40"
```
### 3. Initialize Terraform
@@ -152,7 +147,9 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → **
| Secret | Description |
|--------|-------------|
| `HCLOUD_TOKEN` | Hetzner Cloud API token |
| `PROXMOX_ENDPOINT` | Proxmox API endpoint (for example `https://100.105.0.115:8006/`) |
| `PROXMOX_API_TOKEN_ID` | Proxmox API token ID |
| `PROXMOX_API_TOKEN_SECRET` | Proxmox API token secret |
| `S3_ACCESS_KEY` | Backblaze B2 keyID |
| `S3_SECRET_KEY` | Backblaze B2 applicationKey |
| `S3_ENDPOINT` | Backblaze S3 endpoint (e.g., `https://s3.eu-central-003.backblazeb2.com`) |
@@ -163,7 +160,6 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → **
| `TAILSCALE_OAUTH_CLIENT_SECRET` | Tailscale OAuth client secret for Kubernetes Operator |
| `DOPPLER_HETZNERTERRA_SERVICE_TOKEN` | Doppler service token for `hetznerterra` runtime secrets |
| `GRAFANA_ADMIN_PASSWORD` | Optional admin password for Grafana (auto-generated if unset) |
| `RUNNER_ALLOWED_CIDRS` | Optional CIDR list for CI runner access if you choose to pass it via tfvars/secrets |
| `SSH_PUBLIC_KEY` | SSH public key content |
| `SSH_PRIVATE_KEY` | SSH private key content |
@@ -176,8 +172,8 @@ This repo uses Flux for continuous reconciliation after Terraform + Ansible boot
The current default target is the HA private baseline:
- `3` control plane nodes
- `3` worker nodes
- private Hetzner network only
- `5` worker nodes
- private Proxmox network only
- Tailscale for operator and service access
- Flux-managed platform addons with `apps` suspended by default
@@ -207,8 +203,7 @@ Terraform/bootstrap secrets remain in Gitea Actions secrets and are not managed
### Reconciliation graph
- `infrastructure` (top-level)
- `addon-ccm`
- `addon-csi` depends on `addon-ccm`
- `addon-nfs-storage`
- `addon-tailscale-operator`
- `addon-observability`
- `addon-observability-content` depends on `addon-observability`
@@ -224,7 +219,7 @@ Terraform/bootstrap secrets remain in Gitea Actions secrets and are not managed
### Current addon status
- Core infrastructure addons are Flux-managed from `infrastructure/addons/`.
- Active Flux addons for the current baseline: `addon-ccm`, `addon-csi`, `addon-cert-manager`, `addon-external-secrets`, `addon-tailscale-operator`, `addon-tailscale-proxyclass`, `addon-observability`, `addon-observability-content`, `addon-rancher`, `addon-rancher-config`, `addon-rancher-backup`, `addon-rancher-backup-config`.
- Active Flux addons for the current baseline: `addon-nfs-storage`, `addon-cert-manager`, `addon-external-secrets`, `addon-tailscale-operator`, `addon-tailscale-proxyclass`, `addon-observability`, `addon-observability-content`, `addon-rancher`, `addon-rancher-config`, `addon-rancher-backup`, `addon-rancher-backup-config`.
- `apps` remains suspended until workload rollout is explicitly enabled.
- Ansible is limited to cluster bootstrap, prerequisite secret creation, pre-proxy Tailscale cleanup, and kubeconfig finalization.
- Weave GitOps / Flux UI is no longer deployed; use Rancher or the `flux` CLI for Flux operations.
@@ -232,14 +227,14 @@ Terraform/bootstrap secrets remain in Gitea Actions secrets and are not managed
### Rancher access
- Rancher is private-only and exposed through Tailscale at `https://rancher.silverside-gopher.ts.net/`.
- The public Hetzner load balancer path is not used for Rancher.
- Rancher and the Kubernetes API stay private; kube-vip provides the API VIP on the LAN.
- Rancher stores state in embedded etcd; no external database is used.
### Stable baseline acceptance
A rebuild is considered successful only when all of the following pass without manual intervention:
- Terraform create succeeds for the default `3` control planes and `3` workers.
- Terraform create succeeds for the default `3` control planes and `5` workers.
- Ansible bootstrap succeeds end-to-end.
- All nodes become `Ready`.
- Flux core reconciliation is healthy.
@@ -323,9 +318,6 @@ It avoids full cluster provisioning and only applies Grafana content resources:
├── terraform/
│ ├── main.tf
│ ├── variables.tf
│ ├── network.tf
│ ├── firewall.tf
│ ├── ssh.tf
│ ├── servers.tf
│ ├── outputs.tf
│ └── backend.tf
@@ -353,17 +345,19 @@ It avoids full cluster provisioning and only applies Grafana content resources:
## Firewall Rules
This repo no longer manages cloud firewalls. Access control is expected to be handled on your LAN infrastructure and through Tailscale.
Important cluster-local ports still in use:
| Port | Source | Purpose |
|------|--------|---------|
| 22 | Tailnet CIDR | SSH |
| 6443 | Tailnet CIDR + internal | Kubernetes API |
| 41641/udp | Any | Tailscale WireGuard |
| 9345 | 10.0.0.0/16 | k3s Supervisor (HA join) |
| 2379 | 10.0.0.0/16 | etcd Client |
| 2380 | 10.0.0.0/16 | etcd Peer |
| 8472 | 10.0.0.0/16 | Flannel VXLAN |
| 10250 | 10.0.0.0/16 | Kubelet |
| 30000-32767 | Optional | NodePorts (disabled by default) |
| 22 | Admin hosts / CI | SSH |
| 6443 | 10.27.27.0/24 + VIP | Kubernetes API |
| 9345 | 10.27.27.0/24 | k3s Supervisor |
| 2379 | 10.27.27.0/24 | etcd Client |
| 2380 | 10.27.27.0/24 | etcd Peer |
| 8472/udp | 10.27.27.0/24 | Flannel VXLAN |
| 10250 | 10.27.27.0/24 | Kubelet |
## Operations
@@ -399,7 +393,7 @@ terraform destroy
### Check k3s Logs
```bash
ssh root@<control-plane-ip> journalctl -u k3s -f
ssh ubuntu@<control-plane-ip> sudo journalctl -u k3s -f
```
### Reset k3s
@@ -408,19 +402,10 @@ ssh root@<control-plane-ip> journalctl -u k3s -f
ansible-playbook site.yml -t reset
```
## Costs Breakdown
| Resource | Quantity | Unit Price | Monthly |
|----------|----------|------------|---------|
| CX23 (Control Plane) | 3 | €2.99 | €8.97 |
| CX33 (Workers) | 4 | €4.99 | €19.96 |
| Backblaze B2 | ~1 GB | Free (first 10GB) | €0.00 |
| **Total** | | | **€28.93/mo** |
## Security Notes
- Control plane has HA (3 nodes, can survive 1 failure)
- Consider adding Hetzner load balancer for API server
- Kubernetes API HA is provided by kube-vip on `10.27.27.40`
- Rotate API tokens regularly
- Use network policies in Kubernetes
- Enable audit logging for production
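For local access after a rebuild, a sketch of pulling a kubeconfig that talks to the API through the kube-vip VIP (using the first control-plane IP from `terraform.tfvars`; pointing the kubeconfig at the VIP works because `kube_api_endpoint` is included in the k3s `--tls-san` list):

```bash
# Fetch the k3s kubeconfig from cp-1 and rewrite it to use the API VIP.
ssh ubuntu@10.27.27.30 'sudo cat /etc/rancher/k3s/k3s.yaml' \
  | sed 's/127.0.0.1/10.27.27.40/g' > kubeconfig-prod
chmod 600 kubeconfig-prod
export KUBECONFIG=$PWD/kubeconfig-prod
kubectl get nodes -o wide
```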
+14 -7
View File
@@ -1,6 +1,6 @@
# Gitea Secrets Setup
This document describes the secrets required for the HetznerTerra deployment workflow.
This document describes the secrets required for the Proxmox-based deployment workflow.
## Required Secrets
@@ -9,10 +9,17 @@ Add these secrets in your Gitea repository settings:
### Infrastructure Secrets
#### `HCLOUD_TOKEN`
- Hetzner Cloud API token
- Get from: https://console.hetzner.com/projects/{project-id}/security/api-tokens
- Permissions: Read & Write
#### `PROXMOX_ENDPOINT`
- Proxmox VE API endpoint
- Example: `https://100.105.0.115:8006/`
#### `PROXMOX_API_TOKEN_ID`
- Proxmox API token ID
- Example: `terraform-prov@pve!k8s-cluster`
#### `PROXMOX_API_TOKEN_SECRET`
- Proxmox API token secret
- Create with `pveum user token add terraform-prov@pve k8s-cluster`
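The bullet above names the token command; a fuller, illustrative sequence on the Proxmox host might look like the following. The role name and privilege list are assumptions for a VM-cloning Terraform user, not values from this repo; trim them to what your setup actually needs.

```bash
# Create the Terraform user, an illustrative role, and the API token.
pveum user add terraform-prov@pve
pveum role add TerraformProv -privs "VM.Allocate VM.Clone VM.Audit VM.PowerMgmt \
  VM.Config.CPU VM.Config.Memory VM.Config.Disk VM.Config.Network \
  VM.Config.Cloudinit VM.Config.Options Datastore.Audit Datastore.AllocateSpace Sys.Audit"
pveum aclmod / -user terraform-prov@pve -role TerraformProv
# --privsep 0 makes the token inherit the user's permissions.
pveum user token add terraform-prov@pve k8s-cluster --privsep 0
```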
#### `S3_ACCESS_KEY` & `S3_SECRET_KEY`
- Backblaze B2 credentials for Terraform state storage
@@ -31,7 +38,7 @@ Add these secrets in your Gitea repository settings:
#### `SSH_PRIVATE_KEY` & `SSH_PUBLIC_KEY`
- SSH key pair for cluster access
- Generate with: `ssh-keygen -t ed25519 -C "k8s@hetzner" -f ~/.ssh/hetzner_k8s`
- Generate with: `ssh-keygen -t ed25519 -C "k8s@proxmox" -f ~/.ssh/infra`
- Private key content (include BEGIN/END lines)
- Public key content (full line starting with ssh-ed25519)
@@ -90,4 +97,4 @@ Check the workflow logs to verify all secrets are being used correctly.
- Prefer Doppler for runtime app/platform secrets after cluster bootstrap
- Rotate Tailscale auth keys periodically
- Review OAuth client permissions regularly
- The workflow automatically opens SSH/API access only for the runner's IP during deployment
- CI expects direct SSH access to the Proxmox VMs and direct Proxmox API access
+12 -14
View File
@@ -5,9 +5,9 @@ This document defines the current engineering target for this repository.
## Topology
- 3 control planes (HA etcd cluster)
- 3 workers
- Hetzner Load Balancer for Kubernetes API
- private Hetzner network
- 5 workers
- kube-vip API VIP (`10.27.27.40`)
- private Proxmox/LAN network (`10.27.27.0/24`)
- Tailscale operator access and service exposure
- Rancher exposed through Tailscale (`rancher.silverside-gopher.ts.net`)
- Grafana exposed through Tailscale (`grafana.silverside-gopher.ts.net`)
@@ -17,11 +17,10 @@ This document defines the current engineering target for this repository.
## In Scope
- Terraform infrastructure bootstrap
- Ansible k3s bootstrap with external cloud provider
- Ansible k3s bootstrap on Ubuntu cloud-init VMs
- **HA control plane (3 nodes with etcd quorum)**
- **Hetzner Load Balancer for Kubernetes API**
- **Hetzner CCM deployed via Ansible (before workers join)**
- **Hetzner CSI for persistent volumes (via Flux)**
- **kube-vip for Kubernetes API HA**
- **NFS-backed persistent volumes via `nfs-subdir-external-provisioner`**
- Flux core reconciliation
- External Secrets Operator with Doppler
- Tailscale private access and smoke-check validation
@@ -45,15 +44,14 @@ This document defines the current engineering target for this repository.
## Phase Gates
1. Terraform apply completes for HA topology (3 CP, 3 workers, 1 LB).
2. Load Balancer is healthy with all 3 control plane targets.
3. Primary control plane bootstraps with `--cluster-init`.
4. Secondary control planes join via Load Balancer endpoint.
5. **CCM deployed via Ansible before workers join** (fixes uninitialized taint issue).
6. Workers join successfully via Load Balancer and all nodes show proper `providerID`.
1. Terraform apply completes for HA topology (3 CP, 5 workers, 1 VIP).
2. Primary control plane bootstraps with `--cluster-init`.
3. kube-vip advertises `10.27.27.40:6443` from the control-plane set.
4. Secondary control planes join via the kube-vip endpoint.
5. Workers join successfully via the kube-vip endpoint.
7. etcd reports 3 healthy members.
8. Flux source and infrastructure reconciliation are healthy.
9. **CSI deploys and creates `hcloud-volumes` StorageClass**.
9. **NFS provisioner deploys and creates `flash-nfs` StorageClass**.
10. **PVC provisioning tested and working**.
11. External Secrets sync required secrets.
12. Tailscale private access works for Rancher, Grafana, and Prometheus.
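As an illustration of gate 10, a throwaway PVC against `flash-nfs` (names here are arbitrary) could be checked like this. Because the StorageClass uses `Immediate` volume binding, the claim should bind without a consuming pod.

```bash
# Create a test PVC on flash-nfs, wait for it to bind, then clean up.
cat <<'EOF' | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-smoke-test
  namespace: default
spec:
  accessModes: ["ReadWriteMany"]
  storageClassName: flash-nfs
  resources:
    requests:
      storage: 1Gi
EOF
kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/nfs-smoke-test -n default --timeout=120s
kubectl delete pvc nfs-smoke-test -n default
```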
+1 -1
View File
@@ -13,7 +13,7 @@ control_plane
workers
[cluster:vars]
ansible_user=root
ansible_user=ubuntu
ansible_python_interpreter=/usr/bin/python3
ansible_ssh_private_key_file={{ private_key_file }}
k3s_version=latest
@@ -1,14 +1,4 @@
---
- name: Apply Hetzner cloud secret
shell: >-
kubectl -n kube-system create secret generic hcloud
--from-literal=token='{{ hcloud_token }}'
--from-literal=network='{{ cluster_name }}-network'
--dry-run=client -o yaml | kubectl apply -f -
changed_when: true
no_log: true
when: hcloud_token | default('') | length > 0
- name: Ensure Tailscale operator namespace exists
command: >-
kubectl create namespace {{ tailscale_operator_namespace | default('tailscale-system') }}
-82
View File
@@ -1,82 +0,0 @@
---
- name: Check if hcloud secret exists
command: kubectl -n kube-system get secret hcloud
register: hcloud_secret_check
changed_when: false
failed_when: false
- name: Fail if hcloud secret is missing
fail:
msg: "hcloud secret not found in kube-system namespace. CCM requires it."
when: hcloud_secret_check.rc != 0
- name: Check if helm is installed
command: which helm
register: helm_check
changed_when: false
failed_when: false
- name: Install helm
when: helm_check.rc != 0
block:
- name: Download helm install script
get_url:
url: https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
dest: /tmp/get-helm-3.sh
mode: "0755"
- name: Run helm install script
command: /tmp/get-helm-3.sh
args:
creates: /usr/local/bin/helm
- name: Add Hetzner Helm repository
kubernetes.core.helm_repository:
name: hcloud
repo_url: https://charts.hetzner.cloud
kubeconfig: /etc/rancher/k3s/k3s.yaml
environment:
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
- name: Deploy Hetzner Cloud Controller Manager
kubernetes.core.helm:
name: hcloud-cloud-controller-manager
chart_ref: hcloud/hcloud-cloud-controller-manager
release_namespace: kube-system
create_namespace: true
values:
networking:
enabled: true
nodeSelector:
kubernetes.io/hostname: "{{ inventory_hostname }}"
additionalTolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
kubeconfig: /etc/rancher/k3s/k3s.yaml
wait: true
wait_timeout: 300s
environment:
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
- name: Wait for CCM to be ready
command: kubectl -n kube-system rollout status deployment/hcloud-cloud-controller-manager --timeout=120s
changed_when: false
register: ccm_rollout
until: ccm_rollout.rc == 0
retries: 3
delay: 10
- name: Pause to ensure CCM is fully ready to process new nodes
pause:
seconds: 10
- name: Verify CCM is removing uninitialized taints
command: kubectl get nodes -o jsonpath='{.items[*].spec.taints[?(@.key=="node.cloudprovider.kubernetes.io/uninitialized")].key}'
register: uninitialized_taints
changed_when: false
failed_when: false
- name: Display taint status
debug:
msg: "Nodes with uninitialized taint: {{ uninitialized_taints.stdout }}"
+1
View File
@@ -19,6 +19,7 @@
- lsb-release
- software-properties-common
- jq
- nfs-common
- htop
- vim
state: present
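`nfs-common` is what lets the kubelet mount the NFS-backed volumes. A quick manual check from any node could be the following sketch (server and export path come from the nfs-storage addon values; the mount target `/mnt` is arbitrary):

```bash
# List exports from the NFS server backing flash-nfs (showmount ships with nfs-common).
showmount -e 10.27.27.22
# Optional manual mount/unmount round-trip to confirm the client side works.
sudo mount -t nfs 10.27.27.22:/TheFlash/k8s-nfs /mnt && sudo umount /mnt
```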
+2 -1
View File
@@ -3,4 +3,5 @@ k3s_version: latest
k3s_server_url: ""
k3s_token: ""
k3s_node_ip: ""
k3s_kubelet_cloud_provider_external: true
k3s_kubelet_cloud_provider_external: false
k3s_flannel_iface: ens18
+1 -1
View File
@@ -22,7 +22,7 @@
command: >-
/tmp/install-k3s.sh agent
--node-ip {{ k3s_node_ip }}
--flannel-iface=enp7s0
--flannel-iface={{ k3s_flannel_iface }}
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
args:
creates: /usr/local/bin/k3s-agent
+3 -2
View File
@@ -3,9 +3,10 @@ k3s_version: latest
k3s_token: ""
k3s_node_ip: ""
k3s_primary_public_ip: ""
k3s_disable_embedded_ccm: true
k3s_disable_embedded_ccm: false
k3s_disable_servicelb: true
k3s_kubelet_cloud_provider_external: true
k3s_kubelet_cloud_provider_external: false
k3s_flannel_iface: ens18
# Kubernetes API join endpoint for HA cluster joins (kube-vip VIP; set in inventory)
kube_api_endpoint: ""
# Tailscale DNS names for control planes (to enable tailnet access)
+2 -2
View File
@@ -61,7 +61,7 @@
--cluster-init
--advertise-address={{ k3s_primary_ip }}
--node-ip={{ k3s_node_ip }}
--flannel-iface=enp7s0
--flannel-iface={{ k3s_flannel_iface }}
--tls-san={{ k3s_primary_ip }}
--tls-san={{ k3s_primary_public_ip }}
--tls-san={{ kube_api_endpoint }}
@@ -87,7 +87,7 @@
--server https://{{ k3s_join_endpoint | default(k3s_primary_ip) }}:6443
--advertise-address={{ k3s_node_ip }}
--node-ip={{ k3s_node_ip }}
--flannel-iface=enp7s0
--flannel-iface={{ k3s_flannel_iface }}
{% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
{% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
@@ -0,0 +1,4 @@
---
kube_vip_version: v1.1.2
kube_vip_interface: ens18
kube_vip_address: "{{ kube_api_endpoint }}"
@@ -0,0 +1,21 @@
---
- name: Render kube-vip control plane manifest
template:
src: kube-vip-control-plane.yaml.j2
dest: /tmp/kube-vip-control-plane.yaml
mode: "0644"
- name: Apply kube-vip control plane manifest
command: kubectl apply -f /tmp/kube-vip-control-plane.yaml
changed_when: true
- name: Wait for kube-vip DaemonSet rollout
command: kubectl -n kube-system rollout status daemonset/kube-vip --timeout=180s
changed_when: false
- name: Wait for API VIP on 6443
wait_for:
host: "{{ kube_vip_address }}"
port: 6443
state: started
timeout: 180
@@ -0,0 +1,110 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: kube-vip
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: system:kube-vip-role
rules:
- apiGroups: [""]
resources: ["services/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["services", "endpoints"]
verbs: ["list", "get", "watch", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["list", "get", "watch", "update", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["list", "get", "watch", "update", "create"]
- apiGroups: ["discovery.k8s.io"]
resources: ["endpointslices"]
verbs: ["list", "get", "watch", "update"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: system:kube-vip-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:kube-vip-role
subjects:
- kind: ServiceAccount
name: kube-vip
namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kube-vip
namespace: kube-system
spec:
selector:
matchLabels:
app.kubernetes.io/name: kube-vip
template:
metadata:
labels:
app.kubernetes.io/name: kube-vip
spec:
serviceAccountName: kube-vip
hostNetwork: true
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: Exists
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
containers:
- name: kube-vip
image: ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}
imagePullPolicy: IfNotPresent
args:
- manager
env:
- name: vip_arp
value: "true"
- name: port
value: "6443"
- name: vip_interface
value: {{ kube_vip_interface | quote }}
- name: vip_subnet
value: "32"
- name: cp_enable
value: "true"
- name: cp_namespace
value: kube-system
- name: vip_ddns
value: "false"
- name: vip_leaderelection
value: "true"
- name: vip_leaseduration
value: "5"
- name: vip_renewdeadline
value: "3"
- name: vip_retryperiod
value: "1"
- name: address
value: {{ kube_vip_address | quote }}
securityContext:
capabilities:
add:
- NET_ADMIN
- NET_RAW
- SYS_TIME
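A hedged way to confirm the VIP is actually being advertised after this manifest rolls out. The lease name `plndr-cp-lock` is an assumption based on kube-vip's default control-plane leader election; verify it against the deployed version.

```bash
# Rough post-deploy checks for kube-vip.
kubectl -n kube-system rollout status daemonset/kube-vip --timeout=180s
kubectl -n kube-system get lease plndr-cp-lock
# The VIP is bound to ens18 only on the control plane currently holding the lease,
# so run this against each control plane (or the lease holder) as needed:
ssh ubuntu@10.27.27.30 'ip -4 addr show ens18 | grep -F 10.27.27.40 || true'
```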
+2 -2
View File
@@ -57,12 +57,12 @@
roles:
- addon-secrets-bootstrap
- name: Deploy Hetzner CCM (required for workers with external cloud provider)
- name: Deploy kube-vip for API HA
hosts: control_plane[0]
become: true
roles:
- ccm-deploy
- kube-vip-deploy
- name: Setup secondary control planes
hosts: control_plane[1:]
@@ -1,36 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: hcloud-cloud-controller-manager
namespace: flux-system
spec:
interval: 10m
targetNamespace: kube-system
chart:
spec:
chart: hcloud-cloud-controller-manager
version: 1.30.1
sourceRef:
kind: HelmRepository
name: hcloud
namespace: flux-system
install:
createNamespace: true
remediation:
retries: 3
upgrade:
remediation:
retries: 3
values:
selectorLabels:
app: hcloud-cloud-controller-manager
args:
secure-port: "0"
networking:
enabled: true
nodeSelector:
kubernetes.io/hostname: k8s-cluster-cp-1
additionalTolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
name: hcloud
namespace: flux-system
spec:
interval: 1h
url: https://charts.hetzner.cloud
@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- helmrepository-hcloud.yaml
- helmrelease-hcloud-ccm.yaml
@@ -1,36 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: hcloud-csi
namespace: flux-system
spec:
interval: 10m
targetNamespace: kube-system
chart:
spec:
chart: hcloud-csi
version: 2.20.0
sourceRef:
kind: HelmRepository
name: hcloud
namespace: flux-system
install:
createNamespace: true
remediation:
retries: 3
upgrade:
remediation:
retries: 3
values:
controller:
nodeSelector:
kubernetes.io/hostname: k8s-cluster-cp-1
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
hcloudVolumeDefaultLocation: nbg1
storageClasses:
- name: hcloud-volumes
defaultStorageClass: true
reclaimPolicy: Delete
@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- helmrepository-hcloud.yaml
- helmrelease-hcloud-csi.yaml
@@ -1,17 +0,0 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: addon-csi
namespace: flux-system
spec:
interval: 10m
prune: true
sourceRef:
kind: GitRepository
name: platform
path: ./infrastructure/addons/csi
dependsOn:
- name: addon-ccm
wait: true
timeout: 10m
suspend: false
@@ -1,7 +1,7 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: addon-ccm
name: addon-nfs-storage
namespace: flux-system
spec:
interval: 10m
@@ -9,7 +9,7 @@ spec:
sourceRef:
kind: GitRepository
name: platform
path: ./infrastructure/addons/ccm
path: ./infrastructure/addons/nfs-storage
wait: true
timeout: 10m
suspend: false
+1 -2
View File
@@ -1,8 +1,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- kustomization-ccm.yaml
- kustomization-csi.yaml
- kustomization-nfs-storage.yaml
- kustomization-external-secrets.yaml
- kustomization-cert-manager.yaml
- kustomization-tailscale-operator.yaml
@@ -0,0 +1,36 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: nfs-subdir-external-provisioner
namespace: flux-system
spec:
interval: 10m
targetNamespace: kube-system
chart:
spec:
chart: nfs-subdir-external-provisioner
version: 4.0.18
sourceRef:
kind: HelmRepository
name: nfs-subdir-external-provisioner
namespace: flux-system
install:
createNamespace: true
remediation:
retries: 3
upgrade:
remediation:
retries: 3
values:
nfs:
server: 10.27.27.22
path: /TheFlash/k8s-nfs
storageClass:
create: true
defaultClass: true
name: flash-nfs
provisionerName: flash-nfs
reclaimPolicy: Delete
archiveOnDelete: true
allowVolumeExpansion: true
volumeBindingMode: Immediate
@@ -1,8 +1,8 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
name: hcloud
name: nfs-subdir-external-provisioner
namespace: flux-system
spec:
interval: 1h
url: https://charts.hetzner.cloud
url: https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
@@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- helmrepository-nfs-subdir-external-provisioner.yaml
- helmrelease-nfs-subdir-external-provisioner.yaml
+2 -1
View File
@@ -24,10 +24,11 @@ echo "Fetching kubeconfig from $CP1_PUBLIC_IP ..."
ssh -i "$SSH_KEY" \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
"root@$CP1_PUBLIC_IP" "cat /etc/rancher/k3s/k3s.yaml" \
"ubuntu@$CP1_PUBLIC_IP" "sudo cat /etc/rancher/k3s/k3s.yaml" \
| sed "s/127.0.0.1/$CP1_PUBLIC_IP/g" \
> "$KUBECONFIG_PATH"
chmod 600 "$KUBECONFIG_PATH"
echo "Kubeconfig saved to $KUBECONFIG_PATH"
echo "Run: export KUBECONFIG=$KUBECONFIG_PATH"
+19 -15
View File
@@ -1,29 +1,33 @@
hcloud_token = "your-hetzner-cloud-api-token-here"
proxmox_endpoint = "https://100.105.0.115:8006/"
proxmox_api_token_id = "terraform-prov@pve!k8s-cluster"
proxmox_api_token_secret = "your-proxmox-api-token-secret"
ssh_public_key = "~/.ssh/hetzner_k8s.pub"
ssh_private_key = "~/.ssh/hetzner_k8s"
ssh_public_key = "~/.ssh/infra.pub"
ssh_private_key = "~/.ssh/infra"
s3_access_key = "your-backblaze-key-id"
s3_secret_key = "your-backblaze-application-key"
s3_endpoint = "https://s3.eu-central-003.backblazeb2.com"
s3_bucket = "k8s-terraform-state"
cluster_name = "k8s-prod"
cluster_name = "k8s-cluster"
tailscale_tailnet = "yourtailnet.ts.net"
restrict_api_ssh_to_tailnet = true
tailnet_cidr = "100.64.0.0/10"
enable_nodeport_public = false
kube_api_vip = "10.27.27.40"
control_plane_count = 3
control_plane_type = "cx23"
control_plane_ips = ["10.27.27.30", "10.27.27.31", "10.27.27.32"]
control_plane_vm_ids = [200, 201, 202]
worker_count = 4
worker_type = "cx33"
worker_count = 5
worker_ips = ["10.27.27.41", "10.27.27.42", "10.27.27.43", "10.27.27.44", "10.27.27.45"]
worker_vm_ids = [210, 211, 212, 213, 214]
location = "nbg1"
allowed_ssh_ips = []
allowed_api_ips = []
proxmox_node_name = "flex"
proxmox_template_vm_id = 9000
proxmox_vm_storage_pool = "Flash"
proxmox_cloud_init_storage_pool = "Flash"
proxmox_bridge = "vmbr0"
proxmox_gateway = "10.27.27.1"
proxmox_dns_servers = ["1.1.1.1", "8.8.8.8"]
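A typical local flow against this tfvars file, assuming the same B2 backend the CI workflows configure (the `-backend-config` values mirror the flags used in the workflows; export the keys before running):

```bash
# Initialize against the Backblaze B2 state backend, then plan and apply.
terraform -chdir=terraform init \
  -backend-config="endpoint=https://s3.eu-central-003.backblazeb2.com" \
  -backend-config="bucket=k8s-terraform-state" \
  -backend-config="region=auto" \
  -backend-config="access_key=$S3_ACCESS_KEY" \
  -backend-config="secret_key=$S3_SECRET_KEY" \
  -backend-config="skip_requesting_account_id=true"
terraform -chdir=terraform plan  -var-file=../terraform.tfvars
terraform -chdir=terraform apply -var-file=../terraform.tfvars
```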
-118
View File
@@ -1,118 +0,0 @@
locals {
ssh_source_ips = var.restrict_api_ssh_to_tailnet ? concat([var.tailnet_cidr], var.allowed_ssh_ips) : var.allowed_ssh_ips
api_source_ips = var.restrict_api_ssh_to_tailnet ? concat([var.tailnet_cidr], var.allowed_api_ips) : var.allowed_api_ips
}
resource "hcloud_firewall" "cluster" {
name = "${var.cluster_name}-firewall"
rule {
description = "SSH"
direction = "in"
protocol = "tcp"
port = "22"
source_ips = local.ssh_source_ips
}
rule {
description = "Kubernetes API"
direction = "in"
protocol = "tcp"
port = "6443"
source_ips = local.api_source_ips
}
rule {
description = "Tailscale WireGuard"
direction = "in"
protocol = "udp"
port = "41641"
source_ips = ["0.0.0.0/0"]
}
rule {
description = "Kubernetes API (internal)"
direction = "in"
protocol = "tcp"
port = "6443"
source_ips = [var.subnet_cidr]
}
rule {
description = "k3s Supervisor"
direction = "in"
protocol = "tcp"
port = "9345"
source_ips = [var.subnet_cidr]
}
rule {
description = "etcd Client"
direction = "in"
protocol = "tcp"
port = "2379"
source_ips = [var.subnet_cidr]
}
rule {
description = "etcd Peer"
direction = "in"
protocol = "tcp"
port = "2380"
source_ips = [var.subnet_cidr]
}
rule {
description = "Flannel VXLAN"
direction = "in"
protocol = "udp"
port = "8472"
source_ips = [var.subnet_cidr]
}
rule {
description = "Kubelet"
direction = "in"
protocol = "tcp"
port = "10250"
source_ips = [var.subnet_cidr]
}
dynamic "rule" {
for_each = var.enable_nodeport_public ? [1] : []
content {
description = "NodePorts"
direction = "in"
protocol = "tcp"
port = "30000-32767"
source_ips = ["0.0.0.0/0"]
}
}
rule {
description = "HTTP from Load Balancer"
direction = "in"
protocol = "tcp"
port = "80"
source_ips = ["0.0.0.0/0"]
}
rule {
description = "HTTPS from Load Balancer"
direction = "in"
protocol = "tcp"
port = "443"
source_ips = ["0.0.0.0/0"]
}
rule {
description = "ICMP"
direction = "in"
protocol = "icmp"
source_ips = ["0.0.0.0/0"]
}
apply_to {
label_selector = "cluster=${var.cluster_name}"
}
}
-50
View File
@@ -1,50 +0,0 @@
# Load Balancer for Kubernetes API High Availability
# Provides a single endpoint for all control planes
resource "hcloud_load_balancer" "kube_api" {
name = "${var.cluster_name}-api"
load_balancer_type = "lb11" # Cheapest tier: €5.39/month
location = var.location
labels = {
cluster = var.cluster_name
role = "kube-api"
}
}
# Attach Load Balancer to private network (required for use_private_ip)
resource "hcloud_load_balancer_network" "kube_api" {
load_balancer_id = hcloud_load_balancer.kube_api.id
network_id = hcloud_network.cluster.id
ip = cidrhost(var.subnet_cidr, 5) # 10.0.1.5
}
# Attach all control plane servers as targets
resource "hcloud_load_balancer_target" "kube_api_targets" {
count = var.control_plane_count
type = "server"
load_balancer_id = hcloud_load_balancer.kube_api.id
server_id = hcloud_server.control_plane[count.index].id
use_private_ip = true
depends_on = [hcloud_load_balancer_network.kube_api, hcloud_server.control_plane]
}
# Kubernetes API service on port 6443
resource "hcloud_load_balancer_service" "kube_api" {
load_balancer_id = hcloud_load_balancer.kube_api.id
protocol = "tcp"
listen_port = 6443
destination_port = 6443
health_check {
protocol = "tcp"
port = 6443
interval = 15
timeout = 10
retries = 3
}
}
# Firewall rule to allow LB access to control planes on 6443
# This is added to the existing cluster firewall
+12 -5
View File
@@ -2,13 +2,20 @@ terraform {
required_version = ">= 1.0"
required_providers {
hcloud = {
source = "hetznercloud/hcloud"
version = "~> 1.45"
local = {
source = "hashicorp/local"
version = "~> 2.5"
}
proxmox = {
source = "bpg/proxmox"
version = ">= 0.60.0"
}
}
}
provider "hcloud" {
token = var.hcloud_token
provider "proxmox" {
endpoint = var.proxmox_endpoint
api_token = "${var.proxmox_api_token_id}=${var.proxmox_api_token_secret}"
insecure = var.proxmox_insecure
}
-11
View File
@@ -1,11 +0,0 @@
resource "hcloud_network" "cluster" {
name = "${var.cluster_name}-network"
ip_range = var.network_cidr
}
resource "hcloud_network_subnet" "servers" {
network_id = hcloud_network.cluster.id
type = "cloud"
network_zone = "eu-central"
ip_range = var.subnet_cidr
}
+9 -15
View File
@@ -1,42 +1,36 @@
output "control_plane_ips" {
description = "Public IPs of control plane nodes"
value = [for cp in hcloud_server.control_plane : cp.ipv4_address]
value = var.control_plane_ips
}
output "control_plane_names" {
description = "Control plane hostnames"
value = [for cp in hcloud_server.control_plane : cp.name]
value = [for idx in range(var.control_plane_count) : format("%s-cp-%d", var.cluster_name, idx + 1)]
}
output "control_plane_private_ips" {
description = "Private IPs of control plane nodes"
value = [
for idx, cp in hcloud_server.control_plane :
try(one(cp.network).ip, cidrhost(var.subnet_cidr, 10 + idx))
]
value = var.control_plane_ips
}
output "primary_control_plane_ip" {
description = "Public IP of the primary control plane (first node)"
value = hcloud_server.control_plane[0].ipv4_address
value = var.control_plane_ips[0]
}
output "worker_ips" {
description = "Public IPs of worker nodes"
value = [for worker in hcloud_server.workers : worker.ipv4_address]
value = var.worker_ips
}
output "worker_names" {
description = "Worker hostnames"
value = [for worker in hcloud_server.workers : worker.name]
value = [for idx in range(var.worker_count) : format("%s-worker-%d", var.cluster_name, idx + 1)]
}
output "worker_private_ips" {
description = "Private IPs of worker nodes"
value = [
for idx, worker in hcloud_server.workers :
try(one(worker.network).ip, cidrhost(var.subnet_cidr, 20 + idx))
]
value = var.worker_ips
}
output "ssh_private_key_path" {
@@ -61,10 +55,10 @@ output "network_cidr" {
output "kubeconfig_command" {
description = "Command to fetch kubeconfig"
value = "ssh root@${hcloud_server.control_plane[0].ipv4_address} 'cat /etc/rancher/k3s/k3s.yaml' > kubeconfig && sed -i 's/127.0.0.1/${hcloud_server.control_plane[0].ipv4_address}/g' kubeconfig"
value = "ssh ubuntu@${var.control_plane_ips[0]} 'sudo cat /etc/rancher/k3s/k3s.yaml' > kubeconfig && sed -i 's/127.0.0.1/${var.control_plane_ips[0]}/g' kubeconfig"
}
output "kube_api_lb_ip" {
description = "Load Balancer private IP for Kubernetes API (used for cluster joins)"
value = hcloud_load_balancer_network.kube_api.ip
value = var.kube_api_vip
}
+111 -50
View File
@@ -1,60 +1,121 @@
data "hcloud_image" "ubuntu" {
name = "ubuntu-24.04"
with_status = ["available"]
data "local_file" "ssh_public_key" {
filename = pathexpand(var.ssh_public_key)
}
resource "hcloud_server" "control_plane" {
count = var.control_plane_count
locals {
subnet_prefix = split("/", var.subnet_cidr)[1]
name = "${var.cluster_name}-cp-${count.index + 1}"
server_type = var.control_plane_type
image = data.hcloud_image.ubuntu.id
location = var.location
ssh_keys = [data.hcloud_ssh_key.cluster.id]
labels = {
cluster = var.cluster_name
control_planes = {
for idx in range(var.control_plane_count) :
format("%s-cp-%d", var.cluster_name, idx + 1) => {
role = "control-plane"
vm_id = var.control_plane_vm_ids[idx]
ip = var.control_plane_ips[idx]
cpu = var.control_plane_cores
memory_mb = var.control_plane_memory_mb
disk_gb = var.control_plane_disk_gb
startup = 1
}
}
network {
network_id = hcloud_network.cluster.id
ip = cidrhost(var.subnet_cidr, 10 + count.index)
}
public_net {
ipv4_enabled = true
ipv6_enabled = true
}
firewall_ids = [hcloud_firewall.cluster.id]
}
resource "hcloud_server" "workers" {
count = var.worker_count
name = "${var.cluster_name}-worker-${count.index + 1}"
server_type = var.worker_type
image = data.hcloud_image.ubuntu.id
location = var.location
ssh_keys = [data.hcloud_ssh_key.cluster.id]
labels = {
cluster = var.cluster_name
workers = {
for idx in range(var.worker_count) :
format("%s-worker-%d", var.cluster_name, idx + 1) => {
role = "worker"
vm_id = var.worker_vm_ids[idx]
ip = var.worker_ips[idx]
cpu = var.worker_cores
memory_mb = var.worker_memory_mb
disk_gb = var.worker_disk_gb
startup = 2
}
}
network {
network_id = hcloud_network.cluster.id
ip = cidrhost(var.subnet_cidr, 20 + count.index)
}
public_net {
ipv4_enabled = true
ipv6_enabled = true
}
firewall_ids = [hcloud_firewall.cluster.id]
depends_on = [hcloud_server.control_plane]
nodes = merge(local.control_planes, local.workers)
}
resource "proxmox_virtual_environment_vm" "nodes" {
for_each = local.nodes
name = each.key
description = "Managed by Terraform for ${var.cluster_name}"
tags = ["terraform", var.cluster_name, each.value.role]
node_name = var.proxmox_node_name
vm_id = each.value.vm_id
on_boot = true
started = true
stop_on_destroy = true
reboot_after_update = true
timeout_clone = 1800
timeout_create = 1800
timeout_shutdown_vm = 300
timeout_start_vm = 300
scsi_hardware = "virtio-scsi-single"
clone {
vm_id = var.proxmox_template_vm_id
datastore_id = var.proxmox_vm_storage_pool
full = var.proxmox_clone_full
retries = 3
}
agent {
enabled = true
trim = true
}
cpu {
cores = each.value.cpu
type = "x86-64-v2-AES"
}
memory {
dedicated = each.value.memory_mb
floating = each.value.memory_mb
}
startup {
order = tostring(each.value.startup)
up_delay = "20"
down_delay = "20"
}
disk {
datastore_id = var.proxmox_vm_storage_pool
interface = "scsi0"
size = each.value.disk_gb
discard = "on"
iothread = true
ssd = true
}
initialization {
datastore_id = var.proxmox_cloud_init_storage_pool
dns {
servers = var.proxmox_dns_servers
}
ip_config {
ipv4 {
address = "${each.value.ip}/${local.subnet_prefix}"
gateway = var.proxmox_gateway
}
}
user_account {
username = var.proxmox_ssh_username
keys = [trimspace(data.local_file.ssh_public_key.content)]
}
}
network_device {
bridge = var.proxmox_bridge
model = "virtio"
}
operating_system {
type = "l26"
}
}
-7
View File
@@ -1,7 +0,0 @@
data "local_file" "ssh_public_key" {
filename = pathexpand(var.ssh_public_key)
}
data "hcloud_ssh_key" "cluster" {
name = "infra"
}
+142 -22
View File
@@ -1,19 +1,13 @@
variable "hcloud_token" {
description = "Hetzner Cloud API token"
type = string
sensitive = true
}
variable "ssh_public_key" {
description = "Path to SSH public key"
type = string
default = "~/.ssh/id_ed25519.pub"
default = "~/.ssh/infra.pub"
}
variable "ssh_private_key" {
description = "Path to SSH private key"
type = string
default = "~/.ssh/id_ed25519"
default = "~/.ssh/infra"
}
variable "cluster_name" {
@@ -28,28 +22,112 @@ variable "control_plane_count" {
default = 3
}
variable "control_plane_type" {
description = "Hetzner server type for control plane"
type = string
default = "cx23"
variable "control_plane_cores" {
description = "vCPU count for control plane VMs"
type = number
default = 2
}
variable "control_plane_memory_mb" {
description = "Dedicated memory for control plane VMs in MiB"
type = number
default = 4096
}
variable "control_plane_disk_gb" {
description = "Disk size for control plane VMs in GiB"
type = number
default = 32
}
variable "worker_count" {
description = "Number of worker nodes"
type = number
default = 3
default = 5
}
variable "worker_type" {
description = "Hetzner server type for workers"
type = string
default = "cx33"
variable "worker_cores" {
description = "vCPU count for worker VMs"
type = number
default = 4
}
variable "location" {
description = "Hetzner datacenter location"
variable "worker_memory_mb" {
description = "Dedicated memory for worker VMs in MiB"
type = number
default = 8192
}
variable "worker_disk_gb" {
description = "Disk size for worker VMs in GiB"
type = number
default = 64
}
variable "proxmox_endpoint" {
description = "Proxmox API endpoint without /api2/json suffix"
type = string
default = "nbg1"
default = "https://100.105.0.115:8006/"
}
variable "proxmox_api_token_id" {
description = "Proxmox API token ID"
type = string
sensitive = true
}
variable "proxmox_api_token_secret" {
description = "Proxmox API token secret"
type = string
sensitive = true
}
variable "proxmox_insecure" {
description = "Skip TLS verification for the Proxmox API"
type = bool
default = true
}
variable "proxmox_node_name" {
description = "Fixed Proxmox node name for all cluster VMs"
type = string
default = "flex"
}
variable "proxmox_template_vm_id" {
description = "Template VM ID used for linked clones"
type = number
default = 9000
}
variable "proxmox_clone_full" {
description = "Whether to use full clones instead of linked clones"
type = bool
default = false
}
variable "proxmox_vm_storage_pool" {
description = "Proxmox datastore for VM disks"
type = string
default = "Flash"
}
variable "proxmox_cloud_init_storage_pool" {
description = "Proxmox datastore for cloud-init disks"
type = string
default = "Flash"
}
variable "proxmox_bridge" {
description = "Proxmox bridge for cluster VM interfaces"
type = string
default = "vmbr0"
}
variable "proxmox_ssh_username" {
description = "Cloud-init user injected into cloned VMs"
type = string
default = "ubuntu"
}
variable "allowed_ssh_ips" {
@@ -90,13 +168,55 @@ variable "enable_nodeport_public" {
variable "network_cidr" {
description = "CIDR for private network"
type = string
default = "10.0.0.0/16"
default = "10.27.27.0/24"
}
variable "subnet_cidr" {
description = "CIDR for server subnet"
type = string
default = "10.0.1.0/24"
default = "10.27.27.0/24"
}
variable "proxmox_gateway" {
description = "Gateway for cluster VM networking"
type = string
default = "10.27.27.1"
}
variable "proxmox_dns_servers" {
description = "DNS servers configured through cloud-init"
type = list(string)
default = ["1.1.1.1", "8.8.8.8"]
}
variable "control_plane_ips" {
description = "Static IPv4 addresses for control plane VMs"
type = list(string)
default = ["10.27.27.30", "10.27.27.31", "10.27.27.32"]
}
variable "worker_ips" {
description = "Static IPv4 addresses for worker VMs"
type = list(string)
default = ["10.27.27.41", "10.27.27.42", "10.27.27.43", "10.27.27.44", "10.27.27.45"]
}
variable "control_plane_vm_ids" {
description = "Fixed VMIDs for control plane VMs"
type = list(number)
default = [200, 201, 202]
}
variable "worker_vm_ids" {
description = "Fixed VMIDs for worker VMs"
type = list(number)
default = [210, 211, 212, 213, 214]
}
variable "kube_api_vip" {
description = "Virtual IP advertised by kube-vip for the Kubernetes API"
type = string
default = "10.27.27.40"
}
variable "s3_access_key" {