Compare commits
168 Commits
| SHA1 | Author | Date | |
|---|---|---|---|
| adfc987be2 | |||
| 6b9a77aae7 | |||
| 15bb1eaf06 | |||
| 056c1ab6f2 | |||
| 846dd0b7ad | |||
| a9b05d6eae | |||
| 2438d1b6ef | |||
| 69b9021e16 | |||
| cc2ab26ccc | |||
| db47b091b5 | |||
| 497da74b4b | |||
| eec7375268 | |||
| 095a1fcde2 | |||
| bccc17c422 | |||
| 055cb50bdd | |||
| 7a08f58719 | |||
| 877dd027ea | |||
| 10e4390eb3 | |||
| 55c7a4576c | |||
| 94e5b607c5 | |||
| 72ba3b6c4f | |||
| bac568d540 | |||
| 82a9747fb9 | |||
| 496be23058 | |||
| ff542024d2 | |||
| 95e39306c5 | |||
| fb2f164b58 | |||
| e5c8d55530 | |||
| 4197647b08 | |||
| 69100bac44 | |||
| 7bd8df7aea | |||
| 5262c59665 | |||
| 79f4b95aef | |||
| e1c836aacd | |||
| 3dbba22a6d | |||
| 600aa4787d | |||
| 1896108cbb | |||
| 8375333ac5 | |||
| 2636f14408 | |||
| 6ed0a29253 | |||
| ce5a05dcd4 | |||
| 63c45337a0 | |||
| 66a550c830 | |||
| d78867e4d6 | |||
| 0874553582 | |||
| 0aba186d8b | |||
| 17182f84a9 | |||
| df3d49c0d4 | |||
| 524147dac3 | |||
| f885d8ab2e | |||
| 4b9c07b536 | |||
| 0333344a0a | |||
| bd71017a85 | |||
| e9327b0c61 | |||
| cf49f8bf03 | |||
| d57e8c8fe8 | |||
| 93a2a42917 | |||
| 5cf68771dd | |||
| 6d6e3e8371 | |||
| 353a408dac | |||
| b3612083ad | |||
| 8c0dbd997d | |||
| 3a975a323c | |||
| d126de4dc4 | |||
| a33a993867 | |||
| f52e657f9f | |||
| f49b08f50c | |||
| 327bb860b7 | |||
| fd5451a5ef | |||
| 7333cb2780 | |||
| feecf97cd5 | |||
| b5bcec2663 | |||
| 0ad56405ee | |||
| d050e8962a | |||
| d925eeac3f | |||
| 2bde45e106 | |||
| 50752ca4b0 | |||
| a2ed9555c0 | |||
| 14462dd870 | |||
| 0625eee297 | |||
| 2dc4ab6329 | |||
| bbec0dfff4 | |||
| 6de826e030 | |||
| bdba2b7af2 | |||
| 499a3462e7 | |||
| daf6ccd0e4 | |||
| a6a630000a | |||
| ff9e58d44f | |||
| 8b94e4dd06 | |||
| 547a29e000 | |||
| 760f0482d4 | |||
| 440e268e4f | |||
| 24851f5a9b | |||
| ded8efe7fb | |||
| c10646d228 | |||
| 50d97209e6 | |||
| 46b2ff7d19 | |||
| a4f1d179e9 | |||
| 9879de5a86 | |||
| 195e9bce25 | |||
| 4796606432 | |||
| b1eab6a0fa | |||
| f3c96b65d2 | |||
| c7a375758f | |||
| d0be48b65c | |||
| 40647318b4 | |||
| cdb26904d2 | |||
| 3c06e046c2 | |||
| 17f1815e7f | |||
| 66e86e55ea | |||
| 43df412243 | |||
| 383ef9e9ac | |||
| 18abc5073b | |||
| f8da2594ca | |||
| e0359f0097 | |||
| 003333a061 | |||
| a6071c504b | |||
| 08123457f1 | |||
| 757d88ed52 | |||
| 15defc686f | |||
| abb7578328 | |||
| bc87a7ca43 | |||
| 045880bdd6 | |||
| bfcf57bcc5 | |||
| 7e3ebec95b | |||
| 0c31c3b1d5 | |||
| 5523feb563 | |||
| cafa2fa0b3 | |||
| a7fd4c0b97 | |||
| e56a3a6c38 | |||
| 7b2eca07ab | |||
| 347ca041ba | |||
| 3f52bad854 | |||
| c89c31adea | |||
| 68b293efe4 | |||
| 1f465cc0c1 | |||
| 6e22bd26b3 | |||
| 869880c152 | |||
| 31e95eb227 | |||
| 12675417bd | |||
| 8e081ddfda | |||
| 4b7517c9c5 | |||
| f9bc53723f | |||
| ee6417c18e | |||
| 1156dc0203 | |||
| 4151027e01 | |||
| 9269e9df1b | |||
| d9374bc209 | |||
| c570a476b5 | |||
| a7f11ccf94 | |||
| a7d540ca65 | |||
| 098bd98876 | |||
| 55d7b8201e | |||
| 9c0523e880 | |||
| 8372d562ad | |||
| 1bb11dfe3a | |||
| 624cd5aab6 | |||
| 71bdc6a709 | |||
| 714f20417b | |||
| c32bec34bc | |||
| 6519a7673d | |||
| d1c31cdb91 | |||
| b3e88712bd | |||
| 06366ee5e6 | |||
| 9a2d213114 | |||
| 9482a0f551 | |||
| 5c53b8e06e | |||
| b1dae28aa5 |
@@ -7,22 +7,28 @@ on:
paths:
- "ansible/dashboards.yml"
- "ansible/roles/observability-content/**"
- ".gitea/workflows/dashboards.yml"
workflow_dispatch:

concurrency:
group: prod-cluster
cancel-in-progress: false

env:
TF_VERSION: "1.7.0"
TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
TF_VERSION: "1.14.9"
TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
TF_VAR_proxmox_insecure: "true"

jobs:
dashboards:
name: Grafana Content
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4

@@ -31,6 +37,7 @@ jobs:
uses: hashicorp/setup-terraform@v3
with:
terraform_version: ${{ env.TF_VERSION }}
terraform_wrapper: false

- name: Setup SSH Keys
run: |

@@ -44,6 +51,7 @@ jobs:
working-directory: terraform
run: |
terraform init \
-lockfile=readonly \
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-backend-config="region=auto" \

@@ -51,29 +59,10 @@ jobs:
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"

- name: Detect runner egress IP
run: |
RUNNER_IP=$(curl -fsSL https://api.ipify.org)
echo "RUNNER_CIDR=[\"${RUNNER_IP}/32\"]" >> "$GITHUB_ENV"
echo "Runner egress IP: ${RUNNER_IP}"

- name: Open SSH/API for current runner CIDR
working-directory: terraform
run: |
terraform apply \
-refresh=false \
-target=hcloud_firewall.cluster \
-var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-var="allowed_ssh_ips=${RUNNER_CIDR}" \
-var="allowed_api_ips=${RUNNER_CIDR}" \
-auto-approve

- name: Install Python Dependencies
run: |
apt-get update && apt-get install -y python3-pip
pip3 install --break-system-packages ansible kubernetes jinja2 pyyaml
pip3 install ansible==8.7.0 kubernetes==26.1.0 jinja2==3.1.5 pyyaml==6.0.2

- name: Install Ansible Collections
run: ansible-galaxy collection install -r ansible/requirements.yml
+1116 -202 File diff suppressed because it is too large
+44 -126
@@ -8,109 +8,28 @@ on:
required: true
default: ''

concurrency:
group: prod-cluster
cancel-in-progress: false

env:
TF_VERSION: "1.7.0"
TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
TF_VERSION: "1.14.9"
TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
B2_ACCOUNT_ID: ${{ secrets.B2_ACCOUNT_ID }}
B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
TF_VAR_proxmox_insecure: "true"

jobs:
pre-destroy-backup:
name: Pre-Destroy Backup
runs-on: ubuntu-latest
if: github.event.inputs.confirm == 'destroy'
environment: destroy
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Setup Terraform
uses: hashicorp/setup-terraform@v3
with:
terraform_version: ${{ env.TF_VERSION }}

- name: Terraform Init
working-directory: terraform
run: |
terraform init \
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-backend-config="region=auto" \
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"

- name: Setup SSH Keys
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
chmod 644 ~/.ssh/id_ed25519.pub

- name: Get Control Plane IP
id: cp_ip
working-directory: terraform
run: |
PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
echo "PRIMARY_IP=${PRIMARY_IP}" >> "$GITHUB_ENV"

- name: Pre-Destroy pg_dump to B2
run: |
set +e
echo "Attempting pre-destroy backup to B2..."
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@${PRIMARY_IP} << 'EOF'
set -e
# Check if kubectl is available and cluster is up
if ! command -v kubectl &> /dev/null; then
echo "kubectl not found, skipping pre-destroy backup"
exit 0
fi

# Check if we can reach the cluster
if ! kubectl cluster-info &> /dev/null; then
echo "Cannot reach cluster, skipping pre-destroy backup"
exit 0
fi

# Check if CNP is deployed
if ! kubectl get namespace cnpg-cluster &> /dev/null; then
echo "CNP namespace not found, skipping pre-destroy backup"
exit 0
fi

# Run backup using the pgdump image directly
BACKUP_FILE="rancher-backup-$(date +%Y%m%d-%H%M%S).sql.gz"
B2_ACCOUNT_ID="$(cat /etc/kubernetes/secret/b2_account_id 2>/dev/null || echo '')"
B2_APPLICATION_KEY="$(cat /etc/kubernetes/secret/b2_application_key 2>/dev/null || echo '')"

if [ -z "$B2_ACCOUNT_ID" ] || [ -z "$B2_APPLICATION_KEY" ]; then
echo "B2 credentials not found in secret, skipping pre-destroy backup"
exit 0
fi

kubectl run pgdump-manual --image=ghcr.io/cloudnative-pg/pgbackrest:latest --restart=Never \
-n cnpg-cluster --dry-run=client -o yaml | \
kubectl apply -f -

echo "Waiting for backup job to complete..."
kubectl wait --for=condition=complete job/pgdump-manual -n cnpg-cluster --timeout=300s || true
kubectl logs job/pgdump-manual -n cnpg-cluster || true
kubectl delete job pgdump-manual -n cnpg-cluster --ignore-not-found=true || true
EOF
echo "Pre-destroy backup step completed (failure is non-fatal)"

destroy:
name: Destroy Cluster
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
if: github.event.inputs.confirm == 'destroy'
environment: destroy
needs: pre-destroy-backup
steps:
- name: Checkout
uses: actions/checkout@v4

@@ -119,17 +38,7 @@ jobs:
uses: hashicorp/setup-terraform@v3
with:
terraform_version: ${{ env.TF_VERSION }}

- name: Terraform Init
working-directory: terraform
run: |
terraform init \
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-backend-config="region=auto" \
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"
terraform_wrapper: false

- name: Setup SSH Keys
run: |

@@ -139,10 +48,30 @@ jobs:
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
chmod 644 ~/.ssh/id_ed25519.pub

- name: Install jq
- name: Terraform Init
working-directory: terraform
run: |
apt-get update
apt-get install -y jq
terraform init \
-lockfile=readonly \
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-backend-config="region=auto" \
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"

- name: Save Proxmox target list
run: |
mkdir -p outputs
if ! terraform -chdir=terraform output -json proxmox_target_vms > outputs/proxmox_target_vms.json; then
terraform -chdir=terraform plan \
-refresh=false \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-out=cleanup.tfplan \
-no-color || true
printf '[]' > outputs/proxmox_target_vms.json
fi

- name: Terraform Destroy
id: destroy

@@ -152,7 +81,7 @@ jobs:
for attempt in 1 2 3; do
echo "Terraform destroy attempt ${attempt}/3"
terraform destroy \
-var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-parallelism=2 \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-auto-approve

@@ -164,32 +93,21 @@ jobs:
echo "Terraform destroy failed with exit code ${rc}; retrying in 30s"
sleep 30
terraform refresh \
-var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" || true
fi
done
exit "$rc"

- name: Hetzner destroy diagnostics
if: failure() && steps.destroy.outcome == 'failure'
env:
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
- name: Verify Proxmox target VMs removed
if: success()
run: |
set +e
echo "== Terraform state list =="
terraform -chdir=terraform state list || true

network_id=$(terraform -chdir=terraform state show hcloud_network.cluster 2>/dev/null | awk '/^id *=/ {gsub(/"/, "", $3); print $3; exit}')
if [ -z "$network_id" ]; then
network_id="11988935"
python3 scripts/proxmox-rebuild-cleanup.py --mode post-destroy --targets-file outputs/proxmox_target_vms.json
if [ -f terraform/cleanup.tfplan ]; then
python3 scripts/proxmox-rebuild-cleanup.py --mode post-destroy --terraform-dir terraform --plan cleanup.tfplan
fi

echo "== Hetzner network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/networks/${network_id}" | jq . || true

echo "== Hetzner servers attached to network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers" | jq --argjson id "$network_id" '.servers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true

echo "== Hetzner load balancers attached to network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/load_balancers" | jq --argjson id "$network_id" '.load_balancers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true
- name: Terraform state diagnostics
if: failure() && steps.destroy.outcome == 'failure'
run: |
terraform -chdir=terraform state list || true
@@ -3,7 +3,6 @@
*.tfstate.*
*.tfstate.backup
.terraform/
.terraform.lock.hcl
terraform.tfvars
crash.log
override.tf
@@ -1,48 +1,56 @@

# AGENTS.md

Repository guide for OpenCode sessions in this repo.
Compact repo guidance for OpenCode sessions. Trust executable sources over docs when they conflict.

## Read First

- Trust manifests and workflows over prose when they conflict.
- Highest-value sources: `terraform/main.tf`, `terraform/variables.tf`, `ansible/site.yml`, `clusters/prod/flux-system/`, `infrastructure/addons/kustomization.yaml`, `.gitea/workflows/deploy.yml`, `.gitea/workflows/destroy.yml`, `README.md`, `STABLE_BASELINE.md`, `scripts/refresh-kubeconfig.sh`, `scripts/smoke-check-tailnet-services.sh`.
- Highest-value sources: `.gitea/workflows/deploy.yml`, `.gitea/workflows/destroy.yml`, `terraform/main.tf`, `terraform/variables.tf`, `terraform/servers.tf`, `ansible/site.yml`, `ansible/inventory.tmpl`, `clusters/prod/flux-system/`, `infrastructure/addons/kustomization.yaml`.

## Current Baseline
## Baseline

- HA private cluster: 3 control planes, 3 workers.
- Tailscale is the private access path for Rancher and shared services.
- Rancher, Grafana, and Prometheus are exposed through Tailscale; Flux UI / Weave GitOps is removed.
- `apps/` is suspended by default.
- Rancher stores state in embedded etcd; backup/restore uses `rancher-backup` to B2.
- Proxmox HA K3s cluster: 3 control planes, 5 workers, VMIDs `200-202` and `210-214`, node `flex`, template VMID `9000`, datastore `Flash`.
- API HA is kube-vip at `10.27.27.40`; control planes are `10.27.27.30-32`, workers are `10.27.27.41-45`.
- SSH user is `ubuntu`; Ansible derives the flannel iface from `ansible_default_ipv4.interface` with `eth0` fallback, so do not hard-code `ens18`.
- Storage is raw-manifest `nfs-subdir-external-provisioner` using `10.27.27.239:/TheFlash/k8s-nfs` and default StorageClass `flash-nfs`.
- Tailscale is the private access path. Rancher, Grafana, and Prometheus are exposed only through Tailscale services.
- `apps` is intentionally suspended in `clusters/prod/flux-system/kustomization-apps.yaml`.

## Common Commands
## Commands

- Terraform: `terraform -chdir=terraform fmt -recursive`, `terraform -chdir=terraform validate`, `terraform -chdir=terraform plan -var-file=../terraform.tfvars`, `terraform -chdir=terraform apply -var-file=../terraform.tfvars`
- Ansible: `ansible-galaxy collection install -r ansible/requirements.yml`, `cd ansible && python3 generate_inventory.py`, `ansible-playbook -i ansible/inventory.ini ansible/site.yml --syntax-check`, `ansible-playbook ansible/site.yml`
- Flux/Kustomize: `kubectl kustomize infrastructure/addons/<addon>`, `kubectl kustomize clusters/prod/flux-system`
- Kubeconfig refresh: `scripts/refresh-kubeconfig.sh <cp1-public-ip>`
- Tailnet smoke check: `ssh root@<cp1-ip> 'bash -s' < scripts/smoke-check-tailnet-services.sh`
- Terraform: `terraform -chdir=terraform fmt -recursive`, `terraform -chdir=terraform validate`, `terraform -chdir=terraform plan -var-file=../terraform.tfvars`, `terraform -chdir=terraform apply -var-file=../terraform.tfvars`.
- Ansible setup: `ansible-galaxy collection install -r ansible/requirements.yml`, then from `ansible/` run `python3 generate_inventory.py` and `ansible-playbook site.yml --syntax-check`.
- Flux/Kustomize checks: `kubectl kustomize infrastructure/addons/<addon>`, `kubectl kustomize infrastructure/addons`, `kubectl kustomize clusters/prod/flux-system`.
- Kubeconfig refresh: `scripts/refresh-kubeconfig.sh <cp1-ip>`; use this if local `kubectl` falls back to `localhost:8080` after rebuilds.
- Tailnet smoke check from cp1: `ssh ubuntu@<cp1-ip> 'bash -s' < scripts/smoke-check-tailnet-services.sh`.
- Fast Grafana content iteration uses `.gitea/workflows/dashboards.yml` and `ansible/dashboards.yml`, not a full cluster rebuild.

## Workflow Rules
## Deploy Flow

- Keep diffs small and validate only the directory you edited.
- Update manifests and docs together when behavior changes.
- Use `set -euo pipefail` in workflow shell blocks.
- CI deploy order is Terraform -> Ansible -> Flux bootstrap -> Rancher restore -> health checks.
- One object per Kubernetes YAML file; keep filenames kebab-case.
- If `kubectl` points at `localhost:8080` after a rebuild, refresh kubeconfig from the primary control-plane IP.
- Pushes to `main` run Gitea CI: Terraform fmt/init/validate/plan/apply, Proxmox cleanup/retry, Ansible bootstrap, Flux bootstrap, addon gates, Rancher gate, observability image seeding, health checks, tailnet smoke checks.
- Deploy and destroy workflows share `concurrency.group: prod-cluster`; destroy only requires workflow input `confirm: destroy` and has no backup gate.
- Keep `set -euo pipefail` in workflow shell blocks.
- Terraform retry cleanup has hard-coded target VMIDs/names in `.gitea/workflows/deploy.yml`; update it when changing node counts, names, or VMIDs.
- Fresh VMs pull bootstrap images directly through containerd/K3s. Do not add runner-side `skopeo` archive/import paths; registry/network failures should surface directly in deploy logs.
- CI applies `clusters/prod/flux-system/gotk-components.yaml` directly and then patches Flux controller deployments inline; changes only in `gotk-controller-cp1-patches.yaml` do not affect CI bootstrap.

## Repo-Specific Gotchas
## GitOps Addons

- `rancher-backup` uses a postRenderer to swap the broken hook image to `rancher/kubectl:v1.34.0`; do not put S3 config in HelmRelease values. Put it in the Backup CR.
- Tailscale cleanup only runs before service proxies exist; it removes stale offline `rancher`/`grafana`/`prometheus`/`flux` devices, then must stop so live proxies are not deleted.
- Keep the Tailscale operator on the stable Helm repo `https://pkgs.tailscale.com/helmcharts` at `1.96.5` unless you have a reason to change it.
- Current private URLs:
  - Rancher: `https://rancher.silverside-gopher.ts.net/`
  - Grafana: `http://grafana.silverside-gopher.ts.net/`
  - Prometheus: `http://prometheus.silverside-gopher.ts.net:9090/`
- Vendored charts are intentional: `infrastructure/charts/{cert-manager,traefik,kube-prometheus-stack,tailscale-operator,rancher}`. Do not restore remote `HelmRepository` objects unless cluster-side chart fetch reliability is intentionally changed.
- External Secrets and Loki/Promtail use Flux `OCIRepository`; Rancher, Tailscale, cert-manager, Traefik, and kube-prometheus-stack use `GitRepository` chart paths.
- Use fully qualified `helmchart.source.toolkit.fluxcd.io/...` in scripts; K3s also has `helmcharts.helm.cattle.io`, so `helmchart/...` can target the wrong resource (see the example after this list).
- `doppler-bootstrap` only creates the `external-secrets` namespace and Doppler token secret. The deploy workflow creates `ClusterSecretStore/doppler-hetznerterra` after ESO CRDs and webhook endpoints exist.
- The checked-in `infrastructure/addons/external-secrets/clustersecretstore-doppler-hetznerterra.yaml` is not included by that addon kustomization; do not assume Flux applies it.
- Keep Kubernetes manifests one object per file with kebab-case filenames.
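
A minimal illustration of the fully qualified form (a sketch; it only shows the generic list commands, no repo-specific object names):

```bash
# Ambiguous: "helmchart" may resolve to K3s's helmcharts.helm.cattle.io CRD instead of the Flux one
kubectl -n flux-system get helmchart

# Fully qualified: always targets the Flux source-controller HelmChart resources
kubectl -n flux-system get helmcharts.source.toolkit.fluxcd.io
```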

## Gotchas

- Rancher chart `2.13.3` requires Kubernetes `<1.35.0-0`; K3s `latest` can break Rancher. Role defaults pin `v1.34.6+k3s1`; do not reintroduce a generated-inventory `k3s_version=latest` override.
- The repo no longer uses a cloud controller manager. `providerID`, Hetzner CCM/CSI, or Hetzner firewall/load-balancer logic is stale.
- Tailscale cleanup must only remove stale offline reserved hostnames before live service proxies exist; do not delete active `rancher`, `grafana`, `prometheus`, or `flux` devices.
- Proxmox endpoint should be the base URL, for example `https://100.105.0.115:8006/`; provider/workflow code strips `/api2/json` when needed.
- Current private URLs: Rancher `https://rancher.silverside-gopher.ts.net/`, Grafana `http://grafana.silverside-gopher.ts.net/`, Prometheus `http://prometheus.silverside-gopher.ts.net:9090/`.

## Secrets

- Runtime secrets live in Doppler + External Secrets.
- Bootstrap and CI secrets stay in Gitea; never commit secrets, kubeconfigs, or private keys.
- Runtime secrets are Doppler + External Secrets; Terraform/bootstrap/CI secrets stay in Gitea Actions secrets.
- Never commit secrets, kubeconfigs, private keys, `terraform.tfvars`, or generated `outputs/` artifacts.

@@ -0,0 +1,287 @@

# App Repo Deployment Guide

This guide explains the recommended way to deploy an application to this cluster.

## Recommended Model

Use two repos:

- `HetznerTerra` (this repo): cluster, addons, shared infrastructure, Flux wiring
- `your-app-repo`: application source, Dockerfile, CI, Kubernetes manifests or Helm chart

Why:

- cluster lifecycle stays separate from app code
- app CI can build and tag images independently
- this repo remains the source of truth for what the cluster is allowed to deploy

## Current Cluster Assumptions

- Flux is already installed and reconciles this repo from `main`
- `clusters/prod/flux-system/kustomization-apps.yaml` points at `./apps`
- `apps` is suspended by default
- private access is through Tailscale
- runtime secrets should come from Doppler via External Secrets

## Deployment Options

### Option A: Separate app repo

Recommended for most real applications.

Flow:

1. App repo builds and pushes an image.
2. This repo defines a `GitRepository` pointing at the app repo.
3. This repo defines a `Kustomization` pointing at a path in the app repo.
4. Flux pulls the app repo and applies the manifests.

### Option B: In-repo app manifests

Only use this when the application is tiny or tightly coupled to the platform.

Flow:

1. Put Kubernetes manifests directly under `apps/` in this repo.
2. Unsuspend the top-level `apps` Kustomization.

This is simpler, but mixes platform and app changes together.

## App Repo Structure

Suggested layout:

```text
your-app-repo/
├── src/
├── Dockerfile
├── .gitea/workflows/
└── deploy/
    ├── base/
    │   ├── namespace.yaml
    │   ├── deployment.yaml
    │   ├── service.yaml
    │   ├── externalsecret.yaml
    │   └── kustomization.yaml
    └── prod/
        ├── kustomization.yaml
        └── patch-*.yaml
```

If you prefer Helm, replace `deploy/base` and `deploy/prod` with a chart path and point Flux at that instead.
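
For the kustomize layout above, the two kustomization files could look like this (an illustrative sketch, not files that exist anywhere yet; the patch filename is just one instance of the `patch-*.yaml` pattern):

```yaml
# deploy/base/kustomization.yaml (illustrative)
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - deployment.yaml
  - service.yaml
  - externalsecret.yaml
---
# deploy/prod/kustomization.yaml (illustrative)
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ../base
patches:
  - path: patch-deployment.yaml
```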

## What the App Repo Should Own

- application source code
- image build pipeline
- image tag strategy
- Deployment / Service / Ingress or Tailscale-facing Service manifests
- app-specific `ExternalSecret` manifests
- app-specific namespace

## What This Repo Should Own

- cluster-level permission to deploy the app
- the `GitRepository` and top-level `Kustomization` that attach the app repo to the cluster
- whether the `apps` layer is suspended or active

## Recommended First App Integration

In this repo, add Flux objects under `apps/` that point to the app repo.

Example files to add:

- `apps/gitrepository-my-app.yaml`
- `apps/kustomization-my-app.yaml`
- update `apps/kustomization.yaml`

Example `apps/gitrepository-my-app.yaml`:

```yaml
apiVersion: source.toolkit.fluxcd.io/v1
kind: GitRepository
metadata:
  name: my-app
  namespace: flux-system
spec:
  interval: 1m
  ref:
    branch: main
  secretRef:
    name: flux-system
  url: ssh://git@<your-git-host>:<port>/<org>/<your-app-repo>.git
```

Example `apps/kustomization-my-app.yaml`:

```yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: my-app
  namespace: flux-system
spec:
  interval: 10m
  prune: true
  sourceRef:
    kind: GitRepository
    name: my-app
  path: ./deploy/prod
  wait: true
  timeout: 5m
  dependsOn:
    - name: infrastructure
```

Then update `apps/kustomization.yaml`:

```yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - gitrepository-my-app.yaml
  - kustomization-my-app.yaml
```

## App Secrets

Recommended path:

1. Put runtime values in Doppler.
2. In the app manifests, create an `ExternalSecret` that reads from `doppler-hetznerterra`.
3. Reference the resulting Kubernetes Secret from the Deployment.

Example app-side `ExternalSecret`:

```yaml
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: my-app-env
  namespace: my-app
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: doppler-hetznerterra
    kind: ClusterSecretStore
  target:
    name: my-app-env
    creationPolicy: Owner
  data:
    - secretKey: DATABASE_URL
      remoteRef:
        key: MY_APP_DATABASE_URL
```
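
To complete step 3, the Deployment in `deploy/base` can consume the generated Secret, for example with `envFrom` (a sketch; the container name and image are placeholders):

```yaml
# Fragment of deploy/base/deployment.yaml (illustrative)
spec:
  template:
    spec:
      containers:
        - name: my-app
          image: registry.example.com/my-app:1.0.0   # placeholder image reference
          envFrom:
            - secretRef:
                name: my-app-env   # Secret created by the ExternalSecret above
```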

## Image Delivery

Recommended flow:

1. App repo CI builds a container image.
2. CI pushes it to a registry.
3. The app repo updates the Kubernetes image tag in `deploy/prod` (see the sketch after this list).
4. Flux notices the Git change and deploys it.

Keep the first version simple. Do not add image automation until the basic deploy path is proven.
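
One simple way to handle the tag update in step 3 is a kustomize `images` override in `deploy/prod` that app CI rewrites on each release (a sketch; the image name and tag are placeholders):

```yaml
# deploy/prod/kustomization.yaml (illustrative images override)
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ../base
images:
  - name: registry.example.com/my-app     # image name as written in the base Deployment
    newTag: "2025.01.15-abc1234"          # tag bumped by app CI
```

App CI can rewrite the tag with `kustomize edit set image registry.example.com/my-app:<new-tag>` and commit the change; Flux then deploys it on the next reconciliation.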

## Exposing the App

Pick one:

### Private app over Tailscale

Best fit for this cluster right now.

Create a Service like the existing Rancher/Grafana/Prometheus pattern:

```yaml
apiVersion: v1
kind: Service
metadata:
  name: my-app-tailscale
  namespace: my-app
  annotations:
    tailscale.com/hostname: my-app
    tailscale.com/tags: "tag:prod"
    tailscale.com/proxy-class: infra-stable
spec:
  type: LoadBalancer
  loadBalancerClass: tailscale
  selector:
    app.kubernetes.io/name: my-app
  ports:
    - name: http
      port: 80
      protocol: TCP
      targetPort: 3000
```

Use `http://my-app.<your-tailnet>` or your chosen hostname.

### Cluster-internal only

Create only a `ClusterIP` Service.

### Public ingress

Not recommended as the first app path in this repo. Get the private path working first.

## Enabling the Apps Layer

The cluster-wide `apps` Kustomization is suspended by default.

When you are ready to let Flux deploy app attachments from `apps/`, unsuspend it:

```bash
kubectl -n flux-system patch kustomization apps --type=merge -p '{"spec":{"suspend":false}}'
```

Or commit a change to `clusters/prod/flux-system/kustomization-apps.yaml` changing:

```yaml
suspend: true
```

to:

```yaml
suspend: false
```

## First Deploy Checklist

Before deploying the first app, make sure:

1. app image builds successfully
2. app repo contains valid `deploy/prod` manifests
3. this repo contains the `GitRepository` + `Kustomization` attachment objects
4. required Doppler secrets exist
5. `apps` is unsuspended if you are using the top-level `apps` layer

## Verification Commands

From a machine with cluster access:

```bash
kubectl -n flux-system get gitrepositories,kustomizations
kubectl get ns
kubectl -n my-app get deploy,svc,pods,externalsecret,secret
```

If private over Tailscale:

```bash
kubectl -n my-app get svc my-app-tailscale -o wide
```

## Minimal Recommendation

If you want the simplest, lowest-risk first deploy:

1. create a separate app repo
2. add `deploy/base` + `deploy/prod`
3. add a `GitRepository` + `Kustomization` in this repo under `apps/`
4. keep the app private with a Tailscale `LoadBalancer` Service
5. use Doppler + `ExternalSecret` for runtime config

That matches the current cluster design with the least surprise.

@@ -0,0 +1,120 @@

# Network Stabilization Plan

## Goal

Make destroy/rebuild deploys reliable without hiding real network failures behind runner-side image archives or one-off manual intervention.

## Current Symptoms

- Registry pulls intermittently fail from cluster nodes with TLS handshake timeouts.
- Failures have appeared across GHCR, Docker Hub, Quay, registry.k8s.io, and redirected blob hosts.
- Doppler API calls from External Secrets intermittently time out.
- Flux OCIRepository objects can show transient upstream failures even when cached artifacts are sufficient for successful Helm releases.
- Lowering node MTU to 1400 improved kube-vip and some image pulls but did not eliminate the issue.

## Working Hypothesis

The remaining instability is likely egress path behavior from the VM subnet, especially PMTUD/MSS/NAT/firewall handling. The same timeout pattern appears across unrelated upstream services, which points away from a single registry, chart, or Kubernetes component.

## Phase 1: Prove The Network Root Cause

Run repeatable probes from the Proxmox host, cp1, and one worker; a probe sketch follows the lists below.

- Test registry and API endpoints with repeated `curl` timing checks.
- Test known flaky pulls with repeated `crictl pull` attempts.
- Test Doppler API reachability from a node.
- Compare Proxmox host egress against VM egress.
- Check path MTU behavior with tools such as `tracepath` where available.
- Record node MTU, default route, DNS resolver, and selected remote IPs during tests.

Target endpoints:

- `https://ghcr.io/v2/`
- `https://auth.docker.io/token`
- `https://registry-1.docker.io/v2/`
- `https://quay.io/v2/`
- `https://registry.k8s.io/v2/`
- `https://api.doppler.com/v3/projects`

Known useful test images:

- `ghcr.io/fluxcd/helm-controller:v1.5.1`
- `oci.external-secrets.io/external-secrets/external-secrets:v2.1.0`
- `docker.io/rancher/mirrored-library-busybox:1.37.0`
- `ghcr.io/tailscale/tailscale:v1.96.5`
- `quay.io/prometheus/node-exporter:v1.8.2`
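
A possible shape for the probes above, run identically from the Proxmox host, cp1, and one worker (a sketch under this plan's assumptions; it only uses the endpoints and images listed here, and `crictl` is only expected on the K3s nodes):

```bash
#!/usr/bin/env bash
# Repeated timing probes for the endpoints that have shown TLS handshake timeouts.
# HTTP 401/404 responses are fine here; only connect/TLS failures and timeouts matter.
set -u

endpoints=(
  "https://ghcr.io/v2/"
  "https://auth.docker.io/token"
  "https://registry-1.docker.io/v2/"
  "https://quay.io/v2/"
  "https://registry.k8s.io/v2/"
  "https://api.doppler.com/v3/projects"
)

iface=$(ip route show default | awk '{print $5; exit}')
echo "host=$(hostname) iface=${iface} mtu=$(cat /sys/class/net/${iface}/mtu)"

for url in "${endpoints[@]}"; do
  for i in $(seq 1 10); do
    curl -sS -o /dev/null --max-time 15 \
      -w "${url} attempt=${i} http=%{http_code} connect=%{time_connect}s tls=%{time_appconnect}s total=%{time_total}s\n" \
      "$url" || echo "${url} attempt=${i} FAILED"
  done
done

# Repeated pull of a known-flaky image (run on cp1/worker where crictl exists).
for i in $(seq 1 5); do
  sudo crictl pull ghcr.io/fluxcd/helm-controller:v1.5.1 \
    && echo "pull attempt ${i} ok" || echo "pull attempt ${i} FAILED"
done
```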

## Phase 2: Fix The Network Layer

Prefer a network fix before adding more application-level retries.

- Verify whether the gateway/firewall allows ICMP fragmentation-needed messages.
- Add TCP MSS clamping on the gateway/firewall for the Kubernetes VM subnet.
- Start with an MSS value derived from the working path MTU, then reduce only if tests still fail.
- Keep VM MTU at `1400` unless tests prove a better value.
- Re-run the Phase 1 probes after each network change.
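
For reference, this is roughly what the clamp looks like if the gateway is a plain Linux router; the actual gateway for `10.27.27.0/24` may be a firewall appliance with its own MSS-clamping toggle, so treat this as a sketch only:

```bash
# Clamp TCP MSS for traffic forwarded from the Kubernetes VM subnet.
# --clamp-mss-to-pmtu derives the value from the discovered path MTU; with VM MTU 1400,
# an explicit "--set-mss 1360" (1400 minus 40 bytes of IP+TCP headers) is the fallback
# if PMTU discovery itself is unreliable.
iptables -t mangle -A FORWARD -s 10.27.27.0/24 -p tcp --tcp-flags SYN,RST SYN \
  -j TCPMSS --clamp-mss-to-pmtu

# PMTUD also needs ICMP "fragmentation needed" to pass through the gateway.
iptables -A FORWARD -p icmp --icmp-type fragmentation-needed -j ACCEPT
```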

Success criteria:

- Repeated registry token and manifest requests succeed without TLS handshake timeouts.
- Repeated image pulls succeed from cp1 and at least one worker.
- Doppler API calls from the cluster succeed consistently enough that External Secrets does not flap for long periods.

## Phase 3: Reduce External Registry Dependence

If network fixes do not fully stabilize pulls, add a local registry mirror or pull-through cache on the private network.

- Run the mirror close to the cluster, reachable from `10.27.27.0/24`.
- Configure K3s/containerd via `/etc/rancher/k3s/registries.yaml`.
- Mirror or cache high-risk bootstrap and addon images.
- Keep direct upstream pulls as fallback, but make the mirror the primary path.
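
A sketch of the K3s mirror configuration (the mirror hostname is a placeholder; the real endpoint, TLS setup, and how one cache serves several upstream registries depend on the mirror product chosen):

```yaml
# /etc/rancher/k3s/registries.yaml (illustrative; apply on every node, then restart K3s)
mirrors:
  docker.io:
    endpoint:
      - "https://registry-mirror.internal:5000"
  ghcr.io:
    endpoint:
      - "https://registry-mirror.internal:5000"
```

Endpoints are tried in order and the upstream registry remains the final fallback, which matches the requirement above.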

Priority image groups:

- K3s bootstrap images
- kube-vip
- Flux controllers
- External Secrets
- Tailscale operator and proxy image
- Rancher and Rancher support images
- Traefik
- cert-manager
- observability stack images
- NFS and helper images

## Phase 4: Keep Secrets From Blocking The Flux Graph

External Secrets should stay the runtime secret source, but Flux should not require live Doppler validation for unrelated graph progress.

- Keep `ClusterSecretStore` application decoupled from Flux health checks.
- Keep explicit workflow checks for generated Kubernetes `Secret` objects where bootstrap needs them.
- Continue using `external-secrets.io/force-sync` for critical bootstrap secrets.
- Prefer checking generated Kubernetes secrets over checking live Doppler readiness in broad post-deploy gates.
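
Operationally, the force-sync pattern is just stamping the annotation with a fresh value so the operator reconciles the object immediately (a sketch; the namespace and ExternalSecret name are placeholders, and the annotation key is the one this repo already uses):

```bash
# Nudge a critical bootstrap ExternalSecret to sync now instead of waiting for refreshInterval.
kubectl -n cattle-system annotate externalsecret rancher-bootstrap \
  external-secrets.io/force-sync="$(date +%s)" --overwrite
```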

## Phase 5: Tighten Workflow Diagnostics

Keep the current green deploy path, but improve failure output.

- Print image pull failures grouped by image and node.
- Print Flux source failures separately from HelmRelease readiness.
- Print External Secrets and Doppler status only in secret-related gates.
- Print node MTU, default route, and DNS resolver when registry pulls fail.
- Treat cached OCI artifacts as acceptable when the dependent HelmRelease is already Ready.

## Recommended Order

1. Run Phase 1 probes and capture evidence.
2. Add or adjust gateway TCP MSS clamping.
3. Re-run Phase 1 probes and one full destroy/rebuild.
4. Add a local registry mirror only if registry pulls remain flaky.
5. Simplify retry-heavy workflow logic after the network path is stable.

## Current Mitigations Already In Place

- Node MTU is set to `1400` by Ansible.
- Bootstrap image pre-pulls use direct node pulls with retries.
- Critical bootstrap images are pre-pulled before Flux/addons need them.
- Doppler store health no longer blocks the Flux graph.
- Rancher bootstrap secrets are force-synced and checked explicitly.
- Traefik Helm release has longer timeouts and more retries.
- Post-deploy health checks verify Flux, Helm releases, storage, and pod health.
@@ -1,296 +1,268 @@
|
||||
# Hetzner Kubernetes Cluster
|
||||
# Proxmox Kubernetes Cluster
|
||||
|
||||
Production-ready Kubernetes cluster on Hetzner Cloud using Terraform and Ansible.
|
||||
Private HA K3s cluster on Proxmox, provisioned by Terraform, bootstrapped by Ansible, and reconciled by Flux.
|
||||
|
||||
## Architecture
|
||||
|
||||
| Component | Details |
|
||||
|-----------|---------|
|
||||
| **Control Plane** | 3x CX23 (HA) |
|
||||
| **Workers** | 3x CX33 |
|
||||
| **K8s** | k3s (latest, HA) |
|
||||
| **Addons** | Hetzner CCM + CSI + Prometheus + Grafana + Loki |
|
||||
| **Access** | SSH/API and private services restricted to Tailnet |
|
||||
| **Bootstrap** | Terraform + Ansible + Flux |
|
||||
| Component | Current Baseline |
|
||||
|-----------|------------------|
|
||||
| **Control plane** | 3 Proxmox VMs, VMIDs `200-202`, IPs `10.27.27.30-32`, 2 vCPU / 4 GiB / 32 GiB |
|
||||
| **Workers** | 5 Proxmox VMs, VMIDs `210-214`, IPs `10.27.27.41-45`, 4 vCPU / 8 GiB / 64 GiB |
|
||||
| **Kubernetes** | K3s `v1.34.6+k3s1`, HA embedded etcd, kube-vip API VIP `10.27.27.40` |
|
||||
| **Proxmox** | Node `flex`, template VMID `9000`, datastore `Flash`, bridge `vmbr0` |
|
||||
| **Storage** | Raw-manifest `nfs-subdir-external-provisioner`, `10.27.27.239:/TheFlash/k8s-nfs`, default StorageClass `flash-nfs` |
|
||||
| **GitOps** | Flux source `platform` on branch `main`; `apps` Kustomization is intentionally suspended |
|
||||
| **Private access** | Tailscale operator exposes Rancher, Grafana, and Prometheus; no public ingress baseline |
|
||||
| **Runtime secrets** | Doppler service token bootstraps External Secrets Operator |
|
||||
|
||||
K3s is pinned because Rancher chart `2.13.3` requires Kubernetes `<1.35.0-0`.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### 1. Hetzner Cloud API Token
|
||||
- Terraform `>= 1.0`.
|
||||
- Ansible with Python `jinja2` and `pyyaml`.
|
||||
- `kubectl` for local verification.
|
||||
- Proxmox API token for the `bpg/proxmox` provider.
|
||||
- S3-compatible bucket for Terraform state, currently Backblaze B2.
|
||||
- SSH key pair available to Terraform and Ansible, defaulting to `~/.ssh/infra` and `~/.ssh/infra.pub`.
|
||||
|
||||
1. Go to [Hetzner Cloud Console](https://console.hetzner.com/)
|
||||
2. Select your project (or create a new one)
|
||||
3. Navigate to **Security** → **API Tokens**
|
||||
4. Click **Generate API Token**
|
||||
5. Set description: `k8s-cluster-terraform`
|
||||
6. Select permissions: **Read & Write**
|
||||
7. Click **Generate API Token**
|
||||
8. **Copy the token immediately** - it won't be shown again!
|
||||
Expected Proxmox inputs:
|
||||
|
||||
### 2. Backblaze B2 Bucket (for Terraform State)
|
||||
| Setting | Value |
|
||||
|---------|-------|
|
||||
| Endpoint | `https://100.105.0.115:8006/` |
|
||||
| Node | `flex` |
|
||||
| Clone source | Template VMID `9000` (`ubuntu-2404-k8s-template`) |
|
||||
| Storage | `Flash` |
|
||||
|
||||
1. Go to [Backblaze B2](https://secure.backblaze.com/b2_buckets.htm)
|
||||
2. Click **Create a Bucket**
|
||||
3. Set bucket name: `k8s-terraform-state` (must be globally unique)
|
||||
4. Choose **Private** access
|
||||
5. Click **Create Bucket**
|
||||
6. Create application key:
|
||||
- Go to **App Keys** → **Add a New Application Key**
|
||||
- Name: `terraform-state`
|
||||
- Allow access to: `k8s-terraform-state` bucket only
|
||||
- Type: **Read and Write**
|
||||
- Copy **keyID** (access key) and **applicationKey** (secret key)
|
||||
7. Note your bucket's S3 endpoint (e.g., `https://s3.eu-central-003.backblazeb2.com`)
|
||||
## Local Setup
|
||||
|
||||
### 3. SSH Key Pair
|
||||
|
||||
```bash
|
||||
ssh-keygen -t ed25519 -C "k8s@hetzner" -f ~/.ssh/hetzner_k8s
|
||||
```
|
||||
|
||||
### 4. Local Tools
|
||||
|
||||
- [Terraform](https://terraform.io/downloads) >= 1.0
|
||||
- [Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) >= 2.9
|
||||
- Python 3 with `jinja2` and `pyyaml`
|
||||
|
||||
## Setup
|
||||
|
||||
### 1. Clone Repository
|
||||
|
||||
```bash
|
||||
git clone <your-gitea-repo>/HetznerTerra.git
|
||||
cd HetznerTerra
|
||||
```
|
||||
|
||||
### 2. Configure Variables
|
||||
Create local variables from the example:
|
||||
|
||||
```bash
|
||||
cp terraform.tfvars.example terraform.tfvars
|
||||
```
|
||||
|
||||
Edit `terraform.tfvars`:
|
||||
Important defaults in `terraform.tfvars.example`:
|
||||
|
||||
```hcl
|
||||
hcloud_token = "your-hetzner-api-token"
|
||||
proxmox_endpoint = "https://100.105.0.115:8006/"
|
||||
proxmox_api_token_id = "terraform-prov@pve!k8s-cluster"
|
||||
proxmox_api_token_secret = "your-proxmox-api-token-secret"
|
||||
|
||||
ssh_public_key = "~/.ssh/hetzner_k8s.pub"
|
||||
ssh_private_key = "~/.ssh/hetzner_k8s"
|
||||
ssh_public_key = "~/.ssh/infra.pub"
|
||||
ssh_private_key = "~/.ssh/infra"
|
||||
|
||||
s3_access_key = "your-backblaze-key-id"
|
||||
s3_secret_key = "your-backblaze-application-key"
|
||||
s3_endpoint = "https://s3.eu-central-003.backblazeb2.com"
|
||||
s3_bucket = "k8s-terraform-state"
|
||||
|
||||
tailscale_auth_key = "tskey-auth-..."
|
||||
tailscale_tailnet = "yourtailnet.ts.net"
|
||||
|
||||
restrict_api_ssh_to_tailnet = true
|
||||
tailnet_cidr = "100.64.0.0/10"
|
||||
enable_nodeport_public = false
|
||||
|
||||
allowed_ssh_ips = []
|
||||
allowed_api_ips = []
|
||||
kube_api_vip = "10.27.27.40"
|
||||
```
|
||||
|
||||
### 3. Initialize Terraform
|
||||
Initialize Terraform with backend credentials:
|
||||
|
||||
```bash
|
||||
cd terraform
|
||||
|
||||
# Create backend config file (or use CLI args)
|
||||
cat > backend.hcl << EOF
|
||||
endpoint = "https://s3.eu-central-003.backblazeb2.com"
|
||||
bucket = "k8s-terraform-state"
|
||||
access_key = "your-backblaze-key-id"
|
||||
secret_key = "your-backblaze-application-key"
|
||||
skip_requesting_account_id = true
|
||||
EOF
|
||||
|
||||
terraform init -backend-config=backend.hcl
|
||||
terraform -chdir=terraform init \
|
||||
-backend-config="endpoint=<s3-endpoint>" \
|
||||
-backend-config="bucket=<s3-bucket>" \
|
||||
-backend-config="region=auto" \
|
||||
-backend-config="access_key=<s3-access-key>" \
|
||||
-backend-config="secret_key=<s3-secret-key>" \
|
||||
-backend-config="skip_requesting_account_id=true"
|
||||
```
|
||||
|
||||
### 4. Plan and Apply
|
||||
## Common Commands
|
||||
|
||||
Terraform:
|
||||
|
||||
```bash
|
||||
terraform plan -var-file=../terraform.tfvars
|
||||
terraform apply -var-file=../terraform.tfvars
|
||||
terraform -chdir=terraform fmt -recursive
|
||||
terraform -chdir=terraform validate
|
||||
terraform -chdir=terraform plan -var-file=../terraform.tfvars
|
||||
terraform -chdir=terraform apply -var-file=../terraform.tfvars
|
||||
```
|
||||
|
||||
### 5. Generate Ansible Inventory
|
||||
Ansible setup:
|
||||
|
||||
```bash
|
||||
cd ../ansible
|
||||
ansible-galaxy collection install -r ansible/requirements.yml
|
||||
cd ansible
|
||||
python3 generate_inventory.py
|
||||
ansible-playbook site.yml --syntax-check
|
||||
```
|
||||
|
||||
### 6. Bootstrap Cluster
|
||||
Manual Ansible bootstrap uses the same extra vars as the deploy workflow:
|
||||
|
||||
```bash
|
||||
ansible-playbook site.yml
|
||||
cd ansible
|
||||
ansible-playbook site.yml \
|
||||
-e "tailscale_auth_key=$TAILSCALE_AUTH_KEY" \
|
||||
-e "tailscale_tailnet=$TAILSCALE_TAILNET" \
|
||||
-e "tailscale_oauth_client_id=$TAILSCALE_OAUTH_CLIENT_ID" \
|
||||
-e "tailscale_oauth_client_secret=$TAILSCALE_OAUTH_CLIENT_SECRET" \
|
||||
-e "doppler_hetznerterra_service_token=$DOPPLER_HETZNERTERRA_SERVICE_TOKEN" \
|
||||
-e "tailscale_api_key=${TAILSCALE_API_KEY:-}" \
|
||||
-e "grafana_admin_password=${GRAFANA_ADMIN_PASSWORD:-}" \
|
||||
-e "cluster_name=k8s-cluster"
|
||||
```
|
||||
|
||||
### 7. Get Kubeconfig
|
||||
Flux/Kustomize verification:
|
||||
|
||||
```bash
|
||||
kubectl kustomize infrastructure/addons/<addon>
|
||||
kubectl kustomize infrastructure/addons
|
||||
kubectl kustomize clusters/prod/flux-system
|
||||
```
|
||||
|
||||
Refresh kubeconfig after rebuilds:
|
||||
|
||||
```bash
|
||||
scripts/refresh-kubeconfig.sh 10.27.27.30
|
||||
export KUBECONFIG=$(pwd)/outputs/kubeconfig
|
||||
kubectl get nodes
|
||||
```
|
||||
|
||||
Use `scripts/refresh-kubeconfig.sh <cp1-public-ip>` to refresh kubeconfig against the primary control-plane public IP after rebuilds.
|
||||
Run the tailnet smoke check from cp1:
|
||||
|
||||
```bash
|
||||
ssh ubuntu@10.27.27.30 'bash -s' < scripts/smoke-check-tailnet-services.sh
|
||||
```
|
||||
|
||||
## Gitea CI/CD
|
||||
|
||||
This repository includes Gitea workflows for:
|
||||
The supported full rebuild path is the Gitea deploy workflow.
|
||||
|
||||
- **deploy**: End-to-end Terraform + Ansible + Flux bootstrap + restore + health checks
|
||||
- **destroy**: Cluster teardown with backup-aware cleanup
|
||||
- **dashboards**: Fast workflow that updates Grafana datasources/dashboards only
|
||||
| Workflow | Trigger | Purpose |
|
||||
|----------|---------|---------|
|
||||
| `.gitea/workflows/deploy.yml` | PR to `main`, push to `main`, manual dispatch | PRs run Terraform plan; pushes run Terraform apply, Ansible bootstrap, Flux bootstrap, addon gates, health checks, and tailnet smoke checks |
|
||||
| `.gitea/workflows/destroy.yml` | Manual dispatch with `confirm: destroy` | Terraform destroy with retries; no Rancher backup gate |
|
||||
| `.gitea/workflows/dashboards.yml` | Grafana content changes or manual dispatch | Fast Grafana datasource/dashboard update through `ansible/dashboards.yml` |
|
||||
|
||||
### Required Gitea Secrets
|
||||
Deploy and destroy share `concurrency.group: prod-cluster` so they do not run at the same time.
|
||||
|
||||
Set these in your Gitea repository settings (**Settings** → **Secrets** → **Actions**):
|
||||
Deploy sequence on push to `main`:
|
||||
|
||||
1. Terraform fmt/init/validate/plan/apply.
|
||||
2. Cleanup/retry around known transient Proxmox clone and disk-update failures.
|
||||
3. Generate Ansible inventory from Terraform outputs.
|
||||
4. Run `ansible/site.yml` to bootstrap nodes, K3s, kube-vip, prerequisite secrets, and kubeconfig.
|
||||
5. Pull bootstrap images directly through containerd/K3s on the target nodes.
|
||||
6. Apply Flux CRDs/controllers and the `clusters/prod/flux-system` graph.
|
||||
7. Gate cert-manager, External Secrets, Tailscale, NFS, Rancher, and observability.
|
||||
8. Run post-deploy health checks and Tailscale service smoke checks.
|
||||
|
||||
Required Gitea secrets:
|
||||
|
||||
| Secret | Description |
|
||||
|--------|-------------|
|
||||
| `HCLOUD_TOKEN` | Hetzner Cloud API token |
|
||||
| `S3_ACCESS_KEY` | Backblaze B2 keyID |
|
||||
| `S3_SECRET_KEY` | Backblaze B2 applicationKey |
|
||||
| `S3_ENDPOINT` | Backblaze S3 endpoint (e.g., `https://s3.eu-central-003.backblazeb2.com`) |
|
||||
| `S3_BUCKET` | S3 bucket name (e.g., `k8s-terraform-state`) |
|
||||
| `PROXMOX_ENDPOINT` | Proxmox API endpoint, for example `https://100.105.0.115:8006/` |
|
||||
| `PROXMOX_API_TOKEN_ID` | Proxmox API token ID |
|
||||
| `PROXMOX_API_TOKEN_SECRET` | Proxmox API token secret |
|
||||
| `S3_ACCESS_KEY` | S3/Backblaze access key for Terraform state |
|
||||
| `S3_SECRET_KEY` | S3/Backblaze secret key for Terraform state |
|
||||
| `S3_ENDPOINT` | S3 endpoint, for example `https://s3.eu-central-003.backblazeb2.com` |
|
||||
| `S3_BUCKET` | Terraform state bucket, for example `k8s-terraform-state` |
|
||||
| `TAILSCALE_AUTH_KEY` | Tailscale auth key for node bootstrap |
|
||||
| `TAILSCALE_TAILNET` | Tailnet domain (e.g., `yourtailnet.ts.net`) |
|
||||
| `TAILSCALE_OAUTH_CLIENT_ID` | Tailscale OAuth client ID for Kubernetes Operator |
|
||||
| `TAILSCALE_OAUTH_CLIENT_SECRET` | Tailscale OAuth client secret for Kubernetes Operator |
|
||||
| `DOPPLER_HETZNERTERRA_SERVICE_TOKEN` | Doppler service token for `hetznerterra` runtime secrets |
|
||||
| `GRAFANA_ADMIN_PASSWORD` | Optional admin password for Grafana (auto-generated if unset) |
|
||||
| `RUNNER_ALLOWED_CIDRS` | Optional CIDR list for CI runner access if you choose to pass it via tfvars/secrets |
|
||||
| `TAILSCALE_TAILNET` | Tailnet domain, for example `silverside-gopher.ts.net` |
|
||||
| `TAILSCALE_OAUTH_CLIENT_ID` | Tailscale OAuth client ID for the Kubernetes operator |
|
||||
| `TAILSCALE_OAUTH_CLIENT_SECRET` | Tailscale OAuth client secret for the Kubernetes operator |
|
||||
| `TAILSCALE_API_KEY` | Optional API key used to delete stale offline reserved devices before service proxies exist |
|
||||
| `DOPPLER_HETZNERTERRA_SERVICE_TOKEN` | Doppler service token for runtime cluster secrets |
|
||||
| `GRAFANA_ADMIN_PASSWORD` | Optional Grafana admin password |
|
||||
| `SSH_PUBLIC_KEY` | SSH public key content |
|
||||
| `SSH_PRIVATE_KEY` | SSH private key content |
|
||||
|
||||
## GitOps (Flux)
|
||||
## GitOps Graph
|
||||
|
||||
This repo uses Flux for continuous reconciliation after Terraform + Ansible bootstrap.
|
||||
Flux entrypoint:
|
||||
|
||||
### Stable private-only baseline
|
||||
```text
|
||||
clusters/prod/flux-system/
|
||||
├── gotk-components.yaml
|
||||
├── gitrepository-platform.yaml
|
||||
├── kustomization-infrastructure.yaml
|
||||
└── kustomization-apps.yaml # suspend: true
|
||||
```
|
||||
|
||||
The current default target is the HA private baseline:
|
||||
Active infrastructure addons from `infrastructure/addons/kustomization.yaml`:
|
||||
|
||||
- `3` control plane nodes
|
||||
- `3` worker nodes
|
||||
- private Hetzner network only
|
||||
- Tailscale for operator and service access
|
||||
- Flux-managed platform addons with `apps` suspended by default
|
||||
- `addon-nfs-storage`
|
||||
- `addon-external-secrets`
|
||||
- `addon-cert-manager`
|
||||
- `addon-tailscale-operator`
|
||||
- `addon-tailscale-proxyclass`
|
||||
- `traefik` HelmRelease manifests applied directly by the top-level infrastructure Kustomization
|
||||
- `addon-observability`
|
||||
- `addon-observability-content`
|
||||
- `addon-rancher`
|
||||
- `addon-rancher-config`
|
||||
|
||||
Detailed phase gates and success criteria live in `STABLE_BASELINE.md`.
|
||||
Chart/source strategy:
|
||||
|
||||
This is the default until rebuilds are consistently green. High availability, public ingress, and app-layer expansion come later.
|
||||
- Vendored charts are intentional: `cert-manager`, `traefik`, `kube-prometheus-stack`, `tailscale-operator`, and `rancher` live under `infrastructure/charts/`.
|
||||
- External Secrets, Loki, and Promtail use Flux `OCIRepository` sources; deploy gates cap OCI retry time and rely on cached artifacts when available.
|
||||
- NFS storage is raw Kubernetes manifests, not a Helm chart.
|
||||
- Rancher backup/restore is not part of the current live graph.
|
||||
|
||||
### Runtime secrets
|
||||
Doppler bootstrap details:
|
||||
|
||||
Runtime cluster secrets are moving to Doppler + External Secrets Operator.
|
||||
- `ansible/roles/doppler-bootstrap` creates the `external-secrets` namespace and the Doppler token secret only.
|
||||
- The deploy workflow creates `ClusterSecretStore/doppler-hetznerterra` after ESO CRDs and webhook endpoints exist.
|
||||
- The checked-in `infrastructure/addons/external-secrets/clustersecretstore-doppler-hetznerterra.yaml` is not included by the addon kustomization.
|
||||
|
||||
- Doppler project: `hetznerterra`
|
||||
- Initial auth: service token via `DOPPLER_HETZNERTERRA_SERVICE_TOKEN`
|
||||
- First synced secrets:
|
||||
- `GRAFANA_ADMIN_PASSWORD`
|
||||
## Access URLs
|
||||
|
||||
Terraform/bootstrap secrets remain in Gitea Actions secrets and are not managed by Doppler.
|
||||
| Service | URL |
|
||||
|---------|-----|
|
||||
| Rancher | `https://rancher.silverside-gopher.ts.net/` |
|
||||
| Grafana | `http://grafana.silverside-gopher.ts.net/` |
|
||||
| Prometheus | `http://prometheus.silverside-gopher.ts.net:9090/` |
|
||||
|
||||
### Repository layout
|
||||
|
||||
- `clusters/prod/`: cluster entrypoint and Flux reconciliation objects
|
||||
- `clusters/prod/flux-system/`: `GitRepository` source and top-level `Kustomization` graph
|
||||
- `infrastructure/`: infrastructure addon reconciliation graph
|
||||
- `infrastructure/addons/*`: per-addon manifests for Flux-managed cluster addons
|
||||
- `apps/`: application workload layer (currently scaffolded)
|
||||
|
||||
### Reconciliation graph
|
||||
|
||||
- `infrastructure` (top-level)
|
||||
- `addon-ccm`
|
||||
- `addon-csi` depends on `addon-ccm`
|
||||
- `addon-tailscale-operator`
|
||||
- `addon-observability`
|
||||
- `addon-observability-content` depends on `addon-observability`
|
||||
- `apps` depends on `infrastructure`
|
||||
|
||||
### Bootstrap notes
|
||||
|
||||
1. Install Flux controllers in `flux-system`.
|
||||
2. Create the Flux deploy key/secret named `flux-system` in `flux-system` namespace.
|
||||
3. Apply `clusters/prod/flux-system/` once to establish source + reconciliation graph.
|
||||
4. Bootstrap-only Ansible creates prerequisite secrets; Flux manages addon lifecycle after bootstrap.
|
||||
|
||||
### Current addon status
|
||||
|
||||
- Core infrastructure addons are Flux-managed from `infrastructure/addons/`.
|
||||
- Active Flux addons for the current baseline: `addon-ccm`, `addon-csi`, `addon-cert-manager`, `addon-external-secrets`, `addon-tailscale-operator`, `addon-tailscale-proxyclass`, `addon-observability`, `addon-observability-content`, `addon-rancher`, `addon-rancher-config`, `addon-rancher-backup`, `addon-rancher-backup-config`.
|
||||
- `apps` remains suspended until workload rollout is explicitly enabled.
|
||||
- Ansible is limited to cluster bootstrap, prerequisite secret creation, pre-proxy Tailscale cleanup, and kubeconfig finalization.
|
||||
- Weave GitOps / Flux UI is no longer deployed; use Rancher or the `flux` CLI for Flux operations.
|
||||
|
||||
### Rancher access

- Rancher is private-only and exposed through Tailscale at `https://rancher.silverside-gopher.ts.net/`.
- The public Hetzner load balancer path is not used for Rancher.
- Rancher stores state in embedded etcd; no external database is used.
### Stable baseline acceptance

A rebuild is considered successful only when all of the following pass without manual intervention (a smoke-check sketch follows the list):

- Terraform create succeeds for the default `3` control planes and `3` workers.
- Ansible bootstrap succeeds end-to-end.
- All nodes become `Ready`.
- Flux core reconciliation is healthy.
- External Secrets Operator is ready.
- Tailscale operator is ready.
- Tailnet smoke checks pass for Rancher, Grafana, and Prometheus.
- Terraform destroy succeeds cleanly or succeeds after workflow retries.
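A hedged sketch of manual equivalents for these checks, runnable from a tailnet-connected machine (hostnames come from the Access URLs table; the operator deployment names are assumptions based on the namespaces used elsewhere in this doc):

```bash
export KUBECONFIG=$(pwd)/outputs/kubeconfig

# Nodes and Flux reconciliation
kubectl get nodes
flux check
flux get kustomizations -n flux-system

# Operator readiness
kubectl -n external-secrets get deploy
kubectl -n tailscale-system get deploy operator

# Tailnet smoke checks
curl -skI https://rancher.silverside-gopher.ts.net/ | head -n 1
curl -sI  http://grafana.silverside-gopher.ts.net/ | head -n 1
curl -sI  http://prometheus.silverside-gopher.ts.net:9090/-/ready | head -n 1
```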
## Observability Stack

Flux deploys a lightweight observability stack in the `observability` namespace:

- `kube-prometheus-stack` (Prometheus + Grafana)
- `loki`
- `promtail`

Grafana content is managed as code via ConfigMaps in `infrastructure/addons/observability-content/`.

Grafana and Prometheus are exposed through dedicated Tailscale LoadBalancer services when the Tailscale Kubernetes Operator is healthy.
### Access Grafana and Prometheus

Preferred private access:

- Grafana: `http://grafana.silverside-gopher.ts.net/`
- Prometheus: `http://prometheus.silverside-gopher.ts.net:9090/`

Fallback (port-forward from a tailnet-connected machine):

```bash
export KUBECONFIG=$(pwd)/outputs/kubeconfig

kubectl -n observability port-forward svc/kube-prometheus-stack-grafana 3000:80
kubectl -n observability port-forward svc/kube-prometheus-stack-prometheus 9090:9090
```

Then open:

- Grafana: http://127.0.0.1:3000
- Prometheus: http://127.0.0.1:9090

The Grafana user is `admin`; the password comes from the `GRAFANA_ADMIN_PASSWORD` Doppler secret or the workflow-provided fallback.
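If the Doppler-synced value is in doubt, the effective admin password can also be read back from the Grafana chart's Kubernetes secret; the secret and key names below follow kube-prometheus-stack defaults and are assumptions if the chart values override them:

```bash
# Print the admin password Grafana is actually running with
kubectl -n observability get secret kube-prometheus-stack-grafana \
  -o jsonpath='{.data.admin-password}' | base64 -d; echo
```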
## Operations

Grafana credentials: user `admin`, password from the `GRAFANA_ADMIN_PASSWORD` secret (or the generated value shown in the Ansible output).

Scale workers by updating `terraform.tfvars` counts, IP lists, and VMID lists together. If node names or VMIDs change, also update the hard-coded retry cleanup target map in `.gitea/workflows/deploy.yml`.
### Verify Tailscale exposure

Tailscale exposure checks (services, proxy readiness, operator logs) are covered under Troubleshooting below.

### Upgrade K3s

Upgrade K3s by changing the role defaults in `ansible/roles/k3s-server/defaults/main.yml` and `ansible/roles/k3s-agent/defaults/main.yml`. Check Rancher chart compatibility before moving to a Kubernetes minor outside `<1.35.0-0`.

### Destroy the cluster

Destroy through the Gitea `Destroy` workflow with `confirm: destroy`, or locally with:

```bash
export KUBECONFIG=$(pwd)/outputs/kubeconfig
terraform -chdir=terraform destroy -var-file=../terraform.tfvars
```
## Troubleshooting

Check K3s from cp1:

```bash
ssh ubuntu@10.27.27.30 'sudo k3s kubectl get nodes -o wide'
ssh ubuntu@10.27.27.30 'sudo journalctl -u k3s -n 120 --no-pager'
```
Check Flux and Rancher:

```bash
kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories
kubectl -n flux-system describe helmrelease rancher
kubectl -n cattle-system get pods,deploy -o wide
```
Check Tailscale services:

```bash
kubectl -n tailscale-system get pods
kubectl -n cattle-system get svc rancher-tailscale
kubectl -n observability get svc grafana-tailscale prometheus-tailscale
kubectl -n observability describe svc grafana-tailscale | grep TailscaleProxyReady
kubectl -n observability describe svc prometheus-tailscale | grep TailscaleProxyReady
```

If `TailscaleProxyReady=False`, check the operator logs with `kubectl -n tailscale-system logs deployment/operator --tail=100`. A common cause is an OAuth client missing tag/scopes permissions.

If local `kubectl` falls back to `localhost:8080`, refresh `outputs/kubeconfig` with `scripts/refresh-kubeconfig.sh 10.27.27.30`.
## Network Stabilization Probes

Run the same probe from the Proxmox host, `cp1`, and one worker when registry pulls or Doppler calls flap:

```bash
# Directly on a node:
scripts/network-stabilization-probe.sh

# Or from the generated Ansible inventory:
cd ansible
ansible -i inventory.ini 'control_plane[0]' -m script -a '../scripts/network-stabilization-probe.sh'
ansible -i inventory.ini 'workers[0]' -m script -a '../scripts/network-stabilization-probe.sh'
```

Use `NETWORK_PROBE_REPEAT_COUNT`, `NETWORK_PROBE_CURL_TIMEOUT`, and `NETWORK_PROBE_PULL_TIMEOUT` to tune probe duration.
### Fast dashboard iteration workflow

Use the `Deploy Grafana Content` workflow when changing dashboard/data source templates.
It avoids full cluster provisioning and only applies Grafana content resources:

- `ansible/roles/observability-content/templates/grafana-datasources.yaml.j2`
- `ansible/roles/observability-content/templates/grafana-dashboard-k8s-overview.yaml.j2`
- `ansible/dashboards.yml`
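For purely local iteration, the same content can also be applied straight from the repo with Ansible; this is a sketch and assumes `ansible/dashboards.yml` is runnable as a standalone playbook against the existing inventory:

```bash
cd ansible
ansible-playbook -i inventory.ini dashboards.yml
```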
## File Structure

```
.
├── terraform/
│   ├── main.tf
│   ├── variables.tf
│   ├── network.tf
│   ├── firewall.tf
│   ├── ssh.tf
│   ├── servers.tf
│   ├── outputs.tf
│   └── backend.tf
├── ansible/
│   ├── inventory.tmpl
│   ├── generate_inventory.py
│   ├── site.yml
│   ├── roles/
│   │   ├── common/
│   │   ├── k3s-server/
│   │   ├── k3s-agent/
│   │   ├── addon-secrets-bootstrap/
│   │   ├── observability-content/
│   │   └── observability/
│   └── ansible.cfg
├── .gitea/
│   └── workflows/
│       ├── terraform.yml
│       ├── ansible.yml
│       └── dashboards.yml
├── outputs/
├── terraform.tfvars.example
└── README.md
```
## Firewall Rules

| Port | Source | Purpose |
|------|--------|---------|
| 22 | Tailnet CIDR | SSH |
| 6443 | Tailnet CIDR + internal | Kubernetes API |
| 41641/udp | Any | Tailscale WireGuard |
| 9345 | 10.0.0.0/16 | k3s Supervisor (HA join) |
| 2379 | 10.0.0.0/16 | etcd Client |
| 2380 | 10.0.0.0/16 | etcd Peer |
| 8472 | 10.0.0.0/16 | Flannel VXLAN |
| 10250 | 10.0.0.0/16 | Kubelet |
| 30000-32767 | Optional | NodePorts (disabled by default) |
## Operations

### Scale Workers

Edit `terraform.tfvars`:

```hcl
worker_count = 5
```

Then:

```bash
terraform apply
ansible-playbook site.yml
```

### Upgrade k3s

Run `ansible-playbook site.yml -t upgrade`.
## Registry Cache

K3s nodes are configured by Ansible to use the Proxmox host as a local pull-through cache for common upstream registries. The cache listens on `10.27.27.239`:

```text
docker.io               -> http://10.27.27.239:5000
ghcr.io                 -> http://10.27.27.239:5001
quay.io                 -> http://10.27.27.239:5002
registry.k8s.io         -> http://10.27.27.239:5003
oci.external-secrets.io -> http://10.27.27.239:5004
```

Bootstrap or repair the cache on Proxmox with:

```bash
ssh -i ~/.ssh/infra root@10.27.27.239 'bash -s' < scripts/setup-proxmox-registry-cache.sh
```
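To confirm the cache is serving content and that nodes are configured to use it (assuming the mirrors run the standard registry image in pull-through mode):

```bash
# List repositories the docker.io mirror has cached so far
curl -s http://10.27.27.239:5000/v2/_catalog | jq .

# Spot-check a node's containerd mirror configuration
ssh ubuntu@10.27.27.30 'sudo cat /etc/rancher/k3s/registries.yaml'
```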
### Destroy Cluster

```bash
terraform destroy
```
## Troubleshooting

### Check k3s Logs

```bash
ssh root@<control-plane-ip> journalctl -u k3s -f
```

### Reset k3s

```bash
ansible-playbook site.yml -t reset
```
## Costs Breakdown

| Resource | Quantity | Unit Price | Monthly |
|----------|----------|------------|---------|
| CX23 (Control Plane) | 3 | €2.99 | €8.97 |
| CX33 (Workers) | 4 | €4.99 | €19.96 |
| Backblaze B2 | ~1 GB | Free (first 10 GB) | €0.00 |
| **Total** | | | **€28.93/mo** |
## Security Notes

- The control plane is HA (3 nodes; it can survive one failure).
- Consider adding a Hetzner load balancer for the API server.
- Rotate API tokens regularly.
- Use Kubernetes network policies.
- Enable audit logging for production.
- Never commit `terraform.tfvars`, kubeconfigs, private keys, `outputs/`, or real secret values.
- Terraform/bootstrap/CI secrets stay in Gitea Actions secrets.
- Runtime cluster secrets are sourced from Doppler through External Secrets.
- This repo does not manage Proxmox/LAN firewalls or public ingress.
## License

+14 -7
@@ -1,6 +1,6 @@

# Gitea Secrets Setup

This document describes the secrets required for the HetznerTerra deployment workflow.
This document describes the secrets required for the Proxmox-based deployment workflow.

## Required Secrets

@@ -9,10 +9,17 @@ Add these secrets in your Gitea repository settings:

### Infrastructure Secrets

#### `HCLOUD_TOKEN`
- Hetzner Cloud API token
- Get from: https://console.hetzner.com/projects/{project-id}/security/api-tokens
- Permissions: Read & Write

#### `PROXMOX_ENDPOINT`
- Proxmox VE API endpoint
- Example: `https://100.105.0.115:8006/`

#### `PROXMOX_API_TOKEN_ID`
- Proxmox API token ID
- Example: `terraform-prov@pve!k8s-cluster`

#### `PROXMOX_API_TOKEN_SECRET`
- Proxmox API token secret
- Create with `pveum user token add terraform-prov@pve k8s-cluster`

#### `S3_ACCESS_KEY` & `S3_SECRET_KEY`
- Backblaze B2 credentials for Terraform state storage

@@ -31,7 +38,7 @@ Add these secrets in your Gitea repository settings:

#### `SSH_PRIVATE_KEY` & `SSH_PUBLIC_KEY`
- SSH key pair for cluster access
- Generate with: `ssh-keygen -t ed25519 -C "k8s@hetzner" -f ~/.ssh/hetzner_k8s`
- Generate with: `ssh-keygen -t ed25519 -C "k8s@proxmox" -f ~/.ssh/infra`
- Private key content (include BEGIN/END lines)
- Public key content (full line starting with `ssh-ed25519`)

@@ -90,4 +97,4 @@ Check the workflow logs to verify all secrets are being used correctly.

- Prefer Doppler for runtime app/platform secrets after cluster bootstrap
- Rotate Tailscale auth keys periodically
- Review OAuth client permissions regularly
- The workflow automatically opens SSH/API access only for the runner's IP during deployment
- CI expects direct SSH access to the Proxmox VMs and direct Proxmox API access
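Secrets can be added in the repository settings UI, or scripted against the Gitea Actions secrets API; the sketch below is hedged (endpoint shape as in recent Gitea releases, with host, owner, repo, and token as placeholders):

```bash
# Create or update a single Actions secret (all values are placeholders)
curl -sS -X PUT \
  -H "Authorization: token ${GITEA_TOKEN}" \
  -H "Content-Type: application/json" \
  -d '{"data": "terraform-prov@pve!k8s-cluster"}' \
  "https://gitea.example.com/api/v1/repos/OWNER/REPO/actions/secrets/PROXMOX_API_TOKEN_ID"
```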
@@ -1,75 +0,0 @@

# Stable Private-Only Baseline

This document defines the current engineering target for this repository.

## Topology

- 3 control planes (HA etcd cluster)
- 3 workers
- Hetzner Load Balancer for the Kubernetes API
- private Hetzner network
- Tailscale operator access and service exposure
- Rancher exposed through Tailscale (`rancher.silverside-gopher.ts.net`)
- Grafana exposed through Tailscale (`grafana.silverside-gopher.ts.net`)
- Prometheus exposed through Tailscale (`prometheus.silverside-gopher.ts.net:9090`)
- `apps` Kustomization suspended by default

## In Scope

- Terraform infrastructure bootstrap
- Ansible k3s bootstrap with external cloud provider
- **HA control plane (3 nodes with etcd quorum)**
- **Hetzner Load Balancer for the Kubernetes API**
- **Hetzner CCM deployed via Ansible (before workers join)**
- **Hetzner CSI for persistent volumes (via Flux)**
- Flux core reconciliation
- External Secrets Operator with Doppler
- Tailscale private access and smoke-check validation
- cert-manager
- Rancher and rancher-backup
- Rancher backup/restore validation
- Observability stack (Grafana, Prometheus, Loki, Promtail)
- Persistent volume provisioning validated

## Deferred for Later Phases

- app workloads in `apps/`

## Out of Scope

- public ingress or DNS
- public TLS
- app workloads
- cross-region / multi-cluster disaster recovery strategy
- upgrade strategy

## Phase Gates

1. Terraform apply completes for the HA topology (3 CP, 3 workers, 1 LB).
2. The Load Balancer is healthy with all 3 control plane targets.
3. The primary control plane bootstraps with `--cluster-init`.
4. Secondary control planes join via the Load Balancer endpoint.
5. **CCM deployed via Ansible before workers join** (fixes the uninitialized-taint issue).
6. Workers join successfully via the Load Balancer and all nodes show a proper `providerID`.
7. etcd reports 3 healthy members.
8. Flux source and infrastructure reconciliation are healthy.
9. **CSI deploys and creates the `hcloud-volumes` StorageClass**.
10. **PVC provisioning tested and working**.
11. External Secrets syncs the required secrets.
12. Tailscale private access works for Rancher, Grafana, and Prometheus.
13. CI smoke checks pass for Tailscale DNS resolution, `tailscale ping`, and HTTP reachability.
14. A fresh Rancher backup can be created and restored successfully.
15. Terraform destroy succeeds cleanly or via workflow retry.
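A hedged sketch of hands-on checks for gates 6, 8, and 9, assuming the StorageClass name above and the kubeconfig produced by the bootstrap:

```bash
export KUBECONFIG=$(pwd)/outputs/kubeconfig

# Gate 6: every node should carry a providerID set by the CCM
kubectl get nodes -o custom-columns='NODE:.metadata.name,PROVIDER_ID:.spec.providerID'

# Gate 8: Flux source and infrastructure reconciliation
flux get sources git -n flux-system
flux get kustomizations -n flux-system

# Gate 9: the CSI StorageClass exists
kubectl get storageclass hcloud-volumes
```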
## Success Criteria

Success requires two consecutive HA rebuilds passing all phase gates with no manual fixes, no manual `kubectl` patching, and no manual Tailscale proxy recreation.

## Validated Drills

- 2026-04-18: live Rancher backup/restore drill succeeded on the current cluster.
- A fresh one-time backup was created, restored back onto the same cluster, and post-restore validation confirmed:
  - all nodes remained `Ready`
  - Flux infrastructure stayed healthy
  - Rancher backup/restore resources reported `Completed`
  - Rancher, Grafana, and Prometheus remained reachable through the Tailscale smoke checks
+7 -1
@@ -3,8 +3,14 @@ inventory = inventory.ini
host_key_checking = False
retry_files_enabled = False
roles_path = roles
stdout_callback = yaml
stdout_callback = default
result_format = yaml
interpreter_python = auto_silent
forks = 20

[ssh_connection]
pipelining = True
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o ServerAliveInterval=30 -o ServerAliveCountMax=6

[privilege_escalation]
become = True
@@ -13,8 +13,7 @@ control_plane
workers

[cluster:vars]
ansible_user=root
ansible_user=ubuntu
ansible_python_interpreter=/usr/bin/python3
ansible_ssh_private_key_file={{ private_key_file }}
k3s_version=latest
kube_api_endpoint={{ kube_api_lb_ip }}
@@ -1,14 +1,4 @@
|
||||
---
|
||||
- name: Apply Hetzner cloud secret
|
||||
shell: >-
|
||||
kubectl -n kube-system create secret generic hcloud
|
||||
--from-literal=token='{{ hcloud_token }}'
|
||||
--from-literal=network='{{ cluster_name }}-network'
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
changed_when: true
|
||||
no_log: true
|
||||
when: hcloud_token | default('') | length > 0
|
||||
|
||||
- name: Ensure Tailscale operator namespace exists
|
||||
command: >-
|
||||
kubectl create namespace {{ tailscale_operator_namespace | default('tailscale-system') }}
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
---
|
||||
bootstrap_prepull_images:
|
||||
- docker.io/rancher/mirrored-pause:3.6
|
||||
- docker.io/rancher/mirrored-coredns-coredns:1.14.2
|
||||
- docker.io/rancher/mirrored-metrics-server:v0.8.1
|
||||
- docker.io/rancher/local-path-provisioner:v0.0.35
|
||||
- docker.io/rancher/mirrored-library-busybox:1.37.0
|
||||
- docker.io/rancher/mirrored-library-traefik:3.6.10
|
||||
- docker.io/rancher/klipper-helm:v0.9.14-build20260309
|
||||
- ghcr.io/fluxcd/source-controller:v1.8.0
|
||||
- ghcr.io/fluxcd/kustomize-controller:v1.8.1
|
||||
- ghcr.io/fluxcd/helm-controller:v1.5.1
|
||||
- ghcr.io/fluxcd/notification-controller:v1.8.1
|
||||
- oci.external-secrets.io/external-secrets/external-secrets:v2.1.0
|
||||
@@ -0,0 +1,27 @@
|
||||
---
|
||||
- name: Pull bootstrap images into containerd
|
||||
shell: |
|
||||
if /usr/local/bin/k3s crictl inspecti "{{ item }}" >/dev/null 2>&1; then
|
||||
echo "already present"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "mtu=$(cat /sys/class/net/{{ ansible_default_ipv4.interface | default('eth0') }}/mtu 2>/dev/null || true)"
|
||||
ip route get 1.1.1.1 || true
|
||||
|
||||
for attempt in 1 2 3 4 5 6 7 8 9 10; do
|
||||
echo "pull attempt ${attempt}: {{ item }}"
|
||||
if timeout 240s /usr/local/bin/k3s crictl pull "{{ item }}"; then
|
||||
echo "pulled image"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
sleep 15
|
||||
done
|
||||
|
||||
exit 1
|
||||
args:
|
||||
executable: /bin/bash
|
||||
register: bootstrap_image_pull
|
||||
loop: "{{ bootstrap_prepull_images }}"
|
||||
changed_when: "'pulled image' in bootstrap_image_pull.stdout"
|
||||
@@ -1,82 +0,0 @@
|
||||
---
|
||||
- name: Check if hcloud secret exists
|
||||
command: kubectl -n kube-system get secret hcloud
|
||||
register: hcloud_secret_check
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Fail if hcloud secret is missing
|
||||
fail:
|
||||
msg: "hcloud secret not found in kube-system namespace. CCM requires it."
|
||||
when: hcloud_secret_check.rc != 0
|
||||
|
||||
- name: Check if helm is installed
|
||||
command: which helm
|
||||
register: helm_check
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Install helm
|
||||
when: helm_check.rc != 0
|
||||
block:
|
||||
- name: Download helm install script
|
||||
get_url:
|
||||
url: https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
|
||||
dest: /tmp/get-helm-3.sh
|
||||
mode: "0755"
|
||||
|
||||
- name: Run helm install script
|
||||
command: /tmp/get-helm-3.sh
|
||||
args:
|
||||
creates: /usr/local/bin/helm
|
||||
|
||||
- name: Add Hetzner Helm repository
|
||||
kubernetes.core.helm_repository:
|
||||
name: hcloud
|
||||
repo_url: https://charts.hetzner.cloud
|
||||
kubeconfig: /etc/rancher/k3s/k3s.yaml
|
||||
environment:
|
||||
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
|
||||
|
||||
- name: Deploy Hetzner Cloud Controller Manager
|
||||
kubernetes.core.helm:
|
||||
name: hcloud-cloud-controller-manager
|
||||
chart_ref: hcloud/hcloud-cloud-controller-manager
|
||||
release_namespace: kube-system
|
||||
create_namespace: true
|
||||
values:
|
||||
networking:
|
||||
enabled: true
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: "{{ inventory_hostname }}"
|
||||
additionalTolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
kubeconfig: /etc/rancher/k3s/k3s.yaml
|
||||
wait: true
|
||||
wait_timeout: 300s
|
||||
environment:
|
||||
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
|
||||
|
||||
- name: Wait for CCM to be ready
|
||||
command: kubectl -n kube-system rollout status deployment/hcloud-cloud-controller-manager --timeout=120s
|
||||
changed_when: false
|
||||
register: ccm_rollout
|
||||
until: ccm_rollout.rc == 0
|
||||
retries: 3
|
||||
delay: 10
|
||||
|
||||
- name: Pause to ensure CCM is fully ready to process new nodes
|
||||
pause:
|
||||
seconds: 10
|
||||
|
||||
- name: Verify CCM is removing uninitialized taints
|
||||
command: kubectl get nodes -o jsonpath='{.items[*].spec.taints[?(@.key=="node.cloudprovider.kubernetes.io/uninitialized")].key}'
|
||||
register: uninitialized_taints
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Display taint status
|
||||
debug:
|
||||
msg: "Nodes with uninitialized taint: {{ uninitialized_taints.stdout }}"
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
common_upgrade_packages: false
|
||||
common_node_mtu: 1400
|
||||
tailscale_auth_key: ""
|
||||
tailscale_ssh: false
|
||||
tailscale_accept_routes: false
|
||||
|
||||
@@ -1,12 +1,68 @@
|
||||
---
|
||||
- name: Check if cloud-init is installed
|
||||
command: which cloud-init
|
||||
register: cloud_init_binary
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Wait for cloud-init to finish first-boot tasks
|
||||
command: cloud-init status --wait
|
||||
register: cloud_init_wait
|
||||
changed_when: false
|
||||
failed_when: >-
|
||||
cloud_init_wait.rc not in [0, 1, 2] or
|
||||
(
|
||||
'status: done' not in cloud_init_wait.stdout and
|
||||
'status: error' not in cloud_init_wait.stdout and
|
||||
'status: disabled' not in cloud_init_wait.stdout
|
||||
)
|
||||
when: cloud_init_binary.rc == 0
|
||||
|
||||
- name: Report non-blocking cloud-init error status
|
||||
debug:
|
||||
msg: |
|
||||
cloud-init completed with status error on {{ inventory_hostname }}; continuing because SSH is reachable and Ansible owns package setup.
|
||||
{{ cloud_init_wait.stdout | default('') }}
|
||||
{{ cloud_init_wait.stderr | default('') }}
|
||||
when:
|
||||
- cloud_init_binary.rc == 0
|
||||
- "'status: error' in (cloud_init_wait.stdout | default(''))"
|
||||
|
||||
- name: Persist primary interface MTU for registry egress
|
||||
copy:
|
||||
dest: /etc/netplan/99-k8s-mtu.yaml
|
||||
content: |
|
||||
network:
|
||||
version: 2
|
||||
ethernets:
|
||||
{{ ansible_default_ipv4.interface | default('eth0') }}:
|
||||
mtu: {{ common_node_mtu }}
|
||||
mode: "0600"
|
||||
register: k8s_mtu_netplan
|
||||
when: common_node_mtu | int > 0
|
||||
|
||||
- name: Apply primary interface MTU immediately
|
||||
command: ip link set dev {{ ansible_default_ipv4.interface | default('eth0') }} mtu {{ common_node_mtu }}
|
||||
changed_when: false
|
||||
when: common_node_mtu | int > 0
|
||||
|
||||
- name: Apply persisted MTU netplan
|
||||
command: netplan apply
|
||||
changed_when: true
|
||||
when:
|
||||
- common_node_mtu | int > 0
|
||||
- k8s_mtu_netplan.changed
|
||||
|
||||
- name: Update apt cache
|
||||
apt:
|
||||
update_cache: true
|
||||
cache_valid_time: 3600
|
||||
lock_timeout: 600
|
||||
|
||||
- name: Upgrade packages
|
||||
apt:
|
||||
upgrade: dist
|
||||
lock_timeout: 600
|
||||
when: common_upgrade_packages | default(false)
|
||||
|
||||
- name: Install required packages
|
||||
@@ -19,18 +75,27 @@
|
||||
- lsb-release
|
||||
- software-properties-common
|
||||
- jq
|
||||
- nfs-common
|
||||
- htop
|
||||
- vim
|
||||
state: present
|
||||
lock_timeout: 600
|
||||
|
||||
- name: Check active swap
|
||||
command: swapon --noheadings
|
||||
register: active_swap
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Disable swap
|
||||
command: swapoff -a
|
||||
changed_when: true
|
||||
when: active_swap.stdout | trim | length > 0
|
||||
|
||||
- name: Remove swap from fstab
|
||||
mount:
|
||||
name: swap
|
||||
fstype: swap
|
||||
lineinfile:
|
||||
path: /etc/fstab
|
||||
regexp: '^\s*[^#]\S+\s+\S+\s+swap\s+.*$'
|
||||
state: absent
|
||||
|
||||
- name: Load br_netfilter module
|
||||
@@ -66,6 +131,10 @@
|
||||
|
||||
- name: Install tailscale
|
||||
shell: curl -fsSL https://tailscale.com/install.sh | sh
|
||||
register: tailscale_install
|
||||
until: tailscale_install.rc == 0
|
||||
retries: 5
|
||||
delay: 15
|
||||
when:
|
||||
- tailscale_auth_key | length > 0
|
||||
- tailscale_binary.rc != 0
|
||||
@@ -78,9 +147,22 @@
|
||||
failed_when: false
|
||||
when: tailscale_auth_key | length > 0
|
||||
|
||||
- name: Connect node to tailnet
|
||||
command: tailscale up --authkey {{ tailscale_auth_key }} --hostname {{ inventory_hostname }} --ssh={{ tailscale_ssh | ternary('true', 'false') }} --accept-routes={{ tailscale_accept_routes | ternary('true', 'false') }}
|
||||
- name: Parse tailscale connection state
|
||||
set_fact:
|
||||
tailscale_backend_state: "{{ (tailscale_status.stdout | from_json).BackendState | default('') }}"
|
||||
when:
|
||||
- tailscale_auth_key | length > 0
|
||||
- tailscale_status.rc != 0 or '"BackendState":"Running"' not in tailscale_status.stdout
|
||||
- tailscale_status.rc == 0
|
||||
- tailscale_status.stdout | length > 0
|
||||
|
||||
- name: Connect node to tailnet
|
||||
command: tailscale up --authkey {{ tailscale_auth_key }} --hostname {{ inventory_hostname }} --ssh={{ tailscale_ssh | ternary('true', 'false') }} --accept-routes={{ tailscale_accept_routes | ternary('true', 'false') }}
|
||||
register: tailscale_up
|
||||
until: tailscale_up.rc == 0
|
||||
retries: 5
|
||||
delay: 15
|
||||
no_log: true
|
||||
when:
|
||||
- tailscale_auth_key | length > 0
|
||||
- tailscale_status.rc != 0 or (tailscale_backend_state | default('')) != 'Running'
|
||||
changed_when: true
|
||||
|
||||
@@ -15,36 +15,10 @@
|
||||
--from-literal=dopplerToken='{{ doppler_hetznerterra_service_token }}'
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
changed_when: true
|
||||
|
||||
- name: Check for ClusterSecretStore CRD
|
||||
command: kubectl get crd clustersecretstores.external-secrets.io
|
||||
register: doppler_clustersecretstore_crd
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Apply Doppler ClusterSecretStore
|
||||
shell: |
|
||||
cat <<'EOF' | kubectl apply -f -
|
||||
apiVersion: external-secrets.io/v1
|
||||
kind: ClusterSecretStore
|
||||
metadata:
|
||||
name: doppler-hetznerterra
|
||||
spec:
|
||||
provider:
|
||||
doppler:
|
||||
auth:
|
||||
secretRef:
|
||||
dopplerToken:
|
||||
name: doppler-hetznerterra-service-token
|
||||
key: dopplerToken
|
||||
namespace: external-secrets
|
||||
EOF
|
||||
changed_when: true
|
||||
when: doppler_clustersecretstore_crd.rc == 0
|
||||
no_log: true
|
||||
|
||||
- name: Note pending Doppler ClusterSecretStore bootstrap
|
||||
debug:
|
||||
msg: >-
|
||||
Skipping Doppler ClusterSecretStore bootstrap because the External Secrets CRD
|
||||
is not available yet. Re-run after External Secrets is installed.
|
||||
when: doppler_clustersecretstore_crd.rc != 0
|
||||
Doppler service token secret is bootstrapped. The deploy workflow creates the
|
||||
ClusterSecretStore after External Secrets CRDs and webhook endpoints are ready.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
k3s_version: latest
|
||||
k3s_version: v1.34.6+k3s1
|
||||
k3s_server_url: ""
|
||||
k3s_token: ""
|
||||
k3s_node_ip: ""
|
||||
k3s_kubelet_cloud_provider_external: true
|
||||
k3s_kubelet_cloud_provider_external: false
|
||||
k3s_flannel_iface: "{{ ansible_default_ipv4.interface | default('eth0') }}"
|
||||
|
||||
@@ -1,19 +1,58 @@
|
||||
---
|
||||
- name: Check if k3s agent is already installed
|
||||
- name: Check if k3s agent service exists
|
||||
stat:
|
||||
path: /usr/local/bin/k3s-agent
|
||||
register: k3s_agent_binary
|
||||
path: /etc/systemd/system/k3s-agent.service
|
||||
register: k3s_agent_service
|
||||
|
||||
- name: Check k3s agent service state
|
||||
command: systemctl is-active k3s-agent
|
||||
register: k3s_agent_service_state
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: k3s_agent_service.stat.exists
|
||||
|
||||
- name: Check installed k3s version
|
||||
command: k3s --version
|
||||
register: installed_k3s_version
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: k3s_agent_service.stat.exists
|
||||
|
||||
- name: Determine whether k3s agent install is needed
|
||||
set_fact:
|
||||
k3s_agent_install_needed: >-
|
||||
{{
|
||||
(not k3s_agent_service.stat.exists)
|
||||
or ((k3s_agent_service_state.stdout | default('')) != 'active')
|
||||
or (k3s_version != 'latest' and k3s_version not in (installed_k3s_version.stdout | default('')))
|
||||
or (not (k3s_node_registered | default(true) | bool))
|
||||
}}
|
||||
|
||||
- name: Configure k3s registry mirrors
|
||||
import_role:
|
||||
name: k3s-registry-mirror
|
||||
|
||||
- name: Download k3s install script
|
||||
get_url:
|
||||
url: https://get.k3s.io
|
||||
dest: /tmp/install-k3s.sh
|
||||
mode: "0755"
|
||||
when: not k3s_agent_binary.stat.exists
|
||||
register: k3s_agent_install_script
|
||||
until: k3s_agent_install_script is succeeded
|
||||
retries: 5
|
||||
delay: 10
|
||||
when: k3s_agent_install_needed
|
||||
|
||||
- name: Install k3s agent
|
||||
when: not k3s_agent_binary.stat.exists
|
||||
when: k3s_agent_install_needed
|
||||
block:
|
||||
- name: Wait for Kubernetes API endpoint before agent join
|
||||
wait_for:
|
||||
host: "{{ k3s_server_url | regex_replace('^https?://([^:/]+).*$', '\\1') }}"
|
||||
port: 6443
|
||||
state: started
|
||||
timeout: 180
|
||||
|
||||
- name: Run k3s agent install
|
||||
environment:
|
||||
INSTALL_K3S_VERSION: "{{ k3s_version if k3s_version != 'latest' else '' }}"
|
||||
@@ -22,32 +61,12 @@
|
||||
command: >-
|
||||
/tmp/install-k3s.sh agent
|
||||
--node-ip {{ k3s_node_ip }}
|
||||
--flannel-iface=enp7s0
|
||||
--flannel-iface={{ k3s_flannel_iface }}
|
||||
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
||||
args:
|
||||
creates: /usr/local/bin/k3s-agent
|
||||
rescue:
|
||||
- name: Show k3s-agent service status after failed install
|
||||
command: systemctl status k3s-agent --no-pager
|
||||
register: k3s_agent_status_after_install
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Show recent k3s-agent logs after failed install
|
||||
command: journalctl -u k3s-agent -n 120 --no-pager
|
||||
register: k3s_agent_journal_after_install
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Fail with k3s-agent diagnostics
|
||||
fail:
|
||||
msg: |
|
||||
k3s agent install failed on {{ inventory_hostname }}.
|
||||
Service status:
|
||||
{{ k3s_agent_status_after_install.stdout | default('n/a') }}
|
||||
|
||||
Recent logs:
|
||||
{{ k3s_agent_journal_after_install.stdout | default('n/a') }}
|
||||
register: k3s_agent_install
|
||||
until: k3s_agent_install.rc == 0
|
||||
retries: 3
|
||||
delay: 20
|
||||
|
||||
- name: Wait for k3s agent to be ready
|
||||
command: systemctl is-active k3s-agent
|
||||
@@ -56,3 +75,34 @@
|
||||
retries: 30
|
||||
delay: 10
|
||||
changed_when: false
|
||||
|
||||
- name: Show k3s-agent service status on failure
|
||||
command: systemctl status k3s-agent --no-pager
|
||||
register: k3s_agent_status
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: agent_status is failed
|
||||
|
||||
- name: Show recent k3s-agent logs on failure
|
||||
command: journalctl -u k3s-agent -n 120 --no-pager
|
||||
register: k3s_agent_journal
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: agent_status is failed
|
||||
|
||||
- name: Fail with k3s-agent diagnostics
|
||||
fail:
|
||||
msg: |
|
||||
k3s agent failed to become ready on {{ inventory_hostname }}.
|
||||
Install stdout:
|
||||
{{ k3s_agent_install.stdout | default('n/a') }}
|
||||
|
||||
Install stderr:
|
||||
{{ k3s_agent_install.stderr | default('n/a') }}
|
||||
|
||||
Service status:
|
||||
{{ k3s_agent_status.stdout | default('n/a') }}
|
||||
|
||||
Recent logs:
|
||||
{{ k3s_agent_journal.stdout | default('n/a') }}
|
||||
when: agent_status is failed
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
---
|
||||
k3s_registry_mirror_enabled: true
|
||||
k3s_registry_mirror_host: 10.27.27.239
|
||||
k3s_registry_mirrors:
|
||||
docker.io:
|
||||
port: 5000
|
||||
ghcr.io:
|
||||
port: 5001
|
||||
quay.io:
|
||||
port: 5002
|
||||
registry.k8s.io:
|
||||
port: 5003
|
||||
oci.external-secrets.io:
|
||||
port: 5004
|
||||
registry.rancher.com:
|
||||
port: 5005
|
||||
@@ -0,0 +1,20 @@
|
||||
---
|
||||
- name: Ensure k3s config directory exists
|
||||
file:
|
||||
path: /etc/rancher/k3s
|
||||
state: directory
|
||||
mode: "0755"
|
||||
when: k3s_registry_mirror_enabled | bool
|
||||
|
||||
- name: Configure k3s registry mirrors
|
||||
template:
|
||||
src: registries.yaml.j2
|
||||
dest: /etc/rancher/k3s/registries.yaml
|
||||
mode: "0644"
|
||||
when: k3s_registry_mirror_enabled | bool
|
||||
|
||||
- name: Remove k3s registry mirror config when disabled
|
||||
file:
|
||||
path: /etc/rancher/k3s/registries.yaml
|
||||
state: absent
|
||||
when: not (k3s_registry_mirror_enabled | bool)
|
||||
@@ -0,0 +1,6 @@
|
||||
mirrors:
|
||||
{% for registry, mirror in k3s_registry_mirrors.items() %}
|
||||
"{{ registry }}":
|
||||
endpoint:
|
||||
- "http://{{ k3s_registry_mirror_host }}:{{ mirror.port }}"
|
||||
{% endfor %}
|
||||
@@ -1,11 +1,12 @@
|
||||
---
|
||||
k3s_version: latest
|
||||
k3s_version: v1.34.6+k3s1
|
||||
k3s_token: ""
|
||||
k3s_node_ip: ""
|
||||
k3s_primary_public_ip: ""
|
||||
k3s_disable_embedded_ccm: true
|
||||
k3s_disable_embedded_ccm: false
|
||||
k3s_disable_servicelb: true
|
||||
k3s_kubelet_cloud_provider_external: true
|
||||
k3s_kubelet_cloud_provider_external: false
|
||||
k3s_flannel_iface: "{{ ansible_default_ipv4.interface | default('eth0') }}"
|
||||
# Load Balancer endpoint for HA cluster joins (set in inventory)
|
||||
kube_api_endpoint: ""
|
||||
# Tailscale DNS names for control planes (to enable tailnet access)
|
||||
|
||||
@@ -11,9 +11,22 @@
|
||||
failed_when: false
|
||||
when: k3s_service.stat.exists
|
||||
|
||||
- name: Check installed k3s version
|
||||
command: k3s --version
|
||||
register: installed_k3s_version
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: k3s_service.stat.exists
|
||||
|
||||
- name: Determine whether k3s install is needed
|
||||
set_fact:
|
||||
k3s_install_needed: "{{ (not k3s_service.stat.exists) or ((k3s_service_state.stdout | default('')) != 'active') }}"
|
||||
k3s_install_needed: >-
|
||||
{{
|
||||
(not k3s_service.stat.exists)
|
||||
or ((k3s_service_state.stdout | default('')) != 'active')
|
||||
or (k3s_version != 'latest' and k3s_version not in (installed_k3s_version.stdout | default('')))
|
||||
or (not (k3s_node_registered | default(true) | bool))
|
||||
}}
|
||||
|
||||
- name: Wait for API endpoint on 6443 (secondary only)
|
||||
wait_for:
|
||||
@@ -45,11 +58,19 @@
|
||||
- /var/lib/rancher/k3s
|
||||
when: k3s_install_needed
|
||||
|
||||
- name: Configure k3s registry mirrors
|
||||
import_role:
|
||||
name: k3s-registry-mirror
|
||||
|
||||
- name: Download k3s install script
|
||||
get_url:
|
||||
url: https://get.k3s.io
|
||||
dest: /tmp/install-k3s.sh
|
||||
mode: "0755"
|
||||
register: k3s_install_script
|
||||
until: k3s_install_script is succeeded
|
||||
retries: 5
|
||||
delay: 10
|
||||
when: k3s_install_needed
|
||||
|
||||
- name: Install k3s server (primary)
|
||||
@@ -61,7 +82,7 @@
|
||||
--cluster-init
|
||||
--advertise-address={{ k3s_primary_ip }}
|
||||
--node-ip={{ k3s_node_ip }}
|
||||
--flannel-iface=enp7s0
|
||||
--flannel-iface={{ k3s_flannel_iface }}
|
||||
--tls-san={{ k3s_primary_ip }}
|
||||
--tls-san={{ k3s_primary_public_ip }}
|
||||
--tls-san={{ kube_api_endpoint }}
|
||||
@@ -69,6 +90,10 @@
|
||||
{% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
|
||||
{% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
|
||||
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
||||
register: primary_install
|
||||
until: primary_install.rc == 0
|
||||
retries: 3
|
||||
delay: 20
|
||||
when:
|
||||
- k3s_install_needed
|
||||
- k3s_primary | default(false)
|
||||
@@ -87,40 +112,14 @@
|
||||
--server https://{{ k3s_join_endpoint | default(k3s_primary_ip) }}:6443
|
||||
--advertise-address={{ k3s_node_ip }}
|
||||
--node-ip={{ k3s_node_ip }}
|
||||
--flannel-iface=enp7s0
|
||||
--flannel-iface={{ k3s_flannel_iface }}
|
||||
{% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
|
||||
{% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
|
||||
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
||||
register: secondary_install
|
||||
|
||||
rescue:
|
||||
- name: Show k3s service status after failed secondary install
|
||||
command: systemctl status k3s --no-pager
|
||||
register: k3s_status_after_install
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Show recent k3s logs after failed secondary install
|
||||
command: journalctl -u k3s -n 120 --no-pager
|
||||
register: k3s_journal_after_install
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Fail with secondary install diagnostics
|
||||
fail:
|
||||
msg: |
|
||||
Secondary k3s install failed on {{ inventory_hostname }}.
|
||||
Install stdout:
|
||||
{{ secondary_install.stdout | default('n/a') }}
|
||||
|
||||
Install stderr:
|
||||
{{ secondary_install.stderr | default('n/a') }}
|
||||
|
||||
Service status:
|
||||
{{ k3s_status_after_install.stdout | default('n/a') }}
|
||||
|
||||
Recent logs:
|
||||
{{ k3s_journal_after_install.stdout | default('n/a') }}
|
||||
until: secondary_install.rc == 0
|
||||
retries: 3
|
||||
delay: 20
|
||||
|
||||
- name: Wait for k3s to be ready
|
||||
command: "{{ (k3s_primary | default(false)) | ternary('kubectl get nodes', 'systemctl is-active k3s') }}"
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
---
|
||||
kube_vip_version: v1.1.2
|
||||
kube_vip_interface: "{{ ansible_default_ipv4.interface | default('eth0') }}"
|
||||
kube_vip_address: "{{ kube_api_endpoint }}"
|
||||
kube_vip_prepull_images:
|
||||
- docker.io/rancher/mirrored-pause:3.6
|
||||
- ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}
|
||||
@@ -0,0 +1,87 @@
|
||||
---
|
||||
- name: Pre-pull kube-vip bootstrap images into containerd
|
||||
shell: |
|
||||
if /usr/local/bin/k3s crictl inspecti "{{ item }}" >/dev/null 2>&1; then
|
||||
echo "already present"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "mtu=$(cat /sys/class/net/{{ ansible_default_ipv4.interface | default('eth0') }}/mtu 2>/dev/null || true)"
|
||||
ip route get 1.1.1.1 || true
|
||||
|
||||
for attempt in 1 2 3 4 5; do
|
||||
echo "pull attempt ${attempt}: {{ item }}"
|
||||
if timeout 180s /usr/local/bin/k3s crictl pull "{{ item }}"; then
|
||||
echo "pulled image"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
sleep 15
|
||||
done
|
||||
|
||||
exit 1
|
||||
args:
|
||||
executable: /bin/bash
|
||||
register: kube_vip_image_pull
|
||||
loop: "{{ kube_vip_prepull_images }}"
|
||||
changed_when: "'pulled image' in kube_vip_image_pull.stdout"
|
||||
|
||||
- name: Render kube-vip control plane manifest
|
||||
template:
|
||||
src: kube-vip-control-plane.yaml.j2
|
||||
dest: /tmp/kube-vip-control-plane.yaml
|
||||
mode: "0644"
|
||||
|
||||
- name: Apply kube-vip control plane manifest
|
||||
command: kubectl apply -f /tmp/kube-vip-control-plane.yaml
|
||||
register: kube_vip_apply
|
||||
until: kube_vip_apply.rc == 0
|
||||
retries: 3
|
||||
delay: 10
|
||||
changed_when: true
|
||||
|
||||
- name: Wait for local kube-vip pod to be ready
|
||||
shell: >-
|
||||
kubectl -n kube-system get pods
|
||||
-l app.kubernetes.io/name=kube-vip
|
||||
--field-selector spec.nodeName={{ inventory_hostname }}
|
||||
-o jsonpath='{.items[0].status.conditions[?(@.type=="Ready")].status}'
|
||||
register: kube_vip_pod_ready
|
||||
changed_when: false
|
||||
until: kube_vip_pod_ready.stdout == "True"
|
||||
retries: 30
|
||||
delay: 10
|
||||
|
||||
- name: Show kube-vip pod status on failure
|
||||
command: kubectl -n kube-system get pods -l app.kubernetes.io/name=kube-vip -o wide
|
||||
register: kube_vip_pods
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: kube_vip_pod_ready is failed
|
||||
|
||||
- name: Describe kube-vip pod on failure
|
||||
shell: >-
|
||||
kubectl -n kube-system describe pod
|
||||
$(kubectl -n kube-system get pods -l app.kubernetes.io/name=kube-vip --field-selector spec.nodeName={{ inventory_hostname }} -o jsonpath='{.items[0].metadata.name}')
|
||||
register: kube_vip_pod_describe
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: kube_vip_pod_ready is failed
|
||||
|
||||
- name: Fail with kube-vip diagnostics
|
||||
fail:
|
||||
msg: |
|
||||
kube-vip failed to become ready on {{ inventory_hostname }}.
|
||||
Pods:
|
||||
{{ kube_vip_pods.stdout | default('n/a') }}
|
||||
|
||||
Describe:
|
||||
{{ kube_vip_pod_describe.stdout | default('n/a') }}
|
||||
when: kube_vip_pod_ready is failed
|
||||
|
||||
- name: Wait for API VIP on 6443
|
||||
wait_for:
|
||||
host: "{{ kube_vip_address }}"
|
||||
port: 6443
|
||||
state: started
|
||||
timeout: 180
|
||||
@@ -0,0 +1,110 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: kube-vip
|
||||
namespace: kube-system
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: system:kube-vip-role
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["services/status"]
|
||||
verbs: ["update"]
|
||||
- apiGroups: [""]
|
||||
resources: ["services", "endpoints"]
|
||||
verbs: ["list", "get", "watch", "update"]
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
verbs: ["list", "get", "watch", "update", "patch"]
|
||||
- apiGroups: ["coordination.k8s.io"]
|
||||
resources: ["leases"]
|
||||
verbs: ["list", "get", "watch", "update", "create"]
|
||||
- apiGroups: ["discovery.k8s.io"]
|
||||
resources: ["endpointslices"]
|
||||
verbs: ["list", "get", "watch", "update"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods"]
|
||||
verbs: ["list"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: system:kube-vip-binding
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: system:kube-vip-role
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kube-vip
|
||||
namespace: kube-system
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: kube-vip
|
||||
namespace: kube-system
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: kube-vip
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-vip
|
||||
spec:
|
||||
serviceAccountName: kube-vip
|
||||
hostNetwork: true
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: kube-vip
|
||||
image: ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- manager
|
||||
env:
|
||||
- name: vip_arp
|
||||
value: "true"
|
||||
- name: port
|
||||
value: "6443"
|
||||
- name: vip_interface
|
||||
value: {{ kube_vip_interface | quote }}
|
||||
- name: vip_subnet
|
||||
value: "32"
|
||||
- name: cp_enable
|
||||
value: "true"
|
||||
- name: cp_namespace
|
||||
value: kube-system
|
||||
- name: vip_ddns
|
||||
value: "false"
|
||||
- name: vip_leaderelection
|
||||
value: "true"
|
||||
- name: vip_leaseduration
|
||||
value: "5"
|
||||
- name: vip_renewdeadline
|
||||
value: "3"
|
||||
- name: vip_retryperiod
|
||||
value: "1"
|
||||
- name: address
|
||||
value: {{ kube_vip_address | quote }}
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- NET_ADMIN
|
||||
- NET_RAW
|
||||
- SYS_TIME
|
||||
@@ -105,6 +105,11 @@
|
||||
register: grafana_loki_labels
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
until: >-
|
||||
grafana_loki_labels.rc != 0 or
|
||||
'"data":[]' not in (grafana_loki_labels.stdout | replace(' ', ''))
|
||||
retries: 30
|
||||
delay: 10
|
||||
when: loki_enabled
|
||||
|
||||
- name: Fail when Loki is reachable but has zero indexed labels
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
---
|
||||
rancher_images_to_prepull:
|
||||
- docker.io/rancher/rancher:v2.13.3
|
||||
- docker.io/rancher/rancher-webhook:v0.9.3
|
||||
- docker.io/rancher/system-upgrade-controller:v0.17.0
|
||||
- docker.io/rancher/shell:v0.6.2
|
||||
@@ -0,0 +1,27 @@
|
||||
---
|
||||
- name: Pull Rancher images into containerd
|
||||
shell: |
|
||||
if /usr/local/bin/k3s crictl inspecti "{{ item }}" >/dev/null 2>&1; then
|
||||
echo "already present"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "mtu=$(cat /sys/class/net/{{ ansible_default_ipv4.interface | default('eth0') }}/mtu 2>/dev/null || true)"
|
||||
ip route get 1.1.1.1 || true
|
||||
|
||||
for attempt in 1 2 3 4 5 6 7 8 9 10; do
|
||||
echo "pull attempt ${attempt}: {{ item }}"
|
||||
if timeout 240s /usr/local/bin/k3s crictl pull "{{ item }}"; then
|
||||
echo "pulled image"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
sleep 15
|
||||
done
|
||||
|
||||
exit 1
|
||||
args:
|
||||
executable: /bin/bash
|
||||
register: rancher_image_pull
|
||||
loop: "{{ rancher_images_to_prepull }}"
|
||||
changed_when: "'pulled image' in rancher_image_pull.stdout"
|
||||
@@ -8,15 +8,20 @@
|
||||
headers:
|
||||
Authorization: "Bearer {{ tailscale_api_key }}"
|
||||
return_content: true
|
||||
timeout: 10
|
||||
register: ts_devices
|
||||
until: ts_devices.status == 200
|
||||
retries: 5
|
||||
delay: 10
|
||||
|
||||
- name: Find stale devices matching reserved hostnames
|
||||
set_fact:
|
||||
stale_devices: >-
|
||||
{{ ts_devices.json.devices | default([])
|
||||
{{ (ts_devices.json.devices | default([])
|
||||
| selectattr('hostname', 'defined')
|
||||
| selectattr('hostname', 'in', tailscale_reserved_hostnames)
|
||||
| rejectattr('online', 'defined')
|
||||
| selectattr('connectedToControl', 'defined')
|
||||
| rejectattr('connectedToControl', 'equalto', true)
|
||||
| list
|
||||
+
|
||||
ts_devices.json.devices | default([])
|
||||
@@ -24,7 +29,7 @@
|
||||
| selectattr('hostname', 'in', tailscale_reserved_hostnames)
|
||||
| selectattr('online', 'defined')
|
||||
| rejectattr('online', 'equalto', true)
|
||||
| list }}
|
||||
| list) | unique(attribute='id') | list }}
|
||||
|
||||
- name: Delete stale devices
|
||||
uri:
|
||||
@@ -33,6 +38,11 @@
|
||||
headers:
|
||||
Authorization: "Bearer {{ tailscale_api_key }}"
|
||||
status_code: 200
|
||||
timeout: 10
|
||||
register: ts_delete_device
|
||||
until: ts_delete_device.status == 200
|
||||
retries: 3
|
||||
delay: 5
|
||||
loop: "{{ stale_devices }}"
|
||||
loop_control:
|
||||
label: "{{ item.name }} ({{ item.id }})"
|
||||
|
||||
+191
-4
@@ -1,14 +1,49 @@
|
||||
---
|
||||
- name: Clean up stale Tailscale cluster node devices
|
||||
hosts: localhost
|
||||
connection: local
|
||||
vars:
|
||||
tailscale_reserved_hostnames: "{{ groups['cluster'] | default([]) | list }}"
|
||||
|
||||
roles:
|
||||
- tailscale-cleanup
|
||||
|
||||
- name: Bootstrap Kubernetes cluster
|
||||
hosts: cluster
|
||||
become: true
|
||||
gather_facts: true
|
||||
gather_facts: false
|
||||
|
||||
pre_tasks:
|
||||
- name: Wait for SSH
|
||||
wait_for_connection:
|
||||
delay: 10
|
||||
delay: 0
|
||||
timeout: 600
|
||||
|
||||
- name: Gather facts after SSH is reachable
|
||||
setup:
|
||||
gather_subset:
|
||||
- "!all"
|
||||
- network
|
||||
register: initial_setup
|
||||
ignore_errors: true
|
||||
ignore_unreachable: true
|
||||
|
||||
- name: Clear transient SSH unreachable state after first fact gather
|
||||
meta: clear_host_errors
|
||||
when: initial_setup.unreachable | default(false)
|
||||
|
||||
- name: Wait for SSH after transient first-boot disconnect
|
||||
wait_for_connection:
|
||||
delay: 5
|
||||
timeout: 300
|
||||
when: initial_setup.unreachable | default(false) or initial_setup.failed | default(false)
|
||||
|
||||
- name: Gather facts after transient first-boot disconnect
|
||||
setup:
|
||||
gather_subset:
|
||||
- "!all"
|
||||
- network
|
||||
when: initial_setup.unreachable | default(false) or initial_setup.failed | default(false)
|
||||
|
||||
roles:
|
||||
- common
|
||||
@@ -57,12 +92,24 @@
|
||||
roles:
|
||||
- addon-secrets-bootstrap
|
||||
|
||||
- name: Deploy Hetzner CCM (required for workers with external cloud provider)
|
||||
- name: Deploy kube-vip for API HA
|
||||
hosts: control_plane[0]
|
||||
become: true
|
||||
|
||||
roles:
|
||||
- ccm-deploy
|
||||
- kube-vip-deploy
|
||||
|
||||
- name: Wait for Kubernetes API VIP readiness
|
||||
hosts: control_plane[0]
|
||||
become: true
|
||||
tasks:
|
||||
- name: Wait for Kubernetes readyz through the VIP
|
||||
command: kubectl --server=https://{{ kube_api_endpoint }}:6443 get --raw=/readyz
|
||||
register: api_readyz
|
||||
until: api_readyz.rc == 0
|
||||
retries: 30
|
||||
delay: 10
|
||||
changed_when: false
|
||||
|
||||
- name: Setup secondary control planes
|
||||
hosts: control_plane[1:]
|
||||
@@ -77,9 +124,116 @@
|
||||
# Use Load Balancer for HA - all control planes join via LB endpoint
|
||||
k3s_join_endpoint: "{{ kube_api_endpoint | default(hostvars[groups['control_plane'][0]]['k3s_primary_private_ip']) }}"
|
||||
|
||||
pre_tasks:
|
||||
- name: Check whether secondary control plane is registered in Kubernetes
|
||||
command: kubectl get node/{{ inventory_hostname }}
|
||||
delegate_to: "{{ groups['control_plane'][0] }}"
|
||||
register: k3s_node_registration
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Record secondary control plane registration state
|
||||
set_fact:
|
||||
k3s_node_registered: "{{ k3s_node_registration.rc == 0 }}"
|
||||
|
||||
roles:
|
||||
- k3s-server
|
||||
|
||||
- name: Wait for all control plane nodes to be Ready
|
||||
hosts: control_plane[0]
|
||||
become: true
|
||||
tasks:
|
||||
- name: Wait for kube-vip DaemonSet across control planes
|
||||
command: kubectl -n kube-system rollout status daemonset/kube-vip --timeout=300s
|
||||
register: kube_vip_rollout
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Show kube-vip pod status on rollout failure
|
||||
command: kubectl -n kube-system get pods -l app.kubernetes.io/name=kube-vip -o wide
|
||||
register: kube_vip_pods_after_join
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: kube_vip_rollout.rc != 0
|
||||
|
||||
- name: Describe kube-vip pods on rollout failure
|
||||
command: kubectl -n kube-system describe pods -l app.kubernetes.io/name=kube-vip
|
||||
register: kube_vip_describe_after_join
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: kube_vip_rollout.rc != 0
|
||||
|
||||
- name: Fail when kube-vip is not healthy on all control planes
|
||||
fail:
|
||||
msg: |
|
||||
kube-vip DaemonSet did not become healthy after secondary control planes joined.
|
||||
Rollout:
|
||||
{{ kube_vip_rollout.stdout | default('') }}
|
||||
{{ kube_vip_rollout.stderr | default('') }}
|
||||
|
||||
Pods:
|
||||
{{ kube_vip_pods_after_join.stdout | default('n/a') }}
|
||||
|
||||
Describe:
|
||||
{{ kube_vip_describe_after_join.stdout | default('n/a') }}
|
||||
when: kube_vip_rollout.rc != 0
|
||||
|
||||
- name: Wait for control plane node readiness
|
||||
command: kubectl wait --for=condition=Ready node -l node-role.kubernetes.io/control-plane --timeout=900s
|
||||
register: control_plane_ready
|
||||
changed_when: false
|
||||
|
||||
- name: Wait for Kubernetes readyz before worker joins
|
||||
command: kubectl --server=https://{{ kube_api_endpoint }}:6443 get --raw=/readyz
|
||||
register: api_readyz_before_workers
|
||||
until: api_readyz_before_workers.rc == 0
|
||||
retries: 30
|
||||
delay: 10
|
||||
changed_when: false
|
||||
|
||||
- name: Verify worker reachability to Kubernetes API VIP
|
||||
hosts: workers
|
||||
become: true
|
||||
tasks:
|
||||
- name: Wait for Kubernetes API VIP from worker
|
||||
wait_for:
|
||||
host: "{{ kube_api_endpoint }}"
|
||||
port: 6443
|
||||
state: started
|
||||
timeout: 180
|
||||
register: worker_vip_wait
|
||||
failed_when: false
|
||||
|
||||
- name: Collect worker network diagnostics when VIP is unreachable
|
||||
shell: |
|
||||
set -euo pipefail
|
||||
echo "== ip addr =="
|
||||
ip addr
|
||||
echo "== ip route =="
|
||||
ip route
|
||||
echo "== ip neigh =="
|
||||
ip neigh || true
|
||||
echo "== vip route =="
|
||||
ip route get {{ kube_api_endpoint }} || true
|
||||
echo "== tcp probe =="
|
||||
timeout 5 bash -c '</dev/tcp/{{ kube_api_endpoint }}/6443' && echo connected || echo failed
|
||||
args:
|
||||
executable: /bin/bash
|
||||
register: worker_vip_diagnostics
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
when: worker_vip_wait.msg is defined
|
||||
|
||||
- name: Fail when worker cannot reach Kubernetes API VIP
|
||||
fail:
|
||||
msg: |
|
||||
Worker {{ inventory_hostname }} cannot reach Kubernetes API VIP {{ kube_api_endpoint }}:6443.
|
||||
This blocks k3s agent join and points to kube-vip/L2/routing reachability, not agent install.
|
||||
|
||||
Diagnostics:
|
||||
{{ worker_vip_diagnostics.stdout | default('n/a') }}
|
||||
when: worker_vip_wait.msg is defined
|
||||
|
||||
- name: Setup workers
|
||||
hosts: workers
|
||||
become: true
|
||||
@@ -90,9 +244,36 @@
|
||||
k3s_server_url: "https://{{ kube_api_endpoint | default(hostvars[groups['control_plane'][0]]['k3s_primary_private_ip']) }}:6443"
|
||||
k3s_node_ip: "{{ k3s_private_ip }}"
|
||||
|
||||
pre_tasks:
|
||||
- name: Check whether worker is registered in Kubernetes
|
||||
command: kubectl get node/{{ inventory_hostname }}
|
||||
delegate_to: "{{ groups['control_plane'][0] }}"
|
||||
register: k3s_node_registration
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Record worker registration state
|
||||
set_fact:
|
||||
k3s_node_registered: "{{ k3s_node_registration.rc == 0 }}"
|
||||
|
||||
roles:
|
||||
- k3s-agent
|
||||
|
||||
- name: Pre-pull bootstrap control-plane images
|
||||
hosts: control_plane[0]
|
||||
become: true
|
||||
|
||||
roles:
|
||||
- bootstrap-image-prepull
|
||||
|
||||
- name: Pre-pull Rancher bootstrap images
|
||||
hosts: workers
|
||||
become: true
|
||||
|
||||
roles:
|
||||
- role: rancher-image-prepull
|
||||
when: rancher_image_prepull_enabled | default(false) | bool
|
||||
|
||||
- name: Deploy observability stack
|
||||
hosts: control_plane[0]
|
||||
become: true
|
||||
@@ -148,10 +329,16 @@
|
||||
hosts: localhost
|
||||
connection: local
|
||||
tasks:
|
||||
- name: Check whether kubeconfig was fetched
|
||||
stat:
|
||||
path: ../outputs/kubeconfig
|
||||
register: kubeconfig_file
|
||||
|
||||
- name: Update kubeconfig server address
|
||||
command: |
|
||||
sed -i 's/127.0.0.1/{{ hostvars[groups["control_plane"][0]]["ansible_host"] }}/g' ../outputs/kubeconfig
|
||||
changed_when: true
|
||||
when: kubeconfig_file.stat.exists
|
||||
|
||||
- name: Display success message
|
||||
debug:
|
||||
|
||||
@@ -8,6 +8,10 @@ spec:
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k8s-cluster-cp-1
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
@@ -19,6 +23,10 @@ spec:
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k8s-cluster-cp-1
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
@@ -30,6 +38,10 @@ spec:
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k8s-cluster-cp-1
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
@@ -41,3 +53,7 @@ spec:
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k8s-cluster-cp-1
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
@@ -1,36 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: hcloud-cloud-controller-manager
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: kube-system
  chart:
    spec:
      chart: hcloud-cloud-controller-manager
      version: 1.30.1
      sourceRef:
        kind: HelmRepository
        name: hcloud
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    selectorLabels:
      app: hcloud-cloud-controller-manager
    args:
      secure-port: "0"
    networking:
      enabled: true
    nodeSelector:
      kubernetes.io/hostname: k8s-cluster-cp-1
    additionalTolerations:
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: hcloud
  namespace: flux-system
spec:
  interval: 1h
  url: https://charts.hetzner.cloud
@@ -5,14 +5,14 @@ metadata:
  namespace: flux-system
spec:
  interval: 10m
  timeout: 15m
  targetNamespace: cert-manager
  chart:
    spec:
      chart: cert-manager
      version: "v1.17.2"
      chart: ./infrastructure/charts/cert-manager
      sourceRef:
        kind: HelmRepository
        name: jetstack
        kind: GitRepository
        name: platform
        namespace: flux-system
  install:
    createNamespace: true

@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: jetstack
  namespace: flux-system
spec:
  interval: 1h
  url: https://charts.jetstack.io
@@ -2,5 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - helmrepository-cert-manager.yaml
  - helmrelease-cert-manager.yaml
@@ -1,36 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: hcloud-csi
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: kube-system
  chart:
    spec:
      chart: hcloud-csi
      version: 2.20.0
      sourceRef:
        kind: HelmRepository
        name: hcloud
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    controller:
      nodeSelector:
        kubernetes.io/hostname: k8s-cluster-cp-1
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
    hcloudVolumeDefaultLocation: nbg1
    storageClasses:
      - name: hcloud-volumes
        defaultStorageClass: true
        reclaimPolicy: Delete
@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: hcloud
  namespace: flux-system
spec:
  interval: 1h
  url: https://charts.hetzner.cloud
@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrepository-hcloud.yaml
  - helmrelease-hcloud-csi.yaml
@@ -1,5 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - backup-recurring.yaml
  - restore-from-b2.yaml
  - clustersecretstore-doppler-hetznerterra.yaml
@@ -6,12 +6,8 @@ metadata:
spec:
  interval: 10m
  targetNamespace: external-secrets
  chart:
    spec:
      chart: external-secrets
      version: 2.1.0
      sourceRef:
        kind: HelmRepository
  chartRef:
    kind: OCIRepository
    name: external-secrets
    namespace: flux-system
  install:
@@ -23,13 +19,25 @@ spec:
      retries: 3
  values:
    installCRDs: true
    image:
      repository: oci.external-secrets.io/external-secrets/external-secrets
      tag: v2.1.0
      pullPolicy: IfNotPresent
    nodeSelector:
      kubernetes.io/hostname: k8s-cluster-cp-1
    webhook:
      failurePolicy: Ignore
      image:
        repository: oci.external-secrets.io/external-secrets/external-secrets
        tag: v2.1.0
        pullPolicy: IfNotPresent
      nodeSelector:
        kubernetes.io/hostname: k8s-cluster-cp-1
    certController:
      image:
        repository: oci.external-secrets.io/external-secrets/external-secrets
        tag: v2.1.0
        pullPolicy: IfNotPresent
      nodeSelector:
        kubernetes.io/hostname: k8s-cluster-cp-1
    serviceMonitor:

@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: external-secrets
  namespace: flux-system
spec:
  interval: 1h
  url: https://charts.external-secrets.io
@@ -2,5 +2,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - helmrepository-external-secrets.yaml
  - ocirepository-external-secrets.yaml
  - helmrelease-external-secrets.yaml

@@ -0,0 +1,13 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: OCIRepository
metadata:
  name: external-secrets
  namespace: flux-system
spec:
  interval: 10m
  url: oci://ghcr.io/external-secrets/charts/external-secrets
  ref:
    tag: 2.1.0
  layerSelector:
    mediaType: application/vnd.cncf.helm.chart.content.v1.tar+gzip
    operation: copy
@@ -11,5 +11,5 @@ spec:
  name: platform
  path: ./infrastructure/addons/cert-manager
  wait: true
  timeout: 10m
  timeout: 20m
  suspend: false
@@ -1,7 +1,7 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: addon-rancher-backup
  name: addon-external-secrets-store
  namespace: flux-system
spec:
  interval: 10m
@@ -9,10 +9,9 @@ spec:
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/rancher-backup
  wait: true
  timeout: 10m
  suspend: false
  path: ./infrastructure/addons/external-secrets-store
  dependsOn:
    - name: addon-external-secrets
    - name: addon-rancher
  wait: false
  timeout: 15m
  suspend: false
@@ -10,6 +10,19 @@ spec:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/external-secrets
  wait: true
  timeout: 5m
  wait: false
  healthChecks:
    - apiVersion: helm.toolkit.fluxcd.io/v2
      kind: HelmRelease
      name: external-secrets
      namespace: flux-system
    - apiVersion: apps/v1
      kind: Deployment
      name: external-secrets-external-secrets
      namespace: external-secrets
    - apiVersion: apps/v1
      kind: Deployment
      name: external-secrets-external-secrets-webhook
      namespace: external-secrets
  timeout: 10m
  suspend: false
@@ -1,7 +1,7 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: addon-ccm
  name: addon-nfs-storage
  namespace: flux-system
spec:
  interval: 10m
@@ -9,7 +9,12 @@ spec:
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/ccm
  path: ./infrastructure/addons/nfs-storage
  wait: true
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: nfs-subdir-external-provisioner
      namespace: kube-system
  timeout: 10m
  suspend: false
@@ -1,7 +1,7 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: addon-rancher-backup-config
  name: addon-observability-secrets
  namespace: flux-system
spec:
  interval: 10m
@@ -9,8 +9,9 @@ spec:
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/rancher-backup-config
  path: ./infrastructure/addons/observability-secrets
  dependsOn:
    - name: addon-external-secrets-store
  wait: false
  timeout: 5m
  suspend: false
  dependsOn:
    - name: addon-rancher-backup
@@ -11,9 +11,10 @@ spec:
    name: platform
  path: ./infrastructure/addons/observability
  dependsOn:
    - name: addon-external-secrets
    - name: addon-observability-secrets
    - name: addon-nfs-storage
    - name: addon-tailscale-operator
    - name: addon-tailscale-proxyclass
  wait: true
  wait: false
  timeout: 5m
  suspend: false

@@ -13,5 +13,5 @@ spec:
  dependsOn:
    - name: addon-rancher
  wait: true
  timeout: 5m
  timeout: 10m
  suspend: false
@@ -1,7 +1,7 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: addon-csi
  name: addon-rancher-secrets
  namespace: flux-system
spec:
  interval: 10m
@@ -9,9 +9,9 @@ spec:
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/csi
  path: ./infrastructure/addons/rancher-secrets
  dependsOn:
    - name: addon-ccm
  wait: true
  timeout: 10m
    - name: addon-external-secrets-store
  wait: false
  timeout: 15m
  suspend: false
@@ -10,11 +10,32 @@ spec:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/rancher
  wait: true
  timeout: 15m
  timeout: 30m
  suspend: false
  dependsOn:
    - name: addon-tailscale-operator
    - name: addon-tailscale-proxyclass
    - name: addon-external-secrets
    - name: addon-rancher-secrets
    - name: addon-cert-manager
  wait: false
  healthChecks:
    - apiVersion: helm.toolkit.fluxcd.io/v2
      kind: HelmRelease
      name: rancher
      namespace: flux-system
    - apiVersion: apps/v1
      kind: Deployment
      name: cattle-system-rancher
      namespace: cattle-system
    - apiVersion: apps/v1
      kind: Deployment
      name: rancher-webhook
      namespace: cattle-system
    - apiVersion: cert-manager.io/v1
      kind: Issuer
      name: cattle-system-rancher
      namespace: cattle-system
    - apiVersion: cert-manager.io/v1
      kind: Certificate
      name: tls-rancher-ingress
      namespace: cattle-system

@@ -10,6 +10,6 @@ spec:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/tailscale-operator
  wait: true
  timeout: 5m
  wait: false
  timeout: 10m
  suspend: false

@@ -1,16 +1,16 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - kustomization-ccm.yaml
  - kustomization-csi.yaml
  - kustomization-nfs-storage.yaml
  - kustomization-external-secrets.yaml
  - kustomization-external-secrets-store.yaml
  - kustomization-cert-manager.yaml
  - kustomization-tailscale-operator.yaml
  - kustomization-tailscale-proxyclass.yaml
  - traefik
  - kustomization-observability-secrets.yaml
  - kustomization-observability.yaml
  - kustomization-observability-content.yaml
  - kustomization-rancher-secrets.yaml
  - kustomization-rancher.yaml
  - kustomization-rancher-config.yaml
  - kustomization-rancher-backup.yaml
  - kustomization-rancher-backup-config.yaml

@@ -0,0 +1,20 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: nfs-subdir-external-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: run-nfs-subdir-external-provisioner
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: nfs-subdir-external-provisioner-runner
subjects:
  - kind: ServiceAccount
    name: nfs-subdir-external-provisioner
    namespace: kube-system
@@ -0,0 +1,41 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-subdir-external-provisioner
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nfs-subdir-external-provisioner
  template:
    metadata:
      labels:
        app: nfs-subdir-external-provisioner
    spec:
      serviceAccountName: nfs-subdir-external-provisioner
      nodeSelector:
        kubernetes.io/hostname: k8s-cluster-cp-1
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
      containers:
        - name: nfs-subdir-external-provisioner
          image: registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2
          imagePullPolicy: IfNotPresent
          env:
            - name: PROVISIONER_NAME
              value: flash-nfs
            - name: NFS_SERVER
              value: 10.27.27.239
            - name: NFS_PATH
              value: /TheFlash/k8s-nfs
          volumeMounts:
            - name: nfs-subdir-external-provisioner-root
              mountPath: /persistentvolumes
      volumes:
        - name: nfs-subdir-external-provisioner-root
          nfs:
            server: 10.27.27.239
            path: /TheFlash/k8s-nfs
@@ -0,0 +1,10 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - serviceaccount-nfs-subdir-external-provisioner.yaml
  - clusterrole-nfs-subdir-external-provisioner.yaml
  - clusterrolebinding-nfs-subdir-external-provisioner.yaml
  - role-nfs-subdir-external-provisioner.yaml
  - rolebinding-nfs-subdir-external-provisioner.yaml
  - storageclass-flash-nfs.yaml
  - deployment-nfs-subdir-external-provisioner.yaml
@@ -0,0 +1,9 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: leader-locking-nfs-subdir-external-provisioner
  namespace: kube-system
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
@@ -0,0 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: leader-locking-nfs-subdir-external-provisioner
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: leader-locking-nfs-subdir-external-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-subdir-external-provisioner
    namespace: kube-system
@@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-subdir-external-provisioner
  namespace: kube-system
@@ -0,0 +1,12 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: flash-nfs
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
provisioner: flash-nfs
parameters:
  archiveOnDelete: "true"
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: Immediate
@@ -1,5 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrepository-hcloud.yaml
  - helmrelease-hcloud-ccm.yaml
  - namespace.yaml
  - grafana-admin-externalsecret.yaml
@@ -5,14 +5,14 @@ metadata:
  namespace: flux-system
spec:
  interval: 10m
  timeout: 15m
  targetNamespace: observability
  chart:
    spec:
      chart: kube-prometheus-stack
      version: 68.4.4
      chart: ./infrastructure/charts/kube-prometheus-stack
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        kind: GitRepository
        name: platform
        namespace: flux-system
  install:
    createNamespace: true
@@ -21,6 +21,7 @@ spec:
  upgrade:
    remediation:
      retries: 3
      strategy: uninstall
  values:
    grafana:
      enabled: true

@@ -6,13 +6,9 @@ metadata:
spec:
  interval: 10m
  targetNamespace: observability
  chart:
    spec:
      chart: loki
      version: 6.10.0
      sourceRef:
        kind: HelmRepository
        name: grafana
  chartRef:
    kind: OCIRepository
    name: loki
    namespace: flux-system
  install:
    createNamespace: true
@@ -50,7 +46,7 @@ spec:
      replicas: 1
      persistence:
        size: 10Gi
        storageClass: local-path
        storageClass: flash-nfs
      resources:
        requests:
          cpu: 100m
@@ -87,11 +83,11 @@ spec:
    test:
      enabled: false
    chunksCache:
      enabled: true
      allocatedMemory: 128
      enabled: false
    resultsCache:
      enabled: true
      allocatedMemory: 128
      enabled: false
    lokiCanary:
      enabled: false
    monitoring:
      selfMonitoring:
        enabled: false

@@ -5,14 +5,11 @@ metadata:
  namespace: flux-system
spec:
  interval: 10m
  timeout: 20m
  targetNamespace: observability
  chart:
    spec:
      chart: promtail
      version: 6.16.6
      sourceRef:
        kind: HelmRepository
        name: grafana
  chartRef:
    kind: OCIRepository
    name: promtail
    namespace: flux-system
  install:
    createNamespace: true
@@ -22,6 +19,8 @@ spec:
    remediation:
      retries: 3
  values:
    image:
      pullPolicy: IfNotPresent
    config:
      clients:
        - url: http://loki.observability.svc.cluster.local:3100/loki/api/v1/push
        - url: http://observability-loki.observability.svc.cluster.local:3100/loki/api/v1/push
@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: grafana
  namespace: flux-system
spec:
  interval: 1h
  url: https://grafana.github.io/helm-charts
@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: prometheus-community
  namespace: flux-system
spec:
  interval: 1h
  url: https://prometheus-community.github.io/helm-charts
@@ -1,10 +1,8 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - grafana-admin-externalsecret.yaml
  - helmrepository-prometheus-community.yaml
  - helmrepository-grafana.yaml
  - ocirepository-loki.yaml
  - ocirepository-promtail.yaml
  - helmrelease-kube-prometheus-stack.yaml
  - helmrelease-loki.yaml
  - helmrelease-promtail.yaml

@@ -0,0 +1,14 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: OCIRepository
metadata:
  name: loki
  namespace: flux-system
spec:
  interval: 10m
  timeout: 5m
  url: oci://ghcr.io/grafana/helm-charts/loki
  ref:
    tag: 6.46.0
  layerSelector:
    mediaType: application/vnd.cncf.helm.chart.content.v1.tar+gzip
    operation: copy
@@ -0,0 +1,14 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: OCIRepository
metadata:
  name: promtail
  namespace: flux-system
spec:
  interval: 10m
  timeout: 5m
  url: oci://ghcr.io/grafana/helm-charts/promtail
  ref:
    tag: 6.16.6
  layerSelector:
    mediaType: application/vnd.cncf.helm.chart.content.v1.tar+gzip
    operation: copy
@@ -1,17 +0,0 @@
apiVersion: resources.cattle.io/v1
kind: Backup
metadata:
  name: rancher-b2-recurring
  namespace: cattle-resources-system
spec:
  resourceSetName: rancher-resource-set-full
  storageLocation:
    s3:
      credentialSecretName: rancher-b2-creds
      credentialSecretNamespace: cattle-resources-system
      bucketName: HetznerTerra
      folder: rancher-backups
      endpoint: s3.us-east-005.backblazeb2.com
      region: us-east-005
  schedule: "0 3 * * *"
  retentionCount: 7
@@ -1,19 +0,0 @@
# Uncomment and set backupFilename to restore from a specific backup on rebuild.
# Find the latest backup filename in B2: rancher-backups/ folder.
# After restore succeeds, Rancher will have all users/settings from the backup.
#
# apiVersion: resources.cattle.io/v1
# kind: Restore
# metadata:
#   name: restore-from-b2
#   namespace: cattle-resources-system
# spec:
#   backupFilename: rancher-b2-manual-test-0a416444-2c8a-4d34-8a07-d9e406750374-2026-03-30T00-08-02Z.tar.gz
#   storageLocation:
#     s3:
#       credentialSecretName: rancher-b2-creds
#       credentialSecretNamespace: cattle-resources-system
#       bucketName: HetznerTerra
#       folder: rancher-backups
#       endpoint: s3.us-east-005.backblazeb2.com
#       region: us-east-005
@@ -1,25 +0,0 @@
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: rancher-b2-creds
  namespace: cattle-resources-system
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: doppler-hetznerterra
    kind: ClusterSecretStore
  target:
    name: rancher-b2-creds
    creationPolicy: Owner
    template:
      type: Opaque
      data:
        accessKey: "{{ .B2_ACCOUNT_ID }}"
        secretKey: "{{ .B2_APPLICATION_KEY }}"
  data:
    - secretKey: B2_ACCOUNT_ID
      remoteRef:
        key: B2_ACCOUNT_ID
    - secretKey: B2_APPLICATION_KEY
      remoteRef:
        key: B2_APPLICATION_KEY
@@ -1,23 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: rancher-backup-crd
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: cattle-resources-system
  chart:
    spec:
      chart: rancher-backup-crd
      version: "106.0.2+up8.1.0"
      sourceRef:
        kind: HelmRepository
        name: rancher-charts
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
@@ -1,42 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: rancher-backup
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: cattle-resources-system
  dependsOn:
    - name: rancher-backup-crd
  chart:
    spec:
      chart: rancher-backup
      version: "106.0.2+up8.1.0"
      sourceRef:
        kind: HelmRepository
        name: rancher-charts
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    image:
      repository: rancher/backup-restore-operator
    kubectl:
      image:
        repository: rancher/kubectl
        tag: "v1.34.0"
  postRenderers:
    - kustomize:
        patches:
          - target:
              kind: Job
              name: rancher-backup-patch-sa
            patch: |
              - op: replace
                path: /spec/template/spec/containers/0/image
                value: rancher/kubectl:v1.34.0
@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: rancher-charts
  namespace: flux-system
spec:
  interval: 1h
  url: https://charts.rancher.io
@@ -1,8 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - helmrepository-rancher-backup.yaml
  - helmrelease-rancher-backup-crd.yaml
  - helmrelease-rancher-backup.yaml
  - b2-credentials-externalsecret.yaml
@@ -1,4 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
  name: cattle-resources-system
@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - rancher-bootstrap-password-flux-externalsecret.yaml
  - rancher-bootstrap-password-externalsecret.yaml
@@ -14,8 +14,8 @@ spec:
    template:
      type: Opaque
      data:
        bootstrapPassword: "{{ .RANCHER_BOOTSTRAP_PASSWORD }}"
        bootstrapPassword: "{{ .rancherBootstrapPassword }}"
  data:
    - secretKey: RANCHER_BOOTSTRAP_PASSWORD
    - secretKey: rancherBootstrapPassword
      remoteRef:
        key: RANCHER_BOOTSTRAP_PASSWORD
@@ -5,14 +5,14 @@ metadata:
  namespace: flux-system
spec:
  interval: 10m
  timeout: 15m
  targetNamespace: cattle-system
  chart:
    spec:
      chart: rancher
      version: "2.13.3"
      chart: ./infrastructure/charts/rancher
      sourceRef:
        kind: HelmRepository
        name: rancher-stable
        kind: GitRepository
        name: platform
        namespace: flux-system
  install:
    createNamespace: true
@@ -23,10 +23,19 @@ spec:
      retries: 3
  values:
    hostname: rancher.silverside-gopher.ts.net
    systemDefaultRegistry: registry.rancher.com
    useBundledSystemChart: true
    replicas: 1
    extraEnv:
      - name: CATTLE_PROMETHEUS_METRICS
        value: "true"
      - name: CATTLE_FEATURES
        value: "managed-system-upgrade-controller=false"
    webhook:
      image:
        repository: rancher/rancher-webhook
        tag: v0.9.3
        imagePullPolicy: IfNotPresent
    resources:
      requests:
        cpu: 500m
@@ -34,6 +43,10 @@ spec:
      limits:
        cpu: 1000m
        memory: 1Gi
    startupProbe:
      timeoutSeconds: 5
      periodSeconds: 10
      failureThreshold: 60
    affinity:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:

@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: rancher-stable
  namespace: flux-system
spec:
  interval: 1h
  url: https://releases.rancher.com/server-charts/stable
@@ -1,9 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - helmrepository-rancher.yaml
  - helmrelease-rancher.yaml
  - rancher-bootstrap-password-flux-externalsecret.yaml
  - rancher-bootstrap-password-externalsecret.yaml
  - rancher-tailscale-service.yaml
@@ -8,11 +8,10 @@ spec:
  targetNamespace: tailscale-system
  chart:
    spec:
      chart: tailscale-operator
      version: 1.96.5
      chart: ./infrastructure/charts/tailscale-operator
      sourceRef:
        kind: HelmRepository
        name: tailscale
        kind: GitRepository
        name: platform
        namespace: flux-system
  install:
    createNamespace: true
@@ -28,6 +27,10 @@ spec:
    operatorConfig:
      defaultTags:
        - tag:k8s
      image:
        repository: ghcr.io/tailscale/k8s-operator
        tag: v1.96.5
        pullPolicy: IfNotPresent
      nodeSelector:
        kubernetes.io/hostname: k8s-cluster-cp-1
      tolerations:
@@ -37,3 +40,6 @@ spec:
    proxyConfig:
      defaultTags: tag:k8s
      defaultProxyClass: infra-stable
      image:
        repository: ghcr.io/tailscale/tailscale
        tag: v1.96.5

@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: tailscale
  namespace: flux-system
spec:
  interval: 1h
  url: https://pkgs.tailscale.com/helmcharts
@@ -2,5 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - helmrepository-tailscale.yaml
  - helmrelease-tailscale-operator.yaml
@@ -8,19 +8,20 @@ spec:
  targetNamespace: kube-system
  chart:
    spec:
      chart: traefik
      version: "39.0.0"
      chart: ./infrastructure/charts/traefik
      sourceRef:
        kind: HelmRepository
        name: traefik
        kind: GitRepository
        name: platform
        namespace: flux-system
  install:
    createNamespace: true
    timeout: 15m
    remediation:
      retries: 3
      retries: 10
  upgrade:
    timeout: 15m
    remediation:
      retries: 3
      retries: 10
  values:
    additionalArguments:
      - "--entryPoints.flux.address=:9001/tcp"

@@ -1,9 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: traefik
  namespace: flux-system
spec:
  interval: 10m
  url: https://traefik.github.io/charts
  provider: generic
@@ -1,5 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrepository-traefik.yaml
  - helmrelease-traefik.yaml
Some files were not shown because too many files have changed in this diff.