feat: migrate cluster baseline from Hetzner to Proxmox
Deploy Cluster / Terraform (push) Failing after 52s
Deploy Cluster / Ansible (push) Has been skipped
Deploy Grafana Content / Grafana Content (push) Failing after 1m37s

Replace Hetzner infrastructure and cloud-provider assumptions with Proxmox
VM clones, kube-vip API HA, and NFS-backed storage. Update bootstrap,
Flux addons, CI workflows, and docs to target the new private Proxmox
baseline while preserving the existing Tailscale, Doppler, Flux, Rancher,
and B2 backup flows.
This commit is contained in:
2026-04-22 03:02:13 +00:00
parent 6c6b9d20ca
commit b1dae28aa5
40 changed files with 577 additions and 784 deletions
@@ -0,0 +1,4 @@
---
# kube-vip settings consumed by the kube-vip tasks and manifest template.

# Image tag appended to ghcr.io/kube-vip/kube-vip in the manifest template.
kube_vip_version: "v1.1.2"

# Host network interface passed to kube-vip as `vip_interface`.
kube_vip_interface: "ens18"

# Address passed to kube-vip as `address`; the tasks also wait for it to
# accept connections on port 6443. Resolved from kube_api_endpoint, which is
# not defined here and must be supplied by the inventory or another vars file.
kube_vip_address: "{{ kube_api_endpoint }}"
@@ -0,0 +1,21 @@
---
# Deploys kube-vip for the control plane, then blocks until the DaemonSet has
# rolled out and the API virtual IP accepts TCP connections on 6443.

- name: Render kube-vip control plane manifest
  template:
    src: kube-vip-control-plane.yaml.j2
    dest: /tmp/kube-vip-control-plane.yaml
    mode: "0644"

- name: Apply kube-vip control plane manifest
  command: kubectl apply -f /tmp/kube-vip-control-plane.yaml
  register: kube_vip_apply
  # kubectl prints one "<kind>/<name> <result>" line per object. Only
  # "created" and "configured" mean the cluster was modified; "unchanged"
  # must not mark the task as changed, otherwise every run looks dirty.
  changed_when: >-
    ' created' in kube_vip_apply.stdout
    or ' configured' in kube_vip_apply.stdout

- name: Wait for kube-vip DaemonSet rollout
  command: kubectl -n kube-system rollout status daemonset/kube-vip --timeout=180s
  # Read-only status query; never modifies the cluster.
  changed_when: false

- name: Wait for API VIP on 6443
  wait_for:
    host: "{{ kube_vip_address }}"
    port: 6443
    state: started
    timeout: 180
@@ -0,0 +1,110 @@
# Identity used by the kube-vip DaemonSet pods (see serviceAccountName there);
# permissions are attached through system:kube-vip-binding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-vip
  namespace: kube-system
---
# Cluster-wide permissions granted to the kube-vip service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: "system:kube-vip-role"
rules:
  # Core API group: service status updates.
  - apiGroups:
      - ""
    resources:
      - services/status
    verbs:
      - update
  # Core API group: services and their endpoints.
  - apiGroups:
      - ""
    resources:
      - services
      - endpoints
    verbs:
      - list
      - get
      - watch
      - update
  # Core API group: node objects (read, update and patch).
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - list
      - get
      - watch
      - update
      - patch
  # Lease objects, including creation.
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - list
      - get
      - watch
      - update
      - create
  # EndpointSlice objects.
  - apiGroups:
      - discovery.k8s.io
    resources:
      - endpointslices
    verbs:
      - list
      - get
      - watch
      - update
  # Core API group: pods may only be listed.
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - list
---
# Grants system:kube-vip-role to the kube-vip service account in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: "system:kube-vip-binding"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: "system:kube-vip-role"
subjects:
  - kind: ServiceAccount
    name: kube-vip
    namespace: kube-system
---
# kube-vip DaemonSet: scheduled only onto nodes carrying the
# node-role.kubernetes.io/control-plane label, running on the host network
# under the kube-vip service account.
#
# Template inputs: kube_vip_version, kube_vip_interface, kube_vip_address.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-vip
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-vip
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-vip
    spec:
      serviceAccountName: kube-vip
      hostNetwork: true
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
      # Tolerate both the current and the legacy control-plane NoSchedule
      # taints so the pods can land on tainted control-plane nodes.
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      containers:
        - name: kube-vip
          image: "ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}"
          imagePullPolicy: IfNotPresent
          args:
            - manager
          # kube-vip is configured through environment variables. Kubernetes
          # requires every env value to be a string, so literals are quoted.
          env:
            - name: vip_arp
              value: "true"
            - name: port
              value: "6443"
            # `string | to_json` renders a double-quoted scalar that is always
            # a valid YAML string. The shell-oriented `quote` filter leaves
            # "safe" values bare (so digits-only input becomes an integer) and
            # emits shell escapes that are not valid YAML for embedded quotes.
            - name: vip_interface
              value: {{ kube_vip_interface | string | to_json }}
            - name: vip_subnet
              value: "32"
            - name: cp_enable
              value: "true"
            - name: cp_namespace
              value: kube-system
            - name: vip_ddns
              value: "false"
            - name: vip_leaderelection
              value: "true"
            - name: vip_leaseduration
              value: "5"
            - name: vip_renewdeadline
              value: "3"
            - name: vip_retryperiod
              value: "1"
            - name: address
              value: {{ kube_vip_address | string | to_json }}
          securityContext:
            capabilities:
              add:
                - NET_ADMIN
                - NET_RAW
                - SYS_TIME