feat: migrate cluster baseline from Hetzner to Proxmox
Deploy Cluster / Terraform (push) Failing after 52s
Deploy Cluster / Ansible (push) Has been skipped
Deploy Grafana Content / Grafana Content (push) Failing after 1m37s

Replace Hetzner infrastructure and cloud-provider assumptions with Proxmox
VM clones, kube-vip API HA, and NFS-backed storage. Update bootstrap,
Flux addons, CI workflows, and docs to target the new private Proxmox
baseline while preserving the existing Tailscale, Doppler, Flux, Rancher,
and B2 backup flows.
This commit is contained in:
2026-04-22 03:02:13 +00:00
parent 6c6b9d20ca
commit b1dae28aa5
40 changed files with 577 additions and 784 deletions
+1 -1
View File
@@ -13,7 +13,7 @@ control_plane
workers
[cluster:vars]
ansible_user=root
ansible_user=ubuntu
ansible_python_interpreter=/usr/bin/python3
ansible_ssh_private_key_file={{ private_key_file }}
k3s_version=latest
@@ -1,14 +1,4 @@
---
- name: Apply Hetzner cloud secret
shell: >-
kubectl -n kube-system create secret generic hcloud
--from-literal=token='{{ hcloud_token }}'
--from-literal=network='{{ cluster_name }}-network'
--dry-run=client -o yaml | kubectl apply -f -
changed_when: true
no_log: true
when: hcloud_token | default('') | length > 0
- name: Ensure Tailscale operator namespace exists
command: >-
kubectl create namespace {{ tailscale_operator_namespace | default('tailscale-system') }}
-82
View File
@@ -1,82 +0,0 @@
---
# Installs the Hetzner Cloud Controller Manager (CCM) through Helm and then
# reports which nodes still carry the cloud-provider "uninitialized" taint.

# Probe only: the result is evaluated by the next task.
- name: Check if hcloud secret exists
  ansible.builtin.command:
    cmd: kubectl -n kube-system get secret hcloud
  failed_when: false
  changed_when: false
  register: hcloud_secret_check

# Abort before touching Helm when the secret is absent.
- name: Fail if hcloud secret is missing
  when: hcloud_secret_check.rc != 0
  ansible.builtin.fail:
    msg: "hcloud secret not found in kube-system namespace. CCM requires it."

# Probe only: a non-zero rc triggers the install block below.
- name: Check if helm is installed
  ansible.builtin.command:
    cmd: which helm
  failed_when: false
  changed_when: false
  register: helm_check

- name: Install helm
  when: helm_check.rc != 0
  block:
    - name: Download helm install script
      ansible.builtin.get_url:
        url: https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
        dest: /tmp/get-helm-3.sh
        mode: "0755"

    # Skipped when /usr/local/bin/helm already exists.
    - name: Run helm install script
      ansible.builtin.command:
        cmd: /tmp/get-helm-3.sh
        creates: /usr/local/bin/helm

- name: Add Hetzner Helm repository
  environment:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  kubernetes.core.helm_repository:
    name: hcloud
    repo_url: https://charts.hetzner.cloud
    kubeconfig: /etc/rancher/k3s/k3s.yaml

# The release is pinned to the current inventory host via nodeSelector and
# tolerates the control-plane NoSchedule taint.
- name: Deploy Hetzner Cloud Controller Manager
  environment:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  kubernetes.core.helm:
    name: hcloud-cloud-controller-manager
    chart_ref: hcloud/hcloud-cloud-controller-manager
    release_namespace: kube-system
    create_namespace: true
    kubeconfig: /etc/rancher/k3s/k3s.yaml
    wait: true
    wait_timeout: 300s
    values:
      networking:
        enabled: true
      nodeSelector:
        kubernetes.io/hostname: "{{ inventory_hostname }}"
      additionalTolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule

# Up to 3 retries, 10 seconds apart, each bounded by kubectl's own 120s timeout.
- name: Wait for CCM to be ready
  ansible.builtin.command:
    cmd: kubectl -n kube-system rollout status deployment/hcloud-cloud-controller-manager --timeout=120s
  register: ccm_rollout
  until: ccm_rollout.rc == 0
  retries: 3
  delay: 10
  changed_when: false

- name: Pause to ensure CCM is fully ready to process new nodes
  ansible.builtin.pause:
    seconds: 10

# Informational only: never fails, the output is just printed below.
- name: Verify CCM is removing uninitialized taints
  ansible.builtin.command:
    cmd: >-
      kubectl get nodes -o jsonpath='{.items[*].spec.taints[?(@.key=="node.cloudprovider.kubernetes.io/uninitialized")].key}'
  failed_when: false
  changed_when: false
  register: uninitialized_taints

- name: Display taint status
  ansible.builtin.debug:
    msg: "Nodes with uninitialized taint: {{ uninitialized_taints.stdout }}"
+1
View File
@@ -19,6 +19,7 @@
- lsb-release
- software-properties-common
- jq
- nfs-common
- htop
- vim
state: present
+2 -1
View File
@@ -3,4 +3,5 @@ k3s_version: latest
k3s_server_url: ""
k3s_token: ""
k3s_node_ip: ""
k3s_kubelet_cloud_provider_external: true
k3s_kubelet_cloud_provider_external: false
k3s_flannel_iface: ens18
+1 -1
View File
@@ -22,7 +22,7 @@
command: >-
/tmp/install-k3s.sh agent
--node-ip {{ k3s_node_ip }}
--flannel-iface=enp7s0
--flannel-iface={{ k3s_flannel_iface }}
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
args:
creates: /usr/local/bin/k3s-agent
+3 -2
View File
@@ -3,9 +3,10 @@ k3s_version: latest
k3s_token: ""
k3s_node_ip: ""
k3s_primary_public_ip: ""
k3s_disable_embedded_ccm: true
k3s_disable_embedded_ccm: false
k3s_disable_servicelb: true
k3s_kubelet_cloud_provider_external: true
k3s_kubelet_cloud_provider_external: false
k3s_flannel_iface: ens18
# Load Balancer endpoint for HA cluster joins (set in inventory)
kube_api_endpoint: ""
# Tailscale DNS names for control planes (to enable tailnet access)
+2 -2
View File
@@ -61,7 +61,7 @@
--cluster-init
--advertise-address={{ k3s_primary_ip }}
--node-ip={{ k3s_node_ip }}
--flannel-iface=enp7s0
--flannel-iface={{ k3s_flannel_iface }}
--tls-san={{ k3s_primary_ip }}
--tls-san={{ k3s_primary_public_ip }}
--tls-san={{ kube_api_endpoint }}
@@ -87,7 +87,7 @@
--server https://{{ k3s_join_endpoint | default(k3s_primary_ip) }}:6443
--advertise-address={{ k3s_node_ip }}
--node-ip={{ k3s_node_ip }}
--flannel-iface=enp7s0
--flannel-iface={{ k3s_flannel_iface }}
{% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
{% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
@@ -0,0 +1,4 @@
---
# kube-vip settings consumed by the kube-vip tasks and manifest template.

# Image tag appended to ghcr.io/kube-vip/kube-vip in the DaemonSet.
kube_vip_version: v1.1.2
# Host network interface handed to kube-vip as `vip_interface`.
kube_vip_interface: ens18
# Address handed to kube-vip as `address`; the tasks also wait for port 6443
# on it. Follows kube_api_endpoint unless overridden.
kube_vip_address: "{{ kube_api_endpoint }}"
@@ -0,0 +1,21 @@
---
# Renders and applies the kube-vip manifest, then waits until the DaemonSet
# has rolled out and the API answers on the virtual IP.

# An empty address would otherwise be rendered into the manifest and used as
# the wait_for host below, so stop early with a clear message instead.
- name: Validate kube-vip address is set
  assert:
    that:
      - (kube_vip_address | string | trim) | length > 0
    fail_msg: >-
      kube_vip_address is empty; set it (or kube_api_endpoint, if that is
      where it is derived from) to the API virtual IP.
    quiet: true

- name: Render kube-vip control plane manifest
  template:
    src: kube-vip-control-plane.yaml.j2
    dest: /tmp/kube-vip-control-plane.yaml
    mode: "0644"

- name: Apply kube-vip control plane manifest
  command: kubectl apply -f /tmp/kube-vip-control-plane.yaml
  register: kube_vip_apply
  # kubectl prints "<kind>/<name> created|configured|unchanged" per object;
  # report a change only when something was actually created or updated.
  changed_when: >-
    ' created' in kube_vip_apply.stdout
    or ' configured' in kube_vip_apply.stdout

# Read-only check; kubectl itself gives up after 180 seconds.
- name: Wait for kube-vip DaemonSet rollout
  command: kubectl -n kube-system rollout status daemonset/kube-vip --timeout=180s
  changed_when: false

# Confirms the API port is reachable through the VIP, not just on the node IP.
- name: Wait for API VIP on 6443
  wait_for:
    host: "{{ kube_vip_address }}"
    port: 6443
    state: started
    timeout: 180
@@ -0,0 +1,110 @@
# kube-vip manifest (Jinja2 template): ServiceAccount, RBAC and a DaemonSet
# restricted to control-plane nodes.
# Template inputs: kube_vip_version, kube_vip_interface, kube_vip_address.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-vip
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:kube-vip-role
rules:
  - apiGroups: [""]
    resources: ["services/status"]
    verbs: ["update"]
  - apiGroups: [""]
    resources: ["services", "endpoints"]
    verbs: ["list", "get", "watch", "update"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["list", "get", "watch", "update", "patch"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["list", "get", "watch", "update", "create"]
  - apiGroups: ["discovery.k8s.io"]
    resources: ["endpointslices"]
    verbs: ["list", "get", "watch", "update"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:kube-vip-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:kube-vip-role
subjects:
  - kind: ServiceAccount
    name: kube-vip
    namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-vip
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-vip
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-vip
    spec:
      serviceAccountName: kube-vip
      hostNetwork: true
      # Schedule only onto nodes carrying the control-plane role label.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      containers:
        - name: kube-vip
          image: ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}
          imagePullPolicy: IfNotPresent
          args:
            - manager
          # Env values must be strings, hence the quoted numbers and booleans.
          env:
            - name: vip_arp
              value: "true"
            - name: port
              value: "6443"
            # to_json always emits a double-quoted, YAML-safe scalar; the
            # shell-oriented `quote` filter would leave plain values bare.
            - name: vip_interface
              value: {{ kube_vip_interface | to_json }}
            - name: vip_subnet
              value: "32"
            - name: cp_enable
              value: "true"
            - name: cp_namespace
              value: kube-system
            - name: vip_ddns
              value: "false"
            - name: vip_leaderelection
              value: "true"
            - name: vip_leaseduration
              value: "5"
            - name: vip_renewdeadline
              value: "3"
            - name: vip_retryperiod
              value: "1"
            - name: address
              value: {{ kube_vip_address | to_json }}
          securityContext:
            capabilities:
              add:
                - NET_ADMIN
                - NET_RAW
                - SYS_TIME
+2 -2
View File
@@ -57,12 +57,12 @@
roles:
- addon-secrets-bootstrap
- name: Deploy Hetzner CCM (required for workers with external cloud provider)
- name: Deploy kube-vip for API HA
hosts: control_plane[0]
become: true
roles:
- ccm-deploy
- kube-vip-deploy
- name: Setup secondary control planes
hosts: control_plane[1:]