feat: migrate cluster baseline from Hetzner to Proxmox
Replace Hetzner infrastructure and cloud-provider assumptions with Proxmox VM clones, kube-vip API HA, and NFS-backed storage. Update bootstrap, Flux addons, CI workflows, and docs to target the new private Proxmox baseline while preserving the existing Tailscale, Doppler, Flux, Rancher, and B2 backup flows.
@@ -12,12 +12,15 @@ on:
 
 env:
   TF_VERSION: "1.7.0"
-  TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
   TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
   TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
   TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
   TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
   TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
+  TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
+  TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
+  TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
+  TF_VAR_proxmox_insecure: "true"
 
 jobs:
   dashboards:
@@ -51,25 +54,6 @@ jobs:
             -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
             -backend-config="skip_requesting_account_id=true"
 
-      - name: Detect runner egress IP
-        run: |
-          RUNNER_IP=$(curl -fsSL https://api.ipify.org)
-          echo "RUNNER_CIDR=[\"${RUNNER_IP}/32\"]" >> "$GITHUB_ENV"
-          echo "Runner egress IP: ${RUNNER_IP}"
-
-      - name: Open SSH/API for current runner CIDR
-        working-directory: terraform
-        run: |
-          terraform apply \
-            -refresh=false \
-            -target=hcloud_firewall.cluster \
-            -var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-            -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-            -var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-            -var="allowed_ssh_ips=${RUNNER_CIDR}" \
-            -var="allowed_api_ips=${RUNNER_CIDR}" \
-            -auto-approve
-
       - name: Install Python Dependencies
         run: |
           apt-get update && apt-get install -y python3-pip
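For context, the new `TF_VAR_proxmox_*` values exported above have to line up with Terraform input variables on the other side. A plausible sketch of those declarations (names taken from the workflow env; types, descriptions, and the `sensitive`/`default` choices are assumptions, since `terraform/variables.tf` is not shown in this commit view):

```hcl
# Sketch only: plausible declarations for the Proxmox inputs the workflows export as TF_VAR_*.
variable "proxmox_endpoint" {
  type        = string
  description = "Proxmox VE API endpoint, e.g. https://100.105.0.115:8006/"
}

variable "proxmox_api_token_id" {
  type        = string
  description = "Proxmox API token ID, e.g. terraform-prov@pve!k8s-cluster"
}

variable "proxmox_api_token_secret" {
  type      = string
  sensitive = true
}

variable "proxmox_insecure" {
  type    = bool
  default = true
}
```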
@@ -11,12 +11,15 @@ on:
 
 env:
   TF_VERSION: "1.7.0"
-  TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
   TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
   TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
   TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
   TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
   TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
+  TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
+  TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
+  TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
+  TF_VAR_proxmox_insecure: "true"
   TS_OAUTH_CLIENT_ID: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}
   TS_OAUTH_CLIENT_SECRET: ${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}
 
@@ -60,40 +63,6 @@ jobs:
           echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
           chmod 644 ~/.ssh/id_ed25519.pub
 
-      - name: Install jq
-        run: |
-          apt-get update
-          apt-get install -y jq
-
-      - name: Import existing servers into state (if missing)
-        working-directory: terraform
-        env:
-          HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
-        run: |
-          set -e
-          ensure_import() {
-            address="$1"
-            name="$2"
-            if terraform state show "$address" >/dev/null 2>&1; then
-              echo "$address already in state"
-              return
-            fi
-            id=$(curl -sS -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers?name=${name}" | jq -r '.servers[0].id // empty')
-            if [ -n "$id" ]; then
-              echo "Importing $address from server $name ($id)"
-              terraform import "$address" "$id"
-            else
-              echo "No existing server found for $name; skipping import"
-            fi
-          }
-
-          ensure_import 'hcloud_server.control_plane[0]' 'k8s-cluster-cp-1'
-          ensure_import 'hcloud_server.control_plane[1]' 'k8s-cluster-cp-2'
-          ensure_import 'hcloud_server.control_plane[2]' 'k8s-cluster-cp-3'
-          ensure_import 'hcloud_server.workers[0]' 'k8s-cluster-worker-1'
-          ensure_import 'hcloud_server.workers[1]' 'k8s-cluster-worker-2'
-          ensure_import 'hcloud_server.workers[2]' 'k8s-cluster-worker-3'
-
       - name: Terraform Plan
         id: plan
         working-directory: terraform
@@ -187,32 +156,11 @@ jobs:
           mkdir -p ../outputs
           terraform output -json > ../outputs/terraform_outputs.json
 
-      - name: Detect runner egress IP
-        run: |
-          RUNNER_IP=$(curl -fsSL https://api.ipify.org)
-          echo "RUNNER_CIDR=[\"${RUNNER_IP}/32\"]" >> "$GITHUB_ENV"
-          echo "Runner egress IP: ${RUNNER_IP}"
-
-      - name: Open SSH/API for current runner CIDR
-        working-directory: terraform
-        run: |
-          terraform apply \
-            -target=hcloud_firewall.cluster \
-            -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-            -var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-            -var="allowed_ssh_ips=${RUNNER_CIDR}" \
-            -var="allowed_api_ips=${RUNNER_CIDR}" \
-            -auto-approve
-
       - name: Install Python Dependencies
         run: |
           apt-get update && apt-get install -y python3-pip
           pip3 install --break-system-packages ansible kubernetes jinja2 pyyaml
 
-      - name: Note runner connectivity mode
-        run: |
-          echo "Using runner public network access with RUNNER_ALLOWED_CIDRS for SSH/API"
-
       - name: Install Ansible Collections
         run: ansible-galaxy collection install -r ansible/requirements.yml
 
@@ -224,7 +172,6 @@ jobs:
         working-directory: ansible
         run: |
           ansible-playbook site.yml \
-            -e "hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
             -e "tailscale_auth_key=${{ secrets.TAILSCALE_AUTH_KEY }}" \
             -e "tailscale_tailnet=${{ secrets.TAILSCALE_TAILNET }}" \
             -e "tailscale_oauth_client_id=${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}" \
@@ -294,9 +241,8 @@ jobs:
             key: dopplerToken
             namespace: external-secrets
          EOF
-          # Wait for CCM and CSI (Hetzner cloud integration)
-          kubectl -n flux-system wait --for=condition=Ready kustomization/addon-ccm --timeout=600s
-          kubectl -n flux-system wait --for=condition=Ready kustomization/addon-csi --timeout=600s
+          # Wait for the storage layer and private access components
+          kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=600s
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
 
       - name: Wait for Rancher and backup operator
@@ -397,10 +343,9 @@ jobs:
         working-directory: ansible
         run: |
           ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get nodes -o wide"
-          ansible -i inventory.ini 'control_plane[0]' -m shell -a "kubectl describe nodes | grep -E 'Name:|providerID:'"
           ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n flux-system get gitrepositories,kustomizations,helmreleases"
           ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n kube-system get pods -o wide"
-          ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass"
+          ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass flash-nfs"
           ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n tailscale-system get pods -o wide"
           ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n external-secrets get pods"
         env:
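The verification step above amounts to a handful of kubectl probes. A minimal manual version, run on (or against) the primary control plane after bootstrap, would look like this — names are the ones used in this commit, timeouts are illustrative:

```bash
# Manual spot-check mirroring the CI health checks for the Proxmox baseline.
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=600s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
kubectl get storageclass flash-nfs
kubectl get nodes -o wide
```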
+13 -123
@@ -10,107 +10,22 @@ on:
 
 env:
   TF_VERSION: "1.7.0"
-  TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
   TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
   TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
   TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
   TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
   TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
-  B2_ACCOUNT_ID: ${{ secrets.B2_ACCOUNT_ID }}
-  B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
+  TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
+  TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
+  TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
+  TF_VAR_proxmox_insecure: "true"
 
 jobs:
-  pre-destroy-backup:
-    name: Pre-Destroy Backup
-    runs-on: ubuntu-latest
-    if: github.event.inputs.confirm == 'destroy'
-    environment: destroy
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Setup Terraform
-        uses: hashicorp/setup-terraform@v3
-        with:
-          terraform_version: ${{ env.TF_VERSION }}
-
-      - name: Terraform Init
-        working-directory: terraform
-        run: |
-          terraform init \
-            -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-            -backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-            -backend-config="region=auto" \
-            -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
-            -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-            -backend-config="skip_requesting_account_id=true"
-
-      - name: Setup SSH Keys
-        run: |
-          mkdir -p ~/.ssh
-          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
-          chmod 600 ~/.ssh/id_ed25519
-          echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
-          chmod 644 ~/.ssh/id_ed25519.pub
-
-      - name: Get Control Plane IP
-        id: cp_ip
-        working-directory: terraform
-        run: |
-          PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
-          echo "PRIMARY_IP=${PRIMARY_IP}" >> "$GITHUB_ENV"
-
-      - name: Pre-Destroy pg_dump to B2
-        run: |
-          set +e
-          echo "Attempting pre-destroy backup to B2..."
-          ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@${PRIMARY_IP} << 'EOF'
-          set -e
-          # Check if kubectl is available and cluster is up
-          if ! command -v kubectl &> /dev/null; then
-            echo "kubectl not found, skipping pre-destroy backup"
-            exit 0
-          fi
-
-          # Check if we can reach the cluster
-          if ! kubectl cluster-info &> /dev/null; then
-            echo "Cannot reach cluster, skipping pre-destroy backup"
-            exit 0
-          fi
-
-          # Check if CNP is deployed
-          if ! kubectl get namespace cnpg-cluster &> /dev/null; then
-            echo "CNP namespace not found, skipping pre-destroy backup"
-            exit 0
-          fi
-
-          # Run backup using the pgdump image directly
-          BACKUP_FILE="rancher-backup-$(date +%Y%m%d-%H%M%S).sql.gz"
-          B2_ACCOUNT_ID="$(cat /etc/kubernetes/secret/b2_account_id 2>/dev/null || echo '')"
-          B2_APPLICATION_KEY="$(cat /etc/kubernetes/secret/b2_application_key 2>/dev/null || echo '')"
-
-          if [ -z "$B2_ACCOUNT_ID" ] || [ -z "$B2_APPLICATION_KEY" ]; then
-            echo "B2 credentials not found in secret, skipping pre-destroy backup"
-            exit 0
-          fi
-
-          kubectl run pgdump-manual --image=ghcr.io/cloudnative-pg/pgbackrest:latest --restart=Never \
-            -n cnpg-cluster --dry-run=client -o yaml | \
-            kubectl apply -f -
-
-          echo "Waiting for backup job to complete..."
-          kubectl wait --for=condition=complete job/pgdump-manual -n cnpg-cluster --timeout=300s || true
-          kubectl logs job/pgdump-manual -n cnpg-cluster || true
-          kubectl delete job pgdump-manual -n cnpg-cluster --ignore-not-found=true || true
-          EOF
-          echo "Pre-destroy backup step completed (failure is non-fatal)"
-
   destroy:
     name: Destroy Cluster
     runs-on: ubuntu-latest
     if: github.event.inputs.confirm == 'destroy'
     environment: destroy
-    needs: pre-destroy-backup
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -120,6 +35,14 @@ jobs:
         with:
           terraform_version: ${{ env.TF_VERSION }}
 
+      - name: Setup SSH Keys
+        run: |
+          mkdir -p ~/.ssh
+          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
+          chmod 600 ~/.ssh/id_ed25519
+          echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
+          chmod 644 ~/.ssh/id_ed25519.pub
+
       - name: Terraform Init
         working-directory: terraform
         run: |
@@ -131,19 +54,6 @@ jobs:
            -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
            -backend-config="skip_requesting_account_id=true"
 
-      - name: Setup SSH Keys
-        run: |
-          mkdir -p ~/.ssh
-          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
-          chmod 600 ~/.ssh/id_ed25519
-          echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
-          chmod 644 ~/.ssh/id_ed25519.pub
-
-      - name: Install jq
-        run: |
-          apt-get update
-          apt-get install -y jq
-
       - name: Terraform Destroy
         id: destroy
         working-directory: terraform
@@ -152,7 +62,6 @@ jobs:
           for attempt in 1 2 3; do
             echo "Terraform destroy attempt ${attempt}/3"
             terraform destroy \
-              -var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
              -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
              -var="ssh_private_key=$HOME/.ssh/id_ed25519" \
              -auto-approve
@@ -164,32 +73,13 @@ jobs:
              echo "Terraform destroy failed with exit code ${rc}; retrying in 30s"
              sleep 30
              terraform refresh \
-                -var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
                -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
                -var="ssh_private_key=$HOME/.ssh/id_ed25519" || true
            fi
          done
          exit "$rc"
 
-      - name: Hetzner destroy diagnostics
+      - name: Terraform state diagnostics
         if: failure() && steps.destroy.outcome == 'failure'
-        env:
-          HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
         run: |
-          set +e
-          echo "== Terraform state list =="
           terraform -chdir=terraform state list || true
-
-          network_id=$(terraform -chdir=terraform state show hcloud_network.cluster 2>/dev/null | awk '/^id *=/ {gsub(/"/, "", $3); print $3; exit}')
-          if [ -z "$network_id" ]; then
-            network_id="11988935"
-          fi
-
-          echo "== Hetzner network =="
-          curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/networks/${network_id}" | jq . || true
-
-          echo "== Hetzner servers attached to network =="
-          curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers" | jq --argjson id "$network_id" '.servers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true
-
-          echo "== Hetzner load balancers attached to network =="
-          curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/load_balancers" | jq --argjson id "$network_id" '.load_balancers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true
@@ -9,7 +9,9 @@ Repository guide for OpenCode sessions in this repo.
 
 ## Current Baseline
 
-- HA private cluster: 3 control planes, 3 workers.
+- HA private cluster: 3 control planes, 5 workers on Proxmox.
+- Proxmox clones come from template `9000` on node `flex`; API VIP is `10.27.27.40` via kube-vip.
+- Storage is `nfs-subdir-external-provisioner` backed by `10.27.27.22:/TheFlash/k8s-nfs` with StorageClass `flash-nfs`.
 - Tailscale is the private access path for Rancher and shared services.
 - Rancher, Grafana, and Prometheus are exposed through Tailscale; Flux UI / Weave GitOps is removed.
 - `apps/` is suspended by default.
@@ -20,8 +22,8 @@ Repository guide for OpenCode sessions in this repo.
 - Terraform: `terraform -chdir=terraform fmt -recursive`, `terraform -chdir=terraform validate`, `terraform -chdir=terraform plan -var-file=../terraform.tfvars`, `terraform -chdir=terraform apply -var-file=../terraform.tfvars`
 - Ansible: `ansible-galaxy collection install -r ansible/requirements.yml`, `cd ansible && python3 generate_inventory.py`, `ansible-playbook -i ansible/inventory.ini ansible/site.yml --syntax-check`, `ansible-playbook ansible/site.yml`
 - Flux/Kustomize: `kubectl kustomize infrastructure/addons/<addon>`, `kubectl kustomize clusters/prod/flux-system`
-- Kubeconfig refresh: `scripts/refresh-kubeconfig.sh <cp1-public-ip>`
-- Tailnet smoke check: `ssh root@<cp1-ip> 'bash -s' < scripts/smoke-check-tailnet-services.sh`
+- Kubeconfig refresh: `scripts/refresh-kubeconfig.sh <cp1-ip>`
+- Tailnet smoke check: `ssh ubuntu@<cp1-ip> 'bash -s' < scripts/smoke-check-tailnet-services.sh`
 
 ## Workflow Rules
 
@@ -31,12 +33,14 @@ Repository guide for OpenCode sessions in this repo.
 - CI deploy order is Terraform -> Ansible -> Flux bootstrap -> Rancher restore -> health checks.
 - One object per Kubernetes YAML file; keep filenames kebab-case.
 - If `kubectl` points at `localhost:8080` after a rebuild, refresh kubeconfig from the primary control-plane IP.
+- Bootstrap assumptions that matter: SSH user is `ubuntu`, NIC is `ens18`, API join endpoint is the kube-vip address.
 
 ## Repo-Specific Gotchas
 
 - `rancher-backup` uses a postRenderer to swap the broken hook image to `rancher/kubectl:v1.34.0`; do not put S3 config in HelmRelease values. Put it in the Backup CR.
 - Tailscale cleanup only runs before service proxies exist; it removes stale offline `rancher`/`grafana`/`prometheus`/`flux` devices, then must stop so live proxies are not deleted.
 - Keep the Tailscale operator on the stable Helm repo `https://pkgs.tailscale.com/helmcharts` at `1.96.5` unless you have a reason to change it.
+- The repo no longer uses a cloud controller manager. If you see `providerID` or Hetzner-specific logic, it is stale.
 - Current private URLs:
   - Rancher: `https://rancher.silverside-gopher.ts.net/`
   - Grafana: `http://grafana.silverside-gopher.ts.net/`
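Given the `flash-nfs` StorageClass and NFS export described in the new baseline, a quick way to exercise dynamic provisioning is a throwaway claim like the sketch below (the claim name and size are made up; only the StorageClass name comes from this commit):

```yaml
# Hypothetical smoke-test claim; only storageClassName (flash-nfs) is taken from the repo baseline.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-smoke-test
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: flash-nfs
  resources:
    requests:
      storage: 1Gi
```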
@@ -1,30 +1,28 @@
-# Hetzner Kubernetes Cluster
+# Proxmox Kubernetes Cluster
 
-Production-ready Kubernetes cluster on Hetzner Cloud using Terraform and Ansible.
+Production-ready private Kubernetes cluster on Proxmox using Terraform, Ansible, and Flux.
 
 ## Architecture
 
 | Component | Details |
 |-----------|---------|
-| **Control Plane** | 3x CX23 (HA) |
-| **Workers** | 3x CX33 |
+| **Control Plane** | 3x Proxmox VMs (2 vCPU / 4 GiB / 32 GiB) |
+| **Workers** | 5x Proxmox VMs (4 vCPU / 8 GiB / 64 GiB) |
 | **K8s** | k3s (latest, HA) |
-| **Addons** | Hetzner CCM + CSI + Prometheus + Grafana + Loki |
+| **Addons** | NFS provisioner + Prometheus + Grafana + Loki + Rancher |
 | **Access** | SSH/API and private services restricted to Tailnet |
 | **Bootstrap** | Terraform + Ansible + Flux |
 
 ## Prerequisites
 
-### 1. Hetzner Cloud API Token
+### 1. Proxmox API Token
 
-1. Go to [Hetzner Cloud Console](https://console.hetzner.com/)
-2. Select your project (or create a new one)
-3. Navigate to **Security** → **API Tokens**
-4. Click **Generate API Token**
-5. Set description: `k8s-cluster-terraform`
-6. Select permissions: **Read & Write**
-7. Click **Generate API Token**
-8. **Copy the token immediately** - it won't be shown again!
+Create an API token for the Proxmox VE user used by Terraform. The repo expects the `bpg/proxmox` provider with:
+
+- endpoint: `https://100.105.0.115:8006/`
+- node: `flex`
+- clone source: template `9000` (`ubuntu-2404-k8s-template`)
+- auth: API token
 
 ### 2. Backblaze B2 Bucket (for Terraform State)
 
@@ -44,7 +42,7 @@ Production-ready Kubernetes cluster on Hetzner Cloud using Terraform and Ansible
 ### 3. SSH Key Pair
 
 ```bash
-ssh-keygen -t ed25519 -C "k8s@hetzner" -f ~/.ssh/hetzner_k8s
+ssh-keygen -t ed25519 -C "k8s@proxmox" -f ~/.ssh/infra
 ```
 
 ### 4. Local Tools
@@ -71,10 +69,12 @@ cp terraform.tfvars.example terraform.tfvars
 Edit `terraform.tfvars`:
 
 ```hcl
-hcloud_token = "your-hetzner-api-token"
+proxmox_endpoint = "https://100.105.0.115:8006/"
+proxmox_api_token_id = "terraform-prov@pve!k8s-cluster"
+proxmox_api_token_secret = "your-proxmox-token-secret"
 
-ssh_public_key = "~/.ssh/hetzner_k8s.pub"
-ssh_private_key = "~/.ssh/hetzner_k8s"
+ssh_public_key = "~/.ssh/infra.pub"
+ssh_private_key = "~/.ssh/infra"
 
 s3_access_key = "your-backblaze-key-id"
 s3_secret_key = "your-backblaze-application-key"
@@ -84,12 +84,7 @@ s3_bucket = "k8s-terraform-state"
 tailscale_auth_key = "tskey-auth-..."
 tailscale_tailnet = "yourtailnet.ts.net"
 
-restrict_api_ssh_to_tailnet = true
-tailnet_cidr = "100.64.0.0/10"
-enable_nodeport_public = false
-
-allowed_ssh_ips = []
-allowed_api_ips = []
+kube_api_vip = "10.27.27.40"
 ```
 
 ### 3. Initialize Terraform
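These tfvars feed the `bpg/proxmox` provider named in the prerequisites. A minimal provider wiring might look roughly like this — a sketch only, since the repo's actual `providers.tf` is not shown in this commit; the token is assembled as `id=secret`, which is the format that provider expects:

```hcl
# Sketch of a bpg/proxmox provider configuration using the variables set in terraform.tfvars.
terraform {
  required_providers {
    proxmox = {
      source = "bpg/proxmox"
    }
  }
}

provider "proxmox" {
  endpoint  = var.proxmox_endpoint
  api_token = "${var.proxmox_api_token_id}=${var.proxmox_api_token_secret}"
  insecure  = var.proxmox_insecure
}
```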
@@ -152,7 +147,9 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → **
 
 | Secret | Description |
 |--------|-------------|
-| `HCLOUD_TOKEN` | Hetzner Cloud API token |
+| `PROXMOX_ENDPOINT` | Proxmox API endpoint (for example `https://100.105.0.115:8006/`) |
+| `PROXMOX_API_TOKEN_ID` | Proxmox API token ID |
+| `PROXMOX_API_TOKEN_SECRET` | Proxmox API token secret |
 | `S3_ACCESS_KEY` | Backblaze B2 keyID |
 | `S3_SECRET_KEY` | Backblaze B2 applicationKey |
 | `S3_ENDPOINT` | Backblaze S3 endpoint (e.g., `https://s3.eu-central-003.backblazeb2.com`) |
@@ -163,7 +160,6 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → **
 | `TAILSCALE_OAUTH_CLIENT_SECRET` | Tailscale OAuth client secret for Kubernetes Operator |
 | `DOPPLER_HETZNERTERRA_SERVICE_TOKEN` | Doppler service token for `hetznerterra` runtime secrets |
 | `GRAFANA_ADMIN_PASSWORD` | Optional admin password for Grafana (auto-generated if unset) |
-| `RUNNER_ALLOWED_CIDRS` | Optional CIDR list for CI runner access if you choose to pass it via tfvars/secrets |
 | `SSH_PUBLIC_KEY` | SSH public key content |
 | `SSH_PRIVATE_KEY` | SSH private key content |
 
@@ -176,8 +172,8 @@ This repo uses Flux for continuous reconciliation after Terraform + Ansible boot
 The current default target is the HA private baseline:
 
 - `3` control plane nodes
-- `3` worker nodes
-- private Hetzner network only
+- `5` worker nodes
+- private Proxmox network only
 - Tailscale for operator and service access
 - Flux-managed platform addons with `apps` suspended by default
 
@@ -207,8 +203,7 @@ Terraform/bootstrap secrets remain in Gitea Actions secrets and are not managed
 ### Reconciliation graph
 
 - `infrastructure` (top-level)
-  - `addon-ccm`
-  - `addon-csi` depends on `addon-ccm`
+  - `addon-nfs-storage`
   - `addon-tailscale-operator`
   - `addon-observability`
   - `addon-observability-content` depends on `addon-observability`
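`addon-nfs-storage` wraps the upstream `nfs-subdir-external-provisioner` chart. A trimmed sketch of what such a HelmRelease could look like — illustrative only, the real addon lives under `infrastructure/addons/`; just the NFS server, export path, and StorageClass name are taken from this repo's baseline:

```yaml
# Illustrative HelmRelease; field names follow the upstream chart, values follow the repo baseline.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: nfs-subdir-external-provisioner
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: kube-system
  chart:
    spec:
      chart: nfs-subdir-external-provisioner
      sourceRef:
        kind: HelmRepository
        name: nfs-subdir-external-provisioner
        namespace: flux-system
  values:
    nfs:
      server: 10.27.27.22
      path: /TheFlash/k8s-nfs
    storageClass:
      name: flash-nfs
```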
@@ -224,7 +219,7 @@ Terraform/bootstrap secrets remain in Gitea Actions secrets and are not managed
 ### Current addon status
 
 - Core infrastructure addons are Flux-managed from `infrastructure/addons/`.
-- Active Flux addons for the current baseline: `addon-ccm`, `addon-csi`, `addon-cert-manager`, `addon-external-secrets`, `addon-tailscale-operator`, `addon-tailscale-proxyclass`, `addon-observability`, `addon-observability-content`, `addon-rancher`, `addon-rancher-config`, `addon-rancher-backup`, `addon-rancher-backup-config`.
+- Active Flux addons for the current baseline: `addon-nfs-storage`, `addon-cert-manager`, `addon-external-secrets`, `addon-tailscale-operator`, `addon-tailscale-proxyclass`, `addon-observability`, `addon-observability-content`, `addon-rancher`, `addon-rancher-config`, `addon-rancher-backup`, `addon-rancher-backup-config`.
 - `apps` remains suspended until workload rollout is explicitly enabled.
 - Ansible is limited to cluster bootstrap, prerequisite secret creation, pre-proxy Tailscale cleanup, and kubeconfig finalization.
 - Weave GitOps / Flux UI is no longer deployed; use Rancher or the `flux` CLI for Flux operations.
@@ -232,14 +227,14 @@ Terraform/bootstrap secrets remain in Gitea Actions secrets and are not managed
 ### Rancher access
 
 - Rancher is private-only and exposed through Tailscale at `https://rancher.silverside-gopher.ts.net/`.
-- The public Hetzner load balancer path is not used for Rancher.
+- Rancher and the Kubernetes API stay private; kube-vip provides the API VIP on the LAN.
 - Rancher stores state in embedded etcd; no external database is used.
 
 ### Stable baseline acceptance
 
 A rebuild is considered successful only when all of the following pass without manual intervention:
 
-- Terraform create succeeds for the default `3` control planes and `3` workers.
+- Terraform create succeeds for the default `3` control planes and `5` workers.
 - Ansible bootstrap succeeds end-to-end.
 - All nodes become `Ready`.
 - Flux core reconciliation is healthy.
@@ -323,9 +318,6 @@ It avoids full cluster provisioning and only applies Grafana content resources:
 ├── terraform/
 │   ├── main.tf
 │   ├── variables.tf
-│   ├── network.tf
-│   ├── firewall.tf
-│   ├── ssh.tf
 │   ├── servers.tf
 │   ├── outputs.tf
 │   └── backend.tf
@@ -353,17 +345,19 @@ It avoids full cluster provisioning and only applies Grafana content resources:
 
 ## Firewall Rules
 
+This repo no longer manages cloud firewalls. Access control is expected to be handled on your LAN infrastructure and through Tailscale.
+
+Important cluster-local ports still in use:
+
 | Port | Source | Purpose |
 |------|--------|---------|
-| 22 | Tailnet CIDR | SSH |
-| 6443 | Tailnet CIDR + internal | Kubernetes API |
-| 41641/udp | Any | Tailscale WireGuard |
-| 9345 | 10.0.0.0/16 | k3s Supervisor (HA join) |
-| 2379 | 10.0.0.0/16 | etcd Client |
-| 2380 | 10.0.0.0/16 | etcd Peer |
-| 8472 | 10.0.0.0/16 | Flannel VXLAN |
-| 10250 | 10.0.0.0/16 | Kubelet |
-| 30000-32767 | Optional | NodePorts (disabled by default) |
+| 22 | Admin hosts / CI | SSH |
+| 6443 | 10.27.27.0/24 + VIP | Kubernetes API |
+| 9345 | 10.27.27.0/24 | k3s Supervisor |
+| 2379 | 10.27.27.0/24 | etcd Client |
+| 2380 | 10.27.27.0/24 | etcd Peer |
+| 8472/udp | 10.27.27.0/24 | Flannel VXLAN |
+| 10250 | 10.27.27.0/24 | Kubelet |
 
 ## Operations
 
@@ -399,7 +393,7 @@ terraform destroy
 ### Check k3s Logs
 
 ```bash
-ssh root@<control-plane-ip> journalctl -u k3s -f
+ssh ubuntu@<control-plane-ip> sudo journalctl -u k3s -f
 ```
 
 ### Reset k3s
 
@@ -408,19 +402,10 @@ ssh root@<control-plane-ip> journalctl -u k3s -f
 ansible-playbook site.yml -t reset
 ```
 
-## Costs Breakdown
-
-| Resource | Quantity | Unit Price | Monthly |
-|----------|----------|------------|---------|
-| CX23 (Control Plane) | 3 | €2.99 | €8.97 |
-| CX33 (Workers) | 4 | €4.99 | €19.96 |
-| Backblaze B2 | ~1 GB | Free (first 10GB) | €0.00 |
-| **Total** | | | **€28.93/mo** |
-
 ## Security Notes
 
 - Control plane has HA (3 nodes, can survive 1 failure)
-- Consider adding Hetzner load balancer for API server
+- Kubernetes API HA is provided by kube-vip on `10.27.27.40`
 - Rotate API tokens regularly
 - Use network policies in Kubernetes
 - Enable audit logging for production
+14 -7
@@ -1,6 +1,6 @@
 # Gitea Secrets Setup
 
-This document describes the secrets required for the HetznerTerra deployment workflow.
+This document describes the secrets required for the Proxmox-based deployment workflow.
 
 ## Required Secrets
 
@@ -9,10 +9,17 @@ Add these secrets in your Gitea repository settings:
 
 ### Infrastructure Secrets
 
-#### `HCLOUD_TOKEN`
-- Hetzner Cloud API token
-- Get from: https://console.hetzner.com/projects/{project-id}/security/api-tokens
-- Permissions: Read & Write
+#### `PROXMOX_ENDPOINT`
+- Proxmox VE API endpoint
+- Example: `https://100.105.0.115:8006/`
+
+#### `PROXMOX_API_TOKEN_ID`
+- Proxmox API token ID
+- Example: `terraform-prov@pve!k8s-cluster`
+
+#### `PROXMOX_API_TOKEN_SECRET`
+- Proxmox API token secret
+- Create with `pveum user token add terraform-prov@pve k8s-cluster`
 
 #### `S3_ACCESS_KEY` & `S3_SECRET_KEY`
 - Backblaze B2 credentials for Terraform state storage
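On the Proxmox side, the token referenced above is created with `pveum`. A hedged example sequence (the user name and the token-add step come from this document; the role grant is an assumption and should match however the VMs are actually provisioned):

```bash
# Example sequence on the Proxmox VE node; adjust the role to your setup.
pveum user add terraform-prov@pve
pveum aclmod / -user terraform-prov@pve -role PVEVMAdmin
pveum user token add terraform-prov@pve k8s-cluster --privsep 0
```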
@@ -31,7 +38,7 @@ Add these secrets in your Gitea repository settings:
 
 #### `SSH_PRIVATE_KEY` & `SSH_PUBLIC_KEY`
 - SSH key pair for cluster access
-- Generate with: `ssh-keygen -t ed25519 -C "k8s@hetzner" -f ~/.ssh/hetzner_k8s`
+- Generate with: `ssh-keygen -t ed25519 -C "k8s@proxmox" -f ~/.ssh/infra`
 - Private key content (include BEGIN/END lines)
 - Public key content (full line starting with ssh-ed25519)
 
@@ -90,4 +97,4 @@ Check the workflow logs to verify all secrets are being used correctly.
 - Prefer Doppler for runtime app/platform secrets after cluster bootstrap
 - Rotate Tailscale auth keys periodically
 - Review OAuth client permissions regularly
-- The workflow automatically opens SSH/API access only for the runner's IP during deployment
+- CI expects direct SSH access to the Proxmox VMs and direct Proxmox API access
+12 -14
@@ -5,9 +5,9 @@ This document defines the current engineering target for this repository.
 ## Topology
 
 - 3 control planes (HA etcd cluster)
-- 3 workers
-- Hetzner Load Balancer for Kubernetes API
-- private Hetzner network
+- 5 workers
+- kube-vip API VIP (`10.27.27.40`)
+- private Proxmox/LAN network (`10.27.27.0/24`)
 - Tailscale operator access and service exposure
 - Rancher exposed through Tailscale (`rancher.silverside-gopher.ts.net`)
 - Grafana exposed through Tailscale (`grafana.silverside-gopher.ts.net`)
@@ -17,11 +17,10 @@ This document defines the current engineering target for this repository.
 ## In Scope
 
 - Terraform infrastructure bootstrap
-- Ansible k3s bootstrap with external cloud provider
+- Ansible k3s bootstrap on Ubuntu cloud-init VMs
 - **HA control plane (3 nodes with etcd quorum)**
-- **Hetzner Load Balancer for Kubernetes API**
-- **Hetzner CCM deployed via Ansible (before workers join)**
-- **Hetzner CSI for persistent volumes (via Flux)**
+- **kube-vip for Kubernetes API HA**
+- **NFS-backed persistent volumes via `nfs-subdir-external-provisioner`**
 - Flux core reconciliation
 - External Secrets Operator with Doppler
 - Tailscale private access and smoke-check validation
@@ -45,15 +44,14 @@ This document defines the current engineering target for this repository.
 
 ## Phase Gates
 
-1. Terraform apply completes for HA topology (3 CP, 3 workers, 1 LB).
-2. Load Balancer is healthy with all 3 control plane targets.
-3. Primary control plane bootstraps with `--cluster-init`.
-4. Secondary control planes join via Load Balancer endpoint.
-5. **CCM deployed via Ansible before workers join** (fixes uninitialized taint issue).
-6. Workers join successfully via Load Balancer and all nodes show proper `providerID`.
+1. Terraform apply completes for HA topology (3 CP, 5 workers, 1 VIP).
+2. Primary control plane bootstraps with `--cluster-init`.
+3. kube-vip advertises `10.27.27.40:6443` from the control-plane set.
+4. Secondary control planes join via the kube-vip endpoint.
+5. Workers join successfully via the kube-vip endpoint.
 7. etcd reports 3 healthy members.
 8. Flux source and infrastructure reconciliation are healthy.
-9. **CSI deploys and creates `hcloud-volumes` StorageClass**.
+9. **NFS provisioner deploys and creates `flash-nfs` StorageClass**.
 10. **PVC provisioning tested and working**.
 11. External Secrets sync required secrets.
 12. Tailscale private access works for Rancher, Grafana, and Prometheus.
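Gate 3 can be spot-checked from any LAN host; a minimal probe of the kube-vip endpoint (address and port as defined above) might be:

```bash
# Quick check that kube-vip is answering on the API VIP; -k skips certificate validation.
nc -z -w 3 10.27.27.40 6443 && echo "API VIP reachable"
curl -ks https://10.27.27.40:6443/version || true
```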
@@ -13,7 +13,7 @@ control_plane
 workers
 
 [cluster:vars]
-ansible_user=root
+ansible_user=ubuntu
 ansible_python_interpreter=/usr/bin/python3
 ansible_ssh_private_key_file={{ private_key_file }}
 k3s_version=latest
@@ -1,14 +1,4 @@
 ---
-- name: Apply Hetzner cloud secret
-  shell: >-
-    kubectl -n kube-system create secret generic hcloud
-    --from-literal=token='{{ hcloud_token }}'
-    --from-literal=network='{{ cluster_name }}-network'
-    --dry-run=client -o yaml | kubectl apply -f -
-  changed_when: true
-  no_log: true
-  when: hcloud_token | default('') | length > 0
-
 - name: Ensure Tailscale operator namespace exists
   command: >-
     kubectl create namespace {{ tailscale_operator_namespace | default('tailscale-system') }}
@@ -1,82 +0,0 @@
----
-- name: Check if hcloud secret exists
-  command: kubectl -n kube-system get secret hcloud
-  register: hcloud_secret_check
-  changed_when: false
-  failed_when: false
-
-- name: Fail if hcloud secret is missing
-  fail:
-    msg: "hcloud secret not found in kube-system namespace. CCM requires it."
-  when: hcloud_secret_check.rc != 0
-
-- name: Check if helm is installed
-  command: which helm
-  register: helm_check
-  changed_when: false
-  failed_when: false
-
-- name: Install helm
-  when: helm_check.rc != 0
-  block:
-    - name: Download helm install script
-      get_url:
-        url: https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
-        dest: /tmp/get-helm-3.sh
-        mode: "0755"
-
-    - name: Run helm install script
-      command: /tmp/get-helm-3.sh
-      args:
-        creates: /usr/local/bin/helm
-
-- name: Add Hetzner Helm repository
-  kubernetes.core.helm_repository:
-    name: hcloud
-    repo_url: https://charts.hetzner.cloud
-    kubeconfig: /etc/rancher/k3s/k3s.yaml
-  environment:
-    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
-
-- name: Deploy Hetzner Cloud Controller Manager
-  kubernetes.core.helm:
-    name: hcloud-cloud-controller-manager
-    chart_ref: hcloud/hcloud-cloud-controller-manager
-    release_namespace: kube-system
-    create_namespace: true
-    values:
-      networking:
-        enabled: true
-      nodeSelector:
-        kubernetes.io/hostname: "{{ inventory_hostname }}"
-      additionalTolerations:
-        - key: node-role.kubernetes.io/control-plane
-          operator: Exists
-          effect: NoSchedule
-    kubeconfig: /etc/rancher/k3s/k3s.yaml
-    wait: true
-    wait_timeout: 300s
-  environment:
-    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
-
-- name: Wait for CCM to be ready
-  command: kubectl -n kube-system rollout status deployment/hcloud-cloud-controller-manager --timeout=120s
-  changed_when: false
-  register: ccm_rollout
-  until: ccm_rollout.rc == 0
-  retries: 3
-  delay: 10
-
-- name: Pause to ensure CCM is fully ready to process new nodes
-  pause:
-    seconds: 10
-
-- name: Verify CCM is removing uninitialized taints
-  command: kubectl get nodes -o jsonpath='{.items[*].spec.taints[?(@.key=="node.cloudprovider.kubernetes.io/uninitialized")].key}'
-  register: uninitialized_taints
-  changed_when: false
-  failed_when: false
-
-- name: Display taint status
-  debug:
-    msg: "Nodes with uninitialized taint: {{ uninitialized_taints.stdout }}"
@@ -19,6 +19,7 @@
       - lsb-release
       - software-properties-common
       - jq
+      - nfs-common
       - htop
       - vim
     state: present
@@ -3,4 +3,5 @@ k3s_version: latest
 k3s_server_url: ""
 k3s_token: ""
 k3s_node_ip: ""
-k3s_kubelet_cloud_provider_external: true
+k3s_kubelet_cloud_provider_external: false
+k3s_flannel_iface: ens18
@@ -22,7 +22,7 @@
   command: >-
     /tmp/install-k3s.sh agent
     --node-ip {{ k3s_node_ip }}
-    --flannel-iface=enp7s0
+    --flannel-iface={{ k3s_flannel_iface }}
     {% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
   args:
     creates: /usr/local/bin/k3s-agent
@@ -3,9 +3,10 @@ k3s_version: latest
 k3s_token: ""
 k3s_node_ip: ""
 k3s_primary_public_ip: ""
-k3s_disable_embedded_ccm: true
+k3s_disable_embedded_ccm: false
 k3s_disable_servicelb: true
-k3s_kubelet_cloud_provider_external: true
+k3s_kubelet_cloud_provider_external: false
+k3s_flannel_iface: ens18
 # Load Balancer endpoint for HA cluster joins (set in inventory)
 kube_api_endpoint: ""
 # Tailscale DNS names for control planes (to enable tailnet access)
@@ -61,7 +61,7 @@
     --cluster-init
     --advertise-address={{ k3s_primary_ip }}
     --node-ip={{ k3s_node_ip }}
-    --flannel-iface=enp7s0
+    --flannel-iface={{ k3s_flannel_iface }}
     --tls-san={{ k3s_primary_ip }}
     --tls-san={{ k3s_primary_public_ip }}
     --tls-san={{ kube_api_endpoint }}
@@ -87,7 +87,7 @@
     --server https://{{ k3s_join_endpoint | default(k3s_primary_ip) }}:6443
     --advertise-address={{ k3s_node_ip }}
     --node-ip={{ k3s_node_ip }}
-    --flannel-iface=enp7s0
+    --flannel-iface={{ k3s_flannel_iface }}
     {% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
     {% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
     {% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
@@ -0,0 +1,4 @@
+---
+kube_vip_version: v1.1.2
+kube_vip_interface: ens18
+kube_vip_address: "{{ kube_api_endpoint }}"
@@ -0,0 +1,21 @@
+---
+- name: Render kube-vip control plane manifest
+  template:
+    src: kube-vip-control-plane.yaml.j2
+    dest: /tmp/kube-vip-control-plane.yaml
+    mode: "0644"
+
+- name: Apply kube-vip control plane manifest
+  command: kubectl apply -f /tmp/kube-vip-control-plane.yaml
+  changed_when: true
+
+- name: Wait for kube-vip DaemonSet rollout
+  command: kubectl -n kube-system rollout status daemonset/kube-vip --timeout=180s
+  changed_when: false
+
+- name: Wait for API VIP on 6443
+  wait_for:
+    host: "{{ kube_vip_address }}"
+    port: 6443
+    state: started
+    timeout: 180
@@ -0,0 +1,110 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-vip
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:kube-vip-role
rules:
  - apiGroups: [""]
    resources: ["services/status"]
    verbs: ["update"]
  - apiGroups: [""]
    resources: ["services", "endpoints"]
    verbs: ["list", "get", "watch", "update"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["list", "get", "watch", "update", "patch"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["list", "get", "watch", "update", "create"]
  - apiGroups: ["discovery.k8s.io"]
    resources: ["endpointslices"]
    verbs: ["list", "get", "watch", "update"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:kube-vip-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:kube-vip-role
subjects:
  - kind: ServiceAccount
    name: kube-vip
    namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-vip
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-vip
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-vip
    spec:
      serviceAccountName: kube-vip
      hostNetwork: true
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      containers:
        - name: kube-vip
          image: ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}
          imagePullPolicy: IfNotPresent
          args:
            - manager
          env:
            - name: vip_arp
              value: "true"
            - name: port
              value: "6443"
            - name: vip_interface
              value: {{ kube_vip_interface | quote }}
            - name: vip_subnet
              value: "32"
            - name: cp_enable
              value: "true"
            - name: cp_namespace
              value: kube-system
            - name: vip_ddns
              value: "false"
            - name: vip_leaderelection
              value: "true"
            - name: vip_leaseduration
              value: "5"
            - name: vip_renewdeadline
              value: "3"
            - name: vip_retryperiod
              value: "1"
            - name: address
              value: {{ kube_vip_address | quote }}
          securityContext:
            capabilities:
              add:
                - NET_ADMIN
                - NET_RAW
                - SYS_TIME
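A quick post-deploy sanity check for this manifest could look like the sketch below (not part of this change; 10.27.27.40 is the kube_api_vip value used in the example tfvars later in this commit):

# kube-vip should be running on every control plane
kubectl -n kube-system rollout status daemonset/kube-vip --timeout=180s
# the VIP should answer on 6443; an HTTP 401/403 from the apiserver is enough to prove it is up
curl -k https://10.27.27.40:6443/version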
@@ -57,12 +57,12 @@
   roles:
     - addon-secrets-bootstrap

-- name: Deploy Hetzner CCM (required for workers with external cloud provider)
+- name: Deploy kube-vip for API HA
   hosts: control_plane[0]
   become: true

   roles:
-    - ccm-deploy
+    - kube-vip-deploy

 - name: Setup secondary control planes
   hosts: control_plane[1:]
@@ -1,36 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: hcloud-cloud-controller-manager
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: kube-system
  chart:
    spec:
      chart: hcloud-cloud-controller-manager
      version: 1.30.1
      sourceRef:
        kind: HelmRepository
        name: hcloud
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    selectorLabels:
      app: hcloud-cloud-controller-manager
    args:
      secure-port: "0"
    networking:
      enabled: true
    nodeSelector:
      kubernetes.io/hostname: k8s-cluster-cp-1
    additionalTolerations:
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
@@ -1,8 +0,0 @@
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: hcloud
  namespace: flux-system
spec:
  interval: 1h
  url: https://charts.hetzner.cloud
@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrepository-hcloud.yaml
  - helmrelease-hcloud-ccm.yaml
@@ -1,36 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: hcloud-csi
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: kube-system
  chart:
    spec:
      chart: hcloud-csi
      version: 2.20.0
      sourceRef:
        kind: HelmRepository
        name: hcloud
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    controller:
      nodeSelector:
        kubernetes.io/hostname: k8s-cluster-cp-1
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
    hcloudVolumeDefaultLocation: nbg1
    storageClasses:
      - name: hcloud-volumes
        defaultStorageClass: true
        reclaimPolicy: Delete
@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrepository-hcloud.yaml
  - helmrelease-hcloud-csi.yaml
@@ -1,17 +0,0 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: addon-csi
  namespace: flux-system
spec:
  interval: 10m
  prune: true
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/csi
  dependsOn:
    - name: addon-ccm
  wait: true
  timeout: 10m
  suspend: false
@@ -1,7 +1,7 @@
 apiVersion: kustomize.toolkit.fluxcd.io/v1
 kind: Kustomization
 metadata:
-  name: addon-ccm
+  name: addon-nfs-storage
   namespace: flux-system
 spec:
   interval: 10m
@@ -9,7 +9,7 @@ spec:
   sourceRef:
     kind: GitRepository
     name: platform
-  path: ./infrastructure/addons/ccm
+  path: ./infrastructure/addons/nfs-storage
   wait: true
   timeout: 10m
   suspend: false
@@ -1,8 +1,7 @@
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
-  - kustomization-ccm.yaml
-  - kustomization-csi.yaml
+  - kustomization-nfs-storage.yaml
   - kustomization-external-secrets.yaml
   - kustomization-cert-manager.yaml
   - kustomization-tailscale-operator.yaml
@@ -0,0 +1,36 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: nfs-subdir-external-provisioner
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: kube-system
  chart:
    spec:
      chart: nfs-subdir-external-provisioner
      version: 4.0.18
      sourceRef:
        kind: HelmRepository
        name: nfs-subdir-external-provisioner
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    nfs:
      server: 10.27.27.22
      path: /TheFlash/k8s-nfs
    storageClass:
      create: true
      defaultClass: true
      name: flash-nfs
      provisionerName: flash-nfs
      reclaimPolicy: Delete
      archiveOnDelete: true
      allowVolumeExpansion: true
      volumeBindingMode: Immediate
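Once Flux reconciles this release, flash-nfs becomes the default StorageClass. A minimal smoke test might be the sketch below (the claim name pvc-smoke is made up for illustration):

kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc-smoke
spec:
  accessModes: ["ReadWriteMany"]
  resources:
    requests:
      storage: 1Gi
EOF
kubectl get pvc pvc-smoke   # should reach Bound through the flash-nfs provisioner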
@@ -1,8 +1,8 @@
 apiVersion: source.toolkit.fluxcd.io/v1
 kind: HelmRepository
 metadata:
-  name: hcloud
+  name: nfs-subdir-external-provisioner
   namespace: flux-system
 spec:
   interval: 1h
-  url: https://charts.hetzner.cloud
+  url: https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
@@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrepository-nfs-subdir-external-provisioner.yaml
  - helmrelease-nfs-subdir-external-provisioner.yaml
@@ -24,10 +24,11 @@ echo "Fetching kubeconfig from $CP1_PUBLIC_IP ..."
 ssh -i "$SSH_KEY" \
   -o StrictHostKeyChecking=no \
   -o UserKnownHostsFile=/dev/null \
-  "root@$CP1_PUBLIC_IP" "cat /etc/rancher/k3s/k3s.yaml" \
+  "ubuntu@$CP1_PUBLIC_IP" "sudo cat /etc/rancher/k3s/k3s.yaml" \
   | sed "s/127.0.0.1/$CP1_PUBLIC_IP/g" \
   > "$KUBECONFIG_PATH"

 chmod 600 "$KUBECONFIG_PATH"
 echo "Kubeconfig saved to $KUBECONFIG_PATH"
 echo "Run: export KUBECONFIG=$KUBECONFIG_PATH"
@@ -1,29 +1,33 @@
-hcloud_token = "your-hetzner-cloud-api-token-here"
+proxmox_endpoint         = "https://100.105.0.115:8006/"
+proxmox_api_token_id     = "terraform-prov@pve!k8s-cluster"
+proxmox_api_token_secret = "your-proxmox-api-token-secret"

-ssh_public_key  = "~/.ssh/hetzner_k8s.pub"
-ssh_private_key = "~/.ssh/hetzner_k8s"
+ssh_public_key  = "~/.ssh/infra.pub"
+ssh_private_key = "~/.ssh/infra"

 s3_access_key = "your-backblaze-key-id"
 s3_secret_key = "your-backblaze-application-key"
 s3_endpoint   = "https://s3.eu-central-003.backblazeb2.com"
 s3_bucket     = "k8s-terraform-state"

-cluster_name = "k8s-prod"
+cluster_name = "k8s-cluster"

 tailscale_tailnet = "yourtailnet.ts.net"

-restrict_api_ssh_to_tailnet = true
-tailnet_cidr                = "100.64.0.0/10"
-enable_nodeport_public      = false
+kube_api_vip = "10.27.27.40"

 control_plane_count = 3
-control_plane_type  = "cx23"
+control_plane_ips    = ["10.27.27.30", "10.27.27.31", "10.27.27.32"]
+control_plane_vm_ids = [200, 201, 202]

-worker_count = 4
-worker_type  = "cx33"
+worker_count  = 5
+worker_ips    = ["10.27.27.41", "10.27.27.42", "10.27.27.43", "10.27.27.44", "10.27.27.45"]
+worker_vm_ids = [210, 211, 212, 213, 214]

-location        = "nbg1"
-allowed_ssh_ips = []
-allowed_api_ips = []
+proxmox_node_name               = "flex"
+proxmox_template_vm_id          = 9000
+proxmox_vm_storage_pool         = "Flash"
+proxmox_cloud_init_storage_pool = "Flash"
+proxmox_bridge                  = "vmbr0"
+proxmox_gateway                 = "10.27.27.1"
+proxmox_dns_servers             = ["1.1.1.1", "8.8.8.8"]
@@ -1,118 +0,0 @@
locals {
  ssh_source_ips = var.restrict_api_ssh_to_tailnet ? concat([var.tailnet_cidr], var.allowed_ssh_ips) : var.allowed_ssh_ips
  api_source_ips = var.restrict_api_ssh_to_tailnet ? concat([var.tailnet_cidr], var.allowed_api_ips) : var.allowed_api_ips
}

resource "hcloud_firewall" "cluster" {
  name = "${var.cluster_name}-firewall"

  rule {
    description = "SSH"
    direction   = "in"
    protocol    = "tcp"
    port        = "22"
    source_ips  = local.ssh_source_ips
  }

  rule {
    description = "Kubernetes API"
    direction   = "in"
    protocol    = "tcp"
    port        = "6443"
    source_ips  = local.api_source_ips
  }

  rule {
    description = "Tailscale WireGuard"
    direction   = "in"
    protocol    = "udp"
    port        = "41641"
    source_ips  = ["0.0.0.0/0"]
  }

  rule {
    description = "Kubernetes API (internal)"
    direction   = "in"
    protocol    = "tcp"
    port        = "6443"
    source_ips  = [var.subnet_cidr]
  }

  rule {
    description = "k3s Supervisor"
    direction   = "in"
    protocol    = "tcp"
    port        = "9345"
    source_ips  = [var.subnet_cidr]
  }

  rule {
    description = "etcd Client"
    direction   = "in"
    protocol    = "tcp"
    port        = "2379"
    source_ips  = [var.subnet_cidr]
  }

  rule {
    description = "etcd Peer"
    direction   = "in"
    protocol    = "tcp"
    port        = "2380"
    source_ips  = [var.subnet_cidr]
  }

  rule {
    description = "Flannel VXLAN"
    direction   = "in"
    protocol    = "udp"
    port        = "8472"
    source_ips  = [var.subnet_cidr]
  }

  rule {
    description = "Kubelet"
    direction   = "in"
    protocol    = "tcp"
    port        = "10250"
    source_ips  = [var.subnet_cidr]
  }

  dynamic "rule" {
    for_each = var.enable_nodeport_public ? [1] : []
    content {
      description = "NodePorts"
      direction   = "in"
      protocol    = "tcp"
      port        = "30000-32767"
      source_ips  = ["0.0.0.0/0"]
    }
  }

  rule {
    description = "HTTP from Load Balancer"
    direction   = "in"
    protocol    = "tcp"
    port        = "80"
    source_ips  = ["0.0.0.0/0"]
  }

  rule {
    description = "HTTPS from Load Balancer"
    direction   = "in"
    protocol    = "tcp"
    port        = "443"
    source_ips  = ["0.0.0.0/0"]
  }

  rule {
    description = "ICMP"
    direction   = "in"
    protocol    = "icmp"
    source_ips  = ["0.0.0.0/0"]
  }

  apply_to {
    label_selector = "cluster=${var.cluster_name}"
  }
}
@@ -1,50 +0,0 @@
# Load Balancer for Kubernetes API High Availability
# Provides a single endpoint for all control planes

resource "hcloud_load_balancer" "kube_api" {
  name               = "${var.cluster_name}-api"
  load_balancer_type = "lb11" # Cheapest tier: €5.39/month
  location           = var.location

  labels = {
    cluster = var.cluster_name
    role    = "kube-api"
  }
}

# Attach Load Balancer to private network (required for use_private_ip)
resource "hcloud_load_balancer_network" "kube_api" {
  load_balancer_id = hcloud_load_balancer.kube_api.id
  network_id       = hcloud_network.cluster.id
  ip               = cidrhost(var.subnet_cidr, 5) # 10.0.1.5
}

# Attach all control plane servers as targets
resource "hcloud_load_balancer_target" "kube_api_targets" {
  count            = var.control_plane_count
  type             = "server"
  load_balancer_id = hcloud_load_balancer.kube_api.id
  server_id        = hcloud_server.control_plane[count.index].id
  use_private_ip   = true

  depends_on = [hcloud_load_balancer_network.kube_api, hcloud_server.control_plane]
}

# Kubernetes API service on port 6443
resource "hcloud_load_balancer_service" "kube_api" {
  load_balancer_id = hcloud_load_balancer.kube_api.id
  protocol         = "tcp"
  listen_port      = 6443
  destination_port = 6443

  health_check {
    protocol = "tcp"
    port     = 6443
    interval = 15
    timeout  = 10
    retries  = 3
  }
}

# Firewall rule to allow LB access to control planes on 6443
# This is added to the existing cluster firewall
@@ -2,13 +2,20 @@ terraform {
   required_version = ">= 1.0"

   required_providers {
-    hcloud = {
-      source  = "hetznercloud/hcloud"
-      version = "~> 1.45"
+    local = {
+      source  = "hashicorp/local"
+      version = "~> 2.5"
+    }
+
+    proxmox = {
+      source  = "bpg/proxmox"
+      version = ">= 0.60.0"
     }
   }
 }

-provider "hcloud" {
-  token = var.hcloud_token
+provider "proxmox" {
+  endpoint  = var.proxmox_endpoint
+  api_token = "${var.proxmox_api_token_id}=${var.proxmox_api_token_secret}"
+  insecure  = var.proxmox_insecure
 }
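With the provider block switched to bpg/proxmox, a local dry run against the new variables might look like this sketch (it assumes the example tfvars has been copied to terraform/terraform.tfvars and the Proxmox token values filled in):

cd terraform
terraform init -upgrade   # pulls hashicorp/local and bpg/proxmox
terraform plan -var-file=terraform.tfvars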
@@ -1,11 +0,0 @@
resource "hcloud_network" "cluster" {
  name     = "${var.cluster_name}-network"
  ip_range = var.network_cidr
}

resource "hcloud_network_subnet" "servers" {
  network_id   = hcloud_network.cluster.id
  type         = "cloud"
  network_zone = "eu-central"
  ip_range     = var.subnet_cidr
}
@@ -1,42 +1,36 @@
 output "control_plane_ips" {
   description = "Public IPs of control plane nodes"
-  value       = [for cp in hcloud_server.control_plane : cp.ipv4_address]
+  value       = var.control_plane_ips
 }

 output "control_plane_names" {
   description = "Control plane hostnames"
-  value       = [for cp in hcloud_server.control_plane : cp.name]
+  value       = [for idx in range(var.control_plane_count) : format("%s-cp-%d", var.cluster_name, idx + 1)]
 }

 output "control_plane_private_ips" {
   description = "Private IPs of control plane nodes"
-  value = [
-    for idx, cp in hcloud_server.control_plane :
-    try(one(cp.network).ip, cidrhost(var.subnet_cidr, 10 + idx))
-  ]
+  value       = var.control_plane_ips
 }

 output "primary_control_plane_ip" {
   description = "Public IP of the primary control plane (first node)"
-  value       = hcloud_server.control_plane[0].ipv4_address
+  value       = var.control_plane_ips[0]
 }

 output "worker_ips" {
   description = "Public IPs of worker nodes"
-  value       = [for worker in hcloud_server.workers : worker.ipv4_address]
+  value       = var.worker_ips
 }

 output "worker_names" {
   description = "Worker hostnames"
-  value       = [for worker in hcloud_server.workers : worker.name]
+  value       = [for idx in range(var.worker_count) : format("%s-worker-%d", var.cluster_name, idx + 1)]
 }

 output "worker_private_ips" {
   description = "Private IPs of worker nodes"
-  value = [
-    for idx, worker in hcloud_server.workers :
-    try(one(worker.network).ip, cidrhost(var.subnet_cidr, 20 + idx))
-  ]
+  value       = var.worker_ips
 }

 output "ssh_private_key_path" {
@@ -61,10 +55,10 @@ output "network_cidr" {

 output "kubeconfig_command" {
   description = "Command to fetch kubeconfig"
-  value       = "ssh root@${hcloud_server.control_plane[0].ipv4_address} 'cat /etc/rancher/k3s/k3s.yaml' > kubeconfig && sed -i 's/127.0.0.1/${hcloud_server.control_plane[0].ipv4_address}/g' kubeconfig"
+  value       = "ssh ubuntu@${var.control_plane_ips[0]} 'sudo cat /etc/rancher/k3s/k3s.yaml' > kubeconfig && sed -i 's/127.0.0.1/${var.control_plane_ips[0]}/g' kubeconfig"
 }

 output "kube_api_lb_ip" {
   description = "Load Balancer private IP for Kubernetes API (used for cluster joins)"
-  value       = hcloud_load_balancer_network.kube_api.ip
+  value       = var.kube_api_vip
 }
@@ -1,60 +1,121 @@
-data "hcloud_image" "ubuntu" {
-  name        = "ubuntu-24.04"
-  with_status = ["available"]
-}
-
-resource "hcloud_server" "control_plane" {
-  count = var.control_plane_count
-
-  name        = "${var.cluster_name}-cp-${count.index + 1}"
-  server_type = var.control_plane_type
-  image       = data.hcloud_image.ubuntu.id
-  location    = var.location
-  ssh_keys    = [data.hcloud_ssh_key.cluster.id]
-
-  labels = {
-    cluster = var.cluster_name
-    role    = "control-plane"
-  }
-
-  network {
-    network_id = hcloud_network.cluster.id
-    ip         = cidrhost(var.subnet_cidr, 10 + count.index)
-  }
-
-  public_net {
-    ipv4_enabled = true
-    ipv6_enabled = true
-  }
-
-  firewall_ids = [hcloud_firewall.cluster.id]
-}
-
-resource "hcloud_server" "workers" {
-  count = var.worker_count
-
-  name        = "${var.cluster_name}-worker-${count.index + 1}"
-  server_type = var.worker_type
-  image       = data.hcloud_image.ubuntu.id
-  location    = var.location
-  ssh_keys    = [data.hcloud_ssh_key.cluster.id]
-
-  labels = {
-    cluster = var.cluster_name
-    role    = "worker"
-  }
-
-  network {
-    network_id = hcloud_network.cluster.id
-    ip         = cidrhost(var.subnet_cidr, 20 + count.index)
-  }
-
-  public_net {
-    ipv4_enabled = true
-    ipv6_enabled = true
-  }
-
-  firewall_ids = [hcloud_firewall.cluster.id]
-
-  depends_on = [hcloud_server.control_plane]
-}
+data "local_file" "ssh_public_key" {
+  filename = pathexpand(var.ssh_public_key)
+}
+
+locals {
+  subnet_prefix = split("/", var.subnet_cidr)[1]
+
+  control_planes = {
+    for idx in range(var.control_plane_count) :
+    format("%s-cp-%d", var.cluster_name, idx + 1) => {
+      role      = "control-plane"
+      vm_id     = var.control_plane_vm_ids[idx]
+      ip        = var.control_plane_ips[idx]
+      cpu       = var.control_plane_cores
+      memory_mb = var.control_plane_memory_mb
+      disk_gb   = var.control_plane_disk_gb
+      startup   = 1
+    }
+  }
+
+  workers = {
+    for idx in range(var.worker_count) :
+    format("%s-worker-%d", var.cluster_name, idx + 1) => {
+      role      = "worker"
+      vm_id     = var.worker_vm_ids[idx]
+      ip        = var.worker_ips[idx]
+      cpu       = var.worker_cores
+      memory_mb = var.worker_memory_mb
+      disk_gb   = var.worker_disk_gb
+      startup   = 2
+    }
+  }
+
+  nodes = merge(local.control_planes, local.workers)
+}
+
+resource "proxmox_virtual_environment_vm" "nodes" {
+  for_each = local.nodes
+
+  name        = each.key
+  description = "Managed by Terraform for ${var.cluster_name}"
+  tags        = ["terraform", var.cluster_name, each.value.role]
+  node_name   = var.proxmox_node_name
+  vm_id       = each.value.vm_id
+
+  on_boot             = true
+  started             = true
+  stop_on_destroy     = true
+  reboot_after_update = true
+  timeout_clone       = 1800
+  timeout_create      = 1800
+  timeout_shutdown_vm = 300
+  timeout_start_vm    = 300
+  scsi_hardware       = "virtio-scsi-single"
+
+  clone {
+    vm_id        = var.proxmox_template_vm_id
+    datastore_id = var.proxmox_vm_storage_pool
+    full         = var.proxmox_clone_full
+    retries      = 3
+  }
+
+  agent {
+    enabled = true
+    trim    = true
+  }
+
+  cpu {
+    cores = each.value.cpu
+    type  = "x86-64-v2-AES"
+  }
+
+  memory {
+    dedicated = each.value.memory_mb
+    floating  = each.value.memory_mb
+  }
+
+  startup {
+    order      = tostring(each.value.startup)
+    up_delay   = "20"
+    down_delay = "20"
+  }
+
+  disk {
+    datastore_id = var.proxmox_vm_storage_pool
+    interface    = "scsi0"
+    size         = each.value.disk_gb
+    discard      = "on"
+    iothread     = true
+    ssd          = true
+  }
+
+  initialization {
+    datastore_id = var.proxmox_cloud_init_storage_pool
+
+    dns {
+      servers = var.proxmox_dns_servers
+    }
+
+    ip_config {
+      ipv4 {
+        address = "${each.value.ip}/${local.subnet_prefix}"
+        gateway = var.proxmox_gateway
+      }
+    }
+
+    user_account {
+      username = var.proxmox_ssh_username
+      keys     = [trimspace(data.local_file.ssh_public_key.content)]
+    }
+  }
+
+  network_device {
+    bridge = var.proxmox_bridge
+    model  = "virtio"
+  }
+
+  operating_system {
+    type = "l26"
+  }
+}
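The clone and agent blocks above assume the template VM (proxmox_template_vm_id, 9000 in the example tfvars) is a cloud-init image with qemu-guest-agent available; without the agent the apply will hang waiting for the VM to report its IP. A rough template-prep sketch on the Proxmox host, with image, disk, and datastore names as assumptions rather than part of this change:

wget https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img
qm create 9000 --name ubuntu-2404-template --memory 2048 --net0 virtio,bridge=vmbr0 --scsihw virtio-scsi-single
qm importdisk 9000 noble-server-cloudimg-amd64.img Flash
qm set 9000 --scsi0 Flash:vm-9000-disk-0 --ide2 Flash:cloudinit --boot order=scsi0 --agent enabled=1
qm template 9000

Note that stock Ubuntu cloud images do not ship qemu-guest-agent, so it still needs to be installed in the image (or via cloud-init) before clones report ready.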
@@ -1,7 +0,0 @@
data "local_file" "ssh_public_key" {
  filename = pathexpand(var.ssh_public_key)
}

data "hcloud_ssh_key" "cluster" {
  name = "infra"
}
@@ -1,19 +1,13 @@
-variable "hcloud_token" {
-  description = "Hetzner Cloud API token"
-  type        = string
-  sensitive   = true
-}
-
 variable "ssh_public_key" {
   description = "Path to SSH public key"
   type        = string
-  default     = "~/.ssh/id_ed25519.pub"
+  default     = "~/.ssh/infra.pub"
 }

 variable "ssh_private_key" {
   description = "Path to SSH private key"
   type        = string
-  default     = "~/.ssh/id_ed25519"
+  default     = "~/.ssh/infra"
 }

 variable "cluster_name" {
@@ -28,28 +22,112 @@ variable "control_plane_count" {
   default     = 3
 }

-variable "control_plane_type" {
-  description = "Hetzner server type for control plane"
-  type        = string
-  default     = "cx23"
+variable "control_plane_cores" {
+  description = "vCPU count for control plane VMs"
+  type        = number
+  default     = 2
+}
+
+variable "control_plane_memory_mb" {
+  description = "Dedicated memory for control plane VMs in MiB"
+  type        = number
+  default     = 4096
+}
+
+variable "control_plane_disk_gb" {
+  description = "Disk size for control plane VMs in GiB"
+  type        = number
+  default     = 32
 }

 variable "worker_count" {
   description = "Number of worker nodes"
   type        = number
-  default     = 3
+  default     = 5
 }

-variable "worker_type" {
-  description = "Hetzner server type for workers"
-  type        = string
-  default     = "cx33"
+variable "worker_cores" {
+  description = "vCPU count for worker VMs"
+  type        = number
+  default     = 4
 }

-variable "location" {
-  description = "Hetzner datacenter location"
-  type        = string
-  default     = "nbg1"
+variable "worker_memory_mb" {
+  description = "Dedicated memory for worker VMs in MiB"
+  type        = number
+  default     = 8192
+}
+
+variable "worker_disk_gb" {
+  description = "Disk size for worker VMs in GiB"
+  type        = number
+  default     = 64
+}
+
+variable "proxmox_endpoint" {
+  description = "Proxmox API endpoint without /api2/json suffix"
+  type        = string
+  default     = "https://100.105.0.115:8006/"
+}
+
+variable "proxmox_api_token_id" {
+  description = "Proxmox API token ID"
+  type        = string
+  sensitive   = true
+}
+
+variable "proxmox_api_token_secret" {
+  description = "Proxmox API token secret"
+  type        = string
+  sensitive   = true
+}
+
+variable "proxmox_insecure" {
+  description = "Skip TLS verification for the Proxmox API"
+  type        = bool
+  default     = true
+}
+
+variable "proxmox_node_name" {
+  description = "Fixed Proxmox node name for all cluster VMs"
+  type        = string
+  default     = "flex"
+}
+
+variable "proxmox_template_vm_id" {
+  description = "Template VM ID used for linked clones"
+  type        = number
+  default     = 9000
+}
+
+variable "proxmox_clone_full" {
+  description = "Whether to use full clones instead of linked clones"
+  type        = bool
+  default     = false
+}
+
+variable "proxmox_vm_storage_pool" {
+  description = "Proxmox datastore for VM disks"
+  type        = string
+  default     = "Flash"
+}
+
+variable "proxmox_cloud_init_storage_pool" {
+  description = "Proxmox datastore for cloud-init disks"
+  type        = string
+  default     = "Flash"
+}
+
+variable "proxmox_bridge" {
+  description = "Proxmox bridge for cluster VM interfaces"
+  type        = string
+  default     = "vmbr0"
+}
+
+variable "proxmox_ssh_username" {
+  description = "Cloud-init user injected into cloned VMs"
+  type        = string
+  default     = "ubuntu"
 }

 variable "allowed_ssh_ips" {
@@ -90,13 +168,55 @@ variable "enable_nodeport_public" {
 variable "network_cidr" {
   description = "CIDR for private network"
   type        = string
-  default     = "10.0.0.0/16"
+  default     = "10.27.27.0/24"
 }

 variable "subnet_cidr" {
   description = "CIDR for server subnet"
   type        = string
-  default     = "10.0.1.0/24"
+  default     = "10.27.27.0/24"
+}
+
+variable "proxmox_gateway" {
+  description = "Gateway for cluster VM networking"
+  type        = string
+  default     = "10.27.27.1"
+}
+
+variable "proxmox_dns_servers" {
+  description = "DNS servers configured through cloud-init"
+  type        = list(string)
+  default     = ["1.1.1.1", "8.8.8.8"]
+}
+
+variable "control_plane_ips" {
+  description = "Static IPv4 addresses for control plane VMs"
+  type        = list(string)
+  default     = ["10.27.27.30", "10.27.27.31", "10.27.27.32"]
+}
+
+variable "worker_ips" {
+  description = "Static IPv4 addresses for worker VMs"
+  type        = list(string)
+  default     = ["10.27.27.41", "10.27.27.42", "10.27.27.43", "10.27.27.44", "10.27.27.45"]
+}
+
+variable "control_plane_vm_ids" {
+  description = "Fixed VMIDs for control plane VMs"
+  type        = list(number)
+  default     = [200, 201, 202]
+}
+
+variable "worker_vm_ids" {
+  description = "Fixed VMIDs for worker VMs"
+  type        = list(number)
+  default     = [210, 211, 212, 213, 214]
+}
+
+variable "kube_api_vip" {
+  description = "Virtual IP advertised by kube-vip for the Kubernetes API"
+  type        = string
+  default     = "10.27.27.40"
 }

 variable "s3_access_key" {