feat: migrate cluster baseline from Hetzner to Proxmox
Replace Hetzner infrastructure and cloud-provider assumptions with Proxmox VM clones, kube-vip API HA, and NFS-backed storage. Update bootstrap, Flux addons, CI workflows, and docs to target the new private Proxmox baseline while preserving the existing Tailscale, Doppler, Flux, Rancher, and B2 backup flows.
+13 -123
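For context on the new `TF_VAR_proxmox_*` entries in the workflow diff below: Terraform maps a `TF_VAR_name` environment variable onto the input variable `name`, so the CI secrets imply Terraform wiring roughly like the following sketch. The variable names are fixed by the env vars; everything else is an assumption — the provider block assumes the Telmate `proxmox` provider (the `bpg/proxmox` provider takes different arguments), and the resource behind `primary_control_plane_ip` is hypothetical.

```hcl
# Sketch only: variable names mirror the workflow's TF_VAR_* env vars;
# provider choice and resource names are assumptions.

variable "proxmox_endpoint" {
  description = "Proxmox VE API URL, e.g. https://pve.internal:8006/api2/json"
  type        = string
}

variable "proxmox_api_token_id" {
  description = "API token ID in user@realm!tokenname form"
  type        = string
  sensitive   = true
}

variable "proxmox_api_token_secret" {
  description = "API token secret"
  type        = string
  sensitive   = true
}

variable "proxmox_insecure" {
  description = "Skip TLS verification (self-signed Proxmox certs)"
  type        = bool
  default     = false
}

# Assumes the Telmate provider; bpg/proxmox uses different argument names.
provider "proxmox" {
  pm_api_url          = var.proxmox_endpoint
  pm_api_token_id     = var.proxmox_api_token_id
  pm_api_token_secret = var.proxmox_api_token_secret
  pm_tls_insecure     = var.proxmox_insecure
}

# The destroy workflow reads this via
# `terraform output -raw primary_control_plane_ip`;
# the resource reference here is hypothetical.
output "primary_control_plane_ip" {
  value = proxmox_vm_qemu.control_plane[0].default_ipv4_address
}
```

Note that the workflow pins `TF_VAR_proxmox_insecure: "true"`, which overrides the `false` default above at run time.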
@@ -10,107 +10,22 @@ on:
env:
  TF_VERSION: "1.7.0"
  TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
  TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
  TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
  TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
  TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
  TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
  B2_ACCOUNT_ID: ${{ secrets.B2_ACCOUNT_ID }}
  B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
  TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
  TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
  TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
  TF_VAR_proxmox_insecure: "true"

jobs:
  pre-destroy-backup:
    name: Pre-Destroy Backup
    runs-on: ubuntu-latest
    if: github.event.inputs.confirm == 'destroy'
    environment: destroy
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}

      - name: Terraform Init
        working-directory: terraform
        run: |
          terraform init \
            -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
            -backend-config="bucket=${{ secrets.S3_BUCKET }}" \
            -backend-config="region=auto" \
            -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
            -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
            -backend-config="skip_requesting_account_id=true"

      - name: Setup SSH Keys
        run: |
          mkdir -p ~/.ssh
          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
          chmod 600 ~/.ssh/id_ed25519
          echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
          chmod 644 ~/.ssh/id_ed25519.pub

      - name: Get Control Plane IP
        id: cp_ip
        working-directory: terraform
        run: |
          PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
          echo "PRIMARY_IP=${PRIMARY_IP}" >> "$GITHUB_ENV"

      - name: Pre-Destroy pg_dump to B2
        run: |
          set +e
          echo "Attempting pre-destroy backup to B2..."
          ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@${PRIMARY_IP} << 'EOF'
          set -e
          # Check if kubectl is available and the cluster is up
          if ! command -v kubectl &> /dev/null; then
            echo "kubectl not found, skipping pre-destroy backup"
            exit 0
          fi

          # Check if we can reach the cluster
          if ! kubectl cluster-info &> /dev/null; then
            echo "Cannot reach cluster, skipping pre-destroy backup"
            exit 0
          fi

          # Check if CNPG is deployed
          if ! kubectl get namespace cnpg-cluster &> /dev/null; then
            echo "CNPG namespace not found, skipping pre-destroy backup"
            exit 0
          fi

          # Run backup using the pgdump image directly
          BACKUP_FILE="rancher-backup-$(date +%Y%m%d-%H%M%S).sql.gz"
          B2_ACCOUNT_ID="$(cat /etc/kubernetes/secret/b2_account_id 2>/dev/null || echo '')"
          B2_APPLICATION_KEY="$(cat /etc/kubernetes/secret/b2_application_key 2>/dev/null || echo '')"

          if [ -z "$B2_ACCOUNT_ID" ] || [ -z "$B2_APPLICATION_KEY" ]; then
            echo "B2 credentials not found in secret, skipping pre-destroy backup"
            exit 0
          fi

          # Create a Job so the job/pgdump-manual waits below have something to match
          kubectl create job pgdump-manual --image=ghcr.io/cloudnative-pg/pgbackrest:latest \
            -n cnpg-cluster --dry-run=client -o yaml | \
            kubectl apply -f -

          echo "Waiting for backup job to complete..."
          kubectl wait --for=condition=complete job/pgdump-manual -n cnpg-cluster --timeout=300s || true
          kubectl logs job/pgdump-manual -n cnpg-cluster || true
          kubectl delete job pgdump-manual -n cnpg-cluster --ignore-not-found=true || true
          EOF
          echo "Pre-destroy backup step completed (failure is non-fatal)"

  destroy:
    name: Destroy Cluster
    runs-on: ubuntu-latest
    if: github.event.inputs.confirm == 'destroy'
    environment: destroy
    needs: pre-destroy-backup
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -120,6 +35,14 @@ jobs:
        with:
          terraform_version: ${{ env.TF_VERSION }}

      - name: Setup SSH Keys
        run: |
          mkdir -p ~/.ssh
          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
          chmod 600 ~/.ssh/id_ed25519
          echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
          chmod 644 ~/.ssh/id_ed25519.pub

      - name: Terraform Init
        working-directory: terraform
        run: |
@@ -131,19 +54,6 @@ jobs:
            -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
            -backend-config="skip_requesting_account_id=true"

      - name: Setup SSH Keys
        run: |
          mkdir -p ~/.ssh
          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
          chmod 600 ~/.ssh/id_ed25519
          echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
          chmod 644 ~/.ssh/id_ed25519.pub

      - name: Install jq
        run: |
          sudo apt-get update
          sudo apt-get install -y jq

      - name: Terraform Destroy
        id: destroy
        working-directory: terraform
@@ -152,7 +62,6 @@
          for attempt in 1 2 3; do
            echo "Terraform destroy attempt ${attempt}/3"
            terraform destroy \
              -var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
              -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
              -var="ssh_private_key=$HOME/.ssh/id_ed25519" \
              -auto-approve
@@ -164,32 +73,13 @@
              echo "Terraform destroy failed with exit code ${rc}; retrying in 30s"
              sleep 30
              terraform refresh \
                -var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
                -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
                -var="ssh_private_key=$HOME/.ssh/id_ed25519" || true
            fi
          done
          exit "$rc"

      - name: Hetzner destroy diagnostics
      - name: Terraform state diagnostics
        if: failure() && steps.destroy.outcome == 'failure'
        env:
          HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
        run: |
          set +e
          echo "== Terraform state list =="
          terraform -chdir=terraform state list || true

          network_id=$(terraform -chdir=terraform state show hcloud_network.cluster 2>/dev/null | awk '/^id *=/ {gsub(/"/, "", $3); print $3; exit}')
          if [ -z "$network_id" ]; then
            network_id="11988935"
          fi

          echo "== Hetzner network =="
          curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/networks/${network_id}" | jq . || true

          echo "== Hetzner servers attached to network =="
          curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers" | jq --argjson id "$network_id" '.servers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true

          echo "== Hetzner load balancers attached to network =="
          curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/load_balancers" | jq --argjson id "$network_id" '.load_balancers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true
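The `terraform init -backend-config=...` invocations in both jobs above complete a partial backend configuration that must exist in `terraform/`. A minimal sketch of that block, assuming an S3-compatible state store: the `key` and the `skip_*` arguments here are assumptions, while `bucket`, `region`, `endpoint`, and the credentials arrive from CI secrets at init time.

```hcl
terraform {
  backend "s3" {
    # Assumed state key; the real value lives in the repo's backend block.
    key = "cluster/terraform.tfstate"

    # Typical settings for non-AWS, S3-compatible endpoints. The remaining
    # arguments (bucket, region, endpoint, access_key, secret_key,
    # skip_requesting_account_id) are supplied via -backend-config.
    skip_credentials_validation = true
    skip_metadata_api_check     = true
    skip_region_validation      = true
  }
}
```

Keeping the backend partial like this keeps the endpoint and credentials out of version control and lets the same configuration target different state stores per environment.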