Files
HetznerTerra/.gitea/workflows/destroy.yml
MichaelFisher1997 9d601dc77c
Some checks failed
Deploy Cluster / Terraform (push) Successful in 4m16s
Deploy Cluster / Ansible (push) Failing after 12m27s
feat: Add CloudNativePG with B2 backups for persistent Rancher database
- Add Local Path Provisioner for storage
- Add CloudNativePG operator (v1.27.0) via Flux
- Create PostgreSQL cluster with B2 (Backblaze) auto-backup/restore
- Update Rancher to use external PostgreSQL via CATTLE_DB_CATTLE_* env vars
- Add weekly pg_dump CronJob to B2 (Sundays 2AM)
- Add pre-destroy backup hook to destroy workflow
- Add B2 credentials to Doppler (B2_ACCOUNT_ID, B2_APPLICATION_KEY)
- Generate RANCHER_DB_PASSWORD in Doppler

Backup location: HetznerTerra/rancher-backups/
Retention: 14 backups
2026-03-25 23:06:45 +00:00

196 lines
7.5 KiB
YAML

name: Destroy
on:
workflow_dispatch:
inputs:
confirm:
description: 'Type "destroy" to confirm'
required: true
default: ''
env:
TF_VERSION: "1.7.0"
TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
B2_ACCOUNT_ID: ${{ secrets.B2_ACCOUNT_ID }}
B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
jobs:
pre-destroy-backup:
name: Pre-Destroy Backup
runs-on: ubuntu-latest
if: github.event.inputs.confirm == 'destroy'
environment: destroy
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Terraform
uses: hashicorp/setup-terraform@v3
with:
terraform_version: ${{ env.TF_VERSION }}
- name: Terraform Init
working-directory: terraform
run: |
terraform init \
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-backend-config="region=auto" \
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"
- name: Setup SSH Keys
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
chmod 644 ~/.ssh/id_ed25519.pub
- name: Get Control Plane IP
id: cp_ip
working-directory: terraform
run: |
PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
echo "PRIMARY_IP=${PRIMARY_IP}" >> "$GITHUB_ENV"
- name: Pre-Destroy pg_dump to B2
run: |
set +e
echo "Attempting pre-destroy backup to B2..."
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@${PRIMARY_IP} << 'EOF'
set -e
# Check if kubectl is available and cluster is up
if ! command -v kubectl &> /dev/null; then
echo "kubectl not found, skipping pre-destroy backup"
exit 0
fi
# Check if we can reach the cluster
if ! kubectl cluster-info &> /dev/null; then
echo "Cannot reach cluster, skipping pre-destroy backup"
exit 0
fi
# Check if CNP is deployed
if ! kubectl get namespace cnpg-cluster &> /dev/null; then
echo "CNP namespace not found, skipping pre-destroy backup"
exit 0
fi
# Run backup using the pgdump image directly
BACKUP_FILE="rancher-backup-$(date +%Y%m%d-%H%M%S).sql.gz"
B2_ACCOUNT_ID="$(cat /etc/kubernetes/secret/b2_account_id 2>/dev/null || echo '')"
B2_APPLICATION_KEY="$(cat /etc/kubernetes/secret/b2_application_key 2>/dev/null || echo '')"
if [ -z "$B2_ACCOUNT_ID" ] || [ -z "$B2_APPLICATION_KEY" ]; then
echo "B2 credentials not found in secret, skipping pre-destroy backup"
exit 0
fi
kubectl run pgdump-manual --image=ghcr.io/cloudnative-pg/pgbackrest:latest --restart=Never \
-n cnpg-cluster --dry-run=client -o yaml | \
kubectl apply -f -
echo "Waiting for backup job to complete..."
kubectl wait --for=condition=complete job/pgdump-manual -n cnpg-cluster --timeout=300s || true
kubectl logs job/pgdump-manual -n cnpg-cluster || true
kubectl delete job pgdump-manual -n cnpg-cluster --ignore-not-found=true || true
EOF
echo "Pre-destroy backup step completed (failure is non-fatal)"
destroy:
name: Destroy Cluster
runs-on: ubuntu-latest
if: github.event.inputs.confirm == 'destroy'
environment: destroy
needs: pre-destroy-backup
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Terraform
uses: hashicorp/setup-terraform@v3
with:
terraform_version: ${{ env.TF_VERSION }}
- name: Terraform Init
working-directory: terraform
run: |
terraform init \
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-backend-config="region=auto" \
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"
- name: Setup SSH Keys
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
chmod 644 ~/.ssh/id_ed25519.pub
- name: Install jq
run: |
apt-get update
apt-get install -y jq
- name: Terraform Destroy
id: destroy
working-directory: terraform
run: |
set +e
for attempt in 1 2 3; do
echo "Terraform destroy attempt ${attempt}/3"
terraform destroy \
-var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-auto-approve
rc=$?
if [ "$rc" -eq 0 ]; then
exit 0
fi
if [ "$attempt" -lt 3 ]; then
echo "Terraform destroy failed with exit code ${rc}; retrying in 30s"
sleep 30
terraform refresh \
-var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" || true
fi
done
exit "$rc"
- name: Hetzner destroy diagnostics
if: failure() && steps.destroy.outcome == 'failure'
env:
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
run: |
set +e
echo "== Terraform state list =="
terraform -chdir=terraform state list || true
network_id=$(terraform -chdir=terraform state show hcloud_network.cluster 2>/dev/null | awk '/^id *=/ {gsub(/"/, "", $3); print $3; exit}')
if [ -z "$network_id" ]; then
network_id="11988935"
fi
echo "== Hetzner network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/networks/${network_id}" | jq . || true
echo "== Hetzner servers attached to network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers" | jq --argjson id "$network_id" '.servers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true
echo "== Hetzner load balancers attached to network =="
curl -fsSL -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/load_balancers" | jq --argjson id "$network_id" '.load_balancers[] | select(any(.private_net[]?; .network == $id)) | {id, name, private_net}' || true