fix: harden cluster rebuild determinism
Deploy Grafana Content / Grafana Content (push) Failing after 1m14s
Deploy Cluster / Terraform (push) Failing after 4m59s
Deploy Cluster / Ansible (push) Has been skipped

This commit is contained in:
2026-04-30 07:36:27 +00:00
parent f52e657f9f
commit a33a993867
38 changed files with 865 additions and 289 deletions
+24 -1
View File
@@ -27,7 +27,7 @@ env:
jobs:
destroy:
name: Destroy Cluster
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
if: github.event.inputs.confirm == 'destroy'
environment: destroy
steps:
@@ -51,6 +51,7 @@ jobs:
working-directory: terraform
run: |
terraform init \
-lockfile=readonly \
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
-backend-config="region=auto" \
@@ -58,6 +59,19 @@ jobs:
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
-backend-config="skip_requesting_account_id=true"
# Record which Proxmox VMs the Terraform configuration targets, before the
# destroy step runs, so a later step can check them.
#
# Primary path: write the `proxmox_target_vms` Terraform output as JSON to
# outputs/proxmox_target_vms.json.
# Fallback path (the output command fails): run a plan with -refresh=false
# and save it as cleanup.tfplan, then overwrite the targets file with an
# empty JSON list. The plan is best-effort: its failure is ignored by the
# trailing `|| true`.
# NOTE(review): with -chdir=terraform the relative -out path presumably
# resolves to terraform/cleanup.tfplan, which is the path the post-destroy
# verification step tests for - confirm.
- name: Save Proxmox target list
run: |
mkdir -p outputs
if ! terraform -chdir=terraform output -json proxmox_target_vms > outputs/proxmox_target_vms.json; then
terraform -chdir=terraform plan \
-refresh=false \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-out=cleanup.tfplan \
-no-color || true
printf '[]' > outputs/proxmox_target_vms.json
fi
- name: Terraform Destroy
id: destroy
working-directory: terraform
@@ -66,6 +80,7 @@ jobs:
for attempt in 1 2 3; do
echo "Terraform destroy attempt ${attempt}/3"
terraform destroy \
-parallelism=2 \
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
-auto-approve
@@ -83,6 +98,14 @@ jobs:
done
exit "$rc"
# Post-destroy check, run only when every earlier step has succeeded.
#
# Always invokes scripts/proxmox-rebuild-cleanup.py in `post-destroy` mode
# against outputs/proxmox_target_vms.json. If terraform/cleanup.tfplan exists,
# the script is invoked a second time with the Terraform directory and that
# plan file instead of the targets file.
# NOTE(review): the script's own behaviour is not visible here; judging by the
# step name it presumably fails when a target VM still exists - confirm.
- name: Verify Proxmox target VMs removed
if: success()
run: |
python3 scripts/proxmox-rebuild-cleanup.py --mode post-destroy --targets-file outputs/proxmox_target_vms.json
if [ -f terraform/cleanup.tfplan ]; then
python3 scripts/proxmox-rebuild-cleanup.py --mode post-destroy --terraform-dir terraform --plan cleanup.tfplan
fi
- name: Terraform state diagnostics
if: failure() && steps.destroy.outcome == 'failure'
run: |