feat: Add CloudNativePG with B2 backups for persistent Rancher database
- Add Local Path Provisioner for storage
- Add CloudNativePG operator (v1.27.0) via Flux
- Create PostgreSQL cluster with B2 (Backblaze) auto-backup/restore
- Update Rancher to use external PostgreSQL via CATTLE_DB_CATTLE_* env vars
- Add weekly pg_dump CronJob to B2 (Sundays 2AM)
- Add pre-destroy backup hook to destroy workflow
- Add B2 credentials to Doppler (B2_ACCOUNT_ID, B2_APPLICATION_KEY)
- Generate RANCHER_DB_PASSWORD in Doppler

Backup location: HetznerTerra/rancher-backups/
Retention: 14 backups
This commit is contained in:
@@ -16,13 +16,101 @@ env:
|
||||
TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
|
||||
TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
|
||||
TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
|
||||
B2_ACCOUNT_ID: ${{ secrets.B2_ACCOUNT_ID }}
|
||||
B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
|
||||
|
||||
jobs:
|
||||
# Best-effort job that triggers one last pg_dump upload before the cluster is
# destroyed. It resolves the primary control-plane IP from Terraform state,
# SSHes in, and starts a one-off Job from the pgdump-rancher CronJob.
pre-destroy-backup:
  name: Pre-Destroy Backup
  runs-on: ubuntu-latest
  if: github.event.inputs.confirm == 'destroy'
  environment: destroy
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3
      with:
        terraform_version: ${{ env.TF_VERSION }}

    - name: Terraform Init
      working-directory: terraform
      run: |
        terraform init \
          -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
          -backend-config="bucket=${{ secrets.S3_BUCKET }}" \
          -backend-config="region=auto" \
          -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
          -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
          -backend-config="skip_requesting_account_id=true"

    - name: Setup SSH Keys
      run: |
        mkdir -p ~/.ssh
        echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
        chmod 600 ~/.ssh/id_ed25519
        echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
        chmod 644 ~/.ssh/id_ed25519.pub

    - name: Get Control Plane IP
      id: cp_ip
      working-directory: terraform
      run: |
        PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
        # Exported through GITHUB_ENV so the next step can read $PRIMARY_IP.
        echo "PRIMARY_IP=${PRIMARY_IP}" >> "$GITHUB_ENV"

    - name: Pre-Destroy pg_dump to B2
      run: |
        # Best effort: a failed backup must not block the destroy job.
        set +e
        echo "Attempting pre-destroy backup to B2..."
        ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "root@${PRIMARY_IP}" << 'EOF'
        set -e
        # Check if kubectl is available and cluster is up
        if ! command -v kubectl > /dev/null 2>&1; then
          echo "kubectl not found, skipping pre-destroy backup"
          exit 0
        fi

        # Check if we can reach the cluster
        if ! kubectl cluster-info > /dev/null 2>&1; then
          echo "Cannot reach cluster, skipping pre-destroy backup"
          exit 0
        fi

        # Check if CNP is deployed
        if ! kubectl get namespace cnpg-cluster > /dev/null 2>&1; then
          echo "CNP namespace not found, skipping pre-destroy backup"
          exit 0
        fi

        # The backup logic (pg_dump + upload, with the b2-credentials Secret
        # mounted) lives in the pgdump-rancher CronJob; reuse it instead of
        # starting a bare pod that has no command and no credentials.
        if ! kubectl get cronjob pgdump-rancher -n cnpg-cluster > /dev/null 2>&1; then
          echo "pgdump-rancher CronJob not found, skipping pre-destroy backup"
          exit 0
        fi

        # Remove any leftover from an earlier run so the fixed name is free.
        kubectl delete job pgdump-manual -n cnpg-cluster --ignore-not-found=true || true
        kubectl create job pgdump-manual --from=cronjob/pgdump-rancher -n cnpg-cluster

        echo "Waiting for backup job to complete..."
        if kubectl wait --for=condition=complete job/pgdump-manual -n cnpg-cluster --timeout=300s; then
          echo "Pre-destroy backup job completed"
        else
          echo "WARNING: pre-destroy backup job did not complete within 300s"
        fi
        kubectl logs job/pgdump-manual -n cnpg-cluster || true
        kubectl delete job pgdump-manual -n cnpg-cluster --ignore-not-found=true || true
        EOF
        echo "Pre-destroy backup step completed (failure is non-fatal)"
|
||||
|
||||
destroy:
|
||||
name: Destroy Cluster
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.inputs.confirm == 'destroy'
|
||||
environment: destroy
|
||||
needs: pre-destroy-backup
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
# Syncs the Backblaze B2 key pair from the doppler-hetznerterra
# ClusterSecretStore into the b2-credentials Secret in cnpg-cluster.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: b2-credentials
  namespace: cnpg-cluster
spec:
  refreshInterval: 1h
  secretStoreRef:
    kind: ClusterSecretStore
    name: doppler-hetznerterra
  # Remote keys fetched from the store; names are reused as template variables.
  data:
    - secretKey: B2_ACCOUNT_ID
      remoteRef:
        key: B2_ACCOUNT_ID
    - secretKey: B2_APPLICATION_KEY
      remoteRef:
        key: B2_APPLICATION_KEY
  target:
    name: b2-credentials
    creationPolicy: Owner
    template:
      type: Opaque
      data:
        B2_ACCOUNT_ID: '{{ .B2_ACCOUNT_ID }}'
        B2_APPLICATION_KEY: '{{ .B2_APPLICATION_KEY }}'
|
||||
19
infrastructure/addons/cnpg/cnpg-cluster-rw-svc.yaml
Normal file
19
infrastructure/addons/cnpg/cnpg-cluster-rw-svc.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
# Headless alias Service giving consumers the stable name
# cnpg-cluster-rw.cnpg-cluster.svc for the primary of the rancher-db cluster.
apiVersion: v1
kind: Service
metadata:
  name: cnpg-cluster-rw
  namespace: cnpg-cluster
  labels:
    app.kubernetes.io/name: rancher-db
    cnpg.io/cluster: rancher-db
spec:
  type: ClusterIP
  clusterIP: None
  ports:
    - port: 5432
      targetPort: 5432
      protocol: TCP
  # Select on the labels the CloudNativePG operator maintains on instance
  # pods. `cnpg.io/instanceRole` replaces the legacy `role` label, and the
  # operator-managed pods are identified by `cnpg.io/cluster`, so the previous
  # selector could resolve to no endpoints.
  selector:
    cnpg.io/cluster: rancher-db
    cnpg.io/instanceRole: primary
|
||||
27
infrastructure/addons/cnpg/helmrelease-cnpg.yaml
Normal file
27
infrastructure/addons/cnpg/helmrelease-cnpg.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
# Installs the CloudNativePG operator into cnpg-system via Flux.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: cnpg
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: cnpg-system
  chart:
    spec:
      chart: cloudnative-pg
      # Chart versions are numbered independently of the operator. 1.27.0 is
      # the operator (app) version; the chart that ships it is 0.26.0.
      # NOTE(review): confirm with `helm search repo cnpg/cloudnative-pg -l`.
      version: 0.26.0
      sourceRef:
        kind: HelmRepository
        name: cnpg
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  # No `values` override: in this chart `image.repository` is the OPERATOR
  # image, so pointing it at ghcr.io/cloudnative-pg/postgresql would run the
  # database image as the controller. The PostgreSQL image is pinned on the
  # Cluster resource (spec.imageName) instead.
|
||||
8
infrastructure/addons/cnpg/helmrepository-cnpg.yaml
Normal file
8
infrastructure/addons/cnpg/helmrepository-cnpg.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
# Flux source for the CloudNativePG Helm charts, re-indexed hourly.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: cnpg
  namespace: flux-system
spec:
  url: https://cloudnative-pg.github.io/charts
  interval: 1h
|
||||
11
infrastructure/addons/cnpg/kustomization.yaml
Normal file
11
infrastructure/addons/cnpg/kustomization.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Kustomize entry point for the CloudNativePG addon: operator release,
# namespace, secrets, database cluster, alias service and dump CronJob.
# NOTE(review): the Cluster resource needs the operator's CRDs, which the
# HelmRelease installs asynchronously — confirm apply ordering on a fresh
# cluster.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrepository-cnpg.yaml
  - helmrelease-cnpg.yaml
  - namespace.yaml
  - b2-credentials-externalsecret.yaml
  - rancher-db-password-externalsecret.yaml
  - postgres-cluster.yaml
  - cnpg-cluster-rw-svc.yaml
  - pgdump-cronjob.yaml
|
||||
4
infrastructure/addons/cnpg/namespace.yaml
Normal file
4
infrastructure/addons/cnpg/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
# Namespace holding the rancher-db cluster, its secrets and the dump CronJob.
kind: Namespace
apiVersion: v1
metadata:
  name: cnpg-cluster
|
||||
61
infrastructure/addons/cnpg/pgdump-cronjob.yaml
Normal file
61
infrastructure/addons/cnpg/pgdump-cronjob.yaml
Normal file
@@ -0,0 +1,61 @@
|
||||
# Weekly logical backup (Sundays 02:00): pg_dump of the `postgres` database,
# gzip-compressed and uploaded to the HetznerTerra bucket on Backblaze B2.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pgdump-rancher
  namespace: cnpg-cluster
spec:
  schedule: "0 2 * * 0"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 4
  failedJobsHistoryLimit: 4
  jobTemplate:
    spec:
      backoffLimit: 3
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: pgdump
              # NOTE(review): confirm this image exists and ships both
              # pg_dump and the aws CLI; also pin a tag instead of :latest.
              image: ghcr.io/cloudnative-pg/pgbackrest:latest
              command:
                - /bin/sh
                - -c
                - |
                  set -eu

                  # Assign first, export after: `export X=$(cmd)` hides a
                  # failing `cmd` from `set -e`.
                  AWS_ACCESS_KEY_ID=$(cat /etc/b2/credentials/B2_ACCOUNT_ID)
                  AWS_SECRET_ACCESS_KEY=$(cat /etc/b2/credentials/B2_APPLICATION_KEY)
                  AWS_DEFAULT_REGION=us-east-005
                  export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_DEFAULT_REGION
                  # The AWS CLI does not read AWS_ENDPOINT; the endpoint is
                  # passed explicitly with --endpoint-url below.
                  B2_S3_ENDPOINT=https://s3.us-east-005.backblazeb2.com

                  BACKUP_FILE="rancher-backup-$(date +%Y%m%d-%H%M%S).sql.gz"
                  DUMP_PATH="/tmp/$BACKUP_FILE"
                  trap 'rm -f "$DUMP_PATH"' EXIT

                  # Dump to a local file first. A `pg_dump | gzip | aws`
                  # pipeline under plain sh reports only the last command's
                  # status, so a failed dump would still upload and "succeed".
                  # --compress on plain-format output yields gzip data.
                  pg_dump -h cnpg-cluster-rw.cnpg-cluster.svc -U postgres -d postgres \
                    --no-owner --clean --compress=6 --file="$DUMP_PATH"

                  aws s3 cp --endpoint-url "$B2_S3_ENDPOINT" \
                    "$DUMP_PATH" "s3://HetznerTerra/rancher-backups/$BACKUP_FILE"

                  echo "Backup completed: $BACKUP_FILE"
              env:
                - name: PGPASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: rancher-db-password
                      key: password
              volumeMounts:
                - name: b2-credentials
                  mountPath: /etc/b2/credentials
                  readOnly: true
              resources:
                requests:
                  cpu: 100m
                  memory: 128Mi
                limits:
                  cpu: 500m
                  memory: 512Mi
          volumes:
            - name: b2-credentials
              secret:
                secretName: b2-credentials
          # Pinned to the first control-plane node, hence the toleration.
          nodeSelector:
            kubernetes.io/hostname: k8s-cluster-cp-1
          tolerations:
            - key: node-role.kubernetes.io/control-plane
              operator: Exists
              effect: NoSchedule
|
||||
70
infrastructure/addons/cnpg/postgres-cluster.yaml
Normal file
70
infrastructure/addons/cnpg/postgres-cluster.yaml
Normal file
@@ -0,0 +1,70 @@
|
||||
# Single-instance CloudNativePG cluster backing Rancher. It bootstraps by
# recovering from the Backblaze B2 object store and archives back to it.
# NOTE(review): bootstrap.recovery needs an existing base backup under the
# configured path, and nothing visible here schedules base backups (no
# ScheduledBackup) — confirm the first-install flow. Also confirm whether the
# recovery source and the new archive need distinct `serverName`s, since both
# point at the same destinationPath.
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: rancher-db
  namespace: cnpg-cluster
spec:
  description: "Rancher external database cluster"
  imageName: ghcr.io/cloudnative-pg/postgresql:17.4
  imagePullPolicy: IfNotPresent

  instances: 1
  primaryUpdateStrategy: unsupervised

  # `resizeStorageStorageClassName` is not a field of the storage spec and
  # has been dropped.
  storage:
    storageClass: local-path
    size: 50Gi

  resources:
    requests:
      cpu: 250m
      memory: 512Mi
    limits:
      cpu: 1000m
      memory: 2Gi

  # recovery must name the external cluster to restore from.
  bootstrap:
    recovery:
      source: b2-backup

  # externalClusters is a spec-level list; S3-compatible stores are described
  # with barmanObjectStore (destinationPath / endpointURL / s3Credentials).
  externalClusters:
    - name: b2-backup
      barmanObjectStore:
        destinationPath: s3://HetznerTerra/rancher-backups/
        endpointURL: https://s3.us-east-005.backblazeb2.com
        s3Credentials:
          accessKeyId:
            name: b2-credentials
            key: B2_ACCOUNT_ID
          secretAccessKey:
            name: b2-credentials
            key: B2_APPLICATION_KEY

  backup:
    barmanObjectStore:
      destinationPath: s3://HetznerTerra/rancher-backups/
      endpointURL: https://s3.us-east-005.backblazeb2.com
      s3Credentials:
        accessKeyId:
          name: b2-credentials
          key: B2_ACCOUNT_ID
        secretAccessKey:
          name: b2-credentials
          key: B2_APPLICATION_KEY
    # Retention is a time window (<n>d|w|m), not a backup count; "keep14" is
    # rejected. NOTE(review): 14d assumed as the closest match to "14 backups".
    retentionPolicy: "14d"

  serviceAccountTemplate:
    metadata:
      labels:
        app.kubernetes.io/name: rancher-db

  # superuserSecret is only honoured when superuser access is enabled; the
  # dump CronJob and Rancher both connect as `postgres` with this password.
  enableSuperuserAccess: true
  superuserSecret:
    name: rancher-db-password

  monitoring:
    enablePodMonitor: true

  # Pinned to the first control-plane node, hence the toleration.
  affinity:
    nodeSelector:
      kubernetes.io/hostname: k8s-cluster-cp-1
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
|
||||
@@ -0,0 +1,21 @@
|
||||
# Syncs RANCHER_DB_PASSWORD from the doppler-hetznerterra ClusterSecretStore
# into the rancher-db-password Secret in cnpg-cluster.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: rancher-db-password
  namespace: cnpg-cluster
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: doppler-hetznerterra
    kind: ClusterSecretStore
  target:
    name: rancher-db-password
    creationPolicy: Owner
    template:
      type: Opaque
      data:
        # The rancher-db Cluster references this Secret as superuserSecret.
        # CloudNativePG presumably expects basic-auth style keys there
        # (username + password) — verify against the operator docs. Consumers
        # that read only `password` are unaffected by the extra key.
        username: postgres
        password: "{{ .RANCHER_DB_PASSWORD }}"
  data:
    - secretKey: RANCHER_DB_PASSWORD
      remoteRef:
        key: RANCHER_DB_PASSWORD
|
||||
4
infrastructure/addons/kustomization-cnpg.yaml
Normal file
4
infrastructure/addons/kustomization-cnpg.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
# Pulls the cnpg/ addon directory into the addons build.
# NOTE(review): this is a kustomize.config.k8s.io Kustomization in a file that
# is not named kustomization.yaml. If the parent lists this file under
# `resources`, confirm it is processed as intended; a Flux Kustomization
# (kustomize.toolkit.fluxcd.io) may have been meant.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - cnpg
|
||||
4
infrastructure/addons/kustomization-lpp.yaml
Normal file
4
infrastructure/addons/kustomization-lpp.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
# Pulls the lpp/ (Local Path Provisioner) addon directory into the addons build.
# NOTE(review): this is a kustomize.config.k8s.io Kustomization in a file that
# is not named kustomization.yaml. If the parent lists this file under
# `resources`, confirm it is processed as intended; a Flux Kustomization
# (kustomize.toolkit.fluxcd.io) may have been meant.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - lpp
|
||||
@@ -10,4 +10,6 @@ resources:
|
||||
- kustomization-flux-ui.yaml
|
||||
- kustomization-observability.yaml
|
||||
- kustomization-observability-content.yaml
|
||||
- kustomization-rancher.yaml
|
||||
- kustomization-lpp.yaml
|
||||
- kustomization-cnpg.yaml
|
||||
- kustomization-rancher.yaml
|
||||
@@ -0,0 +1,31 @@
|
||||
# Installs the Local Path Provisioner into kube-system and exposes it as the
# default `local-path` StorageClass.
# NOTE(review): the storage path suggests k3s, which may already provide a
# `local-path` StorageClass — confirm the two do not conflict. Also confirm
# the chart name/version are published by the referenced HelmRepository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: local-path-provisioner
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: kube-system
  chart:
    spec:
      chart: local-path-provisioner
      version: 1.12.1
      sourceRef:
        kind: HelmRepository
        name: local-path
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    nodePathMap:
      # `node` is a node name, not a path. The special name below is the
      # provisioner's catch-all entry, so every node uses this directory.
      - node: DEFAULT_PATH_FOR_NON_LISTED_NODES
        paths:
          - /var/lib/rancher/k3s/storage
    storageClass:
      defaultClass: true
      name: local-path
|
||||
8
infrastructure/addons/lpp/helmrepository-local-path.yaml
Normal file
8
infrastructure/addons/lpp/helmrepository-local-path.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
# Flux source used by the local-path-provisioner HelmRelease, re-indexed hourly.
# NOTE(review): confirm this repository actually publishes a
# `local-path-provisioner` chart.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: local-path
  namespace: flux-system
spec:
  url: https://charts.rancher.io
  interval: 1h
|
||||
5
infrastructure/addons/lpp/kustomization.yaml
Normal file
5
infrastructure/addons/lpp/kustomization.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
# Kustomize entry point for the Local Path Provisioner addon: chart source
# plus the HelmRelease that installs it.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - helmrepository-local-path.yaml
  - helmrelease-local-path-provisioner.yaml
|
||||
@@ -26,6 +26,19 @@ spec:
|
||||
tls: external
|
||||
replicas: 1
|
||||
extraEnv:
|
||||
- name: CATTLE_DB_CATTLE_HOST
|
||||
value: cnpg-cluster-rw.cnpg-cluster.svc
|
||||
- name: CATTLE_DB_CATTLE_PORT
|
||||
value: "5432"
|
||||
- name: CATTLE_DB_CATTLE_DATABASE
|
||||
value: postgres
|
||||
- name: CATTLE_DB_CATTLE_USERNAME
|
||||
value: postgres
|
||||
- name: CATTLE_DB_CATTLE_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: rancher-db-password
|
||||
key: password
|
||||
- name: CATTLE_PROMETHEUS_METRICS
|
||||
value: "true"
|
||||
resources:
|
||||
|
||||
Reference in New Issue
Block a user