name: Deploy Cluster on: push: branches: - main pull_request: branches: - main workflow_dispatch: env: TF_VERSION: "1.7.0" TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }} TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }} TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }} TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }} TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }} TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }} TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }} TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }} TF_VAR_proxmox_insecure: "true" TS_OAUTH_CLIENT_ID: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }} TS_OAUTH_CLIENT_SECRET: ${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }} jobs: terraform: name: Terraform runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Setup Terraform uses: hashicorp/setup-terraform@v3 with: terraform_version: ${{ env.TF_VERSION }} - name: Terraform Format Check working-directory: terraform run: terraform fmt -check -recursive - name: Terraform Init working-directory: terraform run: | terraform init \ -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \ -backend-config="bucket=${{ secrets.S3_BUCKET }}" \ -backend-config="region=auto" \ -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \ -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \ -backend-config="skip_requesting_account_id=true" - name: Terraform Validate working-directory: terraform run: terraform validate - name: Setup SSH Keys run: | mkdir -p ~/.ssh echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519 chmod 600 ~/.ssh/id_ed25519 echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub chmod 644 ~/.ssh/id_ed25519.pub - name: Terraform Plan id: plan working-directory: terraform run: | terraform plan \ -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \ -var="ssh_private_key=$HOME/.ssh/id_ed25519" \ -out=tfplan \ -no-color continue-on-error: true - name: Post Plan to PR if: github.event_name == 'pull_request' uses: actions/github-script@v7 with: script: | const output = `#### Terraform Plan \`\`\` ${{ steps.plan.outputs.stdout }} \`\`\``; github.rest.issues.createComment({ issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, body: output }); - name: Fail if plan failed if: steps.plan.outcome == 'failure' run: exit 1 - name: Cleanup orphan Proxmox cloud-init volumes if: github.ref == 'refs/heads/main' && github.event_name == 'push' run: | set -euo pipefail python3 - <<'PY' import os import ssl import urllib.error import urllib.parse import urllib.request endpoint = os.environ["TF_VAR_proxmox_endpoint"].strip().removesuffix("/api2/json").rstrip("/") token_id = os.environ["TF_VAR_proxmox_api_token_id"] token_secret = os.environ["TF_VAR_proxmox_api_token_secret"] insecure = os.environ.get("TF_VAR_proxmox_insecure", "false").lower() == "true" node = "flex" storage = "Flash" vm_ids = [200, 201, 202, 210, 211, 212, 213, 214] context = ssl._create_unverified_context() if insecure else None headers = {"Authorization": f"PVEAPIToken={token_id}={token_secret}"} def request(method, path): req = urllib.request.Request( f"{endpoint}/api2/json{path}", method=method, headers=headers, ) return urllib.request.urlopen(req, context=context, timeout=30) def vm_exists(vmid): try: request("GET", f"/nodes/{node}/qemu/{vmid}/status/current").close() return True except urllib.error.HTTPError as err: if err.code == 404: return False if err.code == 500 and "conf' does not exist" in err.reason: return False raise for vmid in vm_ids: if vm_exists(vmid): print(f"VM {vmid} exists; keeping cloud-init volume") continue volume = urllib.parse.quote(f"{storage}:vm-{vmid}-cloudinit", safe="") try: request("DELETE", f"/nodes/{node}/storage/{storage}/content/{volume}").close() print(f"Deleted orphan cloud-init volume for VM {vmid}") except urllib.error.HTTPError as err: if err.code == 404: print(f"No orphan cloud-init volume for VM {vmid}") continue raise PY - name: Terraform Apply if: github.ref == 'refs/heads/main' && github.event_name == 'push' working-directory: terraform run: | set -euo pipefail run_apply() { local log_file="$1" terraform apply \ -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \ -var="ssh_private_key=$HOME/.ssh/id_ed25519" \ -auto-approve 2>&1 | tee "${log_file}" return "${PIPESTATUS[0]}" } cleanup_untracked_target_vms() { python3 - <<'PY' import os import ssl import subprocess import time import urllib.error import urllib.parse import urllib.request endpoint = os.environ["TF_VAR_proxmox_endpoint"].strip().removesuffix("/api2/json").rstrip("/") token_id = os.environ["TF_VAR_proxmox_api_token_id"] token_secret = os.environ["TF_VAR_proxmox_api_token_secret"] insecure = os.environ.get("TF_VAR_proxmox_insecure", "false").lower() == "true" node = "flex" storage = "Flash" context = ssl._create_unverified_context() if insecure else None headers = {"Authorization": f"PVEAPIToken={token_id}={token_secret}"} targets = { 'proxmox_virtual_environment_vm.nodes["k8s-cluster-cp-1"]': (200, "k8s-cluster-cp-1"), 'proxmox_virtual_environment_vm.nodes["k8s-cluster-cp-2"]': (201, "k8s-cluster-cp-2"), 'proxmox_virtual_environment_vm.nodes["k8s-cluster-cp-3"]': (202, "k8s-cluster-cp-3"), 'proxmox_virtual_environment_vm.nodes["k8s-cluster-worker-1"]': (210, "k8s-cluster-worker-1"), 'proxmox_virtual_environment_vm.nodes["k8s-cluster-worker-2"]': (211, "k8s-cluster-worker-2"), 'proxmox_virtual_environment_vm.nodes["k8s-cluster-worker-3"]': (212, "k8s-cluster-worker-3"), 'proxmox_virtual_environment_vm.nodes["k8s-cluster-worker-4"]': (213, "k8s-cluster-worker-4"), 'proxmox_virtual_environment_vm.nodes["k8s-cluster-worker-5"]': (214, "k8s-cluster-worker-5"), } def request(method, path, data=None): body = None req_headers = dict(headers) if data is not None: encoded = urllib.parse.urlencode(data) if method == "DELETE": path = f"{path}?{encoded}" else: body = encoded.encode() req_headers["Content-Type"] = "application/x-www-form-urlencoded" req = urllib.request.Request( f"{endpoint}/api2/json{path}", method=method, headers=req_headers, data=body, ) with urllib.request.urlopen(req, context=context, timeout=60) as resp: return resp.read() def vm_status(vmid): try: request("GET", f"/nodes/{node}/qemu/{vmid}/status/current") return True except urllib.error.HTTPError as err: if err.code == 404 or (err.code == 500 and "conf' does not exist" in err.reason): return False raise def vm_config(vmid): try: raw = request("GET", f"/nodes/{node}/qemu/{vmid}/config") except urllib.error.HTTPError as err: if err.code == 404 or (err.code == 500 and "conf' does not exist" in err.reason): return {} raise import json return json.loads(raw).get("data", {}) def wait_absent(vmid): for _ in range(60): if not vm_status(vmid): return time.sleep(5) raise RuntimeError(f"VM {vmid} still exists after delete") state = set( subprocess.run( ["terraform", "state", "list"], check=False, text=True, stdout=subprocess.PIPE, ).stdout.splitlines() ) for address, (vmid, expected_name) in targets.items(): if address in state: continue if not vm_status(vmid): continue config = vm_config(vmid) actual_name = config.get("name") if actual_name != expected_name: raise RuntimeError( f"Refusing to delete VM {vmid}: expected name {expected_name!r}, got {actual_name!r}" ) print(f"Deleting partial Terraform-untracked VM {vmid} ({expected_name}) before retry") try: request("POST", f"/nodes/{node}/qemu/{vmid}/status/stop") time.sleep(10) except urllib.error.HTTPError as err: if err.code not in (400, 500): raise request( "DELETE", f"/nodes/{node}/qemu/{vmid}", {"purge": "1", "destroy-unreferenced-disks": "1"}, ) wait_absent(vmid) volume = urllib.parse.quote(f"{storage}:vm-{vmid}-cloudinit", safe="") try: request("DELETE", f"/nodes/{node}/storage/{storage}/content/{volume}") except urllib.error.HTTPError as err: if err.code != 404: raise PY } for attempt in 1 2 3; do log_file="/tmp/terraform-apply-${attempt}.log" if run_apply "${log_file}"; then exit 0 fi if [ "${attempt}" = "3" ]; then exit 1 fi if ! grep -Eq 'HTTP 596|Broken pipe|disk update fails' "${log_file}"; then exit 1 fi echo "Terraform apply hit transient Proxmox API failure; cleaning partial VM creates before retry ${attempt}/2" cleanup_untracked_target_vms sleep 20 done - name: Save Terraform Outputs if: github.ref == 'refs/heads/main' && github.event_name == 'push' run: | mkdir -p outputs terraform output -json > outputs/terraform_outputs.json working-directory: terraform - name: Upload Outputs if: github.ref == 'refs/heads/main' && github.event_name == 'push' uses: actions/upload-artifact@v3 with: name: terraform-outputs path: outputs/terraform_outputs.json ansible: name: Ansible runs-on: ubuntu-latest needs: terraform if: github.ref == 'refs/heads/main' && github.event_name == 'push' steps: - name: Checkout uses: actions/checkout@v4 - name: Setup Terraform uses: hashicorp/setup-terraform@v3 with: terraform_version: ${{ env.TF_VERSION }} - name: Setup SSH Keys run: | mkdir -p ~/.ssh echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519 chmod 600 ~/.ssh/id_ed25519 echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub chmod 644 ~/.ssh/id_ed25519.pub - name: Terraform Init working-directory: terraform run: | terraform init \ -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \ -backend-config="bucket=${{ secrets.S3_BUCKET }}" \ -backend-config="region=auto" \ -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \ -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \ -backend-config="skip_requesting_account_id=true" - name: Get Terraform Outputs working-directory: terraform run: | mkdir -p ../outputs terraform output -json > ../outputs/terraform_outputs.json - name: Install Python Dependencies run: | apt-get update && apt-get install -y python3-pip pip3 install --break-system-packages ansible kubernetes jinja2 pyyaml - name: Install Ansible Collections run: ansible-galaxy collection install -r ansible/requirements.yml - name: Install skopeo run: | apt-get update apt-get install -y skopeo - name: Generate Ansible Inventory working-directory: ansible run: python3 generate_inventory.py - name: Prepare kube-vip image archive run: | set -euo pipefail mkdir -p outputs for attempt in 1 2 3; do if skopeo copy \ docker://ghcr.io/kube-vip/kube-vip:v1.1.2 \ docker-archive:outputs/kube-vip-bootstrap.tar:ghcr.io/kube-vip/kube-vip:v1.1.2; then exit 0 fi sleep 10 done echo "Failed to prepare kube-vip image archive on runner" >&2 exit 1 - name: Prepare bootstrap image archives run: | set -euo pipefail archive_name() { printf '%s' "$1" | tr '/:' '__' } prepare_image_archive() { local image="$1" local archive="outputs/bootstrap-image-archives/$(archive_name "${image}").tar" mkdir -p outputs/bootstrap-image-archives for attempt in 1 2 3; do if skopeo copy "docker://${image}" "docker-archive:${archive}:${image}"; then return 0 fi sleep 10 done echo "Failed to prepare bootstrap image archive for ${image}" >&2 return 1 } for image in \ ghcr.io/fluxcd/source-controller:v1.8.0 \ ghcr.io/fluxcd/kustomize-controller:v1.8.1 \ ghcr.io/fluxcd/helm-controller:v1.5.1 \ ghcr.io/fluxcd/notification-controller:v1.8.1 \ oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 \ ghcr.io/tailscale/k8s-operator:v1.96.5 \ ghcr.io/tailscale/tailscale:v1.96.5 \ registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2 \ docker.io/grafana/loki:3.5.7 \ docker.io/kiwigrid/k8s-sidecar:1.30.10 \ docker.io/grafana/promtail:3.0.0 \ docker.io/grafana/grafana:11.4.0 \ quay.io/prometheus-operator/prometheus-operator:v0.79.2 \ quay.io/prometheus-operator/prometheus-config-reloader:v0.79.2 \ quay.io/prometheus/prometheus:v3.1.0 \ registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0 \ quay.io/prometheus/node-exporter:v1.8.2; do prepare_image_archive "${image}" done - name: Run Ansible Playbook working-directory: ansible run: | ansible-playbook site.yml \ -e "tailscale_auth_key=${{ secrets.TAILSCALE_AUTH_KEY }}" \ -e "tailscale_tailnet=${{ secrets.TAILSCALE_TAILNET }}" \ -e "tailscale_oauth_client_id=${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}" \ -e "tailscale_oauth_client_secret=${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}" \ -e "doppler_hetznerterra_service_token=${{ secrets.DOPPLER_HETZNERTERRA_SERVICE_TOKEN }}" \ -e "tailscale_api_key=${{ secrets.TAILSCALE_API_KEY }}" \ -e "grafana_admin_password=${{ secrets.GRAFANA_ADMIN_PASSWORD }}" \ -e "cluster_name=k8s-cluster" env: ANSIBLE_HOST_KEY_CHECKING: "False" - name: Install kubectl run: | curl -fsSL -o /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" chmod +x /usr/local/bin/kubectl - name: Rewrite kubeconfig for runner-reachable API working-directory: terraform run: | set -euo pipefail PRIMARY_IP=$(terraform output -raw primary_control_plane_ip) sed -i "s#https://k8s-cluster-cp-1\.[^:]*:6443#https://${PRIMARY_IP}:6443#g" ../outputs/kubeconfig - name: Bootstrap Flux source and reconciliation graph env: KUBECONFIG: outputs/kubeconfig FLUX_GIT_HOST: 64.176.189.59 FLUX_GIT_PORT: "2222" run: | set -euo pipefail flux_rollout_status() { local deployment="$1" if ! kubectl -n flux-system rollout status "deployment/${deployment}" --timeout=900s; then kubectl -n flux-system get pods -o wide kubectl -n flux-system describe deployment "${deployment}" kubectl -n flux-system describe pods -l "app=${deployment}" exit 1 fi } wait_for_resource() { local namespace="$1" local resource="$2" local timeout_seconds="$3" local elapsed=0 until { if [ -n "${namespace}" ]; then kubectl -n "${namespace}" get "${resource}" >/dev/null 2>&1 else kubectl get "${resource}" >/dev/null 2>&1 fi }; do if [ "${elapsed}" -ge "${timeout_seconds}" ]; then echo "Timed out waiting for ${resource} to exist" >&2 kubectl -n flux-system get kustomizations,helmreleases || true exit 1 fi sleep 10 elapsed=$((elapsed + 10)) done } pull_required_image() { local image="$1" local host_ip="$2" local attempts="$3" local sleep_seconds="$4" local failure_message="$5" local pulled=false for attempt in $(seq 1 "${attempts}"); do echo "Pre-pulling ${image} on ${host_ip} (${attempt}/${attempts})" if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \ "sudo k3s crictl inspecti '${image}' >/dev/null 2>&1 || (sudo k3s crictl pull '${image}' && sudo k3s crictl inspecti '${image}' >/dev/null 2>&1)"; then pulled=true break fi sleep "${sleep_seconds}" done if [ "${pulled}" != "true" ]; then echo "${failure_message} ${image} on ${host_ip}" >&2 exit 1 fi } import_required_image() { local image="$1" local host_ip="$2" local archive_name local archive_path archive_name="$(printf '%s' "${image}" | tr '/:' '__').tar" archive_path="outputs/bootstrap-image-archives/${archive_name}" if [ ! -s "${archive_path}" ]; then echo "Missing required bootstrap image archive ${archive_path} for ${image}" >&2 exit 1 fi if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \ "sudo k3s crictl inspecti '${image}' >/dev/null 2>&1"; then return 0 fi echo "Importing ${image} archive on ${host_ip}" timeout 180s scp -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 \ "${archive_path}" "ubuntu@${host_ip}:/tmp/${archive_name}" timeout 300s ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 "ubuntu@${host_ip}" \ "set -euo pipefail; \ if sudo k3s crictl inspecti '${image}' >/dev/null 2>&1; then exit 0; fi; \ for attempt in 1 2 3 4 5; do \ echo 'Importing ${image} archive with ctr'; \ if sudo k3s ctr -n k8s.io images import '/tmp/${archive_name}' && sudo k3s crictl inspecti '${image}' >/dev/null; then exit 0; fi; \ sleep 10; \ done; \ sudo systemctl status k3s --no-pager -l || true; \ sudo journalctl -u k3s -n 80 --no-pager || true; \ exit 1" } import_required_image_on_all_nodes() { local image="$1" local host_ip for host_ip in ${ALL_NODE_IPS}; do import_required_image "${image}" "${host_ip}" done } eso_diagnostics() { kubectl -n flux-system get kustomizations,ocirepositories,helmrepositories,helmcharts,helmreleases || true kubectl -n flux-system describe kustomization addon-external-secrets || true kubectl -n flux-system describe ocirepository external-secrets || true kubectl -n flux-system describe helmrelease external-secrets || true kubectl -n external-secrets get pods -o wide || true } wait_for_helmrelease_ready() { local release_name="$1" local target_namespace="$2" local timeout_seconds="$3" local elapsed=0 local ready local stalled while [ "${elapsed}" -lt "${timeout_seconds}" ]; do ready="$(kubectl -n flux-system get "helmrelease/${release_name}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)" stalled="$(kubectl -n flux-system get "helmrelease/${release_name}" -o jsonpath='{.status.conditions[?(@.type=="Stalled")].status}' 2>/dev/null || true)" if [ "${ready}" = "True" ]; then return 0 fi if [ "${stalled}" = "True" ]; then echo "HelmRelease ${release_name} is stalled" >&2 kubectl -n flux-system describe "helmrelease/${release_name}" || true kubectl -n "${target_namespace}" get pods -o wide || true exit 1 fi sleep 10 elapsed=$((elapsed + 10)) done echo "Timed out waiting for HelmRelease ${release_name} to become Ready" >&2 kubectl -n flux-system describe "helmrelease/${release_name}" || true kubectl -n "${target_namespace}" get pods -o wide || true exit 1 } wait_for_flux_oci_helm_release() { local oci_name="$1" local release_name="$2" local target_namespace="$3" local oci_timeout="$4" local release_timeout="$5" local reconcile_at local artifact_storage wait_for_resource flux-system "ocirepository.source.toolkit.fluxcd.io/${oci_name}" 600 reconcile_at="$(date +%s)" kubectl -n flux-system annotate "helmrelease/${release_name}" \ reconcile.fluxcd.io/requestedAt="${reconcile_at}" \ reconcile.fluxcd.io/resetAt="${reconcile_at}" \ reconcile.fluxcd.io/forceAt="${reconcile_at}" \ --overwrite if ! kubectl -n flux-system wait --for=condition=Ready "ocirepository/${oci_name}" --timeout="${oci_timeout}"; then artifact_storage="$(kubectl -n flux-system get "ocirepository/${oci_name}" -o jsonpath='{.status.conditions[?(@.type=="ArtifactInStorage")].status}' 2>/dev/null || true)" if [ "${artifact_storage}" = "True" ]; then echo "OCIRepository ${oci_name} is not currently Ready; continuing with cached artifact" >&2 else eso_diagnostics exit 1 fi fi wait_for_helmrelease_ready "${release_name}" "${target_namespace}" "${release_timeout}" } flux_helm_diagnostics() { local repo_name="$1" local chart_name="$2" local release_name="$3" local target_namespace="$4" kubectl -n flux-system get helmrepositories,helmcharts,helmreleases || true kubectl -n flux-system describe helmrepository "${repo_name}" || true kubectl -n flux-system describe helmchart.source.toolkit.fluxcd.io "${chart_name}" || true kubectl -n flux-system describe helmrelease "${release_name}" || true kubectl -n "${target_namespace}" get pods -o wide || true } wait_for_flux_helm_release() { local repo_name="$1" local chart_name="$2" local release_name="$3" local target_namespace="$4" local repo_timeout="$5" local chart_timeout="$6" local release_timeout="$7" local reconcile_at wait_for_resource flux-system "helmrepository.source.toolkit.fluxcd.io/${repo_name}" 600 if ! kubectl -n flux-system wait --for=condition=Ready "helmrepository/${repo_name}" --timeout="${repo_timeout}"; then echo "HelmRepository ${repo_name} is not currently Ready; continuing because a cached artifact may still satisfy HelmChart ${chart_name}" >&2 kubectl -n flux-system describe helmrepository "${repo_name}" || true fi wait_for_resource flux-system "helmchart.source.toolkit.fluxcd.io/${chart_name}" 600 reconcile_at="$(date +%s)" kubectl -n flux-system annotate "helmchart.source.toolkit.fluxcd.io/${chart_name}" reconcile.fluxcd.io/requestedAt="${reconcile_at}" --overwrite kubectl -n flux-system annotate "helmrelease/${release_name}" \ reconcile.fluxcd.io/requestedAt="${reconcile_at}" \ reconcile.fluxcd.io/resetAt="${reconcile_at}" \ reconcile.fluxcd.io/forceAt="${reconcile_at}" \ --overwrite for attempt in $(seq 1 6); do if kubectl -n flux-system wait --for=condition=Ready "helmchart.source.toolkit.fluxcd.io/${chart_name}" --timeout="${chart_timeout}"; then wait_for_helmrelease_ready "${release_name}" "${target_namespace}" "${release_timeout}" return 0 fi echo "HelmChart ${chart_name} did not become Ready after ${chart_timeout}; forcing retry (${attempt}/6)" >&2 reconcile_at="$(date +%s)" kubectl -n flux-system annotate "helmchart.source.toolkit.fluxcd.io/${chart_name}" reconcile.fluxcd.io/requestedAt="${reconcile_at}" --overwrite kubectl -n flux-system annotate "helmrelease/${release_name}" reconcile.fluxcd.io/requestedAt="${reconcile_at}" --overwrite done flux_helm_diagnostics "${repo_name}" "${chart_name}" "${release_name}" "${target_namespace}" exit 1 } kubectl create namespace flux-system --dry-run=client -o yaml | kubectl apply -f - ssh-keyscan -p "${FLUX_GIT_PORT}" "${FLUX_GIT_HOST}" > /tmp/flux_known_hosts kubectl -n flux-system create secret generic flux-system \ --from-file=identity="$HOME/.ssh/id_ed25519" \ --from-file=known_hosts=/tmp/flux_known_hosts \ --dry-run=client -o yaml | kubectl apply -f - PRIMARY_CP_IP=$(python3 -c 'import json; print(json.load(open("outputs/terraform_outputs.json"))["primary_control_plane_ip"]["value"])') ALL_NODE_IPS=$(python3 -c 'import json; outputs = json.load(open("outputs/terraform_outputs.json")); print(" ".join(outputs["control_plane_ips"]["value"] + outputs["worker_ips"]["value"]))') for image in \ ghcr.io/fluxcd/source-controller:v1.8.0 \ ghcr.io/fluxcd/kustomize-controller:v1.8.1 \ ghcr.io/fluxcd/helm-controller:v1.5.1 \ ghcr.io/fluxcd/notification-controller:v1.8.1; do import_required_image "${image}" "${PRIMARY_CP_IP}" done # Apply CRDs and controllers first kubectl apply -f clusters/prod/flux-system/gotk-components.yaml # Wait for CRDs to be established kubectl wait --for=condition=Established crd --all --timeout=120s # Then apply custom resources kubectl apply -f clusters/prod/flux-system/gitrepository-platform.yaml kubectl apply -f clusters/prod/flux-system/kustomization-infrastructure.yaml kubectl apply -f clusters/prod/flux-system/kustomization-apps.yaml # Patch Flux controllers to run on cp-1 and tolerate the control-plane taint PATCH='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"},"tolerations":[{"key":"node-role.kubernetes.io/control-plane","operator":"Exists","effect":"NoSchedule"}]}}}}' kubectl -n flux-system patch deployment source-controller --type='merge' -p="$PATCH" kubectl -n flux-system patch deployment kustomize-controller --type='merge' -p="$PATCH" kubectl -n flux-system patch deployment helm-controller --type='merge' -p="$PATCH" kubectl -n flux-system patch deployment notification-controller --type='merge' -p="$PATCH" kubectl -n flux-system delete pod --field-selector=status.phase!=Running || true flux_rollout_status source-controller flux_rollout_status kustomize-controller flux_rollout_status helm-controller kubectl -n flux-system wait --for=condition=Ready gitrepository/platform --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=600s # Wait directly on the ESO Helm objects; Kustomization readiness hides useful failure details. wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-external-secrets 600 kubectl -n flux-system annotate kustomization/addon-external-secrets reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite import_required_image oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 "${PRIMARY_CP_IP}" wait_for_flux_oci_helm_release external-secrets external-secrets external-secrets 600s 600 wait_for_resource "" crd/clustersecretstores.external-secrets.io 900 wait_for_resource "" crd/externalsecrets.external-secrets.io 900 kubectl wait --for=condition=established --timeout=600s crd/clustersecretstores.external-secrets.io kubectl wait --for=condition=established --timeout=600s crd/externalsecrets.external-secrets.io kubectl -n external-secrets rollout status deployment/external-secrets-external-secrets --timeout=600s kubectl -n external-secrets rollout status deployment/external-secrets-external-secrets-webhook --timeout=600s wait_for_resource external-secrets service/external-secrets-external-secrets-webhook 600 wait_for_resource external-secrets endpoints/external-secrets-external-secrets-webhook 600 kubectl -n external-secrets wait --for=jsonpath='{.subsets[0].addresses[0].ip}' endpoints/external-secrets-external-secrets-webhook --timeout=600s # Create Doppler ClusterSecretStore now that ESO CRDs are available kubectl apply -f - <<'EOF' apiVersion: external-secrets.io/v1 kind: ClusterSecretStore metadata: name: doppler-hetznerterra spec: provider: doppler: auth: secretRef: dopplerToken: name: doppler-hetznerterra-service-token key: dopplerToken namespace: external-secrets EOF # Wait for the storage layer and private access components import_required_image ghcr.io/tailscale/k8s-operator:v1.96.5 "${PRIMARY_CP_IP}" import_required_image ghcr.io/tailscale/tailscale:v1.96.5 "${PRIMARY_CP_IP}" kubectl -n flux-system annotate kustomization/addon-tailscale-operator reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=600s kubectl -n tailscale-system rollout status deployment/operator --timeout=600s import_required_image registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2 "${PRIMARY_CP_IP}" kubectl -n flux-system annotate kustomization/addon-nfs-storage reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s kubectl -n kube-system rollout status deployment/nfs-subdir-external-provisioner --timeout=300s kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite kubectl get storageclass flash-nfs - name: Wait for Rancher and backup operator env: KUBECONFIG: outputs/kubeconfig run: | set -euo pipefail wait_for_resource() { local namespace="$1" local resource="$2" local timeout_seconds="$3" local elapsed=0 until { if [ -n "${namespace}" ]; then kubectl -n "${namespace}" get "${resource}" >/dev/null 2>&1 else kubectl get "${resource}" >/dev/null 2>&1 fi }; do if [ "${elapsed}" -ge "${timeout_seconds}" ]; then echo "Timed out waiting for ${resource} to exist" >&2 kubectl -n flux-system get kustomizations,helmrepositories,helmcharts,helmreleases || true exit 1 fi sleep 10 elapsed=$((elapsed + 10)) done } reconcile_helmrelease() { local release_name="$1" local reconcile_at reconcile_at="$(date +%s)" kubectl -n flux-system annotate "helmrelease/${release_name}" \ reconcile.fluxcd.io/requestedAt="${reconcile_at}" \ reconcile.fluxcd.io/resetAt="${reconcile_at}" \ reconcile.fluxcd.io/forceAt="${reconcile_at}" \ --overwrite } wait_for_helmchart_ready() { local chart_name="$1" local release_name="$2" local timeout="$3" local attempts="$4" local reconcile_at wait_for_resource flux-system "helmchart.source.toolkit.fluxcd.io/${chart_name}" 600 for attempt in $(seq 1 "${attempts}"); do reconcile_at="$(date +%s)" kubectl -n flux-system annotate "helmchart.source.toolkit.fluxcd.io/${chart_name}" reconcile.fluxcd.io/requestedAt="${reconcile_at}" --overwrite kubectl -n flux-system annotate "helmrelease/${release_name}" reconcile.fluxcd.io/requestedAt="${reconcile_at}" --overwrite if kubectl -n flux-system wait --for=condition=Ready "helmchart.source.toolkit.fluxcd.io/${chart_name}" --timeout="${timeout}"; then return 0 fi echo "HelmChart ${chart_name} did not become Ready after ${timeout}; forcing retry (${attempt}/${attempts})" >&2 done kubectl -n flux-system describe "helmchart.source.toolkit.fluxcd.io/${chart_name}" || true kubectl -n flux-system describe "helmrelease/${release_name}" || true exit 1 } wait_for_helmrelease_ready() { local release_name="$1" local target_namespace="$2" local timeout_seconds="$3" local elapsed=0 local ready local stalled while [ "${elapsed}" -lt "${timeout_seconds}" ]; do ready="$(kubectl -n flux-system get "helmrelease/${release_name}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)" stalled="$(kubectl -n flux-system get "helmrelease/${release_name}" -o jsonpath='{.status.conditions[?(@.type=="Stalled")].status}' 2>/dev/null || true)" if [ "${ready}" = "True" ]; then return 0 fi if [ "${stalled}" = "True" ]; then echo "HelmRelease ${release_name} is stalled" >&2 kubectl -n flux-system describe "helmrelease/${release_name}" || true kubectl -n "${target_namespace}" get pods -o wide || true exit 1 fi sleep 10 elapsed=$((elapsed + 10)) done echo "Timed out waiting for HelmRelease ${release_name} to become Ready" >&2 kubectl -n flux-system describe "helmrelease/${release_name}" || true kubectl -n "${target_namespace}" get pods -o wide || true exit 1 } pull_image_on_matching_pod_nodes() { local namespace="$1" local selector="$2" local image="$3" local attempts="$4" local sleep_seconds="$5" local nodes nodes="$(kubectl -n "${namespace}" get pods -l "${selector}" -o jsonpath='{range .items[*]}{.spec.nodeName}{"\n"}{end}' 2>/dev/null | sort -u)" if [ -z "${nodes}" ]; then echo "No pods found for ${namespace}/${selector}; skipping targeted image pull for ${image}" >&2 return 0 fi for node in ${nodes}; do local node_ip local pulled=false node_ip="$(kubectl get node "${node}" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')" for attempt in $(seq 1 "${attempts}"); do echo "Pre-pulling ${image} on ${node}/${node_ip} (${attempt}/${attempts})" if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${node_ip}" \ "sudo k3s crictl inspecti '${image}' >/dev/null 2>&1 || (sudo k3s crictl pull '${image}' && sudo k3s crictl inspecti '${image}' >/dev/null 2>&1)"; then pulled=true break fi sleep "${sleep_seconds}" done if [ "${pulled}" != "true" ]; then echo "Best-effort targeted image pre-pull did not complete for ${image} on ${node}/${node_ip}" >&2 fi done } echo "Waiting for Rancher..." wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher 600 kubectl -n flux-system annotate kustomization/addon-rancher reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher 600 reconcile_helmrelease rancher wait_for_helmchart_ready flux-system-rancher rancher 180s 5 wait_for_helmrelease_ready rancher cattle-system 900 wait_for_resource "" namespace/cattle-system 600 wait_for_resource cattle-system deployment/cattle-system-rancher 600 kubectl -n cattle-system rollout status deployment/cattle-system-rancher --timeout=900s wait_for_resource cattle-system deployment/rancher-webhook 900 pull_image_on_matching_pod_nodes cattle-system app=rancher-webhook registry.rancher.com/rancher/rancher-webhook:v0.9.3 12 10 kubectl -n cattle-system rollout restart deployment/rancher-webhook kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=900s wait_for_resource cattle-system issuer/cattle-system-rancher 900 wait_for_resource cattle-system certificate/tls-rancher-ingress 900 kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=900s kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=900s echo "Waiting for rancher-backup operator..." wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher-backup 600 kubectl -n flux-system annotate kustomization/addon-rancher-backup reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher-backup-crd 600 wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher-backup 600 reconcile_helmrelease rancher-backup-crd reconcile_helmrelease rancher-backup wait_for_helmchart_ready flux-system-rancher-backup-crd rancher-backup-crd 180s 5 wait_for_helmchart_ready flux-system-rancher-backup rancher-backup 180s 5 wait_for_helmrelease_ready rancher-backup-crd cattle-resources-system 600 wait_for_helmrelease_ready rancher-backup cattle-resources-system 600 wait_for_resource "" namespace/cattle-resources-system 600 kubectl -n cattle-resources-system rollout status deployment/rancher-backup --timeout=900s - name: Restore Rancher from latest B2 backup env: KUBECONFIG: outputs/kubeconfig B2_ACCOUNT_ID: ${{ secrets.B2_ACCOUNT_ID }} B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }} run: | echo "Finding latest backup in B2..." CREDS=$(echo -n "${B2_ACCOUNT_ID}:${B2_APPLICATION_KEY}" | base64) AUTH_RESP=$(curl -sS -H "Authorization: Basic ${CREDS}" https://api.backblazeb2.com/b2api/v2/b2_authorize_account) API_URL=$(echo "$AUTH_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['apiUrl'])") AUTH_TOKEN=$(echo "$AUTH_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['authorizationToken'])") BUCKET_ID=$(echo "$AUTH_RESP" | python3 -c " import json,sys resp = json.load(sys.stdin) bid = resp.get('allowed', {}).get('bucketId') if bid: print(bid) else: print('') ") if [ -z "$BUCKET_ID" ]; then echo "Restricted B2 key - resolving bucket ID by name..." BUCKET_ID=$(curl -sS -H "Authorization: Bearer ${AUTH_TOKEN}" \ "${API_URL}/b2api/v2/b2_list_buckets?accountId=${B2_ACCOUNT_ID}&bucketName=HetznerTerra" \ | python3 -c "import json,sys; buckets=json.load(sys.stdin).get('buckets',[]); print(buckets[0]['bucketId'] if buckets else '')") fi LATEST=$(curl -sS -H "Authorization: Bearer ${AUTH_TOKEN}" \ "${API_URL}/b2api/v2/b2_list_file_names?bucketId=${BUCKET_ID}&prefix=rancher-backups/&maxFileCount=100" \ | python3 -c " import json,sys files = json.load(sys.stdin).get('files', []) tars = [f['fileName'] for f in files if f['fileName'].endswith('.tar.gz')] if not tars: print('NONE') else: tars.sort() print(tars[-1]) ") if [ "$LATEST" = "NONE" ]; then echo "No backups found in B2. Skipping restore." exit 0 fi BACKUP_FILE=$(basename "$LATEST") echo "Latest backup: ${BACKUP_FILE}" echo "Creating Restore CR..." kubectl apply -f - </dev/null || echo "Unknown") MESSAGE=$(kubectl get restore restore-from-b2 -n cattle-resources-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].message}' 2>/dev/null || echo "") echo " Restore status: ${STATUS} - ${MESSAGE}" if [ "$STATUS" = "True" ]; then echo "Restore completed successfully!" exit 0 fi sleep 10 done echo "Restore did not complete within timeout. Continuing anyway." - name: Seed observability runtime images run: | set -euo pipefail archive_name() { printf '%s' "$1" | tr '/:' '__' } import_required_image() { local image="$1" local host_ip="$2" local archive_name local archive_path archive_name="$(archive_name "${image}").tar" archive_path="outputs/bootstrap-image-archives/${archive_name}" if [ ! -s "${archive_path}" ]; then echo "Missing required bootstrap image archive ${archive_path} for ${image}" >&2 return 1 fi if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \ "sudo k3s crictl inspecti '${image}' >/dev/null 2>&1"; then return 0 fi echo "Importing ${image} archive on ${host_ip}" timeout 180s scp -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 \ "${archive_path}" "ubuntu@${host_ip}:/tmp/${archive_name}" timeout 300s ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 "ubuntu@${host_ip}" \ "set -euo pipefail; \ if sudo k3s crictl inspecti '${image}' >/dev/null 2>&1; then exit 0; fi; \ for attempt in 1 2 3; do \ echo 'Importing ${image} archive with ctr'; \ if sudo k3s ctr -n k8s.io images import '/tmp/${archive_name}' && sudo k3s crictl inspecti '${image}' >/dev/null; then exit 0; fi; \ sleep 10; \ done; \ sudo systemctl status k3s --no-pager -l || true; \ sudo journalctl -u k3s -n 80 --no-pager || true; \ exit 1" } import_required_image_on_all_nodes() { local image="$1" local status_dir local host_ip local pid local failed=false status_dir="$(mktemp -d)" for host_ip in ${ALL_NODE_IPS}; do ( import_required_image "${image}" "${host_ip}" ) >"${status_dir}/${host_ip}.log" 2>&1 & done for pid in $(jobs -p); do if ! wait "${pid}"; then failed=true fi done for host_ip in ${ALL_NODE_IPS}; do sed "s/^/[${host_ip}] /" "${status_dir}/${host_ip}.log" done if [ "${failed}" = "true" ]; then echo "Failed to import ${image} on one or more nodes" >&2 return 1 fi } ALL_NODE_IPS=$(python3 -c 'import json; outputs = json.load(open("outputs/terraform_outputs.json")); print(" ".join(outputs["control_plane_ips"]["value"] + outputs["worker_ips"]["value"]))') for image in \ docker.io/grafana/loki:3.5.7 \ docker.io/kiwigrid/k8s-sidecar:1.30.10 \ docker.io/grafana/promtail:3.0.0 \ docker.io/grafana/grafana:11.4.0 \ quay.io/prometheus-operator/prometheus-operator:v0.79.2 \ quay.io/prometheus-operator/prometheus-config-reloader:v0.79.2 \ quay.io/prometheus/prometheus:v3.1.0 \ registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0 \ quay.io/prometheus/node-exporter:v1.8.2; do import_required_image_on_all_nodes "${image}" done - name: Post-deploy cluster health checks working-directory: ansible run: | set -euo pipefail ansible -i inventory.ini 'control_plane[0]' -m shell -a ' set -euo pipefail kubectl get nodes -o wide kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-proxyclass --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=300s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup-config --timeout=300s reconcile_at=$(date +%s) for release in kube-prometheus-stack loki promtail; do kubectl -n flux-system annotate "helmrelease/${release}" \ reconcile.fluxcd.io/requestedAt="${reconcile_at}" \ reconcile.fluxcd.io/resetAt="${reconcile_at}" \ reconcile.fluxcd.io/forceAt="${reconcile_at}" \ --overwrite done kubectl -n flux-system annotate kustomization/addon-observability \ reconcile.fluxcd.io/requestedAt="${reconcile_at}" \ --overwrite kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=1200s kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)" ! kubectl get storageclass | grep -E "^local-path.*\\(default\\)" unhealthy_pods=$(mktemp) kubectl get pods -A --no-headers \ | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \ | grep -Ev "^cattle-system[[:space:]]+helm-operation-" \ | grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \ | grep -Ev "^cattle-turtles-system[[:space:]]+cluster-api-operator-resources-cleanup-" \ | grep -Ev "^cattle-resources-system[[:space:]]+rancher-backup-patch-sa-" \ | grep -Ev "^kube-system[[:space:]]+helm-install-" \ | tee "${unhealthy_pods}" || true test ! -s "${unhealthy_pods}" kubectl -n kube-system get pods -o wide kubectl -n tailscale-system get pods -o wide kubectl -n external-secrets get pods -o wide ' -e ansible_shell_executable=/bin/bash env: ANSIBLE_HOST_KEY_CHECKING: "False" - name: Post-deploy tailnet smoke checks working-directory: ansible run: | ansible -i inventory.ini 'control_plane[0]' -m script -a "../scripts/smoke-check-tailnet-services.sh" env: ANSIBLE_HOST_KEY_CHECKING: "False" - name: Upload Kubeconfig uses: actions/upload-artifact@v3 with: name: kubeconfig path: outputs/kubeconfig