HetznerTerra/.gitea/workflows/deploy.yml at a33a9938672e5bf678fbf3bab51e43908414494d

OpenStaticFish/HetznerTerra

Fork 0

Files

T

micqdf a33a993867

Deploy Grafana Content / Grafana Content (push) Failing after 1m14s

Details

Deploy Cluster / Terraform (push) Failing after 4m59s

Details

Deploy Cluster / Ansible (push) Has been skipped

Details

fix: harden cluster rebuild determinism

2026-04-30 07:36:27 +00:00

1107 lines

49 KiB

YAML

Raw Blame History

 # Provisions the cluster with Terraform, then configures it with Ansible and
 # bootstraps Flux on top of it.
 name: Deploy Cluster
 # Triggers: pushes and pull requests targeting main, plus manual dispatch.
 on:
   push:
     branches:
       - main
   pull_request:
     branches:
       - main
   workflow_dispatch:
 # Every run joins one shared group; a run that is already in progress is left
 # to finish instead of being cancelled when a newer run is queued.
 concurrency:
   group: prod-cluster
   cancel-in-progress: false
 # Workflow-wide environment, visible to every job and step.
 env:
   # Tool versions read by the "Setup Terraform" and "Install kubectl" steps.
   TF_VERSION: "1.7.0"
   KUBECTL_VERSION: "v1.34.6"
   # TF_VAR_<name> entries are picked up by Terraform as input variable <name>.
   TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
   TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
   TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
   TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
   TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
   TF_VAR_proxmox_endpoint: ${{ secrets.PROXMOX_ENDPOINT }}
   TF_VAR_proxmox_api_token_id: ${{ secrets.PROXMOX_API_TOKEN_ID }}
   TF_VAR_proxmox_api_token_secret: ${{ secrets.PROXMOX_API_TOKEN_SECRET }}
   # NOTE(review): presumably turns off TLS verification for the Proxmox API;
   # confirm this is intended for production.
   TF_VAR_proxmox_insecure: "true"
   # NOTE(review): not referenced by any step visible in this part of the file;
   # presumably consumed by a Terraform provider or helper script - confirm.
   TS_OAUTH_CLIENT_ID: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}
   TS_OAUTH_CLIENT_SECRET: ${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}
 jobs:
   # Checks formatting, validates and plans the Terraform configuration on every
   # trigger. On pushes to main it additionally cleans up Proxmox leftovers,
   # applies the configuration (with retries) and uploads the outputs.
   terraform:
     name: Terraform
     runs-on: ubuntu-22.04
     steps:
       - name: Checkout
         uses: actions/checkout@v4
       - name: Setup Terraform
         uses: hashicorp/setup-terraform@v3
         with:
           terraform_version: ${{ env.TF_VERSION }}
       - name: Terraform Format Check
         working-directory: terraform
         run: terraform fmt -check -recursive
       # Backend settings come from repository secrets at init time.
       # -lockfile=readonly keeps init from rewriting the dependency lock file.
       - name: Terraform Init
         working-directory: terraform
         run: |
           terraform init \
             -lockfile=readonly \
             -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
             -backend-config="bucket=${{ secrets.S3_BUCKET }}" \
             -backend-config="region=auto" \
             -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
             -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
             -backend-config="skip_requesting_account_id=true"
       - name: Terraform Validate
         working-directory: terraform
         run: terraform validate
       # The key material is handed over through the environment instead of being
       # pasted into the script text, so characters such as quotes, `$` or
       # backticks cannot alter the command. umask 077 makes sure the private key
       # is never readable by other users, not even before the chmod.
       - name: Setup SSH Keys
         env:
           SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
           SSH_PUBLIC_KEY: ${{ secrets.SSH_PUBLIC_KEY }}
         run: |
           umask 077
           mkdir -p ~/.ssh
           printf '%s\n' "${SSH_PRIVATE_KEY}" > ~/.ssh/id_ed25519
           chmod 600 ~/.ssh/id_ed25519
           printf '%s\n' "${SSH_PUBLIC_KEY}" > ~/.ssh/id_ed25519.pub
           chmod 644 ~/.ssh/id_ed25519.pub
       # continue-on-error lets the job reach the PR comment step even when the
       # plan fails; "Fail if plan failed" below turns that failure back into a
       # job failure.
       - name: Terraform Plan
         id: plan
         working-directory: terraform
         run: |
           terraform plan \
             -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
             -var="ssh_private_key=$HOME/.ssh/id_ed25519" \
             -out=tfplan \
             -no-color
         continue-on-error: true
       # The plan text reaches the script through an environment variable rather
       # than template expansion, so backticks or `${...}` in the plan output can
       # neither break nor inject into the JavaScript.
       - name: Post Plan to PR
         if: github.event_name == 'pull_request'
         uses: actions/github-script@v7
         env:
           PLAN_STDOUT: ${{ steps.plan.outputs.stdout }}
         with:
           script: |
             const plan = process.env.PLAN_STDOUT || '';
             const output = ['#### Terraform Plan', '```', plan, '```'].join('\n');
             // Await the request so an API error fails this step.
             await github.rest.issues.createComment({
               issue_number: context.issue.number,
               owner: context.repo.owner,
               repo: context.repo.repo,
               body: output
             });
       - name: Fail if plan failed
         if: steps.plan.outcome == 'failure'
         run: exit 1
       # NOTE(review): this runs from the repository root while tfplan was written
       # inside terraform/; presumably the script resolves --plan relative to
       # --terraform-dir - confirm.
       - name: Cleanup orphan Proxmox cloud-init volumes
         if: github.ref == 'refs/heads/main' && github.event_name == 'push'
         run: python3 scripts/proxmox-rebuild-cleanup.py --mode orphan-cloudinit --terraform-dir terraform --plan tfplan
       # Applies with up to three attempts. The saved tfplan is only handed to the
       # cleanup script; `terraform apply` itself is run without a plan file.
       - name: Terraform Apply
         if: github.ref == 'refs/heads/main' && github.event_name == 'push'
         working-directory: terraform
         run: |
           set -euo pipefail
           # Run one apply, mirroring its output into the given log file.
           # Returns terraform's own exit status rather than tee's.
           run_apply() {
             local log_file="$1"
             terraform apply \
               -parallelism=2 \
               -var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
               -var="ssh_private_key=$HOME/.ssh/id_ed25519" \
               -auto-approve 2>&1 | tee "${log_file}"
             return "${PIPESTATUS[0]}"
           }
           # Delegate removal of VMs that exist in Proxmox but not in Terraform state.
           cleanup_untracked_target_vms() {
             python3 ../scripts/proxmox-rebuild-cleanup.py --mode untracked-vms --terraform-dir . --plan tfplan
           }
           cleanup_untracked_target_vms
           for attempt in 1 2 3; do
             log_file="/tmp/terraform-apply-${attempt}.log"
             if run_apply "${log_file}"; then
               exit 0
             fi
             if [ "${attempt}" = "3" ]; then
               exit 1
             fi
             echo "Terraform apply failed; cleaning Terraform-untracked partial VM creates before retry ${attempt}/2"
             cleanup_untracked_target_vms
             sleep 20
           done
       - name: Save Terraform Outputs
         if: github.ref == 'refs/heads/main' && github.event_name == 'push'
         run: |
           mkdir -p outputs
           terraform output -json > outputs/terraform_outputs.json
         working-directory: terraform
       # NOTE(review): pinned to v3; presumably because the Gitea runner does not
       # support the v4 artifact protocol - confirm before upgrading.
       - name: Upload Outputs
         if: github.ref == 'refs/heads/main' && github.event_name == 'push'
         uses: actions/upload-artifact@v3
         with:
           name: terraform-outputs
           path: terraform/outputs/terraform_outputs.json
   ansible:
     name: Ansible
     runs-on: ubuntu-22.04
     needs: terraform
     if: github.ref == 'refs/heads/main' && github.event_name == 'push'
     steps:
       # Fresh checkout for this job; jobs do not share a workspace.
       - name: Checkout
         uses: actions/checkout@v4
       # Terraform is installed again here because later steps in this job call
       # `terraform output`.
       - name: Setup Terraform
         uses: hashicorp/setup-terraform@v3
         with:
           terraform_version: ${{ env.TF_VERSION }}
       # Writes the deploy key pair to ~/.ssh for the SSH-based steps that follow.
       # The key material is handed over through the environment instead of being
       # pasted into the script text, so characters such as quotes, `$` or
       # backticks cannot alter the command. umask 077 makes sure the private key
       # is never readable by other users, not even before the chmod.
       - name: Setup SSH Keys
         env:
           SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
           SSH_PUBLIC_KEY: ${{ secrets.SSH_PUBLIC_KEY }}
         run: |
           umask 077
           mkdir -p ~/.ssh
           printf '%s\n' "${SSH_PRIVATE_KEY}" > ~/.ssh/id_ed25519
           chmod 600 ~/.ssh/id_ed25519
           printf '%s\n' "${SSH_PUBLIC_KEY}" > ~/.ssh/id_ed25519.pub
           chmod 644 ~/.ssh/id_ed25519.pub
       # Initialise against the same S3 backend as the terraform job so this job
       # can read the state's outputs. Backend settings come from repository
       # secrets; -lockfile=readonly keeps init from rewriting the lock file.
       - name: Terraform Init
         working-directory: terraform
         run: |
           terraform init \
             -lockfile=readonly \
             -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
             -backend-config="bucket=${{ secrets.S3_BUCKET }}" \
             -backend-config="region=auto" \
             -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
             -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
             -backend-config="skip_requesting_account_id=true"
       # Dump all Terraform outputs as JSON into outputs/ at the repository root
       # (one level above the terraform working directory). The Flux bootstrap
       # step later reads node IPs from this file.
       - name: Get Terraform Outputs
         working-directory: terraform
         run: |
           mkdir -p ../outputs
           terraform output -json > ../outputs/terraform_outputs.json
       # NOTE(review): apt-get is called without sudo, so this assumes the runner
       # executes steps as root - confirm against the runner image.
       # NOTE(review): --break-system-packages needs a pip release that knows the
       # flag - confirm the pip version provided by the runner image.
       - name: Install Python Dependencies
         run: |
           apt-get update && apt-get install -y python3-pip
           pip3 install --break-system-packages ansible==11.2.0 kubernetes==32.0.1 jinja2==3.1.5 pyyaml==6.0.2
       # Collections are taken from the requirements file kept next to the playbook.
       - name: Install Ansible Collections
         run: ansible-galaxy collection install -r ansible/requirements.yml
       # skopeo is used by the two "Prepare ... image archive" steps below.
       - name: Install skopeo
         run: |
           apt-get update
           apt-get install -y skopeo
       # Runs the repository's inventory generator from inside ansible/.
       # NOTE(review): presumably it builds the inventory from
       # outputs/terraform_outputs.json written above - confirm in the script.
       - name: Generate Ansible Inventory
         working-directory: ansible
         run: python3 generate_inventory.py
       # Download the kube-vip image on the runner and store it as a docker-archive
       # tarball under outputs/, trying up to three times with a 10s pause.
       - name: Prepare kube-vip image archive
         run: |
           set -euo pipefail
           image="ghcr.io/kube-vip/kube-vip:v1.1.2"
           archive="outputs/kube-vip-bootstrap.tar"
           mkdir -p outputs
           for attempt in 1 2 3; do
             # skopeo's docker-archive transport refuses to write into an existing
             # non-empty file, so remove whatever a failed attempt left behind;
             # otherwise every retry would fail for that reason alone.
             rm -f "${archive}"
             if skopeo copy "docker://${image}" "docker-archive:${archive}:${image}"; then
               exit 0
             fi
             sleep 10
           done
           echo "Failed to prepare kube-vip image archive on runner" >&2
           exit 1
       # Download every image needed during bootstrap on the runner and store each
       # one as a docker-archive tarball in outputs/bootstrap-image-archives/.
       # The Flux bootstrap step copies these tarballs to the nodes.
       - name: Prepare bootstrap image archives
         run: |
           set -euo pipefail
           # Map an image reference to a file-name stem: '/' and ':' become '_'.
           archive_name() {
             printf '%s' "$1" | tr '/:' '__'
           }
           # Save one image as a tarball, trying up to three times.
           #   $1 - image reference (registry/repository:tag)
           # Returns 0 on success, 1 after the third failed attempt.
           prepare_image_archive() {
             local image="$1"
             local archive
             # Assigned separately from `local` so a failing substitution is not
             # hidden behind local's own exit status.
             archive="outputs/bootstrap-image-archives/$(archive_name "${image}").tar"
             mkdir -p outputs/bootstrap-image-archives
             for attempt in 1 2 3; do
               # skopeo's docker-archive transport refuses to write into an existing
               # non-empty file, so remove whatever a failed attempt left behind;
               # otherwise every retry would fail for that reason alone.
               rm -f "${archive}"
               if skopeo copy "docker://${image}" "docker-archive:${archive}:${image}"; then
                 return 0
               fi
               sleep 10
             done
             echo "Failed to prepare bootstrap image archive for ${image}" >&2
             return 1
           }
           for image in \
             ghcr.io/fluxcd/source-controller:v1.8.0 \
             ghcr.io/fluxcd/kustomize-controller:v1.8.1 \
             ghcr.io/fluxcd/helm-controller:v1.5.1 \
             ghcr.io/fluxcd/notification-controller:v1.8.1 \
             oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 \
             ghcr.io/tailscale/k8s-operator:v1.96.5 \
             ghcr.io/tailscale/tailscale:v1.96.5 \
             registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2 \
             docker.io/rancher/mirrored-pause:3.6 \
             quay.io/jetstack/cert-manager-controller:v1.17.2 \
             quay.io/jetstack/cert-manager-cainjector:v1.17.2 \
             quay.io/jetstack/cert-manager-webhook:v1.17.2 \
             quay.io/jetstack/cert-manager-startupapicheck:v1.17.2 \
             docker.io/library/busybox:1.31.1 \
             docker.io/grafana/loki:3.5.7 \
             quay.io/kiwigrid/k8s-sidecar:1.28.0 \
             docker.io/kiwigrid/k8s-sidecar:1.30.10 \
             docker.io/grafana/promtail:3.0.0 \
             docker.io/grafana/grafana:11.4.0 \
             quay.io/prometheus-operator/prometheus-operator:v0.79.2 \
             quay.io/prometheus-operator/prometheus-config-reloader:v0.79.2 \
             quay.io/prometheus/prometheus:v3.1.0 \
             registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0 \
             quay.io/prometheus/node-exporter:v1.8.2; do
             prepare_image_archive "${image}"
           done
       # Runs site.yml with the secrets passed as extra vars.
       # The secrets reach the shell as environment variables instead of being
       # pasted into the script text, so a value containing quotes, `$` or
       # backticks can no longer corrupt the command line. The AV_ prefix keeps
       # these names from colliding with variables other tools may read.
       # NOTE(review): key=value extra vars are still split on whitespace by
       # Ansible; a secret containing spaces would need JSON extra vars instead.
       - name: Run Ansible Playbook
         working-directory: ansible
         env:
           ANSIBLE_HOST_KEY_CHECKING: "False"
           AV_TAILSCALE_AUTH_KEY: ${{ secrets.TAILSCALE_AUTH_KEY }}
           AV_TAILSCALE_TAILNET: ${{ secrets.TAILSCALE_TAILNET }}
           AV_TAILSCALE_OAUTH_CLIENT_ID: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}
           AV_TAILSCALE_OAUTH_CLIENT_SECRET: ${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}
           AV_DOPPLER_SERVICE_TOKEN: ${{ secrets.DOPPLER_HETZNERTERRA_SERVICE_TOKEN }}
           AV_TAILSCALE_API_KEY: ${{ secrets.TAILSCALE_API_KEY }}
           AV_GRAFANA_ADMIN_PASSWORD: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
         run: |
           ansible-playbook site.yml \
             -e "tailscale_auth_key=${AV_TAILSCALE_AUTH_KEY}" \
             -e "tailscale_tailnet=${AV_TAILSCALE_TAILNET}" \
             -e "tailscale_oauth_client_id=${AV_TAILSCALE_OAUTH_CLIENT_ID}" \
             -e "tailscale_oauth_client_secret=${AV_TAILSCALE_OAUTH_CLIENT_SECRET}" \
             -e "doppler_hetznerterra_service_token=${AV_DOPPLER_SERVICE_TOKEN}" \
             -e "tailscale_api_key=${AV_TAILSCALE_API_KEY}" \
             -e "grafana_admin_password=${AV_GRAFANA_ADMIN_PASSWORD}" \
             -e "cluster_name=k8s-cluster"
       # Installs the kubectl release pinned by KUBECTL_VERSION into /usr/local/bin.
       # The binary is checked against the SHA-256 published next to it before it
       # is installed, so a truncated or tampered download fails the step.
       - name: Install kubectl
         run: |
           set -euo pipefail
           base_url="https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64"
           tmp_dir="$(mktemp -d)"
           trap 'rm -rf "${tmp_dir}"' EXIT
           curl -fsSL -o "${tmp_dir}/kubectl" "${base_url}/kubectl"
           curl -fsSL -o "${tmp_dir}/kubectl.sha256" "${base_url}/kubectl.sha256"
           # The .sha256 file holds only the digest; sha256sum expects "<digest>  <file>".
           echo "$(cat "${tmp_dir}/kubectl.sha256")  ${tmp_dir}/kubectl" | sha256sum --check
           install -m 0755 "${tmp_dir}/kubectl" /usr/local/bin/kubectl
       # Points the kubeconfig at the primary control-plane IP taken from the
       # Terraform outputs. Every URL of the form
       # https://k8s-cluster-cp-1.<anything without a colon>:6443 is replaced.
       # NOTE(review): outputs/kubeconfig is presumably written by the Ansible
       # playbook - no earlier step shown here creates it; confirm.
       - name: Rewrite kubeconfig for runner-reachable API
         working-directory: terraform
         run: |
           set -euo pipefail
           PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
           # '#' is the sed delimiter so the slashes in the URL need no escaping.
           sed -i "s#https://k8s-cluster-cp-1\.[^:]*:6443#https://${PRIMARY_IP}:6443#g" ../outputs/kubeconfig
       - name: Bootstrap Flux source and reconciliation graph
         env:
           KUBECONFIG: outputs/kubeconfig
           FLUX_GIT_HOST: 64.176.189.59
           FLUX_GIT_PORT: "2222"
           FLUX_KNOWN_HOSTS: ${{ secrets.FLUX_KNOWN_HOSTS }}
         run: |
           set -euo pipefail
           # Wait up to 900s for a Deployment in flux-system to finish rolling out.
           # On failure, print pod and deployment diagnostics and abort the script.
           #   $1 - Deployment name in the flux-system namespace
           flux_rollout_status() {
             local deployment="$1"
             if ! kubectl -n flux-system rollout status "deployment/${deployment}" --timeout=900s; then
               # Diagnostics are best-effort: with errexit active, a failing kubectl
               # call here would otherwise end the script before the rest is printed.
               kubectl -n flux-system get pods -o wide || true
               kubectl -n flux-system describe deployment "${deployment}" || true
               kubectl -n flux-system describe pods -l "app=${deployment}" || true
               exit 1
             fi
           }
           # Poll every 10 seconds until a resource exists; abort the script with a
           # listing of Flux kustomizations and helmreleases once the timeout is hit.
           #   $1 - namespace, or "" to query without a namespace flag
           #   $2 - resource reference, e.g. kind/name
           #   $3 - timeout in seconds
           wait_for_resource() {
             local namespace="$1" resource="$2" timeout_seconds="$3"
             local waited=0
             local -a lookup=(kubectl)
             if [ -n "${namespace}" ]; then
               lookup+=(-n "${namespace}")
             fi
             lookup+=(get "${resource}")
             while true; do
               if "${lookup[@]}" >/dev/null 2>&1; then
                 return 0
               fi
               if [ "${waited}" -ge "${timeout_seconds}" ]; then
                 echo "Timed out waiting for ${resource} to exist" >&2
                 kubectl -n flux-system get kustomizations,helmreleases || true
                 exit 1
               fi
               sleep 10
               waited=$((waited + 10))
             done
           }
           # Poll every 5 seconds until the resource reports the given token in
           # .status.lastHandledReconcileAt; describe it and abort on timeout.
           #   $1 - namespace
           #   $2 - resource reference
           #   $3 - reconcile token that was written to the requestedAt annotation
           #   $4 - timeout in seconds
           wait_for_reconcile_handled() {
             local namespace="$1" resource="$2" reconcile_at="$3" timeout_seconds="$4"
             local waited=0
             local last_handled
             while [ "${waited}" -lt "${timeout_seconds}" ]; do
               # A failed lookup counts as "not handled yet".
               last_handled="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath='{.status.lastHandledReconcileAt}' 2>/dev/null || true)"
               if [[ "${last_handled}" == "${reconcile_at}" ]]; then
                 return 0
               fi
               sleep 5
               waited=$((waited + 5))
             done
             echo "Timed out waiting for ${resource} to handle reconcile ${reconcile_at}" >&2
             kubectl -n "${namespace}" describe "${resource}" || true
             exit 1
           }
           # Request a reconciliation by stamping the requestedAt annotation with a
           # nanosecond timestamp, then wait until the controller reports it handled.
           #   $1 - namespace
           #   $2 - resource reference
           #   $3 - timeout in seconds for the handled check
           reconcile_flux_resource() {
             local namespace="$1" resource="$2" timeout_seconds="$3"
             local request_token
             request_token="$(date +%s%N)"
             kubectl -n "${namespace}" annotate "${resource}" \
               "reconcile.fluxcd.io/requestedAt=${request_token}" --overwrite
             wait_for_reconcile_handled "${namespace}" "${resource}" "${request_token}" "${timeout_seconds}"
           }
           # Request a reconciliation of a HelmRelease in flux-system by stamping the
           # requestedAt, resetAt and forceAt annotations with one shared timestamp,
           # then wait until the controller reports the request handled.
           #   $1 - HelmRelease name
           #   $2 - timeout in seconds for the handled check
           reconcile_helmrelease() {
             local release_name="$1" timeout_seconds="$2"
             local target="helmrelease/${release_name}"
             local request_token
             request_token="$(date +%s%N)"
             kubectl -n flux-system annotate "${target}" \
               "reconcile.fluxcd.io/requestedAt=${request_token}" \
               "reconcile.fluxcd.io/resetAt=${request_token}" \
               "reconcile.fluxcd.io/forceAt=${request_token}" \
               --overwrite
             wait_for_reconcile_handled flux-system "${target}" "${request_token}" "${timeout_seconds}"
           }
           # Ensure an image is present on a node by pulling it there over SSH,
           # retrying a fixed number of times; abort the script if it never succeeds.
           #   $1 - image reference
           #   $2 - node IP, reached as the ubuntu user
           #   $3 - number of attempts
           #   $4 - seconds to sleep between attempts
           #   $5 - prefix for the error message
           pull_required_image() {
             local image="$1" host_ip="$2" attempts="$3" sleep_seconds="$4" failure_message="$5"
             # Skip the pull when the image is already known; otherwise pull and verify.
             local remote_cmd="sudo k3s crictl inspecti '${image}' >/dev/null 2>&1 || (sudo k3s crictl pull '${image}' && sudo k3s crictl inspecti '${image}' >/dev/null 2>&1)"
             for attempt in $(seq 1 "${attempts}"); do
               echo "Pre-pulling ${image} on ${host_ip} (${attempt}/${attempts})"
               if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \
                 "${remote_cmd}"; then
                 return 0
               fi
               sleep "${sleep_seconds}"
             done
             echo "${failure_message} ${image} on ${host_ip}" >&2
             exit 1
           }
           # Ensure an image is present in the k3s image store of one node by
           # importing the tarball prepared on the runner. Does nothing when the
           # node already has the image. Aborts the script when the local archive
           # is missing or empty, or when the copy or the import fails.
           #   $1 - image reference
           #   $2 - node IP, reached as the ubuntu user
           import_required_image() {
             local image="$1"
             local host_ip="$2"
             local archive_name
             local archive_path
             # Same naming rule as the archive preparation step: '/' and ':' become '_'.
             archive_name="$(printf '%s' "${image}" | tr '/:' '__').tar"
             archive_path="outputs/bootstrap-image-archives/${archive_name}"
             if [ ! -s "${archive_path}" ]; then
               echo "Missing required bootstrap image archive ${archive_path} for ${image}" >&2
               exit 1
             fi
             if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \
               "sudo k3s crictl inspecti '${image}' >/dev/null 2>&1"; then
               return 0
             fi
             echo "Importing ${image} archive on ${host_ip}"
             timeout 180s scp -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 \
               "${archive_path}" "ubuntu@${host_ip}:/tmp/${archive_name}"
             # The remote script tries the import up to five times and prints k3s
             # service status and recent journal entries when all attempts fail.
             # The EXIT trap deletes the uploaded tarball on every exit path so
             # the archives do not pile up in /tmp on the node.
             timeout 300s ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 "ubuntu@${host_ip}" \
               "set -euo pipefail; \
               trap \"rm -f '/tmp/${archive_name}'\" EXIT; \
               if sudo k3s crictl inspecti '${image}' >/dev/null 2>&1; then exit 0; fi; \
               for attempt in 1 2 3 4 5; do \
                 echo 'Importing ${image} archive with ctr'; \
                 if sudo k3s ctr -n k8s.io images import '/tmp/${archive_name}' && sudo k3s crictl inspecti '${image}' >/dev/null; then exit 0; fi; \
                 sleep 10; \
               done; \
               sudo systemctl status k3s --no-pager -l || true; \
               sudo journalctl -u k3s -n 80 --no-pager || true; \
               exit 1"
           }
           # Import one image on every node listed in ALL_NODE_IPS, in list order.
           #   $1 - image reference
           import_required_image_on_all_nodes() {
             local image="$1"
             local node_ip
             # Left unquoted on purpose: ALL_NODE_IPS is a space-separated list.
             for node_ip in ${ALL_NODE_IPS}; do
               import_required_image "${image}" "${node_ip}"
             done
           }
           # Print best-effort diagnostics for the External Secrets rollout: the Flux
           # source and release objects, details of the three ESO-related objects,
           # and the pods in the external-secrets namespace. Never fails.
           eso_diagnostics() {
             local target
             kubectl -n flux-system get kustomizations,ocirepositories,helmrepositories,helmcharts,helmreleases || true
             # Each entry is "<kind> <name>" and is split into two arguments below.
             for target in \
               "kustomization addon-external-secrets" \
               "ocirepository external-secrets" \
               "helmrelease external-secrets"; do
               kubectl -n flux-system describe ${target} || true
             done
             kubectl -n external-secrets get pods -o wide || true
           }
           # Poll a HelmRelease in flux-system every 10 seconds until it is Ready for
           # its current generation. Aborts the script with diagnostics when the
           # release reports Stalled or when the timeout runs out.
           #   $1 - HelmRelease name
           #   $2 - namespace whose pods are listed in the diagnostics
           #   $3 - timeout in seconds
           wait_for_helmrelease_ready() {
             local release_name="$1" target_namespace="$2" timeout_seconds="$3"
             local target="helmrelease/${release_name}"
             local failure="Timed out waiting for HelmRelease ${release_name} to become Ready"
             local waited=0
             local ready_status stalled_status spec_generation seen_generation
             while [ "${waited}" -lt "${timeout_seconds}" ]; do
               # Failed lookups yield empty strings and simply count as "not ready".
               ready_status="$(kubectl -n flux-system get "${target}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
               stalled_status="$(kubectl -n flux-system get "${target}" -o jsonpath='{.status.conditions[?(@.type=="Stalled")].status}' 2>/dev/null || true)"
               spec_generation="$(kubectl -n flux-system get "${target}" -o jsonpath='{.metadata.generation}' 2>/dev/null || true)"
               seen_generation="$(kubectl -n flux-system get "${target}" -o jsonpath='{.status.observedGeneration}' 2>/dev/null || true)"
               # Ready only counts once the controller has observed the latest spec.
               if [ "${ready_status}" = "True" ] && [ "${seen_generation}" = "${spec_generation}" ]; then
                 return 0
               fi
               if [ "${stalled_status}" = "True" ]; then
                 failure="HelmRelease ${release_name} is stalled"
                 break
               fi
               sleep 10
               waited=$((waited + 10))
             done
             # Reached on stall or timeout; both report the same diagnostics.
             echo "${failure}" >&2
             kubectl -n flux-system describe "${target}" || true
             kubectl -n "${target_namespace}" get pods -o wide || true
             exit 1
           }
           # Wait for a HelmRelease that is sourced from an OCIRepository.
           # A source that is not Ready is tolerated as long as it still reports an
           # artifact in storage; otherwise diagnostics are printed and the script aborts.
           #   $1 - OCIRepository name
           #   $2 - HelmRelease name
           #   $3 - namespace the release deploys into
           #   $4 - timeout for the OCIRepository wait, as a kubectl duration
           #   $5 - timeout in seconds for the HelmRelease wait
           wait_for_flux_oci_helm_release() {
             local oci_name="$1" release_name="$2" target_namespace="$3"
             local oci_timeout="$4" release_timeout="$5"
             local oci_ref="ocirepository/${oci_name}"
             local cached
             wait_for_resource flux-system "ocirepository.source.toolkit.fluxcd.io/${oci_name}" 600
             reconcile_helmrelease "${release_name}" 300
             if ! kubectl -n flux-system wait --for=condition=Ready "${oci_ref}" --timeout="${oci_timeout}"; then
               cached="$(kubectl -n flux-system get "${oci_ref}" -o jsonpath='{.status.conditions[?(@.type=="ArtifactInStorage")].status}' 2>/dev/null || true)"
               if [ "${cached}" != "True" ]; then
                 eso_diagnostics
                 exit 1
               fi
               echo "OCIRepository ${oci_name} is not currently Ready; continuing with cached artifact" >&2
             fi
             wait_for_helmrelease_ready "${release_name}" "${target_namespace}" "${release_timeout}"
           }
           # Print best-effort diagnostics for a HelmRepository-based release: the
           # Flux Helm objects, details of the three named objects, and the pods of
           # the target namespace. Never fails.
           #   $1 - HelmRepository name
           #   $2 - HelmChart name
           #   $3 - HelmRelease name
           #   $4 - namespace the release deploys into
           flux_helm_diagnostics() {
             local repo_name="$1" chart_name="$2" release_name="$3" target_namespace="$4"
             local -a flux=(kubectl -n flux-system)
             "${flux[@]}" get helmrepositories,helmcharts,helmreleases || true
             "${flux[@]}" describe helmrepository "${repo_name}" || true
             "${flux[@]}" describe helmchart.source.toolkit.fluxcd.io "${chart_name}" || true
             "${flux[@]}" describe helmrelease "${release_name}" || true
             kubectl -n "${target_namespace}" get pods -o wide || true
           }
           # Wait for a HelmRelease that is sourced from a HelmRepository/HelmChart.
           # A HelmRepository that is not Ready is only reported, not fatal. The
           # chart is given six rounds to become Ready, with a forced reconcile of
           # chart and release after each miss; after that the script aborts.
           #   $1 - HelmRepository name
           #   $2 - HelmChart name
           #   $3 - HelmRelease name
           #   $4 - namespace the release deploys into
           #   $5 - timeout for the repository wait, as a kubectl duration
           #   $6 - timeout for each chart wait, as a kubectl duration
           #   $7 - timeout in seconds for the HelmRelease wait
           wait_for_flux_helm_release() {
             local repo_name="$1" chart_name="$2" release_name="$3" target_namespace="$4"
             local repo_timeout="$5" chart_timeout="$6" release_timeout="$7"
             local chart_ref="helmchart.source.toolkit.fluxcd.io/${chart_name}"
             wait_for_resource flux-system "helmrepository.source.toolkit.fluxcd.io/${repo_name}" 600
             kubectl -n flux-system wait --for=condition=Ready "helmrepository/${repo_name}" --timeout="${repo_timeout}" || {
               echo "HelmRepository ${repo_name} is not currently Ready; continuing because a cached artifact may still satisfy HelmChart ${chart_name}" >&2
               kubectl -n flux-system describe helmrepository "${repo_name}" || true
             }
             wait_for_resource flux-system "${chart_ref}" 600
             reconcile_flux_resource flux-system "${chart_ref}" 300
             reconcile_helmrelease "${release_name}" 300
             for attempt in 1 2 3 4 5 6; do
               if ! kubectl -n flux-system wait --for=condition=Ready "${chart_ref}" --timeout="${chart_timeout}"; then
                 echo "HelmChart ${chart_name} did not become Ready after ${chart_timeout}; forcing retry (${attempt}/6)" >&2
                 reconcile_flux_resource flux-system "${chart_ref}" 300
                 reconcile_helmrelease "${release_name}" 300
                 continue
               fi
               wait_for_helmrelease_ready "${release_name}" "${target_namespace}" "${release_timeout}"
               return 0
             done
             flux_helm_diagnostics "${repo_name}" "${chart_name}" "${release_name}" "${target_namespace}"
             exit 1
           }
           # Create the flux-system namespace if needed; dry-run piped into apply
           # makes the command safe to repeat.
           kubectl create namespace flux-system --dry-run=client -o yaml | kubectl apply -f -
           # Prefer the known_hosts content from the secret; otherwise scan the Git
           # host's key at run time.
           if [ -n "${FLUX_KNOWN_HOSTS}" ]; then
             printf '%s\n' "${FLUX_KNOWN_HOSTS}" > /tmp/flux_known_hosts
           else
             ssh-keyscan -p "${FLUX_GIT_PORT}" "${FLUX_GIT_HOST}" > /tmp/flux_known_hosts
           fi
           # Secret holding the SSH identity and known_hosts; the same private key
           # that is used for node SSH access is stored as the identity.
           kubectl -n flux-system create secret generic flux-system \
             --from-file=identity="$HOME/.ssh/id_ed25519" \
             --from-file=known_hosts=/tmp/flux_known_hosts \
             --dry-run=client -o yaml | kubectl apply -f -
           # Node addresses come from the Terraform outputs saved earlier in this job.
           PRIMARY_CP_IP=$(python3 -c 'import json; print(json.load(open("outputs/terraform_outputs.json"))["primary_control_plane_ip"]["value"])')
           ALL_NODE_IPS=$(python3 -c 'import json; outputs = json.load(open("outputs/terraform_outputs.json")); print(" ".join(outputs["control_plane_ips"]["value"] + outputs["worker_ips"]["value"]))')
           # Flux controller images are only needed on the primary control plane,
           # where the controllers are pinned further below.
           for image in \
             ghcr.io/fluxcd/source-controller:v1.8.0 \
             ghcr.io/fluxcd/kustomize-controller:v1.8.1 \
             ghcr.io/fluxcd/helm-controller:v1.5.1 \
             ghcr.io/fluxcd/notification-controller:v1.8.1; do
             import_required_image "${image}" "${PRIMARY_CP_IP}"
           done
           # The pause and cert-manager images are imported on every node.
           for image in \
             docker.io/rancher/mirrored-pause:3.6 \
             quay.io/jetstack/cert-manager-controller:v1.17.2 \
             quay.io/jetstack/cert-manager-cainjector:v1.17.2 \
             quay.io/jetstack/cert-manager-webhook:v1.17.2 \
             quay.io/jetstack/cert-manager-startupapicheck:v1.17.2; do
             import_required_image_on_all_nodes "${image}"
           done
           # Apply CRDs and controllers first
           kubectl apply -f clusters/prod/flux-system/gotk-components.yaml
           # Wait for CRDs to be established
           kubectl wait --for=condition=Established crd --all --timeout=120s
           # Then apply custom resources
           kubectl apply -f clusters/prod/flux-system/gitrepository-platform.yaml
           kubectl apply -f clusters/prod/flux-system/kustomization-infrastructure.yaml
           kubectl apply -f clusters/prod/flux-system/kustomization-apps.yaml
           # Patch Flux controllers to run on cp-1 and tolerate the control-plane taint
           PATCH='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"},"tolerations":[{"key":"node-role.kubernetes.io/control-plane","operator":"Exists","effect":"NoSchedule"}]}}}}'
           kubectl -n flux-system patch deployment source-controller --type='merge' -p="$PATCH"
           kubectl -n flux-system patch deployment kustomize-controller --type='merge' -p="$PATCH"
           kubectl -n flux-system patch deployment helm-controller --type='merge' -p="$PATCH"
           kubectl -n flux-system patch deployment notification-controller --type='merge' -p="$PATCH"
           # NOTE(review): notification-controller is patched above but its rollout
           # is not awaited here - confirm that is intentional.
           flux_rollout_status source-controller
           flux_rollout_status kustomize-controller
           flux_rollout_status helm-controller
           # The Git source must be Ready before the infrastructure Kustomization can be.
           kubectl -n flux-system wait --for=condition=Ready gitrepository/platform --timeout=300s
           kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=600s
           # cert-manager: trigger a reconcile, then wait for the Kustomization and
           # its HelmRelease to report Ready.
           reconcile_flux_resource flux-system kustomization/addon-cert-manager 300
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=1200s
           kubectl -n flux-system wait --for=condition=Ready helmrelease/cert-manager --timeout=1200s
           # Wait directly on the ESO Helm objects; Kustomization readiness hides useful failure details.
           wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-external-secrets 600
           reconcile_flux_resource flux-system kustomization/addon-external-secrets 300
           import_required_image oci.external-secrets.io/external-secrets/external-secrets:v2.1.0 "${PRIMARY_CP_IP}"
           # Arguments: OCI source, release, target namespace, source timeout
           # (kubectl duration), release timeout (seconds).
           wait_for_flux_oci_helm_release external-secrets external-secrets external-secrets 600s 600
           # The CRDs must exist before `kubectl wait` can be pointed at them.
           wait_for_resource "" crd/clustersecretstores.external-secrets.io 900
           wait_for_resource "" crd/externalsecrets.external-secrets.io 900
           kubectl wait --for=condition=established --timeout=600s crd/clustersecretstores.external-secrets.io
           kubectl wait --for=condition=established --timeout=600s crd/externalsecrets.external-secrets.io
           kubectl -n external-secrets rollout status deployment/external-secrets-external-secrets --timeout=600s
           kubectl -n external-secrets rollout status deployment/external-secrets-external-secrets-webhook --timeout=600s
           # Wait until the webhook Service has at least one endpoint address before
           # reconciling the store Kustomization.
           wait_for_resource external-secrets service/external-secrets-external-secrets-webhook 600
           wait_for_resource external-secrets endpoints/external-secrets-external-secrets-webhook 600
           kubectl -n external-secrets wait --for=jsonpath='{.subsets[0].addresses[0].ip}' endpoints/external-secrets-external-secrets-webhook --timeout=600s
           wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-external-secrets-store 600
           reconcile_flux_resource flux-system kustomization/addon-external-secrets-store 300
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets-store --timeout=600s
           # Wait for the storage layer and private access components
           import_required_image ghcr.io/tailscale/k8s-operator:v1.96.5 "${PRIMARY_CP_IP}"
           import_required_image ghcr.io/tailscale/tailscale:v1.96.5 "${PRIMARY_CP_IP}"
           reconcile_flux_resource flux-system kustomization/addon-tailscale-operator 300
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=600s
           kubectl -n tailscale-system rollout status deployment/operator --timeout=600s
           import_required_image registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2 "${PRIMARY_CP_IP}"
           reconcile_flux_resource flux-system kustomization/addon-nfs-storage 300
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s
           kubectl -n kube-system rollout status deployment/nfs-subdir-external-provisioner --timeout=300s
           # Make flash-nfs the default StorageClass in place of local-path.
           kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
           kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
           kubectl get storageclass flash-nfs
           # NFS smoke test: a pod pinned to cp-1 writes a file to a flash-nfs claim.
           # The container sleeps 30s after a successful write, so it becomes Ready
           # only when the volume was mounted and the write worked.
           import_required_image docker.io/library/busybox:1.31.1 "${PRIMARY_CP_IP}"
           # Remove leftovers from an earlier run before creating the test objects.
           kubectl -n kube-system delete pod/nfs-smoke pvc/nfs-smoke --ignore-not-found=true
           kubectl apply -f - <<'EOF'
           apiVersion: v1
           kind: PersistentVolumeClaim
           metadata:
             name: nfs-smoke
             namespace: kube-system
           spec:
             accessModes:
               - ReadWriteOnce
             storageClassName: flash-nfs
             resources:
               requests:
                 storage: 1Mi
           ---
           apiVersion: v1
           kind: Pod
           metadata:
             name: nfs-smoke
             namespace: kube-system
           spec:
             restartPolicy: Never
             nodeSelector:
               kubernetes.io/hostname: k8s-cluster-cp-1
             tolerations:
               - key: node-role.kubernetes.io/control-plane
                 operator: Exists
                 effect: NoSchedule
             containers:
               - name: smoke
                 image: docker.io/library/busybox:1.31.1
                 command:
                   - sh
                   - -c
                   - echo ok >/data/smoke && test -s /data/smoke && sleep 30
                 volumeMounts:
                   - name: data
                     mountPath: /data
             volumes:
               - name: data
                 persistentVolumeClaim:
                   claimName: nfs-smoke
           EOF
           kubectl -n kube-system wait --for=condition=Ready pod/nfs-smoke --timeout=180s
           # Clean up without blocking on the deletion.
           kubectl -n kube-system delete pod/nfs-smoke pvc/nfs-smoke --ignore-not-found=true --wait=false
       # Bring the Flux-managed Rancher add-on up: reconcile its kustomizations and
       # HelmRelease, then wait for the cattle-system workloads, issuer and certificate.
       - name: Wait for Rancher
         env:
           # kubectl calls in this step read the cluster credentials from this file.
           KUBECONFIG: outputs/kubeconfig
         run: |
           # Stop on the first failing command, unset variable or failing pipeline stage.
           set -euo pipefail
           # Poll every 10 seconds until a Kubernetes object exists.
           #   $1 - namespace to query; an empty string runs the lookup without -n
           #   $2 - object reference in kubectl form (kind/name)
           #   $3 - maximum time to wait, in seconds
           # Returns 0 as soon as the object is found. On timeout it prints the Flux
           # objects in flux-system for diagnosis and exits the step with status 1.
           wait_for_resource() {
             local namespace="$1"
             local resource="$2"
             local timeout_seconds="$3"
             local waited=0
             while true; do
               if [ -z "${namespace}" ]; then
                 if kubectl get "${resource}" >/dev/null 2>&1; then
                   return 0
                 fi
               elif kubectl -n "${namespace}" get "${resource}" >/dev/null 2>&1; then
                 return 0
               fi
               if [ "${waited}" -ge "${timeout_seconds}" ]; then
                 echo "Timed out waiting for ${resource} to exist" >&2
                 kubectl -n flux-system get kustomizations,helmrepositories,helmcharts,helmreleases || true
                 exit 1
               fi
               sleep 10
               waited=$((waited + 10))
             done
           }
           # Block until a Flux object reports that it has processed a given reconcile request.
           #   $1 - namespace of the object
           #   $2 - object reference (kind/name)
           #   $3 - token that was written to the requestedAt annotation
           #   $4 - maximum time to wait, in seconds
           # Every 5 seconds .status.lastHandledReconcileAt is compared with the token.
           # On timeout the object is described and the step exits with status 1.
           wait_for_reconcile_handled() {
             local namespace="$1"
             local resource="$2"
             local reconcile_at="$3"
             local timeout_seconds="$4"
             local waited=0
             local last_handled
             local field='{.status.lastHandledReconcileAt}'
             while [ "${waited}" -lt "${timeout_seconds}" ]; do
               # A failed lookup is treated as "not handled yet" rather than as an error.
               last_handled="$(kubectl -n "${namespace}" get "${resource}" -o jsonpath="${field}" 2>/dev/null || true)"
               if [ "${last_handled}" = "${reconcile_at}" ]; then
                 return 0
               fi
               sleep 5
               waited=$((waited + 5))
             done
             echo "Timed out waiting for ${resource} to handle reconcile ${reconcile_at}" >&2
             kubectl -n "${namespace}" describe "${resource}" || true
             exit 1
           }
           # Request an immediate reconcile of a Flux object and wait until it was handled.
           #   $1 - namespace of the object
           #   $2 - object reference (kind/name)
           #   $3 - maximum time to wait for the request to be handled, in seconds
           # The request token is the current time in nanoseconds (date +%s%N), written to
           # the reconcile.fluxcd.io/requestedAt annotation.
           reconcile_flux_resource() {
             local namespace="$1"
             local resource="$2"
             local timeout_seconds="$3"
             local token
             token="$(date +%s%N)"
             kubectl -n "${namespace}" annotate "${resource}" --overwrite \
               "reconcile.fluxcd.io/requestedAt=${token}"
             wait_for_reconcile_handled "${namespace}" "${resource}" "${token}" "${timeout_seconds}"
           }
           # Request a reconcile of a HelmRelease in flux-system and wait until it was handled.
           #   $1 - HelmRelease name
           #   $2 - maximum time to wait, in seconds (defaults to 300)
           # The same nanosecond token is written to the requestedAt, resetAt and forceAt
           # annotations. NOTE(review): resetAt/forceAt presumably reset the retry state and
           # force a release action — confirm against the installed Flux helm-controller.
           reconcile_helmrelease() {
             local release_name="$1"
             local timeout_seconds="${2:-300}"
             local target="helmrelease/${release_name}"
             local token
             token="$(date +%s%N)"
             kubectl -n flux-system annotate "${target}" --overwrite \
               "reconcile.fluxcd.io/requestedAt=${token}" \
               "reconcile.fluxcd.io/resetAt=${token}" \
               "reconcile.fluxcd.io/forceAt=${token}"
             wait_for_reconcile_handled flux-system "${target}" "${token}" "${timeout_seconds}"
           }
           # Wait for a Flux HelmChart to become Ready, forcing a fresh reconcile on every attempt.
           #   $1 - HelmChart object name in flux-system
           #   $2 - name of the HelmRelease that owns the chart
           #   $3 - per-attempt wait passed to kubectl wait --timeout (a duration such as 180s)
           #   $4 - number of attempts
           # Returns 0 once the chart is Ready. After the last attempt the chart and the
           # release are described and the step exits with status 1.
           wait_for_helmchart_ready() {
             local chart_name="$1"
             local release_name="$2"
             local timeout="$3"
             local attempts="$4"
             # Keep the loop counter local so it does not leak into the rest of the script.
             local attempt
             local chart="helmchart.source.toolkit.fluxcd.io/${chart_name}"
             wait_for_resource flux-system "${chart}" 600
             for attempt in $(seq 1 "${attempts}"); do
               # Re-trigger both the chart and its release before waiting again.
               reconcile_flux_resource flux-system "${chart}" 300
               reconcile_helmrelease "${release_name}" 300
               if kubectl -n flux-system wait --for=condition=Ready "${chart}" --timeout="${timeout}"; then
                 return 0
               fi
               echo "HelmChart ${chart_name} did not become Ready after ${timeout}; forcing retry (${attempt}/${attempts})" >&2
             done
             echo "HelmChart ${chart_name} did not become Ready after ${attempts} attempts" >&2
             kubectl -n flux-system describe "${chart}" || true
             kubectl -n flux-system describe "helmrelease/${release_name}" || true
             exit 1
           }
           # Wait until a HelmRelease in flux-system is Ready for its current generation.
           #   $1 - HelmRelease name
           #   $2 - namespace whose pods are listed when the wait fails
           #   $3 - maximum time to wait, in seconds
           # Polls every 10 seconds. Success requires Ready=True and observedGeneration equal
           # to metadata.generation. A Stalled=True condition fails immediately; both failure
           # paths describe the release, list the target namespace pods and exit with status 1.
           wait_for_helmrelease_ready() {
             local release_name="$1"
             local target_namespace="$2"
             local timeout_seconds="$3"
             local target="helmrelease/${release_name}"
             local waited=0
             local failure=""
             local ready
             local stalled
             local generation
             local observed
             while [ "${waited}" -lt "${timeout_seconds}" ]; do
               ready="$(kubectl -n flux-system get "${target}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
               stalled="$(kubectl -n flux-system get "${target}" -o jsonpath='{.status.conditions[?(@.type=="Stalled")].status}' 2>/dev/null || true)"
               generation="$(kubectl -n flux-system get "${target}" -o jsonpath='{.metadata.generation}' 2>/dev/null || true)"
               observed="$(kubectl -n flux-system get "${target}" -o jsonpath='{.status.observedGeneration}' 2>/dev/null || true)"
               if [ "${ready}" = "True" ] && [ "${observed}" = "${generation}" ]; then
                 return 0
               fi
               if [ "${stalled}" = "True" ]; then
                 failure="HelmRelease ${release_name} is stalled"
                 break
               fi
               sleep 10
               waited=$((waited + 10))
             done
             # Shared failure path for both the stalled and the timed-out case.
             echo "${failure:-Timed out waiting for HelmRelease ${release_name} to become Ready}" >&2
             kubectl -n flux-system describe "${target}" || true
             kubectl -n "${target_namespace}" get pods -o wide || true
             exit 1
           }
           # Best-effort pre-pull of an image on every node that currently hosts a matching pod.
           #   $1 - namespace of the pods
           #   $2 - label selector identifying the pods
           #   $3 - image reference to pull
           #   $4 - pull attempts per node
           #   $5 - seconds to sleep between attempts
           # Always returns 0: failed lookups and failed pulls are reported on stderr but never
           # fail the step, because the caller only uses this to warm the node image cache.
           pull_image_on_matching_pod_nodes() {
             local namespace="$1"
             local selector="$2"
             local image="$3"
             local attempts="$4"
             local sleep_seconds="$5"
             local nodes
             local node
             local node_ip
             local attempt
             local pulled
             # "|| true" keeps a failing kubectl (pipefail) from aborting the step under errexit;
             # an empty node list is handled as "nothing to do" just below.
             nodes="$(kubectl -n "${namespace}" get pods -l "${selector}" -o jsonpath='{range .items[*]}{.spec.nodeName}{"\n"}{end}' 2>/dev/null | sort -u || true)"
             if [ -z "${nodes}" ]; then
               echo "No pods found for ${namespace}/${selector}; skipping targeted image pull for ${image}" >&2
               return 0
             fi
             for node in ${nodes}; do
               pulled=false
               node_ip="$(kubectl get node "${node}" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}' || true)"
               if [ -z "${node_ip}" ]; then
                 echo "Could not resolve an InternalIP for ${node}; skipping targeted image pull for ${image}" >&2
                 continue
               fi
               for attempt in $(seq 1 "${attempts}"); do
                 echo "Pre-pulling ${image} on ${node}/${node_ip} (${attempt}/${attempts})"
                 # Skip the pull when crictl already has the image; otherwise pull and verify.
                 if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${node_ip}" \
                   "sudo k3s crictl inspecti '${image}' >/dev/null 2>&1 || (sudo k3s crictl pull '${image}' && sudo k3s crictl inspecti '${image}' >/dev/null 2>&1)"; then
                   pulled=true
                   break
                 fi
                 sleep "${sleep_seconds}"
               done
               if [ "${pulled}" != "true" ]; then
                 echo "Best-effort targeted image pre-pull did not complete for ${image} on ${node}/${node_ip}" >&2
               fi
             done
           }
           echo "Waiting for Rancher..."
           # Secrets first: wait for the addon-rancher-secrets kustomization to exist, request a
           # reconcile, and require it to be Ready before moving on to the Rancher add-on.
           wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher-secrets 600
           reconcile_flux_resource flux-system kustomization/addon-rancher-secrets 300
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=600s
           # Then the addon-rancher kustomization and the rancher HelmRelease it is expected to create.
           wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-rancher 600
           reconcile_flux_resource flux-system kustomization/addon-rancher 300
           wait_for_resource flux-system helmrelease.helm.toolkit.fluxcd.io/rancher 600
           reconcile_helmrelease rancher 300
           # Up to 5 attempts of 180s each for the HelmChart, then up to 900s for the release itself.
           wait_for_helmchart_ready flux-system-rancher rancher 180s 5
           wait_for_helmrelease_ready rancher cattle-system 900
           # Empty namespace argument: the namespace lookup runs without -n.
           wait_for_resource "" namespace/cattle-system 600
           wait_for_resource cattle-system deployment/cattle-system-rancher 600
           kubectl -n cattle-system rollout status deployment/cattle-system-rancher --timeout=900s
           # Pre-pull the webhook image on the nodes hosting rancher-webhook pods (12 attempts,
           # 10s apart), then restart the deployment and wait for the rollout to finish.
           # NOTE(review): the image tag is pinned here — presumably it has to match the tag the
           # Rancher chart deploys; confirm when bumping Rancher.
           wait_for_resource cattle-system deployment/rancher-webhook 900
           pull_image_on_matching_pod_nodes cattle-system app=rancher-webhook registry.rancher.com/rancher/rancher-webhook:v0.9.3 12 10
           kubectl -n cattle-system rollout restart deployment/rancher-webhook
           kubectl -n cattle-system rollout status deployment/rancher-webhook --timeout=900s
           # Finally require the issuer and the ingress certificate to exist and become Ready.
           wait_for_resource cattle-system issuer/cattle-system-rancher 900
           wait_for_resource cattle-system certificate/tls-rancher-ingress 900
           kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=900s
           kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=900s
       # Import the observability image archives onto every node, then reconcile the
       # addon-observability kustomization and its HelmReleases.
       - name: Seed observability runtime images
         env:
           # kubectl calls in this step read the cluster credentials from this file.
           KUBECONFIG: outputs/kubeconfig
         run: |
           # Stop on the first failing command, unset variable or failing pipeline stage.
           set -euo pipefail
           # Turn an image reference into a flat file-name stem by replacing every
           # "/" and ":" with "_". Prints the result without a trailing newline.
           #   $1 - image reference
           archive_name() {
             local flattened="${1//\//_}"
             flattened="${flattened//:/_}"
             printf '%s' "${flattened}"
           }
           # Poll every 10 seconds until a namespaced Kubernetes object exists.
           #   $1 - namespace to query
           #   $2 - object reference in kubectl form (kind/name)
           #   $3 - maximum time to wait, in seconds
           # On timeout it lists the Flux kustomizations and HelmReleases in flux-system
           # and exits the step with status 1.
           wait_for_resource() {
             local namespace="$1"
             local resource="$2"
             local timeout_seconds="$3"
             local waited=0
             while ! kubectl -n "${namespace}" get "${resource}" >/dev/null 2>&1; do
               if [ "${waited}" -ge "${timeout_seconds}" ]; then
                 echo "Timed out waiting for ${resource} to exist" >&2
                 kubectl -n flux-system get kustomizations,helmreleases || true
                 exit 1
               fi
               sleep 10
               waited=$((waited + 10))
             done
           }
           # Block until a Flux object in flux-system has processed a given reconcile request.
           #   $1 - object reference (kind/name)
           #   $2 - token that was written to the requestedAt annotation
           #   $3 - maximum time to wait, in seconds
           # Every 5 seconds .status.lastHandledReconcileAt is compared with the token.
           # On timeout the object is described and the step exits with status 1.
           wait_for_reconcile_handled() {
             local resource="$1"
             local reconcile_at="$2"
             local timeout_seconds="$3"
             local waited=0
             local last_handled
             local field='{.status.lastHandledReconcileAt}'
             while [ "${waited}" -lt "${timeout_seconds}" ]; do
               # A failed lookup is treated as "not handled yet" rather than as an error.
               last_handled="$(kubectl -n flux-system get "${resource}" -o jsonpath="${field}" 2>/dev/null || true)"
               if [ "${last_handled}" = "${reconcile_at}" ]; then
                 return 0
               fi
               sleep 5
               waited=$((waited + 5))
             done
             echo "Timed out waiting for ${resource} to handle reconcile ${reconcile_at}" >&2
             kubectl -n flux-system describe "${resource}" || true
             exit 1
           }
           # Request an immediate reconcile of a Flux object in flux-system and wait up to
           # 300 seconds for the request to be handled.
           #   $1 - object reference (kind/name)
           # The request token is the current time in nanoseconds (date +%s%N), written to
           # the reconcile.fluxcd.io/requestedAt annotation.
           reconcile_flux_resource() {
             local resource="$1"
             local token
             token="$(date +%s%N)"
             kubectl -n flux-system annotate "${resource}" --overwrite \
               "reconcile.fluxcd.io/requestedAt=${token}"
             wait_for_reconcile_handled "${resource}" "${token}" 300
           }
           # Request a reconcile of a HelmRelease in flux-system and wait up to 300 seconds
           # for the request to be handled.
           #   $1 - HelmRelease name
           # The same nanosecond token is written to the requestedAt, resetAt and forceAt
           # annotations. NOTE(review): resetAt/forceAt presumably reset the retry state and
           # force a release action — confirm against the installed Flux helm-controller.
           reconcile_helmrelease() {
             local release="$1"
             local target="helmrelease/${release}"
             local token
             token="$(date +%s%N)"
             kubectl -n flux-system annotate "${target}" --overwrite \
               "reconcile.fluxcd.io/requestedAt=${token}" \
               "reconcile.fluxcd.io/resetAt=${token}" \
               "reconcile.fluxcd.io/forceAt=${token}"
             wait_for_reconcile_handled "${target}" "${token}" 300
           }
           # Ensure a required image is present in the k3s image store on one node.
           #   $1 - full image reference
           #   $2 - node address, reached over SSH as the ubuntu user
           # Returns 1 when the local archive under outputs/bootstrap-image-archives is missing
           # or empty. Returns 0 without copying when crictl on the node already knows the image.
           # Otherwise the archive is copied to the node and imported with ctr (up to 3 attempts,
           # 10 seconds apart); k3s status and recent journal lines are printed if all attempts fail.
           # The copied archive is removed from the node when the remote script exits, so repeated
           # runs do not accumulate image tarballs in /tmp.
           import_required_image() {
             local image="$1"
             local host_ip="$2"
             # Named archive_file so the variable is not confused with the archive_name helper.
             local archive_file
             local archive_path
             local remote_archive
             archive_file="$(archive_name "${image}").tar"
             archive_path="outputs/bootstrap-image-archives/${archive_file}"
             remote_archive="/tmp/${archive_file}"
             if [ ! -s "${archive_path}" ]; then
               echo "Missing required bootstrap image archive ${archive_path} for ${image}" >&2
               return 1
             fi
             if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \
               "sudo k3s crictl inspecti '${image}' >/dev/null 2>&1"; then
               return 0
             fi
             echo "Importing ${image} archive on ${host_ip}"
             timeout 180s scp -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 \
               "${archive_path}" "ubuntu@${host_ip}:${remote_archive}"
             # The remote script below is expanded locally before it is sent: ${image} and
             # ${remote_archive} are substituted here, the quotes around them protect them remotely.
             # The EXIT trap deletes the uploaded archive on every exit path of the remote script.
             timeout 300s ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 "ubuntu@${host_ip}" \
               "set -euo pipefail; \
               trap 'rm -f \"${remote_archive}\" || true' EXIT; \
               if sudo k3s crictl inspecti '${image}' >/dev/null 2>&1; then exit 0; fi; \
               for attempt in 1 2 3; do \
                 echo 'Importing ${image} archive with ctr'; \
                 if sudo k3s ctr -n k8s.io images import '${remote_archive}' && sudo k3s crictl inspecti '${image}' >/dev/null; then exit 0; fi; \
                 sleep 10; \
               done; \
               sudo systemctl status k3s --no-pager -l || true; \
               sudo journalctl -u k3s -n 80 --no-pager || true; \
               exit 1"
           }
           # Import one required image on every node in ALL_NODE_IPS, all nodes in parallel.
           #   $1 - full image reference
           # Each node's output is captured in its own log file and replayed with a
           # "[host] " prefix once all imports have finished. Exits the step with status 1
           # if the import failed on at least one node.
           import_required_image_on_all_nodes() {
             local image="$1"
             local status_dir
             local host_ip
             local pid
             local pids=""
             local failed=false
             status_dir="$(mktemp -d)"
             # ALL_NODE_IPS is a space-separated list and is split on purpose.
             for host_ip in ${ALL_NODE_IPS}; do
               (
                 import_required_image "${image}" "${host_ip}"
               ) >"${status_dir}/${host_ip}.log" 2>&1 &
               # Remember exactly the jobs started here instead of waiting on every
               # background job of the shell.
               pids="${pids} $!"
             done
             for pid in ${pids}; do
               if ! wait "${pid}"; then
                 failed=true
               fi
             done
             for host_ip in ${ALL_NODE_IPS}; do
               sed "s/^/[${host_ip}] /" "${status_dir}/${host_ip}.log"
             done
             # The logs have been replayed; do not leave one temp directory behind per image.
             rm -rf "${status_dir}"
             if [ "${failed}" = "true" ]; then
               echo "Failed to import required image ${image} on one or more nodes" >&2
               exit 1
             fi
           }
           # Space-separated list of node addresses: the control-plane IPs followed by the
           # worker IPs, read from the Terraform outputs JSON. import_required_image_on_all_nodes
           # iterates over this variable.
           ALL_NODE_IPS=$(python3 -c 'import json; outputs = json.load(open("outputs/terraform_outputs.json")); print(" ".join(outputs["control_plane_ips"]["value"] + outputs["worker_ips"]["value"]))')
           # Seed the images one at a time; each image is imported on all nodes before the next
           # one starts, and a failure on any node exits the step.
           for image in \
             docker.io/library/busybox:1.31.1 \
             docker.io/grafana/loki:3.5.7 \
             quay.io/kiwigrid/k8s-sidecar:1.28.0 \
             docker.io/kiwigrid/k8s-sidecar:1.30.10 \
             docker.io/grafana/promtail:3.0.0 \
             docker.io/grafana/grafana:11.4.0 \
             quay.io/prometheus-operator/prometheus-operator:v0.79.2 \
             quay.io/prometheus-operator/prometheus-config-reloader:v0.79.2 \
             quay.io/prometheus/prometheus:v3.1.0 \
             registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0 \
             quay.io/prometheus/node-exporter:v1.8.2; do
             import_required_image_on_all_nodes "${image}"
           done
           # With the images in place, reconcile the observability kustomization and wait for Ready.
           reconcile_flux_resource kustomization/addon-observability
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s
           # Request a fresh reconcile of each observability HelmRelease.
           for release in kube-prometheus-stack loki promtail; do
             reconcile_helmrelease "${release}"
           done
           # Restart Grafana; a failure here is tolerated and does not fail the step.
           kubectl -n observability rollout restart deployment/observability-kube-prometheus-stack-grafana || true
       - name: Post-deploy cluster health checks
         working-directory: ansible
         run: |
           set -euo pipefail
           # Runs the whole health check as one bash script on the first control-plane host
           # (inventory pattern control_plane[0]) through the Ansible shell module.
           # The script is passed inside single quotes, so it must not contain a single quote.
           # It verifies, in order:
           #   1. every listed Flux kustomization and all HelmReleases report Ready,
           #   2. flash-nfs is marked as the default StorageClass and local-path is not,
           #   3. no pod is outside Running/Completed, apart from the excluded name prefixes.
           # The local-path check is an explicit if/exit: bash does not apply errexit to a
           # pipeline whose status is inverted with "!", so the inverted form could never
           # fail the step.
           ansible -i inventory.ini 'control_plane[0]' -m shell -a '
             set -euo pipefail
             kubectl get nodes -o wide
             kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories
             kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets-store --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-proxyclass --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-secrets --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-secrets --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s
             kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
             kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=1200s
             kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
             kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
             kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
             if kubectl get storageclass | grep -E "^local-path.*\\(default\\)"; then
               echo "local-path is still marked as a default StorageClass" >&2
               exit 1
             fi
             unhealthy_pods=$(mktemp)
             kubectl get pods -A --no-headers \
               | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
               | grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
               | grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \
               | grep -Ev "^cattle-turtles-system[[:space:]]+cluster-api-operator-resources-cleanup-" \
               | grep -Ev "^kube-system[[:space:]]+helm-install-" \
               | tee "${unhealthy_pods}" || true
             test ! -s "${unhealthy_pods}"
             kubectl -n kube-system get pods -o wide
             kubectl -n tailscale-system get pods -o wide
             kubectl -n external-secrets get pods -o wide
           ' -e ansible_shell_executable=/bin/bash
         env:
           ANSIBLE_HOST_KEY_CHECKING: "False"
       # Run the repository's tailnet smoke-check script on the first control-plane host
       # through the Ansible script module.
       - name: Post-deploy tailnet smoke checks
         working-directory: ansible
         env:
           ANSIBLE_HOST_KEY_CHECKING: "False"
         run: |
           ansible \
             -i inventory.ini \
             'control_plane[0]' \
             -m script \
             -a "../scripts/smoke-check-tailnet-services.sh"
       # Publish outputs/kubeconfig as a workflow artifact named "kubeconfig".
       # NOTE(review): the file presumably carries cluster credentials — confirm who can
       # download artifacts of this workflow.
       - name: Upload Kubeconfig
         uses: actions/upload-artifact@v3
         with:
           path: outputs/kubeconfig
           name: kubeconfig

1107 lines 49 KiB YAML Raw Blame History

1107 lines

49 KiB

YAML

Raw Blame History