---
# Deploy Cluster: a "terraform" job followed by an "ansible" job. Runs on
# pushes and pull requests targeting main, and on manual dispatch.
name: Deploy Cluster

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# Least-privilege token: checkout only reads repository contents, and the
# "Post Plan to PR" step needs to write pull-request comments.
permissions:
  contents: read
  pull-requests: write

# Serialize runs per ref so two pushes to the same branch cannot run Terraform
# against the remote state at the same time. Never cancel a run in progress:
# interrupting an apply could leave infrastructure half-changed.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: false

env:
  TF_VERSION: "1.7.0"
  # Terraform reads TF_VAR_<name> environment variables as input variables.
  TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
  TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
  TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
  TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
  TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
  TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
  # NOTE(review): presumably consumed by the Tailscale Terraform provider;
  # confirm against the terraform/ configuration.
  TS_OAUTH_CLIENT_ID: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}
  TS_OAUTH_CLIENT_SECRET: ${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}

jobs:
  # Formats, validates and plans the Terraform configuration; the apply and
  # output-upload steps are gated to pushes on main.
  terraform:
    name: Terraform
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

- name: Setup Terraform
|
|
uses: hashicorp/setup-terraform@v3
|
|
with:
|
|
terraform_version: ${{ env.TF_VERSION }}
|
|
|
|
- name: Terraform Format Check
|
|
working-directory: terraform
|
|
run: terraform fmt -check -recursive
|
|
|
|
- name: Terraform Init
|
|
working-directory: terraform
|
|
run: |
|
|
terraform init \
|
|
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
|
|
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
|
|
-backend-config="region=auto" \
|
|
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
|
|
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
|
|
-backend-config="skip_requesting_account_id=true"
|
|
|
|
- name: Terraform Validate
|
|
working-directory: terraform
|
|
run: terraform validate
|
|
|
|
- name: Setup SSH Keys
|
|
run: |
|
|
mkdir -p ~/.ssh
|
|
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
|
|
chmod 600 ~/.ssh/id_ed25519
|
|
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
|
|
chmod 644 ~/.ssh/id_ed25519.pub
|
|
|
|
- name: Install jq
|
|
run: |
|
|
apt-get update
|
|
apt-get install -y jq
|
|
|
|
- name: Import existing servers into state (if missing)
|
|
working-directory: terraform
|
|
env:
|
|
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
|
|
run: |
|
|
set -e
|
|
ensure_import() {
|
|
address="$1"
|
|
name="$2"
|
|
if terraform state show "$address" >/dev/null 2>&1; then
|
|
echo "$address already in state"
|
|
return
|
|
fi
|
|
id=$(curl -sS -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers?name=${name}" | jq -r '.servers[0].id // empty')
|
|
if [ -n "$id" ]; then
|
|
echo "Importing $address from server $name ($id)"
|
|
terraform import "$address" "$id"
|
|
else
|
|
echo "No existing server found for $name; skipping import"
|
|
fi
|
|
}
|
|
|
|
ensure_import 'hcloud_server.control_plane[0]' 'k8s-cluster-cp-1'
|
|
ensure_import 'hcloud_server.workers[0]' 'k8s-cluster-worker-1'
|
|
ensure_import 'hcloud_server.workers[1]' 'k8s-cluster-worker-2'
|
|
|
|
- name: Terraform Plan
|
|
id: plan
|
|
working-directory: terraform
|
|
run: |
|
|
terraform plan \
|
|
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
|
|
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
|
|
-out=tfplan \
|
|
-no-color
|
|
continue-on-error: true
|
|
|
|
- name: Post Plan to PR
|
|
if: github.event_name == 'pull_request'
|
|
uses: actions/github-script@v7
|
|
with:
|
|
script: |
|
|
const output = `#### Terraform Plan
|
|
\`\`\`
|
|
${{ steps.plan.outputs.stdout }}
|
|
\`\`\``;
|
|
github.rest.issues.createComment({
|
|
issue_number: context.issue.number,
|
|
owner: context.repo.owner,
|
|
repo: context.repo.repo,
|
|
body: output
|
|
});
|
|
|
|
- name: Fail if plan failed
|
|
if: steps.plan.outcome == 'failure'
|
|
run: exit 1
|
|
|
|
- name: Terraform Apply
|
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
working-directory: terraform
|
|
run: |
|
|
terraform apply \
|
|
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
|
|
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
|
|
-auto-approve
|
|
|
|
- name: Save Terraform Outputs
|
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
run: |
|
|
mkdir -p outputs
|
|
terraform output -json > outputs/terraform_outputs.json
|
|
working-directory: terraform
|
|
|
|
- name: Upload Outputs
|
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
uses: actions/upload-artifact@v3
|
|
with:
|
|
name: terraform-outputs
|
|
path: outputs/terraform_outputs.json
|
|
|
|
ansible:
|
|
name: Ansible
|
|
runs-on: ubuntu-latest
|
|
needs: terraform
|
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
steps:
|
|
- name: Checkout
|
|
uses: actions/checkout@v4
|
|
|
|
- name: Setup Terraform
|
|
uses: hashicorp/setup-terraform@v3
|
|
with:
|
|
terraform_version: ${{ env.TF_VERSION }}
|
|
|
|
- name: Setup SSH Keys
|
|
run: |
|
|
mkdir -p ~/.ssh
|
|
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
|
|
chmod 600 ~/.ssh/id_ed25519
|
|
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
|
|
chmod 644 ~/.ssh/id_ed25519.pub
|
|
|
|
- name: Terraform Init
|
|
working-directory: terraform
|
|
run: |
|
|
terraform init \
|
|
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
|
|
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
|
|
-backend-config="region=auto" \
|
|
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
|
|
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
|
|
-backend-config="skip_requesting_account_id=true"
|
|
|
|
- name: Get Terraform Outputs
|
|
working-directory: terraform
|
|
run: |
|
|
mkdir -p ../outputs
|
|
terraform output -json > ../outputs/terraform_outputs.json
|
|
|
|
- name: Detect runner egress IP
|
|
run: |
|
|
RUNNER_IP=$(curl -fsSL https://api.ipify.org)
|
|
echo "RUNNER_CIDR=[\"${RUNNER_IP}/32\"]" >> "$GITHUB_ENV"
|
|
echo "Runner egress IP: ${RUNNER_IP}"
|
|
|
|
- name: Open SSH/API for current runner CIDR
|
|
working-directory: terraform
|
|
run: |
|
|
terraform apply \
|
|
-target=hcloud_firewall.cluster \
|
|
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
|
|
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
|
|
-var="allowed_ssh_ips=${RUNNER_CIDR}" \
|
|
-var="allowed_api_ips=${RUNNER_CIDR}" \
|
|
-auto-approve
|
|
|
|
- name: Install Python Dependencies
|
|
run: |
|
|
apt-get update && apt-get install -y python3-pip
|
|
pip3 install --break-system-packages ansible kubernetes jinja2 pyyaml
|
|
|
|
- name: Note runner connectivity mode
|
|
run: |
|
|
echo "Using runner public network access with RUNNER_ALLOWED_CIDRS for SSH/API"
|
|
|
|
- name: Install Ansible Collections
|
|
run: ansible-galaxy collection install -r ansible/requirements.yml
|
|
|
|
- name: Generate Ansible Inventory
|
|
working-directory: ansible
|
|
run: python3 generate_inventory.py
|
|
|
|
- name: Run Ansible Playbook
|
|
working-directory: ansible
|
|
run: |
|
|
ansible-playbook site.yml \
|
|
-e "hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
|
|
-e "tailscale_auth_key=${{ secrets.TAILSCALE_AUTH_KEY }}" \
|
|
-e "tailscale_tailnet=${{ secrets.TAILSCALE_TAILNET }}" \
|
|
-e "tailscale_oauth_client_id=${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}" \
|
|
-e "tailscale_oauth_client_secret=${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}" \
|
|
-e "doppler_hetznerterra_service_token=${{ secrets.DOPPLER_HETZNERTERRA_SERVICE_TOKEN }}" \
|
|
-e "grafana_admin_password=${{ secrets.GRAFANA_ADMIN_PASSWORD }}" \
|
|
-e "cluster_name=k8s-cluster"
|
|
env:
|
|
ANSIBLE_HOST_KEY_CHECKING: "False"
|
|
|
|
- name: Install kubectl
|
|
run: |
|
|
curl -fsSL -o /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
|
|
chmod +x /usr/local/bin/kubectl
|
|
|
|
- name: Rewrite kubeconfig for runner-reachable API
|
|
working-directory: terraform
|
|
run: |
|
|
PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
|
|
sed -i "s#https://k8s-cluster-cp-1\.[^:]*:6443#https://${PRIMARY_IP}:6443#g" ../outputs/kubeconfig
|
|
|
|
- name: Bootstrap Flux source and reconciliation graph
|
|
env:
|
|
KUBECONFIG: outputs/kubeconfig
|
|
FLUX_GIT_HOST: 64.176.189.59
|
|
FLUX_GIT_PORT: "2222"
|
|
run: |
|
|
kubectl create namespace flux-system --dry-run=client -o yaml | kubectl apply -f -
|
|
ssh-keyscan -p "${FLUX_GIT_PORT}" "${FLUX_GIT_HOST}" > /tmp/flux_known_hosts
|
|
kubectl -n flux-system create secret generic flux-system \
|
|
--from-file=identity="$HOME/.ssh/id_ed25519" \
|
|
--from-file=known_hosts=/tmp/flux_known_hosts \
|
|
--dry-run=client -o yaml | kubectl apply -f -
|
|
# Apply CRDs and controllers first
|
|
kubectl apply -f clusters/prod/flux-system/gotk-components.yaml
|
|
# Wait for CRDs to be established
|
|
kubectl wait --for=condition=Established crd --all --timeout=120s
|
|
# Then apply custom resources
|
|
kubectl apply -f clusters/prod/flux-system/gitrepository-platform.yaml
|
|
kubectl apply -f clusters/prod/flux-system/kustomization-infrastructure.yaml
|
|
kubectl apply -f clusters/prod/flux-system/kustomization-apps.yaml
|
|
# Patch Flux controllers to run on cp-1 only
|
|
kubectl -n flux-system patch deployment source-controller --type='merge' -p='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"}}}}}'
|
|
kubectl -n flux-system patch deployment kustomize-controller --type='merge' -p='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"}}}}}'
|
|
kubectl -n flux-system patch deployment helm-controller --type='merge' -p='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"}}}}}'
|
|
kubectl -n flux-system patch deployment notification-controller --type='merge' -p='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"}}}}}'
|
|
kubectl -n flux-system rollout status deployment/source-controller --timeout=180s
|
|
kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=180s
|
|
kubectl -n flux-system rollout status deployment/helm-controller --timeout=180s
|
|
kubectl -n flux-system wait --for=condition=Ready gitrepository/platform --timeout=180s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=300s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=300s
|
|
# Create Doppler ClusterSecretStore now that ESO CRDs are available
|
|
kubectl apply -f - <<'EOF'
|
|
apiVersion: external-secrets.io/v1
|
|
kind: ClusterSecretStore
|
|
metadata:
|
|
name: doppler-hetznerterra
|
|
spec:
|
|
provider:
|
|
doppler:
|
|
auth:
|
|
secretRef:
|
|
dopplerToken:
|
|
name: doppler-hetznerterra-service-token
|
|
key: dopplerToken
|
|
namespace: external-secrets
|
|
EOF
|
|
# CCM and CSI are suspended for stable baseline - using k3s embedded cloud provider
|
|
# kubectl -n flux-system wait --for=condition=Ready kustomization/addon-ccm --timeout=300s
|
|
# kubectl -n flux-system wait --for=condition=Ready kustomization/addon-csi --timeout=300s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=600s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=600s
|
|
|
|
- name: Post-deploy cluster health checks
|
|
working-directory: ansible
|
|
run: |
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get nodes -o wide"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n flux-system get gitrepositories,kustomizations,helmreleases"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n kube-system get pods -o wide"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pods -o wide"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pvc"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n tailscale-system get pods -o wide"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get svc kube-prometheus-stack-grafana kube-prometheus-stack-prometheus"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability describe svc kube-prometheus-stack-grafana"
|
|
env:
|
|
ANSIBLE_HOST_KEY_CHECKING: "False"
|
|
|
|
- name: Upload Kubeconfig
|
|
uses: actions/upload-artifact@v3
|
|
with:
|
|
name: kubeconfig
|
|
path: outputs/kubeconfig
|