# Deploy workflow — Hetzner cloud-provider integration:
# - Enable --kubelet-arg=cloud-provider=external on all nodes (control planes and workers)
# - Activate CCM Kustomization with 10m timeout for Hetzner cloud-controller-manager
# - Activate CSI Kustomization with dependsOn CCM and 10m timeout for hcloud-csi
# - Update deploy workflow to wait for CCM/CSI readiness (600s timeout)
# - Add providerID verification to post-deploy health checks
# This enables proper cloud-provider integration with Hetzner CCM for node labeling
# and Hetzner CSI for persistent volume provisioning.
name: Deploy Cluster
|
|
|
|
on:
|
|
push:
|
|
branches:
|
|
- main
|
|
pull_request:
|
|
branches:
|
|
- main
|
|
workflow_dispatch:
|
|
|
|
env:
|
|
TF_VERSION: "1.7.0"
|
|
TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
|
|
TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }}
|
|
TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }}
|
|
TF_VAR_s3_endpoint: ${{ secrets.S3_ENDPOINT }}
|
|
TF_VAR_s3_bucket: ${{ secrets.S3_BUCKET }}
|
|
TF_VAR_tailscale_tailnet: ${{ secrets.TAILSCALE_TAILNET }}
|
|
TS_OAUTH_CLIENT_ID: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}
|
|
TS_OAUTH_CLIENT_SECRET: ${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}
|
|
|
|
jobs:
|
|
terraform:
|
|
name: Terraform
|
|
runs-on: ubuntu-latest
|
|
steps:
|
|
- name: Checkout
|
|
uses: actions/checkout@v4
|
|
|
|
- name: Setup Terraform
|
|
uses: hashicorp/setup-terraform@v3
|
|
with:
|
|
terraform_version: ${{ env.TF_VERSION }}
|
|
|
|
- name: Terraform Format Check
|
|
working-directory: terraform
|
|
run: terraform fmt -check -recursive
|
|
|
|
- name: Terraform Init
|
|
working-directory: terraform
|
|
run: |
|
|
terraform init \
|
|
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
|
|
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
|
|
-backend-config="region=auto" \
|
|
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
|
|
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
|
|
-backend-config="skip_requesting_account_id=true"
|
|
|
|
- name: Terraform Validate
|
|
working-directory: terraform
|
|
run: terraform validate
|
|
|
|
- name: Setup SSH Keys
|
|
run: |
|
|
mkdir -p ~/.ssh
|
|
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
|
|
chmod 600 ~/.ssh/id_ed25519
|
|
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
|
|
chmod 644 ~/.ssh/id_ed25519.pub
|
|
|
|
- name: Install jq
|
|
run: |
|
|
apt-get update
|
|
apt-get install -y jq
|
|
|
|
- name: Import existing servers into state (if missing)
|
|
working-directory: terraform
|
|
env:
|
|
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
|
|
run: |
|
|
set -e
|
|
ensure_import() {
|
|
address="$1"
|
|
name="$2"
|
|
if terraform state show "$address" >/dev/null 2>&1; then
|
|
echo "$address already in state"
|
|
return
|
|
fi
|
|
id=$(curl -sS -H "Authorization: Bearer ${HCLOUD_TOKEN}" "https://api.hetzner.cloud/v1/servers?name=${name}" | jq -r '.servers[0].id // empty')
|
|
if [ -n "$id" ]; then
|
|
echo "Importing $address from server $name ($id)"
|
|
terraform import "$address" "$id"
|
|
else
|
|
echo "No existing server found for $name; skipping import"
|
|
fi
|
|
}
|
|
|
|
ensure_import 'hcloud_server.control_plane[0]' 'k8s-cluster-cp-1'
|
|
ensure_import 'hcloud_server.workers[0]' 'k8s-cluster-worker-1'
|
|
ensure_import 'hcloud_server.workers[1]' 'k8s-cluster-worker-2'
|
|
|
|
- name: Terraform Plan
|
|
id: plan
|
|
working-directory: terraform
|
|
run: |
|
|
terraform plan \
|
|
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
|
|
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
|
|
-out=tfplan \
|
|
-no-color
|
|
continue-on-error: true
|
|
|
|
- name: Post Plan to PR
|
|
if: github.event_name == 'pull_request'
|
|
uses: actions/github-script@v7
|
|
with:
|
|
script: |
|
|
const output = `#### Terraform Plan
|
|
\`\`\`
|
|
${{ steps.plan.outputs.stdout }}
|
|
\`\`\``;
|
|
github.rest.issues.createComment({
|
|
issue_number: context.issue.number,
|
|
owner: context.repo.owner,
|
|
repo: context.repo.repo,
|
|
body: output
|
|
});
|
|
|
|
- name: Fail if plan failed
|
|
if: steps.plan.outcome == 'failure'
|
|
run: exit 1
|
|
|
|
- name: Terraform Apply
|
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
working-directory: terraform
|
|
run: |
|
|
terraform apply \
|
|
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
|
|
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
|
|
-auto-approve
|
|
|
|
- name: Save Terraform Outputs
|
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
run: |
|
|
mkdir -p outputs
|
|
terraform output -json > outputs/terraform_outputs.json
|
|
working-directory: terraform
|
|
|
|
- name: Upload Outputs
|
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
uses: actions/upload-artifact@v3
|
|
with:
|
|
name: terraform-outputs
|
|
path: outputs/terraform_outputs.json
|
|
|
|
ansible:
|
|
name: Ansible
|
|
runs-on: ubuntu-latest
|
|
needs: terraform
|
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
|
steps:
|
|
- name: Checkout
|
|
uses: actions/checkout@v4
|
|
|
|
- name: Setup Terraform
|
|
uses: hashicorp/setup-terraform@v3
|
|
with:
|
|
terraform_version: ${{ env.TF_VERSION }}
|
|
|
|
- name: Setup SSH Keys
|
|
run: |
|
|
mkdir -p ~/.ssh
|
|
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
|
|
chmod 600 ~/.ssh/id_ed25519
|
|
echo "${{ secrets.SSH_PUBLIC_KEY }}" > ~/.ssh/id_ed25519.pub
|
|
chmod 644 ~/.ssh/id_ed25519.pub
|
|
|
|
- name: Terraform Init
|
|
working-directory: terraform
|
|
run: |
|
|
terraform init \
|
|
-backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \
|
|
-backend-config="bucket=${{ secrets.S3_BUCKET }}" \
|
|
-backend-config="region=auto" \
|
|
-backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \
|
|
-backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" \
|
|
-backend-config="skip_requesting_account_id=true"
|
|
|
|
- name: Get Terraform Outputs
|
|
working-directory: terraform
|
|
run: |
|
|
mkdir -p ../outputs
|
|
terraform output -json > ../outputs/terraform_outputs.json
|
|
|
|
- name: Detect runner egress IP
|
|
run: |
|
|
RUNNER_IP=$(curl -fsSL https://api.ipify.org)
|
|
echo "RUNNER_CIDR=[\"${RUNNER_IP}/32\"]" >> "$GITHUB_ENV"
|
|
echo "Runner egress IP: ${RUNNER_IP}"
|
|
|
|
- name: Open SSH/API for current runner CIDR
|
|
working-directory: terraform
|
|
run: |
|
|
terraform apply \
|
|
-target=hcloud_firewall.cluster \
|
|
-var="ssh_public_key=$HOME/.ssh/id_ed25519.pub" \
|
|
-var="ssh_private_key=$HOME/.ssh/id_ed25519" \
|
|
-var="allowed_ssh_ips=${RUNNER_CIDR}" \
|
|
-var="allowed_api_ips=${RUNNER_CIDR}" \
|
|
-auto-approve
|
|
|
|
- name: Install Python Dependencies
|
|
run: |
|
|
apt-get update && apt-get install -y python3-pip
|
|
pip3 install --break-system-packages ansible kubernetes jinja2 pyyaml
|
|
|
|
- name: Note runner connectivity mode
|
|
run: |
|
|
echo "Using runner public network access with RUNNER_ALLOWED_CIDRS for SSH/API"
|
|
|
|
- name: Install Ansible Collections
|
|
run: ansible-galaxy collection install -r ansible/requirements.yml
|
|
|
|
- name: Generate Ansible Inventory
|
|
working-directory: ansible
|
|
run: python3 generate_inventory.py
|
|
|
|
- name: Run Ansible Playbook
|
|
working-directory: ansible
|
|
run: |
|
|
ansible-playbook site.yml \
|
|
-e "hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
|
|
-e "tailscale_auth_key=${{ secrets.TAILSCALE_AUTH_KEY }}" \
|
|
-e "tailscale_tailnet=${{ secrets.TAILSCALE_TAILNET }}" \
|
|
-e "tailscale_oauth_client_id=${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}" \
|
|
-e "tailscale_oauth_client_secret=${{ secrets.TAILSCALE_OAUTH_CLIENT_SECRET }}" \
|
|
-e "doppler_hetznerterra_service_token=${{ secrets.DOPPLER_HETZNERTERRA_SERVICE_TOKEN }}" \
|
|
-e "grafana_admin_password=${{ secrets.GRAFANA_ADMIN_PASSWORD }}" \
|
|
-e "cluster_name=k8s-cluster"
|
|
env:
|
|
ANSIBLE_HOST_KEY_CHECKING: "False"
|
|
|
|
- name: Install kubectl
|
|
run: |
|
|
curl -fsSL -o /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
|
|
chmod +x /usr/local/bin/kubectl
|
|
|
|
- name: Rewrite kubeconfig for runner-reachable API
|
|
working-directory: terraform
|
|
run: |
|
|
PRIMARY_IP=$(terraform output -raw primary_control_plane_ip)
|
|
sed -i "s#https://k8s-cluster-cp-1\.[^:]*:6443#https://${PRIMARY_IP}:6443#g" ../outputs/kubeconfig
|
|
|
|
- name: Bootstrap Flux source and reconciliation graph
|
|
env:
|
|
KUBECONFIG: outputs/kubeconfig
|
|
FLUX_GIT_HOST: 64.176.189.59
|
|
FLUX_GIT_PORT: "2222"
|
|
run: |
|
|
kubectl create namespace flux-system --dry-run=client -o yaml | kubectl apply -f -
|
|
ssh-keyscan -p "${FLUX_GIT_PORT}" "${FLUX_GIT_HOST}" > /tmp/flux_known_hosts
|
|
kubectl -n flux-system create secret generic flux-system \
|
|
--from-file=identity="$HOME/.ssh/id_ed25519" \
|
|
--from-file=known_hosts=/tmp/flux_known_hosts \
|
|
--dry-run=client -o yaml | kubectl apply -f -
|
|
# Apply CRDs and controllers first
|
|
kubectl apply -f clusters/prod/flux-system/gotk-components.yaml
|
|
# Wait for CRDs to be established
|
|
kubectl wait --for=condition=Established crd --all --timeout=120s
|
|
# Then apply custom resources
|
|
kubectl apply -f clusters/prod/flux-system/gitrepository-platform.yaml
|
|
kubectl apply -f clusters/prod/flux-system/kustomization-infrastructure.yaml
|
|
kubectl apply -f clusters/prod/flux-system/kustomization-apps.yaml
|
|
# Patch Flux controllers to run on cp-1 only
|
|
kubectl -n flux-system patch deployment source-controller --type='merge' -p='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"}}}}}'
|
|
kubectl -n flux-system patch deployment kustomize-controller --type='merge' -p='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"}}}}}'
|
|
kubectl -n flux-system patch deployment helm-controller --type='merge' -p='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"}}}}}'
|
|
kubectl -n flux-system patch deployment notification-controller --type='merge' -p='{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"k8s-cluster-cp-1"}}}}}'
|
|
kubectl -n flux-system rollout status deployment/source-controller --timeout=180s
|
|
kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=180s
|
|
kubectl -n flux-system rollout status deployment/helm-controller --timeout=180s
|
|
kubectl -n flux-system wait --for=condition=Ready gitrepository/platform --timeout=180s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=300s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=300s
|
|
# Create Doppler ClusterSecretStore now that ESO CRDs are available
|
|
kubectl apply -f - <<'EOF'
|
|
apiVersion: external-secrets.io/v1
|
|
kind: ClusterSecretStore
|
|
metadata:
|
|
name: doppler-hetznerterra
|
|
spec:
|
|
provider:
|
|
doppler:
|
|
auth:
|
|
secretRef:
|
|
dopplerToken:
|
|
name: doppler-hetznerterra-service-token
|
|
key: dopplerToken
|
|
namespace: external-secrets
|
|
EOF
|
|
# Wait for CCM and CSI (Hetzner cloud integration)
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-ccm --timeout=600s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-csi --timeout=600s
|
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=300s
|
|
# Observability stack deferred - complex helm release timing out, debug separately
|
|
# kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=300s
|
|
# kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
|
|
|
|
- name: Post-deploy cluster health checks
|
|
working-directory: ansible
|
|
run: |
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get nodes -o wide"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl describe nodes | grep -E '(Name:|providerID:)'"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n flux-system get gitrepositories,kustomizations,helmreleases"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n kube-system get pods -o wide"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n tailscale-system get pods -o wide"
|
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n external-secrets get pods"
|
|
env:
|
|
ANSIBLE_HOST_KEY_CHECKING: "False"
|
|
|
|
- name: Upload Kubeconfig
|
|
uses: actions/upload-artifact@v3
|
|
with:
|
|
name: kubeconfig
|
|
path: outputs/kubeconfig
|