From 3b3084b997d82032a926a872b9fc1ec14a58a765 Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Sat, 28 Feb 2026 20:24:55 +0000 Subject: [PATCH] feat: Add HA Kubernetes cluster with Terraform + Ansible - 3x CX23 control plane nodes (HA) - 4x CX33 worker nodes - k3s with embedded etcd - Hetzner CCM for load balancers - Gitea CI/CD workflows - Backblaze B2 for Terraform state --- .gitea/workflows/ansible.yml | 95 ++++++++ .gitea/workflows/destroy.yml | 49 ++++ .gitea/workflows/terraform.yml | 149 ++++++++++++ .gitignore | 38 +++ README.md | 265 +++++++++++++++++++++ ansible/ansible.cfg | 14 ++ ansible/generate_inventory.py | 45 ++++ ansible/inventory.tmpl | 18 ++ ansible/requirements.yml | 5 + ansible/roles/ccm/defaults/main.yml | 3 + ansible/roles/ccm/tasks/main.yml | 40 ++++ ansible/roles/common/defaults/main.yml | 2 + ansible/roles/common/tasks/main.yml | 58 +++++ ansible/roles/k3s-agent/defaults/main.yml | 4 + ansible/roles/k3s-agent/tasks/main.yml | 30 +++ ansible/roles/k3s-server/defaults/main.yml | 3 + ansible/roles/k3s-server/tasks/main.yml | 56 +++++ ansible/site.yml | 94 ++++++++ terraform.tfvars.example | 23 ++ terraform/backend.tf | 10 + terraform/firewall.tf | 86 +++++++ terraform/main.tf | 14 ++ terraform/network.tf | 11 + terraform/outputs.tf | 44 ++++ terraform/servers.tf | 60 +++++ terraform/ssh.tf | 8 + terraform/variables.tf | 100 ++++++++ 27 files changed, 1324 insertions(+) create mode 100644 .gitea/workflows/ansible.yml create mode 100644 .gitea/workflows/destroy.yml create mode 100644 .gitea/workflows/terraform.yml create mode 100644 .gitignore create mode 100644 ansible/ansible.cfg create mode 100644 ansible/generate_inventory.py create mode 100644 ansible/inventory.tmpl create mode 100644 ansible/requirements.yml create mode 100644 ansible/roles/ccm/defaults/main.yml create mode 100644 ansible/roles/ccm/tasks/main.yml create mode 100644 ansible/roles/common/defaults/main.yml create mode 100644 ansible/roles/common/tasks/main.yml create 
mode 100644 ansible/roles/k3s-agent/defaults/main.yml create mode 100644 ansible/roles/k3s-agent/tasks/main.yml create mode 100644 ansible/roles/k3s-server/defaults/main.yml create mode 100644 ansible/roles/k3s-server/tasks/main.yml create mode 100644 ansible/site.yml create mode 100644 terraform.tfvars.example create mode 100644 terraform/backend.tf create mode 100644 terraform/firewall.tf create mode 100644 terraform/main.tf create mode 100644 terraform/network.tf create mode 100644 terraform/outputs.tf create mode 100644 terraform/servers.tf create mode 100644 terraform/ssh.tf create mode 100644 terraform/variables.tf diff --git a/.gitea/workflows/ansible.yml b/.gitea/workflows/ansible.yml new file mode 100644 index 0000000..1113367 --- /dev/null +++ b/.gitea/workflows/ansible.yml @@ -0,0 +1,96 @@ +name: Ansible + +on: + workflow_run: + workflows: ["Terraform"] + types: + - completed + branches: + - main + workflow_dispatch: + inputs: + tags: + description: 'Ansible tags to run' + required: false + default: '' + +env: + ANSIBLE_VERSION: "2.16" + +jobs: + deploy: + name: Deploy Cluster + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Download Terraform Outputs + if: github.event_name != 'workflow_dispatch' + uses: actions/download-artifact@v4 + with: + name: terraform-outputs + path: outputs/ + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Ansible + run: | + pip install ansible-core==${{ env.ANSIBLE_VERSION }}.* + pip install jinja2 pyyaml kubernetes + ansible-galaxy collection install -r ansible/requirements.yml + + - name: Setup SSH Key + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519 + chmod 600 ~/.ssh/id_ed25519 + ssh-keyscan -H ${{ secrets.CONTROL_PLANE_IP }} >> ~/.ssh/known_hosts 2>/dev/null || true + + - name: Generate Ansible Inventory + working-directory: ansible + run: | +
python3 generate_inventory.py + env: + TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }} + + - name: Run Ansible Playbook + working-directory: ansible + run: | + ansible-playbook site.yml \ + ${{ github.event.inputs.tags != '' && format('-t {0}', github.event.inputs.tags) || '' }} \ + -e "hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \ + -e "cluster_name=k8s-cluster" + env: + ANSIBLE_HOST_KEY_CHECKING: "False" + + - name: Upload Kubeconfig + uses: actions/upload-artifact@v4 + with: + name: kubeconfig + path: outputs/kubeconfig + + verify: + name: Verify Cluster + runs-on: ubuntu-latest + needs: deploy + steps: + - name: Download Kubeconfig + uses: actions/download-artifact@v4 + with: + name: kubeconfig + + - name: Setup kubectl + uses: azure/setup-kubectl@v3 + with: + version: 'v1.30.0' + + - name: Verify Cluster + run: | + export KUBECONFIG=$(pwd)/kubeconfig + kubectl get nodes + kubectl get pods -A diff --git a/.gitea/workflows/destroy.yml b/.gitea/workflows/destroy.yml new file mode 100644 index 0000000..1c06e68 --- /dev/null +++ b/.gitea/workflows/destroy.yml @@ -0,0 +1,49 @@ +name: Destroy + +on: + workflow_dispatch: + inputs: + confirm: + description: 'Type "destroy" to confirm' + required: true + default: '' + +env: + TF_VERSION: "1.7.0" + +jobs: + destroy: + name: Destroy Cluster + runs-on: ubuntu-latest + if: github.event.inputs.confirm == 'destroy' + environment: destroy + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: ${{ env.TF_VERSION }} + + - name: Terraform Init + working-directory: terraform + run: | + terraform init \ + -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \ + -backend-config="bucket=${{ secrets.S3_BUCKET }}" \ + -backend-config="region=auto" \ + -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \ + -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" + + - name: Terraform Destroy + working-directory: terraform + run: | + 
terraform destroy \ + -var="hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \ + -var="ssh_public_key=${{ secrets.SSH_PUBLIC_KEY }}" \ + -var="ssh_private_key=${{ secrets.SSH_PRIVATE_KEY }}" \ + -var="s3_access_key=${{ secrets.S3_ACCESS_KEY }}" \ + -var="s3_secret_key=${{ secrets.S3_SECRET_KEY }}" \ + -var="s3_endpoint=${{ secrets.S3_ENDPOINT }}" \ + -auto-approve diff --git a/.gitea/workflows/terraform.yml b/.gitea/workflows/terraform.yml new file mode 100644 index 0000000..851428c --- /dev/null +++ b/.gitea/workflows/terraform.yml @@ -0,0 +1,149 @@ +name: Terraform + +on: + push: + branches: + - main + paths: + - 'terraform/**' + - '.gitea/workflows/terraform.yml' + pull_request: + branches: + - main + paths: + - 'terraform/**' + - '.gitea/workflows/terraform.yml' + +env: + TF_VERSION: "1.7.0" + TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }} + TF_VAR_s3_access_key: ${{ secrets.S3_ACCESS_KEY }} + TF_VAR_s3_secret_key: ${{ secrets.S3_SECRET_KEY }} + +jobs: + validate: + name: Validate + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: ${{ env.TF_VERSION }} + + - name: Terraform Format Check + working-directory: terraform + run: terraform fmt -check -recursive + + - name: Terraform Init + working-directory: terraform + run: | + terraform init \ + -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \ + -backend-config="bucket=${{ secrets.S3_BUCKET }}" \ + -backend-config="region=auto" \ + -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \ + -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" + + - name: Terraform Validate + working-directory: terraform + run: terraform validate + + plan: + name: Plan + runs-on: ubuntu-latest + needs: validate + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: ${{ env.TF_VERSION }} + + - name: Terraform 
Init + working-directory: terraform + run: | + terraform init \ + -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \ + -backend-config="bucket=${{ secrets.S3_BUCKET }}" \ + -backend-config="region=auto" \ + -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \ + -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" + + - name: Terraform Plan + id: plan + working-directory: terraform + run: | + terraform plan \ + -var="ssh_public_key=${{ secrets.SSH_PUBLIC_KEY }}" \ + -var="ssh_private_key=${{ secrets.SSH_PRIVATE_KEY }}" \ + -out=tfplan \ + -no-color + continue-on-error: true + + - name: Post Plan to PR + if: github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + script: | + const output = `#### Terraform Plan + \`\`\` + ${{ steps.plan.outputs.stdout }} + \`\`\``; + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: output + }); + + - name: Fail if plan failed + if: steps.plan.outcome == 'failure' + run: exit 1 + + apply: + name: Apply + runs-on: ubuntu-latest + needs: plan + if: github.ref == 'refs/heads/main' && github.event_name == 'push' + environment: production + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: ${{ env.TF_VERSION }} + + - name: Terraform Init + working-directory: terraform + run: | + terraform init \ + -backend-config="endpoint=${{ secrets.S3_ENDPOINT }}" \ + -backend-config="bucket=${{ secrets.S3_BUCKET }}" \ + -backend-config="region=auto" \ + -backend-config="access_key=${{ secrets.S3_ACCESS_KEY }}" \ + -backend-config="secret_key=${{ secrets.S3_SECRET_KEY }}" + + - name: Terraform Apply + working-directory: terraform + run: | + terraform apply \ + -var="ssh_public_key=${{ secrets.SSH_PUBLIC_KEY }}" \ + -var="ssh_private_key=${{ secrets.SSH_PRIVATE_KEY }}" \ + -auto-approve + + - name: Save Terraform Outputs + 
working-directory: terraform + run: mkdir -p ../outputs && terraform output -json > ../outputs/terraform_outputs.json + + - name: Upload Outputs + uses: actions/upload-artifact@v4 + with: + name: terraform-outputs + path: outputs/terraform_outputs.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..89c9ea6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +# Terraform +*.tfstate +*.tfstate.* +*.tfstate.backup +.terraform/ +.terraform.lock.hcl +terraform.tfvars +crash.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Ansible +ansible/inventory.ini +ansible/*.retry +.ansible/ + +# Outputs +outputs/ +*.pem +*.key + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Secrets +*.tfvars +secrets/ +.env diff --git a/README.md b/README.md index e69de29..72f7dd7 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,265 @@ +# Hetzner Kubernetes Cluster + +Production-ready Kubernetes cluster on Hetzner Cloud using Terraform and Ansible. + +## Architecture + +| Component | Details | +|-----------|---------| +| **Control Plane** | 3x CX23 (HA) | +| **Workers** | 4x CX33 | +| **Total Cost** | €28.93/mo | +| **K8s** | k3s (latest, HA) | +| **Addons** | Hetzner CCM (load balancers) | +| **Bootstrap** | Terraform + Ansible | + +### Cluster Resources +- 22 vCPU total (6 CP + 16 workers) +- 44 GB RAM total (12 CP + 32 workers) +- 440 GB SSD storage +- 140 TB bandwidth allocation + +## Prerequisites + +### 1. Hetzner Cloud API Token + +1. Go to [Hetzner Cloud Console](https://console.hetzner.com/) +2. Select your project (or create a new one) +3. Navigate to **Security** → **API Tokens** +4. Click **Generate API Token** +5. Set description: `k8s-cluster-terraform` +6. Select permissions: **Read & Write** +7. Click **Generate API Token** +8. **Copy the token immediately** - it won't be shown again! + +### 2. Backblaze B2 Bucket (for Terraform State) + +1. Go to [Backblaze B2](https://secure.backblaze.com/b2_buckets.htm) +2. 
Click **Create a Bucket** +3. Set bucket name: `k8s-terraform-state` (must be globally unique) +4. Choose **Private** access +5. Click **Create Bucket** +6. Create application key: + - Go to **App Keys** → **Add a New Application Key** + - Name: `terraform-state` + - Allow access to: `k8s-terraform-state` bucket only + - Type: **Read and Write** + - Copy **keyID** (access key) and **applicationKey** (secret key) +7. Note your bucket's S3 endpoint (e.g., `https://s3.eu-central-003.backblazeb2.com`) + +### 3. SSH Key Pair + +```bash +ssh-keygen -t ed25519 -C "k8s@hetzner" -f ~/.ssh/hetzner_k8s +``` + +### 4. Local Tools + +- [Terraform](https://terraform.io/downloads) >= 1.0 +- [Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) >= 2.9 +- Python 3 with `jinja2` and `pyyaml` + +## Setup + +### 1. Clone Repository + +```bash +git clone /HetznerTerra.git +cd HetznerTerra +``` + +### 2. Configure Variables + +```bash +cp terraform.tfvars.example terraform.tfvars +``` + +Edit `terraform.tfvars`: + +```hcl +hcloud_token = "your-hetzner-api-token" + +ssh_public_key = "~/.ssh/hetzner_k8s.pub" +ssh_private_key = "~/.ssh/hetzner_k8s" + +s3_access_key = "your-backblaze-key-id" +s3_secret_key = "your-backblaze-application-key" +s3_endpoint = "https://s3.eu-central-003.backblazeb2.com" +s3_bucket = "k8s-terraform-state" + +allowed_ssh_ips = ["your.ip.address/32"] +``` + +### 3. Initialize Terraform + +```bash +cd terraform + +# Create backend config file (or use CLI args) +cat > backend.hcl << EOF +endpoint = "https://s3.eu-central-003.backblazeb2.com" +bucket = "k8s-terraform-state" +access_key = "your-backblaze-key-id" +secret_key = "your-backblaze-application-key" +EOF + +terraform init -backend-config=backend.hcl +``` + +### 4. Plan and Apply + +```bash +terraform plan -var-file=../terraform.tfvars +terraform apply -var-file=../terraform.tfvars +``` + +### 5. 
Generate Ansible Inventory + +```bash +cd ../ansible +python3 generate_inventory.py +``` + +### 6. Bootstrap Cluster + +```bash +ansible-playbook site.yml +``` + +### 7. Get Kubeconfig + +```bash +export KUBECONFIG=$(pwd)/outputs/kubeconfig +kubectl get nodes +``` + +## Gitea CI/CD + +This repository includes Gitea workflows for: + +- **terraform-plan**: Runs on PRs, shows planned changes +- **terraform-apply**: Runs on main branch after merge +- **ansible-deploy**: Runs after terraform apply + +### Required Gitea Secrets + +Set these in your Gitea repository settings (**Settings** → **Secrets** → **Actions**): + +| Secret | Description | +|--------|-------------| +| `HCLOUD_TOKEN` | Hetzner Cloud API token | +| `S3_ACCESS_KEY` | Backblaze B2 keyID | +| `S3_SECRET_KEY` | Backblaze B2 applicationKey | +| `S3_ENDPOINT` | Backblaze S3 endpoint (e.g., `https://s3.eu-central-003.backblazeb2.com`) | +| `S3_BUCKET` | S3 bucket name (e.g., `k8s-terraform-state`) | +| `SSH_PUBLIC_KEY` | SSH public key content | +| `SSH_PRIVATE_KEY` | SSH private key content | + +## File Structure + +``` +. 
+├── terraform/ +│ ├── main.tf +│ ├── variables.tf +│ ├── network.tf +│ ├── firewall.tf +│ ├── ssh.tf +│ ├── servers.tf +│ ├── outputs.tf +│ └── backend.tf +├── ansible/ +│ ├── inventory.tmpl +│ ├── generate_inventory.py +│ ├── site.yml +│ ├── roles/ +│ │ ├── common/ +│ │ ├── k3s-server/ +│ │ ├── k3s-agent/ +│ │ └── ccm/ +│ └── ansible.cfg +├── .gitea/ +│ └── workflows/ +│ ├── terraform.yml +│ └── ansible.yml +├── outputs/ +├── terraform.tfvars.example +└── README.md +``` + +## Firewall Rules + +| Port | Source | Purpose | +|------|--------|---------| +| 22 | Any | SSH | +| 6443 | Configured IPs + internal | Kubernetes API | +| 9345 | 10.0.0.0/16 | k3s Supervisor (HA join) | +| 2379 | 10.0.0.0/16 | etcd Client | +| 2380 | 10.0.0.0/16 | etcd Peer | +| 8472 | 10.0.0.0/16 | Flannel VXLAN | +| 10250 | 10.0.0.0/16 | Kubelet | +| 30000-32767 | Any | NodePorts | + +## Operations + +### Scale Workers + +Edit `terraform.tfvars`: + +```hcl +worker_count = 5 +``` + +Then: + +```bash +terraform apply +ansible-playbook site.yml +``` + +### Upgrade k3s + +```bash +ansible-playbook site.yml -t upgrade +``` + +### Destroy Cluster + +```bash +terraform destroy +``` + +## Troubleshooting + +### Check k3s Logs + +```bash +ssh root@ journalctl -u k3s -f +``` + +### Reset k3s + +```bash +ansible-playbook site.yml -t reset +``` + +## Costs Breakdown + +| Resource | Quantity | Unit Price | Monthly | +|----------|----------|------------|---------| +| CX23 (Control Plane) | 3 | €2.99 | €8.97 | +| CX33 (Workers) | 4 | €4.99 | €19.96 | +| Backblaze B2 | ~1 GB | Free (first 10GB) | €0.00 | +| **Total** | | | **€28.93/mo** | + +## Security Notes + +- Control plane has HA (3 nodes, can survive 1 failure) +- Consider adding Hetzner load balancer for API server +- Rotate API tokens regularly +- Use network policies in Kubernetes +- Enable audit logging for production + +## License + +MIT diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000..ed5cfba --- 
/dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,14 @@ +[defaults] +inventory = inventory.ini +host_key_checking = False +private_key_file = {{ private_key_file }} +retry_files_enabled = False +roles_path = roles +stdout_callback = yaml +interpreter_python = auto_silent + +[privilege_escalation] +become = True +become_method = sudo +become_user = root +become_ask_pass = False diff --git a/ansible/generate_inventory.py b/ansible/generate_inventory.py new file mode 100644 index 0000000..ffdb382 --- /dev/null +++ b/ansible/generate_inventory.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 + +import json +import os +import subprocess +import sys +from pathlib import Path + +from jinja2 import Environment, FileSystemLoader + + +def get_terraform_outputs(): + result = subprocess.run( + ["terraform", "output", "-json"], + cwd="../terraform", + capture_output=True, + text=True, + ) + + if result.returncode != 0: + print(f"Error running terraform output: {result.stderr}") + sys.exit(1) + + return json.loads(result.stdout) + + +def main(): + outputs = get_terraform_outputs() + + data = { + "control_plane_ips": outputs["control_plane_ips"]["value"], + "worker_ips": outputs["worker_ips"]["value"], + "private_key_file": outputs["ssh_private_key_path"]["value"], + } + + env = Environment(loader=FileSystemLoader(".")) + template = env.get_template("inventory.tmpl") + inventory = template.render(**data) + + Path("inventory.ini").write_text(inventory) + print("Generated inventory.ini") + + +if __name__ == "__main__": + main() diff --git a/ansible/inventory.tmpl b/ansible/inventory.tmpl new file mode 100644 index 0000000..8e2bf15 --- /dev/null +++ b/ansible/inventory.tmpl @@ -0,0 +1,18 @@ +[control_plane] +{% for ip in control_plane_ips %} +{{ ip }} +{% endfor %} + +[workers] +{% for ip in worker_ips %} +{{ ip }} +{% endfor %} + +[cluster:children] +control_plane +workers + +[cluster:vars] +ansible_user=root +ansible_python_interpreter=/usr/bin/python3 +k3s_version=latest diff --git 
a/ansible/requirements.yml b/ansible/requirements.yml new file mode 100644 index 0000000..11cfdb6 --- /dev/null +++ b/ansible/requirements.yml @@ -0,0 +1,5 @@ +collections: + - name: kubernetes.core + version: ">=2.4.0" + - name: community.general + version: ">=8.0.0" diff --git a/ansible/roles/ccm/defaults/main.yml b/ansible/roles/ccm/defaults/main.yml new file mode 100644 index 0000000..af33d94 --- /dev/null +++ b/ansible/roles/ccm/defaults/main.yml @@ -0,0 +1,3 @@ +--- +hcloud_token: "" +cluster_name: "k8s-cluster" diff --git a/ansible/roles/ccm/tasks/main.yml b/ansible/roles/ccm/tasks/main.yml new file mode 100644 index 0000000..63b4ca8 --- /dev/null +++ b/ansible/roles/ccm/tasks/main.yml @@ -0,0 +1,40 @@ +--- +- name: Check if Hetzner CCM is already deployed + command: kubectl get namespace hetzner-cloud-system + register: ccm_namespace + failed_when: false + changed_when: false + +- name: Create Hetzner CCM namespace + command: kubectl create namespace hetzner-cloud-system + when: ccm_namespace.rc != 0 + changed_when: true + +- name: Create Hetzner cloud secret + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Secret + metadata: + name: hcloud + namespace: hetzner-cloud-system + stringData: + token: "{{ hcloud_token }}" + network: "{{ cluster_name }}-network" + no_log: true + when: hcloud_token is defined + +- name: Deploy Hetzner CCM + kubernetes.core.k8s: + state: present + src: "{{ item }}" + loop: + - https://raw.githubusercontent.com/hetznercloud/hcloud-cloud-controller-manager/main/deploy/ccm-networks.yaml + when: ccm_namespace.rc != 0 + +- name: Wait for CCM pods to be ready + command: kubectl rollout status deployment/hcloud-cloud-controller-manager -n hetzner-cloud-system + changed_when: false + retries: 30 + delay: 10 diff --git a/ansible/roles/common/defaults/main.yml b/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000..79c3ef4 --- /dev/null +++ b/ansible/roles/common/defaults/main.yml @@ -0,0 
+1,2 @@ +--- +common_upgrade_packages: false diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000..b5067b2 --- /dev/null +++ b/ansible/roles/common/tasks/main.yml @@ -0,0 +1,58 @@ +--- +- name: Update apt cache + apt: + update_cache: true + cache_valid_time: 3600 + +- name: Upgrade packages + apt: + upgrade: dist + when: common_upgrade_packages | default(false) + +- name: Install required packages + apt: + name: + - apt-transport-https + - ca-certificates + - curl + - gnupg + - lsb-release + - software-properties-common + - jq + - htop + - vim + state: present + +- name: Disable swap + command: swapoff -a + changed_when: true + +- name: Remove swap from fstab + mount: + name: swap + fstype: swap + state: absent + +- name: Load br_netfilter module + modprobe: + name: br_netfilter + state: present + +- name: Persist br_netfilter module + copy: + dest: /etc/modules-load.d/k8s.conf + content: | + br_netfilter + overlay + mode: "0644" + +- name: Configure sysctl for Kubernetes + sysctl: + name: "{{ item.name }}" + value: "{{ item.value }}" + state: present + reload: true + loop: + - { name: net.bridge.bridge-nf-call-iptables, value: 1 } + - { name: net.bridge.bridge-nf-call-ip6tables, value: 1 } + - { name: net.ipv4.ip_forward, value: 1 } diff --git a/ansible/roles/k3s-agent/defaults/main.yml b/ansible/roles/k3s-agent/defaults/main.yml new file mode 100644 index 0000000..24b07d2 --- /dev/null +++ b/ansible/roles/k3s-agent/defaults/main.yml @@ -0,0 +1,4 @@ +--- +k3s_version: latest +k3s_server_url: "" +k3s_token: "" diff --git a/ansible/roles/k3s-agent/tasks/main.yml b/ansible/roles/k3s-agent/tasks/main.yml new file mode 100644 index 0000000..0171cf4 --- /dev/null +++ b/ansible/roles/k3s-agent/tasks/main.yml @@ -0,0 +1,30 @@ +--- +- name: Check if k3s agent is already installed + stat: + path: /usr/local/bin/k3s-agent + register: k3s_agent_binary + +- name: Download k3s install script + get_url: + url: 
https://get.k3s.io + dest: /tmp/install-k3s.sh + mode: "0755" + when: not k3s_agent_binary.stat.exists + +- name: Install k3s agent + environment: + INSTALL_K3S_VERSION: "{{ k3s_version if k3s_version != 'latest' else '' }}" + K3S_URL: "{{ k3s_server_url }}" + K3S_TOKEN: "{{ k3s_token }}" + command: /tmp/install-k3s.sh agent + args: + creates: /usr/local/bin/k3s-agent + when: not k3s_agent_binary.stat.exists + +- name: Wait for k3s agent to be ready + command: systemctl is-active k3s-agent + register: agent_status + until: agent_status.stdout == "active" + retries: 30 + delay: 10 + changed_when: false diff --git a/ansible/roles/k3s-server/defaults/main.yml b/ansible/roles/k3s-server/defaults/main.yml new file mode 100644 index 0000000..8df928c --- /dev/null +++ b/ansible/roles/k3s-server/defaults/main.yml @@ -0,0 +1,3 @@ +--- +k3s_version: latest +k3s_token: "" diff --git a/ansible/roles/k3s-server/tasks/main.yml b/ansible/roles/k3s-server/tasks/main.yml new file mode 100644 index 0000000..762c714 --- /dev/null +++ b/ansible/roles/k3s-server/tasks/main.yml @@ -0,0 +1,56 @@ +--- +- name: Check if k3s is already installed + stat: + path: /usr/local/bin/k3s + register: k3s_binary + +- name: Download k3s install script + get_url: + url: https://get.k3s.io + dest: /tmp/install-k3s.sh + mode: "0755" + when: not k3s_binary.stat.exists + +- name: Install k3s server (primary) + environment: + INSTALL_K3S_VERSION: "{{ k3s_version if k3s_version != 'latest' else '' }}" + K3S_TOKEN: "{{ k3s_token }}" + command: /tmp/install-k3s.sh server --cluster-init + args: + creates: /usr/local/bin/k3s + when: + - not k3s_binary.stat.exists + - k3s_primary | default(false) + +- name: Install k3s server (secondary) + environment: + INSTALL_K3S_VERSION: "{{ k3s_version if k3s_version != 'latest' else '' }}" + K3S_TOKEN: "{{ k3s_token }}" + command: /tmp/install-k3s.sh server --server https://{{ k3s_primary_ip }}:6443 + args: + creates: /usr/local/bin/k3s + when: + - not 
k3s_binary.stat.exists + - not (k3s_primary | default(false)) + +- name: Wait for k3s to be ready + command: kubectl get nodes + register: k3s_ready + until: k3s_ready.rc == 0 + retries: 30 + delay: 10 + changed_when: false + +- name: Copy kubeconfig to default location for root + file: + src: /etc/rancher/k3s/k3s.yaml + dest: /root/.kube/config + state: link + force: true + +- name: Ensure .kube directory exists for ansible user + file: + path: "/home/{{ ansible_user }}/.kube" + state: directory + mode: "0755" + when: ansible_user != 'root' diff --git a/ansible/site.yml b/ansible/site.yml new file mode 100644 index 0000000..5a077ec --- /dev/null +++ b/ansible/site.yml @@ -0,0 +1,94 @@ +--- +- name: Bootstrap Kubernetes cluster + hosts: cluster + become: true + gather_facts: true + + pre_tasks: + - name: Wait for SSH + wait_for_connection: + delay: 10 + timeout: 300 + + roles: + - common + +- name: Setup primary control plane + hosts: control_plane[0] + become: true + + vars: + k3s_primary: true + k3s_token: "{{ lookup('password', '/dev/null length=32 chars=ascii_letters,digits') }}" + + roles: + - k3s-server + +- name: Get join info from primary + hosts: control_plane[0] + become: true + tasks: + - name: Fetch node token + command: cat /var/lib/rancher/k3s/server/node-token + register: node_token + changed_when: false + + - name: Set join token fact + set_fact: + k3s_token: "{{ node_token.stdout }}" + k3s_primary_ip: "{{ ansible_default_ipv4.address }}" + + - name: Fetch kubeconfig + fetch: + src: /etc/rancher/k3s/k3s.yaml + dest: ../outputs/kubeconfig + flat: true + +- name: Setup secondary control planes + hosts: control_plane[1:] + become: true + + vars: + k3s_primary: false + k3s_token: "{{ hostvars[groups['control_plane'][0]]['k3s_token'] }}" + k3s_primary_ip: "{{ hostvars[groups['control_plane'][0]]['ansible_default_ipv4']['address'] }}" + + roles: + - k3s-server + +- name: Setup workers + hosts: workers + become: true + + vars: + k3s_token: "{{ 
hostvars[groups['control_plane'][0]]['k3s_token'] }}" + k3s_server_url: "https://{{ hostvars[groups['control_plane'][0]]['ansible_default_ipv4']['address'] }}:6443" + + roles: + - k3s-agent + +- name: Deploy Hetzner CCM + hosts: control_plane[0] + become: true + + roles: + - ccm + +- name: Finalize + hosts: localhost + connection: local + tasks: + - name: Update kubeconfig server address + command: | + sed -i 's/127.0.0.1/{{ hostvars[groups["control_plane"][0]]["ansible_default_ipv4"]["address"] }}/g' ../outputs/kubeconfig + changed_when: true + + - name: Display success message + debug: + msg: | + Cluster setup complete! + Control planes: {{ groups['control_plane'] | length }} + Workers: {{ groups['workers'] | length }} + To access the cluster: + export KUBECONFIG={{ playbook_dir }}/../outputs/kubeconfig + kubectl get nodes diff --git a/terraform.tfvars.example b/terraform.tfvars.example new file mode 100644 index 0000000..ca9a579 --- /dev/null +++ b/terraform.tfvars.example @@ -0,0 +1,23 @@ +hcloud_token = "your-hetzner-cloud-api-token-here" + +ssh_public_key = "~/.ssh/hetzner_k8s.pub" +ssh_private_key = "~/.ssh/hetzner_k8s" + +s3_access_key = "your-backblaze-key-id" +s3_secret_key = "your-backblaze-application-key" +s3_endpoint = "https://s3.eu-central-003.backblazeb2.com" +s3_bucket = "k8s-terraform-state" + +cluster_name = "k8s-prod" + +control_plane_count = 3 +control_plane_type = "cx23" + +worker_count = 4 +worker_type = "cx33" + +location = "fsn1" + +allowed_ssh_ips = ["0.0.0.0/0"] + +allowed_api_ips = ["0.0.0.0/0"] diff --git a/terraform/backend.tf b/terraform/backend.tf new file mode 100644 index 0000000..df54d30 --- /dev/null +++ b/terraform/backend.tf @@ -0,0 +1,12 @@ +terraform { + backend "s3" { + key = "terraform.tfstate" + region = "auto" + skip_credentials_validation = true + skip_metadata_api_check = true + skip_region_validation = true + skip_requesting_account_id = true + skip_s3_checksum = true + use_path_style = true + } +} diff --git a/terraform/firewall.tf b/terraform/firewall.tf new file mode 100644 
index 0000000..aee04c2 --- /dev/null +++ b/terraform/firewall.tf @@ -0,0 +1,86 @@ +resource "hcloud_firewall" "cluster" { + name = "${var.cluster_name}-firewall" + + rule { + description = "SSH" + direction = "in" + protocol = "tcp" + port = "22" + source_ips = var.allowed_ssh_ips + } + + rule { + description = "Kubernetes API" + direction = "in" + protocol = "tcp" + port = "6443" + source_ips = var.allowed_api_ips + } + + rule { + description = "Kubernetes API (internal)" + direction = "in" + protocol = "tcp" + port = "6443" + source_ips = [var.subnet_cidr] + } + + rule { + description = "k3s Supervisor" + direction = "in" + protocol = "tcp" + port = "9345" + source_ips = [var.subnet_cidr] + } + + rule { + description = "etcd Client" + direction = "in" + protocol = "tcp" + port = "2379" + source_ips = [var.subnet_cidr] + } + + rule { + description = "etcd Peer" + direction = "in" + protocol = "tcp" + port = "2380" + source_ips = [var.subnet_cidr] + } + + rule { + description = "Flannel VXLAN" + direction = "in" + protocol = "udp" + port = "8472" + source_ips = [var.subnet_cidr] + } + + rule { + description = "Kubelet" + direction = "in" + protocol = "tcp" + port = "10250" + source_ips = [var.subnet_cidr] + } + + rule { + description = "NodePorts" + direction = "in" + protocol = "tcp" + port = "30000-32767" + source_ips = ["0.0.0.0/0"] + } + + rule { + description = "ICMP" + direction = "in" + protocol = "icmp" + source_ips = ["0.0.0.0/0"] + } + + apply_to { + label_selector = "cluster=${var.cluster_name}" + } +} diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000..f3c7352 --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + hcloud = { + source = "hetznercloud/hcloud" + version = "~> 1.45" + } + } +} + +provider "hcloud" { + token = var.hcloud_token +} diff --git a/terraform/network.tf b/terraform/network.tf new file mode 100644 index 0000000..08e2e9a --- 
/dev/null +++ b/terraform/network.tf @@ -0,0 +1,11 @@ +resource "hcloud_network" "cluster" { + name = "${var.cluster_name}-network" + ip_range = var.network_cidr +} + +resource "hcloud_network_subnet" "servers" { + network_id = hcloud_network.cluster.id + type = "cloud" + network_zone = lookup({ fsn1 = "eu-central", nbg1 = "eu-central", hel1 = "eu-central", ash = "us-east", hil = "us-west", sin = "ap-southeast" }, var.location, "eu-central") + ip_range = var.subnet_cidr +} diff --git a/terraform/outputs.tf b/terraform/outputs.tf new file mode 100644 index 0000000..1ba4221 --- /dev/null +++ b/terraform/outputs.tf @@ -0,0 +1,44 @@ +output "control_plane_ips" { + description = "Public IPs of control plane nodes" + value = [for cp in hcloud_server.control_plane : cp.ipv4_address] +} + +output "control_plane_private_ips" { + description = "Private IPs of control plane nodes" + value = [for cp in hcloud_server.control_plane : cp.network[0].ip] +} + +output "primary_control_plane_ip" { + description = "Public IP of the primary control plane (first node)" + value = hcloud_server.control_plane[0].ipv4_address +} + +output "worker_ips" { + description = "Public IPs of worker nodes" + value = [for worker in hcloud_server.workers : worker.ipv4_address] +} + +output "worker_private_ips" { + description = "Private IPs of worker nodes" + value = [for worker in hcloud_server.workers : worker.network[0].ip] +} + +output "ssh_private_key_path" { + description = "Path to SSH private key" + value = var.ssh_private_key +} + +output "cluster_name" { + description = "Cluster name" + value = var.cluster_name +} + +output "network_cidr" { + description = "Private network CIDR" + value = var.subnet_cidr +} + +output "kubeconfig_command" { + description = "Command to fetch kubeconfig" + value = "ssh root@${hcloud_server.control_plane[0].ipv4_address} 'cat /etc/rancher/k3s/k3s.yaml' > kubeconfig && sed -i 's/127.0.0.1/${hcloud_server.control_plane[0].ipv4_address}/g' kubeconfig" +} diff --git a/terraform/servers.tf b/terraform/servers.tf new file mode 100644 index 0000000..e4f5535 --- /dev/null +++ b/terraform/servers.tf 
@@ -0,0 +1,63 @@
+# Pin the base image by exact name; with_status guards against picking a
+# deprecated/unavailable image.
+data "hcloud_image" "ubuntu" {
+  name        = "ubuntu-24.04"
+  with_status = ["available"]
+}
+
+resource "hcloud_server" "control_plane" {
+  count = var.control_plane_count
+
+  name        = "${var.cluster_name}-cp-${count.index + 1}"
+  server_type = var.control_plane_type
+  image       = data.hcloud_image.ubuntu.id
+  location    = var.location
+  ssh_keys    = [hcloud_ssh_key.cluster.id]
+
+  labels = {
+    cluster = var.cluster_name
+    role    = "control-plane"
+  }
+
+  network {
+    network_id = hcloud_network.cluster.id
+    ip         = cidrhost(var.subnet_cidr, 10 + count.index) # .10, .11, .12, ...
+  }
+
+  public_net {
+    ipv4_enabled = true
+    ipv6_enabled = true
+  }
+
+  firewall_ids = [hcloud_firewall.cluster.id]
+
+  # Attaching a server to a network can race subnet creation in the hcloud
+  # provider; make the ordering explicit.
+  depends_on = [hcloud_network_subnet.servers]
+}
+
+resource "hcloud_server" "workers" {
+  count = var.worker_count
+
+  name        = "${var.cluster_name}-worker-${count.index + 1}"
+  server_type = var.worker_type
+  image       = data.hcloud_image.ubuntu.id
+  location    = var.location
+  ssh_keys    = [hcloud_ssh_key.cluster.id]
+
+  labels = {
+    cluster = var.cluster_name
+    role    = "worker"
+  }
+
+  network {
+    network_id = hcloud_network.cluster.id
+    ip         = cidrhost(var.subnet_cidr, 20 + count.index) # .20, .21, ... (stays clear of control-plane .10+)
+  }
+
+  public_net {
+    ipv4_enabled = true
+    ipv6_enabled = true
+  }
+
+  firewall_ids = [hcloud_firewall.cluster.id]
+
+  depends_on = [hcloud_server.control_plane]
+}
diff --git a/terraform/ssh.tf b/terraform/ssh.tf
new file mode 100644
index 0000000..7041784
--- /dev/null
+++ b/terraform/ssh.tf
@@ -0,0 +1,8 @@
+data "local_file" "ssh_public_key" {
+  filename = pathexpand(var.ssh_public_key)
+}
+
+resource "hcloud_ssh_key" "cluster" {
+  name       = "${var.cluster_name}-ssh-key"
+  public_key = trimspace(data.local_file.ssh_public_key.content) # key files end with a newline; strip it
+}
diff --git a/terraform/variables.tf b/terraform/variables.tf
new file mode 100644
index 0000000..2761a56
--- /dev/null
+++ b/terraform/variables.tf
@@ -0,0 +1,100 @@
+variable "hcloud_token" {
+  description = "Hetzner Cloud API token"
+  type        = string
+  sensitive   = true
+}
+
+variable "ssh_public_key" {
+  description = "Path to SSH public key"
+  type        = string
+  default     = "~/.ssh/id_ed25519.pub"
+}
+
+variable "ssh_private_key" {
+  description = "Path to SSH private key"
+  type        = string
+  default     = "~/.ssh/id_ed25519"
+}
+
+variable "cluster_name" {
+  description = "Name of the Kubernetes cluster"
+  type        = string
+  default     = "k8s-cluster"
+}
+
+variable "control_plane_count" {
+  description = "Number of control plane nodes"
+  type        = number
+  default     = 3 # keep odd (1, 3, 5) so the embedded etcd cluster can reach quorum
+}
+
+variable "control_plane_type" {
+  description = "Hetzner server type for control plane"
+  type        = string
+  default     = "cx23" # NOTE(review): confirm "cx23" is a currently offered Hetzner type
+}
+
+variable "worker_count" {
+  description = "Number of worker nodes"
+  type        = number
+  default     = 4
+}
+
+variable "worker_type" {
+  description = "Hetzner server type for workers"
+  type        = string
+  default     = "cx33" # NOTE(review): confirm "cx33" is a currently offered Hetzner type
+}
+
+variable "location" {
+  description = "Hetzner datacenter location"
+  type        = string
+  default     = "fsn1"
+}
+
+variable "allowed_ssh_ips" {
+  description = "IP ranges allowed for SSH access"
+  type        = list(string)
+  default     = ["0.0.0.0/0"] # WARNING: open to the whole internet; restrict for production
+}
+
+variable "allowed_api_ips" {
+  description = "IP ranges allowed for Kubernetes API access"
+  type        = list(string)
+  default     = ["0.0.0.0/0"] # WARNING: open to the whole internet; restrict for production
+}
+
+variable "network_cidr" {
+  description = "CIDR for private network"
+  type        = string
+  default     = "10.0.0.0/16"
+}
+
+variable "subnet_cidr" {
+  description = "CIDR for server subnet"
+  type        = string
+  default     = "10.0.1.0/24" # must lie inside network_cidr; node IPs are cidrhost() offsets of this range
+}
+
+variable "s3_access_key" {
+  description = "S3 access key for Terraform state"
+  type        = string
+  sensitive   = true
+}
+
+variable "s3_secret_key" {
+  description = "S3 secret key for Terraform state"
+  type        = string
+  sensitive   = true
+}
+
+variable "s3_endpoint" {
+  description = "S3 endpoint URL" # e.g. a Backblaze B2 S3-compatible endpoint
+  type        = string
+}
+
+variable "s3_bucket" {
+  description = "S3 bucket name for Terraform state"
+  type        = string
+  default     = "k8s-terraform-state"
+}