feat: deploy lightweight observability stack via Ansible
This commit is contained in:
@@ -226,6 +226,7 @@ jobs:
|
|||||||
-e "hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
|
-e "hcloud_token=${{ secrets.HCLOUD_TOKEN }}" \
|
||||||
-e "tailscale_auth_key=${{ secrets.TAILSCALE_AUTH_KEY }}" \
|
-e "tailscale_auth_key=${{ secrets.TAILSCALE_AUTH_KEY }}" \
|
||||||
-e "tailscale_tailnet=${{ secrets.TAILSCALE_TAILNET }}" \
|
-e "tailscale_tailnet=${{ secrets.TAILSCALE_TAILNET }}" \
|
||||||
|
-e "grafana_admin_password=${{ secrets.GRAFANA_ADMIN_PASSWORD }}" \
|
||||||
-e "cluster_name=k8s-cluster"
|
-e "cluster_name=k8s-cluster"
|
||||||
env:
|
env:
|
||||||
ANSIBLE_HOST_KEY_CHECKING: "False"
|
ANSIBLE_HOST_KEY_CHECKING: "False"
|
||||||
@@ -236,6 +237,8 @@ jobs:
|
|||||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get nodes -o wide"
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get nodes -o wide"
|
||||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n kube-system get pods -o wide"
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n kube-system get pods -o wide"
|
||||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass"
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass"
|
||||||
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pods -o wide"
|
||||||
|
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pvc"
|
||||||
env:
|
env:
|
||||||
ANSIBLE_HOST_KEY_CHECKING: "False"
|
ANSIBLE_HOST_KEY_CHECKING: "False"
|
||||||
|
|
||||||
|
|||||||
35
README.md
35
README.md
@@ -10,7 +10,7 @@ Production-ready Kubernetes cluster on Hetzner Cloud using Terraform and Ansible
|
|||||||
| **Workers** | 4x CX33 |
|
| **Workers** | 4x CX33 |
|
||||||
| **Total Cost** | €28.93/mo |
|
| **Total Cost** | €28.93/mo |
|
||||||
| **K8s** | k3s (latest, HA) |
|
| **K8s** | k3s (latest, HA) |
|
||||||
| **Addons** | Hetzner CCM + CSI |
|
| **Addons** | Hetzner CCM + CSI + Prometheus + Grafana + Loki |
|
||||||
| **Access** | SSH/API restricted to Tailnet |
|
| **Access** | SSH/API restricted to Tailnet |
|
||||||
| **Bootstrap** | Terraform + Ansible |
|
| **Bootstrap** | Terraform + Ansible |
|
||||||
|
|
||||||
@@ -166,10 +166,40 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → **
|
|||||||
| `S3_BUCKET` | S3 bucket name (e.g., `k8s-terraform-state`) |
|
| `S3_BUCKET` | S3 bucket name (e.g., `k8s-terraform-state`) |
|
||||||
| `TAILSCALE_AUTH_KEY` | Tailscale auth key for node bootstrap |
|
| `TAILSCALE_AUTH_KEY` | Tailscale auth key for node bootstrap |
|
||||||
| `TAILSCALE_TAILNET` | Tailnet domain (e.g., `yourtailnet.ts.net`) |
|
| `TAILSCALE_TAILNET` | Tailnet domain (e.g., `yourtailnet.ts.net`) |
|
||||||
|
| `GRAFANA_ADMIN_PASSWORD` | Optional admin password for Grafana (auto-generated if unset) |
|
||||||
| `RUNNER_ALLOWED_CIDRS` | Optional CIDR list for CI runner access if you choose to pass it via tfvars/secrets |
|
| `RUNNER_ALLOWED_CIDRS` | Optional CIDR list for CI runner access if you choose to pass it via tfvars/secrets |
|
||||||
| `SSH_PUBLIC_KEY` | SSH public key content |
|
| `SSH_PUBLIC_KEY` | SSH public key content |
|
||||||
| `SSH_PRIVATE_KEY` | SSH private key content |
|
| `SSH_PRIVATE_KEY` | SSH private key content |
|
||||||
|
|
||||||
|
## Observability Stack
|
||||||
|
|
||||||
|
The Ansible playbook deploys a lightweight observability stack in the `observability` namespace:
|
||||||
|
|
||||||
|
- `kube-prometheus-stack` (Prometheus + Grafana)
|
||||||
|
- `loki`
|
||||||
|
- `promtail`
|
||||||
|
|
||||||
|
Services are kept internal for tailnet-first access.
|
||||||
|
|
||||||
|
### Access Grafana and Prometheus
|
||||||
|
|
||||||
|
Run from a tailnet-connected machine (each `port-forward` command blocks, so run them in separate terminals):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export KUBECONFIG=$(pwd)/outputs/kubeconfig
|
||||||
|
|
||||||
|
kubectl -n observability port-forward svc/kube-prometheus-stack-grafana 3000:80
|
||||||
|
kubectl -n observability port-forward svc/kube-prometheus-stack-prometheus 9090:9090
|
||||||
|
```
|
||||||
|
|
||||||
|
Then open:
|
||||||
|
|
||||||
|
- Grafana: http://127.0.0.1:3000
|
||||||
|
- Prometheus: http://127.0.0.1:9090
|
||||||
|
|
||||||
|
Grafana user: `admin`
|
||||||
|
Grafana password: value of `GRAFANA_ADMIN_PASSWORD` secret (or the generated value shown by Ansible output)
|
||||||
|
|
||||||
## File Structure
|
## File Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
@@ -192,7 +222,8 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → **
|
|||||||
│ │ ├── k3s-server/
|
│ │ ├── k3s-server/
|
||||||
│ │ ├── k3s-agent/
|
│ │ ├── k3s-agent/
|
||||||
│ │ ├── ccm/
|
│ │ ├── ccm/
|
||||||
│ │ └── csi/
|
│ │ ├── csi/
|
||||||
|
│ │ └── observability/
|
||||||
│ └── ansible.cfg
|
│ └── ansible.cfg
|
||||||
├── .gitea/
|
├── .gitea/
|
||||||
│ └── workflows/
|
│ └── workflows/
|
||||||
|
|||||||
16
ansible/roles/observability/defaults/main.yml
Normal file
16
ansible/roles/observability/defaults/main.yml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
---
|
||||||
|
# Namespace that receives every observability release and manifest.
observability_namespace: 'observability'

# Pinned Helm chart versions (kept as strings so they are never parsed as numbers).
prometheus_chart_version: '68.4.4'
loki_chart_version: '6.24.0'
promtail_chart_version: '6.16.6'

# Grafana admin password; an empty string makes the role pick one itself.
grafana_admin_password: ''

# Requested persistent volume sizes.
prometheus_storage_size: '10Gi'
grafana_storage_size: '5Gi'
loki_storage_size: '10Gi'

# Storage classes used for the persistent volume claims.
prometheus_storage_class: 'local-path'
grafana_storage_class: 'local-path'
loki_storage_class: 'local-path'
|
||||||
166
ansible/roles/observability/tasks/main.yml
Normal file
166
ansible/roles/observability/tasks/main.yml
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
---
|
||||||
|
# Probe for a usable helm binary. The task never fails and never reports a
# change; only the registered return code is consumed afterwards.
- name: Check if Helm is installed
  command:
    cmd: helm version --short
  register: helm_check
  failed_when: false
  changed_when: false
|
||||||
|
|
||||||
|
# Download and run the upstream Helm 3 installer script, but only when the
# registered helm_check probe returned a non-zero exit code.
- name: Install Helm
  shell: |
    set -o pipefail
    curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
  args:
    # pipefail is a bash feature; the default /bin/sh may not support it.
    executable: /bin/bash
  when: helm_check.rc != 0
  # The installer gives no reliable "already installed" signal, and this task
  # only runs when helm was missing, so it always counts as a change.
  changed_when: true
|
||||||
|
|
||||||
|
# Create the target namespace. An "AlreadyExists" error from kubectl is
# tolerated so that reruns succeed; only a real creation counts as a change.
- name: Ensure observability namespace exists
  command:
    cmd: "kubectl create namespace {{ observability_namespace }}"
  register: create_observability_ns
  changed_when: create_observability_ns.rc == 0
  # List entries are combined with a logical AND.
  failed_when:
    - create_observability_ns.rc != 0
    - '"AlreadyExists" not in create_observability_ns.stderr'
|
||||||
|
|
||||||
|
# Look up the admin password of an already deployed Grafana release. The
# lookup is best-effort: on a first install the secret does not exist and the
# non-zero exit code is simply recorded.
- name: Read existing Grafana admin password
  command:
    argv:
      - kubectl
      - "-n"
      - "{{ observability_namespace }}"
      - get
      - secret
      - kube-prometheus-stack-grafana
      - "-o"
      - "jsonpath={.data.admin-password}"
  register: grafana_existing_password
  changed_when: false
  failed_when: false
  no_log: true

# Resolve the password used for the rest of the play, in priority order:
#   1. an explicitly supplied grafana_admin_password,
#   2. the password already stored in the release secret,
#   3. a freshly generated 32-character alphanumeric password.
# Reusing the stored value keeps reruns stable; generating a new random
# password on every run would make the reported password diverge from the one
# Grafana initialised its persistent database with.
- name: Set Grafana admin password
  set_fact:
    grafana_password_effective: >-
      {{
        grafana_admin_password
        if grafana_admin_password | length > 0
        else (
          grafana_existing_password.stdout | b64decode
          if grafana_existing_password.rc == 0
             and grafana_existing_password.stdout | length > 0
          else lookup('password', '/dev/null length=32 chars=ascii_letters,digits')
        )
      }}
  no_log: true
|
||||||
|
|
||||||
|
# Render the Helm values for kube-prometheus-stack: Grafana and Prometheus with
# persistent storage, ClusterIP-only Grafana service, and Alertmanager plus the
# etcd / controller-manager / scheduler scrapers switched off.
- name: Write kube-prometheus-stack values
  copy:
    dest: /tmp/kube-prometheus-stack-values.yaml
    # The file contains the Grafana admin password, so keep it owner-only.
    mode: "0600"
    content: |
      grafana:
        enabled: true
        # to_json emits a double-quoted scalar, so passwords that are all
        # digits or contain YAML special characters stay intact strings.
        adminPassword: {{ grafana_password_effective | to_json }}
        persistence:
          enabled: true
          storageClassName: {{ grafana_storage_class }}
          size: {{ grafana_storage_size }}
        service:
          type: ClusterIP
      prometheus:
        prometheusSpec:
          retention: 7d
          storageSpec:
            volumeClaimTemplate:
              spec:
                storageClassName: {{ prometheus_storage_class }}
                accessModes: ["ReadWriteOnce"]
                resources:
                  requests:
                    storage: {{ prometheus_storage_size }}
      alertmanager:
        enabled: false
      kubeEtcd:
        enabled: false
      kubeControllerManager:
        enabled: false
      kubeScheduler:
        enabled: false
  # Keep the rendered password out of task output and --diff listings.
  no_log: true
|
||||||
|
|
||||||
|
# Register the prometheus-community chart repository. An "already exists"
# error is tolerated; a zero exit code is reported as a change.
- name: Add Prometheus Helm repo
  command:
    argv:
      - helm
      - repo
      - add
      - prometheus-community
      - https://prometheus-community.github.io/helm-charts
  register: add_prom_repo
  changed_when: add_prom_repo.rc == 0
  # List entries are combined with a logical AND.
  failed_when:
    - add_prom_repo.rc != 0
    - '"already exists" not in add_prom_repo.stderr'
|
||||||
|
|
||||||
|
# Register the grafana chart repository. An "already exists" error is
# tolerated; a zero exit code is reported as a change.
- name: Add Grafana Helm repo
  command:
    argv:
      - helm
      - repo
      - add
      - grafana
      - https://grafana.github.io/helm-charts
  register: add_grafana_repo
  changed_when: add_grafana_repo.rc == 0
  # List entries are combined with a logical AND.
  failed_when:
    - add_grafana_repo.rc != 0
    - '"already exists" not in add_grafana_repo.stderr'
|
||||||
|
|
||||||
|
# Refresh the local chart index of all configured repositories; never
# reported as a change.
- name: Update Helm repos
  command:
    argv: [helm, repo, update]
  changed_when: false
|
||||||
|
|
||||||
|
# Install or upgrade the kube-prometheus-stack release at the pinned chart
# version, using the rendered values file, and wait up to ten minutes.
- name: Install kube-prometheus-stack
  command:
    argv:
      - helm
      - upgrade
      - "--install"
      - kube-prometheus-stack
      - prometheus-community/kube-prometheus-stack
      - "--namespace"
      - "{{ observability_namespace }}"
      - "--version"
      - "{{ prometheus_chart_version }}"
      - "--values"
      - /tmp/kube-prometheus-stack-values.yaml
      - "--wait"
      - "--timeout"
      - 10m
  # Always reported as changed; the helm output is not inspected.
  changed_when: true
|
||||||
|
|
||||||
|
# Render the Helm values for a single-replica, filesystem-backed Loki with
# authentication, the helm test, self-monitoring and the canary disabled.
- name: Write Loki values
  copy:
    dest: /tmp/loki-values.yaml
    mode: "0644"
    content: |
      # Loki chart 6.x defaults to the SimpleScalable mode, which needs object
      # storage. Filesystem storage requires the single-binary mode with the
      # scalable targets (read / write / backend) scaled to zero.
      deploymentMode: SingleBinary
      loki:
        auth_enabled: false
        commonConfig:
          replication_factor: 1
        storage:
          type: filesystem
        # Chart 6.x ships no default schema and refuses to render without one.
        schemaConfig:
          configs:
            - from: "2024-04-01"
              store: tsdb
              object_store: filesystem
              schema: v13
              index:
                prefix: loki_index_
                period: 24h
      singleBinary:
        replicas: 1
        persistence:
          enabled: true
          storageClass: {{ loki_storage_class }}
          size: {{ loki_storage_size }}
      read:
        replicas: 0
      write:
        replicas: 0
      backend:
        replicas: 0
      test:
        enabled: false
      monitoring:
        selfMonitoring:
          enabled: false
      # Root-level key in chart 6.x (it lived under "monitoring" in 5.x).
      lokiCanary:
        enabled: false
|
||||||
|
|
||||||
|
# Install or upgrade the loki release at the pinned chart version, using the
# rendered values file, and wait up to ten minutes.
- name: Install Loki
  command:
    argv:
      - helm
      - upgrade
      - "--install"
      - loki
      - grafana/loki
      - "--namespace"
      - "{{ observability_namespace }}"
      - "--version"
      - "{{ loki_chart_version }}"
      - "--values"
      - /tmp/loki-values.yaml
      - "--wait"
      - "--timeout"
      - 10m
  # Always reported as changed; the helm output is not inspected.
  changed_when: true
|
||||||
|
|
||||||
|
# Render the Promtail values: a single client that pushes logs to the
# loki-gateway service inside the observability namespace.
- name: Write Promtail values
  copy:
    content: |
      config:
        clients:
          - url: http://loki-gateway.{{ observability_namespace }}.svc.cluster.local/loki/api/v1/push
    dest: /tmp/promtail-values.yaml
    mode: "0644"
|
||||||
|
|
||||||
|
# Install or upgrade the promtail release at the pinned chart version, using
# the rendered values file, and wait up to ten minutes.
- name: Install Promtail
  command:
    argv:
      - helm
      - upgrade
      - "--install"
      - promtail
      - grafana/promtail
      - "--namespace"
      - "{{ observability_namespace }}"
      - "--version"
      - "{{ promtail_chart_version }}"
      - "--values"
      - /tmp/promtail-values.yaml
      - "--wait"
      - "--timeout"
      - 10m
  # Always reported as changed; the helm output is not inspected.
  changed_when: true
|
||||||
|
|
||||||
|
# Apply a ConfigMap labelled grafana_datasource="1" that declares Loki (via
# the loki-gateway service) as a non-default, proxied Grafana datasource.
# The manifest is fed to kubectl on stdin.
- name: Create Grafana Loki datasource
  command: kubectl apply -f -
  args:
    stdin: |
      apiVersion: v1
      kind: ConfigMap
      metadata:
        name: grafana-datasource-loki
        namespace: {{ observability_namespace }}
        labels:
          grafana_datasource: "1"
      data:
        loki-datasource.yaml: |
          apiVersion: 1
          datasources:
            - name: Loki
              type: loki
              access: proxy
              url: http://loki-gateway.{{ observability_namespace }}.svc.cluster.local
              isDefault: false
  register: loki_datasource_apply
  # kubectl apply prints "... unchanged" when the live object already matches,
  # so only "created" / "configured" results are reported as a change.
  changed_when: "'unchanged' not in loki_datasource_apply.stdout"
|
||||||
|
|
||||||
|
# Print a short access summary: namespace, port-forward commands for Grafana
# and Prometheus, and the effective Grafana admin password.
# NOTE(review): the password is written in clear text to the play output.
- name: Show observability access details
  debug:
    msg: |
      Observability stack deployed.
      Namespace: {{ observability_namespace }}
      Grafana (tailnet): kubectl -n {{ observability_namespace }} port-forward svc/kube-prometheus-stack-grafana 3000:80
      Prometheus (tailnet): kubectl -n {{ observability_namespace }} port-forward svc/kube-prometheus-stack-prometheus 9090:9090
      Grafana admin password: {{ grafana_password_effective }}
|
||||||
@@ -89,6 +89,13 @@
|
|||||||
roles:
|
roles:
|
||||||
- csi
|
- csi
|
||||||
|
|
||||||
|
# Apply the observability role, with privilege escalation, on the first host
# of the control_plane group only.
- name: Deploy observability stack
  hosts: "control_plane[0]"
  become: true

  roles:
    - role: observability
|
||||||
|
|
||||||
- name: Finalize
|
- name: Finalize
|
||||||
hosts: localhost
|
hosts: localhost
|
||||||
connection: local
|
connection: local
|
||||||
|
|||||||
Reference in New Issue
Block a user