diff --git a/README.md b/README.md index 6401181..30e8f10 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,47 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → ** | `SSH_PUBLIC_KEY` | SSH public key content | | `SSH_PRIVATE_KEY` | SSH private key content | +## GitOps (Flux) + +This repo now includes a Flux GitOps layout for phased migration from imperative Ansible applies to continuous reconciliation. + +### Repository layout + +- `clusters/prod/`: cluster entrypoint and Flux reconciliation objects +- `clusters/prod/flux-system/`: `GitRepository` source and top-level `Kustomization` graph +- `infrastructure/`: infrastructure addon reconciliation graph +- `infrastructure/addons/*`: per-addon manifests (observability + observability-content migrated) +- `apps/`: application workload layer (currently scaffolded) + +### Reconciliation graph + +- `infrastructure` (top-level) + - `addon-ccm` + - `addon-csi` depends on `addon-ccm` + - `addon-tailscale-operator` + - `addon-observability` + - `addon-observability-content` depends on `addon-observability` +- `apps` depends on `infrastructure` + +### Bootstrap notes + +1. Install Flux controllers in `flux-system`. +2. Create the Flux deploy key/secret named `flux-system` in `flux-system` namespace. +3. Apply `clusters/prod/flux-system/` once to establish source + reconciliation graph. +4. Unsuspend addon `Kustomization` objects one-by-one as each addon is migrated from Ansible. + +### Current migration status + +- `addon-observability-content` is now GitOps-managed from `infrastructure/addons/observability-content/`. +- `addon-observability` is now GitOps-managed from `infrastructure/addons/observability/` using Flux `HelmRelease` resources for: + - `kube-prometheus-stack` + - `loki` + - `promtail` +- Remaining addons stay suspended until migrated. +- During transition, avoid applying Grafana content from both Flux and Ansible at the same time. 
+ +Ansible `site.yml` skips the `observability` and `observability-content` roles while `observability_gitops_enabled` is `true` (the default); set it to `false` to deploy them with Ansible instead. + ## Observability Stack The Ansible playbook deploys a lightweight observability stack in the `observability` namespace: @@ -182,7 +223,7 @@ The Ansible playbook deploys a lightweight observability stack in the `observabi - `loki` - `promtail` -Grafana content is managed as code via ConfigMaps in `ansible/roles/observability-content/`. +Grafana content is managed as code via ConfigMaps in `infrastructure/addons/observability-content/` (Flux), migrated from `ansible/roles/observability-content/`. Services are kept internal by default, with optional declarative Tailscale exposure when the Tailscale Kubernetes Operator is healthy. diff --git a/ansible/site.yml b/ansible/site.yml index 7a16a6d..efa0c26 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -99,6 +99,7 @@ - name: Deploy observability stack hosts: control_plane[0] become: true roles: - - observability + - role: observability + when: not (observability_gitops_enabled | default(true) | bool) @@ -106,6 +107,7 @@ - name: Provision Grafana content hosts: control_plane[0] become: true roles: - - observability-content + - role: observability-content + when: not (observability_gitops_enabled | default(true) | bool) diff --git a/apps/kustomization.yaml b/apps/kustomization.yaml new file mode 100644 index 0000000..b83b23e --- /dev/null +++ b/apps/kustomization.yaml @@ -0,0 +1,3 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: [] diff --git a/clusters/prod/flux-system/gitrepository-platform.yaml b/clusters/prod/flux-system/gitrepository-platform.yaml new file mode 100644 index 0000000..14ab5af --- /dev/null +++ b/clusters/prod/flux-system/gitrepository-platform.yaml @@ -0,0 +1,12 @@ +apiVersion: source.toolkit.fluxcd.io/v1 +kind: GitRepository +metadata: + name: platform + namespace: flux-system +spec: + interval: 1m + ref: + branch: main + url: 
ssh://git@tea.michaelfisher.tech/HomeInfra/HetznerTerra.git + secretRef: + name: flux-system diff --git a/clusters/prod/flux-system/kustomization-apps.yaml b/clusters/prod/flux-system/kustomization-apps.yaml new file mode 100644 index 0000000..ad14056 --- /dev/null +++ b/clusters/prod/flux-system/kustomization-apps.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: apps + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: platform + path: ./apps + dependsOn: + - name: infrastructure + wait: true + timeout: 5m + suspend: true diff --git a/clusters/prod/flux-system/kustomization-infrastructure.yaml b/clusters/prod/flux-system/kustomization-infrastructure.yaml new file mode 100644 index 0000000..0aa0cf0 --- /dev/null +++ b/clusters/prod/flux-system/kustomization-infrastructure.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: infrastructure + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: platform + path: ./infrastructure + wait: true + timeout: 5m diff --git a/clusters/prod/flux-system/kustomization.yaml b/clusters/prod/flux-system/kustomization.yaml new file mode 100644 index 0000000..656d1dd --- /dev/null +++ b/clusters/prod/flux-system/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - gitrepository-platform.yaml + - kustomization-infrastructure.yaml + - kustomization-apps.yaml diff --git a/clusters/prod/kustomization.yaml b/clusters/prod/kustomization.yaml new file mode 100644 index 0000000..77d582a --- /dev/null +++ b/clusters/prod/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - flux-system diff --git a/infrastructure/addons/ccm/kustomization.yaml b/infrastructure/addons/ccm/kustomization.yaml new file mode 100644 
index 0000000..b83b23e --- /dev/null +++ b/infrastructure/addons/ccm/kustomization.yaml @@ -0,0 +1,3 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: [] diff --git a/infrastructure/addons/csi/kustomization.yaml b/infrastructure/addons/csi/kustomization.yaml new file mode 100644 index 0000000..b83b23e --- /dev/null +++ b/infrastructure/addons/csi/kustomization.yaml @@ -0,0 +1,3 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: [] diff --git a/infrastructure/addons/kustomization-ccm.yaml b/infrastructure/addons/kustomization-ccm.yaml new file mode 100644 index 0000000..9d20eee --- /dev/null +++ b/infrastructure/addons/kustomization-ccm.yaml @@ -0,0 +1,15 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: addon-ccm + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: platform + path: ./infrastructure/addons/ccm + wait: true + timeout: 5m + suspend: true diff --git a/infrastructure/addons/kustomization-csi.yaml b/infrastructure/addons/kustomization-csi.yaml new file mode 100644 index 0000000..ed2d6f7 --- /dev/null +++ b/infrastructure/addons/kustomization-csi.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: addon-csi + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: platform + path: ./infrastructure/addons/csi + dependsOn: + - name: addon-ccm + wait: true + timeout: 5m + suspend: true diff --git a/infrastructure/addons/kustomization-observability-content.yaml b/infrastructure/addons/kustomization-observability-content.yaml new file mode 100644 index 0000000..f9cb16a --- /dev/null +++ b/infrastructure/addons/kustomization-observability-content.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: addon-observability-content + namespace: flux-system +spec: + 
interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: platform + path: ./infrastructure/addons/observability-content + dependsOn: + - name: addon-observability + wait: true + timeout: 5m + suspend: false diff --git a/infrastructure/addons/kustomization-observability.yaml b/infrastructure/addons/kustomization-observability.yaml new file mode 100644 index 0000000..17ce3da --- /dev/null +++ b/infrastructure/addons/kustomization-observability.yaml @@ -0,0 +1,15 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: addon-observability + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: platform + path: ./infrastructure/addons/observability + wait: true + timeout: 5m + suspend: false diff --git a/infrastructure/addons/kustomization-tailscale-operator.yaml b/infrastructure/addons/kustomization-tailscale-operator.yaml new file mode 100644 index 0000000..4472e14 --- /dev/null +++ b/infrastructure/addons/kustomization-tailscale-operator.yaml @@ -0,0 +1,15 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: addon-tailscale-operator + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: platform + path: ./infrastructure/addons/tailscale-operator + wait: true + timeout: 5m + suspend: true diff --git a/infrastructure/addons/kustomization.yaml b/infrastructure/addons/kustomization.yaml new file mode 100644 index 0000000..d14e849 --- /dev/null +++ b/infrastructure/addons/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - kustomization-ccm.yaml + - kustomization-csi.yaml + - kustomization-tailscale-operator.yaml + - kustomization-observability.yaml + - kustomization-observability-content.yaml diff --git a/infrastructure/addons/observability-content/grafana-dashboard-k8s-overview-configmap.yaml 
b/infrastructure/addons/observability-content/grafana-dashboard-k8s-overview-configmap.yaml new file mode 100644 index 0000000..e7eb7e5 --- /dev/null +++ b/infrastructure/addons/observability-content/grafana-dashboard-k8s-overview-configmap.yaml @@ -0,0 +1,60 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-k8s-overview + namespace: observability + labels: + grafana_dashboard: "1" +data: + k8s-overview.json: | + { + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "none"}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}, + "id": 1, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [ + { + "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\"})", + "legendFormat": "ready", + "refId": "A" + } + ], + "title": "Ready Nodes", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "percentunit"}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "id": 2, + "targets": [ + { + "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))", + "legendFormat": "cpu", + "refId": "A" + } + ], + "title": "Cluster CPU Usage", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": ["kubernetes", "infrastructure"], + "templating": {"list": []}, + "time": {"from": "now-1h", "to": "now"}, + "timezone": "browser", + "title": "K8s Cluster Overview", + "uid": "k8s-cluster-overview", + "version": 1 + } diff --git a/infrastructure/addons/observability-content/grafana-datasources-core-configmap.yaml b/infrastructure/addons/observability-content/grafana-datasources-core-configmap.yaml new file mode 100644 
index 0000000..92538fa --- /dev/null +++ b/infrastructure/addons/observability-content/grafana-datasources-core-configmap.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasources-core + namespace: observability + labels: + grafana_datasource: "1" +data: + datasources.yaml: | + apiVersion: 1 + datasources: + - name: Loki + type: loki + access: proxy + url: "http://loki.observability.svc.cluster.local:3100" + isDefault: false diff --git a/infrastructure/addons/observability-content/kustomization.yaml b/infrastructure/addons/observability-content/kustomization.yaml new file mode 100644 index 0000000..c9a6b15 --- /dev/null +++ b/infrastructure/addons/observability-content/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - grafana-datasources-core-configmap.yaml + - grafana-dashboard-k8s-overview-configmap.yaml diff --git a/infrastructure/addons/observability/helmrelease-kube-prometheus-stack.yaml b/infrastructure/addons/observability/helmrelease-kube-prometheus-stack.yaml new file mode 100644 index 0000000..2adcb62 --- /dev/null +++ b/infrastructure/addons/observability/helmrelease-kube-prometheus-stack.yaml @@ -0,0 +1,63 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: kube-prometheus-stack + namespace: flux-system +spec: + interval: 10m + targetNamespace: observability + chart: + spec: + chart: kube-prometheus-stack + version: 68.4.4 + sourceRef: + kind: HelmRepository + name: prometheus-community + namespace: flux-system + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + remediation: + retries: 3 + values: + grafana: + enabled: true + persistence: + enabled: true + storageClassName: local-path + size: 5Gi + service: + type: ClusterIP + sidecar: + datasources: + enabled: true + label: grafana_datasource + searchNamespace: observability + dashboards: + enabled: true + label: grafana_dashboard + searchNamespace: 
observability + prometheus: + service: + type: ClusterIP + prometheusSpec: + retention: 7d + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: local-path + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + alertmanager: + enabled: false + kubeEtcd: + enabled: false + kubeControllerManager: + enabled: false + kubeScheduler: + enabled: false diff --git a/infrastructure/addons/observability/helmrelease-loki.yaml b/infrastructure/addons/observability/helmrelease-loki.yaml new file mode 100644 index 0000000..4763f56 --- /dev/null +++ b/infrastructure/addons/observability/helmrelease-loki.yaml @@ -0,0 +1,94 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: loki + namespace: flux-system +spec: + interval: 10m + targetNamespace: observability + releaseName: loki + chart: + spec: + chart: loki + version: 6.10.0 + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + remediation: + retries: 3 + values: + deploymentMode: SingleBinary + loki: + auth_enabled: false + commonConfig: + replication_factor: 1 + schemaConfig: + configs: + - from: "2024-04-01" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: loki_index_ + period: 24h + storage: + type: filesystem + limits_config: + allow_structured_metadata: true + volume_enabled: true + retention_period: 168h + pattern_ingester: + enabled: true + ruler: + enable_api: true + singleBinary: + replicas: 1 + persistence: + size: 10Gi + storageClass: local-path + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 1Gi + backend: + replicas: 0 + read: + replicas: 0 + write: + replicas: 0 + ingester: + replicas: 0 + querier: + replicas: 0 + queryFrontend: + replicas: 0 + queryScheduler: + replicas: 0 + distributor: + replicas: 0 + compactor: + replicas: 0 + indexGateway: + replicas: 0 + bloomCompactor: + replicas: 0 + bloomGateway: + replicas: 
0 + gateway: + enabled: false + test: + enabled: false + monitoring: + selfMonitoring: + enabled: false + lokiCanary: + enabled: false diff --git a/infrastructure/addons/observability/helmrelease-promtail.yaml b/infrastructure/addons/observability/helmrelease-promtail.yaml new file mode 100644 index 0000000..2fe09f0 --- /dev/null +++ b/infrastructure/addons/observability/helmrelease-promtail.yaml @@ -0,0 +1,27 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: promtail + namespace: flux-system +spec: + interval: 10m + targetNamespace: observability + chart: + spec: + chart: promtail + version: 6.16.6 + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + remediation: + retries: 3 + values: + config: + clients: + - url: http://loki.observability.svc.cluster.local:3100/loki/api/v1/push diff --git a/infrastructure/addons/observability/helmrepository-grafana.yaml b/infrastructure/addons/observability/helmrepository-grafana.yaml new file mode 100644 index 0000000..1235012 --- /dev/null +++ b/infrastructure/addons/observability/helmrepository-grafana.yaml @@ -0,0 +1,8 @@ +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: grafana + namespace: flux-system +spec: + interval: 1h + url: https://grafana.github.io/helm-charts diff --git a/infrastructure/addons/observability/helmrepository-prometheus-community.yaml b/infrastructure/addons/observability/helmrepository-prometheus-community.yaml new file mode 100644 index 0000000..e6c9333 --- /dev/null +++ b/infrastructure/addons/observability/helmrepository-prometheus-community.yaml @@ -0,0 +1,8 @@ +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: prometheus-community + namespace: flux-system +spec: + interval: 1h + url: https://prometheus-community.github.io/helm-charts diff --git a/infrastructure/addons/observability/kustomization.yaml 
b/infrastructure/addons/observability/kustomization.yaml new file mode 100644 index 0000000..52be631 --- /dev/null +++ b/infrastructure/addons/observability/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - helmrepository-prometheus-community.yaml + - helmrepository-grafana.yaml + - helmrelease-kube-prometheus-stack.yaml + - helmrelease-loki.yaml + - helmrelease-promtail.yaml diff --git a/infrastructure/addons/observability/namespace.yaml b/infrastructure/addons/observability/namespace.yaml new file mode 100644 index 0000000..4f75b8c --- /dev/null +++ b/infrastructure/addons/observability/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: observability diff --git a/infrastructure/addons/tailscale-operator/kustomization.yaml b/infrastructure/addons/tailscale-operator/kustomization.yaml new file mode 100644 index 0000000..b83b23e --- /dev/null +++ b/infrastructure/addons/tailscale-operator/kustomization.yaml @@ -0,0 +1,3 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: [] diff --git a/infrastructure/kustomization.yaml b/infrastructure/kustomization.yaml new file mode 100644 index 0000000..05a626a --- /dev/null +++ b/infrastructure/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - addons