feat: migrate observability stack to flux gitops
Some checks failed
Deploy Cluster / Terraform (push) Successful in 45s
Deploy Cluster / Ansible (push) Failing after 1m11s

This commit is contained in:
2026-03-04 23:38:40 +00:00
parent 480a079dc8
commit 8b403cd1d6
28 changed files with 493 additions and 1 deletions

View File

@@ -0,0 +1,3 @@
---
# Kustomize entry point that lists no resources, so it renders nothing.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@@ -0,0 +1,3 @@
---
# Placeholder Kustomize file: the resource list is intentionally empty.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@@ -0,0 +1,15 @@
---
# Flux Kustomization "addon-ccm": applies ./infrastructure/addons/ccm from the
# "platform" GitRepository. Reconciliation is switched off via `suspend: true`.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  namespace: flux-system
  name: addon-ccm
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/ccm
  # Reconciliation cadence and limits.
  interval: 10m
  timeout: 5m
  # Apply behaviour.
  prune: true
  wait: true
  suspend: true

View File

@@ -0,0 +1,17 @@
---
# Flux Kustomization "addon-csi": applies ./infrastructure/addons/csi from the
# "platform" GitRepository once "addon-ccm" is ready. Currently suspended.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  namespace: flux-system
  name: addon-csi
spec:
  # Ordering: only reconcile after the named Kustomization is ready.
  dependsOn:
    - name: addon-ccm
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/csi
  # Reconciliation cadence and limits.
  interval: 10m
  timeout: 5m
  # Apply behaviour.
  prune: true
  wait: true
  suspend: true

View File

@@ -0,0 +1,17 @@
---
# Flux Kustomization "addon-observability-content": applies
# ./infrastructure/addons/observability-content from the "platform"
# GitRepository after "addon-observability" is ready. Active (not suspended).
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  namespace: flux-system
  name: addon-observability-content
spec:
  # Ordering: only reconcile after the named Kustomization is ready.
  dependsOn:
    - name: addon-observability
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/observability-content
  # Reconciliation cadence and limits.
  interval: 10m
  timeout: 5m
  # Apply behaviour.
  prune: true
  wait: true
  suspend: false

View File

@@ -0,0 +1,15 @@
---
# Flux Kustomization "addon-observability": applies
# ./infrastructure/addons/observability from the "platform" GitRepository.
# Active (not suspended).
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  namespace: flux-system
  name: addon-observability
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/observability
  # Reconciliation cadence and limits.
  interval: 10m
  timeout: 5m
  # Apply behaviour.
  prune: true
  wait: true
  suspend: false

View File

@@ -0,0 +1,15 @@
---
# Flux Kustomization "addon-tailscale-operator": applies
# ./infrastructure/addons/tailscale-operator from the "platform" GitRepository.
# Reconciliation is switched off via `suspend: true`.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  namespace: flux-system
  name: addon-tailscale-operator
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: platform
  path: ./infrastructure/addons/tailscale-operator
  # Reconciliation cadence and limits.
  interval: 10m
  timeout: 5m
  # Apply behaviour.
  prune: true
  wait: true
  suspend: true

View File

@@ -0,0 +1,8 @@
---
# Kustomize entry point that bundles one Flux Kustomization manifest per addon.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - kustomization-ccm.yaml
  - kustomization-csi.yaml
  - kustomization-tailscale-operator.yaml
  - kustomization-observability.yaml
  - kustomization-observability-content.yaml

View File

@@ -0,0 +1,60 @@
---
# Grafana dashboard "K8s Cluster Overview", shipped as a ConfigMap carrying the
# `grafana_dashboard` label.
#
# Notes on the dashboard JSON (JSON itself cannot carry comments):
# - The panels reference the data source as ${DS_PROMETHEUS}. That placeholder
#   is only substituted when a dashboard is imported through the Grafana UI;
#   for a provisioned dashboard it must exist as a template variable, so a
#   `datasource`-type variable named DS_PROMETHEUS is declared under
#   `templating.list`.
# - "Ready Nodes" uses sum() rather than count(): kube-state-metrics exports
#   the {condition="Ready",status="true"} series for every node with value
#   1 (ready) or 0 (not ready), so count() would report all nodes.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-k8s-overview
  namespace: observability
  labels:
    grafana_dashboard: "1"
data:
  k8s-overview.json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "fiscalYearStartMonth": 0,
      "graphTooltip": 0,
      "id": null,
      "links": [],
      "panels": [
        {
          "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
          "fieldConfig": {"defaults": {"unit": "none"}, "overrides": []},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
          "id": 1,
          "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
          "targets": [
            {
              "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\"})",
              "legendFormat": "ready",
              "refId": "A"
            }
          ],
          "title": "Ready Nodes",
          "type": "stat"
        },
        {
          "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
          "fieldConfig": {"defaults": {"unit": "percentunit"}, "overrides": []},
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
          "id": 2,
          "targets": [
            {
              "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))",
              "legendFormat": "cpu",
              "refId": "A"
            }
          ],
          "title": "Cluster CPU Usage",
          "type": "timeseries"
        }
      ],
      "refresh": "30s",
      "schemaVersion": 39,
      "style": "dark",
      "tags": ["kubernetes", "infrastructure"],
      "templating": {
        "list": [
          {
            "current": {},
            "hide": 0,
            "includeAll": false,
            "label": "Data source",
            "multi": false,
            "name": "DS_PROMETHEUS",
            "options": [],
            "query": "prometheus",
            "refresh": 1,
            "regex": "",
            "type": "datasource"
          }
        ]
      },
      "time": {"from": "now-1h", "to": "now"},
      "timezone": "browser",
      "title": "K8s Cluster Overview",
      "uid": "k8s-cluster-overview",
      "version": 1
    }

View File

@@ -0,0 +1,16 @@
---
# Grafana data source definitions, shipped as a ConfigMap carrying the
# `grafana_datasource` label. Declares a single non-default Loki data source
# reached through the Grafana backend (access: proxy).
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: observability
  name: grafana-datasources-core
  labels:
    grafana_datasource: "1"
data:
  datasources.yaml: |
    apiVersion: 1
    datasources:
      - name: Loki
        type: loki
        access: proxy
        url: "http://loki.observability.svc.cluster.local:3100"
        isDefault: false

View File

@@ -0,0 +1,5 @@
---
# Kustomize entry point bundling the Grafana data source and dashboard
# ConfigMaps.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - grafana-datasources-core-configmap.yaml
  - grafana-dashboard-k8s-overview-configmap.yaml

View File

@@ -0,0 +1,63 @@
---
# HelmRelease for kube-prometheus-stack (chart 68.4.4 from the
# "prometheus-community" HelmRepository), installed into the "observability"
# namespace. Enables Grafana and Prometheus with local-path persistent
# storage; Alertmanager and the etcd / controller-manager / scheduler
# scrape targets are disabled.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: flux-system
spec:
  interval: 10m
  targetNamespace: observability
  chart:
    spec:
      chart: kube-prometheus-stack
      version: "68.4.4"
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  install:
    createNamespace: true
    # Install the chart's CRDs and replace any that already exist.
    crds: CreateReplace
    remediation:
      retries: 3
  upgrade:
    # Flux skips CRDs on upgrade by default; without this a chart version bump
    # would leave the prometheus-operator CRDs at the old schema.
    crds: CreateReplace
    remediation:
      retries: 3
  values:
    grafana:
      enabled: true
      persistence:
        enabled: true
        storageClassName: local-path
        size: 5Gi
      service:
        type: ClusterIP
      # Sidecars load labelled ConfigMaps from the observability namespace.
      sidecar:
        datasources:
          enabled: true
          label: grafana_datasource
          searchNamespace: observability
        dashboards:
          enabled: true
          label: grafana_dashboard
          searchNamespace: observability
    prometheus:
      service:
        type: ClusterIP
      prometheusSpec:
        retention: 7d
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: local-path
              accessModes:
                - ReadWriteOnce
              resources:
                requests:
                  storage: 10Gi
    alertmanager:
      enabled: false
    kubeEtcd:
      enabled: false
    kubeControllerManager:
      enabled: false
    kubeScheduler:
      enabled: false

View File

@@ -0,0 +1,93 @@
---
# HelmRelease for Loki (chart 6.10.0 from the "grafana" HelmRepository),
# installed into the "observability" namespace as a single-binary deployment
# with filesystem storage on a 10Gi local-path volume.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: loki
  namespace: flux-system
spec:
  interval: 10m
  # Pin the Helm release name. With targetNamespace set and no releaseName,
  # Flux names the release "<targetNamespace>-<name>" (observability-loki),
  # which would also rename the chart's Service and break the
  # http://loki.observability.svc.cluster.local:3100 URLs used by the Grafana
  # data source and by promtail.
  releaseName: loki
  targetNamespace: observability
  chart:
    spec:
      chart: loki
      version: "6.10.0"
      sourceRef:
        kind: HelmRepository
        name: grafana
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    deploymentMode: SingleBinary
    loki:
      auth_enabled: false
      commonConfig:
        replication_factor: 1
      schemaConfig:
        configs:
          - from: "2024-04-01"
            store: tsdb
            object_store: filesystem
            schema: v13
            index:
              prefix: loki_index_
              period: 24h
      storage:
        type: filesystem
      limits_config:
        allow_structured_metadata: true
        volume_enabled: true
        retention_period: 168h
      # retention_period above is only enforced when the compactor runs with
      # retention enabled; otherwise chunks are never deleted and the volume
      # fills up. Loki 3.x requires delete_request_store alongside it.
      compactor:
        retention_enabled: true
        delete_request_store: filesystem
      pattern_ingester:
        enabled: true
      ruler:
        enable_api: true
    singleBinary:
      replicas: 1
      persistence:
        size: 10Gi
        storageClass: local-path
      resources:
        requests:
          cpu: 100m
          memory: 256Mi
        limits:
          cpu: 500m
          memory: 1Gi
    # Zero out the components that belong to the other deployment modes.
    backend:
      replicas: 0
    read:
      replicas: 0
    write:
      replicas: 0
    ingester:
      replicas: 0
    querier:
      replicas: 0
    queryFrontend:
      replicas: 0
    queryScheduler:
      replicas: 0
    distributor:
      replicas: 0
    compactor:
      replicas: 0
    indexGateway:
      replicas: 0
    bloomCompactor:
      replicas: 0
    bloomGateway:
      replicas: 0
    gateway:
      enabled: false
    test:
      enabled: false
    monitoring:
      selfMonitoring:
        enabled: false
    # In chart 6.x lokiCanary is a top-level value, not a child of `monitoring`.
    lokiCanary:
      enabled: false

View File

@@ -0,0 +1,27 @@
---
# HelmRelease for promtail (chart 6.16.6 from the "grafana" HelmRepository),
# installed into the "observability" namespace. The only value overridden is
# the client push URL.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  namespace: flux-system
  name: promtail
spec:
  targetNamespace: observability
  interval: 10m
  chart:
    spec:
      sourceRef:
        kind: HelmRepository
        namespace: flux-system
        name: grafana
      chart: promtail
      version: 6.16.6
  # Retry failed installs and upgrades up to three times.
  install:
    remediation:
      retries: 3
    createNamespace: true
  upgrade:
    remediation:
      retries: 3
  values:
    config:
      clients:
        - url: http://loki.observability.svc.cluster.local:3100/loki/api/v1/push

View File

@@ -0,0 +1,8 @@
---
# Flux source for the Grafana Helm chart repository, refreshed every hour.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  namespace: flux-system
  name: grafana
spec:
  url: https://grafana.github.io/helm-charts
  interval: 1h

View File

@@ -0,0 +1,8 @@
---
# Flux source for the prometheus-community Helm chart repository, refreshed
# every hour.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  namespace: flux-system
  name: prometheus-community
spec:
  url: https://prometheus-community.github.io/helm-charts
  interval: 1h

View File

@@ -0,0 +1,9 @@
---
# Kustomize entry point for the observability addon: the namespace, the two
# Helm repositories, and the three HelmReleases.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - helmrepository-prometheus-community.yaml
  - helmrepository-grafana.yaml
  - helmrelease-kube-prometheus-stack.yaml
  - helmrelease-loki.yaml
  - helmrelease-promtail.yaml

View File

@@ -0,0 +1,4 @@
---
# Namespace that holds the observability workloads and their ConfigMaps.
apiVersion: v1
kind: Namespace
metadata:
  name: observability

View File

@@ -0,0 +1,3 @@
---
# Empty Kustomize file: no resources are included.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@@ -0,0 +1,4 @@
---
# Kustomize entry point that pulls in the `addons` directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - addons