feat: manage grafana content as code with fast dashboard workflow
This commit is contained in:
7
ansible/dashboards.yml
Normal file
7
ansible/dashboards.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
|
||||
# Standalone playbook: runs only the observability-content role, so Grafana
# dashboards and datasources can be re-applied without the rest of the stack.
- name: Provision Grafana dashboards and datasources
  become: true
  hosts: control_plane[0]  # first host of the control_plane group only
  roles:
    - observability-content
|
||||
5
ansible/roles/observability-content/defaults/main.yml
Normal file
5
ansible/roles/observability-content/defaults/main.yml
Normal file
@@ -0,0 +1,5 @@
|
||||
---
|
||||
# Namespace that holds the Grafana deployment and the ConfigMaps below.
observability_namespace: 'observability'

# metadata.name of the ConfigMap carrying the k8s-overview dashboard JSON.
grafana_dashboard_configmap_name: 'grafana-dashboard-k8s-overview'

# metadata.name of the ConfigMap carrying the datasource definitions.
grafana_datasource_configmap_name: 'grafana-datasources-core'

# When true, the datasources template also emits the Loki datasource entry.
loki_enabled: true
|
||||
37
ansible/roles/observability-content/tasks/main.yml
Normal file
37
ansible/roles/observability-content/tasks/main.yml
Normal file
@@ -0,0 +1,37 @@
|
||||
---
|
||||
# Create the namespace; a namespace that already exists is tolerated.
- name: Ensure observability namespace exists
  command:
    cmd: kubectl create namespace {{ observability_namespace }}
  register: create_observability_ns
  # Report "changed" only when kubectl actually created the namespace.
  changed_when: create_observability_ns.rc == 0
  # List entries are AND-ed: fail only on a non-zero exit whose stderr
  # does not mention "AlreadyExists".
  failed_when:
    - create_observability_ns.rc != 0
    - '"AlreadyExists" not in create_observability_ns.stderr'
|
||||
|
||||
# Block until the Grafana deployment reports a finished rollout; kubectl
# itself gives up after the 5m timeout.
- name: Wait for Grafana deployment rollout
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }}
      rollout status deployment/kube-prometheus-stack-grafana
      --timeout=5m
  changed_when: false  # status query only, never modifies the cluster
|
||||
|
||||
# Render the datasources manifest to a scratch file on the target host;
# the following task feeds that file to kubectl apply.
- name: Write Grafana datasources ConfigMap
  template:
    dest: /tmp/grafana-datasources.yaml
    src: grafana-datasources.yaml.j2
    mode: "0644"
|
||||
|
||||
# Apply the rendered datasources manifest.
# kubectl prints "<resource> unchanged" when the live object already matches
# the file, so change status is derived from that output instead of being
# hard-coded to true on every run.
- name: Apply Grafana datasources ConfigMap
  command: kubectl apply -f /tmp/grafana-datasources.yaml
  register: grafana_datasources_apply
  changed_when: "'unchanged' not in grafana_datasources_apply.stdout"
|
||||
|
||||
# Render the dashboard manifest to a scratch file on the target host;
# the following task feeds that file to kubectl apply.
- name: Write Grafana dashboard ConfigMap
  template:
    dest: /tmp/grafana-dashboard-k8s-overview.yaml
    src: grafana-dashboard-k8s-overview.yaml.j2
    mode: "0644"
|
||||
|
||||
# Apply the rendered dashboard manifest.
# kubectl prints "<resource> unchanged" when the live object already matches
# the file, so change status is derived from that output instead of being
# hard-coded to true on every run.
- name: Apply Grafana dashboard ConfigMap
  command: kubectl apply -f /tmp/grafana-dashboard-k8s-overview.yaml
  register: grafana_dashboard_apply
  changed_when: "'unchanged' not in grafana_dashboard_apply.stdout"
|
||||
|
||||
# Informational only: print the names of the two ConfigMaps this role applies.
- name: Show Grafana content provisioning summary
  debug:
    msg: |
      Grafana content applied.
      Datasources ConfigMap: {{ grafana_datasource_configmap_name }}
      Dashboard ConfigMap: {{ grafana_dashboard_configmap_name }}
|
||||
@@ -0,0 +1,60 @@
|
||||
# ConfigMap carrying the "K8s Cluster Overview" dashboard JSON, labelled
# grafana_dashboard so a dashboard loader can select it.
#
# Both panels reference the datasource as ${DS_PROMETHEUS}. Dashboards loaded
# from files get no import-time input substitution, so the placeholder is
# declared below as a dashboard variable of type "datasource" that lists the
# available Prometheus datasources. Without it the reference cannot resolve.
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ grafana_dashboard_configmap_name }}"
  namespace: "{{ observability_namespace }}"
  labels:
    grafana_dashboard: "1"
data:
  k8s-overview.json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "fiscalYearStartMonth": 0,
      "graphTooltip": 0,
      "id": null,
      "links": [],
      "panels": [
        {
          "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
          "fieldConfig": {"defaults": {"unit": "none"}, "overrides": []},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
          "id": 1,
          "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
          "targets": [
            {
              "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\"})",
              "legendFormat": "ready",
              "refId": "A"
            }
          ],
          "title": "Ready Nodes",
          "type": "stat"
        },
        {
          "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
          "fieldConfig": {"defaults": {"unit": "percentunit"}, "overrides": []},
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
          "id": 2,
          "targets": [
            {
              "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))",
              "legendFormat": "cpu",
              "refId": "A"
            }
          ],
          "title": "Cluster CPU Usage",
          "type": "timeseries"
        }
      ],
      "refresh": "30s",
      "schemaVersion": 39,
      "style": "dark",
      "tags": ["kubernetes", "infrastructure"],
      "templating": {
        "list": [
          {
            "hide": 0,
            "includeAll": false,
            "label": "Datasource",
            "multi": false,
            "name": "DS_PROMETHEUS",
            "options": [],
            "query": "prometheus",
            "refresh": 1,
            "regex": "",
            "type": "datasource"
          }
        ]
      },
      "time": {"from": "now-1h", "to": "now"},
      "timezone": "browser",
      "title": "K8s Cluster Overview",
      "uid": "k8s-cluster-overview",
      "version": 1
    }
|
||||
@@ -0,0 +1,23 @@
|
||||
# ConfigMap carrying Grafana datasource definitions, labelled
# grafana_datasource so a datasource loader can select it.
#
# The if/endif control tags below sit at column 0 on purpose: Ansible's
# template module trims the newline after a control tag but does not strip
# whitespace in front of it, so an indented tag would push its indentation
# onto the next rendered line and corrupt the nesting of the list.
#
# NOTE(review): if the kube-prometheus-stack chart still ships its own default
# Prometheus datasource, a second entry with isDefault true may clash with it.
# Confirm against the Helm values used for the chart.
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ grafana_datasource_configmap_name }}"
  namespace: "{{ observability_namespace }}"
  labels:
    grafana_datasource: "1"
data:
  datasources.yaml: |
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        access: proxy
        url: http://kube-prometheus-stack-prometheus.{{ observability_namespace }}.svc.cluster.local:9090
        isDefault: true
{% if loki_enabled %}
      - name: Loki
        type: loki
        access: proxy
        url: http://loki.{{ observability_namespace }}.svc.cluster.local:3100
        isDefault: false
{% endif %}
|
||||
@@ -52,6 +52,17 @@
|
||||
--timeout 10m
|
||||
changed_when: true
|
||||
|
||||
# Block until the Grafana deployment reports a finished rollout, so the
# following kubectl exec has a running pod to target.
- name: Wait for Grafana deployment rollout
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }}
      rollout status deployment/kube-prometheus-stack-grafana
      --timeout=5m
  changed_when: false  # status query only, never modifies the cluster
|
||||
|
||||
# Reset the admin password through the Grafana CLI inside the first pod
# matching app.kubernetes.io/name=grafana.
#
# The password goes through the quote filter rather than hand-written single
# quotes, so a value containing quotes or other shell metacharacters cannot
# break the command or inject extra shell syntax.
# no_log keeps the rendered command, and therefore the password, out of
# Ansible's output and logs. Remove it temporarily when debugging a failure.
- name: Reset Grafana admin password in Grafana database
  shell: >-
    kubectl -n {{ observability_namespace }} exec
    "$(kubectl -n {{ observability_namespace }} get pod -l app.kubernetes.io/name=grafana -o jsonpath='{.items[0].metadata.name}')"
    -c grafana -- grafana cli admin reset-admin-password {{ grafana_password_effective | quote }}
  changed_when: true
  no_log: true
|
||||
|
||||
- name: Write Loki values
|
||||
template:
|
||||
src: loki-values.yaml.j2
|
||||
@@ -144,18 +155,6 @@
|
||||
changed_when: true
|
||||
when: loki_enabled
|
||||
|
||||
- name: Write Grafana Loki datasource manifest
|
||||
template:
|
||||
src: grafana-datasource-loki.yaml.j2
|
||||
dest: /tmp/grafana-datasource-loki.yaml
|
||||
mode: "0644"
|
||||
when: loki_enabled
|
||||
|
||||
- name: Create Grafana Loki datasource
|
||||
command: kubectl apply -f /tmp/grafana-datasource-loki.yaml
|
||||
changed_when: true
|
||||
when: loki_enabled
|
||||
|
||||
- name: Check Tailscale service readiness for Grafana
|
||||
command: kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana -o jsonpath='{.status.conditions[?(@.type=="TailscaleProxyReady")].status}'
|
||||
register: grafana_tailscale_ready
|
||||
|
||||
@@ -103,6 +103,13 @@
|
||||
roles:
|
||||
- observability
|
||||
|
||||
# Apply Grafana dashboards and datasources via the observability-content
# role, after the play that runs the observability role.
- name: Provision Grafana content
  become: true
  hosts: control_plane[0]  # first host of the control_plane group only
  roles:
    - observability-content
|
||||
|
||||
- name: Finalize
|
||||
hosts: localhost
|
||||
connection: local
|
||||
|
||||
Reference in New Issue
Block a user