2026-03-04 03:36:01 +00:00
|
|
|
---
|
|
|
|
|
# Create the observability namespace. kubectl exits non-zero when the
# namespace is already present, so an "AlreadyExists" error is tolerated.
- name: Ensure observability namespace exists
  command:
    cmd: kubectl create namespace {{ observability_namespace }}
  register: create_observability_ns
  # Only a successful create counts as a change.
  changed_when: create_observability_ns.rc == 0
  # Both conditions must hold for the task to fail (list items are AND-ed).
  failed_when:
    - create_observability_ns.rc != 0
    - "'AlreadyExists' not in create_observability_ns.stderr"
|
|
|
|
|
|
|
|
|
|
# Block until the Grafana deployment reports a completed rollout; kubectl is
# given a 5 minute timeout. Read-only, so it never reports a change.
- name: Wait for Grafana deployment rollout
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} rollout status
      deployment/kube-prometheus-stack-grafana --timeout=5m
  changed_when: false
|
|
|
|
|
|
2026-03-04 19:22:31 +00:00
|
|
|
# Seed the "effective" datasource URLs with the configured defaults. The
# NodePort fallback tasks further down in this file may overwrite them.
- name: Set default Prometheus datasource URL
  set_fact:
    grafana_loki_effective_url: "{{ grafana_loki_url }}"
    grafana_prometheus_effective_url: "{{ grafana_prometheus_url }}"
|
2026-03-04 19:22:31 +00:00
|
|
|
|
|
|
|
|
# Look up the name of the first pod carrying the Grafana label; the result
# (grafana_pod_name.stdout) is the target of the later `kubectl exec` probes.
- name: Get Grafana pod name
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} get pod
      -l app.kubernetes.io/name=grafana
      -o jsonpath='{.items[0].metadata.name}'
  changed_when: false
  register: grafana_pod_name
|
|
|
|
|
|
|
|
|
|
# Check, from inside the Grafana container, whether the default Prometheus URL
# answers on /-/ready. The probe never fails the play (failed_when: false); its
# exit status is kept in grafana_prometheus_probe.rc for the fallback tasks.
#
# Uses `command` with an explicit argv instead of `shell`: no controller-side
# shell feature is needed, and passing each argument separately keeps the
# templated values from being re-parsed by a local shell. The redirect is
# still interpreted by the `sh -c` that runs inside the pod.
- name: Probe Prometheus from Grafana pod via default datasource URL
  command:
    argv:
      - kubectl
      - "-n"
      - "{{ observability_namespace }}"
      - exec
      - "{{ grafana_pod_name.stdout }}"
      - "-c"
      - grafana
      - "--"
      - sh
      - "-c"
      - "wget -qO- --timeout=5 {{ grafana_prometheus_url }}/-/ready >/dev/null"
  register: grafana_prometheus_probe
  changed_when: false
  failed_when: false
|
|
|
|
|
|
2026-03-04 19:39:16 +00:00
|
|
|
# Check, from inside the Grafana container, whether the default Loki URL
# answers on /ready. Only runs when loki_enabled is truthy. The probe never
# fails the play (failed_when: false); its exit status is kept in
# grafana_loki_probe.rc for the fallback tasks.
#
# Uses `command` with an explicit argv instead of `shell`: no controller-side
# shell feature is needed, and passing each argument separately keeps the
# templated values from being re-parsed by a local shell. The redirect is
# still interpreted by the `sh -c` that runs inside the pod.
- name: Probe Loki from Grafana pod via default datasource URL
  command:
    argv:
      - kubectl
      - "-n"
      - "{{ observability_namespace }}"
      - exec
      - "{{ grafana_pod_name.stdout }}"
      - "-c"
      - grafana
      - "--"
      - sh
      - "-c"
      - "wget -qO- --timeout=5 {{ grafana_loki_url }}/ready >/dev/null"
  register: grafana_loki_probe
  changed_when: false
  failed_when: false
  when: loki_enabled
|
|
|
|
|
|
2026-03-04 19:22:31 +00:00
|
|
|
# Fetch the host IP of the first Prometheus pod. Runs only when the NodePort
# fallback is switched on and the in-cluster probe exited non-zero.
- name: Get Prometheus pod host IP for fallback
  when:
    - grafana_use_prometheus_nodeport_fallback | bool
    - grafana_prometheus_probe.rc != 0
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} get pod
      -l app.kubernetes.io/name=prometheus
      -o jsonpath='{.items[0].status.hostIP}'
  changed_when: false
  register: prometheus_host_ip
|
|
|
|
|
|
|
|
|
|
# Read the nodePort of the "http-web" port on the Prometheus service. Runs
# only when the NodePort fallback is switched on and the in-cluster probe
# exited non-zero.
- name: Get Prometheus service NodePort for fallback
  when:
    - grafana_use_prometheus_nodeport_fallback | bool
    - grafana_prometheus_probe.rc != 0
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} get svc
      kube-prometheus-stack-prometheus
      -o jsonpath='{.spec.ports[?(@.name=="http-web")].nodePort}'
  changed_when: false
  register: prometheus_nodeport
|
|
|
|
|
|
|
|
|
|
# Point the effective Prometheus URL at <host IP>:<NodePort>. The conditions
# are checked in order, so the two lookups' stdout is only inspected when the
# fallback is enabled and the probe failed; both lookups must be non-empty.
- name: Enable Prometheus NodePort fallback datasource URL
  when:
    - grafana_use_prometheus_nodeport_fallback | bool
    - grafana_prometheus_probe.rc != 0
    - prometheus_host_ip.stdout | length > 0
    - prometheus_nodeport.stdout | length > 0
  set_fact:
    grafana_prometheus_effective_url: "http://{{ prometheus_host_ip.stdout }}:{{ prometheus_nodeport.stdout }}"
|
|
|
|
|
|
2026-03-04 19:39:16 +00:00
|
|
|
# Switch the `loki` service to type NodePort so it can be reached via a node
# address. Runs only when Loki is enabled, the Loki NodePort fallback is
# switched on, and the in-cluster probe exited non-zero.
#
# Best effort: failed_when stays false, so a failed patch does not stop the
# play. The change status is taken from kubectl's output instead of being
# hard-coded to "unchanged": a successful patch that altered the service is
# reported as changed, while "patched (no change)" or a failure is not.
- name: Ensure Loki service uses NodePort for fallback
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} patch svc loki
      -p '{"spec":{"type":"NodePort"}}'
  register: loki_service_nodeport_patch
  changed_when:
    - loki_service_nodeport_patch.rc == 0
    - "'(no change)' not in loki_service_nodeport_patch.stdout"
  failed_when: false
  when:
    - loki_enabled
    - grafana_use_loki_nodeport_fallback | bool
    - grafana_loki_probe.rc != 0
|
|
|
|
|
|
|
|
|
|
# Fetch the host IP of the pod named `loki-0`. Runs only when Loki is enabled,
# the Loki NodePort fallback is switched on, and the in-cluster probe exited
# non-zero.
- name: Get Loki pod host IP for fallback
  when:
    - loki_enabled
    - grafana_use_loki_nodeport_fallback | bool
    - grafana_loki_probe.rc != 0
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} get pod loki-0
      -o jsonpath='{.status.hostIP}'
  changed_when: false
  register: loki_host_ip
|
|
|
|
|
|
|
|
|
|
# Read the nodePort of the "http-metrics" port on the `loki` service. Runs
# only when Loki is enabled, the Loki NodePort fallback is switched on, and
# the in-cluster probe exited non-zero.
- name: Get Loki service NodePort for fallback
  when:
    - loki_enabled
    - grafana_use_loki_nodeport_fallback | bool
    - grafana_loki_probe.rc != 0
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} get svc loki
      -o jsonpath='{.spec.ports[?(@.name=="http-metrics")].nodePort}'
  changed_when: false
  register: loki_nodeport
|
|
|
|
|
|
|
|
|
|
# Point the effective Loki URL at <host IP>:<NodePort>. The conditions are
# checked in order, so the two lookups' stdout is only inspected when Loki is
# enabled, the fallback is on and the probe failed; both must be non-empty.
- name: Enable Loki NodePort fallback datasource URL
  when:
    - loki_enabled
    - grafana_use_loki_nodeport_fallback | bool
    - grafana_loki_probe.rc != 0
    - loki_host_ip.stdout | length > 0
    - loki_nodeport.stdout | length > 0
  set_fact:
    grafana_loki_effective_url: "http://{{ loki_host_ip.stdout }}:{{ loki_nodeport.stdout }}"
|
|
|
|
|
|
2026-03-04 21:00:01 +00:00
|
|
|
# Fetch /loki/api/v1/labels through the effective Loki URL from inside the
# Grafana container and keep the response body in grafana_loki_labels.stdout.
# Only runs when loki_enabled is truthy, and never fails the play
# (failed_when: false); the following task inspects rc and stdout.
#
# Uses `command` with an explicit argv instead of `shell`: no controller-side
# shell feature is needed, and passing each argument separately keeps the
# templated values from being re-parsed by a local shell.
- name: Query Loki labels endpoint from Grafana pod
  command:
    argv:
      - kubectl
      - "-n"
      - "{{ observability_namespace }}"
      - exec
      - "{{ grafana_pod_name.stdout }}"
      - "-c"
      - grafana
      - "--"
      - sh
      - "-c"
      - "wget -qO- --timeout=10 {{ grafana_loki_effective_url }}/loki/api/v1/labels"
  register: grafana_loki_labels
  changed_when: false
  failed_when: false
  when: loki_enabled
|
|
|
|
|
|
|
|
|
|
# Abort the play when the labels query succeeded but returned an empty list.
# The response is matched as text with all spaces stripped: it must contain
# both "status":"success" and "data":[].
- name: Fail when Loki is reachable but has zero indexed labels
  when:
    - loki_enabled
    - grafana_loki_labels.rc == 0
    - >-
      '"status":"success"' in (grafana_loki_labels.stdout | replace(' ', ''))
    - >-
      '"data":[]' in (grafana_loki_labels.stdout | replace(' ', ''))
  fail:
    msg: >-
      Loki is reachable from Grafana at {{ grafana_loki_effective_url }}
      but /loki/api/v1/labels returned no labels.
      This usually means no logs are ingested yet.
      Check Promtail and tenant configuration.
|
|
|
|
|
|
2026-03-04 19:22:31 +00:00
|
|
|
# Render the default Prometheus datasource manifest to a file under /tmp so
# the next task can hand it to `kubectl apply`.
- name: Write default Prometheus datasource ConfigMap patch
  template:
    dest: /tmp/grafana-default-prometheus-datasource.yaml
    src: grafana-default-prometheus-datasource.yaml.j2
    mode: "0644"
|
|
|
|
|
|
|
|
|
|
# Apply the rendered default Prometheus datasource manifest.
#
# The change status is taken from kubectl's output rather than hard-coded to
# "changed": kubectl reports each object as created, configured or unchanged,
# so the task is only marked changed when something was created or updated.
- name: Apply default Prometheus datasource ConfigMap patch
  command:
    cmd: kubectl apply -f /tmp/grafana-default-prometheus-datasource.yaml
  register: grafana_default_prometheus_datasource_apply
  changed_when: >-
    ' created' in grafana_default_prometheus_datasource_apply.stdout
    or ' configured' in grafana_default_prometheus_datasource_apply.stdout
|
|
|
|
|
|
|
|
|
|
# Delete the old `grafana-datasource-loki` ConfigMap if it still exists.
# --ignore-not-found makes a missing ConfigMap a successful no-op, and
# failed_when stays false so this cleanup remains best effort.
#
# The change status is taken from kubectl's output instead of being
# hard-coded to "unchanged": the task is marked changed only when kubectl
# succeeded and reported the object as deleted.
- name: Remove legacy Loki datasource ConfigMap
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} delete configmap
      grafana-datasource-loki --ignore-not-found=true
  register: grafana_legacy_loki_datasource_delete
  changed_when:
    - grafana_legacy_loki_datasource_delete.rc == 0
    - "'deleted' in grafana_legacy_loki_datasource_delete.stdout"
  failed_when: false
|
|
|
|
|
|
2026-03-04 03:36:01 +00:00
|
|
|
# Render the Grafana datasources manifest to a file under /tmp. Skipped unless
# loki_enabled is truthy.
- name: Write Grafana datasources ConfigMap
  when: loki_enabled
  template:
    dest: /tmp/grafana-datasources.yaml
    src: grafana-datasources.yaml.j2
    mode: "0644"
|
2026-03-04 03:36:01 +00:00
|
|
|
|
|
|
|
|
# Apply the rendered Grafana datasources manifest. Skipped unless loki_enabled
# is truthy.
#
# The change status is taken from kubectl's output rather than hard-coded to
# "changed": kubectl reports each object as created, configured or unchanged,
# so the task is only marked changed when something was created or updated.
- name: Apply Grafana datasources ConfigMap
  command:
    cmd: kubectl apply -f /tmp/grafana-datasources.yaml
  register: grafana_datasources_apply
  changed_when: >-
    ' created' in grafana_datasources_apply.stdout
    or ' configured' in grafana_datasources_apply.stdout
  when: loki_enabled
|
|
|
|
|
|
|
|
|
|
# Trigger a rolling restart of the Grafana deployment. This runs
# unconditionally and is always reported as a change.
- name: Restart Grafana to load datasource updates deterministically
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} rollout restart
      deployment/kube-prometheus-stack-grafana
  changed_when: true
|
|
|
|
|
|
|
|
|
|
# Block until the restarted Grafana deployment reports a completed rollout;
# kubectl is given a 5 minute timeout. Read-only, so it never reports a change.
- name: Wait for Grafana rollout after datasource update
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} rollout status
      deployment/kube-prometheus-stack-grafana --timeout=5m
  changed_when: false
|
2026-03-04 03:36:01 +00:00
|
|
|
|
|
|
|
|
# Render the Kubernetes overview dashboard manifest to a file under /tmp so
# the next task can hand it to `kubectl apply`.
- name: Write Grafana dashboard ConfigMap
  template:
    dest: /tmp/grafana-dashboard-k8s-overview.yaml
    src: grafana-dashboard-k8s-overview.yaml.j2
    mode: "0644"
|
|
|
|
|
|
|
|
|
|
# Apply the rendered Grafana dashboard manifest.
#
# The change status is taken from kubectl's output rather than hard-coded to
# "changed": kubectl reports each object as created, configured or unchanged,
# so the task is only marked changed when something was created or updated.
- name: Apply Grafana dashboard ConfigMap
  command:
    cmd: kubectl apply -f /tmp/grafana-dashboard-k8s-overview.yaml
  register: grafana_dashboard_apply
  changed_when: >-
    ' created' in grafana_dashboard_apply.stdout
    or ' configured' in grafana_dashboard_apply.stdout
|
|
|
|
|
|
|
|
|
|
# Print a multi-line summary of what was provisioned: the ConfigMap names and
# the datasource URLs that ended up in effect. The Loki line is printed
# unconditionally (this task has no `when`).
- name: Show Grafana content provisioning summary
  debug:
    # Literal block scalar: line breaks in the message are preserved.
    msg: |
      Grafana content applied.
      Datasources ConfigMap: {{ grafana_datasource_configmap_name }}
      Prometheus datasource URL: {{ grafana_prometheus_effective_url }}
      Loki datasource URL: {{ grafana_loki_effective_url }}
      Dashboard ConfigMap: {{ grafana_dashboard_configmap_name }}
|