---
# Provision Grafana content (datasources and a dashboard) through kubectl.
#
# Flow:
#   1. Make sure the namespace exists and Grafana has finished rolling out.
#   2. Probe Prometheus (and Loki, when enabled) from inside the Grafana pod
#      using the default datasource URLs.
#   3. If a probe fails and the matching fallback flag is set, switch the
#      effective datasource URL to http://<host IP>:<NodePort>.
#   4. Render and apply the datasource ConfigMaps, restart Grafana, then
#      render and apply the dashboard ConfigMap.

# `kubectl create namespace` exits non-zero when the namespace already exists;
# that specific error is tolerated so the task can be re-run.
- name: Ensure observability namespace exists
  command: kubectl create namespace {{ observability_namespace }}
  register: create_observability_ns
  failed_when: >-
    create_observability_ns.rc != 0
    and "AlreadyExists" not in create_observability_ns.stderr
  changed_when: create_observability_ns.rc == 0

- name: Wait for Grafana deployment rollout
  command: >-
    kubectl -n {{ observability_namespace }}
    rollout status deployment/kube-prometheus-stack-grafana --timeout=5m
  changed_when: false

# Start from the configured URLs; the fallback tasks below may override them.
- name: Set default Prometheus datasource URL
  set_fact:
    grafana_prometheus_effective_url: "{{ grafana_prometheus_url }}"
    grafana_loki_effective_url: "{{ grafana_loki_url }}"

# Takes the first pod matching the label selector.
- name: Get Grafana pod name
  command: >-
    kubectl -n {{ observability_namespace }}
    get pod -l app.kubernetes.io/name=grafana
    -o jsonpath='{.items[0].metadata.name}'
  register: grafana_pod_name
  changed_when: false

# The probes never fail the play; their return code only selects whether the
# NodePort fallback is used. `argv` avoids a local shell: the redirection is
# interpreted by the `sh -c` that runs inside the Grafana container.
- name: Probe Prometheus from Grafana pod via default datasource URL
  command:
    argv:
      - kubectl
      - "-n"
      - "{{ observability_namespace }}"
      - exec
      - "{{ grafana_pod_name.stdout }}"
      - "-c"
      - grafana
      - "--"
      - sh
      - "-c"
      - "wget -qO- --timeout=5 {{ grafana_prometheus_url }}/-/ready >/dev/null"
  register: grafana_prometheus_probe
  changed_when: false
  failed_when: false

- name: Probe Loki from Grafana pod via default datasource URL
  command:
    argv:
      - kubectl
      - "-n"
      - "{{ observability_namespace }}"
      - exec
      - "{{ grafana_pod_name.stdout }}"
      - "-c"
      - grafana
      - "--"
      - sh
      - "-c"
      - "wget -qO- --timeout=5 {{ grafana_loki_url }}/ready >/dev/null"
  register: grafana_loki_probe
  changed_when: false
  failed_when: false
  when: loki_enabled | bool

# --- Prometheus NodePort fallback -------------------------------------------
# Only runs when the fallback is enabled and the in-pod probe failed.
- name: Get Prometheus pod host IP for fallback
  command: >-
    kubectl -n {{ observability_namespace }}
    get pod -l app.kubernetes.io/name=prometheus
    -o jsonpath='{.items[0].status.hostIP}'
  register: prometheus_host_ip
  changed_when: false
  when:
    - grafana_use_prometheus_nodeport_fallback | bool
    - grafana_prometheus_probe.rc != 0

- name: Get Prometheus service NodePort for fallback
  command: >-
    kubectl -n {{ observability_namespace }}
    get svc kube-prometheus-stack-prometheus
    -o jsonpath='{.spec.ports[?(@.name=="http-web")].nodePort}'
  register: prometheus_nodeport
  changed_when: false
  when:
    - grafana_use_prometheus_nodeport_fallback | bool
    - grafana_prometheus_probe.rc != 0

# The override is applied only when both lookups returned a non-empty value;
# otherwise the default URL set earlier stays in effect.
- name: Enable Prometheus NodePort fallback datasource URL
  set_fact:
    grafana_prometheus_effective_url: "http://{{ prometheus_host_ip.stdout }}:{{ prometheus_nodeport.stdout }}"
  when:
    - grafana_use_prometheus_nodeport_fallback | bool
    - grafana_prometheus_probe.rc != 0
    - prometheus_host_ip.stdout | length > 0
    - prometheus_nodeport.stdout | length > 0

# --- Loki NodePort fallback -------------------------------------------------
# `loki_enabled` is listed first so the probe result is only consulted when the
# Loki probe task actually ran.
# Best-effort patch: errors are ignored, but a real change to the Service is
# reported as changed (kubectl appends "(no change)" when nothing was patched).
- name: Ensure Loki service uses NodePort for fallback
  command: >-
    kubectl -n {{ observability_namespace }}
    patch svc loki -p '{"spec":{"type":"NodePort"}}'
  register: loki_nodeport_patch
  changed_when:
    - loki_nodeport_patch.rc == 0
    - "'(no change)' not in loki_nodeport_patch.stdout"
  failed_when: false
  when:
    - loki_enabled | bool
    - grafana_use_loki_nodeport_fallback | bool
    - grafana_loki_probe.rc != 0

- name: Get Loki pod host IP for fallback
  command: >-
    kubectl -n {{ observability_namespace }}
    get pod loki-0 -o jsonpath='{.status.hostIP}'
  register: loki_host_ip
  changed_when: false
  when:
    - loki_enabled | bool
    - grafana_use_loki_nodeport_fallback | bool
    - grafana_loki_probe.rc != 0

- name: Get Loki service NodePort for fallback
  command: >-
    kubectl -n {{ observability_namespace }}
    get svc loki
    -o jsonpath='{.spec.ports[?(@.name=="http-metrics")].nodePort}'
  register: loki_nodeport
  changed_when: false
  when:
    - loki_enabled | bool
    - grafana_use_loki_nodeport_fallback | bool
    - grafana_loki_probe.rc != 0

- name: Enable Loki NodePort fallback datasource URL
  set_fact:
    grafana_loki_effective_url: "http://{{ loki_host_ip.stdout }}:{{ loki_nodeport.stdout }}"
  when:
    - loki_enabled | bool
    - grafana_use_loki_nodeport_fallback | bool
    - grafana_loki_probe.rc != 0
    - loki_host_ip.stdout | length > 0
    - loki_nodeport.stdout | length > 0

# --- Datasource ConfigMaps --------------------------------------------------
- name: Write default Prometheus datasource ConfigMap patch
  template:
    src: grafana-default-prometheus-datasource.yaml.j2
    dest: /tmp/grafana-default-prometheus-datasource.yaml
    mode: "0644"

# kubectl prints "created" / "configured" when it modified an object and
# "unchanged" otherwise; only the former count as a change.
- name: Apply default Prometheus datasource ConfigMap patch
  command: kubectl apply -f /tmp/grafana-default-prometheus-datasource.yaml
  register: grafana_prometheus_datasource_apply
  changed_when: >-
    'created' in grafana_prometheus_datasource_apply.stdout
    or 'configured' in grafana_prometheus_datasource_apply.stdout

# Best-effort cleanup: a missing ConfigMap and any kubectl error are ignored.
- name: Remove legacy Loki datasource ConfigMap
  command: >-
    kubectl -n {{ observability_namespace }}
    delete configmap grafana-datasource-loki --ignore-not-found=true
  changed_when: false
  failed_when: false

- name: Write Grafana datasources ConfigMap
  template:
    src: grafana-datasources.yaml.j2
    dest: /tmp/grafana-datasources.yaml
    mode: "0644"
  when: loki_enabled | bool

- name: Apply Grafana datasources ConfigMap
  command: kubectl apply -f /tmp/grafana-datasources.yaml
  register: grafana_datasources_apply
  changed_when: >-
    'created' in grafana_datasources_apply.stdout
    or 'configured' in grafana_datasources_apply.stdout
  when: loki_enabled | bool

# The restart is unconditional, so it is always reported as changed.
- name: Restart Grafana to load datasource updates deterministically
  command: >-
    kubectl -n {{ observability_namespace }}
    rollout restart deployment/kube-prometheus-stack-grafana
  changed_when: true

- name: Wait for Grafana rollout after datasource update
  command: >-
    kubectl -n {{ observability_namespace }}
    rollout status deployment/kube-prometheus-stack-grafana --timeout=5m
  changed_when: false

# --- Dashboard ConfigMap ----------------------------------------------------
- name: Write Grafana dashboard ConfigMap
  template:
    src: grafana-dashboard-k8s-overview.yaml.j2
    dest: /tmp/grafana-dashboard-k8s-overview.yaml
    mode: "0644"

- name: Apply Grafana dashboard ConfigMap
  command: kubectl apply -f /tmp/grafana-dashboard-k8s-overview.yaml
  register: grafana_dashboard_apply
  changed_when: >-
    'created' in grafana_dashboard_apply.stdout
    or 'configured' in grafana_dashboard_apply.stdout

# The Loki URL line is printed regardless of `loki_enabled`; it shows the
# value set by the set_fact tasks above.
- name: Show Grafana content provisioning summary
  debug:
    msg: |
      Grafana content applied.
      Datasources ConfigMap: {{ grafana_datasource_configmap_name }}
      Prometheus datasource URL: {{ grafana_prometheus_effective_url }}
      Loki datasource URL: {{ grafana_loki_effective_url }}
      Dashboard ConfigMap: {{ grafana_dashboard_configmap_name }}