fix: make Grafana Prometheus datasource resilient with NodePort fallback
This commit is contained in:
@@ -3,3 +3,6 @@ observability_namespace: "observability"
|
|||||||
grafana_dashboard_configmap_name: "grafana-dashboard-k8s-overview"
|
grafana_dashboard_configmap_name: "grafana-dashboard-k8s-overview"
|
||||||
grafana_datasource_configmap_name: "grafana-datasources-core"
|
grafana_datasource_configmap_name: "grafana-datasources-core"
|
||||||
loki_enabled: true
|
loki_enabled: true
|
||||||
|
# URL probed from the Grafana pod and used as the initial Prometheus datasource URL.
grafana_prometheus_url: "http://kube-prometheus-stack-prometheus.{{ observability_namespace }}.svc.cluster.local:9090"
|
||||||
|
# URL rendered into the Loki entry of the Grafana datasources template.
grafana_loki_url: "http://loki.{{ observability_namespace }}.svc.cluster.local:3100"
|
||||||
|
# Gates the host-IP / NodePort lookup tasks that run when the probe of grafana_prometheus_url fails.
grafana_use_prometheus_nodeport_fallback: true
|
||||||
|
|||||||
@@ -9,15 +9,82 @@
|
|||||||
command: kubectl -n {{ observability_namespace }} rollout status deployment/kube-prometheus-stack-grafana --timeout=5m
|
command: kubectl -n {{ observability_namespace }} rollout status deployment/kube-prometheus-stack-grafana --timeout=5m
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
||||||
|
# Start from the configured URL; the NodePort fallback task may overwrite this fact later.
- name: Set default Prometheus datasource URL
|
||||||
|
set_fact:
|
||||||
|
grafana_prometheus_effective_url: "{{ grafana_prometheus_url }}"
|
||||||
|
|
||||||
|
# Resolve the name of the first pod matching the Grafana label; the probe task execs into it.
# NOTE(review): `.items[0]` presumably makes kubectl fail when no pod matches — confirm this is the desired behaviour.
- name: Get Grafana pod name
|
||||||
|
command: kubectl -n {{ observability_namespace }} get pod -l app.kubernetes.io/name=grafana -o jsonpath='{.items[0].metadata.name}'
|
||||||
|
register: grafana_pod_name
|
||||||
|
# Read-only lookup, never reports a change.
changed_when: false
|
||||||
|
|
||||||
|
# Check Prometheus readiness from inside the Grafana container, i.e. over the
# same network path the datasource will use.
# The command is passed as an argv list so neither a local shell nor a shell in
# the container interprets the templated URL (characters such as `&` or `?`
# are passed through literally). wget writes the body to /dev/null; only the
# exit status matters.
- name: Probe Prometheus from Grafana pod via default datasource URL
  command:
    argv:
      - kubectl
      - -n
      - "{{ observability_namespace }}"
      - exec
      - "{{ grafana_pod_name.stdout }}"
      - -c
      - grafana
      - "--"
      - wget
      - -q
      - -O
      - /dev/null
      - --timeout=5
      - "{{ grafana_prometheus_url }}/-/ready"
  register: grafana_prometheus_probe
  # Read-only probe, never reports a change.
  changed_when: false
  # A non-zero rc is an expected outcome: the fallback tasks key off
  # grafana_prometheus_probe.rc instead of failing the play here.
  failed_when: false
|
||||||
|
|
||||||
|
# Look up the node IP of the first Prometheus pod; it is combined with the
# service NodePort to build the fallback datasource URL.
- name: Get Prometheus pod host IP for fallback
  command: kubectl -n {{ observability_namespace }} get pod -l app.kubernetes.io/name=prometheus -o jsonpath='{.items[0].status.hostIP}'
  register: prometheus_host_ip
  # Read-only lookup, never reports a change.
  changed_when: false
  # With no matching pod, kubectl's jsonpath `.items[0]` exits non-zero with an
  # "array index out of bounds" error. Tolerate exactly that case so the
  # fallback degrades (stdout stays empty and the set_fact task is skipped)
  # instead of aborting the play; any other kubectl error still fails.
  failed_when:
    - prometheus_host_ip.rc != 0
    - "'array index out of bounds' not in prometheus_host_ip.stderr"
  when:
    - grafana_use_prometheus_nodeport_fallback | bool
    - grafana_prometheus_probe.rc != 0
|
||||||
|
|
||||||
|
# Read the nodePort of the service port named "http-web"; used as the port of the fallback URL.
# NOTE(review): stdout is presumably empty when the service exposes no nodePort — the set_fact task checks for that.
- name: Get Prometheus service NodePort for fallback
|
||||||
|
command: kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus -o jsonpath='{.spec.ports[?(@.name=="http-web")].nodePort}'
|
||||||
|
register: prometheus_nodeport
|
||||||
|
# Read-only lookup, never reports a change.
changed_when: false
|
||||||
|
# Only runs when the fallback is enabled and the in-cluster probe returned non-zero.
when:
|
||||||
|
- grafana_use_prometheus_nodeport_fallback | bool
|
||||||
|
- grafana_prometheus_probe.rc != 0
|
||||||
|
|
||||||
|
# Replace the effective datasource URL with http://<host IP>:<NodePort> built from the two lookups.
- name: Enable Prometheus NodePort fallback datasource URL
|
||||||
|
set_fact:
|
||||||
|
grafana_prometheus_effective_url: "http://{{ prometheus_host_ip.stdout }}:{{ prometheus_nodeport.stdout }}"
|
||||||
|
# Requires: fallback enabled, probe failed, and both lookups returned a non-empty value.
when:
|
||||||
|
- grafana_use_prometheus_nodeport_fallback | bool
|
||||||
|
- grafana_prometheus_probe.rc != 0
|
||||||
|
- prometheus_host_ip.stdout | length > 0
|
||||||
|
- prometheus_nodeport.stdout | length > 0
|
||||||
|
|
||||||
|
# Render the default-datasource ConfigMap manifest to a temporary file for the following kubectl apply.
- name: Write default Prometheus datasource ConfigMap patch
|
||||||
|
template:
|
||||||
|
src: grafana-default-prometheus-datasource.yaml.j2
|
||||||
|
dest: /tmp/grafana-default-prometheus-datasource.yaml
|
||||||
|
# Quoted so the octal mode is passed as a string.
mode: "0644"
|
||||||
|
|
||||||
|
# Apply the rendered default-datasource ConfigMap manifest.
- name: Apply default Prometheus datasource ConfigMap patch
  command: kubectl apply -f /tmp/grafana-default-prometheus-datasource.yaml
  register: grafana_default_datasource_apply
  # kubectl apply prints "<resource> unchanged" when the live object already
  # matches; report a change only when it created or reconfigured something,
  # so repeated runs stay idempotent in the play recap.
  changed_when: "'unchanged' not in grafana_default_datasource_apply.stdout"
|
||||||
|
|
||||||
|
# Best-effort cleanup of the old standalone Loki datasource ConfigMap.
- name: Remove legacy Loki datasource ConfigMap
  command: kubectl -n {{ observability_namespace }} delete configmap grafana-datasource-loki --ignore-not-found=true
  register: grafana_legacy_loki_delete
  # kubectl prints '... deleted' only when the object existed; report a change
  # in that case instead of always claiming nothing happened.
  changed_when: "'deleted' in (grafana_legacy_loki_delete.stdout | default(''))"
  # Deliberately non-fatal: a failed cleanup must not block the rest of the play.
  failed_when: false
|
||||||
|
|
||||||
# Render the additional-datasources ConfigMap manifest; the new side of the diff only does so when Loki is enabled.
- name: Write Grafana datasources ConfigMap
|
- name: Write Grafana datasources ConfigMap
|
||||||
template:
|
template:
|
||||||
src: grafana-datasources.yaml.j2
|
src: grafana-datasources.yaml.j2
|
||||||
dest: /tmp/grafana-datasources.yaml
|
dest: /tmp/grafana-datasources.yaml
|
||||||
mode: "0644"
|
mode: "0644"
|
||||||
|
when: loki_enabled
|
||||||
|
|
||||||
# Apply the rendered datasources manifest; the new side of the diff skips this when Loki is disabled.
- name: Apply Grafana datasources ConfigMap
|
- name: Apply Grafana datasources ConfigMap
|
||||||
command: kubectl apply -f /tmp/grafana-datasources.yaml
|
command: kubectl apply -f /tmp/grafana-datasources.yaml
|
||||||
# Always reported as changed, regardless of kubectl's output.
changed_when: true
|
changed_when: true
|
||||||
|
when: loki_enabled
|
||||||
|
|
||||||
|
# Unconditionally trigger a rolling restart of the Grafana deployment after the datasource ConfigMaps were applied.
- name: Restart Grafana to load datasource updates deterministically
|
||||||
|
command: kubectl -n {{ observability_namespace }} rollout restart deployment/kube-prometheus-stack-grafana
|
||||||
|
# A restart is always a change.
changed_when: true
|
||||||
|
|
||||||
|
# Block until the restarted Grafana deployment finishes rolling out, giving up after 5 minutes.
- name: Wait for Grafana rollout after datasource update
|
||||||
|
command: kubectl -n {{ observability_namespace }} rollout status deployment/kube-prometheus-stack-grafana --timeout=5m
|
||||||
|
# Waiting only observes state, so it never reports a change.
changed_when: false
|
||||||
|
|
||||||
- name: Write Grafana dashboard ConfigMap
|
- name: Write Grafana dashboard ConfigMap
|
||||||
template:
|
template:
|
||||||
@@ -34,4 +101,5 @@
|
|||||||
msg: |
|
msg: |
|
||||||
Grafana content applied.
|
Grafana content applied.
|
||||||
Datasources ConfigMap: {{ grafana_datasource_configmap_name }}
|
Datasources ConfigMap: {{ grafana_datasource_configmap_name }}
|
||||||
|
Prometheus datasource URL: {{ grafana_prometheus_effective_url }}
|
||||||
Dashboard ConfigMap: {{ grafana_dashboard_configmap_name }}
|
Dashboard ConfigMap: {{ grafana_dashboard_configmap_name }}
|
||||||
|
|||||||
@@ -9,15 +9,10 @@ data:
|
|||||||
datasources.yaml: |
|
datasources.yaml: |
|
||||||
apiVersion: 1
|
apiVersion: 1
|
||||||
datasources:
|
datasources:
|
||||||
- name: Prometheus
|
|
||||||
type: prometheus
|
|
||||||
access: proxy
|
|
||||||
url: http://kube-prometheus-stack-prometheus.{{ observability_namespace }}.svc.cluster.local:9090
|
|
||||||
isDefault: true
|
|
||||||
{% if loki_enabled %}
|
{% if loki_enabled %}
|
||||||
- name: Loki
|
- name: Loki
|
||||||
type: loki
|
type: loki
|
||||||
access: proxy
|
access: proxy
|
||||||
url: http://loki.{{ observability_namespace }}.svc.cluster.local:3100
|
url: "{{ grafana_loki_url }}"
|
||||||
isDefault: false
|
isDefault: false
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|||||||
@@ -0,0 +1,26 @@
|
|||||||
|
# ConfigMap carrying Grafana's default datasource provisioning file.
# The Prometheus URL comes from grafana_prometheus_effective_url, which the
# role sets to either the in-cluster service URL or the NodePort fallback.
# Templated scalars are quoted so an empty or special-character expansion
# still renders as a YAML string.
# NOTE(review): the name looks like the ConfigMap shipped by the
# kube-prometheus-stack chart — confirm a later Helm upgrade will not revert
# this content.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kube-prometheus-stack-grafana-datasource
  namespace: "{{ observability_namespace }}"
data:
  datasource.yaml: |-
    apiVersion: 1
    datasources:
    - name: "Prometheus"
      type: prometheus
      uid: prometheus
      url: "{{ grafana_prometheus_effective_url }}/"
      access: proxy
      isDefault: true
      jsonData:
        httpMethod: POST
        timeInterval: 30s
    - name: "Alertmanager"
      type: alertmanager
      uid: alertmanager
      url: "http://kube-prometheus-stack-alertmanager.{{ observability_namespace }}:9093/"
      access: proxy
      jsonData:
        handleGrafanaManagedAlerts: false
        implementation: prometheus
|
||||||
Reference in New Issue
Block a user