---
# Observability stack tasks.
#
# Installs Helm when missing, then deploys kube-prometheus-stack and,
# when `loki_enabled` is true, Loki + Promtail plus a Grafana datasource
# for Loki. When Tailscale OAuth credentials are provided, the Grafana and
# Prometheus services are switched to Tailscale LoadBalancers; otherwise
# port-forward instructions are printed.
#
# Variables read by these tasks:
#   observability_namespace, grafana_admin_password (optional),
#   prometheus_chart_version, loki_enabled, loki_chart_version,
#   promtail_chart_version, tailscale_oauth_client_id (optional),
#   tailscale_oauth_client_secret (optional), tailscale_tailnet (optional).

# failed_when is disabled so a missing binary is reported through
# helm_check.rc instead of aborting the play.
- name: Check if Helm is installed
  command: helm version --short
  register: helm_check
  changed_when: false
  failed_when: false

# pipefail makes a failed download fail the task; without it bash would
# happily execute empty input and exit 0 with Helm still missing.
- name: Install Helm
  shell: |
    set -o pipefail
    curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
  args:
    executable: /bin/bash
  when: helm_check.rc != 0
  changed_when: true

# "AlreadyExists" in stderr is tolerated so reruns stay green.
- name: Ensure observability namespace exists
  command: kubectl create namespace {{ observability_namespace }}
  register: create_observability_ns
  failed_when: create_observability_ns.rc != 0 and "AlreadyExists" not in create_observability_ns.stderr
  changed_when: create_observability_ns.rc == 0

# Uses the supplied password when it is non-empty; an undefined, null or
# empty value falls back to a generated 32-character password.
- name: Set Grafana admin password
  set_fact:
    grafana_password_effective: >-
      {{ grafana_admin_password
      if grafana_admin_password | default('', true) | length > 0
      else lookup('password', '/dev/null length=32 chars=ascii_letters,digits') }}

# NOTE(review): presumably the rendered values embed the Grafana admin
# password - confirm against the template. The file is kept owner-only
# because /tmp is shared.
- name: Write kube-prometheus-stack values
  template:
    src: kube-prometheus-stack-values.yaml.j2
    dest: /tmp/kube-prometheus-stack-values.yaml
    mode: "0600"

- name: Add Prometheus Helm repo
  command: helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
  register: add_prom_repo
  failed_when: add_prom_repo.rc != 0 and "already exists" not in add_prom_repo.stderr
  changed_when: add_prom_repo.rc == 0

- name: Add Grafana Helm repo
  command: helm repo add grafana https://grafana.github.io/helm-charts
  register: add_grafana_repo
  failed_when: add_grafana_repo.rc != 0 and "already exists" not in add_grafana_repo.stderr
  changed_when: add_grafana_repo.rc == 0

- name: Update Helm repos
  command: helm repo update
  changed_when: false

- name: Install kube-prometheus-stack
  command: >-
    helm upgrade --install kube-prometheus-stack
    prometheus-community/kube-prometheus-stack
    --namespace {{ observability_namespace }}
    --version {{ prometheus_chart_version }}
    --values /tmp/kube-prometheus-stack-values.yaml
    --wait --timeout 10m
  changed_when: true

- name: Write Loki values
  template:
    src: loki-values.yaml.j2
    dest: /tmp/loki-values.yaml
    mode: "0644"
  when: loki_enabled

# Renders the chart locally and fails early unless the output contains a
# StatefulSet, which the later rollout wait depends on.
- name: Validate Loki chart produces resources
  command: >-
    helm template loki grafana/loki
    --namespace {{ observability_namespace }}
    --version {{ loki_chart_version }}
    --values /tmp/loki-values.yaml
  register: loki_template
  changed_when: false
  failed_when: "loki_template.rc != 0 or 'kind: StatefulSet' not in loki_template.stdout"
  when: loki_enabled

# A non-zero rc (for example release not found) is tolerated here and is
# treated as "not deployed" by the reset block below.
- name: Check Loki Helm release status
  command: helm status loki --namespace {{ observability_namespace }} --output json
  register: loki_release_status
  changed_when: false
  failed_when: false
  when: loki_enabled

# The reset steps are destructive, so they only run when Helm does not
# report the release as "deployed". A healthy Loki is left untouched and is
# simply upgraded in place by the install task that follows.
- name: Reset Loki release when it is not healthy
  block:
    - name: Remove legacy Loki resources
      command: >-
        kubectl -n {{ observability_namespace }} delete
        deployment/loki-gateway
        statefulset/loki
        statefulset/loki-chunks-cache
        statefulset/loki-results-cache
        statefulset/loki-backend
        statefulset/loki-read
        statefulset/loki-write
        poddisruptionbudget/loki-memcached-chunks-cache
        poddisruptionbudget/loki-memcached-results-cache
        --ignore-not-found=true
      changed_when: false
      failed_when: false

    - name: Clear stuck Helm lock for Loki
      command: kubectl -n {{ observability_namespace }} delete secret sh.helm.release.v1.loki.v1 --ignore-not-found=true
      changed_when: false
      failed_when: false

    - name: Uninstall failed Loki release (if stuck)
      command: helm uninstall loki -n {{ observability_namespace }}
      changed_when: false
      failed_when: false
  when:
    - loki_enabled
    - >-
      (loki_release_status.rc | default(1)) != 0
      or (loki_release_status.stdout | from_json).info.status != 'deployed'

# No --wait here; readiness is checked by the rollout task below.
- name: Install Loki
  command: >-
    helm upgrade --install loki grafana/loki
    --namespace {{ observability_namespace }}
    --version {{ loki_chart_version }}
    --values /tmp/loki-values.yaml
  register: loki_install
  changed_when: true
  when: loki_enabled

- name: Wait for Loki StatefulSet
  command: kubectl -n {{ observability_namespace }} rollout status statefulset/loki --timeout=10m
  register: loki_rollout
  changed_when: false
  when: loki_enabled

- name: Show Loki pod status
  command: kubectl -n {{ observability_namespace }} get pods -l app.kubernetes.io/name=loki -o wide
  register: loki_pods
  changed_when: false
  when: loki_enabled

- name: Debug Loki pods
  debug:
    msg: "{{ loki_pods.stdout }}"
  when: loki_enabled

- name: Write Promtail values
  template:
    src: promtail-values.yaml.j2
    dest: /tmp/promtail-values.yaml
    mode: "0644"
  when: loki_enabled

- name: Install Promtail
  command: >-
    helm upgrade --install promtail grafana/promtail
    --namespace {{ observability_namespace }}
    --version {{ promtail_chart_version }}
    --values /tmp/promtail-values.yaml
    --wait --timeout 10m
  changed_when: true
  when: loki_enabled

- name: Write Grafana Loki datasource manifest
  template:
    src: grafana-datasource-loki.yaml.j2
    dest: /tmp/grafana-datasource-loki.yaml
    mode: "0644"
  when: loki_enabled

- name: Create Grafana Loki datasource
  command: kubectl apply -f /tmp/grafana-datasource-loki.yaml
  changed_when: true
  when: loki_enabled

# Runs only when both Tailscale OAuth credentials are non-empty.
- name: Configure Grafana for Tailscale access
  block:
    # kubectl appends "(no change)" when the patch was a no-op, which is
    # used to report "changed" accurately on reruns.
    - name: Patch Grafana service for Tailscale
      command: >-
        kubectl -n {{ observability_namespace }} patch svc kube-prometheus-stack-grafana
        -p '{"metadata":{"annotations":{"tailscale.com/hostname":"grafana"}},"spec":{"type":"LoadBalancer","loadBalancerClass":"tailscale"}}'
      register: grafana_patch
      changed_when: "'(no change)' not in grafana_patch.stdout"

    - name: Patch Prometheus service for Tailscale
      command: >-
        kubectl -n {{ observability_namespace }} patch svc kube-prometheus-stack-prometheus
        -p '{"metadata":{"annotations":{"tailscale.com/hostname":"prometheus"}},"spec":{"type":"LoadBalancer","loadBalancerClass":"tailscale"}}'
      register: prometheus_patch
      changed_when: "'(no change)' not in prometheus_patch.stdout"

    # The {{"{{"}} / {{"}}"}} pairs are Jinja escapes that render literal
    # Go-template braces. Polls up to 18 times, 10 s apart; failed_when is
    # disabled so a still-pending endpoint does not abort the play.
    - name: Wait for Tailscale endpoint (IP/hostname) for Grafana
      shell: >-
        kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana
        -o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}'
      register: grafana_lb_ip
      until: grafana_lb_ip.stdout | length > 0
      retries: 18
      delay: 10
      changed_when: false
      failed_when: false

    - name: Wait for Tailscale endpoint (IP/hostname) for Prometheus
      shell: >-
        kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus
        -o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}'
      register: prometheus_lb_ip
      until: prometheus_lb_ip.stdout | length > 0
      retries: 18
      delay: 10
      changed_when: false
      failed_when: false

    - name: Warn if Tailscale endpoint assignment is still pending
      debug:
        msg: |
          Tailscale service endpoint assignment is still pending.
          Grafana endpoint: {{ grafana_lb_ip.stdout | default('') }}
          Prometheus endpoint: {{ prometheus_lb_ip.stdout | default('') }}
          Deployment continues; services may become reachable shortly.
      when: (grafana_lb_ip.stdout | default('') | length == 0) or (prometheus_lb_ip.stdout | default('') | length == 0)

    # NOTE(review): this message prints the Grafana admin password into
    # the Ansible output.
    - name: Show Tailscale access details
      debug:
        msg: |
          Observability stack deployed with Tailscale access!
          Grafana: http://grafana{% if grafana_lb_ip.stdout | default('') | length > 0 %} (or http://{{ grafana_lb_ip.stdout }}){% endif %}
          Prometheus: http://prometheus{% if prometheus_lb_ip.stdout | default('') | length > 0 %} (or http://{{ prometheus_lb_ip.stdout }}){% endif %}
          Login: admin / {{ grafana_password_effective }}
          Access via:
          - MagicDNS: http://grafana or http://prometheus (if enabled)
          - Direct endpoint: {% if grafana_lb_ip.stdout | default('') | length > 0 %}http://{{ grafana_lb_ip.stdout }}{% else %}(pending){% endif %} / {% if prometheus_lb_ip.stdout | default('') | length > 0 %}http://{{ prometheus_lb_ip.stdout }}{% else %}(pending){% endif %}
          - Tailnet FQDN: http://grafana.{{ tailscale_tailnet | default('tailnet.ts.net') }}
          Note: Ensure Tailscale Kubernetes Operator is installed first
  when:
    - tailscale_oauth_client_id | default('') | length > 0
    - tailscale_oauth_client_secret | default('') | length > 0

# Inverse of the Tailscale condition: shown when either credential is empty.
# NOTE(review): this message prints the Grafana admin password into the
# Ansible output.
- name: Show observability access details (fallback)
  debug:
    msg: |
      Observability stack deployed.
      Namespace: {{ observability_namespace }}
      Grafana (tailnet): kubectl -n {{ observability_namespace }} port-forward svc/kube-prometheus-stack-grafana 3000:80
      Prometheus (tailnet): kubectl -n {{ observability_namespace }} port-forward svc/kube-prometheus-stack-prometheus 9090:9090
      Grafana admin password: {{ grafana_password_effective }}
      {% if loki_enabled %}
      Loki: Enabled - logs available in Grafana
      {% else %}
      Loki: Disabled
      {% endif %}
  when:
    - tailscale_oauth_client_id | default('') | length == 0 or tailscale_oauth_client_secret | default('') | length == 0