241 lines
8.9 KiB
YAML
241 lines
8.9 KiB
YAML
---
|
|
# Probe for a Helm binary. The task never fails and never reports a change;
# its result is stored in `helm_check` so the return code can be inspected.
- name: Check if Helm is installed
  command:
    cmd: helm version --short
  register: helm_check
  failed_when: false
  changed_when: false
|
|
|
|
# Download and run the upstream Helm 3 installer when the probe stored in
# `helm_check` returned a non-zero code.
# `pipefail` makes a failed download (curl -f exits non-zero) fail the task;
# without it the pipeline status is bash's, which exits 0 on empty input.
# `pipefail` is not POSIX sh, hence the explicit bash executable.
- name: Install Helm
  shell: |
    set -o pipefail
    curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
  args:
    executable: /bin/bash
  when: helm_check.rc != 0
  changed_when: true
|
|
|
|
# Create the namespace named by `observability_namespace`.
# A non-zero exit is tolerated only when stderr mentions "AlreadyExists";
# a change is reported only when the create command itself succeeded.
- name: Ensure observability namespace exists
  command:
    cmd: kubectl create namespace {{ observability_namespace }}
  register: create_observability_ns
  changed_when: create_observability_ns.rc == 0
  failed_when:
    - create_observability_ns.rc != 0
    - "'AlreadyExists' not in create_observability_ns.stderr"
|
|
|
|
# Resolve the Grafana admin password into `grafana_password_effective`:
# use `grafana_admin_password` when it is a non-empty value, otherwise
# generate a random 32-character password (letters and digits). The password
# lookup targets /dev/null, so the generated value is not stored in a file.
# `default('', true)` lets an undefined or null `grafana_admin_password` fall
# through to the generated password instead of erroring on `| length`.
- name: Set Grafana admin password
  set_fact:
    grafana_password_effective: >-
      {{ grafana_admin_password
      if (grafana_admin_password | default('', true) | length > 0)
      else lookup('password', '/dev/null length=32 chars=ascii_letters,digits') }}
|
|
|
|
# Render the Helm values file for kube-prometheus-stack into /tmp.
# NOTE(review): the template is not visible here; it presumably embeds
# `grafana_password_effective`, so the rendered file is kept owner-only
# rather than world-readable. Confirm against the template.
- name: Write kube-prometheus-stack values
  template:
    src: kube-prometheus-stack-values.yaml.j2
    dest: /tmp/kube-prometheus-stack-values.yaml
    mode: "0600"
|
|
|
|
# Register the prometheus-community chart repository.
# A non-zero exit whose stderr contains "already exists" is tolerated.
# Helm 3 exits 0 and prints an "already exists ... skipping" notice when the
# repo is already configured identically, so that case is not reported as a
# change (both output streams are checked for the notice).
- name: Add Prometheus Helm repo
  command:
    cmd: helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
  register: add_prom_repo
  failed_when:
    - add_prom_repo.rc != 0
    - "'already exists' not in add_prom_repo.stderr"
  changed_when:
    - add_prom_repo.rc == 0
    - "'already exists' not in (add_prom_repo.stdout ~ add_prom_repo.stderr)"
|
|
|
|
# Register the grafana chart repository.
# A non-zero exit whose stderr contains "already exists" is tolerated.
# Helm 3 exits 0 and prints an "already exists ... skipping" notice when the
# repo is already configured identically, so that case is not reported as a
# change (both output streams are checked for the notice).
- name: Add Grafana Helm repo
  command:
    cmd: helm repo add grafana https://grafana.github.io/helm-charts
  register: add_grafana_repo
  failed_when:
    - add_grafana_repo.rc != 0
    - "'already exists' not in add_grafana_repo.stderr"
  changed_when:
    - add_grafana_repo.rc == 0
    - "'already exists' not in (add_grafana_repo.stdout ~ add_grafana_repo.stderr)"
|
|
|
|
# Refresh the local index of every configured Helm repository.
# Never reported as a change.
- name: Update Helm repos
  command:
    cmd: helm repo update
  changed_when: false
|
|
|
|
# Install or upgrade the kube-prometheus-stack release at the pinned chart
# version, using the values file rendered to /tmp. Helm waits up to ten
# minutes for the release; the task always reports a change.
- name: Install kube-prometheus-stack
  command:
    cmd: >-
      helm upgrade --install kube-prometheus-stack
      prometheus-community/kube-prometheus-stack
      --namespace {{ observability_namespace }}
      --version {{ prometheus_chart_version }}
      --values /tmp/kube-prometheus-stack-values.yaml
      --wait
      --timeout 10m
  changed_when: true
|
|
|
|
# Render the Helm values file for Loki into /tmp (only when Loki is enabled).
- name: Write Loki values
  when: loki_enabled
  template:
    src: loki-values.yaml.j2
    dest: /tmp/loki-values.yaml
    mode: "0644"
|
|
|
|
# Dry-render the Loki chart with the generated values before installing.
# The task passes only when `helm template` succeeds AND the rendered output
# contains a StatefulSet; nothing is applied to the cluster.
- name: Validate Loki chart produces resources
  when: loki_enabled
  command:
    cmd: >-
      helm template loki grafana/loki
      --namespace {{ observability_namespace }}
      --version {{ loki_chart_version }}
      --values /tmp/loki-values.yaml
  register: loki_template
  changed_when: false
  failed_when: "not (loki_template.rc == 0 and 'kind: StatefulSet' in loki_template.stdout)"
|
|
|
|
# Best-effort removal of Loki workloads and pod disruption budgets in the
# observability namespace. Missing resources are ignored by kubectl and any
# error is ignored by the task (`failed_when: false`).
# The change status is derived from kubectl's output, which contains
# "deleted" for every object it actually removed, instead of always
# reporting "ok" for a destructive command.
- name: Remove legacy Loki resources
  command:
    cmd: >-
      kubectl -n {{ observability_namespace }} delete
      deployment/loki-gateway
      statefulset/loki
      statefulset/loki-chunks-cache
      statefulset/loki-results-cache
      statefulset/loki-backend
      statefulset/loki-read
      statefulset/loki-write
      poddisruptionbudget/loki-memcached-chunks-cache
      poddisruptionbudget/loki-memcached-results-cache
      --ignore-not-found=true
  register: loki_legacy_cleanup
  changed_when: "'deleted' in (loki_legacy_cleanup.stdout | default(''))"
  failed_when: false
  when: loki_enabled
|
|
|
|
# Best-effort deletion of the secret `sh.helm.release.v1.loki.v1` in the
# observability namespace. A missing secret is ignored by kubectl and any
# error is ignored by the task (`failed_when: false`).
# The change status follows kubectl's output ("deleted" appears only when
# the secret was actually removed).
- name: Clear stuck Helm lock for Loki
  command:
    cmd: kubectl -n {{ observability_namespace }} delete secret sh.helm.release.v1.loki.v1 --ignore-not-found=true
  register: loki_lock_cleanup
  changed_when: "'deleted' in (loki_lock_cleanup.stdout | default(''))"
  failed_when: false
  when: loki_enabled
|
|
|
|
# Best-effort `helm uninstall` of the `loki` release; any error (for example
# when no such release exists) is ignored via `failed_when: false`.
# NOTE(review): the command runs on every play when Loki is enabled, not only
# when the release is stuck.
# A change is reported only when the uninstall command exited successfully.
- name: Uninstall failed Loki release (if stuck)
  command:
    cmd: helm uninstall loki -n {{ observability_namespace }}
  register: loki_uninstall
  changed_when: (loki_uninstall.rc | default(1)) == 0
  failed_when: false
  when: loki_enabled
|
|
|
|
# Install or upgrade the `loki` release at the pinned chart version using the
# rendered values file. Helm is not asked to wait here. The result is kept in
# `loki_install` and the task always reports a change.
- name: Install Loki
  when: loki_enabled
  command:
    cmd: >-
      helm upgrade --install loki grafana/loki
      --namespace {{ observability_namespace }}
      --version {{ loki_chart_version }}
      --values /tmp/loki-values.yaml
  register: loki_install
  changed_when: true
|
|
|
|
# Block until the `loki` StatefulSet finishes rolling out (10 minute limit).
# The result is stored in `loki_rollout`; never reported as a change.
- name: Wait for Loki StatefulSet
  when: loki_enabled
  command:
    cmd: kubectl -n {{ observability_namespace }} rollout status statefulset/loki --timeout=10m
  register: loki_rollout
  changed_when: false
|
|
|
|
# Capture a wide listing of the pods labelled app.kubernetes.io/name=loki
# into `loki_pods`. Read-only, never reported as a change.
- name: Show Loki pod status
  when: loki_enabled
  command:
    cmd: kubectl -n {{ observability_namespace }} get pods -l app.kubernetes.io/name=loki -o wide
  register: loki_pods
  changed_when: false
|
|
|
|
# Print the pod listing stored in `loki_pods` to the play output.
- name: Debug Loki pods
  when: loki_enabled
  debug:
    msg: "{{ loki_pods.stdout }}"
|
|
|
|
# Render the Helm values file for Promtail into /tmp (only when Loki is
# enabled).
- name: Write Promtail values
  when: loki_enabled
  template:
    src: promtail-values.yaml.j2
    dest: /tmp/promtail-values.yaml
    mode: "0644"
|
|
|
|
# Install or upgrade the `promtail` release at the pinned chart version with
# the rendered values file. Helm waits up to ten minutes for the release;
# the task always reports a change.
- name: Install Promtail
  when: loki_enabled
  command:
    cmd: >-
      helm upgrade --install promtail grafana/promtail
      --namespace {{ observability_namespace }}
      --version {{ promtail_chart_version }}
      --values /tmp/promtail-values.yaml
      --wait
      --timeout 10m
  changed_when: true
|
|
|
|
# Render the Kubernetes manifest for the Grafana Loki datasource into /tmp
# (only when Loki is enabled).
- name: Write Grafana Loki datasource manifest
  when: loki_enabled
  template:
    src: grafana-datasource-loki.yaml.j2
    dest: /tmp/grafana-datasource-loki.yaml
    mode: "0644"
|
|
|
|
# Apply the rendered Grafana Loki datasource manifest to the cluster.
# kubectl reports each object as "created", "configured" or "unchanged";
# a change is reported only for the first two, so a re-run that alters
# nothing shows as "ok" instead of always "changed".
- name: Create Grafana Loki datasource
  command:
    cmd: kubectl apply -f /tmp/grafana-datasource-loki.yaml
  register: loki_datasource_apply
  changed_when: >-
    'created' in loki_datasource_apply.stdout
    or 'configured' in loki_datasource_apply.stdout
  when: loki_enabled
|
|
|
|
# Expose the Grafana and Prometheus services as Tailscale load balancers,
# wait (best effort) for an endpoint to be assigned, and print how to reach
# them. The whole block runs only when both Tailscale OAuth variables are
# non-empty.
- name: Configure Grafana for Tailscale access
  block:
    # Switch the service to a LoadBalancer of class "tailscale" and set the
    # tailscale.com/hostname annotation. kubectl appends "(no change)" when
    # the patch was already in effect, so that case is not reported as a
    # change.
    - name: Patch Grafana service for Tailscale
      command:
        cmd: >-
          kubectl -n {{ observability_namespace }} patch svc kube-prometheus-stack-grafana
          -p '{"metadata":{"annotations":{"tailscale.com/hostname":"grafana"}},"spec":{"type":"LoadBalancer","loadBalancerClass":"tailscale"}}'
      register: grafana_patch
      changed_when: "'(no change)' not in grafana_patch.stdout"

    # Same patch for the Prometheus service, with hostname "prometheus".
    - name: Patch Prometheus service for Tailscale
      command:
        cmd: >-
          kubectl -n {{ observability_namespace }} patch svc kube-prometheus-stack-prometheus
          -p '{"metadata":{"annotations":{"tailscale.com/hostname":"prometheus"}},"spec":{"type":"LoadBalancer","loadBalancerClass":"tailscale"}}'
      register: prometheus_patch
      changed_when: "'(no change)' not in prometheus_patch.stdout"

    # Poll the service status until an ingress entry appears: up to 18
    # attempts, 10 seconds apart. The go-template prints the IP of each
    # ingress entry, or its hostname when no IP is set. The doubled braces
    # are Jinja escapes that render to literal go-template delimiters.
    # `command` is sufficient because no shell features are used.
    # Exhausting the retries does not fail the play (`failed_when: false`).
    - name: Wait for Tailscale endpoint (IP/hostname) for Grafana
      command:
        cmd: >-
          kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana
          -o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}'
      register: grafana_lb_ip
      until: grafana_lb_ip.stdout | length > 0
      retries: 18
      delay: 10
      changed_when: false
      failed_when: false

    # Same polling for the Prometheus service.
    - name: Wait for Tailscale endpoint (IP/hostname) for Prometheus
      command:
        cmd: >-
          kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus
          -o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}'
      register: prometheus_lb_ip
      until: prometheus_lb_ip.stdout | length > 0
      retries: 18
      delay: 10
      changed_when: false
      failed_when: false

    # Shown only when at least one of the two endpoints is still empty.
    - name: Warn if Tailscale endpoint assignment is still pending
      debug:
        msg: |
          Tailscale service endpoint assignment is still pending.
          Grafana endpoint: {{ grafana_lb_ip.stdout | default('') }}
          Prometheus endpoint: {{ prometheus_lb_ip.stdout | default('') }}
          Deployment continues; services may become reachable shortly.
      when: (grafana_lb_ip.stdout | default('') | length == 0) or (prometheus_lb_ip.stdout | default('') | length == 0)

    # NOTE(review): this message prints the Grafana admin password to the
    # play output.
    - name: Show Tailscale access details
      debug:
        msg: |
          Observability stack deployed with Tailscale access!

          Grafana: http://grafana{% if grafana_lb_ip.stdout | default('') | length > 0 %} (or http://{{ grafana_lb_ip.stdout }}){% endif %}
          Prometheus: http://prometheus{% if prometheus_lb_ip.stdout | default('') | length > 0 %} (or http://{{ prometheus_lb_ip.stdout }}){% endif %}

          Login: admin / {{ grafana_password_effective }}

          Access via:
          - MagicDNS: http://grafana or http://prometheus (if enabled)
          - Direct endpoint: {% if grafana_lb_ip.stdout | default('') | length > 0 %}http://{{ grafana_lb_ip.stdout }}{% else %}(pending){% endif %} / {% if prometheus_lb_ip.stdout | default('') | length > 0 %}http://{{ prometheus_lb_ip.stdout }}{% else %}(pending){% endif %}
          - Tailnet FQDN: http://grafana.{{ tailscale_tailnet | default('tailnet.ts.net') }}

          Note: Ensure Tailscale Kubernetes Operator is installed first
  when:
    - tailscale_oauth_client_id | default('') | length > 0
    - tailscale_oauth_client_secret | default('') | length > 0
|
|
|
|
# Fallback summary, shown when either Tailscale OAuth variable is empty or
# undefined: prints the namespace, port-forward commands for Grafana and
# Prometheus, the Grafana admin password, and whether Loki is enabled.
# NOTE(review): this message prints the Grafana admin password to the play
# output.
- name: Show observability access details (fallback)
  when: (tailscale_oauth_client_id | default('') | length == 0) or (tailscale_oauth_client_secret | default('') | length == 0)
  debug:
    msg: |
      Observability stack deployed.
      Namespace: {{ observability_namespace }}
      Grafana (tailnet): kubectl -n {{ observability_namespace }} port-forward svc/kube-prometheus-stack-grafana 3000:80
      Prometheus (tailnet): kubectl -n {{ observability_namespace }} port-forward svc/kube-prometheus-stack-prometheus 9090:9090
      Grafana admin password: {{ grafana_password_effective }}
      {% if loki_enabled %}
      Loki: Enabled - logs available in Grafana
      {% else %}
      Loki: Disabled
      {% endif %}
|