diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml index 89286ab..58c8f61 100644 --- a/.gitea/workflows/deploy.yml +++ b/.gitea/workflows/deploy.yml @@ -243,6 +243,9 @@ jobs: ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass" ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pods -o wide" ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pvc" + ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n tailscale-system get pods -o wide" + ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get svc kube-prometheus-stack-grafana kube-prometheus-stack-prometheus" + ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability describe svc kube-prometheus-stack-grafana" env: ANSIBLE_HOST_KEY_CHECKING: "False" diff --git a/README.md b/README.md index 345e0a6..56a8f96 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,8 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → ** | `S3_BUCKET` | S3 bucket name (e.g., `k8s-terraform-state`) | | `TAILSCALE_AUTH_KEY` | Tailscale auth key for node bootstrap | | `TAILSCALE_TAILNET` | Tailnet domain (e.g., `yourtailnet.ts.net`) | +| `TAILSCALE_OAUTH_CLIENT_ID` | Tailscale OAuth client ID for Kubernetes Operator | +| `TAILSCALE_OAUTH_CLIENT_SECRET` | Tailscale OAuth client secret for Kubernetes Operator | | `GRAFANA_ADMIN_PASSWORD` | Optional admin password for Grafana (auto-generated if unset) | | `RUNNER_ALLOWED_CIDRS` | Optional CIDR list for CI runner access if you choose to pass it via tfvars/secrets | | `SSH_PUBLIC_KEY` | SSH public key content | @@ -179,10 +181,17 @@ The Ansible playbook deploys a lightweight observability stack in the `observabi - `loki` - `promtail` -Services are kept internal for tailnet-first access. 
+Services are kept internal by default, with optional declarative Tailscale exposure when the Tailscale Kubernetes Operator is healthy. ### Access Grafana and Prometheus +Preferred (when the Tailscale Operator is healthy): + +- Grafana: `http://grafana` (or the tailnet FQDN, e.g. `http://grafana.yourtailnet.ts.net`) +- Prometheus: `http://prometheus` (or the tailnet FQDN, e.g. `http://prometheus.yourtailnet.ts.net`) + +Fallback (port-forward): + Run from a tailnet-connected machine: ```bash @@ -200,6 +209,25 @@ Then open: Grafana user: `admin` Grafana password: value of `GRAFANA_ADMIN_PASSWORD` secret (or the generated value shown by Ansible output) +### Verify Tailscale exposure + +```bash +export KUBECONFIG=$(pwd)/outputs/kubeconfig + +kubectl -n tailscale-system get pods +kubectl -n observability get svc kube-prometheus-stack-grafana kube-prometheus-stack-prometheus +kubectl -n observability describe svc kube-prometheus-stack-grafana | grep TailscaleProxyReady +kubectl -n observability describe svc kube-prometheus-stack-prometheus | grep TailscaleProxyReady +``` + +If `TailscaleProxyReady=False`, check: + +```bash +kubectl -n tailscale-system logs deployment/operator --tail=100 +``` + +Common cause: the OAuth client is missing the required tag or scope permissions.
+ ## File Structure ``` @@ -223,6 +251,7 @@ Grafana password: value of `GRAFANA_ADMIN_PASSWORD` secret (or the generated val │ │ ├── k3s-agent/ │ │ ├── ccm/ │ │ ├── csi/ +│ │ ├── tailscale-operator/ │ │ └── observability/ │ └── ansible.cfg ├── .gitea/ diff --git a/ansible/roles/observability/defaults/main.yml b/ansible/roles/observability/defaults/main.yml index 8df65f3..efa2c44 100644 --- a/ansible/roles/observability/defaults/main.yml +++ b/ansible/roles/observability/defaults/main.yml @@ -20,3 +20,8 @@ loki_enabled: true tailscale_oauth_client_id: "" tailscale_oauth_client_secret: "" tailscale_tailnet: "" + +observability_tailscale_expose: true +grafana_tailscale_hostname: "grafana" +prometheus_tailscale_hostname: "prometheus" +tailscale_proxyclass_name: "infra-stable" diff --git a/ansible/roles/observability/tasks/main.yml b/ansible/roles/observability/tasks/main.yml index 29d37f6..5388790 100644 --- a/ansible/roles/observability/tasks/main.yml +++ b/ansible/roles/observability/tasks/main.yml @@ -156,64 +156,66 @@ changed_when: true when: loki_enabled -- name: Configure Grafana for Tailscale access - block: - - name: Patch Grafana service for Tailscale - command: >- - kubectl -n {{ observability_namespace }} patch svc kube-prometheus-stack-grafana - -p '{"metadata":{"annotations":{"tailscale.com/hostname":"grafana"}},"spec":{"type":"LoadBalancer","loadBalancerClass":"tailscale"}}' - register: grafana_patch - changed_when: true - - - name: Patch Prometheus service for Tailscale - command: >- - kubectl -n {{ observability_namespace }} patch svc kube-prometheus-stack-prometheus - -p '{"metadata":{"annotations":{"tailscale.com/hostname":"prometheus"}},"spec":{"type":"LoadBalancer","loadBalancerClass":"tailscale"}}' - register: prometheus_patch - changed_when: true - - - name: Check Tailscale endpoint (IP/hostname) for Grafana - shell: >- - kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana - -o go-template='{{"{{"}}range 
.status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}' - register: grafana_lb_ip - changed_when: false - failed_when: false - - - name: Check Tailscale endpoint (IP/hostname) for Prometheus - shell: >- - kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus - -o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}' - register: prometheus_lb_ip - changed_when: false - failed_when: false - - - name: Warn if Tailscale endpoint assignment is still pending - debug: - msg: | - Tailscale service endpoint assignment is still pending. - Grafana endpoint: {{ grafana_lb_ip.stdout | default('') }} - Prometheus endpoint: {{ prometheus_lb_ip.stdout | default('') }} - Deployment continues; services may become reachable shortly. - when: (grafana_lb_ip.stdout | default('') | length == 0) or (prometheus_lb_ip.stdout | default('') | length == 0) - - - name: Show Tailscale access details - debug: - msg: | - Observability stack deployed with Tailscale access! 
- - Grafana: http://grafana{% if grafana_lb_ip.stdout | default('') | length > 0 %} (or http://{{ grafana_lb_ip.stdout }}){% endif %} - Prometheus: http://prometheus{% if prometheus_lb_ip.stdout | default('') | length > 0 %} (or http://{{ prometheus_lb_ip.stdout }}){% endif %} - - Login: admin / {{ grafana_password_effective }} - - Access via: - - MagicDNS: http://grafana or http://prometheus (if enabled) - - Direct endpoint: {% if grafana_lb_ip.stdout | default('') | length > 0 %}http://{{ grafana_lb_ip.stdout }}{% else %}(pending){% endif %} / {% if prometheus_lb_ip.stdout | default('') | length > 0 %}http://{{ prometheus_lb_ip.stdout }}{% else %}(pending){% endif %} - - Tailnet FQDN: http://grafana.{{ tailscale_tailnet | default('tailnet.ts.net') }} - - Note: Ensure Tailscale Kubernetes Operator is installed first +- name: Check Tailscale service readiness for Grafana + command: kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana -o jsonpath='{.status.conditions[?(@.type=="TailscaleProxyReady")].status}' + register: grafana_tailscale_ready + changed_when: false + failed_when: false when: + - observability_tailscale_expose | bool + - tailscale_operator_ready | default(false) | bool + +- name: Check Tailscale service readiness for Prometheus + command: kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus -o jsonpath='{.status.conditions[?(@.type=="TailscaleProxyReady")].status}' + register: prometheus_tailscale_ready + changed_when: false + failed_when: false + when: + - observability_tailscale_expose | bool + - tailscale_operator_ready | default(false) | bool + +- name: Check Tailscale endpoint (IP/hostname) for Grafana + shell: >- + kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana + -o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}' + 
register: grafana_lb_ip + changed_when: false + failed_when: false + when: + - observability_tailscale_expose | bool + - tailscale_operator_ready | default(false) | bool + +- name: Check Tailscale endpoint (IP/hostname) for Prometheus + shell: >- + kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus + -o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}' + register: prometheus_lb_ip + changed_when: false + failed_when: false + when: + - observability_tailscale_expose | bool + - tailscale_operator_ready | default(false) | bool + +- name: Show Tailscale access details + debug: + msg: | + Observability stack deployed with Tailscale access! + + Grafana: http://{{ grafana_tailscale_hostname }}{% if grafana_lb_ip.stdout | default('') | length > 0 %} (or http://{{ grafana_lb_ip.stdout }}){% endif %} + Prometheus: http://{{ prometheus_tailscale_hostname }}{% if prometheus_lb_ip.stdout | default('') | length > 0 %} (or http://{{ prometheus_lb_ip.stdout }}){% endif %} + + Login: admin / {{ grafana_password_effective }} + + Tailscale readiness: + - Grafana proxy ready: {{ grafana_tailscale_ready.stdout | default('pending', true) }} + - Prometheus proxy ready: {{ prometheus_tailscale_ready.stdout | default('pending', true) }} + + Access via: + - MagicDNS: http://{{ grafana_tailscale_hostname }} and http://{{ prometheus_tailscale_hostname }} + - Tailnet FQDN: http://{{ grafana_tailscale_hostname }}.{{ tailscale_tailnet | default('tailnet.ts.net', true) }} + - Direct endpoint: {% if grafana_lb_ip.stdout | default('') | length > 0 %}http://{{ grafana_lb_ip.stdout }}{% else %}(pending){% endif %} / {% if prometheus_lb_ip.stdout | default('') | length > 0 %}http://{{ prometheus_lb_ip.stdout }}{% else %}(pending){% endif %} + when: + - observability_tailscale_expose | bool - tailscale_operator_ready | default(false) | bool - name: Show 
observability access details (fallback) @@ -230,4 +232,4 @@ Loki: Disabled {% endif %} when: - - not (tailscale_operator_ready | default(false) | bool) + - not (observability_tailscale_expose | bool and (tailscale_operator_ready | default(false) | bool)) diff --git a/ansible/roles/observability/templates/kube-prometheus-stack-values.yaml.j2 b/ansible/roles/observability/templates/kube-prometheus-stack-values.yaml.j2 index 5388e98..7091c80 100644 --- a/ansible/roles/observability/templates/kube-prometheus-stack-values.yaml.j2 +++ b/ansible/roles/observability/templates/kube-prometheus-stack-values.yaml.j2 @@ -6,8 +6,28 @@ grafana: storageClassName: {{ grafana_storage_class }} size: {{ grafana_storage_size }} service: +{# Grafana: Tailscale LoadBalancer only when exposure is enabled and the operator is ready; otherwise ClusterIP. #} +{% if (observability_tailscale_expose | bool) and (tailscale_operator_ready | default(false) | bool) %} + type: LoadBalancer + loadBalancerClass: tailscale + annotations: + tailscale.com/hostname: "{{ grafana_tailscale_hostname }}" + tailscale.com/proxy-class: "{{ tailscale_proxyclass_name }}" +{% else %} type: ClusterIP +{% endif %} prometheus: + service: +{# Prometheus: same gate as Grafana; "| bool" keeps string values such as "false" from being truthy. #} +{% if (observability_tailscale_expose | bool) and (tailscale_operator_ready | default(false) | bool) %} + type: LoadBalancer + loadBalancerClass: tailscale + annotations: + tailscale.com/hostname: "{{ prometheus_tailscale_hostname }}" + tailscale.com/proxy-class: "{{ tailscale_proxyclass_name }}" +{% else %} + type: ClusterIP +{% endif %} prometheusSpec: retention: 7d storageSpec: diff --git a/ansible/roles/tailscale-operator/defaults/main.yml b/ansible/roles/tailscale-operator/defaults/main.yml index 09f7341..13dd1a9 100644 --- a/ansible/roles/tailscale-operator/defaults/main.yml +++ b/ansible/roles/tailscale-operator/defaults/main.yml @@ -8,6 +8,8 @@ tailscale_oauth_client_secret: "" tailscale_operator_default_tags: - "tag:k8s" +tailscale_proxyclass_name: "infra-stable" + tailscale_operator_required: false tailscale_operator_node_selector: diff --git a/ansible/roles/tailscale-operator/tasks/main.yml 
b/ansible/roles/tailscale-operator/tasks/main.yml index 33eaa06..61225d7 100644 --- a/ansible/roles/tailscale-operator/tasks/main.yml +++ b/ansible/roles/tailscale-operator/tasks/main.yml @@ -157,3 +157,15 @@ {{ tailscale_rollout.stdout | default('') }} {{ tailscale_deploy.stdout | default('') }} when: tailscale_rollout.rc != 0 + +- name: Write Tailscale default ProxyClass manifest + template: + src: proxyclass.yaml.j2 + dest: /tmp/tailscale-proxyclass.yaml + mode: "0644" + when: tailscale_operator_ready | default(false) | bool + +- name: Apply Tailscale default ProxyClass + command: kubectl apply -f /tmp/tailscale-proxyclass.yaml + changed_when: true + when: tailscale_operator_ready | default(false) | bool diff --git a/ansible/roles/tailscale-operator/templates/operator-values.yaml.j2 b/ansible/roles/tailscale-operator/templates/operator-values.yaml.j2 index 2079211..53dd362 100644 --- a/ansible/roles/tailscale-operator/templates/operator-values.yaml.j2 +++ b/ansible/roles/tailscale-operator/templates/operator-values.yaml.j2 @@ -21,3 +21,4 @@ installCRDs: true proxyConfig: defaultTags: "{{ tailscale_operator_default_tags | join(',') }}" + defaultProxyClass: "{{ tailscale_proxyclass_name }}" diff --git a/ansible/roles/tailscale-operator/templates/proxyclass.yaml.j2 b/ansible/roles/tailscale-operator/templates/proxyclass.yaml.j2 new file mode 100644 index 0000000..530f9e1 --- /dev/null +++ b/ansible/roles/tailscale-operator/templates/proxyclass.yaml.j2 @@ -0,0 +1,10 @@ +# nodeSelector and tolerations are emitted as JSON (valid YAML flow style) so empty values render as {} / [] and every toleration field is preserved. +apiVersion: tailscale.com/v1alpha1 +kind: ProxyClass +metadata: + name: "{{ tailscale_proxyclass_name }}" +spec: + statefulSet: + pod: + nodeSelector: {{ tailscale_operator_node_selector | default({}, true) | to_json }} + tolerations: {{ tailscale_operator_tolerations | default([], true) | to_json }}