feat: stabilize tailscale observability exposure with declarative proxy class
All checks were successful
Deploy Cluster / Terraform (push) Successful in 54s
Deploy Cluster / Ansible (push) Successful in 22m19s

This commit is contained in:
2026-03-04 01:37:00 +00:00
parent 28eaa36ec4
commit 1c39274df7
9 changed files with 148 additions and 59 deletions

View File

@@ -243,6 +243,9 @@ jobs:
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass" ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pods -o wide" ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pods -o wide"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pvc" ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get pvc"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n tailscale-system get pods -o wide"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability get svc kube-prometheus-stack-grafana kube-prometheus-stack-prometheus"
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n observability describe svc kube-prometheus-stack-grafana"
env: env:
ANSIBLE_HOST_KEY_CHECKING: "False" ANSIBLE_HOST_KEY_CHECKING: "False"

View File

@@ -166,6 +166,8 @@ Set these in your Gitea repository settings (**Settings** → **Secrets** → **
| `S3_BUCKET` | S3 bucket name (e.g., `k8s-terraform-state`) | | `S3_BUCKET` | S3 bucket name (e.g., `k8s-terraform-state`) |
| `TAILSCALE_AUTH_KEY` | Tailscale auth key for node bootstrap | | `TAILSCALE_AUTH_KEY` | Tailscale auth key for node bootstrap |
| `TAILSCALE_TAILNET` | Tailnet domain (e.g., `yourtailnet.ts.net`) | | `TAILSCALE_TAILNET` | Tailnet domain (e.g., `yourtailnet.ts.net`) |
| `TAILSCALE_OAUTH_CLIENT_ID` | Tailscale OAuth client ID for Kubernetes Operator |
| `TAILSCALE_OAUTH_CLIENT_SECRET` | Tailscale OAuth client secret for Kubernetes Operator |
| `GRAFANA_ADMIN_PASSWORD` | Optional admin password for Grafana (auto-generated if unset) | | `GRAFANA_ADMIN_PASSWORD` | Optional admin password for Grafana (auto-generated if unset) |
| `RUNNER_ALLOWED_CIDRS` | Optional CIDR list for CI runner access if you choose to pass it via tfvars/secrets | | `RUNNER_ALLOWED_CIDRS` | Optional CIDR list for CI runner access if you choose to pass it via tfvars/secrets |
| `SSH_PUBLIC_KEY` | SSH public key content | | `SSH_PUBLIC_KEY` | SSH public key content |
@@ -179,10 +181,17 @@ The Ansible playbook deploys a lightweight observability stack in the `observabi
- `loki` - `loki`
- `promtail` - `promtail`
Services are kept internal for tailnet-first access. Services are kept internal by default, with optional declarative Tailscale exposure when the Tailscale Kubernetes Operator is healthy.
### Access Grafana and Prometheus ### Access Grafana and Prometheus
Preferred (when Tailscale Operator is healthy):
- Grafana: `http://grafana` (or `http://grafana.<your-tailnet>`)
- Prometheus: `http://prometheus` (or `http://prometheus.<your-tailnet>`)
Fallback (port-forward from a tailnet-connected machine):
Run from a tailnet-connected machine: Run from a tailnet-connected machine:
```bash ```bash
@@ -200,6 +209,25 @@ Then open:
Grafana user: `admin` Grafana user: `admin`
Grafana password: value of `GRAFANA_ADMIN_PASSWORD` secret (or the generated value shown by Ansible output) Grafana password: value of `GRAFANA_ADMIN_PASSWORD` secret (or the generated value shown by Ansible output)
### Verify Tailscale exposure
```bash
export KUBECONFIG=$(pwd)/outputs/kubeconfig
kubectl -n tailscale-system get pods
kubectl -n observability get svc kube-prometheus-stack-grafana kube-prometheus-stack-prometheus
kubectl -n observability describe svc kube-prometheus-stack-grafana | grep TailscaleProxyReady
kubectl -n observability describe svc kube-prometheus-stack-prometheus | grep TailscaleProxyReady
```
If `TailscaleProxyReady=False`, check:
```bash
kubectl -n tailscale-system logs deployment/operator --tail=100
```
Common cause: the OAuth client is missing the required scopes, or is not permitted to assign the tags the operator uses (default: `tag:k8s`).
## File Structure ## File Structure
``` ```
@@ -223,6 +251,7 @@ Grafana password: value of `GRAFANA_ADMIN_PASSWORD` secret (or the generated val
│ │ ├── k3s-agent/ │ │ ├── k3s-agent/
│ │ ├── ccm/ │ │ ├── ccm/
│ │ ├── csi/ │ │ ├── csi/
│ │ ├── tailscale-operator/
│ │ └── observability/ │ │ └── observability/
│ └── ansible.cfg │ └── ansible.cfg
├── .gitea/ ├── .gitea/

View File

@@ -20,3 +20,8 @@ loki_enabled: true
tailscale_oauth_client_id: "" tailscale_oauth_client_id: ""
tailscale_oauth_client_secret: "" tailscale_oauth_client_secret: ""
tailscale_tailnet: "" tailscale_tailnet: ""
# Master switch for exposing Grafana/Prometheus through Tailscale; the
# exposure is additionally gated on tailscale_operator_ready.
observability_tailscale_expose: true
# Value written to the tailscale.com/hostname annotation of the Grafana service.
grafana_tailscale_hostname: "grafana"
# Value written to the tailscale.com/hostname annotation of the Prometheus service.
prometheus_tailscale_hostname: "prometheus"
# ProxyClass referenced by the tailscale.com/proxy-class annotation.
# NOTE(review): the tailscale-operator role defaults define the same variable;
# keep both values in sync.
tailscale_proxyclass_name: "infra-stable"

View File

@@ -156,64 +156,66 @@
changed_when: true changed_when: true
when: loki_enabled when: loki_enabled
- name: Configure Grafana for Tailscale access - name: Check Tailscale service readiness for Grafana
block: command: kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana -o jsonpath='{.status.conditions[?(@.type=="TailscaleProxyReady")].status}'
- name: Patch Grafana service for Tailscale register: grafana_tailscale_ready
command: >- changed_when: false
kubectl -n {{ observability_namespace }} patch svc kube-prometheus-stack-grafana failed_when: false
-p '{"metadata":{"annotations":{"tailscale.com/hostname":"grafana"}},"spec":{"type":"LoadBalancer","loadBalancerClass":"tailscale"}}'
register: grafana_patch
changed_when: true
- name: Patch Prometheus service for Tailscale
command: >-
kubectl -n {{ observability_namespace }} patch svc kube-prometheus-stack-prometheus
-p '{"metadata":{"annotations":{"tailscale.com/hostname":"prometheus"}},"spec":{"type":"LoadBalancer","loadBalancerClass":"tailscale"}}'
register: prometheus_patch
changed_when: true
- name: Check Tailscale endpoint (IP/hostname) for Grafana
shell: >-
kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana
-o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}'
register: grafana_lb_ip
changed_when: false
failed_when: false
- name: Check Tailscale endpoint (IP/hostname) for Prometheus
shell: >-
kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus
-o go-template='{{"{{"}}range .status.loadBalancer.ingress{{"}}"}}{{"{{"}}if .ip{{"}}"}}{{"{{"}}.ip{{"}}"}}{{"{{"}}else{{"}}"}}{{"{{"}}.hostname{{"}}"}}{{"{{"}}end{{"}}"}}{{"{{"}}end{{"}}"}}'
register: prometheus_lb_ip
changed_when: false
failed_when: false
- name: Warn if Tailscale endpoint assignment is still pending
debug:
msg: |
Tailscale service endpoint assignment is still pending.
Grafana endpoint: {{ grafana_lb_ip.stdout | default('') }}
Prometheus endpoint: {{ prometheus_lb_ip.stdout | default('') }}
Deployment continues; services may become reachable shortly.
when: (grafana_lb_ip.stdout | default('') | length == 0) or (prometheus_lb_ip.stdout | default('') | length == 0)
- name: Show Tailscale access details
debug:
msg: |
Observability stack deployed with Tailscale access!
Grafana: http://grafana{% if grafana_lb_ip.stdout | default('') | length > 0 %} (or http://{{ grafana_lb_ip.stdout }}){% endif %}
Prometheus: http://prometheus{% if prometheus_lb_ip.stdout | default('') | length > 0 %} (or http://{{ prometheus_lb_ip.stdout }}){% endif %}
Login: admin / {{ grafana_password_effective }}
Access via:
- MagicDNS: http://grafana or http://prometheus (if enabled)
- Direct endpoint: {% if grafana_lb_ip.stdout | default('') | length > 0 %}http://{{ grafana_lb_ip.stdout }}{% else %}(pending){% endif %} / {% if prometheus_lb_ip.stdout | default('') | length > 0 %}http://{{ prometheus_lb_ip.stdout }}{% else %}(pending){% endif %}
- Tailnet FQDN: http://grafana.{{ tailscale_tailnet | default('tailnet.ts.net') }}
Note: Ensure Tailscale Kubernetes Operator is installed first
when: when:
- observability_tailscale_expose | bool
- tailscale_operator_ready | default(false) | bool
# Read the status of the TailscaleProxyReady condition from the Prometheus
# service. Purely informational: never reports a change and never fails.
- name: Check Tailscale service readiness for Prometheus
  when:
    - observability_tailscale_expose | bool
    - tailscale_operator_ready | default(false) | bool
  command: >-
    kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus
    -o jsonpath='{.status.conditions[?(@.type=="TailscaleProxyReady")].status}'
  register: prometheus_tailscale_ready
  changed_when: false
  failed_when: false
# Look up the first load-balancer ingress entry (IP, else hostname) assigned
# to the Grafana service. No shell features are needed, so the command module
# is used, matching the readiness checks. The go-template is wrapped in a raw
# block so its braces reach kubectl untouched by Jinja.
# Purely informational: never reports a change and never fails.
- name: Check Tailscale endpoint (IP/hostname) for Grafana
  command: >-
    kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-grafana
    -o go-template='{% raw %}{{range .status.loadBalancer.ingress}}{{if .ip}}{{.ip}}{{else}}{{.hostname}}{{end}}{{end}}{% endraw %}'
  register: grafana_lb_ip
  changed_when: false
  failed_when: false
  when:
    - observability_tailscale_expose | bool
    - tailscale_operator_ready | default(false) | bool
# Look up the first load-balancer ingress entry (IP, else hostname) assigned
# to the Prometheus service. No shell features are needed, so the command
# module is used, matching the readiness checks. The go-template is wrapped in
# a raw block so its braces reach kubectl untouched by Jinja.
# Purely informational: never reports a change and never fails.
- name: Check Tailscale endpoint (IP/hostname) for Prometheus
  command: >-
    kubectl -n {{ observability_namespace }} get svc kube-prometheus-stack-prometheus
    -o go-template='{% raw %}{{range .status.loadBalancer.ingress}}{{if .ip}}{{.ip}}{{else}}{{.hostname}}{{end}}{{end}}{% endraw %}'
  register: prometheus_lb_ip
  changed_when: false
  failed_when: false
  when:
    - observability_tailscale_expose | bool
    - tailscale_operator_ready | default(false) | bool
# Print how to reach Grafana and Prometheus over the tailnet, using the
# results registered by the readiness and endpoint checks.
# default(..., true) is used where the source value can be an empty string:
# kubectl prints nothing while the condition is absent, and tailscale_tailnet
# defaults to "", so the one-argument default() would never apply.
# NOTE(review): this message includes the Grafana admin password, so it ends up
# in the play output and CI logs - confirm that is intended.
- name: Show Tailscale access details
  debug:
    msg: |
      Observability stack deployed with Tailscale access!
      Grafana: http://{{ grafana_tailscale_hostname }}{% if grafana_lb_ip.stdout | default('') | length > 0 %} (or http://{{ grafana_lb_ip.stdout }}){% endif %}
      Prometheus: http://{{ prometheus_tailscale_hostname }}{% if prometheus_lb_ip.stdout | default('') | length > 0 %} (or http://{{ prometheus_lb_ip.stdout }}){% endif %}
      Login: admin / {{ grafana_password_effective }}
      Tailscale readiness:
      - Grafana proxy ready: {{ grafana_tailscale_ready.stdout | default('pending', true) }}
      - Prometheus proxy ready: {{ prometheus_tailscale_ready.stdout | default('pending', true) }}
      Access via:
      - MagicDNS: http://{{ grafana_tailscale_hostname }} and http://{{ prometheus_tailscale_hostname }}
      - Tailnet FQDN: http://{{ grafana_tailscale_hostname }}.{{ tailscale_tailnet | default('tailnet.ts.net', true) }} and http://{{ prometheus_tailscale_hostname }}.{{ tailscale_tailnet | default('tailnet.ts.net', true) }}
      - Direct endpoint: {% if grafana_lb_ip.stdout | default('') | length > 0 %}http://{{ grafana_lb_ip.stdout }}{% else %}(pending){% endif %} / {% if prometheus_lb_ip.stdout | default('') | length > 0 %}http://{{ prometheus_lb_ip.stdout }}{% else %}(pending){% endif %}
  when:
    - observability_tailscale_expose | bool
    - tailscale_operator_ready | default(false) | bool
- name: Show observability access details (fallback) - name: Show observability access details (fallback)
@@ -230,4 +232,4 @@
Loki: Disabled Loki: Disabled
{% endif %} {% endif %}
when: when:
- not (tailscale_operator_ready | default(false) | bool) - not (observability_tailscale_expose | bool and (tailscale_operator_ready | default(false) | bool))

View File

@@ -6,8 +6,26 @@ grafana:
storageClassName: {{ grafana_storage_class }} storageClassName: {{ grafana_storage_class }}
size: {{ grafana_storage_size }} size: {{ grafana_storage_size }}
service: service:
# Grafana service type: Tailscale LoadBalancer when exposure is enabled and the
# operator is ready, ClusterIP otherwise. Both flags go through the bool filter
# so string values such as "false" behave the same as in the task conditions.
{% if (observability_tailscale_expose | bool) and (tailscale_operator_ready | default(false) | bool) %}
    type: LoadBalancer
    loadBalancerClass: tailscale
    annotations:
      tailscale.com/hostname: "{{ grafana_tailscale_hostname }}"
      tailscale.com/proxy-class: "{{ tailscale_proxyclass_name }}"
{% else %}
    type: ClusterIP
{% endif %}
prometheus: prometheus:
service:
# Prometheus service type: Tailscale LoadBalancer when exposure is enabled and
# the operator is ready, ClusterIP otherwise. Both flags go through the bool
# filter so string values such as "false" behave the same as in the task
# conditions.
{% if (observability_tailscale_expose | bool) and (tailscale_operator_ready | default(false) | bool) %}
    type: LoadBalancer
    loadBalancerClass: tailscale
    annotations:
      tailscale.com/hostname: "{{ prometheus_tailscale_hostname }}"
      tailscale.com/proxy-class: "{{ tailscale_proxyclass_name }}"
{% else %}
    type: ClusterIP
{% endif %}
prometheusSpec: prometheusSpec:
retention: 7d retention: 7d
storageSpec: storageSpec:

View File

@@ -8,6 +8,8 @@ tailscale_oauth_client_secret: ""
tailscale_operator_default_tags: tailscale_operator_default_tags:
- "tag:k8s" - "tag:k8s"
# Name of the ProxyClass rendered from proxyclass.yaml.j2 and passed to the
# operator as defaultProxyClass.
# NOTE(review): the observability role defaults define the same variable;
# keep both values in sync.
tailscale_proxyclass_name: "infra-stable"
tailscale_operator_required: false tailscale_operator_required: false
tailscale_operator_node_selector: tailscale_operator_node_selector:

View File

@@ -157,3 +157,15 @@
{{ tailscale_rollout.stdout | default('') }} {{ tailscale_rollout.stdout | default('') }}
{{ tailscale_deploy.stdout | default('') }} {{ tailscale_deploy.stdout | default('') }}
when: tailscale_rollout.rc != 0 when: tailscale_rollout.rc != 0
# Render the ProxyClass template to a file on the target host so a later
# kubectl apply can read it. Runs only when the operator is reported ready.
- name: Write Tailscale default ProxyClass manifest
  when: tailscale_operator_ready | default(false) | bool
  template:
    dest: /tmp/tailscale-proxyclass.yaml
    src: proxyclass.yaml.j2
    mode: "0644"
# Apply the rendered ProxyClass manifest. kubectl apply reports "unchanged"
# when the live object already matches, so the task is only marked changed
# when the object was created or reconfigured.
- name: Apply Tailscale default ProxyClass
  command: kubectl apply -f /tmp/tailscale-proxyclass.yaml
  register: tailscale_proxyclass_apply
  changed_when: "'unchanged' not in tailscale_proxyclass_apply.stdout"
  when: tailscale_operator_ready | default(false) | bool

View File

@@ -21,3 +21,4 @@ installCRDs: true
proxyConfig: proxyConfig:
defaultTags: "{{ tailscale_operator_default_tags | join(',') }}" defaultTags: "{{ tailscale_operator_default_tags | join(',') }}"
defaultProxyClass: "{{ tailscale_proxyclass_name }}"

View File

@@ -0,0 +1,17 @@
# ProxyClass describing pod placement for the Tailscale proxy StatefulSets.
# nodeSelector is emitted only when tailscale_operator_node_selector is
# non-empty, so the key never renders as a bare null; values stay quoted
# because node selector values must be strings.
# tolerations are emitted as inline JSON (valid YAML flow syntax): an empty
# list renders as [] and every field of each toleration, including value,
# is passed through unchanged.
apiVersion: tailscale.com/v1alpha1
kind: ProxyClass
metadata:
  name: "{{ tailscale_proxyclass_name }}"
spec:
  statefulSet:
    pod:
{% if tailscale_operator_node_selector | default({}, true) %}
      nodeSelector:
{% for key, value in tailscale_operator_node_selector.items() %}
        "{{ key }}": "{{ value }}"
{% endfor %}
{% endif %}
      tolerations: {{ tailscale_operator_tolerations | default([], true) | to_json }}