Files
HetznerTerra/scripts/smoke-check-tailnet-services.sh
micqdf 7385c2263e
Deploy Cluster / Terraform (push) Successful in 49s
Deploy Cluster / Ansible (push) Successful in 5m55s
fix: add tailnet smoke checks and move Tailscale operator to stable
Add a post-deploy smoke test that validates Tailscale DNS, proxy readiness,
reachability, and service responses for Rancher, Grafana, and Prometheus.
Move the operator to the stable Helm repo/version and align the baseline docs
with the current HA private-only architecture.
2026-04-18 19:59:13 +00:00

85 lines
2.2 KiB
Bash

#!/usr/bin/env bash
#
# Smoke test for Kubernetes services exposed over Tailscale.
#
# For every service checked at the bottom of this script it verifies, in order:
#   1. the Service reports a TailscaleProxyReady condition with status "True",
#   2. the Service's first load-balancer ingress hostname is the expected one,
#   3. that hostname resolves via `getent hosts`,
#   4. `tailscale ping` reaches the hostname,
#   5. an HTTP(S) request to the service URL returns an accepted status code.
#
# Tools invoked: kubectl, tailscale, curl, getent, awk, head.
#
# Strict mode: exit on the first failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
#######################################
# Run a command repeatedly until it succeeds or the attempt budget is spent.
# Arguments:
#   $1 - maximum number of attempts
#   $2 - delay between attempts, handed to `sleep` as-is
#   $3... - the command to run, followed by its arguments
# Outputs:
#   Writes a one-line diagnostic to stderr when it gives up or is misused.
# Returns:
#   0 as soon as the command succeeds,
#   1 once every attempt has failed,
#   2 when called without a command to run.
#######################################
retry() {
  # An empty "$@" would count as an instantly successful command, so reject
  # calls that do not name anything to run.
  if [ "$#" -lt 3 ]; then
    printf 'usage: retry ATTEMPTS DELAY_SECONDS COMMAND [ARG...]\n' >&2
    return 2
  fi

  local attempts="$1"
  local delay_seconds="$2"
  shift 2
  local attempt=1

  # The command is the `until` condition, so its failures never trip `set -e`
  # directly; only the explicit `return 1` below reaches the caller.
  until "$@"; do
    if [ "$attempt" -ge "$attempts" ]; then
      # Name the command that gave up; otherwise a caller running under
      # `set -e` exits without any hint of which probe failed.
      printf 'retry: giving up after %s attempt(s): %s\n' "$attempt" "$*" >&2
      return 1
    fi
    sleep "$delay_seconds"
    attempt=$((attempt + 1))
  done
}
service_proxy_ready() {
local namespace="$1"
local service_name="$2"
kubectl get svc "$service_name" -n "$namespace" \
-o jsonpath='{.status.conditions[?(@.type=="TailscaleProxyReady")].status}' 2>/dev/null \
| grep -qx 'True'
}
#######################################
# Tell whether a Service's first load-balancer ingress hostname is the
# expected one.
# Arguments:
#   $1 - namespace of the Service
#   $2 - name of the Service
#   $3 - hostname the Service is expected to have been assigned
# Returns:
#   0 when the reported hostname equals $3 exactly; non-zero otherwise
#   (including when kubectl prints nothing).
#######################################
assigned_hostname_matches() {
  local namespace="$1"
  local service_name="$2"
  local expected_hostname="$3"

  # -F compares the hostname literally: as a regex, every "." would match any
  # character and a look-alike hostname would be accepted. `--` keeps a value
  # starting with "-" from being parsed as a grep option.
  # kubectl's stderr is discarded because callers poll this function.
  kubectl get svc "$service_name" -n "$namespace" \
    -o jsonpath='{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null \
    | grep -Fqx -- "$expected_hostname"
}
#######################################
# Tell whether a hostname resolves through `getent hosts`.
# Arguments:
#   $1 - hostname to look up
# Outputs:
#   Nothing; getent's stdout and stderr are both discarded.
# Returns:
#   getent's exit status (0 when the lookup succeeded).
#######################################
dns_resolves() {
  local fqdn="$1"
  getent hosts "$fqdn" &>/dev/null
}
#######################################
# Tell whether a single `tailscale ping` to the given host succeeds.
# Arguments:
#   $1 - hostname to ping
# Outputs:
#   Nothing; the tailscale CLI's stdout and stderr are both discarded.
# Returns:
#   The exit status of `tailscale ping -c 1` (0 on success).
#######################################
tailscale_ping_succeeds() {
  local target="$1"
  tailscale ping -c 1 "$target" &>/dev/null
}
#######################################
# Request a URL and tell whether the HTTP status code is an accepted one.
# Accepted codes: 200, 301, 302, 401, 403.
# NOTE(review): 401/403 presumably count as success because an auth challenge
# still shows the service is answering - confirm.
# Arguments:
#   $1 - URL to request
# Outputs:
#   On an unaccepted status, writes a diagnostic with the URL and the status
#   to stderr. curl's own error messages (-S) also go to stderr.
# Returns:
#   0 for an accepted status code, 1 otherwise.
#######################################
http_status_is_expected() {
  local url="$1"
  local status

  # -k skips TLS certificate verification, -s silences progress output, and
  # -S keeps error messages. The response body is thrown away; only the status
  # code is captured. `|| true` lets a failed transfer fall through to the
  # status check below instead of aborting under `set -e`.
  status="$(curl -skS -o /dev/null -w '%{http_code}' --max-time 15 "$url" || true)"

  if [[ "$status" =~ ^(200|301|302|401|403)$ ]]; then
    return 0
  fi

  echo "Unexpected HTTP status for $url: $status" >&2
  return 1
}
#######################################
# Run the full chain of smoke checks for one Tailscale-exposed service.
# Each stage is polled through `retry` (up to 18 attempts, 10 seconds apart),
# in this order: proxy readiness, assigned hostname, DNS resolution,
# tailscale ping, HTTP status.
# Arguments:
#   $1 - namespace of the Kubernetes Service
#   $2 - name of the Kubernetes Service
#   $3 - hostname the service is expected to have
#   $4 - URL to request for the HTTP check
# Outputs:
#   Progress lines on stdout: the service being checked, the first address
#   printed by `getent hosts`, and a confirmation for the URL.
#######################################
check_service() {
  local namespace="$1" service_name="$2" hostname="$3" url="$4"
  local max_tries=18 pause_seconds=10
  local resolved_ip

  echo "Checking $namespace/$service_name -> $hostname"

  retry "$max_tries" "$pause_seconds" service_proxy_ready "$namespace" "$service_name"
  retry "$max_tries" "$pause_seconds" assigned_hostname_matches "$namespace" "$service_name" "$hostname"
  retry "$max_tries" "$pause_seconds" dns_resolves "$hostname"
  retry "$max_tries" "$pause_seconds" tailscale_ping_succeeds "$hostname"
  retry "$max_tries" "$pause_seconds" http_status_is_expected "$url"

  # Informational only: a failed lookup here yields an empty address rather
  # than aborting the script, hence the `|| true`.
  resolved_ip="$(getent hosts "$hostname" | awk '{print $1}' | head -1)" || true
  echo "Resolved hostname: $resolved_ip"
  echo "HTTP status OK for $url"
}
# Tailnet DNS suffix shared by every service hostname below. It can be
# overridden through the TAILNET_DOMAIN environment variable; the default is
# the value that was previously hard-coded in each call.
tailnet_domain="${TAILNET_DOMAIN:-silverside-gopher.ts.net}"

# Arguments per call: namespace, Service name, expected hostname, URL to probe.
# Under `set -e` the first failing check aborts the script.
check_service "cattle-system" "rancher-tailscale" \
  "rancher.${tailnet_domain}" "https://rancher.${tailnet_domain}/"
check_service "observability" "grafana-tailscale" \
  "grafana.${tailnet_domain}" "http://grafana.${tailnet_domain}/"
check_service "observability" "prometheus-tailscale" \
  "prometheus.${tailnet_domain}" "http://prometheus.${tailnet_domain}:9090/"