2026-04-18 19:59:13 +00:00
|
|
|
#!/usr/bin/env bash
#
# Smoke checks for Services exposed through Tailscale-managed proxies:
# verifies proxy readiness, assigned hostname, DNS, tailscale ping and HTTP.

# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
|
|
|
|
|
|
2026-04-26 03:09:18 +00:00
|
|
|
# Command prefix used for every cluster call below. Start from the
# k3s-bundled kubectl run through sudo, and switch to the plain kubectl on
# PATH when `kubectl get --raw=/readyz` succeeds with it.
KUBECTL=(sudo k3s kubectl)
if kubectl get --raw=/readyz &>/dev/null; then
  KUBECTL=(kubectl)
fi
|
|
|
|
|
|
2026-04-18 19:59:13 +00:00
|
|
|
#######################################
# Run a command repeatedly until it succeeds or the attempt budget runs out.
# Arguments:
#   $1 - maximum number of attempts (non-negative integer; the command is
#        always run at least once)
#   $2 - delay between attempts, passed verbatim to sleep
#   $@ - command (with its arguments) to run
# Outputs:
#   A diagnostic on stderr for invalid usage or when giving up.
# Returns:
#   0 as soon as the command succeeds, 1 once the attempts are exhausted,
#   2 on invalid usage.
#######################################
retry() {
  if [ "$#" -lt 3 ]; then
    echo "retry: usage: retry <attempts> <delay> <command> [args...]" >&2
    return 2
  fi

  local attempts="$1"
  local delay_seconds="$2"
  shift 2

  # A non-numeric limit makes the -ge test below fail with status 2, which
  # `if` treats as false, so the loop would never terminate.
  case "$attempts" in
    '' | *[!0-9]*)
      echo "retry: attempts must be a non-negative integer, got '$attempts'" >&2
      return 2
      ;;
  esac

  local attempt=1
  until "$@"; do
    if [ "$attempt" -ge "$attempts" ]; then
      # Name the command so a caller aborting under `set -e` leaves a trace.
      echo "retry: giving up after $attempt attempt(s): $*" >&2
      return 1
    fi
    sleep "$delay_seconds"
    attempt=$((attempt + 1))
  done
}
|
|
|
|
|
|
2026-05-03 23:02:04 +00:00
|
|
|
#######################################
# Delete Tailscale-managed proxy pods whose `kubectl get pods` row does not
# show Running or Completed, then wait for the managed pods to become Ready.
# Globals:
#   KUBECTL (read) - kubectl command prefix
# Outputs:
#   Progress message on stdout; a warning on stderr if listing pods fails.
# Returns:
#   0 when nothing needed restarting; otherwise the status of the final
#   `kubectl wait` (or of a failing delete).
#######################################
restart_unhealthy_tailscale_proxies() {
  local listing
  local pod
  local -a unhealthy_pods=()

  # Listing is best-effort: a failure is reported but does not abort, and
  # whatever output was produced is still processed.
  listing="$("${KUBECTL[@]}" -n tailscale-system get pods \
    -l tailscale.com/managed=true --no-headers)" \
    || echo "warning: could not list Tailscale-managed pods" >&2

  # Pod names are held in an array rather than a temp file, so nothing is
  # left behind if a delete below fails and the script exits under `set -e`.
  # grep exiting 1 on "no unhealthy pods" is harmless here because the
  # status of a process substitution is not checked.
  while IFS= read -r pod; do
    unhealthy_pods+=("$pod")
  done < <(
    printf '%s\n' "$listing" \
      | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
      | awk 'NF { print $1 }'
  )

  if [ "${#unhealthy_pods[@]}" -eq 0 ]; then
    return 0
  fi

  echo "Restarting unhealthy Tailscale-managed proxy pods before smoke checks"
  for pod in "${unhealthy_pods[@]}"; do
    "${KUBECTL[@]}" -n tailscale-system delete pod "${pod}" --wait=false
  done

  # Deletes were issued with --wait=false; pause before waiting on readiness.
  sleep 30
  "${KUBECTL[@]}" -n tailscale-system wait --for=condition=Ready pod \
    -l tailscale.com/managed=true --timeout=600s
}
|
|
|
|
|
|
2026-04-18 19:59:13 +00:00
|
|
|
#######################################
# Check whether a Service reports its TailscaleProxyReady condition as True.
# Globals:
#   KUBECTL (read) - kubectl command prefix
# Arguments:
#   $1 - namespace of the Service
#   $2 - Service name
# Returns:
#   0 when the jsonpath query prints exactly "True"; non-zero otherwise
#   (kubectl's stderr is discarded).
#######################################
service_proxy_ready() {
  local ns="$1" svc="$2"
  local condition_query='{.status.conditions[?(@.type=="TailscaleProxyReady")].status}'

  "${KUBECTL[@]}" get svc "$svc" -n "$ns" -o "jsonpath=${condition_query}" 2>/dev/null \
    | grep -qx 'True'
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Check that the first load-balancer ingress hostname assigned to a Service
# is exactly the expected one.
# Globals:
#   KUBECTL (read) - kubectl command prefix
# Arguments:
#   $1 - namespace of the Service
#   $2 - Service name
#   $3 - expected hostname (compared literally)
# Returns:
#   0 on an exact match; non-zero otherwise (kubectl's stderr is discarded).
#######################################
assigned_hostname_matches() {
  local namespace="$1"
  local service_name="$2"
  local expected_hostname="$3"

  # -F: hostnames contain dots, which a regex match would treat as "any
  # character" and so accept near-miss names. `--` guards a leading dash.
  "${KUBECTL[@]}" get svc "$service_name" -n "$namespace" \
    -o jsonpath='{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null \
    | grep -qxF -- "$expected_hostname"
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Check whether a hostname can be looked up with `getent hosts`.
# Arguments:
#   $1 - hostname to look up
# Returns:
#   The exit status of getent; all of its output is discarded.
#######################################
dns_resolves() {
  local host="$1"

  getent hosts "$host" &>/dev/null
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Send a single `tailscale ping` to a host, bounded by a 20 second timeout.
# Arguments:
#   $1 - hostname to ping
# Returns:
#   The exit status of `timeout`; all output is discarded.
#######################################
tailscale_ping_succeeds() {
  local target="$1"
  local -a ping_cmd=(tailscale ping -c 1 "$target")

  timeout 20s "${ping_cmd[@]}" &>/dev/null
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Fetch a URL with curl and accept it when the HTTP status code is one of
# 200, 301, 302, 401 or 403.
# Arguments:
#   $1 - URL to request
# Outputs:
#   On an unaccepted status, a message with the URL and status on stderr.
# Returns:
#   0 for an accepted status, 1 otherwise.
#######################################
http_status_is_expected() {
  local url="$1"
  local code

  # `|| true`: a failed transfer must not abort the script; curl still
  # prints whatever status code it has, which then fails the check below.
  code="$(curl -skS -o /dev/null -w '%{http_code}' --max-time 15 "$url" || true)"

  if [[ "$code" =~ ^(200|301|302|401|403)$ ]]; then
    return 0
  fi

  echo "Unexpected HTTP status for $url: $code" >&2
  return 1
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Run the full smoke check for one Tailscale-exposed Service: proxy ready,
# hostname assigned, DNS resolves, tailscale ping answers, HTTP status OK.
# Each stage is retried up to 18 times with a 10 second delay.
# Arguments:
#   $1 - namespace of the Service
#   $2 - Service name
#   $3 - expected Tailscale hostname
#   $4 - URL to probe over HTTP(S)
# Outputs:
#   Progress on stdout; the failing stage on stderr.
# Returns:
#   0 when every stage passes, 1 at the first stage that fails.
#######################################
check_service() {
  local namespace="$1"
  local service_name="$2"
  local hostname="$3"
  local url="$4"

  echo "Checking $namespace/$service_name -> $hostname"

  # Every stage is checked explicitly instead of relying on `set -e`:
  # errexit is ignored when this function is called from a conditional
  # context, and an explicit check lets the failing stage be named.
  if ! retry 18 10 service_proxy_ready "$namespace" "$service_name"; then
    echo "FAILED $namespace/$service_name: Tailscale proxy did not become ready" >&2
    return 1
  fi
  if ! retry 18 10 assigned_hostname_matches "$namespace" "$service_name" "$hostname"; then
    echo "FAILED $namespace/$service_name: assigned hostname is not $hostname" >&2
    return 1
  fi
  if ! retry 18 10 dns_resolves "$hostname"; then
    echo "FAILED $namespace/$service_name: $hostname does not resolve" >&2
    return 1
  fi
  if ! retry 18 10 tailscale_ping_succeeds "$hostname"; then
    echo "FAILED $namespace/$service_name: tailscale ping to $hostname failed" >&2
    return 1
  fi
  if ! retry 18 10 http_status_is_expected "$url"; then
    echo "FAILED $namespace/$service_name: no expected HTTP status from $url" >&2
    return 1
  fi

  # Prints the first address getent reports for the hostname.
  echo "Resolved hostname: $(getent hosts "$hostname" | awk '{print $1}' | head -n 1)"
  echo "HTTP status OK for $url"
}
|
|
|
|
|
|
2026-05-03 23:02:04 +00:00
|
|
|
# Recycle unhealthy proxy pods first so the checks below start from a
# settled state.
restart_unhealthy_tailscale_proxies

# One row per exposed service: namespace, Service name, hostname, probe URL.
smoke_targets=(
  "cattle-system rancher-tailscale rancher.silverside-gopher.ts.net https://rancher.silverside-gopher.ts.net/"
  "observability grafana-tailscale grafana.silverside-gopher.ts.net http://grafana.silverside-gopher.ts.net/"
  "observability prometheus-tailscale prometheus.silverside-gopher.ts.net http://prometheus.silverside-gopher.ts.net:9090/"
)

for smoke_target in "${smoke_targets[@]}"; do
  read -r target_namespace target_service target_hostname target_url <<<"$smoke_target"
  check_service "$target_namespace" "$target_service" "$target_hostname" "$target_url"
done
|