diff --git a/scripts/smoke-check-tailnet-services.sh b/scripts/smoke-check-tailnet-services.sh
index 50c6282..6409090 100644
--- a/scripts/smoke-check-tailnet-services.sh
+++ b/scripts/smoke-check-tailnet-services.sh
@@ -22,6 +22,46 @@ retry() {
   done
 }
 
+# Deletes Tailscale-managed proxy pods whose STATUS is neither Running nor
+# Completed, then waits for the managed pods to report Ready.
+# Globals:   KUBECTL (read) - kubectl command prefix, expanded as an array
+# Outputs:   progress message on stdout; warning on stderr if listing fails
+# Returns:   0 if nothing needed a restart or the pod listing failed
+#            (best-effort); otherwise the status of the final kubectl wait.
+restart_unhealthy_tailscale_proxies() {
+  local pod_listing
+  local unhealthy_pods
+  local pod
+
+  # Best-effort: a failed listing must not abort the smoke checks, but report
+  # it instead of silently treating every proxy as healthy.
+  if ! pod_listing="$("${KUBECTL[@]}" -n tailscale-system get pods \
+    -l tailscale.com/managed=true --no-headers)"; then
+    echo "WARNING: could not list Tailscale-managed proxy pods; skipping restart" >&2
+    return 0
+  fi
+
+  # Field 3 of `kubectl get pods --no-headers` is STATUS. awk exits 0 even when
+  # nothing matches, so no `|| true` is needed to survive errexit/pipefail.
+  unhealthy_pods="$(awk 'NF >= 3 && $3 !~ /^(Running|Completed)$/ {print $1}' \
+    <<<"${pod_listing}")"
+
+  if [ -z "${unhealthy_pods}" ]; then
+    return 0
+  fi
+
+  echo "Restarting unhealthy Tailscale-managed proxy pods before smoke checks"
+  while IFS= read -r pod; do
+    "${KUBECTL[@]}" -n tailscale-system delete pod "${pod}" --wait=false
+  done <<<"${unhealthy_pods}"
+
+  # NOTE(review): the fixed pause presumably lets replacement pods appear
+  # before waiting on the label selector - confirm 30s is sufficient.
+  sleep 30
+  "${KUBECTL[@]}" -n tailscale-system wait --for=condition=Ready pod \
+    -l tailscale.com/managed=true --timeout=600s
+}
+
 service_proxy_ready() {
   local namespace="$1"
   local service_name="$2"
@@ -85,6 +125,8 @@ check_service() {
   echo "HTTP status OK for $url"
 }
 
+restart_unhealthy_tailscale_proxies
+
 check_service "cattle-system" "rancher-tailscale" "rancher.silverside-gopher.ts.net" "https://rancher.silverside-gopher.ts.net/"
 check_service "observability" "grafana-tailscale" "grafana.silverside-gopher.ts.net" "http://grafana.silverside-gopher.ts.net/"
 check_service "observability" "prometheus-tailscale" "prometheus.silverside-gopher.ts.net" "http://prometheus.silverside-gopher.ts.net:9090/"