From 6b9a77aae734454b6bdac03c7d9382eb8c04ce1b Mon Sep 17 00:00:00 2001
From: MichaelFisher1997
Date: Mon, 4 May 2026 06:20:26 +0000
Subject: [PATCH] fix: recover tailscale operator during smoke checks

---
Review notes (this section is ignored by git am):
 * restart_service_tailscale_proxy now checks the result of
   restart_tailscale_operator instead of sleeping and retrying after a
   failed recovery.
 * restart_tailscale_operator now fails fast when the pod delete fails.
 * The log message no longer claims an unconditional operator restart.
 * Line structure was reconstructed assuming two spaces per indent level;
   if the context does not match the target file, apply with
   `git apply --ignore-whitespace`. The post-image blob id on the index
   line predates these review changes.

 scripts/smoke-check-tailnet-services.sh | 51 ++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/scripts/smoke-check-tailnet-services.sh b/scripts/smoke-check-tailnet-services.sh
index 6db62d6..ec1c9e8 100644
--- a/scripts/smoke-check-tailnet-services.sh
+++ b/scripts/smoke-check-tailnet-services.sh
@@ -22,6 +22,39 @@ retry() {
   done
 }
 
+# Recover the Tailscale operator deployment when it is not healthy.
+#
+# Globals:   KUBECTL (read) - kubectl command array, defined outside this hunk
+# Outputs:   progress message on stdout; pod list and operator logs on stderr
+#            when the operator does not come back
+# Returns:   0 if the deployment cannot be fetched, is already rolled out, or
+#            rolls out after its pods are deleted; 1 otherwise
+restart_tailscale_operator() {
+  # A failed lookup is treated as "nothing to recover", not as an error.
+  if ! "${KUBECTL[@]}" -n tailscale-system get deployment/operator >/dev/null 2>&1; then
+    return 0
+  fi
+
+  # Already rolled out within 60s: leave the operator alone.
+  if "${KUBECTL[@]}" -n tailscale-system rollout status deployment/operator --timeout=60s; then
+    return 0
+  fi
+
+  echo "Restarting unhealthy Tailscale operator before smoke checks"
+  # Check the delete explicitly: when this function is called from a
+  # conditional, errexit (if enabled) is suppressed and a failed delete would
+  # otherwise be followed by a pointless 600s wait.
+  if ! "${KUBECTL[@]}" -n tailscale-system delete pod -l app=operator --wait=false; then
+    echo "Failed to delete Tailscale operator pods" >&2
+    return 1
+  fi
+  if ! "${KUBECTL[@]}" -n tailscale-system rollout status deployment/operator --timeout=600s; then
+    "${KUBECTL[@]}" -n tailscale-system get pods -o wide >&2 || true
+    "${KUBECTL[@]}" -n tailscale-system logs deployment/operator --tail=100 >&2 || true
+    return 1
+  fi
+}
+
 restart_unhealthy_tailscale_proxies() {
   local unhealthy_pods
   unhealthy_pods="$(mktemp)"
@@ -59,9 +92,20 @@ restart_service_tailscale_proxy() {
   proxy_pods="$("${KUBECTL[@]}" -n tailscale-system get pods -l "app=${service_uid}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
 
   if [ -z "$proxy_pods" ]; then
-    echo "Cannot restart proxy for $namespace/$service_name: no proxy pod with app=$service_uid" >&2
-    "${KUBECTL[@]}" -n tailscale-system get pods -o wide >&2 || true
-    return 1
+    # No proxy pod yet: make sure the operator is healthy, then poll for the
+    # proxy to become ready instead of failing immediately.
+    echo "No proxy pod with app=$service_uid for $namespace/$service_name; recovering operator if unhealthy and waiting for proxy creation" >&2
+    if ! restart_tailscale_operator; then
+      echo "Tailscale operator is unhealthy; not waiting for proxy of $namespace/$service_name" >&2
+      return 1
+    fi
+    sleep 30
+    if ! retry 18 10 service_proxy_ready "$namespace" "$service_name"; then
+      "${KUBECTL[@]}" -n "$namespace" get svc "$service_name" -o yaml >&2 || true
+      "${KUBECTL[@]}" -n tailscale-system get pods -o wide >&2 || true
+      return 1
+    fi
+    return 0
   fi
 
   echo "Restarting Tailscale proxy pod for $namespace/$service_name"
@@ -163,6 +207,7 @@ check_service() {
   echo "HTTP status OK for $url"
 }
 
+restart_tailscale_operator
 restart_unhealthy_tailscale_proxies
 
 check_service "cattle-system" "rancher-tailscale" "rancher.silverside-gopher.ts.net" "https://rancher.silverside-gopher.ts.net/"