fix: recover tailscale operator during smoke checks
This commit is contained in:
@@ -22,6 +22,24 @@ retry() {
|
|||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Make sure the Tailscale operator deployment is rolled out, deleting its
# pods once if the first rollout check does not succeed.
# Globals:   KUBECTL (read) - kubectl invocation stored as an array
# Arguments: none
# Outputs:   a progress message on stdout before pods are deleted; kubectl
#            output is passed through unredirected, except for the pod
#            listing and operator logs, which go to stderr when the
#            rollout still fails after the restart
# Returns:   0 if the deployment cannot be fetched, is already rolled out,
#            or rolls out after the pod delete; 1 if the second rollout
#            check fails
#######################################
restart_tailscale_operator() {
  local operator_ns="tailscale-system"
  local operator_target="deployment/operator"

  # If the deployment lookup fails there is nothing to recover here.
  "${KUBECTL[@]}" -n "$operator_ns" get "$operator_target" >/dev/null 2>&1 \
    || return 0

  # Short first wait: when this succeeds, no restart is performed.
  if "${KUBECTL[@]}" -n "$operator_ns" rollout status "$operator_target" --timeout=60s; then
    return 0
  fi

  echo "Restarting unhealthy Tailscale operator before smoke checks"
  # --wait=false: do not block on pod termination; the rollout check below
  # is what waits for the replacement pod.
  "${KUBECTL[@]}" -n "$operator_ns" delete pod -l app=operator --wait=false

  if "${KUBECTL[@]}" -n "$operator_ns" rollout status "$operator_target" --timeout=600s; then
    return 0
  fi

  # Best-effort diagnostics; their own failures must not mask the result.
  "${KUBECTL[@]}" -n "$operator_ns" get pods -o wide >&2 || true
  "${KUBECTL[@]}" -n "$operator_ns" logs "$operator_target" --tail=100 >&2 || true
  return 1
}
|
||||||
|
|
||||||
restart_unhealthy_tailscale_proxies() {
|
restart_unhealthy_tailscale_proxies() {
|
||||||
local unhealthy_pods
|
local unhealthy_pods
|
||||||
unhealthy_pods="$(mktemp)"
|
unhealthy_pods="$(mktemp)"
|
||||||
@@ -59,9 +77,15 @@ restart_service_tailscale_proxy() {
|
|||||||
|
|
||||||
proxy_pods="$("${KUBECTL[@]}" -n tailscale-system get pods -l "app=${service_uid}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
|
proxy_pods="$("${KUBECTL[@]}" -n tailscale-system get pods -l "app=${service_uid}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
|
||||||
if [ -z "$proxy_pods" ]; then
|
if [ -z "$proxy_pods" ]; then
|
||||||
echo "Cannot restart proxy for $namespace/$service_name: no proxy pod with app=$service_uid" >&2
|
echo "No proxy pod with app=$service_uid for $namespace/$service_name; restarting operator and waiting for proxy creation" >&2
|
||||||
"${KUBECTL[@]}" -n tailscale-system get pods -o wide >&2 || true
|
restart_tailscale_operator
|
||||||
return 1
|
sleep 30
|
||||||
|
if ! retry 18 10 service_proxy_ready "$namespace" "$service_name"; then
|
||||||
|
"${KUBECTL[@]}" -n "$namespace" get svc "$service_name" -o yaml >&2 || true
|
||||||
|
"${KUBECTL[@]}" -n tailscale-system get pods -o wide >&2 || true
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Restarting Tailscale proxy pod for $namespace/$service_name"
|
echo "Restarting Tailscale proxy pod for $namespace/$service_name"
|
||||||
@@ -163,6 +187,7 @@ check_service() {
|
|||||||
echo "HTTP status OK for $url"
|
echo "HTTP status OK for $url"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
restart_tailscale_operator
|
||||||
restart_unhealthy_tailscale_proxies
|
restart_unhealthy_tailscale_proxies
|
||||||
|
|
||||||
check_service "cattle-system" "rancher-tailscale" "rancher.silverside-gopher.ts.net" "https://rancher.silverside-gopher.ts.net/"
|
check_service "cattle-system" "rancher-tailscale" "rancher.silverside-gopher.ts.net" "https://rancher.silverside-gopher.ts.net/"
|
||||||
|
|||||||
Reference in New Issue
Block a user