fix: restart tailscale operator before health scan
@@ -1278,6 +1278,17 @@ jobs:
 kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
 kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
 ! kubectl get storageclass | grep -E "^local-path.*\\(default\\)"
+if kubectl -n tailscale-system get deployment/operator >/dev/null 2>&1; then
+  if ! kubectl -n tailscale-system rollout status deployment/operator --timeout=120s; then
+    echo "Restarting unhealthy Tailscale operator before final health scan"
+    kubectl -n tailscale-system delete pod -l app=operator --wait=false
+    if ! kubectl -n tailscale-system rollout status deployment/operator --timeout=600s; then
+      kubectl -n tailscale-system get pods -o wide || true
+      kubectl -n tailscale-system logs deployment/operator --tail=100 || true
+      exit 1
+    fi
+  fi
+fi
 tailscale_unhealthy_pods=$(mktemp)
 kubectl -n tailscale-system get pods -l tailscale.com/managed=true --no-headers \
   | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
@@ -1294,6 +1305,7 @@ jobs:
 kubectl get pods -A --no-headers \
   | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
   | grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
   | grep -Ev "^cattle-fleet-system[[:space:]]+fleet-cleanup-clusterregistrations-" \
   | grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \
+  | grep -Ev "^cattle-turtles-system[[:space:]]+cluster-api-operator-resources-cleanup-" \
   | grep -Ev "^kube-system[[:space:]]+helm-install-" \
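For readability, the block this commit inserts before the final health scan is restated below as an annotated standalone script. The namespace, label selector, timeouts, and commands are taken from the diff above; the shebang and the set -euo pipefail line are assumptions added so the sketch can run on its own outside the CI step.

#!/usr/bin/env bash
# Standalone sketch of the restart-before-health-scan step; in the workflow these
# commands run inside an existing CI shell step rather than their own script.
set -euo pipefail

# Only act if the Tailscale operator deployment exists in this cluster.
if kubectl -n tailscale-system get deployment/operator >/dev/null 2>&1; then
  # Quick probe: give the operator 120s to report a healthy rollout.
  if ! kubectl -n tailscale-system rollout status deployment/operator --timeout=120s; then
    echo "Restarting unhealthy Tailscale operator before final health scan"
    # Delete the operator pod so the Deployment recreates it; do not block on deletion.
    kubectl -n tailscale-system delete pod -l app=operator --wait=false
    # Allow a longer window (600s) for the replacement pod to become ready.
    if ! kubectl -n tailscale-system rollout status deployment/operator --timeout=600s; then
      # Dump diagnostics before failing the job.
      kubectl -n tailscale-system get pods -o wide || true
      kubectl -n tailscale-system logs deployment/operator --tail=100 || true
      exit 1
    fi
  fi
fi

Deleting the pod with --wait=false lets the Deployment controller recreate it while the longer 600s rollout wait does the blocking; the || true suffixes keep the diagnostic commands best-effort so a failed pod listing or log fetch does not obscure the deliberate exit 1.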