fix: make tailscale enrollment resilient when guest agent is unavailable #25
@@ -74,6 +74,7 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
TS_AUTHKEY: ${{ secrets.TS_AUTHKEY }}
|
TS_AUTHKEY: ${{ secrets.TS_AUTHKEY }}
|
||||||
PM_API_TOKEN_SECRET: ${{ secrets.PM_API_TOKEN_SECRET }}
|
PM_API_TOKEN_SECRET: ${{ secrets.PM_API_TOKEN_SECRET }}
|
||||||
|
TAILSCALE_ENROLL_STRICT: ${{ secrets.TAILSCALE_ENROLL_STRICT }}
|
||||||
working-directory: terraform
|
working-directory: terraform
|
||||||
run: |
|
run: |
|
||||||
if [ -z "$TS_AUTHKEY" ] || [ -z "$PM_API_TOKEN_SECRET" ]; then
|
if [ -z "$TS_AUTHKEY" ] || [ -z "$PM_API_TOKEN_SECRET" ]; then
|
||||||
@@ -104,6 +105,7 @@ jobs:
|
|||||||
token_secret = os.environ["PM_API_TOKEN_SECRET"].strip()
|
token_secret = os.environ["PM_API_TOKEN_SECRET"].strip()
|
||||||
target_node = os.environ["TARGET_NODE"].strip()
|
target_node = os.environ["TARGET_NODE"].strip()
|
||||||
ts_authkey = os.environ["TS_AUTHKEY"]
|
ts_authkey = os.environ["TS_AUTHKEY"]
|
||||||
|
enroll_strict = os.environ.get("TAILSCALE_ENROLL_STRICT", "false").strip().lower() == "true"
|
||||||
|
|
||||||
if not token_id or not token_secret:
|
if not token_id or not token_secret:
|
||||||
raise SystemExit("Missing Proxmox token id/secret")
|
raise SystemExit("Missing Proxmox token id/secret")
|
||||||
@@ -142,7 +144,7 @@ jobs:
|
|||||||
payload = resp.read().decode("utf-8")
|
payload = resp.read().decode("utf-8")
|
||||||
return json.loads(payload)
|
return json.loads(payload)
|
||||||
|
|
||||||
def wait_for_guest_agent(vmid, timeout_seconds=420):
|
def wait_for_guest_agent(vmid, timeout_seconds=900):
|
||||||
deadline = time.time() + timeout_seconds
|
deadline = time.time() + timeout_seconds
|
||||||
while time.time() < deadline:
|
while time.time() < deadline:
|
||||||
try:
|
try:
|
||||||
@@ -184,7 +186,7 @@ jobs:
|
|||||||
for hostname, vmid in targets:
|
for hostname, vmid in targets:
|
||||||
print(f"\n== Enrolling {hostname} (vmid {vmid}) ==")
|
print(f"\n== Enrolling {hostname} (vmid {vmid}) ==")
|
||||||
if not wait_for_guest_agent(vmid):
|
if not wait_for_guest_agent(vmid):
|
||||||
failures.append(f"{hostname}: guest agent not ready")
|
failures.append((hostname, "agent_not_ready", "guest agent not ready"))
|
||||||
print(f"ERROR: guest agent not ready for vmid {vmid}")
|
print(f"ERROR: guest agent not ready for vmid {vmid}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -207,13 +209,19 @@ jobs:
|
|||||||
print(stderr, file=sys.stderr)
|
print(stderr, file=sys.stderr)
|
||||||
|
|
||||||
if exitcode != 0:
|
if exitcode != 0:
|
||||||
failures.append(f"{hostname}: command failed exit {exitcode}")
|
failures.append((hostname, "command_failed", f"command failed exit {exitcode}"))
|
||||||
print(f"ERROR: tailscale enrollment failed for {hostname} (exit {exitcode})")
|
print(f"ERROR: tailscale enrollment failed for {hostname} (exit {exitcode})")
|
||||||
|
|
||||||
if failures:
|
if failures:
|
||||||
print("\nEnrollment failures:")
|
print("\nEnrollment failures:")
|
||||||
for failure in failures:
|
for hostname, kind, detail in failures:
|
||||||
print(f"- {failure}")
|
print(f"- {hostname}: {detail}")
|
||||||
|
|
||||||
|
only_agent_ready_failures = all(kind == "agent_not_ready" for _, kind, _ in failures)
|
||||||
|
if only_agent_ready_failures and not enroll_strict:
|
||||||
|
print("\nWARNING: Enrollment skipped because guest agent was unavailable. Set TAILSCALE_ENROLL_STRICT=true to fail the workflow in this case.")
|
||||||
|
raise SystemExit(0)
|
||||||
|
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
print("\nTailscale enrollment completed for all managed VMs")
|
print("\nTailscale enrollment completed for all managed VMs")
|
||||||
|
|||||||
Reference in New Issue
Block a user