Merge pull request 'fix: make tailscale enrollment resilient when guest agent is unavailable' (#25) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Has been cancelled
Some checks failed
Terraform Apply / Terraform Apply (push) Has been cancelled
Reviewed-on: #25
This commit was merged in pull request #25.
This commit is contained in:
@@ -74,6 +74,7 @@ jobs:
|
||||
env:
|
||||
TS_AUTHKEY: ${{ secrets.TS_AUTHKEY }}
|
||||
PM_API_TOKEN_SECRET: ${{ secrets.PM_API_TOKEN_SECRET }}
|
||||
+TAILSCALE_ENROLL_STRICT: ${{ secrets.TAILSCALE_ENROLL_STRICT }}
|
||||
working-directory: terraform
|
||||
run: |
|
||||
if [ -z "$TS_AUTHKEY" ] || [ -z "$PM_API_TOKEN_SECRET" ]; then
|
||||
@@ -104,6 +105,7 @@ jobs:
|
||||
token_secret = os.environ["PM_API_TOKEN_SECRET"].strip()
|
||||
target_node = os.environ["TARGET_NODE"].strip()
|
||||
ts_authkey = os.environ["TS_AUTHKEY"]
|
||||
+enroll_strict = os.environ.get("TAILSCALE_ENROLL_STRICT", "false").strip().lower() == "true"
|
||||
|
||||
if not token_id or not token_secret:
|
||||
raise SystemExit("Missing Proxmox token id/secret")
|
||||
@@ -142,7 +144,7 @@ jobs:
|
||||
payload = resp.read().decode("utf-8")
|
||||
return json.loads(payload)
|
||||
|
||||
-def wait_for_guest_agent(vmid, timeout_seconds=420):
|
||||
+def wait_for_guest_agent(vmid, timeout_seconds=900):
|
||||
deadline = time.time() + timeout_seconds
|
||||
while time.time() < deadline:
|
||||
try:
|
||||
@@ -184,7 +186,7 @@ jobs:
|
||||
for hostname, vmid in targets:
|
||||
print(f"\n== Enrolling {hostname} (vmid {vmid}) ==")
|
||||
if not wait_for_guest_agent(vmid):
|
||||
-failures.append(f"{hostname}: guest agent not ready")
|
||||
+failures.append((hostname, "agent_not_ready", "guest agent not ready"))
|
||||
print(f"ERROR: guest agent not ready for vmid {vmid}")
|
||||
continue
|
||||
|
||||
@@ -207,13 +209,19 @@ jobs:
|
||||
print(stderr, file=sys.stderr)
|
||||
|
||||
if exitcode != 0:
|
||||
-failures.append(f"{hostname}: command failed exit {exitcode}")
|
||||
+failures.append((hostname, "command_failed", f"command failed exit {exitcode}"))
|
||||
print(f"ERROR: tailscale enrollment failed for {hostname} (exit {exitcode})")
|
||||
|
||||
if failures:
|
||||
print("\nEnrollment failures:")
|
||||
-for failure in failures:
|
||||
-print(f"- {failure}")
|
||||
+for hostname, kind, detail in failures:
|
||||
+print(f"- {hostname}: {detail}")
|
||||
|
||||
+only_agent_ready_failures = all(kind == "agent_not_ready" for _, kind, _ in failures)
|
||||
+if only_agent_ready_failures and not enroll_strict:
|
||||
+print("\nWARNING: Enrollment skipped because guest agent was unavailable. Set TAILSCALE_ENROLL_STRICT=true to fail the workflow in this case.")
|
||||
+raise SystemExit(0)
|
||||
|
||||
raise SystemExit(1)
|
||||
|
||||
print("\nTailscale enrollment completed for all managed VMs")
|
||||
|
||||
Reference in New Issue
Block a user