fix: stabilize tailscale enrollment without cloud-init rollback

Create /etc/tailscale before writing the runtime key, add progress logging and unbuffered output to the enroll script, and shorten the guest-agent wait so the job fails faster when enrollment cannot run.
This commit is contained in:
2026-02-28 12:09:40 +00:00
parent 6fbc4dd80f
commit 510ba707ad
2 changed files with 12 additions and 2 deletions

View File

@@ -144,15 +144,21 @@ jobs:
payload = resp.read().decode("utf-8")
return json.loads(payload)
def wait_for_guest_agent(vmid, timeout_seconds=900):
def wait_for_guest_agent(vmid, timeout_seconds=300):
    """Poll a VM's guest agent until it answers a ping or the timeout elapses.

    Issues a GET on the node's ``qemu/{vmid}/agent/ping`` endpoint through
    ``api_request`` about every five seconds, printing a progress line on
    every sixth attempt.

    Args:
        vmid: ID of the VM whose guest agent is polled.
        timeout_seconds: Maximum time to keep polling before giving up.

    Returns:
        True as soon as the response's ``data`` field equals ``"pong"``;
        False if the timeout elapses first.
    """
    poll_interval = 5
    # Use the monotonic clock so a wall-clock adjustment during the wait
    # cannot shorten or extend the deadline.
    deadline = time.monotonic() + timeout_seconds
    tries = 0
    while time.monotonic() < deadline:
        tries += 1
        try:
            res = api_request("GET", f"/api2/json/nodes/{target_node}/qemu/{vmid}/agent/ping")
            if res.get("data") == "pong":
                print(f"Guest agent ready for vmid {vmid}", flush=True)
                return True
        except Exception:
            # Deliberate best effort: any failure of the ping request is
            # treated as "not ready yet" and retried until the deadline.
            pass
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            # The request itself may have overrun the deadline; stop here
            # rather than logging a negative countdown and sleeping again.
            break
        if tries % 6 == 0:
            print(f"Waiting for guest agent on vmid {vmid} ({int(remaining)}s left)", flush=True)
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
    return False
@@ -193,6 +199,7 @@ jobs:
safe_hostname = hostname.replace("'", "'\"'\"'")
cmd = (
"set -e; "
"install -d -m 700 /etc/tailscale; "
f"printf '%s' '{safe_key}' > /etc/tailscale/authkey; "
f"printf '%s' '{safe_hostname}' > /etc/tailscale/hostname; "
"chmod 600 /etc/tailscale/authkey; "
@@ -227,4 +234,4 @@ jobs:
print("\nTailscale enrollment completed for all managed VMs")
PY
python3 enroll_tailscale.py
python3 -u enroll_tailscale.py

View File

@@ -49,6 +49,8 @@
RemainAfterExit = true;
};
script = ''
install -d -m 0700 /etc/tailscale
if [ ! -s /etc/tailscale/authkey ]; then
exit 0
fi
@@ -59,6 +61,7 @@
ts_hostname="--hostname=$(cat /etc/tailscale/hostname)"
fi
install -d -m 0700 /var/lib/tailscale
rm -f /var/lib/tailscale/tailscaled.state
${pkgs.tailscale}/bin/tailscale up --reset --auth-key="$key" $ts_hostname