diff --git a/.gitea/workflows/terraform-apply.yml b/.gitea/workflows/terraform-apply.yml
index d35fe3d..c18bd2b 100644
--- a/.gitea/workflows/terraform-apply.yml
+++ b/.gitea/workflows/terraform-apply.yml
@@ -73,33 +73,187 @@ jobs:
       - name: Enroll VMs in Tailscale
         env:
           TS_AUTHKEY: ${{ secrets.TS_AUTHKEY }}
-          TAILSCALE_ENROLL_HOSTS: ${{ secrets.TAILSCALE_ENROLL_HOSTS }}
-          VM_SSH_PRIVATE_KEY: ${{ secrets.VM_SSH_PRIVATE_KEY }}
+          PM_API_TOKEN_SECRET: ${{ secrets.PM_API_TOKEN_SECRET }}
+        working-directory: terraform
         run: |
-          if [ -z "$TS_AUTHKEY" ] || [ -z "$TAILSCALE_ENROLL_HOSTS" ] || [ -z "$VM_SSH_PRIVATE_KEY" ]; then
-            echo "Skipping Tailscale enrollment (missing TS_AUTHKEY, TAILSCALE_ENROLL_HOSTS, or VM_SSH_PRIVATE_KEY)."
+          if [ -z "$TS_AUTHKEY" ] || [ -z "$PM_API_TOKEN_SECRET" ]; then
+            echo "Skipping Tailscale enrollment (missing TS_AUTHKEY or PM_API_TOKEN_SECRET)."
             exit 0
           fi
 
-          echo "Expected format: host or host=hostname (comma-separated)"
+          PM_API_URL=$(awk -F'"' '/^pm_api_url/{print $2}' terraform.tfvars)
+          PM_API_TOKEN_ID=$(awk -F'"' '/^pm_api_token_id/{print $2}' terraform.tfvars)
+          TARGET_NODE=$(awk -F'"' '/^target_node/{print $2}' terraform.tfvars)
 
-          install -m 700 -d ~/.ssh
-          printf '%s\n' "$VM_SSH_PRIVATE_KEY" > ~/.ssh/id_rsa
-          chmod 600 ~/.ssh/id_rsa
+          export PM_API_URL PM_API_TOKEN_ID TARGET_NODE
 
-          for target in $(printf '%s' "$TAILSCALE_ENROLL_HOSTS" | tr ',' ' '); do
-            host="${target%%=*}"
-            ts_hostname=""
-            if [ "$host" != "$target" ]; then
-              ts_hostname="${target#*=}"
-            fi
+          terraform output -json > tfoutputs.json
+          cat > enroll_tailscale.py <<'PY'
+          # Enroll each Terraform-managed VM in Tailscale by running commands through
+          # the Proxmox QEMU guest agent API.
+          import http.client
+          import json
+          import os
+          import shlex
+          import ssl
+          import sys
+          import time
+          import urllib.error
+          import urllib.parse
+          import urllib.request
 
-            echo "Enrolling $host into Tailscale"
-            if [ -n "$ts_hostname" ]; then
-              ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/id_rsa "micqdf@$host" \
-                "set -e; echo '$TS_AUTHKEY' | sudo tee /etc/tailscale/authkey >/dev/null; echo '$ts_hostname' | sudo tee /etc/tailscale/hostname >/dev/null; sudo chmod 600 /etc/tailscale/authkey; sudo hostnamectl set-hostname '$ts_hostname' || true; sudo systemctl restart tailscaled; sudo systemctl start tailscale-firstboot.service"
-            else
-              ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/id_rsa "micqdf@$host" \
-                "set -e; echo '$TS_AUTHKEY' | sudo tee /etc/tailscale/authkey >/dev/null; sudo chmod 600 /etc/tailscale/authkey; sudo systemctl restart tailscaled; sudo systemctl start tailscale-firstboot.service"
-            fi
-          done
+          api_url = os.environ["PM_API_URL"].rstrip("/")
+          # Accept both the bare host URL and the ".../api2/json" form.
+          if api_url.endswith("/api2/json"):
+              api_url = api_url[: -len("/api2/json")]
+          token_id = os.environ["PM_API_TOKEN_ID"].strip()
+          token_secret = os.environ["PM_API_TOKEN_SECRET"].strip()
+          target_node = os.environ["TARGET_NODE"].strip()
+          ts_authkey = os.environ["TS_AUTHKEY"]
+
+          if not token_id or not token_secret:
+              raise SystemExit("Missing Proxmox token id/secret")
+          # awk prints nothing for a key missing from terraform.tfvars; fail here
+          # rather than with an opaque URL error on the first API call.
+          if not api_url or not target_node:
+              raise SystemExit("Missing pm_api_url or target_node in terraform.tfvars")
+
+          with open("tfoutputs.json", "r", encoding="utf-8") as f:
+              outputs = json.load(f)
+
+          # Each output is read as a mapping of hostname -> VM id.
+          targets = []
+          for output_name in ("alpaca_vm_ids", "llama_vm_ids"):
+              mapping = outputs.get(output_name, {}).get("value", {})
+              if isinstance(mapping, dict):
+                  for hostname, vmid in mapping.items():
+                      targets.append((str(hostname), int(vmid)))
+
+          if not targets:
+              print("No VMs found in terraform outputs; skipping tailscale enrollment")
+              raise SystemExit(0)
+
+          print("Tailscale enrollment targets:", ", ".join(f"{h}:{v}" for h, v in targets))
+
+          # NOTE(review): certificate verification is disabled, so the API token is
+          # sent over unauthenticated TLS. Presumably the Proxmox host presents a
+          # self-signed certificate; load its CA here instead if one is available.
+          ssl_ctx = ssl.create_default_context()
+          ssl_ctx.check_hostname = False  # must be cleared before verify_mode
+          ssl_ctx.verify_mode = ssl.CERT_NONE
+          auth_header = f"PVEAPIToken={token_id}={token_secret}"
+
+          def api_request(method, path, data=None):
+              """Call the Proxmox API and return the decoded JSON body.
+
+              A non-None `data` mapping is sent form-encoded (list values become
+              repeated fields). HTTP and network errors propagate to the caller.
+              """
+              url = f"{api_url}{path}"
+              headers = {"Authorization": auth_header}
+              body = None
+              if data is not None:
+                  body = urllib.parse.urlencode(data, doseq=True).encode("utf-8")
+                  headers["Content-Type"] = "application/x-www-form-urlencoded"
+              req = urllib.request.Request(url, data=body, headers=headers, method=method)
+              with urllib.request.urlopen(req, context=ssl_ctx, timeout=30) as resp:
+                  return json.loads(resp.read().decode("utf-8"))
+
+          def wait_for_guest_agent(vmid, timeout_seconds=420):
+              """Poll the guest agent until it answers a ping; return False on timeout."""
+              ping_path = f"/api2/json/nodes/{target_node}/qemu/{vmid}/agent/ping"
+              deadline = time.monotonic() + timeout_seconds
+              while time.monotonic() < deadline:
+                  try:
+                      # agent/ping is a POST endpoint; a successful response means the
+                      # agent answered, so the payload itself is not inspected.
+                      api_request("POST", ping_path)
+                      return True
+                  except urllib.error.HTTPError as err:
+                      # Rejected credentials never recover, so do not wait out the
+                      # timeout; other statuses (agent still starting) are retried.
+                      if err.code in (401, 403):
+                          raise SystemExit(f"Proxmox API rejected the token (HTTP {err.code})")
+                  except (OSError, ValueError, http.client.HTTPException):
+                      pass  # transient network or decoding problem: retry
+                  time.sleep(5)
+              return False
+
+          def exec_guest(vmid, command):
+              """Run `command` in the guest via a login shell.
+
+              Returns (exitcode, stdout, stderr). Exit code 124 is returned when the
+              command has not exited after 120 status polls two seconds apart.
+              """
+              agent = f"/api2/json/nodes/{target_node}/qemu/{vmid}/agent"
+              # The exec endpoint takes program and arguments as a repeated "command"
+              # field; "extra-args" exists only on the `qm guest exec` CLI.
+              # NOTE(review): confirm against the deployed Proxmox VE version.
+              res = api_request(
+                  "POST",
+                  f"{agent}/exec",
+                  {"command": ["/run/current-system/sw/bin/sh", "-lc", command]},
+              )
+              pid = res["data"]["pid"]
+              for _ in range(120):
+                  reply = api_request("GET", f"{agent}/exec-status?pid={pid}")
+                  status = reply.get("data") or {}
+                  if status.get("exited"):
+                      return (
+                          int(status.get("exitcode", 1)),
+                          status.get("out-data", ""),
+                          status.get("err-data", ""),
+                      )
+                  time.sleep(2)
+              return (124, "", "Timed out waiting for guest command")
+
+          failures = []
+          quoted_key = shlex.quote(ts_authkey)
+
+          for hostname, vmid in targets:
+              print(f"\n== Enrolling {hostname} (vmid {vmid}) ==")
+              if not wait_for_guest_agent(vmid):
+                  failures.append(f"{hostname}: guest agent not ready")
+                  print(f"ERROR: guest agent not ready for vmid {vmid}")
+                  continue
+
+              quoted_hostname = shlex.quote(hostname)
+              cmd = (
+                  "set -e; "
+                  f"printf '%s' {quoted_key} > /etc/tailscale/authkey; "
+                  f"printf '%s' {quoted_hostname} > /etc/tailscale/hostname; "
+                  "chmod 600 /etc/tailscale/authkey; "
+                  f"hostnamectl set-hostname {quoted_hostname} || true; "
+                  "systemctl restart tailscaled; "
+                  "systemctl start tailscale-firstboot.service; "
+                  "tailscale status || true"
+              )
+
+              try:
+                  exitcode, stdout, stderr = exec_guest(vmid, cmd)
+              except (OSError, ValueError, KeyError, TypeError,
+                      http.client.HTTPException) as err:
+                  # A failed API call for one VM must not abort the remaining ones.
+                  failures.append(f"{hostname}: guest exec request failed ({err})")
+                  print(f"ERROR: guest exec request failed for {hostname}: {err}")
+                  continue
+
+              if stdout:
+                  print(stdout)
+              if stderr:
+                  print(stderr, file=sys.stderr)
+
+              if exitcode != 0:
+                  failures.append(f"{hostname}: command failed exit {exitcode}")
+                  print(f"ERROR: tailscale enrollment failed for {hostname} (exit {exitcode})")
+
+          if failures:
+              print("\nEnrollment failures:")
+              for failure in failures:
+                  print(f"- {failure}")
+              raise SystemExit(1)
+
+          print("\nTailscale enrollment completed for all managed VMs")
+          PY
+
+          python3 enroll_tailscale.py