Merge pull request 'fix: map SSH-discovered nodes by VMID when hostnames are generic' (#105) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 27m43s

Reviewed-on: #105
This commit was merged in pull request #105.
This commit is contained in:
2026-03-03 23:37:45 +00:00

View File

@@ -6,7 +6,7 @@ import json
import os
import subprocess
import sys
from typing import Dict, Set
from typing import Dict, Set, Tuple
def derive_prefix(payload: dict) -> str:
@@ -25,7 +25,7 @@ def derive_prefix(payload: dict) -> str:
return "10.27.27"
def ssh_hostname(ip: str, users: list[str], key_path: str, timeout_sec: int) -> tuple[str, str] | None:
def ssh_probe(ip: str, users: list[str], key_path: str, timeout_sec: int) -> Tuple[str, str, str] | None:
cmd_tail = [
"-o",
"BatchMode=yes",
@@ -39,13 +39,23 @@ def ssh_hostname(ip: str, users: list[str], key_path: str, timeout_sec: int) ->
key_path,
]
for user in users:
cmd = ["ssh", *cmd_tail, f"{user}@{ip}", "hostnamectl --static 2>/dev/null || hostname"]
cmd = [
"ssh",
*cmd_tail,
f"{user}@{ip}",
"hn=$(hostnamectl --static 2>/dev/null || hostname); serial=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || true); printf '%s|%s\n' \"$hn\" \"$serial\"",
]
try:
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, text=True, timeout=timeout_sec + 2).strip()
except Exception:
continue
if out:
return out.splitlines()[0].strip(), ip
line = out.splitlines()[0].strip()
if "|" in line:
host, serial = line.split("|", 1)
else:
host, serial = line, ""
return host.strip(), ip, serial.strip()
return None
@@ -89,21 +99,30 @@ def main() -> int:
scan_ips = [str(ipaddress.IPv4Address(f"{prefix}.{i}")) for i in range(start, end + 1)]
found: Dict[str, str] = {}
vmid_to_name: Dict[str, str] = {}
for name, vmid in payload.get("control_plane_vm_ids", {}).get("value", {}).items():
vmid_to_name[str(vmid)] = name
for name, vmid in payload.get("worker_vm_ids", {}).get("value", {}).items():
vmid_to_name[str(vmid)] = name
seen_hostnames: Dict[str, str] = {}
def run_pass(pass_timeout: int, pass_workers: int) -> None:
with concurrent.futures.ThreadPoolExecutor(max_workers=pass_workers) as pool:
futures = [pool.submit(ssh_hostname, ip, users, key_path, pass_timeout) for ip in scan_ips]
futures = [pool.submit(ssh_probe, ip, users, key_path, pass_timeout) for ip in scan_ips]
for fut in concurrent.futures.as_completed(futures):
result = fut.result()
if not result:
continue
host, ip = result
host, ip, serial = result
if host not in seen_hostnames:
seen_hostnames[host] = ip
if host in target_names and host not in found:
found[host] = ip
elif serial in vmid_to_name:
inferred = vmid_to_name[serial]
if inferred not in found:
found[inferred] = ip
if all(name in found for name in target_names):
return