fix: add heuristic SSH inventory fallback for generic hostnames
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 16s

When Proxmox guest-agent IPs are empty and SSH discovery returns duplicate generic hostnames (e.g. "flex"), assign the remaining missing nodes to unmatched SSH-reachable IPs in deterministic (sorted) order. Also emit SSH-reachable IP diagnostics on failure.
This commit is contained in:
2026-03-04 23:07:45 +00:00
parent a70de061b0
commit fc4eb1bc6e

View File

@@ -117,6 +117,7 @@ def main() -> int:
vmid_to_name[str(vmid)] = name vmid_to_name[str(vmid)] = name
seen_hostnames: Dict[str, str] = {} seen_hostnames: Dict[str, str] = {}
seen_ips: Dict[str, Tuple[str, str]] = {}
def run_pass(pass_timeout: int, pass_workers: int) -> None: def run_pass(pass_timeout: int, pass_workers: int) -> None:
with concurrent.futures.ThreadPoolExecutor(max_workers=pass_workers) as pool: with concurrent.futures.ThreadPoolExecutor(max_workers=pass_workers) as pool:
@@ -128,6 +129,8 @@ def main() -> int:
host, ip, serial = result host, ip, serial = result
if host not in seen_hostnames: if host not in seen_hostnames:
seen_hostnames[host] = ip seen_hostnames[host] = ip
if ip not in seen_ips:
seen_ips[ip] = (host, serial)
target = None target = None
if serial in vmid_to_name: if serial in vmid_to_name:
inferred = vmid_to_name[serial] inferred = vmid_to_name[serial]
@@ -147,11 +150,25 @@ def main() -> int:
# Slower second pass for busy runners/networks. # Slower second pass for busy runners/networks.
run_pass(max(timeout_sec + 2, 8), max(8, max_workers // 2)) run_pass(max(timeout_sec + 2, 8), max(8, max_workers // 2))
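# Heuristic fallback: if nodes are still missing, assign them from the remaining
# SSH-reachable IPs not already used. Missing names and candidate IPs are each
# sorted as strings (so "10.0.0.10" sorts before "10.0.0.9") and paired in order;
# nothing is assigned unless there are at least as many candidates as missing nodes.
# This helps when cloned nodes temporarily share a generic hostname (e.g. "flex")
# and DMI serial mapping is unavailable.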
missing = sorted([n for n in target_names if n not in found])
if missing:
used_ips = set(found.values())
candidates = sorted(ip for ip in seen_ips.keys() if ip not in used_ips)
if len(candidates) >= len(missing):
for name, ip in zip(missing, candidates):
found[name] = ip
missing = sorted([n for n in target_names if n not in found]) missing = sorted([n for n in target_names if n not in found])
if missing: if missing:
discovered = ", ".join(sorted(seen_hostnames.keys())[:20]) discovered = ", ".join(sorted(seen_hostnames.keys())[:20])
if discovered: if discovered:
sys.stderr.write(f"Discovered hostnames during scan: {discovered}\n") sys.stderr.write(f"Discovered hostnames during scan: {discovered}\n")
if seen_ips:
sample = ", ".join(f"{ip}={meta[0]}" for ip, meta in list(sorted(seen_ips.items()))[:20])
sys.stderr.write(f"SSH-reachable IPs: {sample}\n")
raise SystemExit( raise SystemExit(
"Failed SSH-based IP discovery for nodes: " + ", ".join(missing) + "Failed SSH-based IP discovery for nodes: " + ", ".join(missing) +
f" (scanned {prefix}.{start}-{prefix}.{end})" f" (scanned {prefix}.{start}-{prefix}.{end})"