fix: fall back to alternate SSH users per host during bootstrap steps
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 10m6s

This commit is contained in:
2026-03-01 13:34:15 +00:00
parent 8bd064c828
commit 88db11292d

View File

@@ -84,8 +84,26 @@ remote() {
local host_ip="$1"
local cmd="$2"
local quoted_cmd
local candidate
local candidates=()
candidates+=("$ACTIVE_SSH_USER")
for candidate in $SSH_USER_CANDIDATES; do
if [ "$candidate" != "$ACTIVE_SSH_USER" ]; then
candidates+=("$candidate")
fi
done
quoted_cmd="$(printf '%q' "$cmd")"
ssh $SSH_OPTS "$ACTIVE_SSH_USER@$host_ip" "bash -lc $quoted_cmd"
for candidate in "${candidates[@]}"; do
if ssh $SSH_OPTS "$candidate@$host_ip" "bash -lc $quoted_cmd"; then
ACTIVE_SSH_USER="$candidate"
return 0
fi
done
echo "Remote command failed for all SSH users on $host_ip"
return 1
}
detect_ssh_user() {
@@ -130,6 +148,7 @@ rebuild_node() {
local node_ip="$2"
echo "==> Rebuilding $node_name on $node_ip"
detect_ssh_user "$node_ip"
timeout "$REBUILD_TIMEOUT" nixos-rebuild switch \
--flake "$FLAKE_DIR#$node_name" \
--target-host "$ACTIVE_SSH_USER@$node_ip" \
@@ -233,6 +252,7 @@ if [ "$worker_failures" -gt 0 ]; then
fi
echo "==> Initializing control plane on $PRIMARY_CONTROL_PLANE"
detect_ssh_user "$PRIMARY_CP_IP"
if cluster_ready; then
echo "==> Existing cluster detected on $PRIMARY_CONTROL_PLANE; skipping kubeadm init"
else