fix: auto-detect SSH login user for node operations
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 18s

This commit is contained in:
2026-02-28 19:25:48 +00:00
parent 6105a314b7
commit 244887e9c2
2 changed files with 43 additions and 3 deletions

View File

@@ -17,6 +17,7 @@ source "$INVENTORY_FILE"
# Preferred SSH login user. A value that is already set and non-empty is kept;
# otherwise the default "micqdf" is used.
SSH_USER="${SSH_USER:-micqdf}"
# Private key handed to ssh via -i in the default SSH_OPTS below.
SSH_KEY_PATH="${SSH_KEY_PATH:-$HOME/.ssh/id_ed25519}"
# Default ssh options: batch mode (no interactive prompts), only the explicitly
# given identity, accept-new host key policy, and the key from SSH_KEY_PATH.
# Kept as one plain string, so it is expanded unquoted (word-split) where used.
SSH_OPTS="${SSH_OPTS:--o BatchMode=yes -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new -i $SSH_KEY_PATH}"
# Whitespace-separated login names tried in order by detect_ssh_user:
# SSH_USER first, then root, unless SSH_USER_CANDIDATES is already set.
SSH_USER_CANDIDATES="${SSH_USER_CANDIDATES:-$SSH_USER root}"
# Node name -> node IP lookup table (associative array), initially empty.
declare -A NODE_IPS=()
# Indexed array of control-plane node names, initially empty.
declare -a CP_NAMES=()
@@ -78,7 +79,23 @@ populate_nodes() {
#######################################
# Run a command on a node over SSH as the detected login user.
# Globals:
#   SSH_OPTS         (read) ssh options; intentionally word-split
#   ACTIVE_SSH_USER  (read) login user chosen by detect_ssh_user
#   SSH_USER         (read) fallback when ACTIVE_SSH_USER is unset or empty
# Arguments:
#   $1 - IP/hostname of the target node
#   $2 - command string, passed to ssh as a single argument
# Returns:
#   The exit status of ssh.
#######################################
remote() {
  local host_ip="$1"
  local cmd="$2"
  # Connect exactly once, as the detected user; fall back to the configured
  # user so the helper still works if detection has not run yet.
  local login_user="${ACTIVE_SSH_USER:-$SSH_USER}"
  # shellcheck disable=SC2086 # SSH_OPTS is a space-separated option list
  ssh $SSH_OPTS "$login_user@$host_ip" "$cmd"
}
#######################################
# Probe a host to find which SSH login user can authenticate.
# Each whitespace-separated name in SSH_USER_CANDIDATES is tried in order by
# running `true` on the host; the first candidate that succeeds wins.
# Globals:
#   SSH_OPTS             (read)    ssh options; intentionally word-split
#   SSH_USER_CANDIDATES  (read)    candidate login names
#   ACTIVE_SSH_USER      (written) set to the first candidate that works
# Arguments:
#   $1 - IP/hostname of the node to probe
# Outputs:
#   The selected user on stdout; failure diagnostics on stderr.
# Returns:
#   0 if a candidate authenticated, 1 otherwise (ACTIVE_SSH_USER is left
#   untouched in that case).
#######################################
detect_ssh_user() {
  local probe_ip="${1:-}"
  local candidate

  if [[ -z "$probe_ip" ]]; then
    echo "detect_ssh_user: no probe address given" >&2
    return 1
  fi

  # shellcheck disable=SC2086 # both variables are space-separated lists
  for candidate in $SSH_USER_CANDIDATES; do
    # stdin is /dev/null so the probe can never swallow input that belongs to
    # the caller (for example a surrounding `while read` loop).
    if ssh $SSH_OPTS "$candidate@$probe_ip" "true" </dev/null >/dev/null 2>&1; then
      ACTIVE_SSH_USER="$candidate"
      echo "==> Using SSH user '$ACTIVE_SSH_USER'"
      return 0
    fi
  done

  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Unable to authenticate to $probe_ip with candidates: $SSH_USER_CANDIDATES" >&2
  return 1
}
prepare_known_hosts() {
@@ -109,7 +126,7 @@ rebuild_node() {
echo "==> Rebuilding $node_name on $node_ip"
nixos-rebuild switch \
--flake "$FLAKE_DIR#$node_name" \
--target-host "$SSH_USER@$node_ip" \
--target-host "$ACTIVE_SSH_USER@$node_ip" \
--use-remote-sudo
}
@@ -122,6 +139,8 @@ if [ -z "${NODE_IPS[$PRIMARY_CONTROL_PLANE]:-}" ]; then
PRIMARY_CONTROL_PLANE="${CP_NAMES[0]}"
fi
# IP of the primary control-plane node, looked up in the NODE_IPS table.
PRIMARY_CP_IP="${NODE_IPS[$PRIMARY_CONTROL_PLANE]}"
# Start from the configured user so ACTIVE_SSH_USER is always defined before
# the probe runs.
ACTIVE_SSH_USER="$SSH_USER"
# Probe the primary control-plane node; on success detect_ssh_user overwrites
# ACTIVE_SSH_USER with the first candidate that authenticates.
# NOTE(review): the return status is not checked here. Unless errexit is
# enabled elsewhere in this script, a failed probe leaves ACTIVE_SSH_USER at
# SSH_USER and execution continues — confirm that is intended.
detect_ssh_user "$PRIMARY_CP_IP"
for node in "${CP_NAMES[@]}"; do
rebuild_node "$node" "${NODE_IPS[$node]}"

View File

@@ -16,6 +16,7 @@ source "$INVENTORY_FILE"
# Preferred SSH login user. A value that is already set and non-empty is kept;
# otherwise the default "micqdf" is used.
SSH_USER="${SSH_USER:-micqdf}"
# Private key handed to ssh via -i in the default SSH_OPTS below.
SSH_KEY_PATH="${SSH_KEY_PATH:-$HOME/.ssh/id_ed25519}"
# Default ssh options: batch mode (no interactive prompts), only the explicitly
# given identity, accept-new host key policy, and the key from SSH_KEY_PATH.
# Kept as one plain string, so it is expanded unquoted (word-split) where used.
SSH_OPTS="${SSH_OPTS:--o BatchMode=yes -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new -i $SSH_KEY_PATH}"
# Whitespace-separated login names tried in order by detect_ssh_user:
# SSH_USER first, then root, unless SSH_USER_CANDIDATES is already set.
SSH_USER_CANDIDATES="${SSH_USER_CANDIDATES:-$SSH_USER root}"
# Node name -> node IP lookup table (associative array), initially empty.
declare -A NODE_IPS=()
@@ -59,6 +60,22 @@ if [ "${#NODE_IPS[@]}" -eq 0 ]; then
exit 1
fi
#######################################
# Probe a host to find which SSH login user can authenticate.
# Each whitespace-separated name in SSH_USER_CANDIDATES is tried in order by
# running `true` on the host; the first candidate that succeeds wins.
# Globals:
#   SSH_OPTS             (read)    ssh options; intentionally word-split
#   SSH_USER_CANDIDATES  (read)    candidate login names
#   ACTIVE_SSH_USER      (written) set to the first candidate that works
# Arguments:
#   $1 - IP/hostname of the node to probe
# Outputs:
#   The selected user on stdout; failure diagnostics on stderr.
# Returns:
#   0 if a candidate authenticated, 1 otherwise (ACTIVE_SSH_USER is left
#   untouched in that case).
#######################################
detect_ssh_user() {
  local probe_ip="${1:-}"
  local candidate

  if [[ -z "$probe_ip" ]]; then
    echo "detect_ssh_user: no probe address given" >&2
    return 1
  fi

  # shellcheck disable=SC2086 # both variables are space-separated lists
  for candidate in $SSH_USER_CANDIDATES; do
    # stdin is /dev/null so the probe can never swallow input that belongs to
    # the caller (for example a surrounding `while read` loop).
    if ssh $SSH_OPTS "$candidate@$probe_ip" "true" </dev/null >/dev/null 2>&1; then
      ACTIVE_SSH_USER="$candidate"
      echo "==> Using SSH user '$ACTIVE_SSH_USER'"
      return 0
    fi
  done

  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Unable to authenticate to $probe_ip with candidates: $SSH_USER_CANDIDATES" >&2
  return 1
}
# Make sure the per-user SSH directory exists (no error if it already does).
mkdir -p "$HOME/.ssh"
# Restrict the directory to its owner (mode 700).
chmod 700 "$HOME/.ssh"
# Create known_hosts if it is missing; an existing file keeps its contents.
touch "$HOME/.ssh/known_hosts"
@@ -72,9 +89,13 @@ reset_node() {
local node_name="$1"
local node_ip="$2"
echo "==> Resetting $node_name ($node_ip)"
ssh $SSH_OPTS "$SSH_USER@$node_ip" "sudo kubeadm reset -f && sudo systemctl stop kubelet && sudo rm -rf /etc/kubernetes /var/lib/etcd /var/lib/cni /etc/cni/net.d"
ssh $SSH_OPTS "$ACTIVE_SSH_USER@$node_ip" "sudo kubeadm reset -f && sudo systemctl stop kubelet && sudo rm -rf /etc/kubernetes /var/lib/etcd /var/lib/cni /etc/cni/net.d"
}
# Node names in version-sort order, computed once and reused for both the
# probe target and the reset loop.
mapfile -t SORTED_NODE_NAMES < <(printf '%s\n' "${!NODE_IPS[@]}" | sort -V)

# The first node in sort order is the one used to detect the login user.
FIRST_NODE_IP="${NODE_IPS[${SORTED_NODE_NAMES[0]}]}"
# Start from the configured user so ACTIVE_SSH_USER is always defined.
ACTIVE_SSH_USER="$SSH_USER"
# Do not start a destructive reset when no candidate user can log in.
detect_ssh_user "$FIRST_NODE_IP" || exit 1

# Iterate over the array instead of a `while read` loop fed through stdin:
# the ssh call inside reset_node would otherwise read the remaining node names
# from the loop's stdin, and only the first node would ever be reset.
for node_name in "${SORTED_NODE_NAMES[@]}"; do
  reset_node "$node_name" "${NODE_IPS[$node_name]}"
done