#!/usr/bin/env bash
#
# Rebuild and (re)bootstrap a NixOS-based Kubernetes cluster from an
# inventory file:
#   1. nixos-rebuild switch every control plane (serially) and every
#      worker (in parallel, bounded by WORKER_PARALLELISM),
#   2. kubeadm-init the primary control plane (idempotent) and install
#      Cilium as the CNI,
#   3. join the remaining control planes and all workers (idempotent).
#
# Usage: <script> [inventory.env]
#
# Requires: bash 4.3+ (associative arrays, `wait -n`), GNU coreutils
# (`timeout`, `base64 -w0`), and `nixos-rebuild` on the local machine.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FLAKE_DIR="${FLAKE_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"

if [ ! -f "$INVENTORY_FILE" ]; then
  echo "Missing inventory file: $INVENTORY_FILE"
  echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit node mappings."
  exit 1
fi

# shellcheck disable=SC1090
source "$INVENTORY_FILE"

SSH_USER="${SSH_USER:-micqdf}"
SSH_KEY_PATH="${SSH_KEY_PATH:-$HOME/.ssh/id_ed25519}"
# SSH_OPTS is a flat, space-separated option string that is deliberately
# word-split at every use site (and exported as NIX_SSHOPTS below).
# Consequence: SSH_KEY_PATH must not contain whitespace.
SSH_OPTS="${SSH_OPTS:--o BatchMode=yes -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new -i $SSH_KEY_PATH}"
SSH_USER_CANDIDATES="${SSH_USER_CANDIDATES:-root $SSH_USER}"
REBUILD_TIMEOUT="${REBUILD_TIMEOUT:-45m}"
REBUILD_RETRIES="${REBUILD_RETRIES:-2}"
WORKER_PARALLELISM="${WORKER_PARALLELISM:-3}"
# FAST_MODE=1 (default) skips the slow remote garbage-collection pass.
FAST_MODE="${FAST_MODE:-1}"

declare -A NODE_IPS=()  # node name -> IP, all roles
declare -a CP_NAMES=()  # control-plane node names, inventory order
declare -a WK_NAMES=()  # worker node names, inventory order

#######################################
# Record one node in the inventory maps.
# Arguments: $1 - role, "cp" or anything else (treated as worker)
#            $2 - "name=ip" pair
# Exits non-zero on a malformed pair.
#######################################
add_node_pair() {
  local role="$1"
  local pair="$2"
  local name="${pair%%=*}"
  local ip="${pair#*=}"
  # name == ip means the pair contained no '=' separator at all.
  if [ -z "$name" ] || [ -z "$ip" ] || [ "$name" = "$ip" ]; then
    echo "Invalid node pair '$pair' (expected name=ip)."
    exit 1
  fi
  NODE_IPS["$name"]="$ip"
  if [ "$role" = "cp" ]; then
    CP_NAMES+=("$name")
  else
    WK_NAMES+=("$name")
  fi
}

#######################################
# Fill NODE_IPS / CP_NAMES / WK_NAMES from the sourced inventory.
# Two inventory formats are accepted per role:
#   - CONTROL_PLANES / WORKERS: space-separated "name=ip" pairs
#   - numbered CP_<n> / WK_<n> variables holding bare IPs
#     (auto-named cp-<n> / wk-<n>, sorted numerically)
# Exits non-zero if either role ends up empty.
#######################################
populate_nodes() {
  if [ -n "${CONTROL_PLANES:-}" ]; then
    # Intentional word-splitting of the pair list.
    for pair in $CONTROL_PLANES; do
      add_node_pair "cp" "$pair"
    done
  else
    while IFS= read -r var_name; do
      idx="${var_name#CP_}"
      add_node_pair "cp" "cp-$idx=${!var_name}"
    done < <(compgen -A variable | grep -E '^CP_[0-9]+$' | sort -V)
  fi
  if [ -n "${WORKERS:-}" ]; then
    for pair in $WORKERS; do
      add_node_pair "wk" "$pair"
    done
  else
    while IFS= read -r var_name; do
      idx="${var_name#WK_}"
      add_node_pair "wk" "wk-$idx=${!var_name}"
    done < <(compgen -A variable | grep -E '^WK_[0-9]+$' | sort -V)
  fi
  if [ "${#CP_NAMES[@]}" -eq 0 ]; then
    echo "No control planes found in inventory."
    exit 1
  fi
  if [ "${#WK_NAMES[@]}" -eq 0 ]; then
    echo "No workers found in inventory."
    exit 1
  fi
}

#######################################
# Run a command on a remote host as ACTIVE_SSH_USER.
# Arguments: $1 - host IP
#            $2 - command string
# The command is %q-quoted so it survives the ssh + `bash -lc` layers
# intact; a login shell is used so the remote user's PATH (helm,
# kubectl, ...) is available.
#######################################
remote() {
  local host_ip="$1"
  local cmd="$2"
  local quoted_cmd
  quoted_cmd="$(printf '%q' "$cmd")"
  # shellcheck disable=SC2086 # SSH_OPTS is intentionally word-split
  ssh $SSH_OPTS "$ACTIVE_SSH_USER@$host_ip" "bash -lc $quoted_cmd"
}

#######################################
# Probe SSH_USER_CANDIDATES against one host and set ACTIVE_SSH_USER to
# the first that authenticates. Only the probe host is checked; all
# nodes are assumed to accept the same user.
# Arguments: $1 - host IP to probe
# Returns:   0 on success, 1 if no candidate works
#######################################
detect_ssh_user() {
  local probe_ip="$1"
  local candidate
  for candidate in $SSH_USER_CANDIDATES; do
    # shellcheck disable=SC2086 # SSH_OPTS is intentionally word-split
    if ssh $SSH_OPTS "$candidate@$probe_ip" "true" >/dev/null 2>&1; then
      ACTIVE_SSH_USER="$candidate"
      echo "==> Using SSH user '$ACTIVE_SSH_USER'"
      return 0
    fi
  done
  echo "Unable to authenticate to $probe_ip with candidates: $SSH_USER_CANDIDATES"
  return 1
}

#######################################
# Refresh known_hosts entries for every inventory node so BatchMode ssh
# never stalls on a host-key prompt (e.g. after a node reinstall).
# Failures are best-effort: accept-new in SSH_OPTS is the fallback.
#######################################
prepare_known_hosts() {
  mkdir -p "$HOME/.ssh"
  chmod 700 "$HOME/.ssh"
  touch "$HOME/.ssh/known_hosts"
  chmod 600 "$HOME/.ssh/known_hosts"
  for node in "${!NODE_IPS[@]}"; do
    ssh-keygen -R "${NODE_IPS[$node]}" >/dev/null 2>&1 || true
    ssh-keyscan -H "${NODE_IPS[$node]}" >> "$HOME/.ssh/known_hosts" 2>/dev/null || true
  done
}

# True if the named node is already registered in the cluster.
cluster_has_node() {
  local node_name="$1"
  remote "$PRIMARY_CP_IP" "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get node $node_name >/dev/null 2>&1"
}

# True if the primary control plane already hosts a working cluster.
cluster_ready() {
  remote "$PRIMARY_CP_IP" "test -f /etc/kubernetes/admin.conf && sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get nodes >/dev/null 2>&1"
}

#######################################
# Build the node's flake config locally and switch it remotely.
# Arguments: $1 - node name (must match a flake nixosConfigurations key)
#            $2 - node IP
# Bounded by REBUILD_TIMEOUT via GNU timeout; ssh options reach
# nixos-rebuild through the exported NIX_SSHOPTS.
#######################################
rebuild_node() {
  local node_name="$1"
  local node_ip="$2"
  echo "==> Rebuilding $node_name on $node_ip"
  timeout "$REBUILD_TIMEOUT" nixos-rebuild switch \
    --flake "$FLAKE_DIR#$node_name" \
    --target-host "$ACTIVE_SSH_USER@$node_ip" \
    --use-remote-sudo
}

#######################################
# rebuild_node with up to REBUILD_RETRIES retries (20s back-off).
# Returns 0 on the first success, 1 after the last attempt fails.
#######################################
rebuild_node_with_retry() {
  local node_name="$1"
  local node_ip="$2"
  local attempt=1
  local max_attempts=$((REBUILD_RETRIES + 1))
  while [ "$attempt" -le "$max_attempts" ]; do
    echo "==> Rebuild attempt $attempt/$max_attempts for $node_name"
    if rebuild_node "$node_name" "$node_ip"; then
      return 0
    fi
    if [ "$attempt" -lt "$max_attempts" ]; then
      echo "==> Rebuild failed for $node_name, retrying after 20s"
      sleep 20
    fi
    attempt=$((attempt + 1))
  done
  echo "==> Rebuild failed permanently for $node_name"
  return 1
}

#######################################
# Ensure the remote nix daemon trusts root and the deploy user so
# nixos-rebuild's remote store operations are permitted.
# Arguments: $1 - node IP
#######################################
prepare_remote_nix_trust() {
  local node_ip="$1"
  echo "==> Ensuring nix trusted-users on $node_ip"
  remote "$node_ip" "sudo mkdir -p /etc/nix"
  # Drop any previous trusted-users line before appending ours, so
  # repeated runs do not accumulate duplicates.
  remote "$node_ip" "if [ -f /etc/nix/nix.conf ]; then sudo sed -i '/^trusted-users[[:space:]]*=/d' /etc/nix/nix.conf; fi"
  # Trust the configured deploy user rather than a hard-coded name
  # (SSH_USER defaults to the previous hard-coded value).
  remote "$node_ip" "echo 'trusted-users = root $SSH_USER' | sudo tee -a /etc/nix/nix.conf >/dev/null"
  remote "$node_ip" "sudo systemctl restart nix-daemon 2>/dev/null || true"
}

#######################################
# Best-effort disk reclamation on a node before a large rebuild.
# Arguments: $1 - node IP
# All steps tolerate failure (|| true): space recovery is advisory.
#######################################
prepare_remote_space() {
  local node_ip="$1"
  echo "==> Reclaiming disk space on $node_ip"
  remote "$node_ip" "sudo nix-collect-garbage -d || true"
  remote "$node_ip" "sudo nix --extra-experimental-features nix-command store gc || true"
  remote "$node_ip" "sudo rm -rf /tmp/nix* /tmp/nixos-rebuild* || true"
}

populate_nodes
prepare_known_hosts

# nixos-rebuild passes NIX_SSHOPTS to its internal ssh invocations.
export NIX_SSHOPTS="$SSH_OPTS"

# Pick the primary control plane: the configured name if it exists in
# the inventory, otherwise the first control plane listed.
PRIMARY_CONTROL_PLANE="${PRIMARY_CONTROL_PLANE:-cp-1}"
if [ -z "${NODE_IPS[$PRIMARY_CONTROL_PLANE]:-}" ]; then
  PRIMARY_CONTROL_PLANE="${CP_NAMES[0]}"
fi
PRIMARY_CP_IP="${NODE_IPS[$PRIMARY_CONTROL_PLANE]}"

ACTIVE_SSH_USER="$SSH_USER"
detect_ssh_user "$PRIMARY_CP_IP"

# Control planes rebuild serially; under `set -e` a permanent rebuild
# failure here aborts the whole deployment.
for node in "${CP_NAMES[@]}"; do
  prepare_remote_nix_trust "${NODE_IPS[$node]}"
  if [ "$FAST_MODE" != "1" ]; then
    prepare_remote_space "${NODE_IPS[$node]}"
  fi
  rebuild_node_with_retry "$node" "${NODE_IPS[$node]}"
done

worker_failures=0

# Worker prep runs serially before the parallel rebuild fan-out.
for node in "${WK_NAMES[@]}"; do
  prepare_remote_nix_trust "${NODE_IPS[$node]}"
  if [ "$FAST_MODE" != "1" ]; then
    prepare_remote_space "${NODE_IPS[$node]}"
  fi
done

# Sliding window of at most WORKER_PARALLELISM concurrent rebuilds;
# `wait -n` (bash 4.3+) reaps one finished job at a time so failures
# can be counted without aborting the other jobs.
active_jobs=0
for node in "${WK_NAMES[@]}"; do
  (
    rebuild_node_with_retry "$node" "${NODE_IPS[$node]}"
  ) &
  active_jobs=$((active_jobs + 1))
  if [ "$active_jobs" -ge "$WORKER_PARALLELISM" ]; then
    if ! wait -n; then
      worker_failures=$((worker_failures + 1))
    fi
    active_jobs=$((active_jobs - 1))
  fi
done
while [ "$active_jobs" -gt 0 ]; do
  if ! wait -n; then
    worker_failures=$((worker_failures + 1))
  fi
  active_jobs=$((active_jobs - 1))
done
if [ "$worker_failures" -gt 0 ]; then
  echo "==> $worker_failures worker rebuild job(s) failed"
  exit 1
fi

echo "==> Initializing control plane on $PRIMARY_CONTROL_PLANE"
if cluster_ready; then
  echo "==> Existing cluster detected on $PRIMARY_CONTROL_PLANE; skipping kubeadm init"
else
  remote "$PRIMARY_CP_IP" "sudo th-kubeadm-init"
  echo "==> Installing Cilium on $PRIMARY_CONTROL_PLANE"
  remote "$PRIMARY_CP_IP" "helm repo add cilium https://helm.cilium.io >/dev/null 2>&1 || true"
  remote "$PRIMARY_CP_IP" "helm repo update >/dev/null"
  remote "$PRIMARY_CP_IP" "kubectl create namespace kube-system >/dev/null 2>&1 || true"
  remote "$PRIMARY_CP_IP" "helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true"
fi

echo "==> Building kubeadm join commands"
JOIN_CMD="$(remote "$PRIMARY_CP_IP" "sudo kubeadm token create --print-join-command")"
# NOTE(review): relies on the certificate key being the last output line
# of `upload-certs`; this matches current kubeadm output but is fragile
# across kubeadm versions — verify on upgrades.
CERT_KEY="$(remote "$PRIMARY_CP_IP" "sudo kubeadm init phase upload-certs --upload-certs | tail -n 1")"
CP_JOIN_CMD="$JOIN_CMD --control-plane --certificate-key $CERT_KEY"

#######################################
# Join a secondary control plane to the cluster.
# Arguments: $1 - node IP
# The join command is base64-round-tripped so its embedded quotes and
# spaces survive the ssh quoting layers verbatim.
#######################################
join_control_plane() {
  local node_ip="$1"
  local encoded
  encoded="$(printf '%s' "$CP_JOIN_CMD" | base64 -w0)"
  remote "$node_ip" "sudo th-kubeadm-join-control-plane \"\$(echo $encoded | base64 -d)\""
}

#######################################
# Join a worker node to the cluster (same base64 trick as above).
# Arguments: $1 - node IP
#######################################
join_worker() {
  local node_ip="$1"
  local encoded
  encoded="$(printf '%s' "$JOIN_CMD" | base64 -w0)"
  remote "$node_ip" "sudo th-kubeadm-join-worker \"\$(echo $encoded | base64 -d)\""
}

echo "==> Joining remaining control planes"
for node in "${CP_NAMES[@]}"; do
  if [ "$node" = "$PRIMARY_CONTROL_PLANE" ]; then
    continue
  fi
  if cluster_has_node "$node"; then
    echo "$node already joined; skipping"
  else
    join_control_plane "${NODE_IPS[$node]}"
  fi
done

echo "==> Joining workers"
for node in "${WK_NAMES[@]}"; do
  if cluster_has_node "$node"; then
    echo "$node already joined; skipping"
  else
    join_worker "${NODE_IPS[$node]}"
  fi
done

echo "==> Final node list"
remote "$PRIMARY_CP_IP" "kubectl get nodes -o wide"