#!/usr/bin/env bash
#
# Deploy a 6-node kubeadm cluster (cp-1..cp-3 control planes, wk-1..wk-3
# workers) whose hosts are NixOS machines built from this repo's flake:
#   1. nixos-rebuild every node from the flake,
#   2. kubeadm-init cp-1 and install Cilium (skipped if a cluster already
#      responds there),
#   3. join the remaining control planes and workers (idempotent: nodes
#      already present in the cluster are skipped).
#
# Usage: <script> [inventory-file]
#   inventory-file defaults to <script-dir>/inventory.env and must define
#   CP_1..CP_3 and WK_1..WK_3 (node IPs). FLAKE_DIR, SSH_USER and SSH_OPTS
#   may be overridden via the environment.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FLAKE_DIR="${FLAKE_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"

if [ ! -f "$INVENTORY_FILE" ]; then
  echo "Missing inventory file: $INVENTORY_FILE" >&2
  echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs." >&2
  exit 1
fi

# shellcheck disable=SC1090
source "$INVENTORY_FILE"

SSH_USER="${SSH_USER:-micqdf}"
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"
# SSH_OPTS stays a single env-overridable string for backward compatibility;
# split it once into an array so the ssh call below doesn't depend on
# unquoted word-splitting (SC2086).
read -r -a SSH_OPTS_ARR <<< "$SSH_OPTS"

required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
  if [ -z "${!key:-}" ]; then
    echo "Missing required inventory variable: $key" >&2
    exit 1
  fi
done

# remote <host-ip> <command>
# Run a shell command on a node as $SSH_USER.
remote() {
  local host_ip="$1"
  local cmd="$2"
  ssh "${SSH_OPTS_ARR[@]}" "$SSH_USER@$host_ip" "$cmd"
}

# cluster_has_node <node-name>
# True if the cluster (queried via cp-1's admin kubeconfig) already has a
# node object with that name. Makes the join steps idempotent.
cluster_has_node() {
  local node_name="$1"
  remote "$CP_1" "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get node $node_name >/dev/null 2>&1"
}

# cluster_ready
# True if cp-1 already hosts an initialized, responding control plane
# (admin.conf present and the API server answers).
cluster_ready() {
  remote "$CP_1" "test -f /etc/kubernetes/admin.conf && sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get nodes >/dev/null 2>&1"
}

# rebuild_node <flake-attr> <node-ip>
# Build the node's NixOS configuration from the local flake and activate it
# on the remote host over SSH.
rebuild_node() {
  local node_name="$1"
  local node_ip="$2"
  echo "==> Rebuilding $node_name on $node_ip"
  nixos-rebuild switch \
    --flake "$FLAKE_DIR#$node_name" \
    --target-host "$SSH_USER@$node_ip" \
    --use-remote-sudo
}

for node in cp-1 cp-2 cp-3 wk-1 wk-2 wk-3; do
  key="${node^^}"      # cp-1 -> CP-1
  key="${key//-/_}"    # CP-1 -> CP_1 (matching inventory variable name)
  rebuild_node "$node" "${!key}"
done

echo "==> Initializing control plane on cp-1"
if cluster_ready; then
  echo "==> Existing cluster detected on cp-1; skipping kubeadm init"
else
  remote "$CP_1" "sudo th-kubeadm-init"
  echo "==> Installing Cilium on cp-1"
  # NOTE(review): these helm/kubectl calls use $SSH_USER's default
  # kubeconfig — presumably th-kubeadm-init sets one up; confirm, or pass
  # --kubeconfig explicitly as the status checks above do.
  remote "$CP_1" "helm repo add cilium https://helm.cilium.io >/dev/null 2>&1 || true"
  remote "$CP_1" "helm repo update >/dev/null"
  remote "$CP_1" "kubectl create namespace kube-system >/dev/null 2>&1 || true"
  remote "$CP_1" "helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true"
fi

echo "==> Building kubeadm join commands"
# The join command embeds a freshly minted bootstrap token; the certificate
# key lets additional control planes fetch the shared certs (kubeadm prints
# the key on the last line of 'upload-certs').
JOIN_CMD="$(remote "$CP_1" "sudo kubeadm token create --print-join-command")"
CERT_KEY="$(remote "$CP_1" "sudo kubeadm init phase upload-certs --upload-certs | tail -n 1")"
CP_JOIN_CMD="$JOIN_CMD --control-plane --certificate-key $CERT_KEY"

# join_control_plane <node-ip>
# Run the control-plane join on a node. The join command is base64-encoded
# in transit so its spaces and flags survive the extra level of shell
# quoting ssh introduces. (base64 -w0 is GNU coreutils; fine on NixOS.)
join_control_plane() {
  local node_ip="$1"
  local encoded
  encoded="$(printf '%s' "$CP_JOIN_CMD" | base64 -w0)"
  remote "$node_ip" "sudo th-kubeadm-join-control-plane \"\$(echo $encoded | base64 -d)\""
}

# join_worker <node-ip>
# Same transport trick as join_control_plane, for the worker join command.
join_worker() {
  local node_ip="$1"
  local encoded
  encoded="$(printf '%s' "$JOIN_CMD" | base64 -w0)"
  remote "$node_ip" "sudo th-kubeadm-join-worker \"\$(echo $encoded | base64 -d)\""
}

echo "==> Joining remaining control planes"
for pair in "cp-2:$CP_2" "cp-3:$CP_3"; do
  name="${pair%%:*}"
  ip="${pair#*:}"
  if cluster_has_node "$name"; then
    echo "$name already joined; skipping"
  else
    join_control_plane "$ip"
  fi
done

echo "==> Joining workers"
for pair in "wk-1:$WK_1" "wk-2:$WK_2" "wk-3:$WK_3"; do
  name="${pair%%:*}"
  ip="${pair#*:}"
  if cluster_has_node "$name"; then
    echo "$name already joined; skipping"
  else
    join_worker "$ip"
  fi
done

echo "==> Final node list"
# Use the admin kubeconfig like every other kubectl call in this script —
# $SSH_USER may have no personal kubeconfig on a freshly built node.
remote "$CP_1" "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get nodes -o wide"