Files
TerraHome/nixos/kubeadm/scripts/rebuild-and-bootstrap.sh

126 lines
3.3 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
#
# Rebuild the six cluster nodes (cp-1..3, wk-1..3) with nixos-rebuild and
# bootstrap/join them into a kubeadm cluster, driving every step over SSH.
#
# Usage: rebuild-and-bootstrap.sh [inventory-file]
#   inventory-file  shell fragment that is sourced for node addresses;
#                   defaults to inventory.env next to this script.
#
# Environment overrides:
#   FLAKE_DIR  flake directory passed to nixos-rebuild (default: parent of
#              this script's directory)
#   SSH_USER   remote login user (default: micqdf)
#   SSH_OPTS   extra ssh options, whitespace-separated
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FLAKE_DIR="${FLAKE_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"

if [[ ! -f "$INVENTORY_FILE" ]]; then
  # Diagnostics belong on stderr so they are not mixed into captured stdout.
  {
    echo "Missing inventory file: $INVENTORY_FILE"
    echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs."
  } >&2
  exit 1
fi

# The inventory path is only known at runtime, so ShellCheck cannot follow it.
# shellcheck disable=SC1090
source "$INVENTORY_FILE"

# Applied after sourcing so values set by the inventory (or the environment)
# win over these fallbacks.
SSH_USER="${SSH_USER:-micqdf}"
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"
# Every node address the rest of the script dereferences must be present in
# the inventory. Report all missing names in one run (on stderr) instead of
# stopping at the first, so the inventory can be fixed in a single pass.
required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
missing_count=0
for key in "${required[@]}"; do
  # ${!key:-} is an indirect lookup that stays safe under `set -u`.
  if [[ -z "${!key:-}" ]]; then
    echo "Missing required inventory variable: $key" >&2
    missing_count=$((missing_count + 1))
  fi
done
if ((missing_count > 0)); then
  exit 1
fi
# Succeeds when `kubectl get node <name>` succeeds on cp-1, i.e. the named
# node is already registered with the cluster.
# Arguments: $1 - Kubernetes node name (e.g. cp-2)
# Returns:   exit status of the remote command; its output is discarded.
cluster_has_node() {
  local wanted="$1"
  local kubectl_admin='sudo kubectl --kubeconfig /etc/kubernetes/admin.conf'
  remote "$CP_1" "$kubectl_admin get node $wanted >/dev/null 2>&1"
}
# Succeeds when cp-1 already has an admin kubeconfig on disk and
# `kubectl get nodes` works with it.
# Returns: exit status of the remote command; its output is discarded.
cluster_ready() {
  local admin_conf=/etc/kubernetes/admin.conf
  local probe="test -f $admin_conf"
  probe+=" && sudo kubectl --kubeconfig $admin_conf get nodes >/dev/null 2>&1"
  remote "$CP_1" "$probe"
}
# Run a command string on a host over SSH.
# Globals:   SSH_OPTS (read) - whitespace-separated ssh options
#            SSH_USER (read) - remote login user
# Arguments: $1 - host address
#            $2 - command line handed to the remote shell
# Outputs:   whatever the remote command writes
# Returns:   ssh's exit status
remote() {
  local host_ip="$1"
  local cmd="$2"
  local -a ssh_opts=()
  # Split SSH_OPTS on whitespace into an array instead of expanding it
  # unquoted, so a value containing glob characters is never matched against
  # local files. `read -d ''` hits EOF and returns non-zero by design.
  IFS=$' \t\n' read -r -d '' -a ssh_opts <<<"$SSH_OPTS" || true
  # The ${arr[@]+...} form keeps an empty option list from tripping `set -u`
  # on older bash releases.
  ssh ${ssh_opts[@]+"${ssh_opts[@]}"} "$SSH_USER@$host_ip" "$cmd"
}
# Apply the flake configuration for one node by running `nixos-rebuild switch`
# against it as the SSH user.
# Globals:   FLAKE_DIR (read), SSH_USER (read)
# Arguments: $1 - node name, used as the flake output attribute
#            $2 - node address used as the nixos-rebuild target host
# Outputs:   a progress line, then nixos-rebuild's own output
# Returns:   nixos-rebuild's exit status
rebuild_node() {
  local name="$1"
  local addr="$2"
  local -a rebuild_args=(
    switch
    --flake "$FLAKE_DIR#$name"
    --target-host "$SSH_USER@$addr"
    --use-remote-sudo
  )
  printf '==> Rebuilding %s on %s\n' "$name" "$addr"
  nixos-rebuild "${rebuild_args[@]}"
}
# Rebuild every node in order. The inventory variable holding a node's
# address is its name with '-' replaced by '_' and upper-cased
# (cp-1 -> CP_1), looked up through indirect expansion.
for node in cp-{1..3} wk-{1..3}; do
  key="${node//-/_}"
  key="${key^^}"
  rebuild_node "$node" "${!key}"
done
# Bootstrap the first control plane unless cp-1 already answers as a cluster
# (see cluster_ready); the Cilium install only happens on a fresh init.
echo "==> Initializing control plane on cp-1"
if ! cluster_ready; then
  # th-kubeadm-init is a helper expected on the node; presumably it wraps
  # `kubeadm init` - confirm against the node configuration.
  remote "$CP_1" "sudo th-kubeadm-init"
  echo "==> Installing Cilium on cp-1"
  # NOTE(review): these helm/kubectl calls run without sudo or --kubeconfig,
  # unlike cluster_ready; they rely on the SSH user's own kubeconfig - confirm.
  # The `|| true` steps tolerate "already exists" style failures.
  cilium_steps=(
    "helm repo add cilium https://helm.cilium.io >/dev/null 2>&1 || true"
    "helm repo update >/dev/null"
    "kubectl create namespace kube-system >/dev/null 2>&1 || true"
    "helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true"
  )
  for step in "${cilium_steps[@]}"; do
    remote "$CP_1" "$step"
  done
else
  echo "==> Existing cluster detected on cp-1; skipping kubeadm init"
fi
# Ask cp-1 for a fresh worker join command and for the key under which the
# control-plane certificates were (re-)uploaded; additional control planes
# need both.
echo "==> Building kubeadm join commands"
JOIN_CMD="$(remote "$CP_1" "sudo kubeadm token create --print-join-command")"
# `tail` runs locally on purpose: inside the remote shell the pipeline's status
# would be tail's, hiding a kubeadm failure. Locally, `pipefail` (set at the
# top of the script) makes the substitution fail and `set -e` aborts.
CERT_KEY="$(remote "$CP_1" "sudo kubeadm init phase upload-certs --upload-certs" | tail -n 1)"
# An empty value would produce a malformed join command further down; stop
# here with a clear message instead.
if [[ -z "$JOIN_CMD" || -z "$CERT_KEY" ]]; then
  echo "Failed to obtain kubeadm join command or certificate key from cp-1" >&2
  exit 1
fi
CP_JOIN_CMD="$JOIN_CMD --control-plane --certificate-key $CERT_KEY"
# Join a host to the cluster as an additional control plane.
# The join command is shipped base64-encoded and decoded inside a quoted
# command substitution on the remote side, so it reaches
# th-kubeadm-join-control-plane as a single argument.
# Globals:   CP_JOIN_CMD (read)
# Arguments: $1 - host address
# Returns:   exit status of the remote command
join_control_plane() {
  local target="$1"
  local payload
  local remote_cmd
  payload="$(printf '%s' "$CP_JOIN_CMD" | base64 -w0)"
  printf -v remote_cmd \
    'sudo th-kubeadm-join-control-plane "$(echo %s | base64 -d)"' "$payload"
  remote "$target" "$remote_cmd"
}
# Join a host to the cluster as a worker.
# The join command is shipped base64-encoded and decoded inside a quoted
# command substitution on the remote side, so it reaches
# th-kubeadm-join-worker as a single argument.
# Globals:   JOIN_CMD (read)
# Arguments: $1 - host address
# Returns:   exit status of the remote command
join_worker() {
  local target="$1"
  local payload
  local remote_cmd
  payload="$(printf '%s' "$JOIN_CMD" | base64 -w0)"
  printf -v remote_cmd \
    'sudo th-kubeadm-join-worker "$(echo %s | base64 -d)"' "$payload"
  remote "$target" "$remote_cmd"
}
# Join every node that the cluster does not already list. A node's address
# comes from the inventory variable named after it (cp-2 -> CP_2), resolved
# only when a join is actually needed.
echo "==> Joining remaining control planes"
for member in cp-2 cp-3; do
  if cluster_has_node "$member"; then
    echo "$member already joined; skipping"
  else
    ip_var="${member//-/_}"
    ip_var="${ip_var^^}"
    join_control_plane "${!ip_var}"
  fi
done

echo "==> Joining workers"
for member in wk-1 wk-2 wk-3; do
  if cluster_has_node "$member"; then
    echo "$member already joined; skipping"
  else
    ip_var="${member//-/_}"
    ip_var="${ip_var^^}"
    join_worker "${!ip_var}"
  fi
done
# Show the resulting cluster membership. Query through the admin kubeconfig
# with sudo, exactly as cluster_ready and cluster_has_node do, so this last
# step does not depend on the SSH user having a kubeconfig of their own.
echo "==> Final node list"
remote "$CP_1" "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get nodes -o wide"