feat: add repeatable kubeadm rebuild and reset scripts
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 17s
This commit is contained in:
@@ -90,6 +90,31 @@ kubectl get nodes -o wide
|
|||||||
kubectl -n kube-system get pods -o wide
```
## Repeatable rebuild flow (recommended)

1. Copy and edit inventory:

```bash
cp ./scripts/inventory.example.env ./scripts/inventory.env
$EDITOR ./scripts/inventory.env
```

2. Rebuild all nodes and bootstrap cluster:

```bash
./scripts/rebuild-and-bootstrap.sh
```

3. If you only want to reset Kubernetes state on existing VMs:

```bash
./scripts/reset-cluster-nodes.sh
```

For a full nuke/recreate lifecycle:

- run Terraform destroy/apply for VMs first,
- then run `./scripts/rebuild-and-bootstrap.sh` again.

## Notes

- Scripts are intentionally manual-triggered (predictable for homelab bring-up).
11 nixos/kubeadm/scripts/inventory.example.env — Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Inventory for the kubeadm rebuild/reset scripts.
# Copy this file to inventory.env and adjust the SSH user and node IPs;
# it is sourced by rebuild-and-bootstrap.sh and reset-cluster-nodes.sh.

SSH_USER=micqdf

# Control planes
CP_1=192.168.1.101
CP_2=192.168.1.102
CP_3=192.168.1.103

# Workers
WK_1=192.168.1.111
WK_2=192.168.1.112
WK_3=192.168.1.113
89 nixos/kubeadm/scripts/rebuild-and-bootstrap.sh — Executable file
@@ -0,0 +1,89 @@
|
|||||||
|
#!/usr/bin/env bash
# Rebuild every node via nixos-rebuild, then bootstrap a kubeadm HA cluster:
# initialize cp-1, install Cilium, and join the remaining control planes
# and workers.
#
# Usage: rebuild-and-bootstrap.sh [inventory-file]
# Env:   FLAKE_DIR  - flake root (default: parent of this script's directory)
#        SSH_USER, SSH_OPTS, CP_1..CP_3, WK_1..WK_3 - from the inventory file
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FLAKE_DIR="${FLAKE_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"

if [ ! -f "$INVENTORY_FILE" ]; then
  echo "Missing inventory file: $INVENTORY_FILE" >&2
  echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs." >&2
  exit 1
fi

# shellcheck disable=SC1090
source "$INVENTORY_FILE"

SSH_USER="${SSH_USER:-micqdf}"
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"
# Word-split SSH_OPTS once into an array, so ssh receives separate option
# words without the glob-expansion risk of an unquoted $SSH_OPTS (SC2086).
read -r -a SSH_OPT_ARR <<<"$SSH_OPTS"

required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
  if [ -z "${!key:-}" ]; then
    echo "Missing required inventory variable: $key" >&2
    exit 1
  fi
done

# remote HOST_IP CMD — run CMD on HOST_IP over ssh as $SSH_USER.
remote() {
  local host_ip="$1"
  local cmd="$2"
  ssh "${SSH_OPT_ARR[@]}" "$SSH_USER@$host_ip" "$cmd"
}

# rebuild_node NAME IP — deploy flake configuration NAME to the host at IP.
rebuild_node() {
  local node_name="$1"
  local node_ip="$2"

  echo "==> Rebuilding $node_name on $node_ip"
  nixos-rebuild switch \
    --flake "$FLAKE_DIR#$node_name" \
    --target-host "$SSH_USER@$node_ip" \
    --use-remote-sudo
}

# Map node name "cp-1" -> inventory variable "CP_1" and rebuild each node.
for node in cp-1 cp-2 cp-3 wk-1 wk-2 wk-3; do
  key="${node^^}"
  key="${key//-/_}"
  rebuild_node "$node" "${!key}"
done

echo "==> Initializing control plane on cp-1"
remote "$CP_1" "sudo th-kubeadm-init"

echo "==> Installing Cilium on cp-1"
remote "$CP_1" "helm repo add cilium https://helm.cilium.io >/dev/null 2>&1 || true"
remote "$CP_1" "helm repo update >/dev/null"
remote "$CP_1" "kubectl create namespace kube-system >/dev/null 2>&1 || true"
remote "$CP_1" "helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true"

echo "==> Building kubeadm join commands"
JOIN_CMD="$(remote "$CP_1" "sudo kubeadm token create --print-join-command")"
# 'upload-certs' prints the certificate key on its last output line.
CERT_KEY="$(remote "$CP_1" "sudo kubeadm init phase upload-certs --upload-certs | tail -n 1")"
CP_JOIN_CMD="$JOIN_CMD --control-plane --certificate-key $CERT_KEY"

# join_control_plane IP — base64-encode the join command locally so it
# survives the remote shell's re-evaluation intact, then decode it there.
join_control_plane() {
  local node_ip="$1"
  local encoded
  encoded="$(printf '%s' "$CP_JOIN_CMD" | base64 -w0)"
  remote "$node_ip" "sudo th-kubeadm-join-control-plane \"\$(echo $encoded | base64 -d)\""
}

# join_worker IP — same base64 round-trip for the worker join command.
join_worker() {
  local node_ip="$1"
  local encoded
  encoded="$(printf '%s' "$JOIN_CMD" | base64 -w0)"
  remote "$node_ip" "sudo th-kubeadm-join-worker \"\$(echo $encoded | base64 -d)\""
}

echo "==> Joining remaining control planes"
join_control_plane "$CP_2"
join_control_plane "$CP_3"

echo "==> Joining workers"
join_worker "$WK_1"
join_worker "$WK_2"
join_worker "$WK_3"

echo "==> Final node list"
remote "$CP_1" "kubectl get nodes -o wide"
37 nixos/kubeadm/scripts/reset-cluster-nodes.sh — Executable file
@@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env bash
# Reset Kubernetes state (kubeadm reset plus removal of etcd/CNI state
# directories) on every node in the inventory, without reinstalling the OS.
#
# Usage: reset-cluster-nodes.sh [inventory-file]
# Env:   SSH_USER, SSH_OPTS, CP_1..CP_3, WK_1..WK_3 - from the inventory file
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"

if [ ! -f "$INVENTORY_FILE" ]; then
  echo "Missing inventory file: $INVENTORY_FILE" >&2
  echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs." >&2
  exit 1
fi

# shellcheck disable=SC1090
source "$INVENTORY_FILE"

SSH_USER="${SSH_USER:-micqdf}"
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"
# Word-split SSH_OPTS once into an array; avoids the glob-expansion risk of
# an unquoted $SSH_OPTS (SC2086) while keeping the same option splitting.
read -r -a SSH_OPT_ARR <<<"$SSH_OPTS"

required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
  if [ -z "${!key:-}" ]; then
    echo "Missing required inventory variable: $key" >&2
    exit 1
  fi
done

# reset_node IP — kubeadm reset, stop kubelet, and wipe leftover state dirs.
reset_node() {
  local node_ip="$1"
  echo "==> Resetting $node_ip"
  ssh "${SSH_OPT_ARR[@]}" "$SSH_USER@$node_ip" "sudo kubeadm reset -f && sudo systemctl stop kubelet && sudo rm -rf /etc/kubernetes /var/lib/etcd /var/lib/cni /etc/cni/net.d"
}

for key in CP_1 CP_2 CP_3 WK_1 WK_2 WK_3; do
  reset_node "${!key}"
done

echo "Cluster components reset on all listed nodes."
Reference in New Issue
Block a user