feat: make kubeadm workflows auto-scale with terraform outputs
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 19s

This commit is contained in:
2026-02-28 16:43:22 +00:00
parent f341816112
commit 5669305e59
8 changed files with 207 additions and 141 deletions

View File

@@ -99,7 +99,7 @@ cp ./scripts/inventory.example.env ./scripts/inventory.env
$EDITOR ./scripts/inventory.env
```
2. Rebuild all nodes and bootstrap cluster:
2. Rebuild all nodes and bootstrap/reconcile cluster:
```bash
./scripts/rebuild-and-bootstrap.sh
@@ -115,6 +115,9 @@ For a full nuke/recreate lifecycle:
- run Terraform destroy/apply for VMs first,
- then run `./scripts/rebuild-and-bootstrap.sh` again.
Node lists are discovered from Terraform outputs, so new workers/control
planes added in Terraform are picked up automatically by the bootstrap/reconcile flow.
## Optional Gitea workflow automation
Primary flow:

View File

@@ -1,11 +1,7 @@
SSH_USER=micqdf
PRIMARY_CONTROL_PLANE=cp-1
# Control planes
CP_1=192.168.1.101
CP_2=192.168.1.102
CP_3=192.168.1.103
# Name=IP pairs (space-separated)
CONTROL_PLANES="cp-1=192.168.1.101 cp-2=192.168.1.102 cp-3=192.168.1.103"
# Workers
WK_1=192.168.1.111
WK_2=192.168.1.112
WK_3=192.168.1.113
WORKERS="wk-1=192.168.1.111 wk-2=192.168.1.112 wk-3=192.168.1.113"

View File

@@ -7,7 +7,7 @@ INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"
if [ ! -f "$INVENTORY_FILE" ]; then
echo "Missing inventory file: $INVENTORY_FILE"
echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs."
echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit node mappings."
exit 1
fi
@@ -17,21 +17,61 @@ source "$INVENTORY_FILE"
SSH_USER="${SSH_USER:-micqdf}"
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"
required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
if [ -z "${!key:-}" ]; then
echo "Missing required inventory variable: $key"
declare -A NODE_IPS=()
declare -a CP_NAMES=()
declare -a WK_NAMES=()
add_node_pair() {
local role="$1"
local pair="$2"
local name="${pair%%=*}"
local ip="${pair#*=}"
if [ -z "$name" ] || [ -z "$ip" ] || [ "$name" = "$ip" ]; then
echo "Invalid node pair '$pair' (expected name=ip)."
exit 1
fi
done
cluster_has_node() {
local node_name="$1"
remote "$CP_1" "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get node $node_name >/dev/null 2>&1"
NODE_IPS["$name"]="$ip"
if [ "$role" = "cp" ]; then
CP_NAMES+=("$name")
else
WK_NAMES+=("$name")
fi
}
cluster_ready() {
remote "$CP_1" "test -f /etc/kubernetes/admin.conf && sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get nodes >/dev/null 2>&1"
# Build the node inventory (NODE_IPS, CP_NAMES, WK_NAMES) from the sourced
# inventory file. Prefers the new space-separated "name=ip" list variables
# (CONTROL_PLANES / WORKERS); falls back to legacy numbered CP_N / WK_N
# variables, synthesizing node names like "cp-N" / "wk-N".
# Exits non-zero if either role ends up with no nodes.
populate_nodes() {
if [ -n "${CONTROL_PLANES:-}" ]; then
# Intentional word-splitting: pairs are space-separated "name=ip" tokens.
for pair in $CONTROL_PLANES; do
add_node_pair "cp" "$pair"
done
else
# Legacy path: enumerate CP_<digits> variables in version-sort order and
# read their values via indirect expansion (${!var_name}).
while IFS= read -r var_name; do
idx="${var_name#CP_}"
add_node_pair "cp" "cp-$idx=${!var_name}"
done < <(compgen -A variable | grep -E '^CP_[0-9]+$' | sort -V)
fi
if [ -n "${WORKERS:-}" ]; then
# Intentional word-splitting, as above.
for pair in $WORKERS; do
add_node_pair "wk" "$pair"
done
else
# Legacy path: enumerate WK_<digits> variables in version-sort order.
while IFS= read -r var_name; do
idx="${var_name#WK_}"
add_node_pair "wk" "wk-$idx=${!var_name}"
done < <(compgen -A variable | grep -E '^WK_[0-9]+$' | sort -V)
fi
# A usable cluster needs at least one node of each role.
if [ "${#CP_NAMES[@]}" -eq 0 ]; then
echo "No control planes found in inventory."
exit 1
fi
if [ "${#WK_NAMES[@]}" -eq 0 ]; then
echo "No workers found in inventory."
exit 1
fi
}
remote() {
@@ -40,6 +80,15 @@ remote() {
ssh $SSH_OPTS "$SSH_USER@$host_ip" "$cmd"
}
# Return 0 if the cluster already has a node with the given name, queried
# via the primary control plane's admin kubeconfig. Used to make joins idempotent.
cluster_has_node() {
local node_name="$1"
remote "$PRIMARY_CP_IP" "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get node $node_name >/dev/null 2>&1"
}
# Return 0 if a kubeadm cluster is already up on the primary control plane:
# admin.conf exists and the API server answers a "get nodes" request.
cluster_ready() {
remote "$PRIMARY_CP_IP" "test -f /etc/kubernetes/admin.conf && sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get nodes >/dev/null 2>&1"
}
rebuild_node() {
local node_name="$1"
local node_ip="$2"
@@ -51,28 +100,38 @@ rebuild_node() {
--use-remote-sudo
}
for node in cp-1 cp-2 cp-3 wk-1 wk-2 wk-3; do
key="${node^^}"
key="${key//-/_}"
rebuild_node "$node" "${!key}"
populate_nodes
PRIMARY_CONTROL_PLANE="${PRIMARY_CONTROL_PLANE:-cp-1}"
if [ -z "${NODE_IPS[$PRIMARY_CONTROL_PLANE]:-}" ]; then
PRIMARY_CONTROL_PLANE="${CP_NAMES[0]}"
fi
PRIMARY_CP_IP="${NODE_IPS[$PRIMARY_CONTROL_PLANE]}"
for node in "${CP_NAMES[@]}"; do
rebuild_node "$node" "${NODE_IPS[$node]}"
done
echo "==> Initializing control plane on cp-1"
if cluster_ready; then
echo "==> Existing cluster detected on cp-1; skipping kubeadm init"
else
remote "$CP_1" "sudo th-kubeadm-init"
for node in "${WK_NAMES[@]}"; do
rebuild_node "$node" "${NODE_IPS[$node]}"
done
echo "==> Installing Cilium on cp-1"
remote "$CP_1" "helm repo add cilium https://helm.cilium.io >/dev/null 2>&1 || true"
remote "$CP_1" "helm repo update >/dev/null"
remote "$CP_1" "kubectl create namespace kube-system >/dev/null 2>&1 || true"
remote "$CP_1" "helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true"
echo "==> Initializing control plane on $PRIMARY_CONTROL_PLANE"
if cluster_ready; then
echo "==> Existing cluster detected on $PRIMARY_CONTROL_PLANE; skipping kubeadm init"
else
remote "$PRIMARY_CP_IP" "sudo th-kubeadm-init"
echo "==> Installing Cilium on $PRIMARY_CONTROL_PLANE"
remote "$PRIMARY_CP_IP" "helm repo add cilium https://helm.cilium.io >/dev/null 2>&1 || true"
remote "$PRIMARY_CP_IP" "helm repo update >/dev/null"
remote "$PRIMARY_CP_IP" "kubectl create namespace kube-system >/dev/null 2>&1 || true"
remote "$PRIMARY_CP_IP" "helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true"
fi
echo "==> Building kubeadm join commands"
JOIN_CMD="$(remote "$CP_1" "sudo kubeadm token create --print-join-command")"
CERT_KEY="$(remote "$CP_1" "sudo kubeadm init phase upload-certs --upload-certs | tail -n 1")"
JOIN_CMD="$(remote "$PRIMARY_CP_IP" "sudo kubeadm token create --print-join-command")"
CERT_KEY="$(remote "$PRIMARY_CP_IP" "sudo kubeadm init phase upload-certs --upload-certs | tail -n 1")"
CP_JOIN_CMD="$JOIN_CMD --control-plane --certificate-key $CERT_KEY"
join_control_plane() {
@@ -90,36 +149,26 @@ join_worker() {
}
echo "==> Joining remaining control planes"
if cluster_has_node "cp-2"; then
echo "cp-2 already joined; skipping"
else
join_control_plane "$CP_2"
fi
for node in "${CP_NAMES[@]}"; do
if [ "$node" = "$PRIMARY_CONTROL_PLANE" ]; then
continue
fi
if cluster_has_node "cp-3"; then
echo "cp-3 already joined; skipping"
else
join_control_plane "$CP_3"
fi
if cluster_has_node "$node"; then
echo "$node already joined; skipping"
else
join_control_plane "${NODE_IPS[$node]}"
fi
done
echo "==> Joining workers"
if cluster_has_node "wk-1"; then
echo "wk-1 already joined; skipping"
else
join_worker "$WK_1"
fi
if cluster_has_node "wk-2"; then
echo "wk-2 already joined; skipping"
else
join_worker "$WK_2"
fi
if cluster_has_node "wk-3"; then
echo "wk-3 already joined; skipping"
else
join_worker "$WK_3"
fi
for node in "${WK_NAMES[@]}"; do
if cluster_has_node "$node"; then
echo "$node already joined; skipping"
else
join_worker "${NODE_IPS[$node]}"
fi
done
echo "==> Final node list"
remote "$CP_1" "kubectl get nodes -o wide"
remote "$PRIMARY_CP_IP" "kubectl get nodes -o wide"

View File

@@ -0,0 +1,40 @@
#!/usr/bin/env python3
import json
import os
import re
import sys
def natural_key(name: str):
    # Decompose "prefix-N" names so numeric suffixes compare numerically
    # (cp-2 sorts before cp-10); names without such a suffix sort by name.
    matched = re.match(r"^([a-zA-Z-]+)-(\d+)$", name)
    if not matched:
        return (name, 0)
    prefix, index = matched.groups()
    return (prefix, int(index))


def map_to_pairs(items: dict[str, str]) -> str:
    # Render a name -> IP mapping as space-separated "name=ip" pairs,
    # ordered naturally by node name.
    rendered = []
    for node_name in sorted(items, key=natural_key):
        rendered.append(f"{node_name}={items[node_name]}")
    return " ".join(rendered)
def main() -> int:
    """Read `terraform output -json` from stdin and print a kubeadm inventory env file.

    Expects the payload to contain `control_plane_vm_ipv4` and `worker_vm_ipv4`
    outputs, each a map of node name -> IPv4 address.

    Returns 0 on success; raises SystemExit with a message when either
    output is missing or empty.
    """
    payload = json.load(sys.stdin)
    cp_map = payload.get("control_plane_vm_ipv4", {}).get("value", {})
    wk_map = payload.get("worker_vm_ipv4", {}).get("value", {})
    if not cp_map or not wk_map:
        raise SystemExit("Missing control_plane_vm_ipv4 or worker_vm_ipv4 in terraform output")
    ssh_user = os.environ.get("KUBEADM_SSH_USER", "").strip() or "micqdf"
    # Derive the primary from the actual Terraform map instead of hard-coding
    # "cp-1", so renamed/re-numbered control planes still yield a valid primary.
    # (Identical output to the old behavior whenever "cp-1" exists, since it
    # sorts first under natural_key.)
    primary = min(cp_map, key=natural_key)
    print(f"SSH_USER={ssh_user}")
    print(f"PRIMARY_CONTROL_PLANE={primary}")
    print(f"CONTROL_PLANES=\"{map_to_pairs(cp_map)}\"")
    print(f"WORKERS=\"{map_to_pairs(wk_map)}\"")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -6,7 +6,7 @@ INVENTORY_FILE="${1:-$SCRIPT_DIR/inventory.env}"
if [ ! -f "$INVENTORY_FILE" ]; then
echo "Missing inventory file: $INVENTORY_FILE"
echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit IPs."
echo "Copy $SCRIPT_DIR/inventory.example.env to $SCRIPT_DIR/inventory.env and edit node mappings."
exit 1
fi
@@ -16,22 +16,57 @@ source "$INVENTORY_FILE"
SSH_USER="${SSH_USER:-micqdf}"
SSH_OPTS="${SSH_OPTS:- -o BatchMode=yes -o StrictHostKeyChecking=accept-new }"
required=(CP_1 CP_2 CP_3 WK_1 WK_2 WK_3)
for key in "${required[@]}"; do
if [ -z "${!key:-}" ]; then
echo "Missing required inventory variable: $key"
declare -A NODE_IPS=()
add_pair() {
local pair="$1"
local name="${pair%%=*}"
local ip="${pair#*=}"
if [ -z "$name" ] || [ -z "$ip" ] || [ "$name" = "$ip" ]; then
echo "Invalid node pair '$pair' (expected name=ip)."
exit 1
fi
done
NODE_IPS["$name"]="$ip"
}
# Populate NODE_IPS from the inventory: prefer the space-separated
# CONTROL_PLANES / WORKERS "name=ip" lists, fall back to legacy numbered
# CP_N / WK_N variables (synthesizing "cp-N" / "wk-N" names).
if [ -n "${CONTROL_PLANES:-}" ]; then
# Intentional word-splitting: pairs are space-separated "name=ip" tokens.
for pair in $CONTROL_PLANES; do
add_pair "$pair"
done
else
# Legacy path: enumerate CP_<digits> variables in version-sort order and
# read their values via indirect expansion (${!var_name}).
while IFS= read -r var_name; do
idx="${var_name#CP_}"
add_pair "cp-$idx=${!var_name}"
done < <(compgen -A variable | grep -E '^CP_[0-9]+$' | sort -V)
fi
if [ -n "${WORKERS:-}" ]; then
# Intentional word-splitting, as above.
for pair in $WORKERS; do
add_pair "$pair"
done
else
# Legacy path: enumerate WK_<digits> variables in version-sort order.
while IFS= read -r var_name; do
idx="${var_name#WK_}"
add_pair "wk-$idx=${!var_name}"
done < <(compgen -A variable | grep -E '^WK_[0-9]+$' | sort -V)
fi
# Refuse to run with an empty inventory — resetting nothing is a no-op at
# best and hides a misconfigured inventory file at worst.
if [ "${#NODE_IPS[@]}" -eq 0 ]; then
echo "No nodes found in inventory."
exit 1
fi
reset_node() {
local node_ip="$1"
echo "==> Resetting $node_ip"
local node_name="$1"
local node_ip="$2"
echo "==> Resetting $node_name ($node_ip)"
ssh $SSH_OPTS "$SSH_USER@$node_ip" "sudo kubeadm reset -f && sudo systemctl stop kubelet && sudo rm -rf /etc/kubernetes /var/lib/etcd /var/lib/cni /etc/cni/net.d"
}
for key in CP_1 CP_2 CP_3 WK_1 WK_2 WK_3; do
reset_node "${!key}"
done
while IFS= read -r node_name; do
reset_node "$node_name" "${NODE_IPS[$node_name]}"
done < <(printf '%s\n' "${!NODE_IPS[@]}" | sort -V)
echo "Cluster components reset on all listed nodes."