Merge pull request 'perf: speed up first bootstrap with fast-mode defaults' (#64) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 5m14s

Reviewed-on: #64
This commit was merged in pull request #64.
This commit is contained in:
2026-03-01 03:36:21 +00:00
4 changed files with 20 additions and 4 deletions

View File

@@ -165,6 +165,10 @@ jobs:
- name: Run cluster rebuild and bootstrap
env:
NIX_CONFIG: experimental-features = nix-command flakes
FAST_MODE: "1"
WORKER_PARALLELISM: "3"
REBUILD_TIMEOUT: "45m"
REBUILD_RETRIES: "2"
run: |
if [ -f "$HOME/.nix-profile/etc/profile.d/nix.sh" ]; then
. "$HOME/.nix-profile/etc/profile.d/nix.sh"

View File

@@ -193,6 +193,10 @@ jobs:
- name: Rebuild and bootstrap/reconcile kubeadm cluster
env:
NIX_CONFIG: experimental-features = nix-command flakes
FAST_MODE: "1"
WORKER_PARALLELISM: "3"
REBUILD_TIMEOUT: "45m"
REBUILD_RETRIES: "2"
run: |
if [ -f "$HOME/.nix-profile/etc/profile.d/nix.sh" ]; then
. "$HOME/.nix-profile/etc/profile.d/nix.sh"

View File

@@ -108,9 +108,12 @@ $EDITOR ./scripts/inventory.env
Optional tuning env vars:
```bash
WORKER_PARALLELISM=2 REBUILD_TIMEOUT=45m REBUILD_RETRIES=2 ./scripts/rebuild-and-bootstrap.sh
FAST_MODE=1 WORKER_PARALLELISM=3 REBUILD_TIMEOUT=45m REBUILD_RETRIES=2 ./scripts/rebuild-and-bootstrap.sh
```
- `FAST_MODE=1` (the script default) skips the pre-rebuild remote GC cleanup on each node to reduce wall-clock time.
- Set `FAST_MODE=0` to run that cleanup before each rebuild; this is slower but performs a more aggressive space cleanup pass.
3. If you only want to reset Kubernetes state on existing VMs:
```bash

View File

@@ -20,7 +20,8 @@ SSH_OPTS="${SSH_OPTS:--o BatchMode=yes -o IdentitiesOnly=yes -o StrictHostKeyChe
SSH_USER_CANDIDATES="${SSH_USER_CANDIDATES:-root $SSH_USER}"
REBUILD_TIMEOUT="${REBUILD_TIMEOUT:-45m}"
REBUILD_RETRIES="${REBUILD_RETRIES:-2}"
WORKER_PARALLELISM="${WORKER_PARALLELISM:-2}"
WORKER_PARALLELISM="${WORKER_PARALLELISM:-3}"
FAST_MODE="${FAST_MODE:-1}"
declare -A NODE_IPS=()
declare -a CP_NAMES=()
@@ -190,14 +191,18 @@ detect_ssh_user "$PRIMARY_CP_IP"
for node in "${CP_NAMES[@]}"; do
prepare_remote_nix_trust "${NODE_IPS[$node]}"
prepare_remote_space "${NODE_IPS[$node]}"
if [ "$FAST_MODE" != "1" ]; then
prepare_remote_space "${NODE_IPS[$node]}"
fi
rebuild_node_with_retry "$node" "${NODE_IPS[$node]}"
done
worker_failures=0
for node in "${WK_NAMES[@]}"; do
prepare_remote_nix_trust "${NODE_IPS[$node]}"
prepare_remote_space "${NODE_IPS[$node]}"
if [ "$FAST_MODE" != "1" ]; then
prepare_remote_space "${NODE_IPS[$node]}"
fi
done
active_jobs=0