Merge pull request 'fix: disable lingering kubelet service before node rebuild' (#84) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 18m50s

Reviewed-on: #84
This commit was merged in pull request #84.
This commit is contained in:
2026-03-02 10:09:20 +00:00

View File

@@ -208,6 +208,14 @@ prepare_remote_space() {
remote "$node_ip" "sudo rm -rf /tmp/nix* /tmp/nixos-rebuild* || true"
}
# Best-effort quiescing of the kubelet unit on a single node.
#
# Arguments:
#   $1 - node address, handed to `remote` as its first argument
# Outputs:
#   One progress line on stdout.
#
# Every remote command line ends in `|| true` (and the systemctl ones also
# discard their output), so a node where kubelet is missing or already
# stopped does not make the remote command line itself fail.
prepare_remote_kubelet() {
  local node_ip="$1"
  local remote_cmd
  # Sent one at a time, in this order: disable and stop the unit, clear its
  # failed state, then remove the multi-user.target.wants entry for it.
  local -a quiesce_cmds=(
    "sudo systemctl disable --now kubelet >/dev/null 2>&1 || true"
    "sudo systemctl reset-failed kubelet >/dev/null 2>&1 || true"
    "sudo rm -f /etc/systemd/system/multi-user.target.wants/kubelet.service || true"
  )

  echo "==> Quiescing kubelet on $node_ip"
  for remote_cmd in "${quiesce_cmds[@]}"; do
    remote "$node_ip" "$remote_cmd"
  done
}
# Run the setup helpers in this order; both are defined outside this excerpt.
populate_nodes
prepare_known_hosts
# Export the script's SSH options as NIX_SSHOPTS so child processes inherit them.
# NOTE(review): presumably so Nix tooling (e.g. nixos-rebuild) connects with
# the same SSH options as the rest of this script — confirm.
export NIX_SSHOPTS="$SSH_OPTS"
@@ -222,6 +230,7 @@ detect_ssh_user "$PRIMARY_CP_IP"
for node in "${CP_NAMES[@]}"; do
prepare_remote_nix_trust "${NODE_IPS[$node]}"
prepare_remote_kubelet "${NODE_IPS[$node]}"
if [ "$FAST_MODE" != "1" ]; then
prepare_remote_space "${NODE_IPS[$node]}"
fi
@@ -231,6 +240,7 @@ done
worker_failures=0
for node in "${WK_NAMES[@]}"; do
prepare_remote_nix_trust "${NODE_IPS[$node]}"
prepare_remote_kubelet "${NODE_IPS[$node]}"
if [ "$FAST_MODE" != "1" ]; then
prepare_remote_space "${NODE_IPS[$node]}"
fi