fix: force fresh kubeadm init after rebuild and make kubelet enable-able
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 17s
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 17s
Always re-run the primary control-plane init whenever reconcile performs node rebuilds, so that stale or partial cluster state cannot cause join preflight failures. Also add `wantedBy` to the kubelet systemd unit so that `systemctl enable` works as expected during the join/init flows.
This commit is contained in:
@@ -124,6 +124,7 @@ class Controller:
|
||||
self.worker_parallelism = int(self.env.get("WORKER_PARALLELISM", "3"))
|
||||
self.fast_mode = self.env.get("FAST_MODE", "1")
|
||||
self.skip_rebuild = self.env.get("SKIP_REBUILD", "0") == "1"
|
||||
self.force_reinit = False
|
||||
|
||||
def log(self, msg):
|
||||
print(f"==> {msg}")
|
||||
@@ -299,6 +300,7 @@ class Controller:
|
||||
raise RuntimeError(f"Worker rebuild failures: {failures}")
|
||||
|
||||
# Rebuild can invalidate prior bootstrap stages; force reconciliation.
|
||||
self.force_reinit = True
|
||||
self.clear_done([
|
||||
"primary_initialized",
|
||||
"cni_installed",
|
||||
@@ -316,10 +318,10 @@ class Controller:
|
||||
return self.remote(self.primary_ip, cmd, check=False).returncode == 0
|
||||
|
||||
def stage_init_primary(self):
|
||||
if self.stage_done("primary_initialized") and self.has_admin_conf() and self.cluster_ready():
|
||||
if (not self.force_reinit) and self.stage_done("primary_initialized") and self.has_admin_conf() and self.cluster_ready():
|
||||
self.log("Primary control plane init already complete")
|
||||
return
|
||||
if self.has_admin_conf() and self.cluster_ready():
|
||||
if (not self.force_reinit) and self.has_admin_conf() and self.cluster_ready():
|
||||
self.log("Existing cluster detected on primary control plane")
|
||||
else:
|
||||
self.log(f"Initializing primary control plane on {self.primary_cp}")
|
||||
|
||||
@@ -343,6 +343,7 @@ in
|
||||
|
||||
systemd.services.kubelet = {
|
||||
description = "Kubernetes Kubelet";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
wants = [ "network-online.target" ];
|
||||
after = [ "containerd.service" "network-online.target" ];
|
||||
serviceConfig = {
|
||||
|
||||
Reference in New Issue
Block a user