Merge pull request 'fix: force fresh bootstrap stages after rebuild and stabilize join node identity' (#106) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 20m28s
Reviewed-on: #106
This commit was merged in pull request #106.
This commit is contained in:
@@ -198,6 +198,13 @@ class Controller:
|
||||
state["updated_at"] = int(time.time())
|
||||
self.set_state(state)
|
||||
|
||||
+ def clear_done(self, keys):
|
||||
+ state = self.get_state()
|
||||
+ for key in keys:
|
||||
+ state.pop(key, None)
|
||||
+ state["updated_at"] = int(time.time())
|
||||
+ self.set_state(state)
|
||||
|
||||
def stage_done(self, key):
|
||||
return bool(self.get_state().get(key))
|
||||
|
||||
@@ -291,6 +298,14 @@ class Controller:
|
||||
if failures:
|
||||
raise RuntimeError(f"Worker rebuild failures: {failures}")
|
||||
|
||||
+ # Rebuild can invalidate prior bootstrap stages; force reconciliation.
|
||||
+ self.clear_done([
|
||||
+ "primary_initialized",
|
||||
+ "cni_installed",
|
||||
+ "control_planes_joined",
|
||||
+ "workers_joined",
|
||||
+ "verified",
|
||||
+ ])
|
||||
self.mark_done("nodes_rebuilt")
|
||||
|
||||
def has_admin_conf(self):
|
||||
@@ -301,7 +316,7 @@ class Controller:
|
||||
return self.remote(self.primary_ip, cmd, check=False).returncode == 0
|
||||
|
||||
def stage_init_primary(self):
|
||||
- if self.stage_done("primary_initialized"):
|
||||
+ if self.stage_done("primary_initialized") and self.has_admin_conf() and self.cluster_ready():
|
||||
self.log("Primary control plane init already complete")
|
||||
return
|
||||
if self.has_admin_conf() and self.cluster_ready():
|
||||
@@ -312,7 +327,7 @@ class Controller:
|
||||
self.mark_done("primary_initialized")
|
||||
|
||||
def stage_install_cni(self):
|
||||
- if self.stage_done("cni_installed"):
|
||||
+ if self.stage_done("cni_installed") and self.cluster_ready():
|
||||
self.log("CNI install already complete")
|
||||
return
|
||||
self.log("Installing or upgrading Cilium")
|
||||
@@ -348,7 +363,6 @@ class Controller:
|
||||
self.log("Control-plane join already complete")
|
||||
return
|
||||
_, cp_join = self.build_join_cmds()
|
||||
- encoded = base64.b64encode(cp_join.encode()).decode()
|
||||
for node in self.cp_names:
|
||||
if node == self.primary_cp:
|
||||
continue
|
||||
@@ -357,7 +371,8 @@ class Controller:
|
||||
continue
|
||||
self.log(f"Joining control plane {node}")
|
||||
ip = self.node_ips[node]
|
||||
- self.remote(ip, f"sudo th-kubeadm-join-control-plane \"$(echo {encoded} | base64 -d)\"")
|
||||
+ node_join = f"{cp_join} --node-name {node}"
|
||||
+ self.remote(ip, f"sudo th-kubeadm-join-control-plane {shlex.quote(node_join)}")
|
||||
self.mark_done("control_planes_joined")
|
||||
|
||||
def stage_join_workers(self):
|
||||
@@ -365,14 +380,14 @@ class Controller:
|
||||
self.log("Worker join already complete")
|
||||
return
|
||||
join_cmd, _ = self.build_join_cmds()
|
||||
- encoded = base64.b64encode(join_cmd.encode()).decode()
|
||||
for node in self.wk_names:
|
||||
if self.cluster_has_node(node):
|
||||
self.log(f"{node} already joined")
|
||||
continue
|
||||
self.log(f"Joining worker {node}")
|
||||
ip = self.node_ips[node]
|
||||
- self.remote(ip, f"sudo th-kubeadm-join-worker \"$(echo {encoded} | base64 -d)\"")
|
||||
+ node_join = f"{join_cmd} --node-name {node}"
|
||||
+ self.remote(ip, f"sudo th-kubeadm-join-worker {shlex.quote(node_join)}")
|
||||
self.mark_done("workers_joined")
|
||||
|
||||
def stage_verify(self):
|
||||
|
||||
@@ -309,6 +309,7 @@ in
|
||||
|
||||
systemctl unmask kubelet || true
|
||||
systemctl stop kubelet || true
|
||||
systemctl enable kubelet || true
|
||||
systemctl reset-failed kubelet || true
|
||||
systemctl daemon-reload
|
||||
eval "$1"
|
||||
@@ -326,6 +327,7 @@ in
|
||||
|
||||
systemctl unmask kubelet || true
|
||||
systemctl stop kubelet || true
|
||||
systemctl enable kubelet || true
|
||||
systemctl reset-failed kubelet || true
|
||||
systemctl daemon-reload
|
||||
eval "$1"
|
||||
|
||||
Reference in New Issue
Block a user