Compare commits

...

2 Commits

Author SHA1 Message Date
d964ff8b50 Merge pull request 'fix: point Cilium directly at API server and print rollout diagnostics' (#120) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 26m43s
Reviewed-on: #120
2026-03-05 01:25:52 +00:00
e06b2c692e fix: point Cilium directly at API server and print rollout diagnostics
All checks were successful
Terraform Plan / Terraform Plan (push) Successful in 18s
Set Cilium's k8sServiceHost/k8sServicePort to the primary control-plane API endpoint so that Cilium does not depend on in-cluster service routing during early bootstrap. Also print Cilium daemonset, pod, and log diagnostics when the rollout times out.
2026-03-05 01:21:21 +00:00

View File

@@ -343,6 +343,8 @@ class Controller:
"sudo KUBECONFIG=/etc/kubernetes/admin.conf "
"helm upgrade --install cilium cilium/cilium "
"--namespace kube-system "
f"--set k8sServiceHost={shlex.quote(self.primary_ip)} "
"--set k8sServicePort=6443 "
f"--set kubeProxyReplacement={shlex.quote(self.cilium_kpr)}"
),
)
@@ -410,21 +412,24 @@ class Controller:
)
except Exception:
self.log("Cilium rollout failed; collecting diagnostics")
self.remote(
proc = self.remote(
self.primary_ip,
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get ds cilium -o wide || true",
check=False,
)
self.remote(
print(proc.stdout)
proc = self.remote(
self.primary_ip,
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o wide || true",
check=False,
)
self.remote(
print(proc.stdout)
proc = self.remote(
self.primary_ip,
"for p in $(sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o name 2>/dev/null); do sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system logs --tail=120 $p || true; done",
check=False,
)
print(proc.stdout)
raise
self.remote(
self.primary_ip,