Merge pull request 'fix: stabilize Cilium install defaults and add rollout diagnostics' (#119) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 26m43s
Reviewed-on: #119
This commit was merged in pull request #119.
This commit is contained in:
@@ -125,6 +125,7 @@ class Controller:
|
|||||||
self.fast_mode = self.env.get("FAST_MODE", "1")
|
self.fast_mode = self.env.get("FAST_MODE", "1")
|
||||||
self.skip_rebuild = self.env.get("SKIP_REBUILD", "0") == "1"
|
self.skip_rebuild = self.env.get("SKIP_REBUILD", "0") == "1"
|
||||||
self.force_reinit = False
|
self.force_reinit = False
|
||||||
|
self.cilium_kpr = self.env.get("CILIUM_KUBE_PROXY_REPLACEMENT", "false")
|
||||||
|
|
||||||
def log(self, msg):
|
def log(self, msg):
|
||||||
print(f"==> {msg}")
|
print(f"==> {msg}")
|
||||||
@@ -338,7 +339,12 @@ class Controller:
|
|||||||
self.remote(self.primary_ip, "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf create namespace kube-system >/dev/null 2>&1 || true")
|
self.remote(self.primary_ip, "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf create namespace kube-system >/dev/null 2>&1 || true")
|
||||||
self.remote(
|
self.remote(
|
||||||
self.primary_ip,
|
self.primary_ip,
|
||||||
"sudo KUBECONFIG=/etc/kubernetes/admin.conf helm upgrade --install cilium cilium/cilium --namespace kube-system --set kubeProxyReplacement=true",
|
(
|
||||||
|
"sudo KUBECONFIG=/etc/kubernetes/admin.conf "
|
||||||
|
"helm upgrade --install cilium cilium/cilium "
|
||||||
|
"--namespace kube-system "
|
||||||
|
f"--set kubeProxyReplacement={shlex.quote(self.cilium_kpr)}"
|
||||||
|
),
|
||||||
)
|
)
|
||||||
self.mark_done("cni_installed")
|
self.mark_done("cni_installed")
|
||||||
|
|
||||||
@@ -397,10 +403,29 @@ class Controller:
|
|||||||
self.log("Verification already complete")
|
self.log("Verification already complete")
|
||||||
return
|
return
|
||||||
self.log("Final node verification")
|
self.log("Final node verification")
|
||||||
self.remote(
|
try:
|
||||||
self.primary_ip,
|
self.remote(
|
||||||
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system rollout status ds/cilium --timeout=10m",
|
self.primary_ip,
|
||||||
)
|
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system rollout status ds/cilium --timeout=10m",
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
self.log("Cilium rollout failed; collecting diagnostics")
|
||||||
|
self.remote(
|
||||||
|
self.primary_ip,
|
||||||
|
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get ds cilium -o wide || true",
|
||||||
|
check=False,
|
||||||
|
)
|
||||||
|
self.remote(
|
||||||
|
self.primary_ip,
|
||||||
|
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o wide || true",
|
||||||
|
check=False,
|
||||||
|
)
|
||||||
|
self.remote(
|
||||||
|
self.primary_ip,
|
||||||
|
"for p in $(sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system get pods -l k8s-app=cilium -o name 2>/dev/null); do sudo kubectl --kubeconfig /etc/kubernetes/admin.conf -n kube-system logs --tail=120 $p || true; done",
|
||||||
|
check=False,
|
||||||
|
)
|
||||||
|
raise
|
||||||
self.remote(
|
self.remote(
|
||||||
self.primary_ip,
|
self.primary_ip,
|
||||||
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf wait --for=condition=Ready nodes --all --timeout=10m",
|
"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf wait --for=condition=Ready nodes --all --timeout=10m",
|
||||||
|
|||||||
Reference in New Issue
Block a user