Merge pull request 'fix: vendor Flannel manifest and harden CNI bootstrap timing' (#124) from stage into master
All checks were successful
Terraform Apply / Terraform Apply (push) Successful in 15m11s

Reviewed-on: #124
This commit was merged in pull request #124.
This commit is contained in:
2026-03-08 04:10:47 +00:00
3 changed files with 247 additions and 1 deletions

View File

@@ -265,11 +265,42 @@ class Controller:
def stage_install_cni(
    self,
    *,
    ready_polls=30,
    ready_interval=10,
    apply_attempts=5,
    apply_interval=15,
):
    """Upload the vendored Flannel manifest to the primary node and apply it.

    The manifest is read from ``manifests/kube-flannel.yml`` (a sibling of
    ``self.script_dir``), embedded base64-encoded in the remote command and
    written to ``/var/lib/terrahome/kube-flannel.yml`` on the primary node.
    Once ``self.cluster_ready()`` returns true, ``kubectl apply`` is run
    against that on-node copy and retried on a non-zero exit status.

    Args:
        ready_polls: Maximum number of ``cluster_ready()`` checks.
        ready_interval: Seconds to wait between readiness checks.
        apply_attempts: Maximum number of ``kubectl apply`` attempts.
        apply_interval: Seconds to wait between failed apply attempts.

    Raises:
        RuntimeError: If the cluster never reports ready, or if every apply
            attempt exits non-zero (the last stdout/stderr is appended).
    """
    self.log("Installing Flannel")

    # Ship the manifest inside the command itself so the node never has to
    # download it; base64 keeps the YAML intact through the remote shell.
    manifest_path = self.script_dir.parent / "manifests" / "kube-flannel.yml"
    manifest_b64 = base64.b64encode(manifest_path.read_bytes()).decode()
    self.remote(
        self.primary_ip,
        (
            "sudo mkdir -p /var/lib/terrahome && "
            f"echo {shlex.quote(manifest_b64)} | base64 -d | sudo tee /var/lib/terrahome/kube-flannel.yml >/dev/null"
        ),
    )

    self.log("Waiting for API readiness before applying Flannel")
    for poll in range(1, ready_polls + 1):
        if self.cluster_ready():
            break
        # Only wait when another poll will actually follow.
        if poll < ready_polls:
            time.sleep(ready_interval)
    else:
        raise RuntimeError("API server did not become ready before Flannel install")

    last_error = None
    for attempt in range(1, apply_attempts + 1):
        proc = self.remote(
            self.primary_ip,
            "sudo kubectl --kubeconfig /etc/kubernetes/admin.conf apply -f /var/lib/terrahome/kube-flannel.yml",
            check=False,
        )
        if proc.returncode == 0:
            return
        # Keep whichever streams produced output, stdout first.
        last_error = "\n".join(part for part in (proc.stdout, proc.stderr) if part)
        if attempt < apply_attempts:
            self.log(
                f"Flannel apply attempt {attempt}/{apply_attempts} failed; "
                f"retrying in {apply_interval}s"
            )
            time.sleep(apply_interval)
        else:
            # No retry follows the final attempt, so neither announce one
            # nor sleep before reporting the failure.
            self.log(f"Flannel apply attempt {attempt}/{apply_attempts} failed; giving up")
    raise RuntimeError(f"Flannel apply failed after retries\n{last_error or ''}")
def cluster_has_node(self, name):
    """Return True when ``kubectl get node <name>`` succeeds on the primary.

    The command runs through ``self.remote`` with ``check=False`` and its
    output is discarded; only the exit status is inspected. Any non-zero
    exit is reported as False.

    Args:
        name: Node name to look up; shell-quoted before being interpolated.
    """
    cmd = f"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get node {shlex.quote(name)} >/dev/null 2>&1"
    return self.remote(self.primary_ip, cmd, check=False).returncode == 0

View File

@@ -0,0 +1,212 @@
---
# Namespace that holds the namespaced Flannel objects in this manifest.
kind: Namespace
apiVersion: v1
metadata:
  name: kube-flannel
  labels:
    k8s-app: flannel
    # Pod Security Admission level enforced for pods in this namespace.
    pod-security.kubernetes.io/enforce: privileged
---
# Cluster-wide permissions granted to Flannel: read pods, read and watch
# nodes, and patch node status.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: flannel
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
# Grants the "flannel" ClusterRole to the "flannel" ServiceAccount in the
# kube-flannel namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: flannel
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
# Identity the Flannel DaemonSet pods run as (see serviceAccountName there).
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: flannel
  name: flannel
  namespace: kube-flannel
---
# Configuration mounted into the Flannel pods at /etc/kube-flannel/.
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-flannel
  labels:
    tier: node
    k8s-app: flannel
    app: flannel
data:
  # CNI plugin chain; the install-cni init container copies this file to
  # /etc/cni/net.d/10-flannel.conflist on the host.
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  # Overlay network settings (VXLAN backend).
  # NOTE(review): "Network" presumably has to match the pod CIDR the cluster
  # was initialised with -- confirm against the kubeadm configuration.
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "EnableNFTables": false,
      "Backend": {
        "Type": "vxlan"
      }
    }
---
# Runs one Flannel pod on every Linux node.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
    k8s-app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      # Schedule only onto nodes labelled kubernetes.io/os=linux.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      # Tolerate every NoSchedule taint, whatever its key.
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      # Copies the flannel CNI binary into the host's /opt/cni/bin.
      - name: install-cni-plugin
        image: docker.io/flannel/flannel-cni-plugin:v1.5.1-flannel1
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      # Copies cni-conf.json from the ConfigMap into the host's
      # /etc/cni/net.d as 10-flannel.conflist.
      - name: install-cni
        image: docker.io/flannel/flannel:v0.25.5
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: docker.io/flannel/flannel:v0.25.5
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
        # Not fully privileged: only the two network capabilities are added.
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: xtables-lock
          mountPath: /run/xtables.lock
      # Host paths are created on demand (DirectoryOrCreate / FileOrCreate)
      # if they do not exist yet.
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
          type: DirectoryOrCreate
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
          type: DirectoryOrCreate
      - name: cni
        hostPath:
          path: /etc/cni/net.d
          type: DirectoryOrCreate
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate

View File

@@ -410,6 +410,9 @@ in
# Directories created by systemd-tmpfiles ("d" = create if missing), all
# root-owned with mode 0755. The CNI and /run/flannel entries are the host
# paths the Flannel DaemonSet mounts.
systemd.tmpfiles.rules = [
  "d /etc/kubernetes 0755 root root -"
  "d /etc/kubernetes/manifests 0755 root root -"
  "d /etc/cni/net.d 0755 root root -"
  "d /opt/cni/bin 0755 root root -"
  "d /run/flannel 0755 root root -"
  "d /var/lib/kubelet 0755 root root -"
  "d /var/lib/kubelet/pki 0755 root root -"
];