# Methods of `Controller` (the hunk header places them inside that class).


def stage_install_cni(
    self,
    *,
    ready_checks=30,
    ready_interval=10,
    apply_attempts=5,
    apply_interval=15,
):
    """Install the Flannel CNI from the manifest shipped next to this script.

    The manifest is read from ``<script_dir>/../manifests/kube-flannel.yml``,
    staged on the primary node under ``/var/lib/terrahome`` and applied with
    ``kubectl``.  The apply step is only attempted once ``cluster_ready()``
    reports success, and is retried because it can fail transiently.

    Args:
        ready_checks: Maximum number of ``cluster_ready()`` polls.
        ready_interval: Seconds to sleep between readiness polls.
        apply_attempts: Maximum number of ``kubectl apply`` attempts.
        apply_interval: Seconds to sleep between apply attempts.

    Raises:
        RuntimeError: If the API server never becomes ready, or if every
            apply attempt fails (the last stdout/stderr is included).
    """
    # Imported locally: the change that introduced this code adds no
    # module-level import lines, so do not depend on them being present.
    import base64
    import time

    remote_dir = "/var/lib/terrahome"
    remote_manifest = f"{remote_dir}/kube-flannel.yml"

    self.log("Installing Flannel")
    manifest_path = self.script_dir.parent / "manifests" / "kube-flannel.yml"
    manifest_b64 = base64.b64encode(manifest_path.read_bytes()).decode()

    # Ship the manifest inline as base64 so no separate file transfer is
    # needed and the YAML never has to survive shell quoting.
    # NOTE(review): assumes remote() raises on a non-zero exit unless
    # check=False is passed - confirm against its definition.
    self.remote(
        self.primary_ip,
        (
            f"sudo mkdir -p {remote_dir} && "
            f"echo {shlex.quote(manifest_b64)} | base64 -d | sudo tee {remote_manifest} >/dev/null"
        ),
    )

    self.log("Waiting for API readiness before applying Flannel")
    for check in range(1, ready_checks + 1):
        if self.cluster_ready():
            break
        # Sleeping after the final poll would only delay the error below.
        if check < ready_checks:
            time.sleep(ready_interval)
    else:
        raise RuntimeError("API server did not become ready before Flannel install")

    last_error = ""
    for attempt in range(1, apply_attempts + 1):
        proc = self.remote(
            self.primary_ip,
            f"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf apply -f {remote_manifest}",
            check=False,
        )
        if proc.returncode == 0:
            return
        # Keep whichever streams produced output, newline-separated.
        last_error = "\n".join(part for part in (proc.stdout, proc.stderr) if part)
        if attempt < apply_attempts:
            self.log(
                f"Flannel apply attempt {attempt}/{apply_attempts} failed; "
                f"retrying in {apply_interval}s"
            )
            time.sleep(apply_interval)
        else:
            # No retry follows the last attempt, so neither announce one
            # nor wait for it.
            self.log(f"Flannel apply attempt {attempt}/{apply_attempts} failed; giving up")

    raise RuntimeError(f"Flannel apply failed after retries\n{last_error}")


def cluster_has_node(self, name):
    """Return True if ``kubectl get node <name>`` succeeds on the primary node."""
    cmd = f"sudo kubectl --kubeconfig /etc/kubernetes/admin.conf get node {shlex.quote(name)} >/dev/null 2>&1"
    return self.remote(self.primary_ip, cmd, check=False).returncode == 0
# Flannel CNI manifest (images pinned to flannel v0.25.5 and
# flannel-cni-plugin v1.5.1-flannel1).
# Kept in the repository so the bootstrap controller can stage it on the
# primary node and `kubectl apply` it from local disk.
---
# Namespace holding every Flannel resource; the label sets the Pod Security
# "enforce" level to privileged for this namespace.
kind: Namespace
apiVersion: v1
metadata:
  name: kube-flannel
  labels:
    k8s-app: flannel
    pod-security.kubernetes.io/enforce: privileged
---
# Cluster-wide permissions granted to flannel: get pods, get/list/watch nodes,
# and patch node status.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: flannel
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
# Binds the ClusterRole above to the `flannel` ServiceAccount.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: flannel
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
# Identity the DaemonSet pods run as (see `serviceAccountName` below).
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: flannel
  name: flannel
  namespace: kube-flannel
---
# Configuration mounted into the flannel pods at /etc/kube-flannel/.
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-flannel
  labels:
    tier: node
    k8s-app: flannel
    app: flannel
data:
  # CNI plugin chain; the install-cni init container copies this file to
  # /etc/cni/net.d/10-flannel.conflist on the host.
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  # Flannel network settings: overlay range and VXLAN backend.
  # NOTE(review): "Network" presumably has to match the pod CIDR the cluster
  # was initialised with - confirm against the kubeadm configuration.
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "EnableNFTables": false,
      "Backend": {
        "Type": "vxlan"
      }
    }
---
# Runs one flannel pod per Linux node.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
    k8s-app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      # Schedule only onto nodes labelled kubernetes.io/os=linux.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      # No key given: tolerates every taint whose effect is NoSchedule.
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      # Copies the flannel CNI binary from the image into the host's
      # /opt/cni/bin (mounted from the `cni-plugin` hostPath volume).
      - name: install-cni-plugin
        image: docker.io/flannel/flannel-cni-plugin:v1.5.1-flannel1
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      # Copies cni-conf.json from the ConfigMap into the host's
      # /etc/cni/net.d as 10-flannel.conflist.
      - name: install-cni
        image: docker.io/flannel/flannel:v0.25.5
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: docker.io/flannel/flannel:v0.25.5
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
        # Not privileged; only the two network capabilities are added.
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: xtables-lock
          mountPath: /run/xtables.lock
      # Host paths shared with the node, plus the ConfigMap defined above.
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
          type: DirectoryOrCreate
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
          type: DirectoryOrCreate
      - name: cni
        hostPath:
          path: /etc/cni/net.d
          type: DirectoryOrCreate
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
# Directory entries for systemd-tmpfiles; every rule here is a `d` line with
# mode 0755 and owner root:root.
systemd.tmpfiles.rules = [
  "d /etc/kubernetes 0755 root root -"
  "d /etc/kubernetes/manifests 0755 root root -"
  # Host paths that the Flannel DaemonSet mounts as hostPath volumes
  # (CNI config dir, CNI plugin dir, flannel runtime dir).
  "d /etc/cni/net.d 0755 root root -"
  "d /opt/cni/bin 0755 root root -"
  "d /run/flannel 0755 root root -"
  "d /var/lib/kubelet 0755 root root -"
  "d /var/lib/kubelet/pki 0755 root root -"
];