4 Commits

Author SHA1 Message Date
97295a7071 Merge pull request 'ci: speed up Terraform destroy plan by skipping refresh' (#125) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 7m0s
Reviewed-on: #125
2026-03-08 04:47:02 +00:00
6ca189b32c Merge pull request 'fix: vendor Flannel manifest and harden CNI bootstrap timing' (#124) from stage into master
All checks were successful
Terraform Apply / Terraform Apply (push) Successful in 15m11s
Reviewed-on: #124
2026-03-08 04:10:47 +00:00
2aa9950f59 Merge pull request 'fix: add mount utility to kubelet service PATH' (#123) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 11m10s
Reviewed-on: #123
2026-03-08 02:16:23 +00:00
c1f86483ad Merge pull request 'debug: print detailed Flannel pod diagnostics on rollout timeout' (#122) from stage into master
Some checks failed
Terraform Apply / Terraform Apply (push) Failing after 23m50s
Reviewed-on: #122
2026-03-07 12:31:43 +00:00
7 changed files with 21 additions and 34 deletions

View File

@@ -27,7 +27,7 @@ jobs:
fi
- name: Checkout repository
uses: actions/checkout@v4
uses: https://gitea.com/actions/checkout@v4
- name: Create SSH key
run: |

View File

@@ -27,7 +27,7 @@ jobs:
fi
- name: Checkout repository
uses: actions/checkout@v4
uses: https://gitea.com/actions/checkout@v4
- name: Create SSH key
run: |

View File

@@ -16,7 +16,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: https://gitea.com/actions/checkout@v4
- name: Create secrets.tfvars
working-directory: terraform

View File

@@ -36,7 +36,7 @@ jobs:
fi
- name: Checkout repository
uses: actions/checkout@v4
uses: https://gitea.com/actions/checkout@v4
- name: Create Terraform secret files
working-directory: terraform

View File

@@ -17,7 +17,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: https://gitea.com/actions/checkout@v4
- name: Create secrets.tfvars
working-directory: terraform

View File

@@ -110,9 +110,7 @@ class Controller:
"-o",
"IdentitiesOnly=yes",
"-o",
"StrictHostKeyChecking=no",
"-o",
"UserKnownHostsFile=/dev/null",
"StrictHostKeyChecking=accept-new",
"-i",
self.ssh_key,
]
@@ -123,8 +121,6 @@ class Controller:
self.fast_mode = self.env.get("FAST_MODE", "1")
self.skip_rebuild = self.env.get("SKIP_REBUILD", "0") == "1"
self.force_reinit = True
self.ssh_ready_retries = int(self.env.get("SSH_READY_RETRIES", "20"))
self.ssh_ready_delay = int(self.env.get("SSH_READY_DELAY_SEC", "15"))
def log(self, msg):
print(f"==> {msg}")
@@ -134,26 +130,13 @@ class Controller:
return run_local(full, check=check, capture=True)
def detect_user(self, ip):
for attempt in range(1, self.ssh_ready_retries + 1):
for user in self.ssh_candidates:
proc = self._ssh(user, ip, "true", check=False)
if proc.returncode == 0:
self.active_ssh_user = user
self.log(f"Using SSH user '{user}' for {ip}")
return
if attempt < self.ssh_ready_retries:
self.log(
f"SSH not ready on {ip} yet; retrying in {self.ssh_ready_delay}s "
f"({attempt}/{self.ssh_ready_retries})"
)
time.sleep(self.ssh_ready_delay)
raise RuntimeError(
"Unable to authenticate to "
f"{ip} with users: {', '.join(self.ssh_candidates)}. "
"If this is a freshly cloned VM, the Proxmox source template likely does not yet include the "
"current cloud-init-capable NixOS template configuration from nixos/template-base. "
"Terraform can only clone what exists in Proxmox; it cannot retrofit cloud-init support into an old template."
)
for user in self.ssh_candidates:
proc = self._ssh(user, ip, "true", check=False)
if proc.returncode == 0:
self.active_ssh_user = user
self.log(f"Using SSH user '{user}' for {ip}")
return
raise RuntimeError(f"Unable to authenticate to {ip} with users: {', '.join(self.ssh_candidates)}")
def remote(self, ip, cmd, check=True):
ordered = [self.active_ssh_user] + [u for u in self.ssh_candidates if u != self.active_ssh_user]
@@ -174,7 +157,14 @@ class Controller:
return last
def prepare_known_hosts(self):
pass
ssh_dir = Path.home() / ".ssh"
ssh_dir.mkdir(parents=True, exist_ok=True)
(ssh_dir / "known_hosts").touch()
run_local(["chmod", "700", str(ssh_dir)])
run_local(["chmod", "600", str(ssh_dir / "known_hosts")])
for ip in self.node_ips.values():
run_local(["ssh-keygen", "-R", ip], check=False)
run_local(f"ssh-keyscan -H {shlex.quote(ip)} >> {shlex.quote(str(ssh_dir / 'known_hosts'))}", check=False)
def prepare_remote_nix(self, ip):
self.remote(ip, "sudo mkdir -p /etc/nix")

View File

@@ -11,7 +11,6 @@ in
networking.hostName = "k8s-base-template";
networking.useDHCP = lib.mkDefault true;
networking.useNetworkd = true;
networking.nameservers = [ "1.1.1.1" "8.8.8.8" ];
boot.loader.systemd-boot.enable = lib.mkForce false;
@@ -21,8 +20,6 @@ in
};
services.qemuGuest.enable = true;
services.cloud-init.enable = true;
services.cloud-init.network.enable = true;
services.openssh.enable = true;
services.openssh.settings = {
PasswordAuthentication = false;