From 063d6dfcc099f967be46e43a2f668257d696dfec Mon Sep 17 00:00:00 2001
From: MichaelFisher1997
Date: Sun, 1 Mar 2026 01:25:20 +0000
Subject: [PATCH] fix: auto-reset broken secondary k3s servers and precheck join ports

---

Notes (reviewer commentary; not part of the commit message or the diff):

    What the hunks below do
    * k3s_install_needed is true when the k3s unit file is absent, or when
      `systemctl is-active k3s` prints anything other than `active`.
    * Every install-related task is now gated on k3s_install_needed instead
      of `not k3s_service.stat.exists`.
    * On non-primary hosts with k3s_install_needed set, k3s-uninstall.sh is
      run when it exists, then /etc/rancher/k3s and /var/lib/rancher/k3s are
      removed before the installer runs again. These reset tasks are skipped
      on the primary.
    * The primary install command gains --tls-san={{ k3s_primary_ip }}.

    Points to confirm
    * NOTE(review): the two wait_for checks (6443, 9345) are conditioned only
      on "not primary", so they also run on secondaries where
      k3s_install_needed is false -- confirm that is intended.
    * NOTE(review): the stat of /var/lib/rancher/k3s/server/node-token and
      its k3s_cluster_member result are dropped -- confirm no other task
      reads k3s_cluster_member.
    * NOTE(review): the From: header carries no e-mail address; `git am` may
      refuse it, `git apply` is unaffected -- confirm before mailing.
    * NOTE(review): line breaks and YAML indentation were restored from a
      whitespace-collapsed copy using conventional Ansible layout, and the
      primary `command:` change was split into its -/+ pair so the hunk
      matches its header (25 old / 75 new lines; 57 insertions, 7 deletions).
      Verify with `git apply --check` against the target file.

 ansible/roles/k3s-server/tasks/main.yml | 64 ++++++++++++++++++++++---
 1 file changed, 57 insertions(+), 7 deletions(-)

diff --git a/ansible/roles/k3s-server/tasks/main.yml b/ansible/roles/k3s-server/tasks/main.yml
index 35a1a41..a4fda5b 100644
--- a/ansible/roles/k3s-server/tasks/main.yml
+++ b/ansible/roles/k3s-server/tasks/main.yml
@@ -4,25 +4,75 @@
     path: /etc/systemd/system/k3s.service
   register: k3s_service
 
-- name: Check if k3s is part of cluster
+- name: Check k3s service state
+  command: systemctl is-active k3s
+  register: k3s_service_state
+  changed_when: false
+  failed_when: false
+  when: k3s_service.stat.exists
+
+- name: Determine whether k3s install is needed
+  set_fact:
+    k3s_install_needed: "{{ (not k3s_service.stat.exists) or ((k3s_service_state.stdout | default('')) != 'active') }}"
+
+- name: Wait for primary API on 6443 (secondary only)
+  wait_for:
+    host: "{{ k3s_primary_ip }}"
+    port: 6443
+    state: started
+    timeout: 120
+  when:
+    - not (k3s_primary | default(false))
+
+- name: Wait for primary supervisor on 9345 (secondary only)
+  wait_for:
+    host: "{{ k3s_primary_ip }}"
+    port: 9345
+    state: started
+    timeout: 120
+  when:
+    - not (k3s_primary | default(false))
+
+- name: Check if uninstall script exists
   stat:
-    path: /var/lib/rancher/k3s/server/node-token
-  register: k3s_cluster_member
+    path: /usr/local/bin/k3s-uninstall.sh
+  register: k3s_uninstall_script
+  when:
+    - not (k3s_primary | default(false))
+    - k3s_install_needed
+
+- name: Reset broken secondary k3s install before rejoin
+  command: /usr/local/bin/k3s-uninstall.sh
+  when:
+    - not (k3s_primary | default(false))
+    - k3s_install_needed
+    - k3s_uninstall_script.stat.exists
+
+- name: Remove stale k3s data on secondary
+  file:
+    path: "{{ item }}"
+    state: absent
+  loop:
+    - /etc/rancher/k3s
+    - /var/lib/rancher/k3s
+  when:
+    - not (k3s_primary | default(false))
+    - k3s_install_needed
 
 - name: Download k3s install script
   get_url:
     url: https://get.k3s.io
     dest: /tmp/install-k3s.sh
     mode: "0755"
-  when: not k3s_service.stat.exists
+  when: k3s_install_needed
 
 - name: Install k3s server (primary)
   environment:
     INSTALL_K3S_VERSION: "{{ k3s_version if k3s_version != 'latest' else '' }}"
     K3S_TOKEN: "{{ k3s_token }}"
-  command: /tmp/install-k3s.sh server --cluster-init --advertise-address={{ k3s_primary_ip }} --node-ip={{ k3s_node_ip }}
+  command: /tmp/install-k3s.sh server --cluster-init --advertise-address={{ k3s_primary_ip }} --node-ip={{ k3s_node_ip }} --tls-san={{ k3s_primary_ip }}
   when:
-    - not k3s_service.stat.exists
+    - k3s_install_needed
     - k3s_primary | default(false)
 
 - name: Install k3s server (secondary)
@@ -31,7 +81,7 @@
     K3S_TOKEN: "{{ k3s_token }}"
   command: /tmp/install-k3s.sh server --server https://{{ k3s_primary_ip }}:6443 --advertise-address={{ k3s_node_ip }} --node-ip={{ k3s_node_ip }}
   when:
-    - not k3s_service.stat.exists
+    - k3s_install_needed
     - not (k3s_primary | default(false))
 
 - name: Wait for k3s to be ready