fix: auto-reset broken secondary k3s servers and precheck join ports
Some checks failed
Deploy Cluster / Terraform (push) Successful in 22s
Deploy Cluster / Ansible (push) Failing after 4m37s

This commit is contained in:
2026-03-01 01:25:20 +00:00
parent f699936172
commit 063d6dfcc0

View File

@@ -4,25 +4,75 @@
path: /etc/systemd/system/k3s.service
register: k3s_service
- name: Check if k3s is part of cluster
# Probe the current state of the k3s systemd unit. Runs only when the unit
# file was found by the earlier stat. The probe never fails the play and never
# reports a change; its stdout is captured for later tasks to inspect.
- name: Check k3s service state
  when: k3s_service.stat.exists
  command:
    cmd: systemctl is-active k3s
  register: k3s_service_state
  failed_when: false
  changed_when: false
# Derive k3s_install_needed: true unless the unit file exists AND the state
# probe printed exactly 'active'. If the probe was skipped there is no stdout,
# which defaults to '' and therefore counts as "not active".
- name: Determine whether k3s install is needed
  set_fact:
    k3s_install_needed: >-
      {{ not (k3s_service.stat.exists and
      (k3s_service_state.stdout | default('')) == 'active') }}
# Non-primary servers only: block for up to 120 seconds until TCP 6443 on the
# primary accepts connections. This is the same host and port the secondary
# install command uses as its --server URL.
- name: Wait for primary API on 6443 (secondary only)
  when: not (k3s_primary | default(false))
  wait_for:
    host: "{{ k3s_primary_ip }}"
    port: 6443
    timeout: 120
    state: started
# Non-primary servers only: confirm the primary's embedded-etcd peer port is
# reachable before attempting to join.
#
# Why 2380 and not 9345: 9345 is the RKE2 supervisor port. K3s serves its
# supervisor on 6443 alongside the Kubernetes API and never listens on 9345,
# so waiting on 9345 can only run into the 120 s timeout and fail the play.
# The primary is installed with --cluster-init (embedded etcd), and a joining
# server must reach the existing etcd member on its peer port.
# NOTE(review): assumes the primary's etcd listens on k3s_primary_ip (i.e. the
# primary's node IP equals k3s_primary_ip) and that 2380/tcp is open between
# servers - confirm against the firewall rules.
- name: Wait for primary etcd peer port on 2380 (secondary only)
  wait_for:
    host: "{{ k3s_primary_ip }}"
    port: 2380
    state: started
    timeout: 120
  when:
    - not (k3s_primary | default(false))
# Stat a path and register the result. Runs only on non-primary servers for
# which k3s_install_needed is true. The later reset task reads
# k3s_uninstall_script.stat.exists to decide whether the uninstall script
# can be run.
# NOTE(review): this span is a diff rendering with the +/- markers lost. The
# node-token path / k3s_cluster_member pair looks like the pre-change lines and
# the k3s-uninstall.sh / k3s_uninstall_script pair the post-change lines -
# confirm against the real file; only one path/register pair can be effective.
- name: Check if uninstall script exists
stat:
path: /var/lib/rancher/k3s/server/node-token
register: k3s_cluster_member
path: /usr/local/bin/k3s-uninstall.sh
register: k3s_uninstall_script
when:
- not (k3s_primary | default(false))
- k3s_install_needed
# Run the k3s uninstall script on a non-primary server that needs a
# (re)install. The conditions are checked in order, so the stat result is only
# dereferenced when the two conditions before it already hold.
- name: Reset broken secondary k3s install before rejoin
  when:
    - not (k3s_primary | default(false))
    - k3s_install_needed
    - k3s_uninstall_script.stat.exists
  command:
    cmd: /usr/local/bin/k3s-uninstall.sh
# Delete the k3s config and data directories on a non-primary server that
# needs a (re)install, so the following install starts from a clean state.
- name: Remove stale k3s data on secondary
  when:
    - not (k3s_primary | default(false))
    - k3s_install_needed
  file:
    state: absent
    path: "{{ stale_path }}"
  loop:
    - /etc/rancher/k3s
    - /var/lib/rancher/k3s
  loop_control:
    loop_var: stale_path
# Fetch the k3s installer from https://get.k3s.io to /tmp/install-k3s.sh and
# make it executable (mode 0755) so the install tasks can run it directly.
# NOTE(review): this span is a diff rendering with the +/- markers lost. The
# `not k3s_service.stat.exists` condition looks like the pre-change line and
# `k3s_install_needed` the post-change line - confirm against the real file.
- name: Download k3s install script
get_url:
url: https://get.k3s.io
dest: /tmp/install-k3s.sh
mode: "0755"
when: not k3s_service.stat.exists
when: k3s_install_needed
# Run the downloaded installer in server mode with --cluster-init on the host
# flagged k3s_primary. It advertises k3s_primary_ip and uses k3s_node_ip as the
# node IP.
# INSTALL_K3S_VERSION is passed as an empty string when k3s_version is
# 'latest'; K3S_TOKEN carries the shared token.
# NOTE(review): this span is a diff rendering with the +/- markers lost. The
# command without --tls-san and the `not k3s_service.stat.exists` condition
# look like pre-change lines; the command with --tls-san and the
# `k3s_install_needed` condition look like post-change lines - confirm against
# the real file.
- name: Install k3s server (primary)
environment:
INSTALL_K3S_VERSION: "{{ k3s_version if k3s_version != 'latest' else '' }}"
K3S_TOKEN: "{{ k3s_token }}"
command: /tmp/install-k3s.sh server --cluster-init --advertise-address={{ k3s_primary_ip }} --node-ip={{ k3s_node_ip }}
command: /tmp/install-k3s.sh server --cluster-init --advertise-address={{ k3s_primary_ip }} --node-ip={{ k3s_node_ip }} --tls-san={{ k3s_primary_ip }}
when:
- not k3s_service.stat.exists
- k3s_install_needed
- k3s_primary | default(false)
- name: Install k3s server (secondary)
@@ -31,7 +81,7 @@
K3S_TOKEN: "{{ k3s_token }}"
command: /tmp/install-k3s.sh server --server https://{{ k3s_primary_ip }}:6443 --advertise-address={{ k3s_node_ip }} --node-ip={{ k3s_node_ip }}
when:
- not k3s_service.stat.exists
- k3s_install_needed
- not (k3s_primary | default(false))
- name: Wait for k3s to be ready