From d1c31cdb91e026b716072926ff84da512583e97f Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Wed, 22 Apr 2026 04:14:31 +0000 Subject: [PATCH] fix: rely on k3s service readiness instead of installer exit code The k3s install script can return non-zero while systemd is still bringing the service up, especially on worker agents. Do not fail immediately on the installer command; wait for the service to become active and only emit install diagnostics if the later readiness check fails. --- ansible/roles/k3s-agent/tasks/main.yml | 55 +++++++++++++++---------- ansible/roles/k3s-server/tasks/main.yml | 30 +------------- 2 files changed, 35 insertions(+), 50 deletions(-) diff --git a/ansible/roles/k3s-agent/tasks/main.yml b/ansible/roles/k3s-agent/tasks/main.yml index 021e171..a7d7182 100644 --- a/ansible/roles/k3s-agent/tasks/main.yml +++ b/ansible/roles/k3s-agent/tasks/main.yml @@ -24,30 +24,10 @@ --node-ip {{ k3s_node_ip }} --flannel-iface={{ k3s_flannel_iface }} {% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %} + register: k3s_agent_install + failed_when: false args: creates: /usr/local/bin/k3s-agent - rescue: - - name: Show k3s-agent service status after failed install - command: systemctl status k3s-agent --no-pager - register: k3s_agent_status_after_install - changed_when: false - failed_when: false - - - name: Show recent k3s-agent logs after failed install - command: journalctl -u k3s-agent -n 120 --no-pager - register: k3s_agent_journal_after_install - changed_when: false - failed_when: false - - - name: Fail with k3s-agent diagnostics - fail: - msg: | - k3s agent install failed on {{ inventory_hostname }}. - Service status: - {{ k3s_agent_status_after_install.stdout | default('n/a') }} - - Recent logs: - {{ k3s_agent_journal_after_install.stdout | default('n/a') }} - name: Wait for k3s agent to be ready command: systemctl is-active k3s-agent @@ -56,3 +36,34 @@ retries: 30 delay: 10 changed_when: false + +- name: Show k3s-agent service status on failure + command: systemctl status k3s-agent --no-pager + register: k3s_agent_status + changed_when: false + failed_when: false + when: agent_status is failed + +- name: Show recent k3s-agent logs on failure + command: journalctl -u k3s-agent -n 120 --no-pager + register: k3s_agent_journal + changed_when: false + failed_when: false + when: agent_status is failed + +- name: Fail with k3s-agent diagnostics + fail: + msg: | + k3s agent failed to become ready on {{ inventory_hostname }}. + Install stdout: + {{ k3s_agent_install.stdout | default('n/a') }} + + Install stderr: + {{ k3s_agent_install.stderr | default('n/a') }} + + Service status: + {{ k3s_agent_status.stdout | default('n/a') }} + + Recent logs: + {{ k3s_agent_journal.stdout | default('n/a') }} + when: agent_status is failed diff --git a/ansible/roles/k3s-server/tasks/main.yml b/ansible/roles/k3s-server/tasks/main.yml index 0963fd3..dba8da5 100644 --- a/ansible/roles/k3s-server/tasks/main.yml +++ b/ansible/roles/k3s-server/tasks/main.yml @@ -69,6 +69,8 @@ {% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %} {% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %} {% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %} + register: primary_install + failed_when: false when: - k3s_install_needed - k3s_primary | default(false) @@ -92,36 +94,8 @@ {% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %} {% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %} register: secondary_install - - rescue: - - name: Show k3s service status after failed secondary install - command: systemctl status k3s --no-pager - register: k3s_status_after_install - changed_when: false failed_when: false - - name: Show recent k3s logs after failed secondary install - command: journalctl -u k3s -n 120 --no-pager - register: k3s_journal_after_install - changed_when: false - failed_when: false - - - name: Fail with secondary install diagnostics - fail: - msg: | - Secondary k3s install failed on {{ inventory_hostname }}. - Install stdout: - {{ secondary_install.stdout | default('n/a') }} - - Install stderr: - {{ secondary_install.stderr | default('n/a') }} - - Service status: - {{ k3s_status_after_install.stdout | default('n/a') }} - - Recent logs: - {{ k3s_journal_after_install.stdout | default('n/a') }} - - name: Wait for k3s to be ready command: "{{ (k3s_primary | default(false)) | ternary('kubectl get nodes', 'systemctl is-active k3s') }}" register: k3s_ready