fix: rely on k3s service readiness instead of installer exit code
The k3s install script can return non-zero while systemd is still bringing the service up, especially on worker agents. Do not fail immediately on the installer command; instead, record its result, wait for the service to become active, and emit diagnostics (installer stdout/stderr, service status, and recent journal logs) only if the later readiness check fails.
This commit is contained in:
@@ -24,30 +24,10 @@
|
|||||||
--node-ip {{ k3s_node_ip }}
|
--node-ip {{ k3s_node_ip }}
|
||||||
--flannel-iface={{ k3s_flannel_iface }}
|
--flannel-iface={{ k3s_flannel_iface }}
|
||||||
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
||||||
|
register: k3s_agent_install
|
||||||
|
failed_when: false
|
||||||
args:
|
args:
|
||||||
creates: /usr/local/bin/k3s-agent
|
creates: /usr/local/bin/k3s-agent
|
||||||
rescue:
|
|
||||||
- name: Show k3s-agent service status after failed install
|
|
||||||
command: systemctl status k3s-agent --no-pager
|
|
||||||
register: k3s_agent_status_after_install
|
|
||||||
changed_when: false
|
|
||||||
failed_when: false
|
|
||||||
|
|
||||||
- name: Show recent k3s-agent logs after failed install
|
|
||||||
command: journalctl -u k3s-agent -n 120 --no-pager
|
|
||||||
register: k3s_agent_journal_after_install
|
|
||||||
changed_when: false
|
|
||||||
failed_when: false
|
|
||||||
|
|
||||||
- name: Fail with k3s-agent diagnostics
|
|
||||||
fail:
|
|
||||||
msg: |
|
|
||||||
k3s agent install failed on {{ inventory_hostname }}.
|
|
||||||
Service status:
|
|
||||||
{{ k3s_agent_status_after_install.stdout | default('n/a') }}
|
|
||||||
|
|
||||||
Recent logs:
|
|
||||||
{{ k3s_agent_journal_after_install.stdout | default('n/a') }}
|
|
||||||
|
|
||||||
- name: Wait for k3s agent to be ready
|
- name: Wait for k3s agent to be ready
|
||||||
command: systemctl is-active k3s-agent
|
command: systemctl is-active k3s-agent
|
||||||
@@ -56,3 +36,34 @@
|
|||||||
retries: 30
|
retries: 30
|
||||||
delay: 10
|
delay: 10
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
||||||
|
- name: Show k3s-agent service status on failure
|
||||||
|
command: systemctl status k3s-agent --no-pager
|
||||||
|
register: k3s_agent_status
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when: agent_status is failed
|
||||||
|
|
||||||
|
- name: Show recent k3s-agent logs on failure
|
||||||
|
command: journalctl -u k3s-agent -n 120 --no-pager
|
||||||
|
register: k3s_agent_journal
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when: agent_status is failed
|
||||||
|
|
||||||
|
- name: Fail with k3s-agent diagnostics
|
||||||
|
fail:
|
||||||
|
msg: |
|
||||||
|
k3s agent failed to become ready on {{ inventory_hostname }}.
|
||||||
|
Install stdout:
|
||||||
|
{{ k3s_agent_install.stdout | default('n/a') }}
|
||||||
|
|
||||||
|
Install stderr:
|
||||||
|
{{ k3s_agent_install.stderr | default('n/a') }}
|
||||||
|
|
||||||
|
Service status:
|
||||||
|
{{ k3s_agent_status.stdout | default('n/a') }}
|
||||||
|
|
||||||
|
Recent logs:
|
||||||
|
{{ k3s_agent_journal.stdout | default('n/a') }}
|
||||||
|
when: agent_status is failed
|
||||||
|
|||||||
@@ -69,6 +69,8 @@
|
|||||||
{% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
|
{% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
|
||||||
{% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
|
{% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
|
||||||
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
||||||
|
register: primary_install
|
||||||
|
failed_when: false
|
||||||
when:
|
when:
|
||||||
- k3s_install_needed
|
- k3s_install_needed
|
||||||
- k3s_primary | default(false)
|
- k3s_primary | default(false)
|
||||||
@@ -92,36 +94,8 @@
|
|||||||
{% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
|
{% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
|
||||||
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
{% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
|
||||||
register: secondary_install
|
register: secondary_install
|
||||||
|
|
||||||
rescue:
|
|
||||||
- name: Show k3s service status after failed secondary install
|
|
||||||
command: systemctl status k3s --no-pager
|
|
||||||
register: k3s_status_after_install
|
|
||||||
changed_when: false
|
|
||||||
failed_when: false
|
failed_when: false
|
||||||
|
|
||||||
- name: Show recent k3s logs after failed secondary install
|
|
||||||
command: journalctl -u k3s -n 120 --no-pager
|
|
||||||
register: k3s_journal_after_install
|
|
||||||
changed_when: false
|
|
||||||
failed_when: false
|
|
||||||
|
|
||||||
- name: Fail with secondary install diagnostics
|
|
||||||
fail:
|
|
||||||
msg: |
|
|
||||||
Secondary k3s install failed on {{ inventory_hostname }}.
|
|
||||||
Install stdout:
|
|
||||||
{{ secondary_install.stdout | default('n/a') }}
|
|
||||||
|
|
||||||
Install stderr:
|
|
||||||
{{ secondary_install.stderr | default('n/a') }}
|
|
||||||
|
|
||||||
Service status:
|
|
||||||
{{ k3s_status_after_install.stdout | default('n/a') }}
|
|
||||||
|
|
||||||
Recent logs:
|
|
||||||
{{ k3s_journal_after_install.stdout | default('n/a') }}
|
|
||||||
|
|
||||||
- name: Wait for k3s to be ready
|
- name: Wait for k3s to be ready
|
||||||
command: "{{ (k3s_primary | default(false)) | ternary('kubectl get nodes', 'systemctl is-active k3s') }}"
|
command: "{{ (k3s_primary | default(false)) | ternary('kubectl get nodes', 'systemctl is-active k3s') }}"
|
||||||
register: k3s_ready
|
register: k3s_ready
|
||||||
|
|||||||
Reference in New Issue
Block a user