From 3f52bad854f60136bb2d57cc739fb233fd8b9d9a Mon Sep 17 00:00:00 2001
From: MichaelFisher1997
Date: Fri, 24 Apr 2026 11:44:11 +0000
Subject: [PATCH] fix: make Ansible reruns faster and idempotent

---
Reviewer notes (text in this section is ignored by git-am):

* Line breaks and indentation in this mail were restored so that every hunk
  matches its @@ line counts and the diffstat below. Context-line
  indentation assumes the usual 2-space Ansible layout -- TODO confirm
  against the tree before applying.
* bootstrap-image-prepull / rancher-image-prepull: the pull is skipped when
  `ctr images ls -q` already lists the exact reference. Otherwise the script
  tries the pull up to 3 times (120s timeout each, 10s sleep after every
  failed try). The task reports "changed" only when "pulled image" was
  printed, and stays best-effort via `failed_when: false`.
* common: `swapoff -a` now runs only when `swapon --noheadings` printed
  something. The tailscale BackendState is read from the parsed status JSON
  instead of being matched as a raw substring.
* k3s-agent: the install now runs when the systemd unit file is missing or
  when `systemctl is-active k3s-agent` prints anything other than "active".
  NOTE(review): that includes transient states of an already installed
  agent -- confirm this is intended.

 .../bootstrap-image-prepull/tasks/main.yml | 26 ++++++++++++++-----
 ansible/roles/common/tasks/main.yml        | 17 +++++++++++-
 ansible/roles/k3s-agent/tasks/main.yml     | 21 +++++++++++----
 .../rancher-image-prepull/tasks/main.yml   | 26 ++++++++++++++-----
 4 files changed, 72 insertions(+), 18 deletions(-)

diff --git a/ansible/roles/bootstrap-image-prepull/tasks/main.yml b/ansible/roles/bootstrap-image-prepull/tasks/main.yml
index 87565e0..77bc04f 100644
--- a/ansible/roles/bootstrap-image-prepull/tasks/main.yml
+++ b/ansible/roles/bootstrap-image-prepull/tasks/main.yml
@@ -1,19 +1,33 @@
 ---
 - name: Pre-pull bootstrap images into containerd
-  command: timeout 180s /usr/local/bin/ctr -n k8s.io images pull {{ item }}
+  shell: |
+    if /usr/local/bin/ctr -n k8s.io images ls -q | grep -Fx -- "{{ item }}" >/dev/null; then
+      echo "already present"
+      exit 0
+    fi
+
+    for attempt in 1 2 3; do
+      if timeout 120s /usr/local/bin/ctr -n k8s.io images pull "{{ item }}"; then
+        echo "pulled image"
+        exit 0
+      fi
+
+      sleep 10
+    done
+
+    exit 1
+  args:
+    executable: /bin/bash
   register: bootstrap_image_pull
   loop: "{{ bootstrap_prepull_images }}"
-  retries: 6
-  delay: 20
-  until: bootstrap_image_pull.rc == 0
-  changed_when: bootstrap_image_pull.rc == 0
+  changed_when: "'pulled image' in bootstrap_image_pull.stdout"
   failed_when: false
 
 - name: Report bootstrap images that did not pre-pull after retries
   debug:
     msg: >-
       Best-effort bootstrap image pre-pull did not complete for {{ item.item }} after
-      {{ item.attempts | default(1) }} attempt(s): {{ item.stderr | default('no stderr') }}
+      3 attempt(s): {{ item.stderr | default('no stderr') }}
   loop: "{{ bootstrap_image_pull.results | default([]) }}"
   loop_control:
     label: "{{ item.item }}"
diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml
index ace04fd..a34104f 100644
--- a/ansible/roles/common/tasks/main.yml
+++ b/ansible/roles/common/tasks/main.yml
@@ -45,9 +45,16 @@
     state: present
     lock_timeout: 600
 
+- name: Check active swap
+  command: swapon --noheadings
+  register: active_swap
+  changed_when: false
+  failed_when: false
+
 - name: Disable swap
   command: swapoff -a
   changed_when: true
+  when: active_swap.stdout | trim | length > 0
 
 - name: Remove swap from fstab
   lineinfile:
@@ -100,9 +107,17 @@
   failed_when: false
   when: tailscale_auth_key | length > 0
 
+- name: Parse tailscale connection state
+  set_fact:
+    tailscale_backend_state: "{{ (tailscale_status.stdout | from_json).BackendState | default('') }}"
+  when:
+    - tailscale_auth_key | length > 0
+    - tailscale_status.rc == 0
+    - tailscale_status.stdout | length > 0
+
 - name: Connect node to tailnet
   command: tailscale up --authkey {{ tailscale_auth_key }} --hostname {{ inventory_hostname }} --ssh={{ tailscale_ssh | ternary('true', 'false') }} --accept-routes={{ tailscale_accept_routes | ternary('true', 'false') }}
   when:
     - tailscale_auth_key | length > 0
-    - tailscale_status.rc != 0 or '"BackendState":"Running"' not in tailscale_status.stdout
+    - tailscale_status.rc != 0 or (tailscale_backend_state | default('')) != 'Running'
   changed_when: true
diff --git a/ansible/roles/k3s-agent/tasks/main.yml b/ansible/roles/k3s-agent/tasks/main.yml
index a7d7182..7185a2e 100644
--- a/ansible/roles/k3s-agent/tasks/main.yml
+++ b/ansible/roles/k3s-agent/tasks/main.yml
@@ -1,18 +1,29 @@
 ---
-- name: Check if k3s agent is already installed
+- name: Check if k3s agent service exists
   stat:
-    path: /usr/local/bin/k3s-agent
-  register: k3s_agent_binary
+    path: /etc/systemd/system/k3s-agent.service
+  register: k3s_agent_service
+
+- name: Check k3s agent service state
+  command: systemctl is-active k3s-agent
+  register: k3s_agent_service_state
+  changed_when: false
+  failed_when: false
+  when: k3s_agent_service.stat.exists
+
+- name: Determine whether k3s agent install is needed
+  set_fact:
+    k3s_agent_install_needed: "{{ (not k3s_agent_service.stat.exists) or ((k3s_agent_service_state.stdout | default('')) != 'active') }}"
 
 - name: Download k3s install script
   get_url:
     url: https://get.k3s.io
     dest: /tmp/install-k3s.sh
     mode: "0755"
-  when: not k3s_agent_binary.stat.exists
+  when: k3s_agent_install_needed
 
 - name: Install k3s agent
-  when: not k3s_agent_binary.stat.exists
+  when: k3s_agent_install_needed
   block:
     - name: Run k3s agent install
       environment:
diff --git a/ansible/roles/rancher-image-prepull/tasks/main.yml b/ansible/roles/rancher-image-prepull/tasks/main.yml
index 26b28c0..76ae8ae 100644
--- a/ansible/roles/rancher-image-prepull/tasks/main.yml
+++ b/ansible/roles/rancher-image-prepull/tasks/main.yml
@@ -1,19 +1,33 @@
 ---
 - name: Pre-pull Rancher images into containerd
-  command: timeout 180s /usr/local/bin/ctr -n k8s.io images pull {{ item }}
+  shell: |
+    if /usr/local/bin/ctr -n k8s.io images ls -q | grep -Fx -- "{{ item }}" >/dev/null; then
+      echo "already present"
+      exit 0
+    fi
+
+    for attempt in 1 2 3; do
+      if timeout 120s /usr/local/bin/ctr -n k8s.io images pull "{{ item }}"; then
+        echo "pulled image"
+        exit 0
+      fi
+
+      sleep 10
+    done
+
+    exit 1
+  args:
+    executable: /bin/bash
   register: rancher_image_pull
   loop: "{{ rancher_images_to_prepull }}"
-  retries: 6
-  delay: 20
-  until: rancher_image_pull.rc == 0
-  changed_when: rancher_image_pull.rc == 0
+  changed_when: "'pulled image' in rancher_image_pull.stdout"
   failed_when: false
 
 - name: Report Rancher images that did not pre-pull after retries
   debug:
     msg: >-
       Best-effort Rancher image pre-pull did not complete for {{ item.item }} after
-      {{ item.attempts | default(1) }} attempt(s): {{ item.stderr | default('no stderr') }}
+      3 attempt(s): {{ item.stderr | default('no stderr') }}
   loop: "{{ rancher_image_pull.results | default([]) }}"
   loop_control:
     label: "{{ item.item }}"