Files
HetznerTerra/ansible/roles/k3s-server/tasks/main.yml
MichaelFisher1997 ff31cb4e74
Some checks failed
Deploy Cluster / Terraform (push) Failing after 10s
Deploy Cluster / Ansible (push) Has been skipped
Implement HA control plane with Load Balancer (3-3 topology)
Major changes:
- Terraform: Scale to 3 control planes (cx23) + 3 workers (cx33)
- Terraform: Add Hetzner Load Balancer (lb11) for Kubernetes API
- Terraform: Add kube_api_lb_ip output
- Ansible: Add community.network collection to requirements
- Ansible: Update inventory to include LB endpoint
- Ansible: Configure secondary CPs and workers to join via LB
- Ansible: Add k3s_join_endpoint variable for HA joins
- Workflow: Add imports for cp-2, cp-3, and worker-3
- Docs: Update STABLE_BASELINE.md with HA topology and phase gates

Topology:
- 3 control planes (cx23 - 2 vCPU, 8GB RAM each)
- 3 workers (cx33 - 4 vCPU, 16GB RAM each)
- 1 Load Balancer (lb11) routing to all 3 control planes on port 6443
- Workers and secondary CPs join via LB endpoint for HA

Cost impact: +~€26/month (2 extra CPs + 1 extra worker + LB)
2026-03-23 02:39:39 +00:00

177 lines
5.1 KiB
YAML

---
# Probe for the k3s systemd unit file; the registered result drives the
# install decision made further down.
- name: Check if k3s service exists
  register: k3s_service
  stat:
    path: /etc/systemd/system/k3s.service
# Read-only query of the unit state. The task is never marked failed or
# changed; only its stdout is consumed afterwards. It is skipped entirely
# when the unit file was not found.
- name: Check k3s service state
  when: k3s_service.stat.exists
  command:
    cmd: systemctl is-active k3s
  register: k3s_service_state
  failed_when: false
  changed_when: false
# An install is needed unless the unit file exists AND the service reported
# "active". When the state probe was skipped there is no stdout, hence the
# default('').
- name: Determine whether k3s install is needed
  set_fact:
    k3s_install_needed: "{{ not (k3s_service.stat.exists and (k3s_service_state.stdout | default('')) == 'active') }}"
# Non-primary servers block here until the join endpoint accepts TCP
# connections on 6443. Falls back to the primary's IP when no dedicated
# join endpoint is configured.
- name: Wait for API endpoint on 6443 (secondary only)
  when: not (k3s_primary | default(false))
  wait_for:
    state: started
    host: "{{ k3s_join_endpoint | default(k3s_primary_ip) }}"
    port: 6443
    timeout: 120
# Only relevant when a (re)install is pending: look for the uninstall script
# so a leftover installation can be reset first.
- name: Check if uninstall script exists
  when: k3s_install_needed
  register: k3s_uninstall_script
  stat:
    path: /usr/local/bin/k3s-uninstall.sh
# Run the uninstall script only when a reinstall is pending and the script
# was actually found. The conditions are evaluated in order, so the stat
# result is not touched when the stat task itself was skipped.
- name: Reset broken k3s install before reinstall
  when:
    - k3s_install_needed
    - k3s_uninstall_script.stat.exists
  command:
    cmd: /usr/local/bin/k3s-uninstall.sh
# Delete the k3s config and data directories before a (re)install so the
# installer starts from a clean slate.
- name: Remove stale k3s data
  when: k3s_install_needed
  loop:
    - /etc/rancher/k3s
    - /var/lib/rancher/k3s
  file:
    state: absent
    path: "{{ item }}"
# Fetch the installer to /tmp and mark it executable; the install tasks below
# invoke it from that path.
- name: Download k3s install script
  when: k3s_install_needed
  get_url:
    dest: /tmp/install-k3s.sh
    url: https://get.k3s.io
    mode: "0755"
# Bootstrap the first control-plane node (--cluster-init).
#
# Environment:
#   INSTALL_K3S_VERSION  pinned version, or empty when k3s_version is 'latest'
#   K3S_TOKEN            shared cluster token
#
# TLS SANs: the primary's private and public IPs are always included. When a
# separate join endpoint (k3s_join_endpoint, e.g. a load balancer address) is
# configured, it is added as well, because the other servers in this role
# join through https://<k3s_join_endpoint>:6443 and the serving certificate
# should be valid for that address.
#
# The three trailing flags are optional and toggled by role variables.
- name: Install k3s server (primary)
  environment:
    INSTALL_K3S_VERSION: "{{ k3s_version if k3s_version != 'latest' else '' }}"
    K3S_TOKEN: "{{ k3s_token }}"
  command: >-
    /tmp/install-k3s.sh server
    --cluster-init
    --advertise-address={{ k3s_primary_ip }}
    --node-ip={{ k3s_node_ip }}
    --tls-san={{ k3s_primary_ip }}
    --tls-san={{ k3s_primary_public_ip }}
    {% if (k3s_join_endpoint | default('')) and k3s_join_endpoint not in [k3s_primary_ip, k3s_primary_public_ip] %}--tls-san={{ k3s_join_endpoint }}{% endif %}
    {% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
    {% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
    {% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
  when:
    - k3s_install_needed
    - k3s_primary | default(false)
# Join an additional control-plane node to the existing cluster.
# The install runs inside a block so that, on failure, the rescue section can
# collect the service status and recent journal output and fail with a single
# message containing all of it.
- name: Install k3s server (secondary)
  block:
    # Joins via the configured join endpoint, falling back to the primary IP.
    - name: Run secondary k3s install
      register: secondary_install
      environment:
        K3S_TOKEN: "{{ k3s_token }}"
        INSTALL_K3S_VERSION: "{{ (k3s_version != 'latest') | ternary(k3s_version, '') }}"
      command:
        cmd: >-
          /tmp/install-k3s.sh server
          --server https://{{ k3s_join_endpoint | default(k3s_primary_ip) }}:6443
          --advertise-address={{ k3s_node_ip }}
          --node-ip={{ k3s_node_ip }}
          {% if k3s_disable_embedded_ccm | bool %}--disable-cloud-controller{% endif %}
          {% if k3s_disable_servicelb | bool %}--disable=servicelb{% endif %}
          {% if k3s_kubelet_cloud_provider_external | bool %}--kubelet-arg=cloud-provider=external{% endif %}
  rescue:
    # Both probes are informational: never failed, never changed.
    - name: Show k3s service status after failed secondary install
      command:
        cmd: systemctl status k3s --no-pager
      register: k3s_status_after_install
      failed_when: false
      changed_when: false
    - name: Show recent k3s logs after failed secondary install
      command:
        cmd: journalctl -u k3s -n 120 --no-pager
      register: k3s_journal_after_install
      failed_when: false
      changed_when: false
    # Re-raise the failure with everything gathered above.
    - name: Fail with secondary install diagnostics
      fail:
        msg: |
          Secondary k3s install failed on {{ inventory_hostname }}.
          Install stdout:
          {{ secondary_install.stdout | default('n/a') }}
          Install stderr:
          {{ secondary_install.stderr | default('n/a') }}
          Service status:
          {{ k3s_status_after_install.stdout | default('n/a') }}
          Recent logs:
          {{ k3s_journal_after_install.stdout | default('n/a') }}
  when:
    - k3s_install_needed
    - not (k3s_primary | default(false))
# Readiness gate with diagnostics.
#
# The readiness probe is retried up to 120 times, 10 seconds apart. The
# primary is probed with `kubectl get nodes`, every other server with
# `systemctl is-active k3s`.
#
# The probe lives inside a block so that the diagnostics actually run: a
# task that exhausts its retries fails the host, and follow-up tasks guarded
# only by `when: k3s_ready is failed` would never be reached. The rescue
# section gathers the service status and recent journal output, then fails
# the host with a combined message.
- name: Wait for k3s and report diagnostics on failure
  block:
    - name: Wait for k3s to be ready
      command: "{{ (k3s_primary | default(false)) | ternary('kubectl get nodes', 'systemctl is-active k3s') }}"
      register: k3s_ready
      until: k3s_ready.rc == 0
      retries: 120
      delay: 10
      changed_when: false
  rescue:
    # Informational probes: never failed, never changed.
    - name: Show k3s service status on failure
      command: systemctl status k3s --no-pager
      register: k3s_status
      changed_when: false
      failed_when: false
    - name: Show recent k3s logs on failure
      command: journalctl -u k3s -n 80 --no-pager
      register: k3s_journal
      changed_when: false
      failed_when: false
    # Re-raise the failure so the play still stops for this host.
    - name: Fail with k3s diagnostics
      fail:
        msg: |
          k3s failed to become ready on {{ inventory_hostname }}.
          Service status:
          {{ k3s_status.stdout | default('n/a') }}
          Recent logs:
          {{ k3s_journal.stdout | default('n/a') }}
# Create root's kubeconfig directory on the primary (mode 0700). The task
# only creates the directory, so it is named accordingly; this also keeps
# its name distinct from the task that links the kubeconfig into it.
- name: Ensure .kube directory exists for root
  file:
    path: /root/.kube
    state: directory
    mode: "0700"
  when: k3s_primary | default(false)
# Primary only: expose the k3s-generated kubeconfig at root's default
# kubeconfig path via a symlink. force is set so the link is created even if
# the destination already exists or the source is absent.
- name: Copy kubeconfig to default location for root
  when:
    - k3s_primary | default(false)
  file:
    state: link
    force: true
    dest: /root/.kube/config
    src: /etc/rancher/k3s/k3s.yaml
# Primary only, and only when connecting as a non-root user: create that
# user's ~/.kube directory. The owner is set explicitly so the directory
# belongs to the user rather than to whichever account the task runs as
# (presumably root via privilege escalation - confirm against the play).
- name: Ensure .kube directory exists for ansible user
  file:
    path: "/home/{{ ansible_user }}/.kube"
    state: directory
    owner: "{{ ansible_user }}"
    mode: "0755"
  when:
    - ansible_user != 'root'
    - k3s_primary | default(false)