Files
HetznerTerra/ansible/site.yml
T
micqdf ee6417c18e
Deploy Cluster / Terraform (push) Successful in 28s
Deploy Cluster / Ansible (push) Has been cancelled
fix: pre-pull core bootstrap images on cp1 before Flux bootstrap
Fresh clusters were repeatedly timing out while kubelet pulled the pause image,
k3s packaged component images, and Flux controller images onto the first
control plane. Pre-pull the core control-plane bootstrap images into
containerd on cp-1 so Flux and packaged addons start from a warm cache instead
of racing registry TLS timeouts.
2026-04-23 05:55:14 +00:00

194 lines
5.0 KiB
YAML

---
# Remove leftover Tailscale device records for cluster nodes before
# provisioning, so rejoining nodes do not collide with stale devices.
- name: Clean up stale Tailscale cluster node devices
  hosts: localhost
  connection: local
  vars:
    # Hostnames of current cluster members that must never be deleted.
    tailscale_reserved_hostnames: "{{ groups['cluster'] | default([]) | list }}"
  roles:
    - tailscale-cleanup
# Base OS preparation for every cluster node.
#
# Fact gathering is disabled here because Ansible gathers facts BEFORE
# pre_tasks run; with gather_facts enabled, a host still booting would fail
# fact collection before wait_for_connection ever executed. We wait first,
# then gather facts explicitly.
- name: Bootstrap Kubernetes cluster
  hosts: cluster
  become: true
  gather_facts: false
  pre_tasks:
    - name: Wait for SSH
      ansible.builtin.wait_for_connection:
        delay: 10
        timeout: 300
    - name: Gather facts once the host is reachable
      ansible.builtin.setup:
  roles:
    - common
# Install k3s on the first control-plane node.
- name: Setup primary control plane
  hosts: control_plane[0]
  become: true
  vars:
    k3s_primary: true
    # NOTE(review): this lookup is lazily templated, so each reference yields
    # a fresh random value. That appears tolerable only because the real
    # token is read back from node-token in the next play — confirm the
    # k3s-server role evaluates it exactly once.
    k3s_token: "{{ lookup('password', '/dev/null length=32 chars=ascii_letters,digits') }}"
    k3s_primary_private_ip: "{{ k3s_private_ip }}"
    k3s_primary_public_ip: "{{ ansible_host }}"
    k3s_primary_ip: "{{ k3s_private_ip }}"
    k3s_node_ip: "{{ k3s_private_ip }}"
    # kube_api_endpoint is set in inventory group_vars
  roles:
    - k3s-server
# Read the real join token generated by k3s and export it (plus the primary's
# addresses) as facts for the join plays below; also pull the kubeconfig.
- name: Get join info from primary
  hosts: control_plane[0]
  become: true
  tasks:
    - name: Fetch node token
      ansible.builtin.command: cat /var/lib/rancher/k3s/server/node-token
      register: node_token
      changed_when: false
      # Keep the cluster join secret out of task output / logs.
      no_log: true
    - name: Set join token fact
      ansible.builtin.set_fact:
        k3s_token: "{{ node_token.stdout }}"
        k3s_primary_private_ip: "{{ k3s_private_ip }}"
        k3s_primary_public_ip: "{{ ansible_host }}"
      no_log: true
    - name: Fetch kubeconfig
      ansible.builtin.fetch:
        src: /etc/rancher/k3s/k3s.yaml
        dest: ../outputs/kubeconfig
        flat: true
# Create secrets that cluster addons require before they are deployed.
- name: Bootstrap addon prerequisite secrets
  hosts: control_plane[0]
  become: true
  roles:
    - addon-secrets-bootstrap
# Provide a highly available virtual IP for the Kubernetes API.
- name: Deploy kube-vip for API HA
  hosts: control_plane[0]
  become: true
  roles:
    - kube-vip-deploy
# Join the remaining control-plane nodes using the token and addresses
# exported by the "Get join info from primary" play.
- name: Setup secondary control planes
  hosts: "control_plane[1:]"
  become: true
  vars:
    k3s_primary: false
    k3s_token: "{{ hostvars[groups['control_plane'][0]]['k3s_token'] }}"
    k3s_primary_ip: "{{ hostvars[groups['control_plane'][0]]['k3s_primary_private_ip'] }}"
    k3s_primary_public_ip: "{{ hostvars[groups['control_plane'][0]]['k3s_primary_public_ip'] }}"
    k3s_node_ip: "{{ k3s_private_ip }}"
    # Use Load Balancer for HA - all control planes join via LB endpoint
    k3s_join_endpoint: "{{ kube_api_endpoint | default(hostvars[groups['control_plane'][0]]['k3s_primary_private_ip']) }}"
  roles:
    - k3s-server
# Join worker nodes as k3s agents.
- name: Setup workers
  hosts: workers
  become: true
  vars:
    k3s_token: "{{ hostvars[groups['control_plane'][0]]['k3s_token'] }}"
    # Use Load Balancer for HA - workers join via LB endpoint
    k3s_server_url: "https://{{ kube_api_endpoint | default(hostvars[groups['control_plane'][0]]['k3s_primary_private_ip']) }}:6443"
    k3s_node_ip: "{{ k3s_private_ip }}"
  roles:
    - k3s-agent
# Warm containerd on the first control plane with core bootstrap images
# (pause, packaged addons, Flux controllers) so Flux bootstrap does not
# race registry pulls.
- name: Pre-pull bootstrap control-plane images
  hosts: control_plane[0]
  become: true
  roles:
    - bootstrap-image-prepull
# Warm every node's image cache with Rancher bootstrap images.
- name: Pre-pull Rancher bootstrap images
  hosts: cluster
  become: true
  roles:
    - rancher-image-prepull
# Direct deployment of the observability stack — skipped when the stack is
# managed by GitOps (observability_gitops_enabled defaults to true).
- name: Deploy observability stack
  hosts: control_plane[0]
  become: true
  roles:
    - role: observability
      when: not (observability_gitops_enabled | default(true) | bool)
# Grafana dashboards/datasources — skipped when GitOps manages observability.
- name: Provision Grafana content
  hosts: control_plane[0]
  become: true
  roles:
    - role: observability-content
      when: not (observability_gitops_enabled | default(true) | bool)
# Provide Doppler credentials for the External Secrets operator.
- name: Bootstrap Doppler access for External Secrets
  hosts: control_plane[0]
  become: true
  roles:
    - doppler-bootstrap
# Record which Tailscale service-proxy hostnames already exist in the
# cluster; the cleanup play below consults this to avoid deleting devices
# that back live service proxies.
- name: Detect existing Tailscale service proxies
  hosts: control_plane[0]
  become: true
  tasks:
    - name: Check for current Tailscale service hostnames
      ansible.builtin.command: kubectl get svc -A -o jsonpath='{range .items[*]}{.metadata.annotations.tailscale\.com/hostname}{"\n"}{end}'
      register: existing_tailscale_hostnames
      changed_when: false
      # Best-effort: tolerate kubectl failures (e.g. API not yet reachable).
      failed_when: false
# Delete stale Tailscale devices for the well-known service hostnames, but
# ONLY while no service proxy has claimed any of them yet — once a proxy
# exists, deleting its device would break the live service.
- name: Clean up stale Tailscale devices
  hosts: localhost
  connection: local
  vars:
    tailscale_reserved_hostnames:
      - rancher
      - grafana
      - prometheus
      - flux
  tasks:
    - name: Delete stale devices only before service proxies exist
      ansible.builtin.include_role:
        name: tailscale-cleanup
      when: >-
        hostvars[groups['control_plane'][0]].existing_tailscale_hostnames.stdout_lines | default([])
        | intersect(tailscale_reserved_hostnames)
        | length == 0
# Point the fetched kubeconfig at the primary's public address and print
# usage instructions.
- name: Finalize
  hosts: localhost
  connection: local
  tasks:
    - name: Check whether kubeconfig was fetched
      ansible.builtin.stat:
        path: ../outputs/kubeconfig
      register: kubeconfig_file
    # replace is idempotent, unlike the previous `command: sed -i`, and
    # escapes the dots so only the literal loopback address matches.
    - name: Update kubeconfig server address
      ansible.builtin.replace:
        path: ../outputs/kubeconfig
        regexp: '127\.0\.0\.1'
        replace: "{{ hostvars[groups['control_plane'][0]]['ansible_host'] }}"
      when: kubeconfig_file.stat.exists
    - name: Display success message
      ansible.builtin.debug:
        msg: |
          Cluster setup complete!
          Control planes: {{ groups['control_plane'] | default([]) | length }}
          Workers: {{ groups['workers'] | default([]) | length }}
          To access the cluster:
          export KUBECONFIG={{ playbook_dir }}/../outputs/kubeconfig
          kubectl get nodes