# b20356e9fe
# The Tailscale cleanup role was deleting reserved service hostnames on later
# deploy runs, which removed the live Rancher/Grafana/Prometheus/Flux proxy
# nodes from the tailnet. Skip cleanup whenever the current cluster already has
# those Tailscale services, while still allowing cleanup on fresh rebuilds.
# 165 lines | 4.5 KiB | YAML
---
|
|
- name: Bootstrap Kubernetes cluster
|
|
hosts: cluster
|
|
become: true
|
|
gather_facts: true
|
|
|
|
pre_tasks:
|
|
- name: Wait for SSH
|
|
wait_for_connection:
|
|
delay: 10
|
|
timeout: 300
|
|
|
|
roles:
|
|
- common
|
|
|
|
- name: Setup primary control plane
|
|
hosts: control_plane[0]
|
|
become: true
|
|
|
|
vars:
|
|
k3s_primary: true
|
|
k3s_token: "{{ lookup('password', '/dev/null length=32 chars=ascii_letters,digits') }}"
|
|
k3s_primary_private_ip: "{{ k3s_private_ip }}"
|
|
k3s_primary_public_ip: "{{ ansible_host }}"
|
|
k3s_primary_ip: "{{ k3s_private_ip }}"
|
|
k3s_node_ip: "{{ k3s_private_ip }}"
|
|
# kube_api_endpoint is set in inventory group_vars
|
|
|
|
roles:
|
|
- k3s-server
|
|
|
|
- name: Get join info from primary
|
|
hosts: control_plane[0]
|
|
become: true
|
|
tasks:
|
|
- name: Fetch node token
|
|
command: cat /var/lib/rancher/k3s/server/node-token
|
|
register: node_token
|
|
changed_when: false
|
|
|
|
- name: Set join token fact
|
|
set_fact:
|
|
k3s_token: "{{ node_token.stdout }}"
|
|
k3s_primary_private_ip: "{{ k3s_private_ip }}"
|
|
k3s_primary_public_ip: "{{ ansible_host }}"
|
|
|
|
- name: Fetch kubeconfig
|
|
fetch:
|
|
src: /etc/rancher/k3s/k3s.yaml
|
|
dest: ../outputs/kubeconfig
|
|
flat: true
|
|
|
|
- name: Bootstrap addon prerequisite secrets
|
|
hosts: control_plane[0]
|
|
become: true
|
|
|
|
roles:
|
|
- addon-secrets-bootstrap
|
|
|
|
- name: Deploy Hetzner CCM (required for workers with external cloud provider)
|
|
hosts: control_plane[0]
|
|
become: true
|
|
|
|
roles:
|
|
- ccm-deploy
|
|
|
|
- name: Setup secondary control planes
|
|
hosts: control_plane[1:]
|
|
become: true
|
|
|
|
vars:
|
|
k3s_primary: false
|
|
k3s_token: "{{ hostvars[groups['control_plane'][0]]['k3s_token'] }}"
|
|
k3s_primary_ip: "{{ hostvars[groups['control_plane'][0]]['k3s_primary_private_ip'] }}"
|
|
k3s_primary_public_ip: "{{ hostvars[groups['control_plane'][0]]['k3s_primary_public_ip'] }}"
|
|
k3s_node_ip: "{{ k3s_private_ip }}"
|
|
# Use Load Balancer for HA - all control planes join via LB endpoint
|
|
k3s_join_endpoint: "{{ kube_api_endpoint | default(hostvars[groups['control_plane'][0]]['k3s_primary_private_ip']) }}"
|
|
|
|
roles:
|
|
- k3s-server
|
|
|
|
- name: Setup workers
|
|
hosts: workers
|
|
become: true
|
|
|
|
vars:
|
|
k3s_token: "{{ hostvars[groups['control_plane'][0]]['k3s_token'] }}"
|
|
# Use Load Balancer for HA - workers join via LB endpoint
|
|
k3s_server_url: "https://{{ kube_api_endpoint | default(hostvars[groups['control_plane'][0]]['k3s_primary_private_ip']) }}:6443"
|
|
k3s_node_ip: "{{ k3s_private_ip }}"
|
|
|
|
roles:
|
|
- k3s-agent
|
|
|
|
- name: Deploy observability stack
|
|
hosts: control_plane[0]
|
|
become: true
|
|
|
|
roles:
|
|
- role: observability
|
|
when: not (observability_gitops_enabled | default(true) | bool)
|
|
|
|
- name: Provision Grafana content
|
|
hosts: control_plane[0]
|
|
become: true
|
|
|
|
roles:
|
|
- role: observability-content
|
|
when: not (observability_gitops_enabled | default(true) | bool)
|
|
|
|
- name: Bootstrap Doppler access for External Secrets
|
|
hosts: control_plane[0]
|
|
become: true
|
|
|
|
roles:
|
|
- doppler-bootstrap
|
|
|
|
- name: Detect existing Tailscale service proxies
|
|
hosts: control_plane[0]
|
|
become: true
|
|
tasks:
|
|
- name: Check for current Tailscale service hostnames
|
|
command: kubectl get svc -A -o jsonpath='{range .items[*]}{.metadata.annotations.tailscale\.com/hostname}{"\n"}{end}'
|
|
register: existing_tailscale_hostnames
|
|
changed_when: false
|
|
failed_when: false
|
|
|
|
# Remove stale Tailscale devices from the tailnet, but only on a fresh
# rebuild: if the cluster already exposes any of the reserved hostnames as
# Tailscale service proxies, deleting those devices would take the live
# Rancher/Grafana/Prometheus/Flux endpoints offline.
- name: Clean up stale Tailscale devices
  hosts: localhost
  connection: local
  vars:
    # Hostnames owned by live service proxies; never delete while present.
    tailscale_reserved_hostnames:
      - rancher
      - grafana
      - prometheus
      - flux
    # Result registered by the "Detect existing Tailscale service proxies"
    # play; defaults below keep this play safe if detection never ran.
    tailscale_detect_result: "{{ hostvars[groups['control_plane'][0]].existing_tailscale_hostnames | default({}) }}"
  tasks:
    - name: Delete stale devices only before service proxies exist
      include_role:
        name: tailscale-cleanup
      # Fail safe: run cleanup only when the detection command actually
      # succeeded (rc == 0) AND reported none of the reserved hostnames.
      # Previously a failed detection (failed_when: false, empty stdout) was
      # indistinguishable from "no proxies exist", so cleanup could still
      # delete the live proxy devices from the tailnet.
      when:
        - (tailscale_detect_result.rc | default(1)) == 0
        - >-
          tailscale_detect_result.stdout_lines | default([])
          | intersect(tailscale_reserved_hostnames)
          | length == 0
# Final play: rewrite the fetched kubeconfig to point at the primary control
# plane's public address, then print access instructions.
- name: Finalize
  hosts: localhost
  connection: local
  tasks:
    # k3s writes its kubeconfig with server https://127.0.0.1:6443; replace
    # the loopback address with the primary's public IP. The dots are escaped
    # so the pattern matches only the literal address — an unescaped "." is a
    # regex wildcard and would also match strings like "127a0b0c1".
    # NOTE(review): `sed -i` with no suffix is GNU syntax; BSD/macOS sed needs
    # `-i ''` — confirm the control node runs GNU sed.
    - name: Update kubeconfig server address
      ansible.builtin.command: |
        sed -i 's/127\.0\.0\.1/{{ hostvars[groups["control_plane"][0]]["ansible_host"] }}/g' ../outputs/kubeconfig
      changed_when: true  # always mutates the local kubeconfig copy

    - name: Display success message
      ansible.builtin.debug:
        msg: |
          Cluster setup complete!
          Control planes: {{ groups['control_plane'] | length }}
          Workers: {{ groups['workers'] | length }}
          To access the cluster:
          export KUBECONFIG={{ playbook_dir }}/../outputs/kubeconfig
          kubectl get nodes