This fixes the chicken-and-egg problem where workers started with --kubelet-arg=cloud-provider=external couldn't join because the CCM wasn't yet running to remove the node.cloudprovider.kubernetes.io/uninitialized taint.

Changes:
- Create ansible/roles/ccm-deploy/ to deploy the CCM via Helm during the Ansible phase
- Reorder site.yml: the CCM deploys after secrets but before workers join
- The CCM runs on control_plane[0] with proper tolerations for control-plane nodes
- Add a 10s pause after the CCM is ready to ensure it can process new nodes
- Workers can now successfully join with the external cloud provider enabled

Flux still manages the CCM for updates, but the initial install happens in Ansible.
63 lines
1.9 KiB
YAML
63 lines
1.9 KiB
YAML
---
|
|
# Probe for the `hcloud` Secret in the kube-system namespace.
# This task never fails by itself (failed_when: false); the kubectl return
# code is stored in `hcloud_secret_check` for the following task to evaluate.
- name: Check if hcloud secret exists
  ansible.builtin.command:
    argv:
      - kubectl
      - "-n"
      - kube-system
      - get
      - secret
      - hcloud
  environment:
    # Pin the same kubeconfig the Helm tasks in this file use, so the probe
    # does not depend on the remote user's ambient kubectl configuration.
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  register: hcloud_secret_check
  changed_when: false
  failed_when: false
|
|
|
|
# Abort the play when the preceding kubectl probe returned non-zero.
# A non-zero return code does not necessarily mean "NotFound" (it can also be
# a connection or kubeconfig error), so kubectl's own stderr is appended to
# the message to make the real cause visible.
- name: Fail if hcloud secret is missing
  ansible.builtin.fail:
    msg: >-
      hcloud secret not found in kube-system namespace. CCM requires it.
      kubectl reported:
      {{ hcloud_secret_check.stderr | default('(no error output)', true) }}
  when: hcloud_secret_check.rc != 0
|
|
|
|
# Register the chart repository at https://charts.hetzner.cloud under the
# local alias `hcloud`. The kubeconfig path is supplied both as a module
# argument and through the KUBECONFIG environment variable.
- name: Add Hetzner Helm repository
  environment:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  kubernetes.core.helm_repository:
    repo_url: "https://charts.hetzner.cloud"
    name: hcloud
    kubeconfig: /etc/rancher/k3s/k3s.yaml
|
|
|
|
# Install the `hcloud/hcloud-cloud-controller-manager` chart as release
# `hcloud-cloud-controller-manager` in kube-system and block (up to 300s)
# until Helm reports the release resources ready.
- name: Deploy Hetzner Cloud Controller Manager
  environment:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  kubernetes.core.helm:
    kubeconfig: /etc/rancher/k3s/k3s.yaml
    chart_ref: hcloud/hcloud-cloud-controller-manager
    name: hcloud-cloud-controller-manager
    release_namespace: kube-system
    create_namespace: true
    wait: true
    wait_timeout: "300s"
    values:
      networking:
        enabled: true
      # Pin the pod to the node whose hostname label matches this host.
      # NOTE(review): assumes the Kubernetes node name equals the Ansible
      # inventory hostname - confirm against the inventory.
      nodeSelector:
        kubernetes.io/hostname: "{{ inventory_hostname }}"
      # Allow scheduling onto nodes tainted as control plane.
      additionalTolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/control-plane
          operator: Exists
|
|
|
|
# Wait for the CCM Deployment rollout to complete.
# Each kubectl attempt waits up to 120s; Ansible re-runs the command up to
# 3 times, 10 seconds apart, until it exits with return code 0.
- name: Wait for CCM to be ready
  ansible.builtin.command:
    argv:
      - kubectl
      - "-n"
      - kube-system
      - rollout
      - status
      - deployment/hcloud-cloud-controller-manager
      - "--timeout=120s"
  environment:
    # Pin the same kubeconfig the Helm tasks in this file use, so the check
    # does not depend on the remote user's ambient kubectl configuration.
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  register: ccm_rollout
  changed_when: false
  until: ccm_rollout.rc == 0
  retries: 3
  delay: 10
|
|
|
|
# Fixed 10-second grace period that follows the rollout check, before the
# play continues.
- name: Pause to ensure CCM is fully ready to process new nodes
  ansible.builtin.pause:
    seconds: 10
|
|
|
|
# Collect every remaining `node.cloudprovider.kubernetes.io/uninitialized`
# taint across all nodes. The JSONPath prints the taint *key* once per match
# (not the node name), so stdout is empty when no node carries the taint.
# Informational only: the task never fails (failed_when: false) and the
# result is stored in `uninitialized_taints`.
- name: Verify CCM is removing uninitialized taints
  ansible.builtin.command:
    argv:
      - kubectl
      - get
      - nodes
      - "-o"
      - 'jsonpath={.items[*].spec.taints[?(@.key=="node.cloudprovider.kubernetes.io/uninitialized")].key}'
  environment:
    # Pin the same kubeconfig the Helm tasks in this file use, so the query
    # does not depend on the remote user's ambient kubectl configuration.
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  register: uninitialized_taints
  changed_when: false
  failed_when: false
|
|
|
|
# Report the outcome of the taint query stored in `uninitialized_taints`.
# The query emits one taint key per match rather than node names, so the
# message reports a count instead of pretending to list nodes. A failed
# kubectl call is reported as such instead of looking like "no taints".
- name: Display taint status
  ansible.builtin.debug:
    msg: >-
      {{
        ('Could not query node taints: '
         ~ (uninitialized_taints.stderr | default('(no error output)', true)))
        if (uninitialized_taints.rc | default(1)) != 0
        else
        ('Number of nodes still carrying the uninitialized taint: '
         ~ ((uninitialized_taints.stdout | default('')).split() | length))
      }}
|