Deploy CCM via Ansible before workers join to fix external cloud provider
This fixes the chicken-and-egg problem where workers started with --kubelet-arg=cloud-provider=external couldn't join because CCM wasn't running yet to remove the node.cloudprovider.kubernetes.io/uninitialized taint.

Changes:
- Create ansible/roles/ccm-deploy/ to deploy CCM via Helm during the Ansible phase
- Reorder site.yml: CCM deploys after secrets but before workers join
- CCM runs on control_plane[0] with proper tolerations for control plane nodes
- Add a 10s pause after CCM is ready to ensure it can process new nodes
- Workers can now successfully join with the external cloud provider enabled

Flux still manages CCM for updates, but the initial install happens in Ansible.
This commit is contained in:
62
ansible/roles/ccm-deploy/tasks/main.yml
Normal file
62
ansible/roles/ccm-deploy/tasks/main.yml
Normal file
@@ -0,0 +1,62 @@
|
||||
---
|
||||
# Probe for the `hcloud` secret in kube-system. The task itself never fails
# or reports a change; its result is registered as `hcloud_secret_check` so
# that later tasks can act on the kubectl return code.
- name: Check if hcloud secret exists
  command: kubectl -n kube-system get secret hcloud
  register: hcloud_secret_check
  # Read-only lookup: execute it for real under --check too, so that
  # `hcloud_secret_check.rc` always reflects an actual kubectl run.
  check_mode: false
  changed_when: false
  failed_when: false
|
||||
|
||||
# Abort the play when the secret probe returned a non-zero exit code.
# kubectl's stderr is appended to the message because a non-zero code can
# also come from causes other than a missing secret (the probe ignores all
# errors), and the operator needs to see which one it was.
- name: Fail if hcloud secret is missing
  fail:
    msg: >-
      hcloud secret not found in kube-system namespace. CCM requires it.
      kubectl reported:
      {{ hcloud_secret_check.stderr | default('(no output)', true) }}
  when: hcloud_secret_check.rc != 0
|
||||
|
||||
# Register the Hetzner chart repository under the alias `hcloud`.
# The k3s kubeconfig is supplied both as a module argument and through the
# KUBECONFIG environment variable.
- name: Add Hetzner Helm repository
  environment:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  kubernetes.core.helm_repository:
    repo_url: https://charts.hetzner.cloud
    name: hcloud
    kubeconfig: /etc/rancher/k3s/k3s.yaml
|
||||
|
||||
# Deploy the hcloud-cloud-controller-manager chart as a Helm release in
# kube-system, waiting (up to 300s) for the release before continuing.
- name: Deploy Hetzner Cloud Controller Manager
  environment:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  kubernetes.core.helm:
    kubeconfig: /etc/rancher/k3s/k3s.yaml
    chart_ref: hcloud/hcloud-cloud-controller-manager
    name: hcloud-cloud-controller-manager
    release_namespace: kube-system
    create_namespace: true
    wait: true
    wait_timeout: 300s
    values:
      networking:
        enabled: true
      # Pin the workload to the host this task is running against.
      # NOTE(review): assumes the node's kubernetes.io/hostname label equals
      # the Ansible inventory name - confirm for this inventory.
      nodeSelector:
        kubernetes.io/hostname: "{{ inventory_hostname }}"
      # Tolerate the control-plane NoSchedule taint.
      additionalTolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
|
||||
|
||||
# Poll the CCM deployment rollout. Each attempt lets kubectl wait up to
# 120s; the task is retried (3 retries, 10s apart) until kubectl exits 0.
- name: Wait for CCM to be ready
  command:
    cmd: >-
      kubectl -n kube-system rollout status
      deployment/hcloud-cloud-controller-manager --timeout=120s
  register: ccm_rollout
  retries: 3
  delay: 10
  until: ccm_rollout.rc == 0
  changed_when: false
|
||||
|
||||
# Fixed 10-second grace period after the rollout check, before the play
# moves on.
- name: Pause to ensure CCM is fully ready to process new nodes
  pause:
    seconds: 10
|
||||
|
||||
# Collect the key of every node taint named
# node.cloudprovider.kubernetes.io/uninitialized. Informational only:
# the task never fails and never reports a change; the output is registered
# as `uninitialized_taints`.
- name: Verify CCM is removing uninitialized taints
  command:
    cmd: >-
      kubectl get nodes
      -o jsonpath='{.items[*].spec.taints[?(@.key=="node.cloudprovider.kubernetes.io/uninitialized")].key}'
  register: uninitialized_taints
  failed_when: false
  changed_when: false
|
||||
|
||||
# Print the stdout captured in `uninitialized_taints` for the operator.
- name: Display taint status
  debug:
    msg: >-
      Nodes with uninitialized taint: {{ uninitialized_taints.stdout }}
|
||||
Reference in New Issue
Block a user