Deploy fixes: force a kube-prometheus-stack reconcile before waiting on
addon-observability, harden the post-deploy health gates, seed the kube-vip
image onto secondary control planes, and pull Rancher images through
registry.rancher.com.

Leading whitespace was reconstructed from a flattened copy of this patch;
verify context indentation against the target files before applying.

diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml
index 653878e..093cfa4 100644
--- a/.gitea/workflows/deploy.yml
+++ b/.gitea/workflows/deploy.yml
@@ -779,6 +779,15 @@ jobs:
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=300s
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup-config --timeout=300s
+          reconcile_at=$(date +%s)
+          kubectl -n flux-system annotate helmrelease/kube-prometheus-stack \
+            reconcile.fluxcd.io/requestedAt="${reconcile_at}" \
+            reconcile.fluxcd.io/resetAt="${reconcile_at}" \
+            reconcile.fluxcd.io/forceAt="${reconcile_at}" \
+            --overwrite
+          kubectl -n flux-system annotate kustomization/addon-observability \
+            reconcile.fluxcd.io/requestedAt="${reconcile_at}" \
+            --overwrite
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s
           kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
           kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=1200s
@@ -786,13 +795,26 @@ jobs:
           kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
           kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
-          ! kubectl get storageclass | grep -E "^local-path.*\\(default\\)"
-          kubectl get pods -A --no-headers \
-            | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
-            | grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
-            | grep -Ev "^cattle-resources-system[[:space:]]+rancher-backup-patch-sa-" \
-            | grep -Ev "^kube-system[[:space:]]+helm-install-" \
-            | tee /tmp/unhealthy-pods || true
-          test ! -s /tmp/unhealthy-pods
+          # errexit ignores a "!"-negated pipeline, so fail explicitly instead.
+          if kubectl get storageclass | grep -E "^local-path.*\\(default\\)"; then
+            echo "local-path is still marked as a default StorageClass" >&2
+            exit 1
+          fi
+          # List pods on its own so a kubectl error fails the step rather than
+          # being read as "no unhealthy pods" by the "|| true" below.
+          all_pods=$(kubectl get pods -A --no-headers) || exit 1
+          unhealthy_pods=$(
+            printf '%s\n' "${all_pods}" \
+              | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
+              | grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
+              | grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \
+              | grep -Ev "^cattle-resources-system[[:space:]]+rancher-backup-patch-sa-" \
+              | grep -Ev "^kube-system[[:space:]]+helm-install-" \
+              || true
+          )
+          if [ -n "${unhealthy_pods}" ]; then
+            printf '%s\n' "${unhealthy_pods}"
+            exit 1
+          fi
           kubectl -n kube-system get pods -o wide
           kubectl -n tailscale-system get pods -o wide
           kubectl -n external-secrets get pods -o wide
diff --git a/ansible/site.yml b/ansible/site.yml
index 28bf38a..4b5d1ab 100644
--- a/ansible/site.yml
+++ b/ansible/site.yml
@@ -89,6 +89,39 @@
   roles:
     - k3s-server
 
+- name: Export kube-vip image from primary control plane
+  hosts: control_plane[0]
+  become: true
+
+  tasks:
+    - name: Export kube-vip image for secondary control planes
+      command: >-
+        /usr/local/bin/ctr -n k8s.io images export
+        /tmp/kube-vip-bootstrap.tar
+        ghcr.io/kube-vip/kube-vip:v1.1.2
+      changed_when: false
+
+    - name: Fetch kube-vip image archive
+      fetch:
+        src: /tmp/kube-vip-bootstrap.tar
+        dest: ../outputs/kube-vip-bootstrap.tar
+        flat: true
+
+- name: Seed kube-vip image on secondary control planes
+  hosts: control_plane[1:]
+  become: true
+
+  tasks:
+    - name: Copy kube-vip image archive
+      copy:
+        src: ../outputs/kube-vip-bootstrap.tar
+        dest: /tmp/kube-vip-bootstrap.tar
+        mode: "0644"
+
+    - name: Import kube-vip image into containerd
+      command: /usr/local/bin/ctr -n k8s.io images import /tmp/kube-vip-bootstrap.tar
+      changed_when: false
+
 - name: Setup workers
   hosts: workers
   become: true
diff --git a/infrastructure/addons/rancher/helmrelease-rancher.yaml b/infrastructure/addons/rancher/helmrelease-rancher.yaml
index e9dd725..d576251 100644
--- a/infrastructure/addons/rancher/helmrelease-rancher.yaml
+++ b/infrastructure/addons/rancher/helmrelease-rancher.yaml
@@ -23,6 +23,7 @@ spec:
       retries: 3
   values:
     hostname: rancher.silverside-gopher.ts.net
+    systemDefaultRegistry: registry.rancher.com
     replicas: 1
     extraEnv:
       - name: CATTLE_PROMETHEUS_METRICS
@@ -31,7 +32,7 @@ spec:
         value: "managed-system-upgrade-controller=false"
     webhook:
       image:
-        repository: registry.rancher.com/rancher/rancher-webhook
+        repository: rancher/rancher-webhook
         tag: v0.9.3
       imagePullPolicy: IfNotPresent
       resources: