fix: harden final health checks
@@ -779,6 +779,15 @@ jobs:
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=300s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup-config --timeout=300s
reconcile_at=$(date +%s)
kubectl -n flux-system annotate helmrelease/kube-prometheus-stack \
  reconcile.fluxcd.io/requestedAt="${reconcile_at}" \
  reconcile.fluxcd.io/resetAt="${reconcile_at}" \
  reconcile.fluxcd.io/forceAt="${reconcile_at}" \
  --overwrite
kubectl -n flux-system annotate kustomization/addon-observability \
  reconcile.fluxcd.io/requestedAt="${reconcile_at}" \
  --overwrite
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=1200s
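As an optional follow-up (a sketch, not part of the diff): Flux controllers record the handled reconcile.fluxcd.io/requestedAt value in .status.lastHandledReconcileAt, so the annotation-triggered run above could be confirmed with, e.g.:

kubectl -n flux-system get kustomization/addon-observability \
  -o jsonpath='{.status.lastHandledReconcileAt}'

and the output compared against "${reconcile_at}".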
@@ -786,13 +795,15 @@ jobs:
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
! kubectl get storageclass | grep -E "^local-path.*\\(default\\)"
+unhealthy_pods=$(mktemp)
kubectl get pods -A --no-headers \
  | grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
  | grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
  | grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \
  | grep -Ev "^cattle-resources-system[[:space:]]+rancher-backup-patch-sa-" \
  | grep -Ev "^kube-system[[:space:]]+helm-install-" \
-  | tee /tmp/unhealthy-pods || true
-test ! -s /tmp/unhealthy-pods
+  | tee "${unhealthy_pods}" || true
+test ! -s "${unhealthy_pods}"
kubectl -n kube-system get pods -o wide
kubectl -n tailscale-system get pods -o wide
kubectl -n external-secrets get pods -o wide
@@ -89,6 +89,39 @@
  roles:
    - k3s-server

- name: Export kube-vip image from primary control plane
  hosts: control_plane[0]
  become: true

  tasks:
    - name: Export kube-vip image for secondary control planes
      command: >-
        /usr/local/bin/ctr -n k8s.io images export
        /tmp/kube-vip-bootstrap.tar
        ghcr.io/kube-vip/kube-vip:v1.1.2
      changed_when: false

    - name: Fetch kube-vip image archive
      fetch:
        src: /tmp/kube-vip-bootstrap.tar
        dest: ../outputs/kube-vip-bootstrap.tar
        flat: true

- name: Seed kube-vip image on secondary control planes
  hosts: control_plane[1:]
  become: true

  tasks:
    - name: Copy kube-vip image archive
      copy:
        src: ../outputs/kube-vip-bootstrap.tar
        dest: /tmp/kube-vip-bootstrap.tar
        mode: "0644"

    - name: Import kube-vip image into containerd
      command: /usr/local/bin/ctr -n k8s.io images import /tmp/kube-vip-bootstrap.tar
      changed_when: false

- name: Setup workers
  hosts: workers
  become: true
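As an optional sanity check (a sketch, not part of the diff): after the import task, the seeded image can be listed on each secondary control plane, e.g.:

/usr/local/bin/ctr -n k8s.io images ls -q | grep kube-vip

run ad hoc or wrapped in a small Ansible command task.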
@@ -23,6 +23,7 @@ spec:
      retries: 3
  values:
    hostname: rancher.silverside-gopher.ts.net
    systemDefaultRegistry: registry.rancher.com
    replicas: 1
    extraEnv:
      - name: CATTLE_PROMETHEUS_METRICS
@@ -31,7 +32,7 @@ spec:
        value: "managed-system-upgrade-controller=false"
    webhook:
      image:
-       repository: registry.rancher.com/rancher/rancher-webhook
+       repository: rancher/rancher-webhook
        tag: v0.9.3
        imagePullPolicy: IfNotPresent
      resources:
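As an optional check (a sketch, not part of the diff; the deployment name and namespace are assumed): the image reference the webhook actually runs with, including any systemDefaultRegistry prefix, can be inspected via:

kubectl -n cattle-system get deploy rancher-webhook \
  -o jsonpath='{.spec.template.spec.containers[0].image}'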