fix: harden final health checks
This commit is contained in:
@@ -779,6 +779,15 @@ jobs:
|
|||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=300s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=300s
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup-config --timeout=300s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup-config --timeout=300s
|
||||||
|
reconcile_at=$(date +%s)
|
||||||
|
kubectl -n flux-system annotate helmrelease/kube-prometheus-stack \
|
||||||
|
reconcile.fluxcd.io/requestedAt="${reconcile_at}" \
|
||||||
|
reconcile.fluxcd.io/resetAt="${reconcile_at}" \
|
||||||
|
reconcile.fluxcd.io/forceAt="${reconcile_at}" \
|
||||||
|
--overwrite
|
||||||
|
kubectl -n flux-system annotate kustomization/addon-observability \
|
||||||
|
reconcile.fluxcd.io/requestedAt="${reconcile_at}" \
|
||||||
|
--overwrite
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
|
||||||
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=1200s
|
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=1200s
|
||||||
@@ -786,13 +795,15 @@ jobs:
|
|||||||
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
|
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
|
||||||
kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
|
kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
|
||||||
! kubectl get storageclass | grep -E "^local-path.*\\(default\\)"
|
! kubectl get storageclass | grep -E "^local-path.*\\(default\\)"
|
||||||
|
unhealthy_pods=$(mktemp)
|
||||||
kubectl get pods -A --no-headers \
|
kubectl get pods -A --no-headers \
|
||||||
| grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
|
| grep -Ev "[[:space:]](Running|Completed)[[:space:]]" \
|
||||||
| grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
|
| grep -Ev "^cattle-system[[:space:]]+helm-operation-" \
|
||||||
|
| grep -Ev "^cattle-capi-system[[:space:]]+capi-controller-manager-" \
|
||||||
| grep -Ev "^cattle-resources-system[[:space:]]+rancher-backup-patch-sa-" \
|
| grep -Ev "^cattle-resources-system[[:space:]]+rancher-backup-patch-sa-" \
|
||||||
| grep -Ev "^kube-system[[:space:]]+helm-install-" \
|
| grep -Ev "^kube-system[[:space:]]+helm-install-" \
|
||||||
| tee /tmp/unhealthy-pods || true
|
| tee "${unhealthy_pods}" || true
|
||||||
test ! -s /tmp/unhealthy-pods
|
test ! -s "${unhealthy_pods}"
|
||||||
kubectl -n kube-system get pods -o wide
|
kubectl -n kube-system get pods -o wide
|
||||||
kubectl -n tailscale-system get pods -o wide
|
kubectl -n tailscale-system get pods -o wide
|
||||||
kubectl -n external-secrets get pods -o wide
|
kubectl -n external-secrets get pods -o wide
|
||||||
|
|||||||
@@ -89,6 +89,39 @@
|
|||||||
roles:
|
roles:
|
||||||
- k3s-server
|
- k3s-server
|
||||||
|
|
||||||
|
- name: Export kube-vip image from primary control plane
|
||||||
|
hosts: control_plane[0]
|
||||||
|
become: true
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: Export kube-vip image for secondary control planes
|
||||||
|
command: >-
|
||||||
|
/usr/local/bin/ctr -n k8s.io images export
|
||||||
|
/tmp/kube-vip-bootstrap.tar
|
||||||
|
ghcr.io/kube-vip/kube-vip:v1.1.2
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
|
- name: Fetch kube-vip image archive
|
||||||
|
fetch:
|
||||||
|
src: /tmp/kube-vip-bootstrap.tar
|
||||||
|
dest: ../outputs/kube-vip-bootstrap.tar
|
||||||
|
flat: true
|
||||||
|
|
||||||
|
- name: Seed kube-vip image on secondary control planes
|
||||||
|
hosts: control_plane[1:]
|
||||||
|
become: true
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: Copy kube-vip image archive
|
||||||
|
copy:
|
||||||
|
src: ../outputs/kube-vip-bootstrap.tar
|
||||||
|
dest: /tmp/kube-vip-bootstrap.tar
|
||||||
|
mode: "0644"
|
||||||
|
|
||||||
|
- name: Import kube-vip image into containerd
|
||||||
|
command: /usr/local/bin/ctr -n k8s.io images import /tmp/kube-vip-bootstrap.tar
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
- name: Setup workers
|
- name: Setup workers
|
||||||
hosts: workers
|
hosts: workers
|
||||||
become: true
|
become: true
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ spec:
|
|||||||
retries: 3
|
retries: 3
|
||||||
values:
|
values:
|
||||||
hostname: rancher.silverside-gopher.ts.net
|
hostname: rancher.silverside-gopher.ts.net
|
||||||
|
systemDefaultRegistry: registry.rancher.com
|
||||||
replicas: 1
|
replicas: 1
|
||||||
extraEnv:
|
extraEnv:
|
||||||
- name: CATTLE_PROMETHEUS_METRICS
|
- name: CATTLE_PROMETHEUS_METRICS
|
||||||
@@ -31,7 +32,7 @@ spec:
|
|||||||
value: "managed-system-upgrade-controller=false"
|
value: "managed-system-upgrade-controller=false"
|
||||||
webhook:
|
webhook:
|
||||||
image:
|
image:
|
||||||
repository: registry.rancher.com/rancher/rancher-webhook
|
repository: rancher/rancher-webhook
|
||||||
tag: v0.9.3
|
tag: v0.9.3
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
resources:
|
resources:
|
||||||
|
|||||||
Reference in New Issue
Block a user