fix: enforce post-deploy health checks
This commit is contained in:
@@ -407,6 +407,8 @@ jobs:
 kubectl -n tailscale-system rollout status deployment/operator --timeout=600s
 wait_for_flux_helm_release nfs-subdir-external-provisioner flux-system-nfs-subdir-external-provisioner nfs-subdir-external-provisioner kube-system 600s 600s 600
 kubectl -n kube-system rollout status deployment/kube-system-nfs-subdir-external-provisioner --timeout=600s
+kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
+kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
 kubectl get storageclass flash-nfs

 - name: Wait for Rancher and backup operator
@@ -595,12 +597,31 @@ jobs:
 - name: Post-deploy cluster health checks
 working-directory: ansible
 run: |
-ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get nodes -o wide"
-ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n flux-system get gitrepositories,kustomizations,helmreleases"
-ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n kube-system get pods -o wide"
-ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass flash-nfs"
-ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n tailscale-system get pods -o wide"
-ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n external-secrets get pods"
+set -euo pipefail
+ansible -i inventory.ini 'control_plane[0]' -m shell -a '
+set -euo pipefail
+kubectl get nodes -o wide
+kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories
+kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-proxyclass --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup-config --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=60s
+kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=60s
+kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
+kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers | tee /tmp/nonrunning-pods
+test ! -s /tmp/nonrunning-pods
+kubectl -n kube-system get pods -o wide
+kubectl -n tailscale-system get pods -o wide
+kubectl -n external-secrets get pods -o wide
+'
 env:
 ANSIBLE_HOST_KEY_CHECKING: "False"

Reference in New Issue
Block a user