fix: enforce post-deploy health checks
This commit is contained in:
@@ -407,6 +407,8 @@ jobs:
|
||||
kubectl -n tailscale-system rollout status deployment/operator --timeout=600s
|
||||
wait_for_flux_helm_release nfs-subdir-external-provisioner flux-system-nfs-subdir-external-provisioner nfs-subdir-external-provisioner kube-system 600s 600s 600
|
||||
kubectl -n kube-system rollout status deployment/kube-system-nfs-subdir-external-provisioner --timeout=600s
|
||||
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
|
||||
kubectl annotate storageclass flash-nfs storageclass.kubernetes.io/is-default-class=true --overwrite
|
||||
kubectl get storageclass flash-nfs
|
||||
|
||||
- name: Wait for Rancher and backup operator
|
||||
@@ -595,12 +597,31 @@ jobs:
|
||||
- name: Post-deploy cluster health checks
|
||||
working-directory: ansible
|
||||
run: |
|
||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get nodes -o wide"
|
||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n flux-system get gitrepositories,kustomizations,helmreleases"
|
||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n kube-system get pods -o wide"
|
||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl get storageclass flash-nfs"
|
||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n tailscale-system get pods -o wide"
|
||||
ansible -i inventory.ini 'control_plane[0]' -m command -a "kubectl -n external-secrets get pods"
|
||||
set -euo pipefail
|
||||
ansible -i inventory.ini 'control_plane[0]' -m shell -a '
|
||||
set -euo pipefail
|
||||
kubectl get nodes -o wide
|
||||
kubectl -n flux-system get gitrepositories,kustomizations,helmreleases,ocirepositories
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/infrastructure --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-cert-manager --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-external-secrets --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-nfs-storage --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-operator --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-tailscale-proxyclass --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-backup-config --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=60s
|
||||
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=60s
|
||||
kubectl get storageclass | grep -E "^flash-nfs.*\\(default\\)"
|
||||
kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers | tee /tmp/nonrunning-pods
|
||||
test ! -s /tmp/nonrunning-pods
|
||||
kubectl -n kube-system get pods -o wide
|
||||
kubectl -n tailscale-system get pods -o wide
|
||||
kubectl -n external-secrets get pods -o wide
|
||||
'
|
||||
env:
|
||||
ANSIBLE_HOST_KEY_CHECKING: "False"
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ metadata:
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
timeout: 15m
|
||||
targetNamespace: observability
|
||||
chart:
|
||||
spec:
|
||||
@@ -32,7 +33,7 @@ spec:
|
||||
serve_from_sub_path: false
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClassName: local-path
|
||||
storageClassName: flash-nfs
|
||||
size: 5Gi
|
||||
service:
|
||||
type: ClusterIP
|
||||
@@ -55,7 +56,7 @@ spec:
|
||||
storageSpec:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
storageClassName: local-path
|
||||
storageClassName: flash-nfs
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
|
||||
@@ -6,14 +6,10 @@ metadata:
|
||||
spec:
|
||||
interval: 10m
|
||||
targetNamespace: observability
|
||||
chart:
|
||||
spec:
|
||||
chart: loki
|
||||
version: 6.10.0
|
||||
sourceRef:
|
||||
kind: HelmRepository
|
||||
name: grafana
|
||||
namespace: flux-system
|
||||
chartRef:
|
||||
kind: OCIRepository
|
||||
name: loki
|
||||
namespace: flux-system
|
||||
install:
|
||||
createNamespace: true
|
||||
remediation:
|
||||
@@ -50,7 +46,7 @@ spec:
|
||||
replicas: 1
|
||||
persistence:
|
||||
size: 10Gi
|
||||
storageClass: local-path
|
||||
storageClass: flash-nfs
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
|
||||
@@ -6,14 +6,10 @@ metadata:
|
||||
spec:
|
||||
interval: 10m
|
||||
targetNamespace: observability
|
||||
chart:
|
||||
spec:
|
||||
chart: promtail
|
||||
version: 6.16.6
|
||||
sourceRef:
|
||||
kind: HelmRepository
|
||||
name: grafana
|
||||
namespace: flux-system
|
||||
chartRef:
|
||||
kind: OCIRepository
|
||||
name: promtail
|
||||
namespace: flux-system
|
||||
install:
|
||||
createNamespace: true
|
||||
remediation:
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
apiVersion: source.toolkit.fluxcd.io/v1
|
||||
kind: HelmRepository
|
||||
metadata:
|
||||
name: grafana
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 1h
|
||||
url: https://grafana.github.io/helm-charts
|
||||
@@ -4,7 +4,8 @@ resources:
|
||||
- namespace.yaml
|
||||
- grafana-admin-externalsecret.yaml
|
||||
- helmrepository-prometheus-community.yaml
|
||||
- helmrepository-grafana.yaml
|
||||
- ocirepository-loki.yaml
|
||||
- ocirepository-promtail.yaml
|
||||
- helmrelease-kube-prometheus-stack.yaml
|
||||
- helmrelease-loki.yaml
|
||||
- helmrelease-promtail.yaml
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
apiVersion: source.toolkit.fluxcd.io/v1
|
||||
kind: OCIRepository
|
||||
metadata:
|
||||
name: loki
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
url: oci://ghcr.io/grafana/helm-charts/loki
|
||||
ref:
|
||||
tag: 6.46.0
|
||||
layerSelector:
|
||||
mediaType: application/vnd.cncf.helm.chart.content.v1.tar+gzip
|
||||
operation: copy
|
||||
@@ -0,0 +1,13 @@
|
||||
apiVersion: source.toolkit.fluxcd.io/v1
|
||||
kind: OCIRepository
|
||||
metadata:
|
||||
name: promtail
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
url: oci://ghcr.io/grafana/helm-charts/promtail
|
||||
ref:
|
||||
tag: 6.16.6
|
||||
layerSelector:
|
||||
mediaType: application/vnd.cncf.helm.chart.content.v1.tar+gzip
|
||||
operation: copy
|
||||
Reference in New Issue
Block a user