fix: stop preloading observability images everywhere
This commit is contained in:
+16
-97
@@ -246,17 +246,7 @@ jobs:
|
|||||||
quay.io/jetstack/cert-manager-cainjector:v1.17.2 \
|
quay.io/jetstack/cert-manager-cainjector:v1.17.2 \
|
||||||
quay.io/jetstack/cert-manager-webhook:v1.17.2 \
|
quay.io/jetstack/cert-manager-webhook:v1.17.2 \
|
||||||
quay.io/jetstack/cert-manager-startupapicheck:v1.17.2 \
|
quay.io/jetstack/cert-manager-startupapicheck:v1.17.2 \
|
||||||
docker.io/library/busybox:1.31.1 \
|
docker.io/library/busybox:1.31.1; do
|
||||||
docker.io/grafana/loki:3.5.7 \
|
|
||||||
quay.io/kiwigrid/k8s-sidecar:1.28.0 \
|
|
||||||
docker.io/kiwigrid/k8s-sidecar:1.30.10 \
|
|
||||||
docker.io/grafana/promtail:3.0.0 \
|
|
||||||
docker.io/grafana/grafana:11.4.0 \
|
|
||||||
quay.io/prometheus-operator/prometheus-operator:v0.79.2 \
|
|
||||||
quay.io/prometheus-operator/prometheus-config-reloader:v0.79.2 \
|
|
||||||
quay.io/prometheus/prometheus:v3.1.0 \
|
|
||||||
registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0 \
|
|
||||||
quay.io/prometheus/node-exporter:v1.8.2; do
|
|
||||||
prepare_image_archive "${image}"
|
prepare_image_archive "${image}"
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -883,16 +873,12 @@ jobs:
|
|||||||
kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=900s
|
kubectl -n cattle-system wait --for=condition=Ready issuer/cattle-system-rancher --timeout=900s
|
||||||
kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=900s
|
kubectl -n cattle-system wait --for=condition=Ready certificate/tls-rancher-ingress --timeout=900s
|
||||||
|
|
||||||
- name: Seed observability runtime images
|
- name: Reconcile observability stack
|
||||||
env:
|
env:
|
||||||
KUBECONFIG: outputs/kubeconfig
|
KUBECONFIG: outputs/kubeconfig
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
archive_name() {
|
|
||||||
printf '%s' "$1" | tr '/:' '__'
|
|
||||||
}
|
|
||||||
|
|
||||||
wait_for_resource() {
|
wait_for_resource() {
|
||||||
local namespace="$1"
|
local namespace="$1"
|
||||||
local resource="$2"
|
local resource="$2"
|
||||||
@@ -954,86 +940,19 @@ jobs:
|
|||||||
wait_for_reconcile_handled "helmrelease/${release}" "${reconcile_at}" 300
|
wait_for_reconcile_handled "helmrelease/${release}" "${reconcile_at}" 300
|
||||||
}
|
}
|
||||||
|
|
||||||
import_required_image() {
|
wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-observability-secrets 600
|
||||||
local image="$1"
|
reconcile_flux_resource kustomization/addon-observability-secrets 600
|
||||||
local host_ip="$2"
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-secrets --timeout=600s
|
||||||
local archive_name
|
wait_for_resource flux-system kustomization.kustomize.toolkit.fluxcd.io/addon-observability 600
|
||||||
local archive_path
|
reconcile_flux_resource kustomization/addon-observability 1800
|
||||||
archive_name="$(archive_name "${image}").tar"
|
if ! kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1800s; then
|
||||||
archive_path="outputs/bootstrap-image-archives/${archive_name}"
|
kubectl -n flux-system describe kustomization/addon-observability || true
|
||||||
|
kubectl -n flux-system describe helmrelease/kube-prometheus-stack || true
|
||||||
if [ ! -s "${archive_path}" ]; then
|
kubectl -n flux-system describe helmrelease/loki || true
|
||||||
echo "Missing required bootstrap image archive ${archive_path} for ${image}" >&2
|
kubectl -n flux-system describe helmrelease/promtail || true
|
||||||
return 1
|
kubectl -n observability get pods -o wide || true
|
||||||
fi
|
exit 1
|
||||||
|
fi
|
||||||
if ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "ubuntu@${host_ip}" \
|
|
||||||
"sudo k3s crictl inspecti '${image}' >/dev/null 2>&1"; then
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Importing ${image} archive on ${host_ip}"
|
|
||||||
timeout 180s scp -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 \
|
|
||||||
"${archive_path}" "ubuntu@${host_ip}:/tmp/${archive_name}"
|
|
||||||
timeout 300s ssh -i "$HOME/.ssh/id_ed25519" -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 "ubuntu@${host_ip}" \
|
|
||||||
"set -euo pipefail; \
|
|
||||||
if sudo k3s crictl inspecti '${image}' >/dev/null 2>&1; then exit 0; fi; \
|
|
||||||
for attempt in 1 2 3; do \
|
|
||||||
echo 'Importing ${image} archive with ctr'; \
|
|
||||||
if sudo k3s ctr -n k8s.io images import '/tmp/${archive_name}' && sudo k3s crictl inspecti '${image}' >/dev/null; then exit 0; fi; \
|
|
||||||
sleep 10; \
|
|
||||||
done; \
|
|
||||||
sudo systemctl status k3s --no-pager -l || true; \
|
|
||||||
sudo journalctl -u k3s -n 80 --no-pager || true; \
|
|
||||||
exit 1"
|
|
||||||
}
|
|
||||||
|
|
||||||
import_required_image_on_all_nodes() {
|
|
||||||
local image="$1"
|
|
||||||
local status_dir
|
|
||||||
local host_ip
|
|
||||||
local pid
|
|
||||||
local failed=false
|
|
||||||
status_dir="$(mktemp -d)"
|
|
||||||
|
|
||||||
for host_ip in ${ALL_NODE_IPS}; do
|
|
||||||
(
|
|
||||||
import_required_image "${image}" "${host_ip}"
|
|
||||||
) >"${status_dir}/${host_ip}.log" 2>&1 &
|
|
||||||
done
|
|
||||||
|
|
||||||
for pid in $(jobs -p); do
|
|
||||||
if ! wait "${pid}"; then
|
|
||||||
failed=true
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
for host_ip in ${ALL_NODE_IPS}; do
|
|
||||||
sed "s/^/[${host_ip}] /" "${status_dir}/${host_ip}.log"
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ "${failed}" = "true" ]; then
|
|
||||||
echo "Warning: failed to import ${image} on one or more nodes; continuing so Flux/Kubernetes can schedule on seeded nodes or retry pulls" >&2
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
ALL_NODE_IPS=$(python3 -c 'import json; outputs = json.load(open("outputs/terraform_outputs.json")); print(" ".join(outputs["control_plane_ips"]["value"] + outputs["worker_ips"]["value"]))')
|
|
||||||
for image in \
|
|
||||||
docker.io/library/busybox:1.31.1 \
|
|
||||||
docker.io/grafana/loki:3.5.7 \
|
|
||||||
quay.io/kiwigrid/k8s-sidecar:1.28.0 \
|
|
||||||
docker.io/kiwigrid/k8s-sidecar:1.30.10 \
|
|
||||||
docker.io/grafana/promtail:3.0.0 \
|
|
||||||
docker.io/grafana/grafana:11.4.0 \
|
|
||||||
quay.io/prometheus-operator/prometheus-operator:v0.79.2 \
|
|
||||||
quay.io/prometheus-operator/prometheus-config-reloader:v0.79.2 \
|
|
||||||
quay.io/prometheus/prometheus:v3.1.0 \
|
|
||||||
registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0 \
|
|
||||||
quay.io/prometheus/node-exporter:v1.8.2; do
|
|
||||||
import_required_image_on_all_nodes "${image}"
|
|
||||||
done
|
|
||||||
reconcile_flux_resource kustomization/addon-observability 1200
|
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s
|
|
||||||
for release in kube-prometheus-stack loki promtail; do
|
for release in kube-prometheus-stack loki promtail; do
|
||||||
reconcile_helmrelease "${release}"
|
reconcile_helmrelease "${release}"
|
||||||
done
|
done
|
||||||
@@ -1058,7 +977,7 @@ jobs:
|
|||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher --timeout=900s
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-rancher-config --timeout=300s
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-secrets --timeout=300s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-secrets --timeout=300s
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1200s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability --timeout=1800s
|
||||||
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
|
kubectl -n flux-system wait --for=condition=Ready kustomization/addon-observability-content --timeout=300s
|
||||||
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=1200s
|
kubectl -n flux-system wait --for=condition=Ready helmrelease --all --timeout=1200s
|
||||||
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
|
kubectl annotate storageclass local-path storageclass.kubernetes.io/is-default-class=false --overwrite
|
||||||
|
|||||||
@@ -29,5 +29,5 @@ spec:
|
|||||||
kind: HelmRelease
|
kind: HelmRelease
|
||||||
name: promtail
|
name: promtail
|
||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
timeout: 15m
|
timeout: 30m
|
||||||
suspend: false
|
suspend: false
|
||||||
|
|||||||
Reference in New Issue
Block a user