fix: add local registry cache for rebuilds
Deploy Cluster / Terraform (push) Successful in 4m7s
Deploy Cluster / Ansible (push) Failing after 16m31s

This commit is contained in:
2026-05-03 00:02:33 +00:00
parent 8375333ac5
commit 1896108cbb
9 changed files with 334 additions and 11 deletions
+118
View File
@@ -0,0 +1,118 @@
#!/usr/bin/env bash
# Network probe: prints host/network/DNS details, then times HTTP requests
# and container image pulls against the endpoints and images listed below.
#
# Optional environment overrides:
#   NETWORK_PROBE_REPEAT_COUNT  attempts per endpoint / image (default 5)
#   NETWORK_PROBE_CURL_TIMEOUT  curl connect and total timeout, seconds (default 20)
#   NETWORK_PROBE_PULL_TIMEOUT  per-pull timeout, seconds (default 240)
set -o nounset

pull_timeout="${NETWORK_PROBE_PULL_TIMEOUT:-240}"
curl_timeout="${NETWORK_PROBE_CURL_TIMEOUT:-20}"
repeat_count="${NETWORK_PROBE_REPEAT_COUNT:-5}"

# URLs that are resolved, traced and timed with curl.
endpoints=()
endpoints+=("https://ghcr.io/v2/")
endpoints+=("https://auth.docker.io/token")
endpoints+=("https://registry-1.docker.io/v2/")
endpoints+=("https://quay.io/v2/")
endpoints+=("https://registry.k8s.io/v2/")
endpoints+=("https://api.doppler.com/v3/projects")

# Image references that are pulled repeatedly through the container runtime.
images=()
images+=("ghcr.io/fluxcd/helm-controller:v1.5.1")
images+=("oci.external-secrets.io/external-secrets/external-secrets:v2.1.0")
images+=("docker.io/rancher/mirrored-library-busybox:1.37.0")
images+=("ghcr.io/tailscale/tailscale:v1.96.5")
images+=("quay.io/prometheus/node-exporter:v1.8.2")
# have TOOL
# Succeeds (status 0) when TOOL resolves via `command -v`; prints nothing.
have() {
  local tool="$1"
  command -v "${tool}" >/dev/null 2>&1
}
# section TITLE
# Prints a blank line followed by "== TITLE ==" to separate report sections.
section() {
  local title="$1"
  printf '\n%s\n' "== ${title} =="
}
# primary_iface
# Prints the word that follows "dev" in the output of `ip route get 1.1.1.1`,
# i.e. the interface named in that route. Prints nothing when `ip` produces
# no output (its stderr is discarded).
primary_iface() {
  ip route get 1.1.1.1 2>/dev/null \
    | awk '
        {
          col = 1
          while (col <= NF) {
            if ($col == "dev") {
              print $(col + 1)
              exit
            }
            col++
          }
        }
      '
}
# endpoint_host URL
# Prints the host part of an http:// or https:// URL (everything between the
# scheme and the first "/" or ":"). Input that does not match is printed
# unchanged.
endpoint_host() {
  local url="$1"
  sed -E 's@^https?://([^/:]+).*@\1@' <<<"${url}"
}
# --- host identity and current time --------------------------------------
section "host"
# Prefer the FQDN / ISO-8601 forms; fall back to the plain commands when the
# richer flag is not supported.
if ! hostname -f 2>/dev/null; then
  hostname || true
fi
if ! date -Is 2>/dev/null; then
  date || true
fi

# --- interface, MTU, addresses and routes --------------------------------
section "network"
iface="$(primary_iface)"
printf 'primary_iface=%s\n' "${iface:-unknown}"
mtu_file="/sys/class/net/${iface}/mtu"
if [[ -n "${iface}" && -r "${mtu_file}" ]]; then
  printf 'primary_mtu=%s\n' "$(cat "${mtu_file}")"
fi
ip -brief addr || true
ip route || true
ip route get 1.1.1.1 || true

# --- resolver configuration ----------------------------------------------
section "dns"
if [[ -r /etc/resolv.conf ]]; then
  # Only the nameserver, search and options lines are shown.
  sed -n -e '/^nameserver/p' -e '/^search/p' -e '/^options/p' /etc/resolv.conf
fi
# --- resolved addresses for every endpoint host ---------------------------
# One line per endpoint: "<host> <addr>[,<addr>...]". When getent returns no
# addresses the line reads "<host> unresolved" so a DNS failure is visible in
# the report instead of showing up as a blank value.
section "remote addresses"
for endpoint in "${endpoints[@]}"; do
  host="$(endpoint_host "${endpoint}")"
  printf '%s ' "${host}"
  if ! have getent; then
    printf 'getent unavailable\n'
    continue
  fi
  # getent lists each address once per socket type, hence the sort -u.
  addrs="$(getent ahosts "${host}" | awk '{print $1}' | sort -u | paste -sd ',' -)"
  printf '%s\n' "${addrs:-unresolved}"
done

# --- per-host path trace (only when tracepath is installed) --------------
if have tracepath; then
  section "path mtu"
  for endpoint in "${endpoints[@]}"; do
    host="$(endpoint_host "${endpoint}")"
    printf '\n-- tracepath %s --\n' "${host}"
    tracepath -n "${host}" || true
  done
fi
# --- HTTP timing probe -----------------------------------------------------
# Sends repeat_count HEAD requests (following redirects) to every endpoint and
# prints curl's timing breakdown for each attempt.
#
# curl's --fail flag is deliberately NOT used: several of the probed URLs
# (registry /v2/ roots, token and API endpoints) answer unauthenticated
# requests with a 4xx status, which still proves the endpoint is reachable.
# With --fail those responses would be logged as "curl_failed rc=22". The HTTP
# status is visible in http_code; curl_failed now means a transport-level
# problem (DNS, connect, TLS, timeout).
section "curl timings"
curl_report='http_code=%{http_code} remote_ip=%{remote_ip} time_connect=%{time_connect} time_appconnect=%{time_appconnect} time_starttransfer=%{time_starttransfer} time_total=%{time_total}\n'
for endpoint in "${endpoints[@]}"; do
  printf '\n-- %s --\n' "${endpoint}"
  for attempt in $(seq 1 "${repeat_count}"); do
    printf 'attempt=%s ' "${attempt}"
    curl -sSIL --connect-timeout "${curl_timeout}" --max-time "${curl_timeout}" \
      -o /dev/null \
      -w "${curl_report}" \
      "${endpoint}" || printf 'curl_failed rc=%s\n' "$?"
    # Short pause between attempts.
    sleep 1
  done
done
# --- repeated image pulls --------------------------------------------------
# Pulls every image repeat_count times through the first available runtime
# CLI (k3s crictl, crictl, then ctr), each pull bounded by pull_timeout
# seconds, and prints pull_ok or pull_failed with the exit status.
section "image pulls"

# Use sudo only when it is installed and works without a password prompt.
if have sudo && sudo -n true 2>/dev/null; then
  sudo_cmd=(sudo)
else
  sudo_cmd=()
fi

# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a plain
# "${sudo_cmd[@]}" aborts with "unbound variable" under `set -u` on
# bash releases older than 4.4.
pull_cmd=(timeout "${pull_timeout}s" ${sudo_cmd[@]+"${sudo_cmd[@]}"})
if have k3s; then
  pull_cmd+=(k3s crictl pull)
elif have crictl; then
  pull_cmd+=(crictl pull)
elif have ctr; then
  pull_cmd+=(ctr -n k8s.io images pull)
else
  printf 'No k3s, crictl, or ctr found; skipping image pulls.\n'
  exit 0
fi

for image in "${images[@]}"; do
  printf '\n-- %s --\n' "${image}"
  for attempt in $(seq 1 "${repeat_count}"); do
    printf 'attempt=%s\n' "${attempt}"
    # Capture the pull status explicitly so the failure branch reports the
    # pull's own exit code and can never be triggered by the success printf.
    pull_rc=0
    "${pull_cmd[@]}" "${image}" || pull_rc=$?
    if [ "${pull_rc}" -eq 0 ]; then
      printf 'pull_ok\n'
    else
      printf 'pull_failed rc=%s\n' "${pull_rc}"
    fi
    sleep 2
  done
done
+84
View File
@@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Installs the docker-registry package and a templated systemd unit
# (docker-registry-cache@<name>.service) that serves
# /etc/docker/registry/cache-<name>.yml as the docker-registry user.
#
# Optional environment overrides:
#   REGISTRY_CACHE_LISTEN_IP     address the caches listen on (default 10.27.27.239)
#   REGISTRY_CACHE_STORAGE_ROOT  parent directory for cache data
#                                (default /var/lib/docker-registry-cache)
set -o errexit -o nounset -o pipefail

storage_root="${REGISTRY_CACHE_STORAGE_ROOT:-/var/lib/docker-registry-cache}"
listen_ip="${REGISTRY_CACHE_LISTEN_IP:-10.27.27.239}"

# Everything below installs packages and writes under /etc, so require root.
if [[ "$(id -u)" -ne 0 ]]; then
  printf '%s\n' "Run as root on the Proxmox host." >&2
  exit 1
fi

apt-get update
apt-get install -y docker-registry

# The packaged service is switched off; failure to do so is tolerated.
systemctl disable --now docker-registry.service || true

mkdir -p /etc/docker/registry
mkdir -p "${storage_root}"
chown docker-registry:docker-registry "${storage_root}"

# Quoted delimiter: %i must reach systemd literally (instance name).
unit_path=/etc/systemd/system/docker-registry-cache@.service
cat >"${unit_path}" <<'UNIT'
[Unit]
Description=Docker registry pull-through cache for %i
After=network.target
[Service]
User=docker-registry
Group=docker-registry
ExecStart=/usr/bin/docker-registry serve /etc/docker/registry/cache-%i.yml
Restart=on-failure
RestartSec=5s
[Install]
WantedBy=multi-user.target
UNIT
#######################################
# Create the storage directory and registry configuration for one
# pull-through cache instance.
# Globals:
#   storage_root (read) - parent directory for per-cache storage
#   listen_ip    (read) - address the cache binds to
# Arguments:
#   $1 - cache name; used in the storage directory, the log field and the
#        config file name /etc/docker/registry/cache-<name>.yml
#   $2 - TCP port to listen on
#   $3 - upstream registry URL to proxy
# Outputs:
#   Writes /etc/docker/registry/cache-<name>.yml (overwriting any previous
#   version) and creates ${storage_root}/<name> owned by docker-registry.
#######################################
write_config() {
  local name="$1"
  local port="$2"
  local remote="$3"
  local dir="${storage_root}/${name}"

  mkdir -p "${dir}"
  chown docker-registry:docker-registry "${dir}"

  # The YAML nesting is significant: fields belongs to log, cache/filesystem
  # to storage, addr/headers to http, remoteurl to proxy, and the check
  # settings to health.storagedriver. Emitted flat, every one of these would
  # be a separate top-level key and the registry would receive no storage,
  # http or proxy settings. The heredoc body and its terminator therefore
  # start at column 0 and carry only the YAML's own indentation.
  cat >"/etc/docker/registry/cache-${name}.yml" <<EOF
version: 0.1
log:
  fields:
    service: registry-cache-${name}
storage:
  cache:
    blobdescriptor: inmemory
  filesystem:
    rootdirectory: ${dir}
http:
  addr: ${listen_ip}:${port}
  headers:
    X-Content-Type-Options: [nosniff]
proxy:
  remoteurl: ${remote}
health:
  storagedriver:
    enabled: true
    interval: 10s
    threshold: 3
EOF
}
# --- instantiate and (re)start every cache --------------------------------
# Single list of caches as "<name> <port> <upstream URL>", so the config
# files, the enabled units and the final status call cannot drift apart.
cache_specs=(
  "dockerhub 5000 https://registry-1.docker.io"
  "ghcr 5001 https://ghcr.io"
  "quay 5002 https://quay.io"
  "k8s 5003 https://registry.k8s.io"
  "external-secrets 5004 https://oci.external-secrets.io"
)

cache_units=()
for cache_spec in "${cache_specs[@]}"; do
  read -r cache_name cache_port cache_remote <<<"${cache_spec}"
  write_config "${cache_name}" "${cache_port}" "${cache_remote}"
  cache_units+=("docker-registry-cache@${cache_name}.service")
done

systemctl daemon-reload

for cache_unit in "${cache_units[@]}"; do
  systemctl enable "${cache_unit}"
  # restart rather than `enable --now`: starting an already-running instance
  # is a no-op, so on a re-run the freshly written config would be ignored.
  systemctl restart "${cache_unit}"
done

systemctl --no-pager --full status "${cache_units[@]}"