diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 15c4e10..0f4d451 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -42,12 +42,12 @@ jobs: - uses: actions/cache@v4 with: path: target - # -v2-: the prior `cargo-target--*` cache was poisoned when the runner ran + # -v3-: the prior `cargo-target--*` cache was poisoned when the runner ran # out of disk mid-build and actions/cache saved a truncated target/ (a dep's .rmeta # went missing -> E0463 "can't find crate"). A suffix bump wouldn't help — restore-keys # would fall back to the poisoned prefix — so the prefix itself is versioned. - key: cargo-target-v2-${{ env.rustc }}-${{ hashFiles('Cargo.lock') }} - restore-keys: cargo-target-v2-${{ env.rustc }}- + key: cargo-target-v3-${{ env.rustc }}-${{ hashFiles('Cargo.lock') }} + restore-keys: cargo-target-v3-${{ env.rustc }}- - name: Format run: cargo fmt --all --check diff --git a/.gitea/workflows/deb.yml b/.gitea/workflows/deb.yml index b22e1f5..73cdc22 100644 --- a/.gitea/workflows/deb.yml +++ b/.gitea/workflows/deb.yml @@ -71,10 +71,10 @@ jobs: - uses: actions/cache@v4 with: path: target - # -v2-: bypass a target cache poisoned by a disk-full build (see ci.yml). Shares the + # -v3-: bypass a target cache poisoned by a disk-full build (see ci.yml). Shares the # key with ci.yml so the release build reuses its clean artifacts. 
- key: cargo-target-v2-${{ env.rustc }}-${{ hashFiles('Cargo.lock') }} - restore-keys: cargo-target-v2-${{ env.rustc }}- + key: cargo-target-v3-${{ env.rustc }}-${{ hashFiles('Cargo.lock') }} + restore-keys: cargo-target-v3-${{ env.rustc }}- - name: Build release host + client env: diff --git a/scripts/ci/docker-prune.service b/scripts/ci/docker-prune.service index 70d88b2..372117c 100644 --- a/scripts/ci/docker-prune.service +++ b/scripts/ci/docker-prune.service @@ -2,12 +2,14 @@ # # Why this exists: every CI push builds and sha--tags a Docker image per pipeline # (rust-ci, web, docs, fedora-rpm, fedora44-rpm, ...). Those tags are never dangling, so a -# plain `docker image prune` SKIPS them and they accumulate forever — that is what filled the -# disk (589 images / ~85 GB, builds failing on ENOSPC). This trims everything older than 24h; -# images IN USE by a running container are always protected regardless of age. -# +# plain `docker image prune` SKIPS them and they accumulate — that is what filled the disk. # Host-level, not per-repo CI, because the runner is shared (punktfunk + other orgs all benefit). # +# Two tiers: trim anything older than 12h normally, AND — because a push-burst can fill 99 GB +# WITHIN that 12h window (a fast iteration session hit 100% and poisoned the cargo cache with a +# truncated, half-saved target/) — a burst guard that prunes ALL idle images + cache once the +# disk is >=85% full. Images IN USE by a running container are always protected. +# # Install on the runner host (root): # cp scripts/ci/docker-prune.{service,timer} /etc/systemd/system/ # systemctl daemon-reload && systemctl enable --now docker-prune.timer @@ -22,7 +24,11 @@ After=docker.service [Service] Type=oneshot # '-' prefix: each step is independent — a no-op/failure never blocks the others. 
-ExecStart=-/usr/bin/docker image prune -af --filter until=24h -ExecStart=-/usr/bin/docker builder prune -af --filter until=24h -ExecStart=-/usr/bin/docker buildx prune -af --filter until=24h -ExecStart=-/usr/bin/docker container prune -f --filter until=24h +# Containers first: a stopped container pins its image; removing it lets image prune reclaim it. +ExecStart=-/usr/bin/docker container prune -f --filter until=12h +ExecStart=-/usr/bin/docker image prune -af --filter until=12h +ExecStart=-/usr/bin/docker builder prune -af --filter until=12h +ExecStart=-/usr/bin/docker buildx prune -af --filter until=12h +# Burst guard: if STILL >=85% full, prune every idle image + all build cache (in-use protected), +# so a push-storm can't drive CI into ENOSPC (which truncates and poisons the actions/cargo cache). +ExecStart=-/bin/sh -c 'P=$(df --output=pcent / | tr -dc 0-9); [ "$P" -ge 85 ] && { docker image prune -af; docker builder prune -af; docker buildx prune -af; } || true' diff --git a/scripts/ci/docker-prune.timer b/scripts/ci/docker-prune.timer index 5a74efe..104d8d0 100644 --- a/scripts/ci/docker-prune.timer +++ b/scripts/ci/docker-prune.timer @@ -1,12 +1,12 @@ -# Runs docker-prune.service every 6h. Persistent=true catches up after downtime. -# Install: see the header of docker-prune.service. +# Runs docker-prune.service hourly (the burst guard needs to react within the hour, not every 6h). +# Persistent=true catches up after downtime. Install: see the header of docker-prune.service. [Unit] -Description=Run docker-prune every 6h (CI runner disk hygiene) +Description=Run docker-prune hourly (CI runner disk hygiene + burst guard) [Timer] -OnCalendar=*-*-* 00/6:00:00 -RandomizedDelaySec=600 +OnCalendar=hourly +RandomizedDelaySec=300 Persistent=true [Install]