fix(ci): inline docker buildx to stop concurrent-build cache contention

Two failing runs across refaire (run 1709) and rememed (run 1624)
both crash at the same step (`Set up Docker Buildx`) with a node error
on dist/index.js line 1, AND both reference the same act cache dir:
/root/.cache/act/6a647958c11e138a6cfcaf32d2b372bc8e0c97871d617bfb441d003d505b77cf

act keys remote-action cache entries by repo URL alone, so pinning to
`@v3.10.0` doesn't help: every game that uses
`docker/setup-buildx-action` lands in the same dir. When you push N
games at once on home-runner-1, the act-runner does parallel
`git clone` ops into that shared dir; the loser's pull aborts
("worktree contains unstaged changes") and leaves dist/ half-written,
so the next job's `node dist/index.js` throws on line 1 → step fails.
That's the entire flake.

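Fix: drop the two remote actions that were racing — setup-buildx-action
and build-push-action — and replace them with inline `docker buildx
create` + `docker buildx build --push` shell. Neither action is fetched
from GitHub at runtime any more, so the buildx steps no longer touch a
shared act cache dir and the failure mode disappears for them.
(actions/checkout is still a remote action, so it is still resolved
through act's action cache.)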

Same image, same tags, same registry mirror, same cache-from/cache-to
shape, same secret-files mount (`--secret id=...,src=...`). Each job
gets a uniquely-named builder (`builder-<game>-<api-core|web>`) and a
teardown step so the runner host's docker state doesn't accumulate
abandoned builders.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-25 12:23:35 +02:00
parent 6c46ca98a7
commit 0c5f9a347a
+88 -50
View File
@@ -20,12 +20,17 @@ name: Build & Deploy played game (reusable)
# - STEP_CA_PROVISIONER_PASSWORD — for the cert-init container
#
# Notes on reliability:
# - All remote actions are pinned to immutable patch tags so the act-runner
# action cache hash is stable run-to-run. The cluster of "Cannot find
# module .../dist/index.js" failures on home-runner-1 was act re-using a
# partial cache dir for a moving tag (`@v3`); pinning kills that mode.
# - Registry login is an inline shell step instead of docker/login-action.
# One fewer remote-action download = one fewer failure point per job.
# - act keys its remote-action cache by repo URL alone (not by full
# ref), so every concurrent game build on home-runner-1 shares the
# same /root/.cache/act/<hash> dir for setup-buildx-action +
# build-push-action. Two builds racing on that dir corrupt the
# checked-out tree ("worktree contains unstaged changes") and the
# next read of dist/index.js throws → step exits 1. Pinning to
# patch tags didn't help because the cache key ignores the ref.
# Fix: do buildx setup + build via inline `docker buildx ...`
# shell, so neither of those actions needs to be cloned from
# GitHub at runtime. (actions/checkout is still a remote action.)
# - Registry login is also an inline shell step. One fewer remote-
# action download = one fewer failure point per job.
on:
workflow_call:
@@ -42,14 +47,25 @@ jobs:
- uses: actions/checkout@v4.2.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3.10.0
with:
config-inline: |
# Inline replacement for docker/setup-buildx-action. The builder
# name (BUILDER) is unique per game and per job, so concurrent
# builds across games don't fight over a shared `default`
# builder either. Remove any same-named builder first in case a
# previous run on this runner left one behind.
env:
BUILDER: builder-${{ inputs.game-id }}-api-core
run: |
cat > /tmp/buildkitd.toml <<'EOF'
[registry."docker.io"]
mirrors = ["192.168.1.52:5000"]
[registry."192.168.1.52:5000"]
http = true
insecure = true
EOF
docker buildx rm "$BUILDER" 2>/dev/null || true
docker buildx create --name "$BUILDER" --use --bootstrap \
--driver docker-container \
--config /tmp/buildkitd.toml
- name: Log in to Gitea registry
env:
@@ -67,28 +83,33 @@ jobs:
printenv NPMRC > /tmp/.npmrc
- name: Build & push api-core
uses: docker/build-push-action@v6.16.0
with:
context: .
file: ./api/core/Dockerfile
push: true
tags: |
git.unom.io/${{ gitea.repository }}/api-core:latest
git.unom.io/${{ gitea.repository }}/api-core:${{ gitea.sha }}
secret-files: |
env=/tmp/.env.prod
npmrc=/tmp/.npmrc
cache-from: |
type=registry,ref=git.unom.io/${{ gitea.repository }}/api-core:cache
# mode=min: export only the final stage's layers. mode=max
# was re-uploading the bun-install cache mount (~40-60s) to
# the Gitea OCI registry on every push, even no-op deploys.
# Trade-off: a cold buildkitd will re-run `bun install` from
# scratch on the installer stage instead of importing it
# from registry cache — a few-second tax in exchange for
# not paying the export tax on every run.
cache-to: |
type=registry,ref=git.unom.io/${{ gitea.repository }}/api-core:cache,mode=min
# Inline replacement for docker/build-push-action. Same
# tags / secrets / cache shape as before. cache-to uses
# mode=min (export only the final stage's layers) so the
# bun-install cache mount is not re-uploaded to the registry
# on every push; the cost is that a cold buildkitd re-runs
# `bun install` from scratch.
env:
BUILDER: builder-${{ inputs.game-id }}-api-core
IMAGE: git.unom.io/${{ gitea.repository }}/api-core
SHA: ${{ gitea.sha }}
run: |
docker buildx build \
--builder "$BUILDER" \
--push \
--file ./api/core/Dockerfile \
--tag "$IMAGE:latest" \
--tag "$IMAGE:$SHA" \
--secret id=env,src=/tmp/.env.prod \
--secret id=npmrc,src=/tmp/.npmrc \
--cache-from "type=registry,ref=$IMAGE:cache" \
--cache-to "type=registry,ref=$IMAGE:cache,mode=min" \
.
- name: Tear down builder
# Runs even when an earlier step in this job failed (`if: always()`),
# so the builder created in "Set up Docker Buildx" does not pile up
# on the runner host. Removal is best-effort: stderr is discarded
# and a non-zero exit from `docker buildx rm` is ignored.
if: always()
env:
BUILDER: builder-${{ inputs.game-id }}-api-core
run: |
docker buildx rm "$BUILDER" 2>/dev/null || true
deploy-api-core:
runs-on: ubuntu-24.04
@@ -165,14 +186,22 @@ jobs:
- uses: actions/checkout@v4.2.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3.10.0
with:
config-inline: |
# See the build-api-core step of the same name above for why
# this is inline `docker buildx` rather than docker/setup-buildx-action.
env:
BUILDER: builder-${{ inputs.game-id }}-web
run: |
cat > /tmp/buildkitd.toml <<'EOF'
[registry."docker.io"]
mirrors = ["192.168.1.52:5000"]
[registry."192.168.1.52:5000"]
http = true
insecure = true
EOF
docker buildx rm "$BUILDER" 2>/dev/null || true
docker buildx create --name "$BUILDER" --use --bootstrap \
--driver docker-container \
--config /tmp/buildkitd.toml
- name: Log in to Gitea registry
env:
@@ -190,22 +219,31 @@ jobs:
printenv NPMRC > /tmp/.npmrc
- name: Build & push web
uses: docker/build-push-action@v6.16.0
with:
context: .
file: ./apps/web/Dockerfile
push: true
tags: |
git.unom.io/${{ gitea.repository }}/web:latest
git.unom.io/${{ gitea.repository }}/web:${{ gitea.sha }}
secret-files: |
env=/tmp/.env.prod
npmrc=/tmp/.npmrc
cache-from: |
type=registry,ref=git.unom.io/${{ gitea.repository }}/web:cache
# See the api-core cache-to block above for the mode=min rationale.
cache-to: |
type=registry,ref=git.unom.io/${{ gitea.repository }}/web:cache,mode=min
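# Inline equivalent of docker/build-push-action. cache-to uses
# mode=min (export only the final stage's layers) for the same
# reason as the api-core build: it avoids re-uploading the full
# build cache to the registry on every push.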
env:
BUILDER: builder-${{ inputs.game-id }}-web
IMAGE: git.unom.io/${{ gitea.repository }}/web
SHA: ${{ gitea.sha }}
run: |
docker buildx build \
--builder "$BUILDER" \
--push \
--file ./apps/web/Dockerfile \
--tag "$IMAGE:latest" \
--tag "$IMAGE:$SHA" \
--secret id=env,src=/tmp/.env.prod \
--secret id=npmrc,src=/tmp/.npmrc \
--cache-from "type=registry,ref=$IMAGE:cache" \
--cache-to "type=registry,ref=$IMAGE:cache,mode=min" \
.
- name: Tear down builder
# Runs even when an earlier step in this job failed (`if: always()`),
# so the web builder created in "Set up Docker Buildx" does not pile
# up on the runner host. Removal is best-effort: stderr is discarded
# and a non-zero exit from `docker buildx rm` is ignored.
if: always()
env:
BUILDER: builder-${{ inputs.game-id }}-web
run: |
docker buildx rm "$BUILDER" 2>/dev/null || true
deploy-web:
runs-on: ubuntu-24.04