From 0c5f9a347a588d90dad340f71b44bac5389d8d82 Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Mon, 25 May 2026 12:23:35 +0200 Subject: [PATCH] fix(ci): inline docker buildx to stop concurrent-build cache contention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two failing runs across refaire (run 1709) and rememed (run 1624) both crash at the same step (`Set up Docker Buildx`) with a node error on dist/index.js line 1, AND both reference the same act cache dir: /root/.cache/act/6a647958c11e138a6cfcaf32d2b372bc8e0c97871d617bfb441d003d505b77cf act keys remote-action cache entries by repo URL alone — pinning to `@v3.10.0` doesn't help: every game that uses `docker/setup-buildx-action` lands in the same dir. When you push N games at once on home-runner-1, the act-runner does parallel `git clone` ops into that shared dir; the loser's pull aborts ("worktree contains unstaged changes") and leaves dist/ half-written, so the next job's `node dist/index.js` throws on line 1 → step fails. That's the entire flake. Fix: drop the two remote actions that were racing — setup-buildx-action and build-push-action — and replace them with inline `docker buildx create` + `docker buildx build --push` shell. Nothing is fetched from GitHub at runtime, no cache dir is shared, the failure mode disappears. Same image, same tags, same registry mirror, same cache-from/cache-to shape, same secret-files mount (`--secret id=...,src=...`). Each job gets a uniquely-named builder (`builder-<game-id>-<job>`) and a teardown step so the runner host's docker state doesn't accumulate abandoned builders. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitea/workflows/build-deploy-game.yml | 158 +++++++++++++++---------- 1 file changed, 98 insertions(+), 60 deletions(-) diff --git a/.gitea/workflows/build-deploy-game.yml b/.gitea/workflows/build-deploy-game.yml index 15a86da..375af7e 100644 --- a/.gitea/workflows/build-deploy-game.yml +++ b/.gitea/workflows/build-deploy-game.yml @@ -20,12 +20,17 @@ name: Build & Deploy played game (reusable) # - STEP_CA_PROVISIONER_PASSWORD — for the cert-init container # # Notes on reliability: -# - All remote actions are pinned to immutable patch tags so the act-runner -# action cache hash is stable run-to-run. The cluster of "Cannot find -# module .../dist/index.js" failures on home-runner-1 was act re-using a -# partial cache dir for a moving tag (`@v3`); pinning kills that mode. -# - Registry login is an inline shell step instead of docker/login-action. -# One fewer remote-action download = one fewer failure point per job. +# - act keys its remote-action cache by repo URL alone (not by full +# ref), so every concurrent game build on home-runner-1 shares the +# same /root/.cache/act/<hash> dir for setup-buildx-action + +# build-push-action. Two builds racing on that dir corrupt the +# checked-out tree ("worktree contains unstaged changes") and the +# next read of dist/index.js throws → step exits 1. Pinning to +# patch tags didn't help because the cache key ignores the ref. +# Fix: do buildx setup + build via inline `docker buildx ...` +# shell, so nothing needs to be cloned from GitHub at runtime. +# - Registry login is also an inline shell step. One fewer remote- +# action download = one fewer failure point per job. 
on: workflow_call: @@ -42,14 +47,25 @@ jobs: - uses: actions/checkout@v4.2.2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3.10.0 - with: - config-inline: | - [registry."docker.io"] - mirrors = ["192.168.1.52:5000"] - [registry."192.168.1.52:5000"] - http = true - insecure = true + # Inline replacement for docker/setup-buildx-action. The builder + # name (BUILDER) is unique-per-game-per-job so concurrent + # builds across games don't fight over a shared `default` + # builder either. Stop+rm in case a previous run on this + # runner left one behind. + env: + BUILDER: builder-${{ inputs.game-id }}-api-core + run: | + cat > /tmp/buildkitd.toml <<'EOF' + [registry."docker.io"] + mirrors = ["192.168.1.52:5000"] + [registry."192.168.1.52:5000"] + http = true + insecure = true + EOF + docker buildx rm "$BUILDER" 2>/dev/null || true + docker buildx create --name "$BUILDER" --use --bootstrap \ + --driver docker-container \ + --config /tmp/buildkitd.toml - name: Log in to Gitea registry env: @@ -67,28 +83,33 @@ jobs: printenv NPMRC > /tmp/.npmrc - name: Build & push api-core - uses: docker/build-push-action@v6.16.0 - with: - context: . - file: ./api/core/Dockerfile - push: true - tags: | - git.unom.io/${{ gitea.repository }}/api-core:latest - git.unom.io/${{ gitea.repository }}/api-core:${{ gitea.sha }} - secret-files: | - env=/tmp/.env.prod - npmrc=/tmp/.npmrc - cache-from: | - type=registry,ref=git.unom.io/${{ gitea.repository }}/api-core:cache - # mode=min: export only the final stage's layers. mode=max - # was re-uploading the bun-install cache mount (~40–60s) to - # the Gitea OCI registry on every push, even no-op deploys. - # Trade-off: a cold buildkitd will re-run `bun install` from - # scratch on the installer stage instead of importing it - # from registry cache — a few-second tax in exchange for - # not paying the export tax on every run. 
- cache-to: | - type=registry,ref=git.unom.io/${{ gitea.repository }}/api-core:cache,mode=min + # Inline replacement for docker/build-push-action. Same + # tags / secrets / cache shape as before; mode=min on + # cache-to to skip re-exporting the bun-install cache + # mount on every push (mode=max re-uploaded it, ~40–60s per push). + env: + BUILDER: builder-${{ inputs.game-id }}-api-core + IMAGE: git.unom.io/${{ gitea.repository }}/api-core + SHA: ${{ gitea.sha }} + run: | + docker buildx build \ + --builder "$BUILDER" \ + --push \ + --file ./api/core/Dockerfile \ + --tag "$IMAGE:latest" \ + --tag "$IMAGE:$SHA" \ + --secret id=env,src=/tmp/.env.prod \ + --secret id=npmrc,src=/tmp/.npmrc \ + --cache-from "type=registry,ref=$IMAGE:cache" \ + --cache-to "type=registry,ref=$IMAGE:cache,mode=min" \ + . + + - name: Tear down builder + if: always() + env: + BUILDER: builder-${{ inputs.game-id }}-api-core + run: | + docker buildx rm "$BUILDER" 2>/dev/null || true deploy-api-core: runs-on: ubuntu-24.04 @@ -165,14 +186,22 @@ jobs: - uses: actions/checkout@v4.2.2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3.10.0 - with: - config-inline: | - [registry."docker.io"] - mirrors = ["192.168.1.52:5000"] - [registry."192.168.1.52:5000"] - http = true - insecure = true + # See the build-api-core step of the same name above for why + # this is inline `docker buildx` rather than docker/setup-buildx-action. 
+ env: + BUILDER: builder-${{ inputs.game-id }}-web + run: | + cat > /tmp/buildkitd.toml <<'EOF' + [registry."docker.io"] + mirrors = ["192.168.1.52:5000"] + [registry."192.168.1.52:5000"] + http = true + insecure = true + EOF + docker buildx rm "$BUILDER" 2>/dev/null || true + docker buildx create --name "$BUILDER" --use --bootstrap \ + --driver docker-container \ + --config /tmp/buildkitd.toml - name: Log in to Gitea registry env: @@ -190,22 +219,31 @@ jobs: printenv NPMRC > /tmp/.npmrc - name: Build & push web - uses: docker/build-push-action@v6.16.0 - with: - context: . - file: ./apps/web/Dockerfile - push: true - tags: | - git.unom.io/${{ gitea.repository }}/web:latest - git.unom.io/${{ gitea.repository }}/web:${{ gitea.sha }} - secret-files: | - env=/tmp/.env.prod - npmrc=/tmp/.npmrc - cache-from: | - type=registry,ref=git.unom.io/${{ gitea.repository }}/web:cache - # See the api-core cache-to block above for the mode=min rationale. - cache-to: | - type=registry,ref=git.unom.io/${{ gitea.repository }}/web:cache,mode=min + # Inline equivalent of docker/build-push-action. See the + # "Build & push api-core" step above for the mode=min rationale. + env: + BUILDER: builder-${{ inputs.game-id }}-web + IMAGE: git.unom.io/${{ gitea.repository }}/web + SHA: ${{ gitea.sha }} + run: | + docker buildx build \ + --builder "$BUILDER" \ + --push \ + --file ./apps/web/Dockerfile \ + --tag "$IMAGE:latest" \ + --tag "$IMAGE:$SHA" \ + --secret id=env,src=/tmp/.env.prod \ + --secret id=npmrc,src=/tmp/.npmrc \ + --cache-from "type=registry,ref=$IMAGE:cache" \ + --cache-to "type=registry,ref=$IMAGE:cache,mode=min" \ + . + + - name: Tear down builder + if: always() + env: + BUILDER: builder-${{ inputs.game-id }}-web + run: | + docker buildx rm "$BUILDER" 2>/dev/null || true deploy-web: runs-on: ubuntu-24.04