From f9b857aac2a8ff2512b6ac725857613f5ee1da31 Mon Sep 17 00:00:00 2001
From: enricobuehler
Date: Sat, 13 Jun 2026 09:35:28 +0000
Subject: [PATCH] feat(capture): true SHM path (PUNKTFUNK_FORCE_SHM) for race-free Mutter+NVIDIA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Empirically, Mutter+NVIDIA dmabuf capture has NO working GPU sync —
confirmed on worker-3: explicit sync fails buffer alloc (EINVAL, no cogl
sync_fd), and the dmabuf carries no implicit fence (EXPORT_SYNC_FILE
waited=false). So any dmabuf read — zero-copy import OR mmap — races
Mutter's render and flashes the buffer's previous frame. The prior "CPU
fallback" still listed DmaBuf in its buffer types, so Mutter kept handing
dmabufs and it never fixed anything (got worse).

PUNKTFUNK_FORCE_SHM=1 offers MemPtr+MemFd ONLY (no DmaBuf), forcing Mutter
to glReadPixels-download into mappable memory — which orders against its
render, so the frame is complete + current by construction (race-free).
Costs the download (~3 ms) + zero-copy; correct at 1080p/4K60.
KWin/gamescope are unaffected (they blit into the buffer, no
read-before-render race) and keep zero-copy.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 crates/punktfunk-host/src/capture/linux.rs | 38 ++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/crates/punktfunk-host/src/capture/linux.rs b/crates/punktfunk-host/src/capture/linux.rs
index abd7d7e..e2f1d0f 100644
--- a/crates/punktfunk-host/src/capture/linux.rs
+++ b/crates/punktfunk-host/src/capture/linux.rs
@@ -665,6 +665,29 @@ mod pipewire {
         })
     }
 
+    /// Build a Buffers param for a TRUE SHM path: MemPtr + MemFd only, NO DmaBuf. Forces the
+    /// producer to download into mappable memory (Mutter's `glReadPixels`), which orders against its
+    /// render — so the frame is complete and current by construction. This is the only race-free
+    /// capture of Mutter's virtual monitor on NVIDIA: the compositor renders straight into the buffer
+    /// pool, NVIDIA attaches no implicit dmabuf fence (verified: `EXPORT_SYNC_FILE` waited=false) and
+    /// can't produce an explicit sync_fd, so any dmabuf read (zero-copy OR mmap) races the render and
+    /// flashes the buffer's previous frame. Excluding DmaBuf is what makes the difference vs.
+    /// `build_mappable_buffers` (which still let Mutter hand dmabufs).
+    fn build_shm_only_buffers() -> Result<Vec<u8>> {
+        serialize_pod(pw::spa::pod::Object {
+            type_: pw::spa::utils::SpaTypes::ObjectParamBuffers.as_raw(),
+            id: pw::spa::param::ParamType::Buffers.as_raw(),
+            properties: vec![pw::spa::pod::Property {
+                key: pw::spa::sys::SPA_PARAM_BUFFERS_dataType,
+                flags: pw::spa::pod::PropertyFlags::empty(),
+                value: pw::spa::pod::Value::Int(
+                    (1i32 << pw::spa::sys::SPA_DATA_MemPtr)
+                        | (1i32 << pw::spa::sys::SPA_DATA_MemFd),
+                ),
+            }],
+        })
+    }
+
     /// Build a Buffers param requesting dmabuf-only buffers.
     fn build_dmabuf_buffers() -> Result<Vec<u8>> {
         serialize_pod(pw::spa::pod::Object {
@@ -736,8 +759,16 @@ mod pipewire {
         if importer.is_some() && !modifiers.contains(&0) {
             modifiers.push(0); // DRM_FORMAT_MOD_LINEAR
         }
-        let want_dmabuf = importer.is_some() && !modifiers.is_empty();
-        if zerocopy && !want_dmabuf {
+        // PUNKTFUNK_FORCE_SHM=1 forces the race-free download path (SHM, no dmabuf) — required on
+        // Mutter+NVIDIA where dmabuf capture has no working sync and shows stale frames. KWin/
+        // gamescope don't need it (they blit into the buffer, so no read-before-render race).
+        let force_shm = std::env::var("PUNKTFUNK_FORCE_SHM").as_deref() == Ok("1");
+        let want_dmabuf = importer.is_some() && !modifiers.is_empty() && !force_shm;
+        if force_shm {
+            tracing::info!(
+                "capture: PUNKTFUNK_FORCE_SHM — race-free SHM download path (no dmabuf, no zero-copy)"
+            );
+        } else if zerocopy && !want_dmabuf {
             tracing::warn!("zero-copy: no EGL-importable dmabuf modifiers — using CPU path");
         } else if want_dmabuf {
             tracing::info!(
@@ -1069,6 +1100,9 @@ mod pipewire {
                 Some(build_dmabuf_format(&modifiers, preferred)?),
                 Some(build_dmabuf_buffers()?),
             )
+        } else if force_shm {
+            // True SHM: exclude DmaBuf so Mutter MUST download (glReadPixels orders against render).
+            (None, Some(build_shm_only_buffers()?))
         } else {
             // CPU path still accepts mappable dmabufs (gamescope offers only those once its
             // modifier-bearing format pod wins the intersection).