Files
punktfunk/crates/punktfunk-host/src/drm_sync.rs
T
enricobuehler 92c6da9546
ci / web (push) Failing after 42s
apple / swift (push) Failing after 1m5s
ci / rust (push) Failing after 1m10s
ci / docs-site (push) Failing after 44s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 5s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 5s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 5s
deb / build-publish (push) Successful in 2m54s
docker / deploy-docs (push) Successful in 18s
rpm / build-publish (push) Successful in 5m13s
fix(capture/mutter): restore zero-copy + sync via dmabuf implicit fence
The previous attempt (8531135) dropped zero-copy on Mutter+NVIDIA for a sticky
CPU/SHM fallback that (a) still listed SPA_DATA_DmaBuf in its buffer types, so
Mutter kept handing dmabufs that got mmap-read UNsynced — making the flashing
worse, not better — and (b) hinged on producer explicit sync, which Mutter+NVIDIA
cannot do (`error alloc buffers` / no cogl sync_fd, confirmed in worker-3 logs).

Revert the capture restructure to the original zero-copy dmabuf path, and fix the
NVIDIA stale-frame race the RIGHT way for a producer that can't do explicit sync:
the consumer snapshots the dmabuf's implicit fence (DMA_BUF_IOCTL_EXPORT_SYNC_FILE)
and waits the producer's render before sampling (new dmabuf_fence module, ioctl
number unit-tested). Covers the GPU import and the CPU mmap read. Logs once whether
a render was actually in flight (waited=true → the driver fences and the race is
closed; false → no implicit fence, so we learn zero-copy still needs SHM here).

drm_sync (the explicit-sync primitive) is kept and verified but marked unused —
no targeted compositor produces a usable sync_fd today; ready to wire in when one
does. The Bug-2 input fix (held-key release on disconnect) from 8531135 is kept.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 09:28:17 +00:00

218 lines
7.5 KiB
Rust

//! Minimal DRM timeline-syncobj operations — the consumer side of PipeWire explicit sync
//! (`SPA_META_SyncTimeline`).
//!
//! RETAINED BUT CURRENTLY UNUSED: producer-driven explicit sync is the "right" fix, but no
//! compositor we target produces a usable sync_fd today — Mutter+NVIDIA fails buffer allocation
//! (`error alloc buffers`, no cogl sync_fd), KWin/gamescope blit so they don't race at all. We sync
//! zero-copy from the consumer side instead (see [`crate::dmabuf_fence`]). This module is kept,
//! verified (ioctl numbers + a live signal→wait round trip), ready to wire in the moment a producer
//! gains working `SPA_META_SyncTimeline`.
#![allow(dead_code)]
//!
//! Compositors that render directly into the PipeWire buffer pool (Mutter's virtual
//! monitors) hand buffers over at GPU-submit time; on drivers without implicit dmabuf
//! fencing (NVIDIA) reading immediately races the render and shows the buffer's
//! *previous* contents. With explicit sync the producer attaches a timeline syncobj:
//! wait the acquire point before touching the buffer, signal the release point when done.
//!
//! Syncobjs are DRM-core objects: any render node can import and wait them, so this
//! opens its own fd independent of the capture GPU path.
use anyhow::{bail, Result};
use std::os::fd::RawFd;
/// Build a DRM `_IOWR` ioctl request number, mirroring `DRM_IOWR(nr, type)` from `drm.h`.
///
/// Encoding used here: direction in bits 30..=31 (`3` = read|write), argument size in
/// bits 16..=29, the DRM magic `'d'` (0x64) in bits 8..=15, and the command number in
/// bits 0..=7.
///
/// * `nr` — DRM command number (e.g. `0xC2` for `SYNCOBJ_FD_TO_HANDLE`).
/// * `size` — `size_of` the `#[repr(C)]` argument struct passed to the ioctl.
///
/// # Panics
/// Panics if `nr` does not fit in 8 bits or `size` does not fit in 14 bits. When the
/// function is used to initialise a `const`, that becomes a compile-time error — which is
/// the point: an oversized value would otherwise silently spill into the neighbouring bit
/// field and encode a *different* ioctl.
const fn iowr(nr: u32, size: usize) -> u64 {
    const DIR_READ_WRITE: u64 = 3;
    const DRM_MAGIC: u64 = 0x64;
    const SIZE_BITS: u32 = 14;

    assert!(nr <= 0xFF, "ioctl command number does not fit in 8 bits");
    assert!(
        size < (1usize << SIZE_BITS),
        "ioctl argument size does not fit in 14 bits"
    );

    (DIR_READ_WRITE << 30) | ((size as u64) << 16) | (DRM_MAGIC << 8) | nr as u64
}
/// Argument struct for `DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE` (and, in the tests, the reverse
/// `HANDLE_TO_FD` ioctl): converts between a syncobj file descriptor and a device-local
/// handle.
///
/// `#[repr(C)]` because the kernel reads and writes this memory directly. Its size
/// (16 bytes) is baked into the ioctl number, so the fields must not be changed.
#[repr(C)]
#[derive(Default)]
struct DrmSyncobjHandle {
/// Device-local syncobj handle: output of FD_TO_HANDLE, input of HANDLE_TO_FD.
handle: u32,
/// Operation flags; always left at 0 in this file.
flags: u32,
/// Syncobj file descriptor: input of FD_TO_HANDLE, output of HANDLE_TO_FD.
fd: i32,
/// Padding; left at 0.
pad: u32,
}
/// Argument struct for `DRM_IOCTL_SYNCOBJ_DESTROY`: releases a device-local syncobj handle.
///
/// `#[repr(C)]`, 8 bytes; the size is part of the ioctl number.
#[repr(C)]
#[derive(Default)]
struct DrmSyncobjDestroy {
/// Handle to release.
handle: u32,
/// Padding; left at 0.
pad: u32,
}
/// Argument struct for `DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT`: wait for timeline points on one
/// or more syncobjs.
///
/// `#[repr(C)]`, 40 bytes; the size is part of the ioctl number.
#[repr(C)]
#[derive(Default)]
struct DrmSyncobjTimelineWait {
/// User-space address of an array of `u32` syncobj handles, stored as an integer.
handles: u64,
/// User-space address of an array of `u64` timeline points, one per handle.
points: u64,
/// Absolute CLOCK_MONOTONIC deadline, nanoseconds.
timeout_nsec: i64,
/// Number of entries in the `handles` / `points` arrays.
count_handles: u32,
/// Wait flags (this file only sets `DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT`).
flags: u32,
/// Output field written by the kernel; never read in this file.
/// NOTE(review): presumably the index of the first signaled handle — confirm in drm.h.
first_signaled: u32,
/// Padding; left at 0.
pad: u32,
}
/// Argument struct for `DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL`: a list of (handle, point)
/// pairs to signal.
///
/// `#[repr(C)]`, 24 bytes; the size is part of the ioctl number.
#[repr(C)]
#[derive(Default)]
struct DrmSyncobjTimelineArray {
/// User-space address of an array of `u32` syncobj handles, stored as an integer.
handles: u64,
/// User-space address of an array of `u64` timeline points, one per handle.
points: u64,
/// Number of entries in the `handles` / `points` arrays.
count_handles: u32,
/// Operation flags; this file always passes 0.
flags: u32,
}
// ioctl request numbers. Each is computed from the DRM command number and the size of its
// `#[repr(C)]` argument struct, so a layout change shows up as a changed number; the unit
// test `ioctl_numbers_match_drm_h` pins the expected values.

/// Release a device-local syncobj handle (command 0xC0).
const DRM_IOCTL_SYNCOBJ_DESTROY: u64 = iowr(0xC0, std::mem::size_of::<DrmSyncobjDestroy>());
/// Import a syncobj fd as a device-local handle (command 0xC2).
const DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE: u64 = iowr(0xC2, std::mem::size_of::<DrmSyncobjHandle>());
/// Wait for timeline points to be signaled (command 0xCA).
const DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT: u64 =
iowr(0xCA, std::mem::size_of::<DrmSyncobjTimelineWait>());
/// Signal timeline points (command 0xCD).
const DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL: u64 =
iowr(0xCD, std::mem::size_of::<DrmSyncobjTimelineArray>());
/// The producer's point may not be attached yet when the buffer reaches us.
/// Passing this wait flag (bit 1) makes the wait also cover that not-yet-submitted case.
const DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT: u32 = 1 << 1;
/// An open DRM device used to issue syncobj ioctls (import, wait, signal, destroy).
///
/// Created by [`DrmSync::open`]; the descriptor is closed when the value is dropped.
pub struct DrmSync {
/// Raw descriptor of the opened render node; owned by this struct and closed in `Drop`.
fd: RawFd,
}
impl DrmSync {
    /// Open `/dev/dri/renderD128` read-write (close-on-exec) for syncobj ioctls.
    ///
    /// # Errors
    /// Returns the OS error from `open(2)` if the render node cannot be opened.
    pub fn open() -> Result<DrmSync> {
        let path = c"/dev/dri/renderD128";
        // SAFETY: `path` is a NUL-terminated C string literal that outlives the call.
        let fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) };
        if fd < 0 {
            bail!("open /dev/dri/renderD128 for syncobj ops: {}", errno());
        }
        Ok(DrmSync { fd })
    }

    /// Import a syncobj fd into a (temporary) handle on our device.
    ///
    /// The caller owns the returned handle and must release it with [`Self::destroy`].
    /// `syncobj_fd` itself is only borrowed; it is not closed here.
    ///
    /// # Errors
    /// Returns the OS error if `DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE` fails.
    fn import(&self, syncobj_fd: RawFd) -> Result<u32> {
        let mut req = DrmSyncobjHandle {
            fd: syncobj_fd,
            ..Default::default()
        };
        // SAFETY: `req` is a live, correctly sized `#[repr(C)]` argument for this ioctl.
        let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &mut req) };
        if r < 0 {
            bail!("SYNCOBJ_FD_TO_HANDLE: {}", errno());
        }
        Ok(req.handle)
    }

    /// Release a handle obtained from [`Self::import`].
    ///
    /// Best-effort cleanup: the ioctl result is deliberately ignored because there is
    /// nothing useful a caller could do if releasing a temporary handle fails.
    fn destroy(&self, handle: u32) {
        let mut req = DrmSyncobjDestroy {
            handle,
            ..Default::default()
        };
        // SAFETY: `req` is a live, correctly sized `#[repr(C)]` argument for this ioctl.
        unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_DESTROY, &mut req) };
    }

    /// Block until `point` on the producer's timeline is signaled (the buffer's contents
    /// are ready), or `timeout_ms` passes.
    ///
    /// The timeout is converted to an absolute `CLOCK_MONOTONIC` deadline with saturating
    /// arithmetic, so very large `timeout_ms` values mean "wait (practically) forever"
    /// instead of overflowing. A wait interrupted by a signal (`EINTR`) is retried; because
    /// the deadline is absolute, retrying never extends the total wait.
    ///
    /// # Errors
    /// Returns an error if the clock cannot be read, the syncobj fd cannot be imported, or
    /// the wait ioctl fails — including when the deadline passes before the point signals.
    pub fn wait_point(&self, syncobj_fd: RawFd, point: u64, timeout_ms: u64) -> Result<()> {
        // Read the clock before importing so a failure here cannot leak a handle.
        let mut now = libc::timespec {
            tv_sec: 0,
            tv_nsec: 0,
        };
        // SAFETY: `now` is a valid, writable `timespec`.
        if unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut now) } != 0 {
            bail!("clock_gettime(CLOCK_MONOTONIC): {}", errno());
        }

        // `time_t` / `c_long` are 32-bit on some targets; widen before multiplying.
        #[allow(clippy::unnecessary_cast)]
        let (now_sec, now_nsec) = (now.tv_sec as i64, now.tv_nsec as i64);
        let timeout_ns = i64::try_from(timeout_ms)
            .unwrap_or(i64::MAX)
            .saturating_mul(1_000_000);
        let deadline = now_sec
            .saturating_mul(1_000_000_000)
            .saturating_add(now_nsec)
            .saturating_add(timeout_ns);

        let handle = self.import(syncobj_fd)?;
        // The kernel ABI takes the array addresses as plain 64-bit integers.
        let handles = [handle];
        let points = [point];
        let mut req = DrmSyncobjTimelineWait {
            handles: handles.as_ptr() as u64,
            points: points.as_ptr() as u64,
            timeout_nsec: deadline,
            count_handles: 1,
            flags: DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
            ..Default::default()
        };

        let outcome = loop {
            // SAFETY: `req` is a live, correctly sized argument, and the `handles` /
            // `points` arrays it points at outlive the call.
            let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &mut req) };
            if r >= 0 {
                break Ok(());
            }
            // Capture errno immediately, before any other libc call can clobber it.
            let err = errno();
            if err.kind() != std::io::ErrorKind::Interrupted {
                break Err(err);
            }
        };

        // Always release the temporary handle, whether the wait succeeded or not.
        self.destroy(handle);
        if let Err(err) = outcome {
            bail!("SYNCOBJ_TIMELINE_WAIT(point {point}): {err}");
        }
        Ok(())
    }

    /// Signal `point` on the consumer release timeline — the producer may reuse the
    /// buffer. Must be called for every buffer that carried sync metadata, even when the
    /// frame was skipped, or the producer stalls waiting for it.
    ///
    /// # Errors
    /// Returns an error if the syncobj fd cannot be imported or the signal ioctl fails.
    pub fn signal_point(&self, syncobj_fd: RawFd, point: u64) -> Result<()> {
        let handle = self.import(syncobj_fd)?;
        // The kernel ABI takes the array addresses as plain 64-bit integers.
        let handles = [handle];
        let points = [point];
        let mut req = DrmSyncobjTimelineArray {
            handles: handles.as_ptr() as u64,
            points: points.as_ptr() as u64,
            count_handles: 1,
            flags: 0,
        };
        // SAFETY: `req` is a live, correctly sized argument, and the `handles` / `points`
        // arrays it points at outlive the call.
        let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &mut req) };
        // Capture errno before `destroy` issues another ioctl that could overwrite it.
        let saved = errno();
        self.destroy(handle);
        if r < 0 {
            bail!("SYNCOBJ_TIMELINE_SIGNAL(point {point}): {saved}");
        }
        Ok(())
    }
}
impl Drop for DrmSync {
    /// Close the device descriptor. The result of `close` is deliberately discarded:
    /// nothing can be done about a failed close while dropping.
    fn drop(&mut self) {
        let fd = self.fd;
        // SAFETY: within this file `fd` is only ever set from a successful `libc::open`
        // in `DrmSync::open`, and it is closed exactly once, here.
        let _ = unsafe { libc::close(fd) };
    }
}
/// Snapshot the calling thread's most recent OS error as an [`std::io::Error`].
///
/// Call this immediately after the failing libc call; any later libc call may overwrite
/// the underlying `errno` value.
fn errno() -> std::io::Error {
    use std::io::Error;
    Error::last_os_error()
}
// `DrmSync::open` must not panic the PipeWire thread; everything is Result-based and the
// caller degrades to unsynchronized capture (with a loud warning) when it fails.
// Unit tests: one pure check of the computed ioctl numbers, and one live round trip that
// only runs when a render node can be opened.
#[cfg(test)]
mod tests {
use super::*;
/// The ioctl numbers must match drm.h exactly — computed, so lock them down.
///
/// The size byte in each expected value (0x10, 0x08, 0x28, 0x18) is the `size_of` the
/// matching `#[repr(C)]` argument struct, so this also guards the struct layouts.
#[test]
fn ioctl_numbers_match_drm_h() {
assert_eq!(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, 0xC010_64C2);
assert_eq!(DRM_IOCTL_SYNCOBJ_DESTROY, 0xC008_64C0);
assert_eq!(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, 0xC028_64CA);
assert_eq!(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, 0xC018_64CD);
}
/// Round-trip against the real DRM device when one exists (CI containers skip).
///
/// Only a failure to open the render node is treated as "skip"; once the device is
/// open, every later step is asserted.
#[test]
fn signal_then_wait_roundtrip() {
let Ok(sync) = DrmSync::open() else {
eprintln!("no render node — skipping");
return;
};
// Create a fresh syncobj (CREATE = 0xBF), export it, signal point 1, wait point 1.
// Test-local mirror of the CREATE ioctl argument (8 bytes: handle out, flags in).
#[repr(C)]
#[derive(Default)]
struct Create {
handle: u32,
flags: u32,
}
// CREATE and HANDLE_TO_FD are only needed here, so they are defined locally rather
// than at module level.
const CREATE: u64 = iowr(0xBF, std::mem::size_of::<Create>());
const HANDLE_TO_FD: u64 = iowr(0xC1, std::mem::size_of::<DrmSyncobjHandle>());
let mut c = Create::default();
assert!(unsafe { libc::ioctl(sync.fd, CREATE, &mut c) } >= 0);
// Export the new syncobj as an fd: this is the form `signal_point` / `wait_point` take.
let mut h = DrmSyncobjHandle {
handle: c.handle,
..Default::default()
};
assert!(unsafe { libc::ioctl(sync.fd, HANDLE_TO_FD, &mut h) } >= 0);
// Signal first, then wait on the same point with a 100 ms timeout.
sync.signal_point(h.fd, 1).expect("signal");
sync.wait_point(h.fd, 1, 100).expect("wait after signal");
// Release the exported fd and the original handle (skipped if an assertion above panics).
unsafe { libc::close(h.fd) };
sync.destroy(c.handle);
}
}