From ba68a98873290efd3eafa320a247d8d689474a2b Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Fri, 26 Jun 2026 09:00:30 +0000 Subject: [PATCH] docs(host): prove every unsafe block in the Linux FFI files + gate them (unsafe-proof program 2/N) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continues the structural unsafe-proof program (every unsafe carries a documented proof of soundness; the file gains #![deny(clippy::undocumented_unsafe_blocks)] so it stays proven). This batch covers all 10 remaining pure-Linux files (104 blocks), each proof stating the REAL invariant — not boilerplate: zerocopy/cuda.rs (26) leaked process-lifetime libcuda fn-ptr table; opaque CUcontext never dereferenced; free-exactly-once via the Arc> ownership graph; dmabuf fd take/close split zerocopy/egl.rs (18) eglGetProcAddress'd procs with the GL context current; EGLImage liveness; the two-call modifier-query bounds zerocopy/vulkan.rs (4) copy-bounds arithmetic (src_size>=span); Send = thread confinement to the punktfunk-pipewire thread dmabuf_fence.rs (4) poll/ioctl/close fd liveness + ownership capture/linux/mod.rs (16) spa_data repr(transparent) cast; null-checked spa derefs; single-loop-thread buffer ownership until requeue inject/linux/gamepad.rs (10) uinput ioctl request-number ↔ struct-size match (static-asserted); InputEventRaw no-padding for the byte cast encode/linux/vaapi.rs (15) + encode/linux/mod.rs (9) ffmpeg object ownership/ free ladders; VAAPI/DRM graph; Send = single-thread transfer inject/linux/wlr.rs (2), vdisplay/linux/kwin.rs (1) No memory-unsafety SUSPECT blocks were found — the unsafe is sound. The vaapi agent did flag two real AVBufferRef *leaks* (not UB) in DmabufInner::open; marked inline with NOTE(leak) and addressed in a follow-up. Verified: cargo clippy -p punktfunk-host --all-targets -- -D warnings is clean (each file's deny gate hard-errors on any undocumented block). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../punktfunk-host/src/capture/linux/mod.rs | 77 ++++++++++ crates/punktfunk-host/src/encode/linux/mod.rs | 53 +++++++ .../punktfunk-host/src/encode/linux/vaapi.rs | 134 +++++++++++++++++ .../src/inject/linux/gamepad.rs | 43 ++++++ crates/punktfunk-host/src/inject/linux/wlr.rs | 10 ++ .../punktfunk-host/src/linux/dmabuf_fence.rs | 18 +++ .../punktfunk-host/src/linux/zerocopy/cuda.rs | 140 +++++++++++++++++- .../punktfunk-host/src/linux/zerocopy/egl.rs | 113 +++++++++++++- .../src/linux/zerocopy/vulkan.rs | 42 +++++- .../punktfunk-host/src/vdisplay/linux/kwin.rs | 7 + 10 files changed, 631 insertions(+), 6 deletions(-) diff --git a/crates/punktfunk-host/src/capture/linux/mod.rs b/crates/punktfunk-host/src/capture/linux/mod.rs index 276652e..b420119 100644 --- a/crates/punktfunk-host/src/capture/linux/mod.rs +++ b/crates/punktfunk-host/src/capture/linux/mod.rs @@ -17,6 +17,9 @@ //! instead of leaking it to process exit. The portal thread (when used) still parks on its zbus //! connection until process exit. +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] + use super::{CapturedFrame, Capturer, DmabufFrame, FramePayload, PixelFormat}; use anyhow::{anyhow, Context, Result}; use std::os::fd::OwnedFd; @@ -498,6 +501,12 @@ mod pipewire { impl DmabufMap { fn new(fd: i32, len: usize) -> Option { + // SAFETY: a null `addr` lets the kernel choose the mapping address; `fd` is a caller-owned + // dmabuf/MemFd fd, valid for the duration of this call, and `len` is the requested map length. + // `mmap` reads no Rust memory — it installs a fresh PROT_READ/MAP_SHARED page mapping and + // returns its base (or MAP_FAILED, checked below before `DmabufMap` adopts it). The returned + // region is a brand-new VMA, so it aliases no live Rust object, and it keeps the underlying + // object mapped independently of `fd` (which may be closed after this returns). let ptr = unsafe { libc::mmap( std::ptr::null_mut(), @@ -514,6 +523,11 @@ mod pipewire { impl Drop for DmabufMap { fn drop(&mut self) { + // SAFETY: `self.ptr`/`self.len` are exactly the base+length of a successful `mmap` in + // `DmabufMap::new` (constructed only when `ptr != MAP_FAILED`). This `DmabufMap` uniquely owns + // that mapping and `drop` runs once, so `munmap` releases a live mapping exactly once — no + // double-unmap. Every `&[u8]` derived from the mapping is bounded by this `DmabufMap`'s + // lifetime, so no borrow outlives the unmap. unsafe { libc::munmap(self.ptr, self.len); } @@ -719,6 +733,14 @@ mod pipewire { if !ud.active.load(Ordering::Relaxed) { return; } + // SAFETY: `spa_buf` is the `*mut spa_buffer` of the PipeWire buffer we dequeued and still hold for + // this `.process` callback (not requeued until after `consume_frame` returns), so it is live. The + // block null-checks `spa_buf`, requires `n_datas != 0`, and null-checks the `datas` array pointer + // before forming any slice. `(*spa_buf).datas` points to `n_datas` libspa `spa_data` structs, and + // `pw::spa::buffer::Data` is `#[repr(transparent)]` over `spa_data` (the same cast + // `Buffer::datas_mut` performs — see the function doc), so the pointer cast + length describe + // exactly that array, in bounds. The PipeWire loop is single-threaded and owns the buffer here, so + // this `&mut` slice is the only reference to it (no aliasing/data race). let datas: &mut [pw::spa::buffer::Data] = unsafe { if spa_buf.is_null() || (*spa_buf).n_datas == 0 || (*spa_buf).datas.is_null() { &mut [] @@ -783,6 +805,10 @@ mod pipewire { // dup the fd so it survives the SPA buffer recycle — the encode thread // imports it. (Content stability across the brief map+CSC window relies on // the compositor's buffer-pool depth, like any zero-copy capture.) + // SAFETY: `datas[0].fd()` is the dmabuf fd owned by the live PipeWire buffer (valid + // for this callback). `fcntl(fd, F_DUPFD_CLOEXEC, 0)` reads only the integer fd, + // touches no Rust memory, and returns a fresh independent CLOEXEC duplicate (or -1). + // The original stays owned by PipeWire; the dup is a new fd we own (checked >= 0). let dup = unsafe { libc::fcntl(datas[0].fd() as i32, libc::F_DUPFD_CLOEXEC, 0) }; if dup >= 0 { @@ -796,6 +822,10 @@ mod pipewire { pts_ns, format: fmt, payload: FramePayload::Dmabuf(DmabufFrame { + // SAFETY: `dup` is the fresh fd `fcntl(F_DUPFD_CLOEXEC)` just returned + // (checked `dup >= 0`); nothing else owns it, so `OwnedFd` takes sole + // ownership and closes it exactly once on drop — no alias, no + // double-close. fd: unsafe { OwnedFd::from_raw_fd(dup) }, fourcc, modifier: ud.modifier, @@ -930,6 +960,11 @@ mod pipewire { // cleanly if the real buffer is genuinely too small. MemPtr buffers (no fd) are same-process — // trust `d.data()`. let fd_len = if raw_fd > 0 { + // SAFETY: `libc::stat` is a C plain-old-data struct for which all-zero is a valid value, so + // `mem::zeroed()` is a sound initializer. `raw_fd` is the buffer's fd (`> 0` checked here) and + // valid for this callback; `fstat` writes metadata into `&mut st`, a live, aligned, + // correctly-sized stack `stat` that outlives the synchronous call. `st.st_size` is read only + // after the return value is confirmed `== 0`. `st` is a fresh local, so nothing aliases it. unsafe { let mut st: libc::stat = std::mem::zeroed(); (libc::fstat(raw_fd as i32, &mut st) == 0 && st.st_size > 0) @@ -946,6 +981,14 @@ mod pipewire { match DmabufMap::new(raw_fd as i32, map_len) { Some(m) => { _mapping = m; + // SAFETY: `_mapping` is the `DmabufMap` just stored; its `ptr`/`len` come from a + // successful `mmap` of `map_len` PROT_READ bytes, so `ptr` is non-null, page-aligned, + // and the VMA is one allocated object of `len` bytes valid for reads. In the common + // path `map_len == fd_len` (the fd's real size from `fstat`), so the mapping spans the + // whole object; the de-pad copy below is further bounded by the `offset <= buf.len()` + // and `needed > avail` guards. The `&[u8]` borrows `_mapping`, which lives to the end + // of `consume_frame`, so the slice never outlives the mapping, and the memory is only + // read here, so there is no aliasing/mutation. Some(unsafe { std::slice::from_raw_parts(_mapping.ptr as *const u8, _mapping.len) }) @@ -1177,24 +1220,43 @@ mod pipewire { // Latest-frame-only (OBS pattern): Mutter delivers buffers in bursts and // recycles its pool; an older queued buffer carries a STALE frame. Drain all // queued buffers, requeue the older ones, keep only the newest. + // SAFETY: `stream` is the live stream PipeWire passes into this `.process` callback on + // the loop thread, where `pw_stream_dequeue_buffer` is the documented call. It returns + // a `*mut pw_buffer` owned by the stream (or null when the queue is drained), + // null-checked before any use. The loop is single-threaded, so no concurrent access. let mut newest = unsafe { stream.dequeue_raw_buffer() }; if newest.is_null() { return; } let mut drained = 1u32; loop { + // SAFETY: same stream/loop-thread contract as the dequeue above; each call returns + // the next stream-owned `*mut pw_buffer` or null (null-checked before use). let next = unsafe { stream.dequeue_raw_buffer() }; if next.is_null() { break; } + // SAFETY: `newest` is a non-null `*mut pw_buffer` previously dequeued from this same + // stream and not yet requeued; `pw_stream_queue_buffer` hands ownership back to the + // stream. We immediately overwrite `newest = next`, so the requeued pointer is never + // touched again (no use-after-requeue). Loop thread, single-threaded. unsafe { stream.queue_raw_buffer(newest) }; newest = next; drained += 1; } + // SAFETY: `newest` is the non-null buffer we still own (dequeued, not requeued); + // `.buffer` is a `*mut spa_buffer` field libpipewire populated. This is a single field + // load through a valid pointer — no mutation or aliasing. let spa_buf = unsafe { (*newest).buffer }; // Inspect the newest buffer's header + first chunk for the diagnostic and the // CORRUPTED skip. SPA_META_Header is optional — `hdr` may be null. + // SAFETY: `spa_buf` is the `*mut spa_buffer` of the buffer we still hold. + // `spa_buffer_find_meta_data` scans that buffer's metadata array for a `SPA_META_Header` + // of at least `size_of::()` bytes and returns a pointer into the held + // buffer's metadata (or null). The size argument matches the struct the result is cast + // to, and the pointer stays valid as long as the buffer is held (until requeue). Null is + // handled below. let hdr = unsafe { spa::sys::spa_buffer_find_meta_data( spa_buf, @@ -1205,11 +1267,20 @@ mod pipewire { let hdr_flags = if hdr.is_null() { 0u32 } else { + // SAFETY: reached only when `hdr` is non-null; it points to a `spa_meta_header` + // inside the live buffer's metadata (returned for a size >= + // `size_of::()`, so `.flags` is in bounds). A single field read + // while the buffer is still held. unsafe { (*hdr).flags } }; // First data chunk's size + flags (used for the diagnostic + CORRUPTED check) // and its data type (a dmabuf legitimately reports chunk size 0, so the size-0 // stale skip only applies to mappable SHM buffers). + // SAFETY: every dereference is guarded in order before any field read — `spa_buf` + // non-null, `n_datas > 0`, the `datas` (`*mut spa_data`) array non-null, and the first + // element's `chunk` (`*mut spa_chunk`) non-null. `d0` is that first `spa_data` and `c` + // its chunk; reading `(*d0).type_`, `(*c).size`, `(*c).flags` are in-bounds field loads + // of libspa structs inside the buffer we still hold. Single-threaded loop, no mutation. let (chunk_size, chunk_flags, is_dmabuf) = unsafe { if !spa_buf.is_null() && (*spa_buf).n_datas > 0 @@ -1246,11 +1317,17 @@ mod pipewire { "capture: skipped a stale CORRUPTED/cursor buffer (GNOME)" ); } + // SAFETY: `newest` is the non-null buffer we own (dequeued, never requeued on this + // skip path); hand it back to the stream exactly once and return without touching it + // again. Loop thread inside `.process`. unsafe { stream.queue_raw_buffer(newest) }; return; } consume_frame(ud, spa_buf); + // SAFETY: `consume_frame` has finished reading `spa_buf` (and the `datas` borrows derived + // from `newest`), so requeuing the owned `newest` exactly once here is sound — no + // use-after-requeue. Loop thread inside `.process`. unsafe { stream.queue_raw_buffer(newest) }; })); if outcome.is_err() { diff --git a/crates/punktfunk-host/src/encode/linux/mod.rs b/crates/punktfunk-host/src/encode/linux/mod.rs index d9adede..ddd0fb3 100644 --- a/crates/punktfunk-host/src/encode/linux/mod.rs +++ b/crates/punktfunk-host/src/encode/linux/mod.rs @@ -8,6 +8,8 @@ //! does *not* accept — we expand it to `rgb0` (one padding byte/pixel, no colour math). //! The encoder is opened *without* a global header so VPS/SPS/PPS are emitted in-band on //! every IDR — the output is both a playable raw Annex-B stream and self-contained AUs. +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] use super::{Codec, EncodedFrame, Encoder}; use crate::capture::{CapturedFrame, FramePayload, PixelFormat}; @@ -79,6 +81,12 @@ impl CudaHw { impl Drop for CudaHw { fn drop(&mut self) { + // SAFETY: `frames_ref`/`device_ref` are the two non-null `AVBufferRef`s `CudaHw::new` created + // (it bails before returning `Self` if either alloc fails, so a live `CudaHw` always holds + // both). `av_buffer_unref` drops one reference and nulls the pointer through the `&mut`. This + // `Drop` runs exactly once and `CudaHw` owns these refs exclusively → no double-free / + // use-after-free. Frames are unref'd before the device (the frames ctx internally refs the + // device; refcounted, so the order is sound regardless). unsafe { ffi::av_buffer_unref(&mut self.frames_ref); ffi::av_buffer_unref(&mut self.device_ref); @@ -136,6 +144,13 @@ pub struct NvencEncoder { // `CudaHw` holds raw `AVBufferRef`s; the encoder lives on a single thread. The CPU encoder is // already `Send` via ffmpeg-next; assert it for the CUDA fields too. +// SAFETY: `NvencEncoder` owns an ffmpeg-next `Encoder`/`VideoFrame` (already `Send`) plus a `CudaHw` +// holding raw `AVBufferRef`s, which are not `Send` by default. The encoder is owned and driven by +// exactly ONE thread — the per-session encode thread it is moved to — and is only touched through +// `&mut self` methods, so it is never aliased or accessed concurrently. The wrapped libav contexts +// (and the shared `CUcontext` the `CudaHw` references) have no thread affinity, so transferring +// ownership across threads is sound. This asserts `Send` (transfer) only, extending ffmpeg-next's +// existing `Send` to the raw CUDA fields; `Sync` (shared `&`) is deliberately NOT implemented. unsafe impl Send for NvencEncoder {} impl NvencEncoder { @@ -162,6 +177,9 @@ impl NvencEncoder { } ffmpeg::init().context("ffmpeg init")?; if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() { + // SAFETY: `av_log_set_level` sets libav's global integer log level; `48` (= AV_LOG_DEBUG) + // is a valid level with no pointer args, and libav was just initialized by `ffmpeg::init()` + // above — always sound. unsafe { ffi::av_log_set_level(48) }; // AV_LOG_DEBUG — surface NVENC hw-frame rejects } let name = codec.nvenc_name(); @@ -195,6 +213,11 @@ impl NvencEncoder { .unwrap_or(1.0); let vbv_bits = ((bitrate_bps as f64 / fps.max(1) as f64) * vbv_frames as f64) .clamp(1.0, i32::MAX as f64); + // SAFETY: `video` is the ffmpeg-next encoder builder wrapping a freshly-allocated + // `AVCodecContext` that we hold by value and have not opened yet; `video.as_mut_ptr()` returns + // that non-null, properly-aligned, exclusively-owned context. Writing the plain `rc_buffer_size` + // int field before `open_with` is the supported way to set a field ffmpeg-next exposes no + // setter for. Sole owner → no aliasing; synchronous in-bounds scalar write. unsafe { (*video.as_mut_ptr()).rc_buffer_size = vbv_bits as i32; } @@ -204,6 +227,9 @@ impl NvencEncoder { // "freeze". NVENC emits one IDR at stream start, then P-frames only; `forced-idr` (below) // turns a client recovery request (RFI, via `request_keyframe`) into an IDR on demand. // This is the Moonlight/Sunshine low-latency model. + // SAFETY: same `video` builder as above — a non-null, properly-aligned, sole-owned, not-yet- + // opened `AVCodecContext`. We write the plain `gop_size` int field (= -1, infinite GOP) before + // `open_with`, which ffmpeg-next has no setter for. No aliasing; synchronous scalar write. unsafe { (*video.as_mut_ptr()).gop_size = -1; } @@ -214,6 +240,10 @@ impl NvencEncoder { // RGB-input paths leave these unset (NVENC's internal CSC writes its own VUI). Matches the // Windows NV12 path's BT.709 limited-range signalling. if matches!(format, PixelFormat::Nv12) { + // SAFETY: same `video` builder — `raw = video.as_mut_ptr()` is the non-null, properly- + // aligned, sole-owned, not-yet-opened `AVCodecContext`. We set its four VUI colour enum + // fields to valid `AVColorSpace`/`AVColorRange`/`AVColorPrimaries`/`AVColorTransfer- + // Characteristic` variants before `open_with`. Sole owner → no aliasing; synchronous writes. unsafe { let raw = video.as_mut_ptr(); (*raw).colorspace = ffi::AVColorSpace::AVCOL_SPC_BT709; @@ -228,7 +258,17 @@ impl NvencEncoder { // *before* open (NVENC derives the device from `hw_frames_ctx`). let cuda_hw = if cuda { let cu_ctx = crate::zerocopy::cuda::context().context("shared CUDA context")?; + // SAFETY: `CudaHw::new` (an `unsafe fn`) requires libav initialized (the `ffmpeg::init()` + // above ran) and a valid `CUcontext`; `cu_ctx` is the shared importer context from + // `zerocopy::cuda::context()?`, non-null on the `Ok` path. `nvenc_pixel` is a valid `Pixel` + // and `width`/`height` are the validated positive dims. It returns a RAII `CudaHw` wrapping + // (not owning) `cu_ctx` and owning two `AVBufferRef`s freed on drop. let hw = unsafe { CudaHw::new(cu_ctx, nvenc_pixel, width, height)? }; + // SAFETY: `raw = video.as_mut_ptr()` is the non-null, sole-owned, not-yet-opened + // `AVCodecContext`. We set `pix_fmt = CUDA` and attach NEW refs (`av_buffer_ref`) of + // `hw.device_ref`/`hw.frames_ref` — both non-null (`CudaHw::new` guarantees) and from the + // live `hw`, which is moved into `NvencEncoder.cuda` next to `enc` and so outlives the + // encoder. The context owns its own refs (freed when the context closes). No aliasing. unsafe { let raw = video.as_mut_ptr(); (*raw).pix_fmt = ffi::AVPixelFormat::AV_PIX_FMT_CUDA; @@ -428,6 +468,19 @@ impl NvencEncoder { // The device→device copy below uses our shared context directly; make it current on the // encode thread (ffmpeg pushes its own around the pool alloc, so order is fine). crate::zerocopy::cuda::make_current().context("CUDA context current (encode thread)")?; + // SAFETY: `frames_ref` is the non-null CUDA frames ctx from `self.cuda` (unwrapped via + // `.context(..)?` above), and the shared CUDA context was just made current on THIS thread + // (`make_current()?`), the precondition for the device-pointer copies below. + // * `av_frame_alloc` → `f` (null-checked). `av_hwframe_get_buffer(frames_ref, f, 0)` fills `f` + // with a pooled CUDA surface (sets `data[]`/`linesize[]`/`buf[0]`/`hw_frames_ctx`); on + // failure we free `f` and bail. + // * For NV12 we read `(*f).data[0..2]` / `linesize[0..2]` (Y + interleaved UV), else + // `data[0]`/`linesize[0]` — in-struct fields of the non-null `f`, valid for the surface dims + // ffmpeg allocated — and pass them to the cuda copy helpers, which device→device copy `buf` + // (the imported `DeviceBuffer`, owned by the caller and live for this call) into the surface. + // * On copy error we free `f` and return. Otherwise we write `pts`/`pict_type` through `f` and + // `avcodec_send_frame` it into the live owned `self.enc` context (which takes its own ref of + // the pooled surface), then free our `f` ref exactly once. Single-threaded encoder → no race. unsafe { let mut f = ffi::av_frame_alloc(); if f.is_null() { diff --git a/crates/punktfunk-host/src/encode/linux/vaapi.rs b/crates/punktfunk-host/src/encode/linux/vaapi.rs index 837d6eb..1a0c14b 100644 --- a/crates/punktfunk-host/src/encode/linux/vaapi.rs +++ b/crates/punktfunk-host/src/encode/linux/vaapi.rs @@ -19,6 +19,8 @@ //! hwdevice/hwframes/buffersrc/buffersink calls go through `ffmpeg::ffi` (= `ffmpeg_sys_next`), //! as the CUDA encode path and the clients' decode paths already do. The encoder is opened //! *without* a global header, so VPS/SPS/PPS are in-band on every IDR. +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] use super::{Codec, EncodedFrame, Encoder}; use crate::capture::{CapturedFrame, DmabufFrame, FramePayload, PixelFormat}; @@ -133,6 +135,14 @@ pub fn probe_can_encode(codec: Codec) -> bool { if ffmpeg::init().is_err() { return false; } + // SAFETY: `ffmpeg::init()` returned Ok above, so libav is initialized. `av_log_get_level`/ + // `av_log_set_level` only read/write libav's global integer log level (no pointer args) and are + // always sound to call post-init. `VaapiHw::new` (an `unsafe fn`) builds a VAAPI device + NV12 + // frames pool from the literal NV12/640x480/pool=2 args and hands back a RAII handle that unrefs + // both `AVBufferRef`s on drop. `open_vaapi_encoder` (an `unsafe fn`) borrows `hw.device_ref`/ + // `hw.frames_ref` — the two non-null refs `VaapiHw::new` just created — and `av_buffer_ref`s them + // into the encoder; `hw` is a live local for the whole match arm, so the borrows outlive the + // synchronous call, and both `hw` and the probe encoder are dropped (RAII) when the arm ends. unsafe { // A missing VA device (non-VAAPI host, GPU-less CI) is an expected probe outcome — quiet // ffmpeg's "No VA display found" error for the probe, then restore the level. @@ -224,6 +234,12 @@ impl VaapiHw { impl Drop for VaapiHw { fn drop(&mut self) { + // SAFETY: `frames_ref`/`device_ref` are the two non-null `AVBufferRef`s `VaapiHw::new` + // created (it bails before constructing `Self` if either alloc fails, so a live `VaapiHw` + // always holds both). `av_buffer_unref` drops one reference and nulls the pointer through the + // `&mut`. This `Drop` runs exactly once and `VaapiHw` owns these refs exclusively, so there + // is no double-free / use-after-free. Frames are unref'd before the device because the frames + // ctx internally holds a ref on the device (refcounted, so the order is sound either way). unsafe { ffi::av_buffer_unref(&mut self.frames_ref); ffi::av_buffer_unref(&mut self.device_ref); @@ -252,7 +268,16 @@ impl CpuInner { ) -> Result { let src_pixel = vaapi_sws_src(format)?; const POOL: c_int = 16; + // SAFETY: `VaapiHw::new` (an `unsafe fn`) requires libav initialized — guaranteed because the + // only path here is `VaapiEncoder::open` → `ensure_inner` → `CpuInner::open`, and `open` ran + // `ffmpeg::init()`. The args are valid: NV12 sw_format, the validated positive `width`/`height`, + // pool=16. It returns a RAII `VaapiHw` that unrefs its two `AVBufferRef`s on drop. let hw = unsafe { VaapiHw::new(ffi::AVPixelFormat::AV_PIX_FMT_NV12, width, height, POOL)? }; + // SAFETY: `open_vaapi_encoder` (an `unsafe fn`) borrows `hw.device_ref`/`hw.frames_ref` — both + // non-null (`VaapiHw::new` guarantees it) and from the `hw` just built above, which is a live + // local that outlives this synchronous call. The fn `av_buffer_ref`s them into the encoder, so + // the encoder holds its own references; `hw` is also moved into the returned `CpuInner` next to + // `enc`, keeping the device/frames alive for the encoder's whole lifetime. let enc = unsafe { open_vaapi_encoder( codec, @@ -266,6 +291,12 @@ impl CpuInner { }; // swscale RGB→NV12, BT.709 limited (matches the VUI), no rescale. let src_av = pixel_to_av(src_pixel); + // SAFETY: `sws_getContext` allocates a swscale context for the given src/dst dimensions and + // pixel formats. All four dims are the encoder's positive `width`/`height` cast to `c_int`; + // `src_av` is a valid `AVPixelFormat` (from `pixel_to_av` of the `vaapi_sws_src`-validated + // `src_pixel`), the dst is NV12. The three trailing pointers (srcFilter, dstFilter, param) are + // explicitly null = "use defaults", which the API documents as accepted. No Rust memory is + // borrowed — only by-value ints/enums — and the returned pointer is null-checked just below. let sws = unsafe { ffi::sws_getContext( width as c_int, @@ -283,10 +314,23 @@ impl CpuInner { if sws.is_null() { bail!("sws_getContext(RGB→NV12) failed"); } + // SAFETY: `sws` is the non-null `SwsContext` from `sws_getContext` above (the `is_null()` + // check immediately preceding returned false). `sws_getCoefficients(SWS_CS_ITU709)` returns a + // pointer into a libswscale static const coefficient table valid for the whole process, reused + // here for both the inverse (src) and forward (dst) matrices. `sws_setColorspaceDetails` only + // reads those tables and writes scalar CSC settings into `sws`; the table pointer outlives the + // synchronous call and no Rust memory is passed. unsafe { let cs709 = ffi::sws_getCoefficients(SWS_CS_ITU709); ffi::sws_setColorspaceDetails(sws, cs709, 1, cs709, 0, 0, 1 << 16, 1 << 16); } + // SAFETY: `av_frame_alloc` returns a fresh, uniquely-owned heap `AVFrame` (null-checked — on + // null we free the already-built `sws` and bail). We then write the plain `format`/`width`/ + // `height` fields through the non-null, properly-aligned `f` (sole owner, not yet shared). + // `av_frame_get_buffer(f, 0)` allocates backing storage for those dims/format; on failure we + // free `f` and `sws` (unwinding the half-built state) and bail. On success `f` is a fully-owned + // NV12 frame stored in `CpuInner.nv12` and freed once in `CpuInner::drop`. `f` is a unique + // fresh pointer, so none of these writes alias anything. let nv12 = unsafe { let f = ffi::av_frame_alloc(); if f.is_null() { @@ -329,6 +373,18 @@ impl CpuInner { let h = self.height as usize; let src_row = w * self.src_format.bytes_per_pixel(); anyhow::ensure!(bytes.len() >= src_row * h, "captured buffer too small"); + // SAFETY: The `ensure!`s above guarantee `format == self.src_format` and + // `bytes.len() >= src_row * h`. `sws_scale` reads `h` rows of `src_row` bytes from + // `src_data[0] = bytes.as_ptr()` (the other planes null/0 — packed RGB is single-plane), all + // in bounds; `bytes`, `src_data`, `src_stride` are live locals for this synchronous call. + // `self.sws` is the non-null context built in `open`; it writes into `self.nv12` (a non-null + // owned frame whose `data`/`linesize` in-struct arrays were sized by `av_frame_get_buffer`). + // `av_frame_alloc` (null-checked) yields a fresh `hwf`; `av_hwframe_get_buffer` pulls a pooled + // VAAPI surface from the live non-null `self.hw.frames_ref`; `av_hwframe_transfer_data` uploads + // the staged NV12 into it — both frames live, failures free `hwf` and bail. We then write + // `pts`/`pict_type` through the non-null `hwf` and `avcodec_send_frame` it into the live + // owned `self.enc` context (which takes its own ref), then free our `hwf` ref exactly once. + // The encoder runs only on this thread (see `unsafe impl Send`), so no aliasing/data race. unsafe { let src_data: [*const u8; 4] = [bytes.as_ptr(), ptr::null(), ptr::null(), ptr::null()]; let src_stride: [c_int; 4] = [src_row as c_int, 0, 0, 0]; @@ -374,6 +430,12 @@ impl CpuInner { impl Drop for CpuInner { fn drop(&mut self) { + // SAFETY: `self.nv12` (an owned `AVFrame`) and `self.sws` (an owned `SwsContext`) are each + // freed exactly once here, guarded by `is_null()` so a never-set pointer is skipped (no double + // free). `CpuInner` owns both exclusively and `Drop` runs once. `av_frame_free` takes `&mut` + // and nulls the pointer. `self.enc`/`self.hw` are freed afterward by their own `Drop` impls; + // the encoder holds its own `av_buffer_ref`'d device/frames copies, so field-drop order is + // irrelevant to soundness. unsafe { if !self.nv12.is_null() { ffi::av_frame_free(&mut self.nv12); @@ -417,6 +479,31 @@ impl DmabufInner { let drm_fourcc = crate::zerocopy::drm_fourcc(format) .ok_or_else(|| anyhow!("no DRM fourcc for {format:?} (VAAPI zero-copy)"))?; let node = render_node(); + // SAFETY: libav is initialized (`VaapiEncoder::open` ran `ffmpeg::init()` before + // `ensure_inner` → `DmabufInner::open`). Every raw pointer dereferenced below is either freshly + // allocated by the immediately-preceding ffmpeg call and null-checked, or an in-struct field of + // such an object: + // * `node` is a `CString` (from `render_node`) live for the whole block; its `.as_ptr()` is a + // NUL-terminated path read only during `av_hwdevice_ctx_create`. + // * `av_hwdevice_ctx_create(&mut drm_device, DRM, …)` / `…_create_derived(&mut vaapi_device, + // VAAPI, drm_device, …)`: on `r < 0` the out-param stays null and we bail (the derive path + // unrefs `drm_device` first); on success each is a non-null owned `AVBufferRef`. + // * `av_hwframe_ctx_alloc(drm_device)` → `drm_frames` (null-checked); `(*drm_frames).data` is + // its `AVHWFramesContext` payload, written before `av_hwframe_ctx_init`. + // * `avfilter_graph_alloc` → `graph` (null-checked); `avfilter_get_by_name` returns a static + // const `AVFilter` (process-lifetime) or null; `avfilter_graph_alloc_filter` allocates each + // filter ctx inside `graph`; the four are null-checked together. `inst`/arg strings are + // 'static C literals. + // * `(*hwmap/scale).hw_device_ctx = av_buffer_ref(vaapi_device)` attaches a NEW ref owned by + // the filter (freed by `avfilter_graph_free`); our `vaapi_device` ref is untouched. + // * `av_buffersink_get_hw_frames_ctx(sink)` → `nv12_ctx` is a borrowed ref owned by the sink, + // valid while `graph` lives (and `graph` is moved into the returned `DmabufInner`). + // * `open_vaapi_encoder` borrows `vaapi_device` (our live owned ref) and `nv12_ctx` (sink's + // live ref) and `av_buffer_ref`s both into the encoder. + // Every early-error path unref's the allocated buffers and frees the graph in the right order + // before bailing; on success the four `AVBufferRef`s + `graph` + `src`/`sink` are moved into + // `DmabufInner` and freed in its `Drop`. (Two non-UB leaks noted below: `av_buffersrc_*` and + // the final `?`.) unsafe { // DRM device (source dmabuf frames) + a VAAPI device derived from it (same GPU) for // hwmap/scale_vaapi/the encoder. @@ -509,6 +596,11 @@ impl DmabufInner { num: 1, den: fps as c_int, }; + // NOTE(leak, not UB): `av_buffersrc_parameters_set` takes its OWN ref of + // `par->hw_frames_ctx`, and `av_free(par)` frees only the struct (not the ref). So this + // `av_buffer_ref(drm_frames)` ref is leaked once per session — assigning `drm_frames` + // borrowed (no extra ref) is the correct ffmpeg pattern. Sound (a refcount leak, never a + // dangling/double-free), but it keeps the DRM frames ctx + device alive past `Drop`. (*par).hw_frames_ctx = ffi::av_buffer_ref(drm_frames); let r = ffi::av_buffersrc_parameters_set(src, par); ffi::av_free(par as *mut _); @@ -564,6 +656,9 @@ impl DmabufInner { ffi::av_buffer_unref(&mut drm_device); bail!("filter sink has no VAAPI frames context"); } + // NOTE(leak, not UB): unlike the error paths above, this `?` returns without unref'ing + // `graph`/`drm_frames`/`vaapi_device`/`drm_device` — so an encoder-open failure leaks them. + // Sound (leak only); only the unhappy path, when the session is failing anyway. let enc = open_vaapi_encoder( codec, width, @@ -600,6 +695,23 @@ impl DmabufInner { dmabuf.fourcc, self.fourcc ); + // SAFETY: The `ensure!` above checked `dmabuf.fourcc == self.fourcc`. + // * `std::mem::zeroed::()` is sound: it is a `#[repr(C)]` POD of ints and + // nested int-struct arrays (no `NonNull`/refs), for which all-zero is a valid bit pattern; + // `Box` puts it on the heap with a unique owner. + // * `dmabuf.fd.as_raw_fd()` is the fd of the caller's `&DmabufFrame`, which owns it for the + // whole synchronous `submit`; we describe one object/layer/plane from its + // fourcc/modifier/offset/stride and pass `object.size = 0` (ffmpeg queries the real size). + // * `av_frame_alloc` → `drm` (null-checked); we set its scalar fields and + // `hw_frames_ctx = av_buffer_ref(self.drm_frames)` (new ref of the live owned ctx). + // * `data[0] = Box::into_raw(desc)` transfers the box into the frame; `buf[0] = + // av_buffer_create(.., free_desc, ..)` registers a destructor that reclaims it exactly once + // when the buffer's refcount hits zero — matched alloc/free, no leak/double-free. + // * `av_buffersrc_add_frame_flags(self.src, drm, KEEP_REF)` pushes a ref into the live + // buffersrc; KEEP_REF keeps our own `drm` ref, which we then `av_frame_free`. We pull the + // converted surface with `av_buffersink_get_frame(self.sink, nv12)` BEFORE returning, so the + // dmabuf (owned by the caller) is read while still valid. `nv12` is sent into the live owned + // `self.enc` (takes its own ref) and our ref freed once. Single-threaded encoder → no race. unsafe { // Build a DRM-PRIME AVFrame describing the dmabuf (one object/fd, one layer/plane). let mut desc: Box = Box::new(std::mem::zeroed()); @@ -626,6 +738,11 @@ impl DmabufInner { // Own the descriptor so it frees with the frame (the fd is owned by the DmabufFrame, // which outlives this call — the graph reads the surface before submit returns). extern "C" fn free_desc(_opaque: *mut std::ffi::c_void, data: *mut u8) { + // SAFETY: `data` is exactly the pointer produced by `Box::into_raw(desc)` and passed as + // `av_buffer_create`'s first arg, which libav hands back verbatim to this callback. It + // is a valid, uniquely-owned `Box` raw pointer; libav invokes the + // callback exactly once (when the last buffer ref drops), so `from_raw` + `drop` + // reclaims it exactly once — no double-free. `_opaque` is unused (we passed null). unsafe { drop(Box::from_raw(data as *mut ffi::AVDRMFrameDescriptor)) }; } (*drm).buf[0] = ffi::av_buffer_create( @@ -673,6 +790,13 @@ impl DmabufInner { impl Drop for DmabufInner { fn drop(&mut self) { + // SAFETY: `graph`/`drm_frames`/`vaapi_device`/`drm_device` are the non-null objects + // `DmabufInner::open` built and moved into `self` (open bails before constructing `Self` if any + // alloc fails). `avfilter_graph_free` frees the graph (and the per-filter device refs it owns); + // each `av_buffer_unref` drops one ref and nulls the pointer via `&mut`. `DmabufInner` owns all + // four exclusively and `Drop` runs once → no double-free/use-after-free. The graph is freed + // first (it holds refs on the devices), then frames, then the derived VAAPI device, then DRM. + // (`self.enc` drops via ffmpeg-next afterward, holding its own refs.) unsafe { ffi::avfilter_graph_free(&mut self.graph); ffi::av_buffer_unref(&mut self.drm_frames); @@ -703,6 +827,13 @@ pub struct VaapiEncoder { } // Raw FFI pointers; the encoder lives on a single thread (same contract as `NvencEncoder`). +// SAFETY: `VaapiEncoder`'s `Inner` holds raw FFI pointers (`SwsContext`, `AVFrame`, `AVBufferRef`, +// `AVFilterContext`, `AVCodecContext`) that are not `Send` by default. The encoder is owned and +// driven by exactly ONE thread — the host's per-session encode thread it is moved (transferred) to — +// and is only ever touched through `&mut self` methods, so it is never aliased or accessed +// concurrently from two threads. None of the underlying libav/libswscale objects have thread +// affinity (they are not thread-local), so transferring ownership across threads is sound. This +// asserts `Send` (transfer) only; `Sync` (shared `&`) is deliberately NOT implemented. unsafe impl Send for VaapiEncoder {} impl VaapiEncoder { @@ -720,6 +851,9 @@ impl VaapiEncoder { } ffmpeg::init().context("ffmpeg init")?; if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() { + // SAFETY: `av_log_set_level` sets libav's global integer log level; `48` (= AV_LOG_DEBUG) + // is a valid level and there are no pointer args. libav was just initialized by the + // `ffmpeg::init()` above, so the call is always sound. unsafe { ffi::av_log_set_level(48) }; } // Validate the codec/format up front so a bad request fails at open, not on the first frame. diff --git a/crates/punktfunk-host/src/inject/linux/gamepad.rs b/crates/punktfunk-host/src/inject/linux/gamepad.rs index 9376e8a..96784a8 100644 --- a/crates/punktfunk-host/src/inject/linux/gamepad.rs +++ b/crates/punktfunk-host/src/inject/linux/gamepad.rs @@ -15,6 +15,9 @@ //! `` on x86_64. `/dev/uinput` needs a udev rule + `input` group membership //! (see `scripts/60-punktfunk.rules`); creation fails with a clear error otherwise. +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] + use crate::gamestream::gamepad::{self, GamepadFrame, MAX_PADS}; use anyhow::{bail, Result}; use std::collections::HashMap; @@ -215,6 +218,11 @@ const _: () = { }; fn ioctl_int(fd: i32, req: libc::c_ulong, arg: libc::c_int, what: &str) -> Result<()> { + // SAFETY: every caller passes one of UI_SET_EVBIT/KEYBIT/FFBIT/UI_DEV_CREATE/UI_DEV_DESTROY as + // `req` — all integer-argument ioctls whose third arg the kernel takes BY VALUE, so nothing is + // dereferenced through `arg` and no memory must outlive the call. The only precondition is `fd` + // being a valid open descriptor; callers pass the live `/dev/uinput` fd, and even a stale fd + // would merely return -1/EBADF (reported below), never UB. if unsafe { libc::ioctl(fd, req, arg) } < 0 { bail!("{what}: {}", std::io::Error::last_os_error()); } @@ -222,6 +230,12 @@ fn ioctl_int(fd: i32, req: libc::c_ulong, arg: libc::c_int, what: &str) -> Resul } fn ioctl_ptr(fd: i32, req: libc::c_ulong, arg: *mut T, what: &str) -> Result<()> { + // SAFETY: `fd` is the caller's live `/dev/uinput` fd. Every call site passes `&mut x` for a live, + // uniquely-borrowed `#[repr(C)]` `x: T` whose size matches the struct the request number encodes + // (UI_DEV_SETUP=0x405c_5503 → 0x5c=92=size_of::(); UI_ABS_SETUP → 0x1c=28; the FF + // upload/erase ioctls → 0x68/0x0c — all pinned by the `size_of` asserts above). The kernel copies + // exactly that many bytes in/out through `arg`; the `&mut` keeps the pointee alive and unaliased + // for the whole synchronous call. if unsafe { libc::ioctl(fd, req, arg) } < 0 { bail!("{what}: {}", std::io::Error::last_os_error()); } @@ -251,6 +265,9 @@ pub struct VirtualPad { impl VirtualPad { pub fn create(index: usize, identity: PadIdentity) -> Result { use std::os::fd::FromRawFd; + // SAFETY: `c"/dev/uinput"` is a 'static NUL-terminated C string literal; `as_ptr()` yields a + // valid pointer the kernel only reads as a filesystem path. `open` returns a fresh fd (or -1) + // and retains nothing; no Rust memory is aliased or handed to the kernel beyond that 'static path. let raw = unsafe { libc::open( c"/dev/uinput".as_ptr(), @@ -264,6 +281,9 @@ impl VirtualPad { std::io::Error::last_os_error() ); } + // SAFETY: `raw >= 0` here (the `< 0` branch above already bailed), so it is a freshly-opened fd + // from `libc::open` that is not stored or owned anywhere else. Transferring it to `OwnedFd` makes + // this the unique owner, which will `close` it exactly once on drop (no double-close, no leak). let fd = unsafe { OwnedFd::from_raw_fd(raw) }; ioctl_int(raw, UI_SET_EVBIT, EV_KEY as i32, "UI_SET_EVBIT(EV_KEY)")?; @@ -356,6 +376,11 @@ impl VirtualPad { code, value, }; + // SAFETY: `ev` is a live local `#[repr(C)]` struct of all-integer fields with no padding bytes + // (timeval=16 + u16 + u16 + i32 = 24, the size asserted above), so every byte is initialized and + // valid to read as `u8`. The pointer is non-null and `u8`-aligned (align 1), the length is exactly + // `size_of::()` so the slice spans precisely `ev`'s bytes (in bounds), and `ev` + // outlives `bytes` (used immediately below) with no concurrent mutation (single-threaded local). let bytes = unsafe { std::slice::from_raw_parts( &ev as *const _ as *const u8, @@ -363,6 +388,10 @@ impl VirtualPad { ) }; // Best-effort: a full kernel queue drops the event; the next frame re-syncs state. + // SAFETY: `self.fd` is the live uinput `OwnedFd` (borrowed via `as_raw_fd`, so it stays open for + // the call); `bytes` is the slice above backed by the still-live local `ev`. `write` only READS + // exactly `bytes.len()` bytes from `bytes.as_ptr()` (in bounds) and retains nothing past return, + // so the buffer outlives the synchronous call and the read-only access cannot race or alias. let _ = unsafe { libc::write( self.fd.as_raw_fd(), @@ -404,6 +433,10 @@ impl VirtualPad { let raw = self.fd.as_raw_fd(); let mut buf = [0u8; std::mem::size_of::()]; loop { + // SAFETY: `raw` is the live raw fd of `self.fd` (the non-blocking uinput device). `buf` is a + // live local `[u8; size_of::()]`; `buf.as_mut_ptr()` is a valid writable pointer + // to its `buf.len()` bytes. `read` writes AT MOST `buf.len()` bytes (in bounds), the buffer + // outlives this synchronous call, and `buf` is borrowed uniquely here (no alias/race). let n = unsafe { libc::read(raw, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) }; if n != buf.len() as isize { break; // EAGAIN / short read — queue drained @@ -415,6 +448,10 @@ impl VirtualPad { unsafe { std::ptr::read_unaligned(buf.as_ptr() as *const InputEventRaw) }; match (ev.type_, ev.code) { (EV_UINPUT, UI_FF_UPLOAD) => { + // SAFETY: `UinputFfUpload` is `#[repr(C)]` over integers (`u32`, `i32`) and two + // `FfEffect`s (integers + `[u8; 32]`); all-zero is a valid bit pattern for every field + // (no bool/NonZero/enum/reference niche), so `zeroed` yields a fully-initialized valid + // value — `request_id` is then set below and the rest filled by UI_BEGIN_FF_UPLOAD. let mut up: UinputFfUpload = unsafe { std::mem::zeroed() }; up.request_id = ev.value as u32; if ioctl_ptr(raw, UI_BEGIN_FF_UPLOAD, &mut up, "UI_BEGIN_FF_UPLOAD").is_ok() { @@ -442,6 +479,9 @@ impl VirtualPad { } } (EV_UINPUT, UI_FF_ERASE) => { + // SAFETY: `UinputFfErase` is `#[repr(C)]` over three integer fields (`u32`, `i32`, + // `u32`); all-zero is a valid bit pattern for each, so `zeroed` produces a fully-valid + // initialized value — `request_id` is set below and `effect_id` filled by the ioctl. let mut er: UinputFfErase = unsafe { std::mem::zeroed() }; er.request_id = ev.value as u32; if ioctl_ptr(raw, UI_BEGIN_FF_ERASE, &mut er, "UI_BEGIN_FF_ERASE").is_ok() { @@ -492,6 +532,9 @@ impl VirtualPad { impl Drop for VirtualPad { fn drop(&mut self) { + // SAFETY: `self.fd` is still the live owned uinput fd here (the `OwnedFd` field is closed only + // AFTER this `drop` body returns), borrowed by `as_raw_fd`. UI_DEV_DESTROY takes its argument + // (0) BY VALUE, so nothing is dereferenced or aliased; the ioctl just tears down the device. let _ = unsafe { libc::ioctl(self.fd.as_raw_fd(), UI_DEV_DESTROY, 0) }; } } diff --git a/crates/punktfunk-host/src/inject/linux/wlr.rs b/crates/punktfunk-host/src/inject/linux/wlr.rs index 6b21e50..248a619 100644 --- a/crates/punktfunk-host/src/inject/linux/wlr.rs +++ b/crates/punktfunk-host/src/inject/linux/wlr.rs @@ -5,6 +5,9 @@ //! keymap, and translate events into virtual pointer/keyboard requests, tracking modifier state //! so the compositor resolves shifted keysyms correctly. +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] + use super::{gs_button_to_evdev, vk_to_evdev, InputEvent, InputInjector}; use anyhow::{bail, Context, Result}; use punktfunk_core::input::InputKind; @@ -264,10 +267,17 @@ impl InputInjector for WlrootsInjector { /// Create an anonymous in-memory file holding `s` + a trailing NUL (for the keymap fd). fn memfd_with(s: &str) -> Result { let name = b"punktfunk-keymap\0"; + // SAFETY: `name` is a byte-string literal with an explicit trailing NUL, so `name.as_ptr()` is a + // valid NUL-terminated C string; `memfd_create` only reads that name (copying it) and creates an + // anonymous file, returning a fresh fd (or -1). `MFD_CLOEXEC` is a valid flag. The 'static literal + // outlives the synchronous call and nothing aliases it. The result is checked `< 0` below. let fd = unsafe { libc::memfd_create(name.as_ptr() as *const libc::c_char, libc::MFD_CLOEXEC) }; if fd < 0 { bail!("memfd_create failed: {}", std::io::Error::last_os_error()); } + // SAFETY: `fd` is the fresh memfd `memfd_create` just returned and checked `>= 0`; it is a unique + // open fd nothing else owns, so `File` takes sole ownership and closes it exactly once on drop — + // no alias, no double-close. let mut f = unsafe { std::fs::File::from_raw_fd(fd) }; f.write_all(s.as_bytes()).context("write keymap")?; f.write_all(&[0]).context("write keymap NUL")?; diff --git a/crates/punktfunk-host/src/linux/dmabuf_fence.rs b/crates/punktfunk-host/src/linux/dmabuf_fence.rs index 223d6b7..cb5afb2 100644 --- a/crates/punktfunk-host/src/linux/dmabuf_fence.rs +++ b/crates/punktfunk-host/src/linux/dmabuf_fence.rs @@ -13,6 +13,9 @@ //! attaches none, the export yields an already-signaled sync_file (poll returns immediately) — no //! wait, no harm, and `waited=false` tells us the driver doesn't fence (so zero-copy would still race). +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] + use std::os::fd::RawFd; // linux/dma-buf.h ioctls on the DMA_BUF_BASE ('b' = 0x62) magic. _IOWR = dir(3)<<30 | size<<16 | base<<8 | nr. @@ -40,6 +43,11 @@ pub fn wait_read_ready(dmabuf_fd: RawFd, timeout_ms: i32) -> std::io::Result()` + // — the exact byte count the kernel copies — and `&mut req` is a live, correctly-sized + // `#[repr(C)]` struct the EXPORT_SYNC_FILE ioctl reads (`flags`) and writes (`fd`). `req` + // outlives this synchronous call and is not aliased elsewhere. let r = unsafe { libc::ioctl(dmabuf_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &mut req) }; if r < 0 { return Err(std::io::Error::last_os_error()); @@ -54,11 +62,21 @@ pub fn wait_read_ready(dmabuf_fd: RawFd, timeout_ms: i32) -> std::io::Result= 0`). + // `poll` reads `fd`/`events` and writes `revents` for this non-blocking (timeout 0) probe, then + // returns — `pfd` outlives the call and aliases nothing. let pending = unsafe { libc::poll(&mut pfd, 1, 0) } == 0; if pending { pfd.revents = 0; + // SAFETY: same live single-element `pfd` (its `revents` reset to 0 just above), `nfds == 1`, + // and `sync_fd` still open. This blocking `poll` (up to `timeout_ms`) waits for the render + // fence to signal; it reads `fd`/`events`, writes `revents`, and returns before `pfd` ends. unsafe { libc::poll(&mut pfd, 1, timeout_ms) }; // block until the render fence signals } + // SAFETY: `sync_fd` is the sync_file fd the EXPORT_SYNC_FILE ioctl created and handed us to own; + // this point is reached only when `sync_fd >= 0`, this `close` runs exactly once on it, and it is + // never used afterward — no double-close or use-after-close. unsafe { libc::close(sync_fd) }; Ok(pending) } diff --git a/crates/punktfunk-host/src/linux/zerocopy/cuda.rs b/crates/punktfunk-host/src/linux/zerocopy/cuda.rs index f62b5c9..03a2905 100644 --- a/crates/punktfunk-host/src/linux/zerocopy/cuda.rs +++ b/crates/punktfunk-host/src/linux/zerocopy/cuda.rs @@ -11,6 +11,8 @@ //! thread) and ffmpeg's `hevc_nvenc` (encode thread); each thread makes it current before use. #![allow(non_camel_case_types, non_snake_case)] +// Every `unsafe` block/impl below carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] use anyhow::{bail, Result}; use std::os::raw::{c_int, c_uint, c_void}; @@ -128,8 +130,14 @@ struct CudaApi { ) -> CUresult, cuDestroyExternalMemory: unsafe extern "C" fn(CUexternalMemory) -> CUresult, } -// The resolved fn pointers are plain addresses into a process-lifetime mapping; safe to share. +// SAFETY: every field is a bare `extern "C" fn` address into the leaked, process-lifetime +// `libcuda` mapping (`cuda_api` `forget`s the `Library`, so it is never unloaded) — an immutable +// value with no interior mutability and no thread affinity. Moving the table to another thread +// cannot dangle (the code it points at stays mapped) or race (the fields are read-only). unsafe impl Send for CudaApi {} +// SAFETY: as above — the table is a set of immutable fn-pointer addresses with no interior +// mutability, so concurrent shared reads from multiple threads cannot race; the driver entry +// points they address are themselves thread-safe. unsafe impl Sync for CudaApi {} /// `CUresult` returned by the wrappers when `libcuda` isn't loaded (no NVIDIA driver). Non-zero so @@ -143,6 +151,14 @@ static CUDA_API: OnceLock> = OnceLock::new(); /// (the expected case on AMD/Intel hosts) — logged at debug, not an error. fn cuda_api() -> Option<&'static CudaApi> { CUDA_API + // SAFETY: `Library::new` runs `libcuda.so.1`'s initializers — it is the trusted NVIDIA + // driver library, so loading has no unexpected effects; `?`/`None` handle its absence. + // Each `lib.get::(name)` asserts the symbol's real ABI equals `T`: every NUL-terminated + // name is a documented CUDA Driver API entry point and `T` is the exact + // `unsafe extern "C" fn(..)` signature from cuda.h/cudaGL.h (`_v2` for ctx/mem ops). Each + // `Symbol` only borrows `lib` until the end of the struct-literal statement; we deref-copy + // the raw fn-pointer out first, then `forget(lib)` leaks the mapping so those addresses + // stay valid for the whole process. Runs once under the `OnceLock` init — no aliasing. .get_or_init(|| unsafe { let lib = libloading::Library::new("libcuda.so.1") .or_else(|_| libloading::Library::new("libcuda.so")) @@ -361,6 +377,12 @@ pub fn read_plane_to_host( Height: height, ..Default::default() }; + // SAFETY: `copy_blocking` is unsafe because it issues a CUDA copy; its contract is a valid + // descriptor with the shared context current (the caller's responsibility — self-test path). + // `©` is a live local `#[repr(C)] CUDA_MEMCPY2D` that outlives the synchronous call: + // `srcDevice`/`srcPitch` are the caller's live pitched device plane, `dstHost` addresses the + // freshly-allocated `host` `Vec` of exactly `width_bytes*height` bytes, and `WidthInBytes`× + // `Height` fit both. The copy is synchronous, so `host` is fully written before we return it. unsafe { copy_blocking(©, "cuMemcpy2DAsync_v2(dev->host)")? }; Ok(host) } @@ -369,7 +391,13 @@ pub fn read_plane_to_host( /// in a `OnceLock`; the raw `CUcontext` is thread-safe to make current from any thread. #[derive(Clone, Copy)] pub struct Context(pub CUcontext); +// SAFETY: `CUcontext` is an opaque CUDA driver handle, not a dereferenceable Rust pointer. It is +// created once and never destroyed (process lifetime), and the only thing done with it is +// `cuCtxSetCurrent`, which the Driver API explicitly allows from any thread — so transferring the +// handle to another thread cannot dangle or race (the driver owns the synchronization). unsafe impl Send for Context {} +// SAFETY: as above — the wrapped handle is an immutable opaque address and the driver does all the +// synchronization, so sharing `&Context` across threads is sound. unsafe impl Sync for Context {} static CONTEXT: OnceLock = OnceLock::new(); @@ -382,6 +410,12 @@ pub fn context() -> Result { if cuda_api().is_none() { bail!("libcuda.so.1 not available — no NVIDIA driver (CUDA zero-copy disabled)"); } + // SAFETY: we returned above unless `cuda_api()` is `Some`, so every wrapper here forwards into + // the live, leaked `libcuda` table rather than the not-loaded stub. `cuInit(0)` passes the + // API-required flags value 0. `&mut dev`/`&mut ctx` are live, zero/null-initialized stack + // out-params the driver writes the device handle / new context into; each outlives its + // synchronous call and they are distinct locals (no aliasing). `cuCtxCreate_v2` yields a valid + // `CUcontext` on success (`ck` bails otherwise), which becomes the block's value. let ctx = unsafe { ck(cuInit(0), "cuInit")?; let mut dev: CUdevice = 0; @@ -401,6 +435,10 @@ pub fn context() -> Result { /// Make the shared context current on the calling thread (required before any CUDA op here). pub fn make_current() -> Result<()> { let ctx = context()?; + // SAFETY: `ctx` came from `context()?`, so it is the live shared `CUcontext` and the driver + // table is present. `cuCtxSetCurrent` binds that opaque handle to the calling thread; it takes + // no Rust-memory pointer and is thread-safe (affects only this thread's current context), so + // there is no aliasing or lifetime hazard. unsafe { ck(cuCtxSetCurrent(ctx), "cuCtxSetCurrent") } } @@ -423,6 +461,12 @@ fn copy_stream() -> CUstream { if let Some(s) = cell.get() { return s; } + // SAFETY: `copy_stream` runs with the shared context current (its doc contract), so the + // wrappers forward into the live `libcuda` table. `&mut least`/`&mut greatest` are live + // stack `i32`s the driver fills with the priority range; `&mut s` is a live null-init + // `CUstream` the driver writes the new stream into. All out-params outlive their + // synchronous calls and are distinct locals. On any non-zero result we fall back to a null + // (NULL-stream) value and never read an uninitialized handle. let stream = unsafe { let (mut least, mut greatest) = (0i32, 0i32); if cuCtxGetStreamPriorityRange(&mut least, &mut greatest) != 0 { @@ -459,6 +503,11 @@ unsafe fn copy_blocking(copy: &CUDA_MEMCPY2D, what: &str) -> Result<()> { fn alloc_pitched(width: u32, height: u32) -> Result<(CUdeviceptr, usize)> { let mut ptr: CUdeviceptr = 0; let mut pitch: usize = 0; + // SAFETY: `cuMemAllocPitch_v2` allocates a pitched device buffer (the wrapper forwards to the + // live table on any path that reached allocation). `&mut ptr` (`CUdeviceptr`) and `&mut pitch` + // (`usize`) are live, distinct stack out-params the driver writes the allocation pointer and + // its pitch into; both outlive the synchronous call. Width/height/element-size are by-value + // ints. No aliasing — two separate locals. unsafe { ck( cuMemAllocPitch_v2( @@ -486,6 +535,10 @@ fn alloc_pitched_nv12( let mut y_pitch: usize = 0; let mut uv_ptr: CUdeviceptr = 0; let mut uv_pitch: usize = 0; + // SAFETY: two independent `cuMemAllocPitch_v2` calls (wrapper → live table). `&mut y_ptr`/ + // `&mut y_pitch` and `&mut uv_ptr`/`&mut uv_pitch` are live, distinct stack out-params the + // driver writes each plane's pointer and pitch into; all outlive their synchronous calls. The + // dimension/element-size args are by-value ints. No aliasing — four separate locals. unsafe { ck( cuMemAllocPitch_v2( @@ -524,6 +577,13 @@ struct PoolInner { impl Drop for PoolInner { fn drop(&mut self) { + // SAFETY: the pool only exists because allocation succeeded, so the driver table is live. + // `PoolInner` drops only once every `DeviceBuffer` that referenced it (each holds an `Arc` + // clone) has been recycled, so `free`/`free_uv` hold every outstanding allocation exactly + // once and nothing else still uses them — no double-free or use-after-free. We make the + // shared context current first (drop may run off the allocating thread) so `cuMemFree_v2` + // targets the right context. Each `p` is a `CUdeviceptr` previously returned by + // `cuMemAllocPitch_v2`; results are ignored (best-effort teardown). unsafe { if let Some(c) = CONTEXT.get() { let _ = cuCtxSetCurrent(c.0); @@ -697,6 +757,12 @@ impl Drop for DeviceBuffer { } } else { // The buffer may be freed on the encode thread; cuMemFree needs a current context. + // SAFETY: this is the un-pooled branch (`pool` is `None`), so this `DeviceBuffer` + // exclusively owns `self.ptr` (and `self.uv`'s `uv_ptr`), each returned by + // `cuMemAllocPitch_v2` and freed exactly once here — `drop` runs once and the + // `self.ptr == 0` guard above skips the sentinel/empty case, so no double-free. We set + // the shared context current first because drop may run on a thread where it isn't, and + // `cuMemFree_v2` needs it. Wrapper → live table; results ignored (teardown). unsafe { if let Some(c) = CONTEXT.get() { let _ = cuCtxSetCurrent(c.0); @@ -745,6 +811,16 @@ impl RegisteredTexture { /// unmap. The copy is synchronized (on our priority stream) before unmap so `dst` is ready /// before the source dmabuf is recycled. Always unmaps, even if the copy errors. pub fn copy_mapped_to(&mut self, dst: &DeviceBuffer) -> Result<()> { + // SAFETY: `self.resource` is the valid `CUgraphicsResource` from a successful `register_gl` + // (its only constructor), so the wrappers forward to the live table; the caller holds the + // GL+CUDA contexts current (the registration's contract). `cuGraphicsMapResources` maps + // `count == 1` resource via `&mut self.resource` (a live field) on the default stream; + // `cuGraphicsSubResourceGetMappedArray` writes the mapped `CUarray` into the live local + // `array` (index 0, mip 0). On failure we unmap and bail (balanced). `©` is a live + // local `CUDA_MEMCPY2D` outliving the synchronous `copy_blocking`: `srcArray` is valid + // while mapped, `dstDevice`/`dstPitch` are `dst`'s live allocation, `width*4`×`height` fit + // both. `copy_blocking` syncs before we unmap, so the array stays valid through the copy; + // we always unmap afterward (even on error), keeping the map/unmap pair balanced. unsafe { ck( cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()), @@ -783,6 +859,14 @@ impl RegisteredTexture { width_bytes: usize, height: usize, ) -> Result<()> { + // SAFETY: identical contract to `copy_mapped_to` — `self.resource` is the valid + // `CUgraphicsResource` from `register_gl` (wrappers → live table; caller holds GL+CUDA + // contexts current). Map `count == 1` resource via the live `&mut self.resource`; the + // mapped `CUarray` is written into the live local `array` (index 0, mip 0); on failure we + // unmap and bail (balanced). `©` is a live local outliving the synchronous + // `copy_blocking`: `srcArray` valid while mapped, `dstDevice`/`dstPitch` are the caller's + // live plane, `width_bytes`×`height` fit it. We always unmap afterward, even on copy error, + // so the map/unmap pair stays balanced and the array outlives the copy. unsafe { ck( cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()), @@ -847,6 +931,10 @@ pub fn copy_device_to_device( Height: src.height as usize, ..Default::default() }; + // SAFETY: `copy_blocking` is unsafe (issues a CUDA copy); the caller must have the shared + // context current (documented). `©` is a live local device→device `CUDA_MEMCPY2D` outliving + // the synchronous call: `srcDevice`/`srcPitch` are `src`'s live allocation, `dstDevice`/ + // `dstPitch` the caller's live region, `width*4`×`height` within both. Wrapper → live table. unsafe { copy_blocking(©, "cuMemcpy2DAsync_v2(dev->dev)") } } @@ -888,6 +976,12 @@ pub fn copy_nv12_to_device( Height: h / 2, ..Default::default() }; + // SAFETY: two unsafe `copy_blocking` device→device copies; the caller must have the shared + // context current (documented). `&y`/`&uv` are live local `CUDA_MEMCPY2D`s outliving each + // synchronous call. All four device pointers are valid: `src.ptr`/`src_uv_ptr` come from a live + // NV12 `DeviceBuffer` (its `.uv` presence was checked via `ok_or_else`), `y_dst`/`uv_dst` are + // the caller's live NVENC surface planes; the luma copy is `w`×`h`, the chroma copy + // `(w/2)*2`×`h/2`, each within its planes. Wrappers → live table. unsafe { copy_blocking(&y, "cuMemcpy2DAsync_v2(nv12 Y dev->dev)")?; copy_blocking(&uv, "cuMemcpy2DAsync_v2(nv12 UV dev->dev)") @@ -897,6 +991,12 @@ pub fn copy_nv12_to_device( impl Drop for RegisteredTexture { fn drop(&mut self) { if !self.resource.is_null() { + // SAFETY: `self.resource` is non-null (just checked) and is the valid + // `CUgraphicsResource` from `register_gl`, owned exclusively by this `RegisteredTexture` + // and unregistered exactly once here (drop runs once) — no use-after-free or + // double-unregister. `cuGraphicsUnregisterResource` releases the GL↔CUDA registration; + // wrapper → live table (the resource exists ⇒ the driver was present). Result ignored + // (best-effort teardown). unsafe { let _ = cuGraphicsUnregisterResource(self.resource); } @@ -913,7 +1013,11 @@ pub struct ExternalDmabuf { pub size: u64, } -// Raw driver handles; used from the single capture thread but moved with the importer. +// SAFETY: the fields are opaque CUDA driver handles — an external-memory handle and a device +// pointer — not dereferenceable Rust memory, and the value is uniquely owned (no `Clone`). It is +// used from a single capture thread but constructed on / moved between threads with the importer; +// transferring these handles is sound because uniqueness rules out aliasing and they are destroyed +// exactly once in `Drop`. Only `Send` (not `Sync`) is asserted, matching the single-thread use. unsafe impl Send for ExternalDmabuf {} impl ExternalDmabuf { @@ -921,6 +1025,9 @@ impl ExternalDmabuf { /// from then on) and map its full `size` bytes to a device pointer. The shared context /// must be current. pub fn import(fd: i32, size: u64) -> Result { + // SAFETY: `libc::dup` only reads the integer `fd` and returns a new descriptor (or -1); it + // touches no Rust memory and `fd` is the caller's still-owned dmabuf fd (not consumed + // here). No aliasing or lifetime concern — a pure syscall on an integer. let dup = unsafe { libc::dup(fd) }; if dup < 0 { bail!("dup(dmabuf fd) failed"); @@ -938,8 +1045,17 @@ impl ExternalDmabuf { }; desc.handle[0] = dup as u32 as u64; // union member `int fd` (little-endian low bytes) let mut ext: CUexternalMemory = std::ptr::null_mut(); + // SAFETY: `cuImportExternalMemory` imports the memory described by `&desc`, a live local + // `#[repr(C)] CUDA_EXTERNAL_MEMORY_HANDLE_DESC` (cuda.h 64-bit layout) that outlives this + // synchronous call: `type_` is OPAQUE_FD, `handle[0]` holds the dup'd fd in the union's + // `int fd` low bytes, `size` is set. `&mut ext` is a live null-init out-param the driver + // writes the imported handle into. The driver takes ownership of the fd only on success. + // Distinct locals → no aliasing. Wrapper → live table (caller holds the context current). let r = unsafe { cuImportExternalMemory(&mut ext, &desc) }; if r != 0 { + // SAFETY: import failed (`r != 0`), so the driver did NOT take ownership of `dup`; we + // still own it and close it exactly once here on the error path (the success path never + // closes it — the driver does). `libc::close` acts on the integer fd alone. unsafe { libc::close(dup) }; // import failed → the driver did not take the fd bail!("cuImportExternalMemory failed ({r}) — LINEAR dmabuf import unsupported?"); } @@ -949,8 +1065,17 @@ impl ExternalDmabuf { ..Default::default() }; let mut ptr: CUdeviceptr = 0; + // SAFETY: maps a device pointer from `ext` (the valid `CUexternalMemory` just imported) per + // `&buf`, a live local `CUDA_EXTERNAL_MEMORY_BUFFER_DESC` (offset 0, full `size`) that + // outlives this synchronous call. `&mut ptr` is a live zero-init out-param the driver writes + // the mapped device address into; distinct locals → no aliasing. Wrapper → live table + // (context current). let r = unsafe { cuExternalMemoryGetMappedBuffer(&mut ptr, ext, &buf) }; if r != 0 { + // SAFETY: mapping failed; `ext` is the valid `CUexternalMemory` we imported and + // exclusively own. We destroy it exactly once here on the error path (the success path + // instead moves it into the returned `ExternalDmabuf`, whose `Drop` destroys it), + // releasing the fd the driver took — no double-destroy or use-after-free. unsafe { let _ = cuDestroyExternalMemory(ext); } @@ -962,6 +1087,12 @@ impl ExternalDmabuf { impl Drop for ExternalDmabuf { fn drop(&mut self) { + // SAFETY: this `ExternalDmabuf` only exists after a successful import, so the driver table + // is live. It exclusively owns `self.ptr` (the mapped buffer) and `self.ext` (the external + // memory), each torn down exactly once here (drop runs once; guarded by `!= 0` / `!null`) — + // no double-free or use-after-free. We make the shared context current first because drop + // may run off the import thread, and we free the mapped buffer before destroying its + // backing external memory. Results ignored (best-effort teardown). unsafe { if let Some(c) = CONTEXT.get() { let _ = cuCtxSetCurrent(c.0); @@ -996,5 +1127,10 @@ pub fn copy_pitched_to_buffer( }; // copy_blocking syncs our priority stream before returning, so the copy is complete before the // dmabuf is requeued to the producer. + // SAFETY: `copy_blocking` is unsafe (issues a CUDA copy); the caller must have the shared + // context current (documented). `©` is a live local device→device `CUDA_MEMCPY2D` outliving + // the synchronous call: `srcDevice`/`srcPitch` are the caller's live mapped span (e.g. an + // `ExternalDmabuf`), `dstDevice`/`dstPitch` are `dst`'s live allocation, `width*4`×`height` + // within both. Wrapper → live table. unsafe { copy_blocking(©, "cuMemcpy2DAsync_v2(ext->dev)") } } diff --git a/crates/punktfunk-host/src/linux/zerocopy/egl.rs b/crates/punktfunk-host/src/linux/zerocopy/egl.rs index eee3acd..80d246e 100644 --- a/crates/punktfunk-host/src/linux/zerocopy/egl.rs +++ b/crates/punktfunk-host/src/linux/zerocopy/egl.rs @@ -12,6 +12,8 @@ //! owned [`DeviceBuffer`] so the dmabuf can be returned to the compositor immediately. #![allow(non_upper_case_globals)] +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] use super::cuda::{self, DeviceBuffer}; use anyhow::{bail, ensure, Context as _, Result}; @@ -415,6 +417,14 @@ impl Nv12Blit { impl Drop for Nv12Blit { fn drop(&mut self) { + // SAFETY: these GL names (textures/FBOs/VAO/programs) were all created by THIS `Nv12Blit` + // in `Nv12Blit::new` on the current GL context, which is still current because the owning + // `EglImporter` is dropped on its single capture thread (fields drop before + // `EglImporter::drop`, which never releases the context). `glDelete*` takes a count + a + // pointer to that many names: `&self.y_tex`/`&self.vao` are `&u32` to one live field (n=1); + // `[self.y_fbo, self.uv_fbo].as_ptr()` points at a 2-element temporary that lives for the + // whole `glDeleteFramebuffers` call (n=2 matches). The symbols dispatch through libGL + // (libglvnd) to the driver for the current context. Each name is deleted exactly once. unsafe { glDeleteTextures(1, &self.y_tex); glDeleteTextures(1, &self.uv_tex); @@ -459,7 +469,14 @@ pub struct EglImporter { render_fd: c_int, } -// The EGL handles are confined to the capture thread; the struct is moved there once. +// SAFETY: `EglImporter` owns thread-affine handles — an EGLDisplay/contexts made current on one +// thread, a loaded GL proc pointer, a `gbm_device*`, a raw fd, and CUDA-registered GL textures — +// none safe to touch concurrently. It is constructed inside `pipewire_thread` on the dedicated +// `punktfunk-pipewire` thread, and every method (`import*`, `supported_modifiers`, `Drop`) runs on +// that same thread; it is never accessed through a shared `&` from another thread. `Send` asserts +// only that transferring *ownership* is sound (needed so the importer can live in the PipeWire +// stream's user-data, whose API imposes a `Send` bound) — the live handles are never used +// off-thread. `Sync` is deliberately NOT implied. unsafe impl Send for EglImporter {} impl EglImporter { @@ -470,16 +487,38 @@ impl EglImporter { // to the same DRM device CUDA-GL interop associates with, which the EGL device platform // did not (cuGraphicsGLRegisterImage rejected device-platform GL textures). let path = std::ffi::CString::new("/dev/dri/renderD128").unwrap(); + // SAFETY: `path` is a live local `CString` (built from a string with no interior NUL, so it + // is NUL-terminated); `path.as_ptr()` is a valid pointer to that buffer which outlives this + // synchronous `open`. `open` only reads the path and returns a new fd (or -1); it neither + // retains the pointer nor writes through it, so there is no aliasing or lifetime hazard. let render_fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) }; ensure!(render_fd >= 0, "open /dev/dri/renderD128 for GBM"); + // SAFETY: `render_fd` is the live DRM render-node fd just returned by `open` and checked + // `>= 0`. `gbm_create_device` (libgbm, linked above) builds a `gbm_device` over that fd and + // returns a `*mut gbm_device` (or null); it borrows but does not take ownership of the fd, + // which `EglImporter` keeps open and closes only in `Drop` after `gbm_device_destroy`. No + // Rust-owned memory is passed, so there is nothing to alias. let gbm = unsafe { gbm_create_device(render_fd) }; if gbm.is_null() { + // SAFETY: reached only when `gbm_create_device` failed (null) — the fd was not consumed + // and no `EglImporter` exists yet to close it again, so this `close` runs exactly once on + // the live `render_fd`, releasing it before the error return. No double-close. unsafe { libc::close(render_fd) }; anyhow::bail!("gbm_create_device failed"); } + // SAFETY: `Egl::load_required` dlopens the system libEGL and binds its entry points, + // trusting that libEGL (libglvnd) is a genuine EGL 1.5 implementation whose core symbols + // match the ABI the `khronos_egl` `EGL1_5` bindings declare. No Rust memory is passed; the + // returned instance is afterwards used only through the safe `khronos_egl` wrappers. let egl: Egl = unsafe { Egl::load_required() }.context("load libEGL (EGL 1.5 dynamic instance)")?; + // SAFETY: `gbm` is the non-null `gbm_device*` created just above (checked), and + // `EGL_PLATFORM_GBM_KHR` is exactly the platform enum that pairs with a GBM device as the + // native-display handle, so the `gbm as NativeDisplayType` cast hands EGL a valid native + // display for the requested platform. `&[egl::ATTRIB_NONE]` is a properly terminated, empty + // attribute array borrowed for this synchronous call; EGL only reads it and returns an + // `EGLDisplay`, retaining no pointer into Rust memory. let display = unsafe { egl.get_platform_display( EGL_PLATFORM_GBM_KHR, @@ -533,6 +572,13 @@ impl EglImporter { .context("eglCreateContext(OpenGL)")?; egl.make_current(display, None, None, Some(gl_ctx)) .context("eglMakeCurrent surfaceless (needs EGL_KHR_surfaceless_context)")?; + // SAFETY: the GL context was made current on this thread just above, which `eglGetProcAddress` + // requires to return a usable pointer. The non-null (`?`-checked) pointer it returns for + // "glEGLImageTargetTexture2DOES" is the driver's implementation of that GL-OES entry point, + // whose real ABI is `void(GLenum, GLeglImageOES)` = `(u32, *mut c_void)` `extern "system"`. + // `EglImageTargetFn` is declared with exactly that signature, so the transmute only retypes a + // same-size, same-ABI thin function pointer (no value/representation change). The function is + // present because `EGL_EXT_image_dma_buf_import` was asserted on this display above. let egl_image_target: EglImageTargetFn = unsafe { std::mem::transmute( egl.get_proc_address("glEGLImageTargetTexture2DOES") @@ -543,6 +589,10 @@ impl EglImporter { // Create the shared CUDA context up front so import() is pure hot path. cuda::context().context("create CUDA context")?; + // SAFETY: `egl::NO_CONTEXT` is EGL's defined sentinel (a null handle) for "no context"; + // `Context::from_ptr` only stores the handle (it never dereferences it), so wrapping the + // null sentinel is sound and yields exactly the `EGL_NO_CONTEXT` value that + // `eglCreateImage(EGL_LINUX_DMA_BUF_EXT)` requires as its context argument later. let no_ctx = unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) }; tracing::info!( "zero-copy EGL importer ready (GBM platform + GL texture interop, dma_buf_import + modifiers)" @@ -602,8 +652,21 @@ impl EglImporter { let Some(sym) = self.egl.get_proc_address("eglQueryDmaBufModifiersEXT") else { return Vec::new(); }; + // SAFETY: `sym` is the non-null pointer `eglGetProcAddress("eglQueryDmaBufModifiersEXT")` + // returned (the `let-else` already bailed on `None`) — the driver's implementation of that + // EGL extension entry point. `QueryFn` is declared with that function's exact documented ABI + // (`EGLDisplay, EGLint, EGLint, EGLuint64* , EGLBoolean*, EGLint* -> EGLBoolean`), all + // `extern "system"`, so the transmute only retypes a same-size, same-ABI thin fn pointer. let query: QueryFn = unsafe { std::mem::transmute(sym) }; let dpy = self.display.as_ptr(); + // SAFETY: `dpy` is this importer's live, initialized `EGLDisplay`; `query` is the proc loaded + // just above. The first call passes null out-arrays with `max_modifiers == 0`, which the + // extension defines as "write only the count" — it writes solely through `&mut count` (a live + // local `i32`). For the second call, `mods`/`ext` are freshly allocated `Vec`s of exactly + // `count` elements and `max_modifiers == count`, so the driver writes at most `count` + // `u64`/`u32` entries (in bounds) plus the actual count through `&mut n` (a live local). All + // four Rust addresses outlive these synchronous calls and alias nothing else. `truncate` only + // shrinks, so even a misbehaving `n > count` cannot read out of bounds. unsafe { let mut count: i32 = 0; if query( @@ -699,6 +762,10 @@ impl EglImporter { ]); } attrs.push(egl::ATTRIB_NONE); + // SAFETY: `eglCreateImage(EGL_LINUX_DMA_BUF_EXT, ...)` mandates a NULL `EGLClientBuffer` + // (the source is described entirely by the attribute list built above), so wrapping + // `null_mut()` is the required value. `from_ptr` only stores the pointer without + // dereferencing it, so constructing it from null is sound. let client = unsafe { egl::ClientBuffer::from_ptr(std::ptr::null_mut()) }; let image = self .egl @@ -733,11 +800,21 @@ impl EglImporter { ) -> Result { cuda::make_current()?; if self.blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) { + // SAFETY: `GlBlit::new` requires the GL context current on the calling thread and a + // current CUDA context. Both hold: this runs on the capture thread where + // `EglImporter::new` made the GL context current and never released it, and + // `cuda::make_current()?` ran at the top of this function. `width`/`height` are plain + // `Copy` frame dimensions. self.blit = Some(unsafe { GlBlit::new(width, height)? }); } let egl_image_target = self.egl_image_target; let blit = self.blit.as_mut().unwrap(); - // SAFETY: GL + CUDA contexts current on this thread; `image` is a valid EGLImage. + // SAFETY: `GlBlit::run` requires a current GL context and a valid `EGLImage`. The GL context + // is current on this capture thread (made current in `EglImporter::new`, never released) and + // `cuda::make_current()` ran above; `egl_image_target` is the `glEGLImageTargetTexture2DOES` + // pointer loaded in `new`; `image` is the raw handle of the live `EGLImage` that + // `import_inner` created with `eglCreateImage` and destroys only AFTER this call returns, so + // it stays valid for the whole synchronous `run`. unsafe { blit.run(egl_image_target, image)? }; // Persistent registration (mapped per frame) + a pooled buffer — no per-frame // cuGraphicsGLRegisterImage / cuMemAllocPitch. @@ -757,11 +834,21 @@ impl EglImporter { ) -> Result { cuda::make_current()?; if self.nv12_blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) { + // SAFETY: `Nv12Blit::new` requires the GL context current on the calling thread and a + // current CUDA context. Both hold: this runs on the capture thread where + // `EglImporter::new` made the GL context current and never released it, and + // `cuda::make_current()?` ran at the top of this function. `width`/`height` are plain + // `Copy` frame dimensions. self.nv12_blit = Some(unsafe { Nv12Blit::new(width, height)? }); } let egl_image_target = self.egl_image_target; let blit = self.nv12_blit.as_mut().unwrap(); - // SAFETY: GL + CUDA contexts current on this thread; `image` is a valid EGLImage. + // SAFETY: `Nv12Blit::run` requires a current GL context and a valid `EGLImage`. The GL + // context is current on this capture thread (made current in `EglImporter::new`, never + // released) and `cuda::make_current()` ran above; `egl_image_target` is the + // `glEGLImageTargetTexture2DOES` pointer loaded in `new`; `image` is the raw handle of the + // live `EGLImage` that `import_inner` created with `eglCreateImage` and destroys only AFTER + // this call returns, so it stays valid for the whole synchronous `run`. unsafe { blit.run(egl_image_target, image)? }; let dst = blit.pool.get()?; cuda::copy_mapped_nv12(&mut blit.y_registered, &mut blit.uv_registered, &dst)?; @@ -787,9 +874,22 @@ impl EglImporter { ); cuda::make_current()?; if self.nv12_blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) { + // SAFETY: `Nv12Blit::new` requires the GL context current on the calling thread and a + // current CUDA context. Both hold: this self-test path runs on the thread that owns this + // `EglImporter` with its GL context current, and `cuda::make_current()?` ran just above. + // `width`/`height` are plain `Copy` scalars. self.nv12_blit = Some(unsafe { Nv12Blit::new(width, height)? }); } let blit = self.nv12_blit.as_mut().unwrap(); + // SAFETY: runs on the thread that owns this `EglImporter` with its GL context current. + // `blit.src_tex` is a texture this `Nv12Blit` owns; `glTexStorage2D` allocates immutable + // RGBA8 storage exactly once (guarded by `test_src_storage`) sized `width×height`. + // `glTexSubImage2D` then uploads exactly `width×height` RGBA8 texels, reading `width*height*4` + // bytes from `rgba.as_ptr()`; the caller already asserted `rgba.len() == width*height*4`, rows + // are `width*4` bytes (a multiple of the default 4-byte unpack alignment, so no row-padding + // over-read), and `rgba` is a live borrow that outlives this synchronous upload. `run_passes` + // then needs only the current GL context (no further Rust pointers). All GL names are this + // blit's own, alias no other live object, and nothing is retained past the calls. unsafe { // Upload the host RGBA into `src_tex` (an immutable GL_RGBA8 backing must exist first; // the live path never allocates it — it retargets `src_tex` via EGLImage instead). @@ -824,9 +924,16 @@ impl EglImporter { impl Drop for EglImporter { fn drop(&mut self) { if !self.gbm.is_null() { + // SAFETY: `self.gbm` is the non-null `gbm_device*` from `gbm_create_device` in `new` + // (checked non-null here), owned exclusively by this `EglImporter` and destroyed exactly + // once (in `Drop`). It is freed BEFORE `render_fd` is closed below — the correct order, + // since the device borrowed that fd for its lifetime. unsafe { gbm_device_destroy(self.gbm) }; } if self.render_fd >= 0 { + // SAFETY: `self.render_fd` is the fd `open` returned in `new` (checked `>= 0`), owned + // exclusively by this `EglImporter`; this `close` runs exactly once, after the gbm device + // that borrowed it has been destroyed. No double-close or use-after-close. unsafe { libc::close(self.render_fd) }; } } diff --git a/crates/punktfunk-host/src/linux/zerocopy/vulkan.rs b/crates/punktfunk-host/src/linux/zerocopy/vulkan.rs index 98cc78e..274c784 100644 --- a/crates/punktfunk-host/src/linux/zerocopy/vulkan.rs +++ b/crates/punktfunk-host/src/linux/zerocopy/vulkan.rs @@ -16,6 +16,9 @@ //! a stream's life). Falls back cleanly: any init/import error disables the importer and the //! CPU mmap path takes over. +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] + use super::cuda::{self, DeviceBuffer}; use anyhow::{anyhow, bail, Context as _, Result}; use ash::vk; @@ -51,12 +54,27 @@ pub struct VkBridge { dst: Option, } -// Confined to the capture thread; moved there once. +// SAFETY: `VkBridge` owns ash Vulkan handles (instance/device/queue/command pool+buffer/fence), a +// CUDA external-memory mapping, and an fd→buffer cache — none `Sync`, and a single queue + +// command buffer must be externally synchronized. It is created inside `EglImporter::import_linear` +// on the dedicated `punktfunk-pipewire` capture thread and every method (`import_linear`, `Drop`) +// runs on that thread; it is never shared via `&` across threads. `Send` asserts only that +// transferring ownership is sound (so the bridge can live inside the `Send` `EglImporter`); the live +// handles are never touched off-thread, and `Sync` is deliberately NOT implied. unsafe impl Send for VkBridge {} impl VkBridge { /// Bring up Vulkan on the NVIDIA GPU with the external-memory extensions. pub fn new() -> Result { + // SAFETY: standard ash bring-up — every call is `unsafe` only because ash cannot statically + // verify Vulkan handle/CreateInfo validity. `ash::Entry::load` dlopens a real system + // libvulkan. Each `*CreateInfo`/`AllocateInfo` is built by ash's builders from locals (`app`, + // `exts`, `prio`, `qci`, and the inline infos) that all live for the duration of the + // synchronous `create_*`/`enumerate_*` call that reads them — in particular the + // `enabled_extension_names(&exts)` and `queue_priorities(&prio)` borrows outlive their calls. + // Every handle passed (`instance`, `phys`, `device`, `qf`, `cmd_pool`) was just created and + // checked via `?`/`ok_or_else` in this same function, so no invalid handle is ever used. This + // constructor shares nothing across threads. unsafe { let entry = ash::Entry::load().context("load libvulkan")?; let app = vk::ApplicationInfo::default().api_version(vk::API_VERSION_1_1); @@ -294,6 +312,19 @@ impl VkBridge { height: u32, pool: &cuda::BufferPool, ) -> Result { + // SAFETY: `fd` is the live dmabuf fd handed in by the caller (borrowed; `import_src` dup's it + // internally and Vulkan owns the dup). `libc::lseek` only queries the fd's size. The unsafe + // `import_src`/`ensure_dst` are called with a valid fd and a checked size. The bounds are + // proven: `import_src` asserts `size >= span` (so the cached `src_size >= span`), + // `copy_size = src_size.min(span)`, and `ensure_dst(copy_size)` makes `dst` at least + // `copy_size` — so the GPU `cmd_copy_buffer` of `copy_size` bytes reads/writes within both + // buffers, and the later CUDA pitched copy reading `[offset, span)` from `dst.cuda.ptr` (= + // `offset + stride*height = span <= copy_size`) stays inside the freshly-copied region. The + // `*Info`/`region`/`cmds`/`submit` are locals that outlive the synchronous calls reading them. + // `cmd`/`queue`/`fence` are this bridge's own handles, used on this single thread only. The + // host-side `wait_for_fences` fully retires the Vulkan copy BEFORE CUDA reads the shared + // memory, so there is no GPU write/read data race. `dst` is an `&self.dst` shared borrow that + // does not alias the `&self.device` calls. unsafe { let span = offset as u64 + stride as u64 * height as u64; if !self.src_cache.contains_key(&fd) { @@ -347,6 +378,15 @@ impl VkBridge { impl Drop for VkBridge { fn drop(&mut self) { + // SAFETY: runs once when the bridge is dropped on its owning capture thread. + // `device_wait_idle` first drains all in-flight GPU work, so no queued command still + // references these objects. Every handle freed (the `src_cache` buffers+memories, the `dst` + // buffer+memory, `fence`, `cmd_pool`, `device`, `instance`) was created by this `VkBridge` + // and owned exclusively by it, so each `destroy_*`/`free_*` runs exactly once with no + // double-free, in dependency order (child objects before `device`, `device` before + // `instance`). `dst.cuda` is dropped after `free_memory`, which is safe because CUDA holds + // its own dup'd OPAQUE_FD reference to the underlying allocation. No other thread touches + // these handles. unsafe { let _ = self.device.device_wait_idle(); for (_, s) in self.src_cache.drain() { diff --git a/crates/punktfunk-host/src/vdisplay/linux/kwin.rs b/crates/punktfunk-host/src/vdisplay/linux/kwin.rs index fe7376a..8ee651d 100644 --- a/crates/punktfunk-host/src/vdisplay/linux/kwin.rs +++ b/crates/punktfunk-host/src/vdisplay/linux/kwin.rs @@ -15,6 +15,8 @@ //! the KWin session's environment. #![allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)] +// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). +#![deny(clippy::undocumented_unsafe_blocks)] use super::{Mode, VirtualDisplay, VirtualOutput}; use anyhow::{anyhow, bail, Context, Result}; @@ -495,6 +497,11 @@ fn run( events: libc::POLLIN, revents: 0, }; + // SAFETY: `&mut pfd` points at a single live, fully-initialized `libc::pollfd` on the stack, and + // the count `1` matches that one-element array, so `poll` reads `fd`/`events` and writes `revents` + // strictly within `pfd`. `pfd.fd` is the Wayland connection's fd, valid because `conn` (and the + // `prepare_read` guard) are alive across the call. `poll` blocks up to 200 ms and writes only + // `revents`; `pfd` outlives the synchronous call and aliases nothing (a fresh local). let r = unsafe { libc::poll(&mut pfd, 1, 200) }; if r > 0 && (pfd.revents & libc::POLLIN) != 0 { let _ = guard.read();