From 16f72da72513af10b391f6791a36fdce10d72a3c Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Thu, 2 Jul 2026 12:34:31 +0000 Subject: [PATCH] feat(host/linux): default the tiled zero-copy path to GPU NV12 (NVENC fed native YUV) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A/B'd on the Bazzite box (RTX 5070 Ti, KWin 6.6, driver 595, 1080p60 over the LAN): pixel-correct decode (full desktop, no tint/banding), latency-neutral idle (p50 1.47ms RGB vs 1.52ms NV12, both 2400/2400 frames), CPU-neutral — and it deletes NVENC's internal RGB->YUV CSC from the SM/3D engine a game saturates (video 40%+SM 15% -> video 26%+SM 2% measured on Windows). Matches the Windows host default. PUNKTFUNK_NV12=0 restores the RGB feed; LINEAR/gamescope captures are unaffected. Co-Authored-By: Claude Fable 5 --- CLAUDE.md | 4 +++- crates/punktfunk-host/src/linux/zerocopy/mod.rs | 12 +++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index df71239..c6768b7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -455,7 +455,9 @@ FFI also link-needs `libGL`/`libgbm`/`libcuda` at build time). 
Env knobs: `PUNKTFUNK_COMPOSITOR=kwin|gamescope|mutter`, `PUNKTFUNK_ZEROCOPY=1|0` (Linux default: ON for VAAPI/AMD/Intel with a one-shot CPU downgrade if the dmabuf offer never negotiates, OFF/opt-in for NVENC), `PUNKTFUNK_VAAPI_LOW_POWER=1|0` (pin the VAAPI entrypoint; auto = full-feature then VDEnc -fallback for modern Intel), `PUNKTFUNK_GAMESCOPE_APP=...`, +fallback for modern Intel), `PUNKTFUNK_NV12=0` (opt OUT of the default GPU RGB→NV12 convert on the +NVIDIA tiled zero-copy path), `PUNKTFUNK_INTRA_REFRESH=1` (opt-in NVENC intra-refresh loss recovery), +`PUNKTFUNK_PIN_CLOCKS=1` (opt-in NVML GPU clock floor, root-gated), `PUNKTFUNK_GAMESCOPE_APP=...`, `PUNKTFUNK_INPUT_BACKEND=...`, `PUNKTFUNK_PERF=1` (per-stage timing), `PUNKTFUNK_VIDEO_DROP=N` (FEC test), `PUNKTFUNK_FEC_PCT=N`, `PUNKTFUNK_DSCP=1` (opt-in DSCP/SO_PRIORITY media QoS on the data + GameStream video/audio sockets; no-op on the wire on Windows without a qWAVE policy), diff --git a/crates/punktfunk-host/src/linux/zerocopy/mod.rs b/crates/punktfunk-host/src/linux/zerocopy/mod.rs index 7dcb673..4361bf8 100644 --- a/crates/punktfunk-host/src/linux/zerocopy/mod.rs +++ b/crates/punktfunk-host/src/linux/zerocopy/mod.rs @@ -63,12 +63,14 @@ pub fn enabled() -> bool { } } -/// Whether the NV12 convert path is opted in (`PUNKTFUNK_NV12` truthy). When set AND the zero-copy -/// tiled-GL path is active, the capturer produces native NV12 (BT.709 limited range) on the GPU and -/// feeds NVENC YUV directly — deleting NVENC's internal RGB→YUV CSC (Tier 2A). Off by default: the -/// existing RGB/BGRx path is then 100% unchanged. +/// Whether the tiled-GL zero-copy path converts to NV12 on the GPU and feeds NVENC native YUV — +/// deleting NVENC's internal RGB→YUV CSC, which otherwise runs on the SM/3D engine the game +/// saturates (Tier 2A). 
**Default ON** (validated color-correct on the RTX 5070 Ti via +/// `nv12-selftest` + live decode on dev + Bazzite/KWin boxes; latency- and CPU-neutral idle, +/// frees SM headroom under load — the same default the Windows host ships). `PUNKTFUNK_NV12=0` +/// restores the RGB/BGRx feed. LINEAR (gamescope/Vulkan-bridge) captures are unaffected either way. pub fn nv12_enabled() -> bool { - flag("PUNKTFUNK_NV12") + flag_opt("PUNKTFUNK_NV12").unwrap_or(true) } /// DRM FourCC for a packed 32-bit format name (little-endian, e.g. `b"XR24"`).