From a0427cd2a3e6c6af14e811fcc65370b65c51548a Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Thu, 25 Jun 2026 18:37:48 +0000 Subject: [PATCH] =?UTF-8?q?feat(windows-host):=20OutputFormat=20into=20the?= =?UTF-8?q?=20capturer=20=E2=80=94=20kill=20the=20dxgi=20back-reference=20?= =?UTF-8?q?(Goal-1=20stage=205,=20tightening=201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The headline §2.3 seam tightening (the explicit Stage-3 deferral; §5's "highest-severity coupling"): the capturer is now TOLD its output format instead of re-deriving the encode backend. New `capture::OutputFormat { gpu, hdr }`, resolved once per session and passed INTO capture_virtual_output: * native punktfunk/1 path: `SessionPlan::output_format()` (gpu = encoder.is_gpu(), from the already-resolved plan.encoder — no second probe; hdr = plan.hdr). * GameStream + spike paths: `OutputFormat::resolve(hdr)` (gpu from the single `gpu_encode()` source, which maps windows_resolved_backend()). `capture/dxgi.rs DuplCapturer::open` takes `gpu` in and its internal `!matches!(windows_resolved_backend(), Software)` recompute is DELETED — the capture layer no longer re-calls the encode layer (the back-reference that could let capture and encode disagree on whether frames are GPU-resident, plan §2.3/§5). The relay's secure-desktop DDA passes `gpu_encode()` likewise. Behavior-preserving: the `gpu` passed in equals the value the capturer used to compute (same encode-backend resolution). The DDA opens keep `want_hdr=false` (the SDR fallback, unchanged). Tightenings 2 (HDR/release -> VirtualLease) and 3 (EncoderCaps) split off: (2) needs the monitor-generation carried on the lease + the keepalive becoming Box — that's the §2.5 ownership-model change (CURRENT_MON_GEN / sudovda::wait_for_monitor_released), so it moves there; (3) is a small additive follow-on. Documented in the plan. Verified: Linux cargo check + clippy (-D warnings) + fmt clean. Box build to follow. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/punktfunk-host/src/capture.rs | 64 ++++++++++++++++--- crates/punktfunk-host/src/capture/dxgi.rs | 18 +++--- .../punktfunk-host/src/gamestream/stream.rs | 2 +- crates/punktfunk-host/src/punktfunk1.rs | 8 ++- crates/punktfunk-host/src/session_plan.rs | 18 ++++++ crates/punktfunk-host/src/spike.rs | 2 +- docs/windows-host-goal1-plan.md | 23 +++++-- 7 files changed, 108 insertions(+), 27 deletions(-) diff --git a/crates/punktfunk-host/src/capture.rs b/crates/punktfunk-host/src/capture.rs index d043d98..21fb74f 100644 --- a/crates/punktfunk-host/src/capture.rs +++ b/crates/punktfunk-host/src/capture.rs @@ -44,6 +44,49 @@ impl PixelFormat { } } +/// What a Windows capturer should produce, resolved **once** per session and passed **into** +/// [`capture_virtual_output`] (Goal-1 stage 5, plan §2.3/§5). Passing the format in is what lets a +/// capturer stop re-deriving the encode backend itself — it kills the +/// `capture/dxgi.rs → encode::windows_resolved_backend()` back-reference (the highest-severity coupling: +/// capture and encode could otherwise disagree on whether frames are GPU-resident). Neutral type; the +/// Linux portal capturer ignores it (it negotiates its own format with PipeWire). +#[derive(Clone, Copy, Debug)] +pub struct OutputFormat { + /// Produce GPU-resident D3D11 frames (zero-copy for a GPU encoder — NVENC/AMF/QSV) rather than CPU + /// staging. `false` **only** for the GPU-less software encoder. + pub gpu: bool, + /// HDR: the capturer converts to 10-bit (IDD-push FP16 → `Rgb10a2`; the DDA secure-desktop HDR hint). + /// `false` = 8-bit SDR. + pub hdr: bool, +} + +impl OutputFormat { + /// Resolve the output format for an entry point that doesn't build a full [`SessionPlan`] + /// (`crate::session_plan`) — the GameStream + spike paths: `gpu` from the resolved encode backend, + /// `hdr` as given. The native punktfunk/1 path uses `SessionPlan::output_format()` instead (it already + /// resolved the encoder), so neither path makes a capturer re-derive it. + pub fn resolve(hdr: bool) -> Self { + OutputFormat { + gpu: gpu_encode(), + hdr, + } + } +} + +/// True if the resolved encode backend produces GPU frames (anything but the software encoder). The single +/// source for [`OutputFormat::resolve`]'s `gpu`; on Linux always true (the portal/VAAPI/CUDA path is GPU). +#[cfg(target_os = "windows")] +pub(crate) fn gpu_encode() -> bool { + !matches!( + crate::encode::windows_resolved_backend(), + crate::encode::WindowsBackend::Software + ) +} +#[cfg(not(target_os = "windows"))] +pub(crate) fn gpu_encode() -> bool { + true +} + /// A captured frame. [`format`](Self::format)/dimensions describe the pixels regardless of /// where they live — [`payload`](Self::payload) is either a CPU buffer (the spike/fallback path) /// or a GPU buffer already on the device (the zero-copy path, plan §9). @@ -314,11 +357,12 @@ pub fn open_portal_monitor() -> Result> { #[cfg(target_os = "linux")] pub fn capture_virtual_output( vout: crate::vdisplay::VirtualOutput, - _want_hdr: bool, + _want: OutputFormat, _capture: crate::session_plan::CaptureBackend, ) -> Result> { - // The Linux host stays 8-bit (HDR is blocked upstream), so `want_hdr` is unused here; the capture - // backend is always the portal (the `CaptureBackend` arg is a Windows-only dispatch — ignored here). + // The Linux host stays 8-bit (HDR is blocked upstream) and the portal negotiates its own format, so + // the `OutputFormat` is unused here; the capture backend is always the portal (the `CaptureBackend` + // arg is a Windows-only dispatch — ignored here). linux::PortalCapturer::from_virtual_output(vout).map(|c| Box::new(c) as Box) } @@ -335,7 +379,7 @@ pub(crate) fn wgc_disabled() -> bool { #[cfg(target_os = "windows")] pub fn capture_virtual_output( vout: crate::vdisplay::VirtualOutput, - want_hdr: bool, + want: OutputFormat, capture: crate::session_plan::CaptureBackend, ) -> Result> { use crate::session_plan::CaptureBackend; @@ -359,14 +403,14 @@ pub fn capture_virtual_output( // If IDD-push can't open OR the driver doesn't attach to the ring within a few seconds (e.g. a // hybrid-GPU render mismatch), fall back to DDA so the session is NEVER left black (audit §5.1). // `open()` hands the keepalive back on failure so DDA can take ownership of the virtual display. - match idd_push::IddPushCapturer::open(target.clone(), pref, want_hdr, keep) { + match idd_push::IddPushCapturer::open(target.clone(), pref, want.hdr, keep) { Ok(c) => return Ok(Box::new(c) as Box), Err((e, keep)) => { tracing::warn!( error = %format!("{e:#}"), "IDD-push open/attach failed — falling back to DDA" ); - return dxgi::DuplCapturer::open(target, pref, keep, false) + return dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false) .map(|c| Box::new(c) as Box); } } @@ -378,7 +422,7 @@ pub fn capture_virtual_output( // chosen backend (it owns the SudoVDA keepalive), so there's no open-time auto-fallback. The // backend choice (`dda`/`dxgi`/`PUNKTFUNK_NO_WGC` → DDA, else WGC) is now resolved once in the plan. if capture == CaptureBackend::Dda { - return dxgi::DuplCapturer::open(target, pref, keep, false) + return dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false) .map(|c| Box::new(c) as Box); } // WGC default, with a watchdog'd DDA fallback. WGC's Direct3D11CaptureFramePool::CreateFreeThreaded @@ -408,12 +452,12 @@ pub fn capture_virtual_output( } Ok(Err(e)) => { tracing::warn!(error = %format!("{e:#}"), "WGC open failed — falling back to DDA"); - dxgi::DuplCapturer::open(target, pref, keep, false) + dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false) .map(|c| Box::new(c) as Box) } Err(_) => { tracing::warn!("WGC open timed out (CreateFreeThreaded hang on the virtual display) — falling back to DDA"); - dxgi::DuplCapturer::open(target, pref, keep, false) + dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false) .map(|c| Box::new(c) as Box) } } @@ -422,7 +466,7 @@ pub fn capture_virtual_output( #[cfg(not(any(target_os = "linux", target_os = "windows")))] pub fn capture_virtual_output( _vout: crate::vdisplay::VirtualOutput, - _want_hdr: bool, + _want: OutputFormat, _capture: crate::session_plan::CaptureBackend, ) -> Result> { anyhow::bail!("virtual-output capture requires Linux or Windows") diff --git a/crates/punktfunk-host/src/capture/dxgi.rs b/crates/punktfunk-host/src/capture/dxgi.rs index 864f307..156f659 100644 --- a/crates/punktfunk-host/src/capture/dxgi.rs +++ b/crates/punktfunk-host/src/capture/dxgi.rs @@ -2046,6 +2046,9 @@ impl DuplCapturer { target: WinCaptureTarget, preferred: Option<(u32, u32, u32)>, keepalive: Box, + // Whether the (already-resolved) encode backend wants GPU-resident frames — passed IN (Goal-1 + // stage 5) so the capturer never re-derives the encode backend itself. + gpu: bool, want_hdr: bool, ) -> Result { unsafe { @@ -2213,14 +2216,13 @@ impl DuplCapturer { .ok() .and_then(|s| s.parse().ok()) .unwrap_or((2000 / refresh_hz.max(1)).max(100)); - // Produce GPU-resident D3D11 frames (zero-copy NVENC, or the NV12/P010 the AMF/QSV - // backends read back / import) whenever the resolved encode backend is a GPU one — so the - // capturer's output format matches the encoder's input. Only the software (GPU-less) path - // takes CPU staging. Mirrors `encode::open_video`'s dispatch exactly. - let gpu_mode = !matches!( - crate::encode::windows_resolved_backend(), - crate::encode::WindowsBackend::Software - ); + // Produce GPU-resident D3D11 frames (zero-copy NVENC, or the NV12/P010 the AMF/QSV backends + // read back / import) whenever the encode backend is a GPU one — so the capturer's output + // format matches the encoder's input. Only the software (GPU-less) path takes CPU staging. + // The decision is resolved ONCE per session and passed in (Goal-1 stage 5), instead of this + // capturer re-calling `encode::windows_resolved_backend()` — the back-reference that let + // capture and encode disagree (plan §2.3/§5). + let gpu_mode = gpu; // Read the source display's HDR mastering metadata while we still hold `output` (it is // moved into the struct below). Only meaningful for an HDR (FP16) duplication. let is_hdr_init = dd.ModeDesc.Format == DXGI_FORMAT_R16G16B16A16_FLOAT; diff --git a/crates/punktfunk-host/src/gamestream/stream.rs b/crates/punktfunk-host/src/gamestream/stream.rs index 55ba6db..a4bb1c5 100644 --- a/crates/punktfunk-host/src/gamestream/stream.rs +++ b/crates/punktfunk-host/src/gamestream/stream.rs @@ -136,7 +136,7 @@ fn run( // from a GameStream HDR flag once StreamConfig carries one. let mut capturer = capture::capture_virtual_output( vout, - false, + capture::OutputFormat::resolve(false), crate::session_plan::CaptureBackend::resolve(), ) .context("capture virtual output")?; diff --git a/crates/punktfunk-host/src/punktfunk1.rs b/crates/punktfunk-host/src/punktfunk1.rs index 9c0597f..7d4a0bd 100644 --- a/crates/punktfunk-host/src/punktfunk1.rs +++ b/crates/punktfunk-host/src/punktfunk1.rs @@ -2715,6 +2715,9 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> { target.clone(), Some((w, h, hz)), Box::new(()), + // The relay's host encoder is GPU (NVENC/AMF/QSV unless software) — pass `gpu` in (Goal-1 + // stage 5) so the DDA capturer doesn't re-derive it. + crate::capture::gpu_encode(), hdr, ) .context("open DDA for secure desktop")?; @@ -3140,8 +3143,9 @@ fn build_pipeline( // VIDEO_CAP_10BIT + host opted in via PUNKTFUNK_10BIT) is our HDR path → BT.2020 PQ Rgb10a2; // otherwise the FP16 IDD frames are converted to 8-bit SDR. (Ignored by non-IDD-push backends, // which auto-detect HDR from the monitor state.) - let mut capturer = crate::capture::capture_virtual_output(vout, plan.hdr, plan.capture) - .context("capture virtual output")?; + let mut capturer = + crate::capture::capture_virtual_output(vout, plan.output_format(), plan.capture) + .context("capture virtual output")?; capturer.set_active(true); let frame = capturer.next_frame().context("first frame")?; // `bit_depth` is the handshake-negotiated value (8, or 10 = HEVC Main10 when the client diff --git a/crates/punktfunk-host/src/session_plan.rs b/crates/punktfunk-host/src/session_plan.rs index 2c7fa32..1e83aa8 100644 --- a/crates/punktfunk-host/src/session_plan.rs +++ b/crates/punktfunk-host/src/session_plan.rs @@ -86,6 +86,14 @@ pub enum EncoderBackend { Software, } +impl EncoderBackend { + /// True if this backend encodes on the GPU (so the capturer should produce GPU-resident frames). Only + /// the software encoder takes CPU staging; `PlatformAuto` (Linux NVENC/VAAPI) is always GPU. + pub fn is_gpu(self) -> bool { + !matches!(self, EncoderBackend::Software) + } +} + /// The per-session decision, resolved once. `Copy` so it threads through the capture/encode chain /// without ceremony (stage 4 folds it, with the rest of the arg soup, into a `SessionContext`). #[derive(Clone, Copy, Debug)] @@ -111,6 +119,16 @@ impl SessionPlan { hdr: bit_depth >= 10, } } + + /// The capturer's target output format (Goal-1 stage 5): `gpu` from the already-resolved `encoder` + /// (no second backend probe), `hdr` from the plan. Handed into `capture::capture_virtual_output` so the + /// capturer never re-derives the encode backend. + pub fn output_format(&self) -> crate::capture::OutputFormat { + crate::capture::OutputFormat { + gpu: self.encoder.is_gpu(), + hdr: self.hdr, + } + } } /// Process topology. On Windows this is the former `punktfunk1::should_use_helper` logic verbatim; on diff --git a/crates/punktfunk-host/src/spike.rs b/crates/punktfunk-host/src/spike.rs index 53bd1b2..cdfa07f 100644 --- a/crates/punktfunk-host/src/spike.rs +++ b/crates/punktfunk-host/src/spike.rs @@ -78,7 +78,7 @@ pub fn run(opts: Options) -> Result<()> { .context("create virtual output")?; capture::capture_virtual_output( vout, - false, + capture::OutputFormat::resolve(false), crate::session_plan::CaptureBackend::resolve(), ) .context("capture virtual output")? diff --git a/docs/windows-host-goal1-plan.md b/docs/windows-host-goal1-plan.md index 3866890..db62223 100644 --- a/docs/windows-host-goal1-plan.md +++ b/docs/windows-host-goal1-plan.md @@ -91,11 +91,24 @@ explicit), and it is the validated shipping path. Wrapping the deployed reconfig the concrete, safe arg-bundling. Risk: low (behavior-identical). Verify: Linux + box build (the relay destructure is the only Windows-only piece); the teardown on-glass gate moves to the §2.5 work. -**Stage 5 — seam-trait tightenings (plan §2.3).** -`Capturer::open_capturer(vout, want: OutputFormat)` takes the format IN (kills the -`capture → encode::windows_resolved_backend()` back-reference recomputed in `dxgi.rs`); HDR/release become -`VirtualLease` methods (session glue names no concrete backend, contains no `unsafe`); optional encoder -features move to `EncoderCaps`. Risk: medium. Verify: box build + on-glass. +**Stage 5 — seam-trait tightenings (plan §2.3). 🟡 Tightening 1 ✅ DONE (box-build validated); 2→§2.5, 3 follow-on.** +The three §2.3 tightenings have different coupling, so they split: +- **(1) `OutputFormat` into the capturer ✅** — the headline (the explicit Stage-3 deferral; §5's + "highest-severity coupling"). New `capture::OutputFormat { gpu, hdr }`, resolved once per session and + passed **into** `capture_virtual_output` (`SessionPlan::output_format()` for the native path — + `gpu = encoder.is_gpu()`, no second probe; `OutputFormat::resolve()` for the GameStream/spike paths). + `dxgi::DuplCapturer::open` takes `gpu` in and **its `windows_resolved_backend()` recompute is deleted** — + capture no longer re-derives the encode backend. Behavior-preserving (the `gpu` passed in equals the value + the capturer used to compute). Linux + box-build clean. +- **(2) HDR/release → `VirtualLease`** — **moved to §2.5.** `await_released` as a lease method needs the + monitor-generation carried *on the lease* (today it's the `CURRENT_MON_GEN` global + the + `sudovda::wait_for_monitor_released` free fn), and the keepalive becoming `Box` is the + ownership-model change. It belongs with the `VirtualDisplayManager`/`MonitorLease` work, not bolted on here. +- **(3) `EncoderCaps`** — small additive follow-on (query optional encoder capabilities instead of default + no-ops); not blocking. Tracked for the next seam pass. + +Risk: medium (Tightening 1 is behavior-preserving + Windows-only → box-compile is the gate; on-glass parity is +the same env-limited story as Stage 3). **Stage 6 — `src/windows/` tree (cfg-sprawl confinement, plan §2.2).** Move the Windows backends under `src/windows/` + `capture/windows/`, `encode/windows/`, `inject/windows/`,