From 9c2499fd45de8f3ad5a3062affed70b3bea0e74d Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Mon, 15 Jun 2026 01:06:21 +0000 Subject: [PATCH] feat(host/windows): DXGI Desktop Duplication capture backend Windows Capturer via DXGI Desktop Duplication: create a D3D11 device on the SudoVDA adapter (by LUID), find the matching output (by GDI name), DuplicateOutput, and per AcquireNextFrame copy the desktop into a CPU-readable staging texture -> tightly-packed BGRA (FramePayload::Cpu, feeds the openh264 software encoder GPU-lessly). Handles WAIT_TIMEOUT (reuse last frame) and ACCESS_LOST (re-duplicate). Adds FramePayload::D3d11(D3d11Frame) for the future NVENC zero-copy path, and a VirtualOutput.win_capture identity (adapter LUID + GDI name) carried out of the SudoVDA backend. Pure helpers (pack_luid/gdi_name_matches/depad_bgra) unit-tested on the VM; the live duplication path needs a real GPU + an activated SudoVDA monitor. Compiles clean on Windows + Linux. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/punktfunk-host/Cargo.toml | 5 + crates/punktfunk-host/src/capture.rs | 18 +- crates/punktfunk-host/src/capture/dxgi.rs | 355 ++++++++++++++++++ crates/punktfunk-host/src/vdisplay.rs | 4 + crates/punktfunk-host/src/vdisplay/sudovda.rs | 4 + 5 files changed, 384 insertions(+), 2 deletions(-) create mode 100644 crates/punktfunk-host/src/capture/dxgi.rs diff --git a/crates/punktfunk-host/Cargo.toml b/crates/punktfunk-host/Cargo.toml index cedfa43..a2d1410 100644 --- a/crates/punktfunk-host/Cargo.toml +++ b/crates/punktfunk-host/Cargo.toml @@ -116,6 +116,11 @@ windows = { version = "0.62", features = [ "Win32_UI_Input_KeyboardAndMouse", "Win32_UI_WindowsAndMessaging", "Win32_System_StationsAndDesktops", + "Win32_Graphics_Dxgi", + "Win32_Graphics_Dxgi_Common", + "Win32_Graphics_Direct3D", + "Win32_Graphics_Direct3D11", + "Win32_Graphics_Gdi", ] } # Software H.264 encoder (GPU-less path + NVENC fallback). 
The default `source` feature statically # compiles OpenH264 (BSD-2) — no system lib, builds on MSVC; nasm on PATH adds the SIMD fast path. diff --git a/crates/punktfunk-host/src/capture.rs b/crates/punktfunk-host/src/capture.rs index d5e58fc..527938d 100644 --- a/crates/punktfunk-host/src/capture.rs +++ b/crates/punktfunk-host/src/capture.rs @@ -53,6 +53,9 @@ pub enum FramePayload { /// dmabuf has already been imported + copied into this owned device buffer. #[cfg(target_os = "linux")] Cuda(crate::zerocopy::DeviceBuffer), + /// A GPU-resident D3D11 texture (Windows zero-copy path for NVENC). Owns the copied frame. + #[cfg(target_os = "windows")] + D3d11(dxgi::D3d11Frame), } impl CapturedFrame { @@ -251,10 +254,21 @@ pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result) } -#[cfg(not(target_os = "linux"))] +#[cfg(target_os = "windows")] +pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result> { + let target = vout.win_capture.clone().ok_or_else(|| { + anyhow::anyhow!("SudoVDA target not yet an active display (needs a WDDM GPU to activate it)") + })?; + dxgi::DuplCapturer::open(target, vout.preferred_mode, vout.keepalive) + .map(|c| Box::new(c) as Box) +} + +#[cfg(not(any(target_os = "linux", target_os = "windows")))] pub fn capture_virtual_output(_vout: crate::vdisplay::VirtualOutput) -> Result> { - anyhow::bail!("virtual-output capture requires Linux") + anyhow::bail!("virtual-output capture requires Linux or Windows") } #[cfg(target_os = "linux")] mod linux; +#[cfg(target_os = "windows")] +pub mod dxgi; diff --git a/crates/punktfunk-host/src/capture/dxgi.rs b/crates/punktfunk-host/src/capture/dxgi.rs new file mode 100644 index 0000000..4f028f5 --- /dev/null +++ b/crates/punktfunk-host/src/capture/dxgi.rs @@ -0,0 +1,355 @@ +//! DXGI Desktop Duplication capture (Windows) — the analogue of the PipeWire portal capturer. +//! Creates a D3D11 device on the SudoVDA adapter (by LUID), finds the matching output (by GDI +//! 
name), duplicates it, and on each `AcquireNextFrame` copies the desktop image into a CPU-readable
+//! staging texture → tightly-packed BGRA (the GPU-less path that feeds the software encoder). A
+//! future zero-copy path would return `FramePayload::D3d11` for NVENC.
+//!
+//! Can only be validated with a real GPU + an *activated* SudoVDA monitor (`DuplicateOutput` needs
+//! a live WDDM output). Compiles on the GPU-less VM; the pure helpers are unit-tested there.
+
+use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
+use anyhow::{anyhow, Context, Result};
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
+use windows::core::Interface;
+use windows::Win32::Foundation::{HMODULE, LUID};
+use windows::Win32::Graphics::Direct3D::{D3D_DRIVER_TYPE_UNKNOWN, D3D_FEATURE_LEVEL_11_0};
+use windows::Win32::Graphics::Direct3D11::{
+    D3D11CreateDevice, ID3D11Device, ID3D11DeviceContext, ID3D11Texture2D, D3D11_BIND_FLAG,
+    D3D11_CPU_ACCESS_READ, D3D11_CREATE_DEVICE_BGRA_SUPPORT, D3D11_MAPPED_SUBRESOURCE, D3D11_MAP_READ,
+    D3D11_SDK_VERSION, D3D11_TEXTURE2D_DESC, D3D11_USAGE_STAGING,
+};
+use windows::Win32::Graphics::Dxgi::Common::{DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_SAMPLE_DESC};
+use windows::Win32::Graphics::Dxgi::{
+    CreateDXGIFactory1, IDXGIAdapter1, IDXGIFactory1, IDXGIOutput1, IDXGIOutputDuplication,
+    IDXGIResource, DXGI_ERROR_ACCESS_LOST, DXGI_ERROR_WAIT_TIMEOUT, DXGI_OUTDUPL_DESC,
+    DXGI_OUTDUPL_FRAME_INFO,
+};
+
+/// The Windows capture identity carried out of the SudoVDA backend in
+/// [`crate::vdisplay::VirtualOutput`]: which adapter + which GDI output to duplicate.
+#[derive(Clone, Debug)]
+pub struct WinCaptureTarget {
+    /// Packed DXGI adapter LUID as produced by [`pack_luid`]: `(HighPart << 32) | (LowPart & 0xffff_ffff)`.
+    pub adapter_luid: i64,
+    /// The output's GDI device name, e.g. `\\.\DISPLAY3`; compared against `DXGI_OUTPUT_DESC::DeviceName`.
+    pub gdi_name: String,
+}
+
+/// A GPU-resident captured texture (future NVENC-D3D11 zero-copy path).
+pub struct D3d11Frame {
+    /// The captured GPU texture.
+    pub texture: ID3D11Texture2D,
+    /// NOTE(review): presumably the device that owns `texture`; nothing in this module constructs
+    /// a `D3d11Frame` yet — confirm when the zero-copy path lands.
+    pub device: ID3D11Device,
+}
+// SAFETY (asserted by the author, not compiler-checked): COM pointers, used only from the single
+// owning thread.
+unsafe impl Send for D3d11Frame {}
+
+/// Pack a `LUID` into one `i64`: `HighPart` in the upper 32 bits, `LowPart` (masked to 32 bits)
+/// in the lower 32 bits.
+pub fn pack_luid(luid: LUID) -> i64 {
+    ((luid.HighPart as i64) << 32) | (luid.LowPart as i64 & 0xffff_ffff)
+}
+
+/// Does a fixed-size UTF-16 GDI device name (NUL-padded, e.g. `DXGI_OUTPUT_DESC::DeviceName`)
+/// equal `target`?
+///
+/// The whole buffer is decoded lossily and only *trailing* NULs are stripped before comparing.
+/// NOTE(review): data after an embedded NUL would defeat the match — presumably the buffer is
+/// zero-filled past the terminator; confirm.
+fn gdi_name_matches(name16: &[u16], target: &str) -> bool {
+    let s = String::from_utf16_lossy(name16);
+    s.trim_end_matches('\u{0}') == target
+}
+
+/// Copy a row-padded BGRA surface (`pitch` >= `w*4`) into a tightly-packed `w*4*h` buffer.
+///
+/// Row `y` is read from `src[y * pitch .. y * pitch + w * 4]`.
+///
+/// # Panics
+/// Panics if `src` is too short for the last row read.
+fn depad_bgra(src: &[u8], pitch: usize, w: usize, h: usize) -> Vec {
+    let row = w * 4;
+    let mut out = vec![0u8; row * h];
+    for y in 0..h {
+        out[y * row..y * row + row].copy_from_slice(&src[y * pitch..y * pitch + row]);
+    }
+    out
+}
+
+/// DXGI Desktop Duplication capturer for one output; produces tightly-packed BGRA CPU frames.
+pub struct DuplCapturer {
+    /// D3D11 device created on the target adapter in `open`.
+    device: ID3D11Device,
+    /// Immediate context returned alongside `device`.
+    context: ID3D11DeviceContext,
+    /// The matched output; kept so the duplication can be re-created.
+    output: IDXGIOutput1,
+    /// The live duplication; replaced by `recreate_dupl`.
+    dupl: IDXGIOutputDuplication,
+    /// Mode width read from the duplication's `DXGI_OUTDUPL_DESC` in `open`.
+    width: u32,
+    /// Mode height read from the duplication's `DXGI_OUTDUPL_DESC` in `open`.
+    height: u32,
+    /// Preferred refresh rate if given and non-zero, else derived from the duplicated mode.
+    refresh_hz: u32,
+    /// CPU-readable staging texture, created lazily by `ensure_staging`.
+    staging: Option,
+    /// True between a successful `AcquireNextFrame` and the matching `ReleaseFrame`.
+    holding_frame: bool,
+    /// Written by `set_active`; not read anywhere in this module.
+    active: AtomicBool,
+    /// Timeout in milliseconds passed to `AcquireNextFrame`.
+    timeout_ms: u32,
+    /// Most recent packed frame, replayed by `next_frame` when no new frame arrives.
+    last: Option>,
+    /// Declared last, so it is dropped after the COM objects above.
+    _keepalive: Box,
+}
+// SAFETY (asserted by the author, not compiler-checked): COM objects used only from the one
+// thread that owns the capturer (the encode thread).
+unsafe impl Send for DuplCapturer {}
+
+impl DuplCapturer {
+    /// Open a duplication of the output identified by `target`.
+    ///
+    /// Finds the adapter whose packed LUID equals `target.adapter_luid`, creates a D3D11 device
+    /// on it, finds the output whose GDI name equals `target.gdi_name`, and duplicates it.
+    /// Width and height are taken from the duplication itself, not from `preferred`; only the
+    /// third element of `preferred` (refresh rate) is used, and only when non-zero.
+    /// The acquire timeout is `PUNKTFUNK_CAPTURE_TIMEOUT_MS` when set and parseable, otherwise
+    /// `2000 / refresh_hz` ms with a floor of 100 ms. `keepalive` is stored untouched.
+    ///
+    /// # Errors
+    /// Fails if no adapter or output matches, or if any DXGI/D3D11 call fails.
+    pub fn open(
+        target: WinCaptureTarget,
+        preferred: Option<(u32, u32, u32)>,
+        keepalive: Box,
+    ) -> Result {
+        unsafe {
+            let factory: IDXGIFactory1 = CreateDXGIFactory1().context("CreateDXGIFactory1")?;
+            // 1) the adapter whose LUID matches SudoVDA's AddOut.luid.
+            let mut adapter: Option = None;
+            let mut i = 0u32;
+            // Enumeration stops at the first index for which `EnumAdapters1` returns an error.
+            while let Ok(a) = factory.EnumAdapters1(i) {
+                let d = a.GetDesc1()?;
+                if pack_luid(d.AdapterLuid) == target.adapter_luid {
+                    adapter = Some(a);
+                    break;
+                }
+                i += 1;
+            }
+            let adapter = adapter.context("no DXGI adapter matches the SudoVDA LUID")?;
+            // 2) D3D11 device ON that adapter (driver_type MUST be UNKNOWN with an explicit adapter).
+            let mut device: Option = None;
+            let mut context: Option = None;
+            D3D11CreateDevice(
+                &adapter,
+                D3D_DRIVER_TYPE_UNKNOWN,
+                HMODULE::default(),
+                D3D11_CREATE_DEVICE_BGRA_SUPPORT,
+                Some(&[D3D_FEATURE_LEVEL_11_0]),
+                D3D11_SDK_VERSION,
+                Some(&mut device),
+                None,
+                Some(&mut context),
+            )
+            .context("D3D11CreateDevice")?;
+            let device = device.context("null D3D11 device")?;
+            let context = context.context("null D3D11 context")?;
+            // 3) the output (monitor) whose GDI DeviceName matches.
+            let mut out1: Option = None;
+            let mut j = 0u32;
+            while let Ok(o) = adapter.EnumOutputs(j) {
+                let od = o.GetDesc()?;
+                if gdi_name_matches(&od.DeviceName, &target.gdi_name) {
+                    out1 = Some(o.cast::()?);
+                    break;
+                }
+                j += 1;
+            }
+            let output = out1
+                .with_context(|| format!("adapter has no output named {}", target.gdi_name))?;
+            // 4) duplicate the output.
+            let dupl = output
+                .DuplicateOutput(&device)
+                .context("DuplicateOutput (already duplicated by another app?)")?;
+            let dd: DXGI_OUTDUPL_DESC = dupl.GetDesc();
+            let (width, height) = (dd.ModeDesc.Width, dd.ModeDesc.Height);
+            // Refresh rate: the caller's non-zero preference wins; otherwise the duplicated mode's
+            // rational rate (at least 1), or 60 when its denominator is 0.
+            let refresh_hz = preferred
+                .map(|(_, _, hz)| hz)
+                .filter(|&hz| hz > 0)
+                .unwrap_or_else(|| {
+                    let r = dd.ModeDesc.RefreshRate;
+                    if r.Denominator > 0 {
+                        (r.Numerator / r.Denominator).max(1)
+                    } else {
+                        60
+                    }
+                });
+            // Env override, else 2000 / refresh_hz ms but never below 100 ms.
+            let timeout_ms = std::env::var("PUNKTFUNK_CAPTURE_TIMEOUT_MS")
+                .ok()
+                .and_then(|s| s.parse().ok())
+                .unwrap_or((2000 / refresh_hz.max(1)).max(100));
+            tracing::info!(
+                "DXGI duplication: {}x{}@{} on {}",
+                width,
+                height,
+                refresh_hz,
+                target.gdi_name
+            );
+            Ok(Self {
+                device,
+                context,
+                output,
+                dupl,
+                width,
+                height,
+                refresh_hz,
+                staging: None,
+                holding_frame: false,
+                active: AtomicBool::new(false),
+                timeout_ms,
+                last: None,
+                _keepalive: keepalive,
+            })
+        }
+    }
+
+    /// Create the CPU-readable BGRA staging texture on first use; a no-op once `staging` is set.
+    ///
+    /// The texture is sized from the `width`/`height` stored on `self` at the time of creation.
+    unsafe fn ensure_staging(&mut self) -> Result<()> {
+        if self.staging.is_some() {
+            return Ok(());
+        }
+        let desc = D3D11_TEXTURE2D_DESC {
+            Width: self.width,
+            Height: self.height,
+            MipLevels: 1,
+            ArraySize: 1,
+            Format: DXGI_FORMAT_B8G8R8A8_UNORM,
+            SampleDesc: DXGI_SAMPLE_DESC {
+                Count: 1,
+                Quality: 0,
+            },
+            Usage: D3D11_USAGE_STAGING,
+            BindFlags: D3D11_BIND_FLAG(0).0 as u32,
+            CPUAccessFlags: D3D11_CPU_ACCESS_READ.0 as u32,
+            MiscFlags: 0,
+        };
+        let mut t: Option = None;
+        self.device
+            .CreateTexture2D(&desc, None, Some(&mut t))
+            .context("CreateTexture2D(staging)")?;
+        self.staging = t;
+        Ok(())
+    }
+
+    /// Release any held frame and replace `dupl` with a fresh duplication of the same output.
+    ///
+    /// NOTE(review): this does not itself refresh `width`/`height` or invalidate `staging`/`last`;
+    /// if the loss was caused by a mode change those would describe the old mode — confirm the
+    /// caller accounts for that.
+    unsafe fn recreate_dupl(&mut self) -> Result<()> {
+        if self.holding_frame {
+            let _ = self.dupl.ReleaseFrame();
+            self.holding_frame = false;
+        }
+        self.dupl = self
+            .output
+            .DuplicateOutput(&self.device)
+            .context("re-DuplicateOutput after ACCESS_LOST")?;
+        Ok(())
+    }
+
+    /// Acquire one frame: `Some` on a fresh image, `None` on timeout (no change → caller reuses last).
+    ///
+    /// `None` is also returned after `DXGI_ERROR_ACCESS_LOST`, once the duplication has been
+    /// re-created. In that case the stored mode is re-read; if it changed, the staging texture
+    /// and the replay buffer are discarded so nothing sized for the old mode is reused.
+    ///
+    /// # Errors
+    /// Any other `AcquireNextFrame` failure, a failed re-duplication, or a failed staging
+    /// create/map. A frame still held when an error is returned is released by the next call.
+    unsafe fn acquire(&mut self) -> Result<Option<CapturedFrame>> {
+        // A previous call may have bailed out between acquire and release.
+        self.release_held_frame();
+        let mut info = DXGI_OUTDUPL_FRAME_INFO::default();
+        let mut res: Option<IDXGIResource> = None;
+        match self.dupl.AcquireNextFrame(self.timeout_ms, &mut info, &mut res) {
+            Ok(()) => {}
+            Err(e) if e.code() == DXGI_ERROR_WAIT_TIMEOUT => return Ok(None),
+            Err(e) if e.code() == DXGI_ERROR_ACCESS_LOST => {
+                self.recreate_dupl()?;
+                // The new duplication may be at a different mode than the one we sized for.
+                self.resync_mode();
+                return Ok(None);
+            }
+            Err(e) => return Err(e).context("AcquireNextFrame"),
+        }
+        self.holding_frame = true;
+        let res = res.context("AcquireNextFrame: null resource")?;
+        let tex: ID3D11Texture2D = res.cast().context("resource -> Texture2D")?;
+        self.ensure_staging()?;
+        let staging = self.staging.clone().context("staging texture")?;
+        // GPU copy into the CPU-readable texture, then map it for reading.
+        self.context.CopyResource(&staging, &tex);
+        let mut map = D3D11_MAPPED_SUBRESOURCE::default();
+        self.context
+            .Map(&staging, 0, D3D11_MAP_READ, 0, Some(&mut map))
+            .context("Map staging")?;
+        let (w, h) = (self.width as usize, self.height as usize);
+        let pitch = map.RowPitch as usize;
+        // SAFETY: `Map` succeeded, so `pData` addresses the mapped staging surface, which was
+        // created `height` rows tall; it stays valid until the `Unmap` below.
+        let src = std::slice::from_raw_parts(map.pData as *const u8, pitch * h);
+        let tight = depad_bgra(src, pitch, w, h);
+        self.context.Unmap(&staging, 0);
+        self.release_held_frame();
+        // Keep a copy for timeout replay; `clone_from` reuses the previous frame's allocation.
+        if let Some(prev) = self.last.as_mut() {
+            prev.clone_from(&tight);
+        } else {
+            self.last = Some(tight.clone());
+        }
+        Ok(Some(self.cpu_frame(tight)))
+    }
+
+    /// Release the currently held duplication frame, if any. Best-effort: a `ReleaseFrame`
+    /// failure is ignored and the flag is cleared either way.
+    unsafe fn release_held_frame(&mut self) {
+        if self.holding_frame {
+            let _ = self.dupl.ReleaseFrame();
+            self.holding_frame = false;
+        }
+    }
+
+    /// Re-read the duplicated mode after a re-duplication. If it differs from the stored one,
+    /// adopt it and drop the staging texture and replay buffer, both sized for the old mode.
+    unsafe fn resync_mode(&mut self) {
+        let dd: DXGI_OUTDUPL_DESC = self.dupl.GetDesc();
+        let (width, height) = (dd.ModeDesc.Width, dd.ModeDesc.Height);
+        if (width, height) != (self.width, self.height) {
+            tracing::info!(
+                "DXGI duplication mode changed: {}x{} -> {}x{}",
+                self.width,
+                self.height,
+                width,
+                height
+            );
+            self.width = width;
+            self.height = height;
+            self.staging = None;
+            self.last = None;
+        }
+    }
+
+    /// Wrap a tightly-packed BGRA buffer as a CPU frame at the stored mode, timestamped now.
+    fn cpu_frame(&self, bgra: Vec<u8>) -> CapturedFrame {
+        CapturedFrame {
+            width: self.width,
+            height: self.height,
+            pts_ns: now_ns(),
+            format: PixelFormat::Bgra,
+            payload: FramePayload::Cpu(bgra),
+        }
+    }
+}
+
+/// Wall-clock nanoseconds since the Unix epoch; 0 if the system clock reads before the epoch.
+fn now_ns() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map(|d| d.as_nanos() as u64)
+        .unwrap_or(0)
+}
+
+impl Capturer for DuplCapturer {
+    /// Return the next frame. When no new image arrives, replay the last one with a fresh
+    /// timestamp; if there is no last frame either, keep trying for up to 10 seconds.
+    fn next_frame(&mut self) -> Result<CapturedFrame> {
+        let deadline = Instant::now() + Duration::from_secs(10);
+        loop {
+            // SAFETY: `self` owns the COM interfaces `acquire` drives; they were created in `open`.
+            if let Some(frame) = unsafe { self.acquire() }? {
+                return Ok(frame);
+            }
+            if let Some(prev) = &self.last {
+                return Ok(self.cpu_frame(prev.clone()));
+            }
+            if Instant::now() > deadline {
+                return Err(anyhow!(
+                    "no DXGI frame within 10s (SudoVDA monitor not activated by a WDDM GPU?)"
+                ));
+            }
+        }
+    }
+
+    /// One acquire attempt with no replay of the last frame.
+    ///
+    /// NOTE(review): this still waits up to `timeout_ms` inside `AcquireNextFrame` — confirm
+    /// callers do not expect a non-blocking poll.
+    fn try_latest(&mut self) -> Result<Option<CapturedFrame>> {
+        // SAFETY: as in `next_frame`.
+        unsafe { self.acquire() }
+    }
+
+    /// Record the active flag. Nothing in this module reads it back.
+    fn set_active(&self, active: bool) {
+        self.active.store(active, Ordering::Relaxed);
+    }
+}
+
+impl Drop for DuplCapturer {
+    fn drop(&mut self) {
+        // SAFETY: `dupl` is still alive here; fields are only dropped after this returns.
+        unsafe { self.release_held_frame() };
+        // Fields then drop in declaration order, so `_keepalive` (declared last) goes after the
+        // COM objects, REMOVEing the SudoVDA monitor.
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn pack_luid_roundtrip() {
+        let l = LUID {
+            LowPart: 0x1234_5678,
+            HighPart: 0x0000_0009,
+        };
+        assert_eq!(pack_luid(l), (0x9i64 << 32) | 0x1234_5678);
+    }
+
+    #[test]
+    fn gdi_name_match() {
+        let mut buf = [0u16; 32];
+        for (i, c) in r"\\.\DISPLAY3".encode_utf16().enumerate() {
+            buf[i] = c;
+        }
+        assert!(gdi_name_matches(&buf, r"\\.\DISPLAY3"));
+        assert!(!gdi_name_matches(&buf, r"\\.\DISPLAY1"));
+    }
+
+    #[test]
+    fn depad_removes_row_padding() {
+        // 2x2 BGRA, pitch = 12 (row=8 + 4 pad bytes).
+        let pitch = 12;
+        let mut src = vec![0u8; pitch * 2];
+        for y in 0..2 {
+            for x in 0..8 {
+                src[y * pitch + x] = (y * 8 + x) as u8;
+            }
+        }
+        let out = depad_bgra(&src, pitch, 2, 2);
+        assert_eq!(out.len(), 16);
+        assert_eq!(&out[0..8], &[0, 1, 2, 3, 4, 5, 6, 7]);
+        assert_eq!(&out[8..16], &[8, 9, 10, 11, 12, 13, 14, 15]);
+    }
+}
diff --git a/crates/punktfunk-host/src/vdisplay.rs b/crates/punktfunk-host/src/vdisplay.rs index 52884fe..7dab2af 100644 --- a/crates/punktfunk-host/src/vdisplay.rs +++ b/crates/punktfunk-host/src/vdisplay.rs @@ -35,6 +35,10 @@ pub struct VirtualOutput { /// gamescope outputs are created at the exact size, so this just confirms it; **Mutter sizes /// its virtual monitor FROM the negotiation**, so here it's what makes the client's mode real. pub preferred_mode: Option<(u32, u32, u32)>, + /// Windows capture identity (DXGI adapter LUID + GDI output name) for the SudoVDA backend — + /// what [`crate::capture::capture_virtual_output`] needs to duplicate the right output. + #[cfg(target_os = "windows")] + pub win_capture: Option, /// Keeps the output — and whatever connection/thread backs it — alive; dropped on teardown. pub keepalive: Box, } diff --git a/crates/punktfunk-host/src/vdisplay/sudovda.rs b/crates/punktfunk-host/src/vdisplay/sudovda.rs index 93d7c6c..249d68d 100644 --- a/crates/punktfunk-host/src/vdisplay/sudovda.rs +++ b/crates/punktfunk-host/src/vdisplay/sudovda.rs @@ -281,6 +281,10 @@ impl VirtualDisplay for SudoVdaDisplay { Ok(VirtualOutput { node_id: 0, // unused on Windows; the capture target is the GDI name below preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)), + win_capture: gdi_name.clone().map(|n| crate::capture::dxgi::WinCaptureTarget { + adapter_luid: crate::capture::dxgi::pack_luid(ao.luid), + gdi_name: n, + }), keepalive: Box::new(SudoVdaKeepalive { device: device_raw, guid: MONITOR_GUID,