diff --git a/crates/punktfunk-host/Cargo.toml b/crates/punktfunk-host/Cargo.toml
index 7a26611..f8393e2 100644
--- a/crates/punktfunk-host/Cargo.toml
+++ b/crates/punktfunk-host/Cargo.toml
@@ -141,6 +141,9 @@ windows = { version = "0.62", features = [
     "Win32_System_Threading",
     "Win32_System_Pipes",
     "Win32_System_Environment",
+    # Force-composed-flip overlay: a topmost layered window on the Winlogon desktop disqualifies the
+    # secure desktop's fullscreen independent-flip so Desktop Duplication can capture it.
+    "Win32_System_LibraryLoader",
 ] }
 # Software H.264 encoder (GPU-less path + NVENC fallback). The default `source` feature statically
 # compiles OpenH264 (BSD-2) — no system lib, builds on MSVC; nasm on PATH adds the SIMD fast path.
diff --git a/crates/punktfunk-host/src/capture.rs b/crates/punktfunk-host/src/capture.rs
index 5532bb8..a964da8 100644
--- a/crates/punktfunk-host/src/capture.rs
+++ b/crates/punktfunk-host/src/capture.rs
@@ -320,6 +320,8 @@ pub fn capture_virtual_output(_vout: crate::vdisplay::VirtualOutput) -> Result,
+}
+
+impl ForceComposedFlip {
+    /// Start the overlay worker thread.
+    ///
+    /// Returns `None` (and starts nothing) when the overlay is disabled via
+    /// `PUNKTFUNK_FORCE_COMPOSED=0`, or when the worker thread cannot be spawned. Dropping the
+    /// returned guard sets the stop flag that the worker polls.
+    pub fn start() -> Option<Self> {
+        if std::env::var("PUNKTFUNK_FORCE_COMPOSED").as_deref() == Ok("0") {
+            tracing::info!("force-composed-flip overlay disabled (PUNKTFUNK_FORCE_COMPOSED=0)");
+            return None;
+        }
+        let stop = Arc::new(AtomicBool::new(false));
+        let st = stop.clone();
+        // SAFETY: `run` only drives Win32 calls on state it creates itself (its own desktop
+        // attachment and overlay window) plus the shared atomic stop flag.
+        let spawned = std::thread::Builder::new()
+            .name("composed-flip".into())
+            .spawn(move || unsafe { run(st) });
+        if let Err(e) = spawned {
+            // Best-effort feature: report the failure instead of dropping it silently.
+            tracing::warn!("force-composed-flip: failed to spawn overlay thread: {e}");
+            return None;
+        }
+        tracing::info!("force-composed-flip overlay started (Winlogon-aware)");
+        Some(ForceComposedFlip { stop })
+    }
+}
+
+impl Drop for ForceComposedFlip {
+    /// Ask the worker to exit; it notices on its next poll. The thread is not joined.
+    fn drop(&mut self) {
+        self.stop.store(true, Ordering::Relaxed);
+    }
+}
+
+/// Window procedure of the overlay class: every message goes straight to `DefWindowProcW`.
+extern "system" fn wndproc(hwnd: HWND, msg: u32, wp: WPARAM, lp: LPARAM) -> LRESULT {
+    unsafe { DefWindowProcW(hwnd, msg, wp, lp) }
+}
+
+/// Read the current input-desktop name (e.g. "Default" / "Winlogon"); `None` if it can't be read.
+///
+/// The desktop handle opened for the query is closed again before returning.
+unsafe fn input_desktop_name() -> Option<String> {
+    let desk = OpenInputDesktop(
+        DESKTOP_CONTROL_FLAGS(0),
+        false,
+        DESKTOP_ACCESS_FLAGS(0x0001),
+    )
+    .ok()?;
+    // Zero-initialised, so everything after the returned name is NUL and gets trimmed below.
+    let mut buf = [0u16; 64];
+    let mut needed = 0u32;
+    let ok = GetUserObjectInformationW(
+        windows::Win32::Foundation::HANDLE(desk.0),
+        UOI_NAME,
+        Some(buf.as_mut_ptr() as *mut _),
+        (buf.len() * 2) as u32, // buffer size in bytes (u16 elements)
+        Some(&mut needed),
+    )
+    .is_ok();
+    let _ = CloseDesktop(desk);
+    if !ok {
+        return None;
+    }
+    Some(
+        String::from_utf16_lossy(&buf)
+            .trim_end_matches('\u{0}')
+            .to_string(),
+    )
+}
+
+/// Create the tiny topmost layered click-through window on the CURRENT thread's desktop. Caller must
+/// have `SetThreadDesktop`'d to the target input desktop first.
+///
+/// Returns `None` (after logging a warning) when the window cannot be created.
+unsafe fn make_overlay() -> Option<HWND> {
+    let hinst = GetModuleHandleW(None).ok()?;
+    let class = w!("PunktfunkComposedFlip");
+    // This runs again after every desktop switch, so the class is usually registered already.
+    // That failure is expected and stays silent; any other registration error is logged, and
+    // CreateWindowExW below reports the real problem if the class is unusable.
+    let wc = WNDCLASSW {
+        lpfnWndProc: Some(wndproc),
+        hInstance: hinst.into(),
+        lpszClassName: class,
+        ..Default::default()
+    };
+    let atom = RegisterClassW(&wc);
+    if atom == 0 {
+        let e = windows::Win32::Foundation::GetLastError();
+        // 1410 = ERROR_CLASS_ALREADY_EXISTS is fine (re-register after a desktop switch).
+        if e.0 != 1410 {
+            tracing::warn!(err = e.0, "force-composed-flip: RegisterClassW failed");
+        }
+    }
+    let hwnd = match CreateWindowExW(
+        WS_EX_LAYERED | WS_EX_TRANSPARENT | WS_EX_TOPMOST | WS_EX_NOACTIVATE | WS_EX_TOOLWINDOW,
+        class,
+        w!(""),
+        WS_POPUP,
+        0,
+        0,
+        1,
+        1,
+        None,
+        None,
+        Some(hinst.into()),
+        None,
+    ) {
+        Ok(h) => h,
+        Err(e) => {
+            let le = windows::Win32::Foundation::GetLastError();
+            tracing::warn!(err = %format!("{e:?}"), last = le.0,
+                "force-composed-flip: CreateWindowExW failed");
+            return None;
+        }
+    };
+    // alpha=1: technically visible (so it disqualifies independent-flip) but imperceptible.
+    let _ = SetLayeredWindowAttributes(hwnd, windows::Win32::Foundation::COLORREF(0), 1, LWA_ALPHA);
+    let _ = ShowWindow(hwnd, SW_SHOWNOACTIVATE);
+    let _ = SetWindowPos(
+        hwnd,
+        Some(HWND_TOPMOST),
+        0,
+        0,
+        0,
+        0,
+        SWP_NOMOVE | SWP_NOSIZE | SWP_NOACTIVATE,
+    );
+    Some(hwnd)
+}
+
+/// Worker loop: keep one overlay window alive on whichever desktop currently receives input.
+///
+/// Polls every 200 ms until `stop` is set. On each input-desktop change the old window is
+/// destroyed, the thread re-attaches to the new desktop and a fresh overlay is created there.
+unsafe fn run(stop: Arc<AtomicBool>) {
+    let mut cur_desktop: Option<String> = None;
+    let mut hwnd: Option<HWND> = None;
+    // Handle of the desktop this thread is attached to. It must stay open while attached and is
+    // closed once a later switch supersedes it, so switches no longer accumulate open handles.
+    let mut attached = None;
+    while !stop.load(Ordering::Relaxed) {
+        // Follow the input desktop: if it changed (Default↔Winlogon), re-attach this thread and
+        // recreate the window there (a window is bound to the desktop it was created on).
+        let name = input_desktop_name();
+        if name != cur_desktop {
+            if let Some(h) = hwnd.take() {
+                let _ = DestroyWindow(h);
+            }
+            if let Ok(desk) = OpenInputDesktop(
+                DESKTOP_CONTROL_FLAGS(0),
+                false,
+                DESKTOP_ACCESS_FLAGS(0x1000_0000), // GENERIC_ALL (incl. DESKTOP_CREATEWINDOW=0x0002)
+            ) {
+                if SetThreadDesktop(desk).is_ok() {
+                    // `desk` is now the thread desktop; the handle it replaces is safe to close.
+                    if let Some(prev) = attached.replace(desk) {
+                        let _ = CloseDesktop(prev);
+                    }
+                    hwnd = make_overlay();
+                    tracing::info!(desktop = ?name, created = hwnd.is_some(),
+                        "force-composed-flip: overlay (re)created on input desktop");
+                } else {
+                    // Never attached to this handle, so nothing keeps it in use: release it.
+                    let _ = CloseDesktop(desk);
+                    tracing::warn!(desktop = ?name,
+                        "force-composed-flip: SetThreadDesktop failed; no overlay on this desktop");
+                }
+            }
+            // Recorded even on failure, so a failing desktop is not retried on every tick.
+            cur_desktop = name;
+        }
+        // Re-assert topmost periodically (other windows on the secure desktop can push us down) and
+        // pump our message queue so the window stays responsive/composited.
+        if let Some(h) = hwnd {
+            let _ = SetWindowPos(
+                h,
+                Some(HWND_TOPMOST),
+                0,
+                0,
+                0,
+                0,
+                SWP_NOMOVE | SWP_NOSIZE | SWP_NOACTIVATE,
+            );
+            let mut msg = MSG::default();
+            while PeekMessageW(&mut msg, Some(h), 0, 0, PM_REMOVE).as_bool() {
+                let _ = TranslateMessage(&msg);
+                DispatchMessageW(&msg);
+            }
+        }
+        std::thread::sleep(std::time::Duration::from_millis(200));
+    }
+    if let Some(h) = hwnd.take() {
+        let _ = DestroyWindow(h);
+    }
+    // The handle in `attached` is deliberately left open: this thread is still attached to it.
+    tracing::info!("force-composed-flip overlay stopped");
+}
diff --git a/crates/punktfunk-host/src/m3.rs b/crates/punktfunk-host/src/m3.rs
index 278f218..46aacc0 100644
--- a/crates/punktfunk-host/src/m3.rs
+++ b/crates/punktfunk-host/src/m3.rs
@@ -2345,6 +2345,10 @@ fn virtual_stream_relay(
 
     // The authoritative Default↔Winlogon signal (requires SYSTEM to read the Winlogon desktop name).
     let watcher = crate::capture::desktop_watch::DesktopWatcher::start();
+    // Keep a force-composed-flip overlay alive on the input desktop so the SECURE desktop (which
+    // otherwise presents via fullscreen independent-flip → DDA gets born-lost ACCESS_LOST / black) is
+    // forced into DWM composition and becomes capturable. Held for the stream's lifetime.
+    let _composed_flip = crate::capture::composed_flip::ForceComposedFlip::start();
     // Test hook: PUNKTFUNK_SECURE_TEST_PERIOD_MS=N drives a square-wave secure/normal toggle every N ms
     // instead of the real watcher — exercises the mid-session helper↔DDA mux without a live UAC/lock
     // (the real Winlogon DDA capture is already proven by the single-process secure path).
diff --git a/crates/punktfunk-host/src/vdisplay/sudovda.rs b/crates/punktfunk-host/src/vdisplay/sudovda.rs
index 779f43d..869b721 100644
--- a/crates/punktfunk-host/src/vdisplay/sudovda.rs
+++ b/crates/punktfunk-host/src/vdisplay/sudovda.rs
@@ -49,6 +49,7 @@ const fn ctl(func: u32) -> u32 {
 }
 const IOCTL_ADD: u32 = ctl(0x800);
 const IOCTL_REMOVE: u32 = ctl(0x801);
+const IOCTL_SET_RENDER_ADAPTER: u32 = ctl(0x802); // == 0x0022_2008
 const IOCTL_GET_WATCHDOG: u32 = ctl(0x803);
 const IOCTL_DRIVER_PING: u32 = ctl(0x888);
 const IOCTL_GET_VERSION: u32 = ctl(0x8FF);
@@ -76,6 +77,85 @@ struct AddOut {
     target_id: u32,
 }
 
+// SET_RENDER_ADAPTER input — byte-identical to SudoVDA's `{ LUID AdapterLuid; }` (8 bytes). The
+// windows `LUID` is `{ LowPart: u32, HighPart: i32 }` == the C `LUID`, so `#[repr(C)]` is exact.
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct SetRenderAdapterParams {
+    luid: LUID,
+}
+
+/// Pin the SudoVDA IDD's RENDER GPU to `luid` (Apollo's `SetRenderAdapter`). No output buffer. MUST be
+/// issued on the driver handle BEFORE `IOCTL_ADD` to steer which GPU the new target renders on — on a
+/// multi-adapter box (SudoVDA IDD + a discrete GPU) this stops DXGI from reparenting the virtual
+/// output onto a different adapter than the one we duplicate/encode on (the ACCESS_LOST storm).
+unsafe fn set_render_adapter(h: HANDLE, luid: LUID) -> Result<()> {
+    let p = SetRenderAdapterParams { luid };
+    // View the `#[repr(C)]` params as raw bytes for the ioctl input buffer.
+    let bytes = std::slice::from_raw_parts(
+        &p as *const _ as *const u8,
+        size_of::<SetRenderAdapterParams>(),
+    );
+    let mut none: [u8; 0] = [];
+    ioctl(h, IOCTL_SET_RENDER_ADAPTER, bytes, &mut none)
+        .map(|_| ())
+        .context("SudoVDA SET_RENDER_ADAPTER")
+}
+
+/// Resolve the LUID of the GPU that should RENDER the virtual display = the GPU that drives NVENC +
+/// Desktop Duplication (e.g. the RTX 4090). Default: the discrete adapter with the most
+/// `DedicatedVideoMemory`, skipping WARP / Basic-Render and the SudoVDA software adapter (≈0 VRAM).
+/// `PUNKTFUNK_RENDER_ADAPTER=<substr>` forces a match by Description (Apollo's `adapter_name`).
+unsafe fn resolve_render_adapter_luid() -> Option<LUID> {
+    use windows::Win32::Graphics::Dxgi::{CreateDXGIFactory1, IDXGIFactory1};
+    // Lowercase the override once up front; every adapter name is compared against it below.
+    let want = std::env::var("PUNKTFUNK_RENDER_ADAPTER")
+        .ok()
+        .filter(|s| !s.is_empty())
+        .map(|s| s.to_ascii_lowercase());
+    let factory: IDXGIFactory1 = CreateDXGIFactory1().ok()?;
+    let mut best: Option<(LUID, u64, String)> = None;
+    let mut i = 0u32;
+    while let Ok(a) = factory.EnumAdapters1(i) {
+        i += 1;
+        let Ok(d) = a.GetDesc1() else { continue };
+        let name = String::from_utf16_lossy(&d.Description);
+        let name = name.trim_end_matches('\u{0}').to_string();
+        let lname = name.to_ascii_lowercase();
+        if lname.contains("basic render") || lname.contains("warp") {
+            continue; // never pin to the software rasterizer
+        }
+        if let Some(w) = &want {
+            if lname.contains(w.as_str()) {
+                tracing::info!(
+                    adapter = name,
+                    "render adapter chosen by PUNKTFUNK_RENDER_ADAPTER"
+                );
+                return Some(d.AdapterLuid);
+            }
+            continue;
+        }
+        let vram = d.DedicatedVideoMemory as u64; // SudoVDA software adapter ≈ 0 → loses to the dGPU
+        if best.as_ref().map_or(true, |(_, v, _)| vram > *v) {
+            best = Some((d.AdapterLuid, vram, name));
+        }
+    }
+    match best {
+        Some((luid, vram, name)) => {
+            tracing::info!(
+                adapter = name,
+                vram_mb = vram / (1024 * 1024),
+                "render adapter chosen (max VRAM)"
+            );
+            Some(luid)
+        }
+        None => {
+            tracing::warn!("no suitable render adapter found for SET_RENDER_ADAPTER");
+            None
+        }
+    }
+}
+
 #[repr(C)]
 struct RemoveParams {
     guid: GUID,
@@ -457,6 +537,22 @@ impl VirtualDisplay for SudoVdaDisplay {
             device_name,
             serial: [0u8; 14],
         };
+        // Pin the IDD's RENDER GPU to the NVENC/capture GPU (e.g. the 4090) BEFORE adding the target.
+        // On a multi-adapter box (SudoVDA IDD + discrete GPU) DXGI otherwise reparents the virtual
+        // output onto whichever GPU its hybrid-preference path resolves, which storms ACCESS_LOST
+        // (0x887A0026) on the secure/HDR desktop. Apollo's SET_RENDER_ADAPTER fixes this and MUST be
+        // issued before ADD. Best-effort: a driver that rejects it just keeps the default render GPU.
+        let pinned = unsafe { resolve_render_adapter_luid() };
+        if let Some(luid) = pinned {
+            match unsafe { set_render_adapter(self.device, luid) } {
+                Ok(()) => tracing::info!(
+                    luid = format!("{:08x}:{:08x}", luid.HighPart, luid.LowPart),
+                    "SudoVDA SET_RENDER_ADAPTER: pinned IDD render GPU"
+                ),
+                Err(e) => tracing::warn!("SudoVDA SET_RENDER_ADAPTER failed (continuing): {e:#}"),
+            }
+        }
+
         let add_bytes = unsafe {
             std::slice::from_raw_parts(&add as *const _ as *const u8, size_of::())
         };
@@ -476,6 +572,17 @@ impl VirtualDisplay for SudoVdaDisplay {
             ao.target_id,
             ao.luid.LowPart
         );
+        if let Some(luid) = pinned {
+            if ao.luid.LowPart == luid.LowPart && ao.luid.HighPart == luid.HighPart {
+                tracing::info!("SudoVDA ADD render adapter matches the pinned GPU (pin took)");
+            } else {
+                tracing::warn!(
+                    add = format!("{:08x}:{:08x}", ao.luid.HighPart, ao.luid.LowPart),
+                    pinned = format!("{:08x}:{:08x}", luid.HighPart, luid.LowPart),
+                    "SudoVDA ADD render adapter DIFFERS from pinned — driver ignored SET_RENDER_ADAPTER?"
+                );
+            }
+        }
 
         // Mandatory keepalive: ping inside the watchdog window or the driver tears all displays down.
         let stop = Arc::new(AtomicBool::new(false));