diff --git a/packaging/windows/drivers/pf-vdisplay/Cargo.toml b/packaging/windows/drivers/pf-vdisplay/Cargo.toml index 4d8c754..fb75014 100644 --- a/packaging/windows/drivers/pf-vdisplay/Cargo.toml +++ b/packaging/windows/drivers/pf-vdisplay/Cargo.toml @@ -24,3 +24,18 @@ wdk.workspace = true wdk-sys = { workspace = true, features = ["iddcx"] } wdk-iddcx.workspace = true pf-vdisplay-proto.workspace = true +# STEP 5: the swap-chain processor's render-side D3D11 device + worker. 0.58.0 matches the wdk-build +# transitive `windows` already in the workspace lock (one resolved version) AND the proven oracle's +# version, so the ported D3D/DXGI/threading calls compile verbatim. +thiserror = "2.0" + +[dependencies.windows] +version = "0.58.0" +features = [ + "Win32_Foundation", + "Win32_System_Threading", + "Win32_Graphics_Direct3D", + "Win32_Graphics_Direct3D11", + "Win32_Graphics_Dxgi", + "Win32_Graphics_Dxgi_Common", +] diff --git a/packaging/windows/drivers/pf-vdisplay/build.rs b/packaging/windows/drivers/pf-vdisplay/build.rs index be62abe..8187fea 100644 --- a/packaging/windows/drivers/pf-vdisplay/build.rs +++ b/packaging/windows/drivers/pf-vdisplay/build.rs @@ -45,7 +45,9 @@ fn link_iddcx_stub() { } } let Some((_, dir)) = best else { - panic!("IddCxStub.lib not found under any Windows Kits Lib\\\\um\\{ARCH}\\iddcx\\\\"); + panic!( + "IddCxStub.lib not found under any Windows Kits Lib\\\\um\\{ARCH}\\iddcx\\\\" + ); }; println!("cargo:rustc-link-search={}", dir.display()); println!("cargo:rustc-link-lib=static=IddCxStub"); diff --git a/packaging/windows/drivers/pf-vdisplay/src/adapter.rs b/packaging/windows/drivers/pf-vdisplay/src/adapter.rs index 65e46c3..08c571d 100644 --- a/packaging/windows/drivers/pf-vdisplay/src/adapter.rs +++ b/packaging/windows/drivers/pf-vdisplay/src/adapter.rs @@ -6,7 +6,7 @@ use std::sync::OnceLock; -use wdk_sys::{iddcx, NTSTATUS, WDFDEVICE}; +use wdk_sys::{NTSTATUS, WDFDEVICE, iddcx}; use crate::STATUS_SUCCESS; diff --git 
a/packaging/windows/drivers/pf-vdisplay/src/callbacks.rs b/packaging/windows/drivers/pf-vdisplay/src/callbacks.rs index 476609f..3825cc6 100644 --- a/packaging/windows/drivers/pf-vdisplay/src/callbacks.rs +++ b/packaging/windows/drivers/pf-vdisplay/src/callbacks.rs @@ -1,13 +1,14 @@ //! The IddCx client-config callbacks + the PnP `EvtDeviceD0Entry`. //! -//! STEP 2: stubs with the correct PFN signatures (so the config wires up + the driver loads); the real -//! mode/EDID logic (STEP 4), adapter init (STEP 3), and swap-chain handoff (STEP 5) fill them in. Every -//! callback is `unsafe extern "C"` to match the wdk-sys `PFN_IDD_CX_*` types; with `panic = "abort"` -//! (workspace profile) a panic across the FFI boundary aborts rather than being UB. `query_target_info` -//! is implemented now because it gates HDR (`HIGH_COLOR_SPACE`) and the adapter (STEP 3) sets FP16. +//! The mode/EDID logic (STEP 4), adapter init (STEP 3), and swap-chain handoff (STEP 5) are wired in; the +//! `*2`/HDR-metadata/gamma callbacks remain stubs (STEP 7). Every callback is `unsafe extern "C"` to match +//! the wdk-sys `PFN_IDD_CX_*` types; a panic unwinding across that `extern "C"` boundary aborts the process +//! (Rust >= 1.81 default) rather than being UB. (The swap-chain WORKER is a plain `thread::spawn`, so a +//! panic there only unwinds + ends that thread — it must not panic.) `query_target_info` is implemented +//! because it gates HDR (`HIGH_COLOR_SPACE`) and the adapter (STEP 3) sets FP16. use wdk_sys::iddcx; -use wdk_sys::{NTSTATUS, WDFDEVICE, WDFREQUEST}; +use wdk_sys::{NTSTATUS, WDFDEVICE, WDFOBJECT, WDFREQUEST, call_unsafe_wdf_function_binding}; use crate::{ STATUS_BUFFER_TOO_SMALL, STATUS_INVALID_PARAMETER, STATUS_NOT_FOUND, STATUS_NOT_IMPLEMENTED, @@ -178,16 +179,67 @@ pub unsafe extern "C" fn set_gamma_ramp( STATUS_SUCCESS } -/// A swap-chain was assigned to the monitor. STEP 5: spawn the `SwapChainProcessor`. +/// A swap-chain was assigned to the monitor. 
STEP 5: spawn the `SwapChainProcessor` that drains it (so +/// the monitor is a usable display). Always returns `STATUS_SUCCESS` — on D3D-init failure we delete the +/// swap-chain so the OS makes a fresh one and re-assigns (the oracle pattern). pub unsafe extern "C" fn assign_swap_chain( - _monitor: iddcx::IDDCX_MONITOR, - _p_in: *const iddcx::IDARG_IN_SETSWAPCHAIN, + monitor: iddcx::IDDCX_MONITOR, + p_in: *const iddcx::IDARG_IN_SETSWAPCHAIN, ) -> NTSTATUS { + // SAFETY: framework-provided in args, valid for the call. + let in_args = unsafe { &*p_in }; + let swap_chain = in_args.hSwapChain; + let render_adapter = in_args.RenderAdapterLuid; + let new_frame_event = in_args.hNextSurfaceAvailable; + + // wdk-sys LUID → windows-crate LUID (identical { LowPart: u32, HighPart: i32 } layout). The render + // adapter is the GPU the OS picked to render this virtual monitor; the pooled D3D device is keyed by + // it (relevant on a hybrid iGPU+dGPU box). + let luid = windows::Win32::Foundation::LUID { + LowPart: render_adapter.LowPart, + HighPart: render_adapter.HighPart, + }; + dbglog!( + "[pf-vd] assign_swap_chain: OS render adapter LUID = {:08x}:{:08x}", + render_adapter.HighPart, + render_adapter.LowPart + ); + + // FIRST drop any existing processor on this monitor (RAII-joins its worker), OUTSIDE the lock. + drop(crate::monitor::take_swap_chain_processor(monitor)); + + // The OS target id (stamped on the monitor at creation, after IddCxMonitorArrival) keys the + // per-monitor objects STEP 6's host opens. 0 (default) if the monitor isn't found. + let target_id = crate::monitor::target_id_for_object(monitor).unwrap_or(0); + + if let Some(device) = crate::direct_3d_device::pooled_device(luid) { + let mut processor = crate::swap_chain_processor::SwapChainProcessor::new(); + processor.run(swap_chain, device, new_frame_event, target_id); + // Install on the monitor; drop any processor it replaced (a race lost above) OUTSIDE the lock. 
+ drop(crate::monitor::set_swap_chain_processor(monitor, processor)); + } else { + // D3D init failed: delete the swap-chain so the OS generates a fresh one + retries. + dbglog!( + "[pf-vd] assign_swap_chain: pooled Direct3DDevice unavailable — deleting swap-chain for OS retry" + ); + // SAFETY: `swap_chain` is the framework-provided IddCx swap-chain handle. + unsafe { + call_unsafe_wdf_function_binding!(WdfObjectDelete, swap_chain as WDFOBJECT); + } + } STATUS_SUCCESS } -/// The monitor went inactive. STEP 5: drop the processor (RAII joins the worker thread). -pub unsafe extern "C" fn unassign_swap_chain(_monitor: iddcx::IDDCX_MONITOR) -> NTSTATUS { +/// The monitor went inactive. STEP 5: drop the processor (RAII joins the worker thread, which deletes the +/// swap-chain object before returning). +pub unsafe extern "C" fn unassign_swap_chain(monitor: iddcx::IDDCX_MONITOR) -> NTSTATUS { + // Take + drop OUTSIDE any lock (the take releases `MONITOR_MODES` before the join). + let had = crate::monitor::take_swap_chain_processor(monitor); + dbglog!( + "[pf-vd] unassign_swap_chain — dropped live processor: {}", + had.is_some() + ); + drop(had); STATUS_SUCCESS } diff --git a/packaging/windows/drivers/pf-vdisplay/src/control.rs b/packaging/windows/drivers/pf-vdisplay/src/control.rs index d713629..51bc90d 100644 --- a/packaging/windows/drivers/pf-vdisplay/src/control.rs +++ b/packaging/windows/drivers/pf-vdisplay/src/control.rs @@ -7,7 +7,7 @@ use core::sync::atomic::{AtomicU64, Ordering}; use pf_vdisplay_proto::control; use wdk_iddcx::nt_success; -use wdk_sys::{call_unsafe_wdf_function_binding, NTSTATUS, WDFREQUEST}; +use wdk_sys::{NTSTATUS, WDFREQUEST, call_unsafe_wdf_function_binding}; use crate::{STATUS_INVALID_PARAMETER, STATUS_NOT_FOUND, STATUS_NOT_IMPLEMENTED, STATUS_SUCCESS}; @@ -149,6 +149,11 @@ fn complete(request: WDFREQUEST, status: NTSTATUS) { fn complete_info(request: WDFREQUEST, status: NTSTATUS, info: usize) { // SAFETY: completing hands the framework 
`WDFREQUEST` back to the OS. unsafe { - call_unsafe_wdf_function_binding!(WdfRequestCompleteWithInformation, request, status, info as u64) + call_unsafe_wdf_function_binding!( + WdfRequestCompleteWithInformation, + request, + status, + info as u64 + ) }; } diff --git a/packaging/windows/drivers/pf-vdisplay/src/direct_3d_device.rs b/packaging/windows/drivers/pf-vdisplay/src/direct_3d_device.rs new file mode 100644 index 0000000..4428b56 --- /dev/null +++ b/packaging/windows/drivers/pf-vdisplay/src/direct_3d_device.rs @@ -0,0 +1,139 @@ +//! The render-side D3D11 device the swap-chain processor binds to the IddCx swap-chain (STEP 5). +//! +//! Ported verbatim from the proven oracle (`packaging/windows/vdisplay-driver/pf-vdisplay/src/ +//! direct_3d_device.rs` + the `DEVICE_POOL`/`pooled_device` that lived in its `context.rs`). The +//! D3D/DXGI types are the `windows` crate (refcounted COM, no manual Drop); the swap-chain/LUID hand-off +//! to the wdk-sys IddCx world happens via raw pointers in `swap_chain_processor.rs`. +//! +//! STEP 5 only DRAINS the swap-chain to keep the monitor a live display — there is no frame publisher, +//! so the device's immediate context is unused here (it returns to use in STEP 6's `CopyResource`). 
+ +use std::sync::atomic::{AtomicI32, Ordering}; +use std::sync::{Arc, Mutex}; + +use windows::{ + Win32::{ + Foundation::LUID, + Graphics::{ + Direct3D::D3D_DRIVER_TYPE_UNKNOWN, + Direct3D11::{ + D3D11_CREATE_DEVICE_BGRA_SUPPORT, + D3D11_CREATE_DEVICE_PREVENT_ALTERING_LAYER_SETTINGS_FROM_REGISTRY, + D3D11_CREATE_DEVICE_SINGLETHREADED, D3D11_SDK_VERSION, D3D11CreateDevice, + ID3D11Device, ID3D11DeviceContext, + }, + Dxgi::{CreateDXGIFactory2, DXGI_CREATE_FACTORY_FLAGS, IDXGIAdapter1, IDXGIFactory5}, + }, + }, + core::Error, +}; + +#[derive(thiserror::Error, Debug)] +pub enum Direct3DError { + #[error("Direct3DError({0:?})")] + Win32(#[from] Error), + #[error("Direct3DError(\"{0}\")")] + Other(&'static str), +} + +impl From<&'static str> for Direct3DError { + fn from(value: &'static str) -> Self { + Direct3DError::Other(value) + } +} + +/// DIAGNOSTIC: live `Direct3DDevice` count. Each one holds an `ID3D11Device` whose NVIDIA UMD spawns +/// ~dozens of worker threads; if this climbs without bound across reconnects, devices are leaking. +pub static LIVE_DEVICES: AtomicI32 = AtomicI32::new(0); + +#[derive(Debug)] +pub struct Direct3DDevice { + // The following are already refcounted, so they're safe to use directly without additional drop impls + _dxgi_factory: IDXGIFactory5, + _adapter: IDXGIAdapter1, + pub device: ID3D11Device, + /// The single (SINGLETHREADED) immediate context — used by STEP 6's frame-push publisher's + /// `CopyResource` on the swap-chain processor thread (the one thread this device is touched from). + /// Unused in STEP 5 (drain-only); kept so the device matches the oracle exactly. + #[allow(dead_code)] + pub device_context: ID3D11DeviceContext, +} + +impl Direct3DDevice { + pub fn init(adapter_luid: LUID) -> Result { + let dxgi_factory = + unsafe { CreateDXGIFactory2::(DXGI_CREATE_FACTORY_FLAGS(0))? }; + + let adapter = unsafe { dxgi_factory.EnumAdapterByLuid::(adapter_luid)? 
}; + + let mut device = None; + let mut device_context = None; + + unsafe { + D3D11CreateDevice( + &adapter, + D3D_DRIVER_TYPE_UNKNOWN, + None, + D3D11_CREATE_DEVICE_BGRA_SUPPORT + | D3D11_CREATE_DEVICE_SINGLETHREADED + | D3D11_CREATE_DEVICE_PREVENT_ALTERING_LAYER_SETTINGS_FROM_REGISTRY, + None, + D3D11_SDK_VERSION, + Some(&mut device), + None, + Some(&mut device_context), + )?; + } + + let device = device.ok_or("ID3D11Device not found")?; + let device_context = device_context.ok_or("ID3D11DeviceContext not found")?; + + let live = LIVE_DEVICES.fetch_add(1, Ordering::Relaxed) + 1; + dbglog!("[pf-vd] Direct3DDevice::init OK — live D3D devices = {live}"); + + Ok(Self { + _dxgi_factory: dxgi_factory, + _adapter: adapter, + device, + device_context, + }) + } +} + +impl Drop for Direct3DDevice { + fn drop(&mut self) { + let live = LIVE_DEVICES.fetch_sub(1, Ordering::Relaxed) - 1; + dbglog!("[pf-vd] Direct3DDevice::drop — live D3D devices = {live}"); + } +} + +/// ONE shared D3D render device, reused across every swap-chain assignment (keyed by render LUID). +/// Creating a fresh `Direct3DDevice` per assign — and the swap-chain flap fires several assigns per +/// session — spawned a new NVIDIA UMD worker-thread set each time that was NEVER reclaimed on release +/// (proven on the RTX box: ~70 `nvwgf2umx` threads + ~50 MB VRAM leaked per reconnect, permanently, +/// even though our `Direct3DDevice` refcount dropped to 0). Pooling one device keeps a single, stable +/// thread set: the processors borrow an `Arc`, so the device outlives them and is never re-created. +static DEVICE_POOL: Mutex)>> = Mutex::new(None); + +/// Get-or-create the pooled D3D device for `luid`. Re-creates only if the render adapter changes +/// (e.g. a GPU hot-swap), which drops the old `Arc` once its last processor releases it. 
+pub fn pooled_device(luid: LUID) -> Option> { + let key = (i64::from(luid.HighPart) << 32) | i64::from(luid.LowPart); + let mut pool = DEVICE_POOL.lock().ok()?; + if let Some((k, dev)) = pool.as_ref() { + if *k == key { + return Some(dev.clone()); + } + } + match Direct3DDevice::init(luid) { + Ok(d) => { + let a = Arc::new(d); + *pool = Some((key, a.clone())); + Some(a) + } + Err(e) => { + dbglog!("[pf-vd] pooled Direct3DDevice::init failed: {e:?}"); + None + } + } +} diff --git a/packaging/windows/drivers/pf-vdisplay/src/entry.rs b/packaging/windows/drivers/pf-vdisplay/src/entry.rs index a150b9f..1b86b23 100644 --- a/packaging/windows/drivers/pf-vdisplay/src/entry.rs +++ b/packaging/windows/drivers/pf-vdisplay/src/entry.rs @@ -5,9 +5,9 @@ use wdk_iddcx::nt_success; use wdk_sys::{ - call_unsafe_wdf_function_binding, iddcx, GUID, NTSTATUS, PCUNICODE_STRING, PDRIVER_OBJECT, - PWDFDEVICE_INIT, ULONG, WDFDEVICE, WDFDRIVER, WDF_DRIVER_CONFIG, WDF_NO_HANDLE, - WDF_NO_OBJECT_ATTRIBUTES, WDF_PNPPOWER_EVENT_CALLBACKS, + GUID, NTSTATUS, PCUNICODE_STRING, PDRIVER_OBJECT, PWDFDEVICE_INIT, ULONG, WDF_DRIVER_CONFIG, + WDF_NO_HANDLE, WDF_NO_OBJECT_ATTRIBUTES, WDF_PNPPOWER_EVENT_CALLBACKS, WDFDEVICE, WDFDRIVER, + call_unsafe_wdf_function_binding, iddcx, }; use crate::callbacks; @@ -127,7 +127,12 @@ extern "C" fn driver_add(_driver: WDFDRIVER, mut init: PWDFDEVICE_INIT) -> NTSTA }; // SAFETY: device is the just-created WDFDEVICE; guid lives for the call; no reference string. 
let status = unsafe { - call_unsafe_wdf_function_binding!(WdfDeviceCreateDeviceInterface, device, &guid, core::ptr::null()) + call_unsafe_wdf_function_binding!( + WdfDeviceCreateDeviceInterface, + device, + &guid, + core::ptr::null() + ) }; dbglog!("[pf-vd] WdfDeviceCreateDeviceInterface -> {status:#x}"); status diff --git a/packaging/windows/drivers/pf-vdisplay/src/lib.rs b/packaging/windows/drivers/pf-vdisplay/src/lib.rs index 01916c8..305f521 100644 --- a/packaging/windows/drivers/pf-vdisplay/src/lib.rs +++ b/packaging/windows/drivers/pf-vdisplay/src/lib.rs @@ -15,9 +15,11 @@ mod log; mod adapter; mod callbacks; mod control; +mod direct_3d_device; mod edid; mod entry; mod monitor; +mod swap_chain_processor; use wdk_sys::NTSTATUS; diff --git a/packaging/windows/drivers/pf-vdisplay/src/monitor.rs b/packaging/windows/drivers/pf-vdisplay/src/monitor.rs index 30acf87..9c0a03c 100644 --- a/packaging/windows/drivers/pf-vdisplay/src/monitor.rs +++ b/packaging/windows/drivers/pf-vdisplay/src/monitor.rs @@ -4,8 +4,8 @@ //! from the working upstream virtual-display-rs (`monitor.rs` + `context.rs::create_monitor`), with //! `guid: u128` → `session_id: u64` for the owned `pf_vdisplay_proto` control plane. -use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Mutex; +use std::sync::atomic::{AtomicU32, Ordering}; use std::time::Instant; use wdk_sys::iddcx; @@ -51,6 +51,9 @@ pub struct MonitorObject { pub target_id: u32, pub adapter_luid_low: u32, pub adapter_luid_high: i32, + /// The live swap-chain drain worker, set by `assign_swap_chain` and dropped (RAII-joins the worker + /// thread) by `unassign_swap_chain` / departure (STEP 5). + pub swap_chain_processor: Option, /// When the entry was created — the watchdog skips still-initializing monitors. pub created_at: Instant, } @@ -64,21 +67,44 @@ static NEXT_ID: AtomicU32 = AtomicU32::new(1); /// Fallback modes appended after the requested mode, so a topology change still has options. 
fn default_modes() -> Vec { vec![ - Mode { width: 1920, height: 1080, refresh_rates: vec![60, 120] }, - Mode { width: 1280, height: 720, refresh_rates: vec![60] }, + Mode { + width: 1920, + height: 1080, + refresh_rates: vec![60, 120], + }, + Mode { + width: 1280, + height: 720, + refresh_rates: vec![60], + }, ] } /// `DISPLAYCONFIG_VIDEO_SIGNAL_INFO` for a monitor mode (vSyncFreqDivider = 0, per the DDI contract). -pub fn display_info(width: u32, height: u32, refresh_rate: u32) -> wdk_sys::DISPLAYCONFIG_VIDEO_SIGNAL_INFO { +pub fn display_info( + width: u32, + height: u32, + refresh_rate: u32, +) -> wdk_sys::DISPLAYCONFIG_VIDEO_SIGNAL_INFO { let clock_rate = refresh_rate * (height + 4) * (height + 4) + 1000; let mut si: wdk_sys::DISPLAYCONFIG_VIDEO_SIGNAL_INFO = unsafe { core::mem::zeroed() }; si.pixelRate = u64::from(clock_rate); - si.hSyncFreq = wdk_sys::DISPLAYCONFIG_RATIONAL { Numerator: clock_rate, Denominator: height + 4 }; - si.vSyncFreq = - wdk_sys::DISPLAYCONFIG_RATIONAL { Numerator: clock_rate, Denominator: (height + 4) * (height + 4) }; - si.activeSize = wdk_sys::DISPLAYCONFIG_2DREGION { cx: width, cy: height }; - si.totalSize = wdk_sys::DISPLAYCONFIG_2DREGION { cx: width + 4, cy: height + 4 }; + si.hSyncFreq = wdk_sys::DISPLAYCONFIG_RATIONAL { + Numerator: clock_rate, + Denominator: height + 4, + }; + si.vSyncFreq = wdk_sys::DISPLAYCONFIG_RATIONAL { + Numerator: clock_rate, + Denominator: (height + 4) * (height + 4), + }; + si.activeSize = wdk_sys::DISPLAYCONFIG_2DREGION { + cx: width, + cy: height, + }; + si.totalSize = wdk_sys::DISPLAYCONFIG_2DREGION { + cx: width + 4, + cy: height + 4, + }; // union { AdditionalSignalInfo bitfield | videoStandard:u32 }: videoStandard=255, vSyncFreqDivider=0. si.__bindgen_anon_1.videoStandard = 255; si.scanLineOrdering = @@ -88,11 +114,20 @@ pub fn display_info(width: u32, height: u32, refresh_rate: u32) -> wdk_sys::DISP /// `IDDCX_TARGET_MODE` for a scan-out mode (vSyncFreqDivider = 1, per the DDI contract). 
pub fn target_mode(width: u32, height: u32, refresh_rate: u32) -> iddcx::IDDCX_TARGET_MODE { - let region = wdk_sys::DISPLAYCONFIG_2DREGION { cx: width, cy: height }; + let region = wdk_sys::DISPLAYCONFIG_2DREGION { + cx: width, + cy: height, + }; let mut si: wdk_sys::DISPLAYCONFIG_VIDEO_SIGNAL_INFO = unsafe { core::mem::zeroed() }; si.pixelRate = u64::from(refresh_rate) * u64::from(width) * u64::from(height); - si.hSyncFreq = wdk_sys::DISPLAYCONFIG_RATIONAL { Numerator: refresh_rate * height, Denominator: 1 }; - si.vSyncFreq = wdk_sys::DISPLAYCONFIG_RATIONAL { Numerator: refresh_rate, Denominator: 1 }; + si.hSyncFreq = wdk_sys::DISPLAYCONFIG_RATIONAL { + Numerator: refresh_rate * height, + Denominator: 1, + }; + si.vSyncFreq = wdk_sys::DISPLAYCONFIG_RATIONAL { + Numerator: refresh_rate, + Denominator: 1, + }; si.totalSize = region; si.activeSize = region; si.scanLineOrdering = @@ -101,13 +136,20 @@ pub fn target_mode(width: u32, height: u32, refresh_rate: u32) -> iddcx::IDDCX_T si.__bindgen_anon_1.videoStandard = 255 | (1 << 16); let mut tm: iddcx::IDDCX_TARGET_MODE = unsafe { core::mem::zeroed() }; tm.Size = core::mem::size_of::() as u32; - tm.TargetVideoSignalInfo = wdk_sys::DISPLAYCONFIG_TARGET_MODE { targetVideoSignalInfo: si }; + tm.TargetVideoSignalInfo = wdk_sys::DISPLAYCONFIG_TARGET_MODE { + targetVideoSignalInfo: si, + }; tm } /// A monitor's advertised modes (the looked-up entry returns a clone for lock-free mode-DDI fill). pub fn modes_for_id(id: u32) -> Option> { - MONITOR_MODES.lock().ok()?.iter().find(|m| m.id == id).map(|m| m.modes.clone()) + MONITOR_MODES + .lock() + .ok()? + .iter() + .find(|m| m.id == id) + .map(|m| m.modes.clone()) } /// Modes for the monitor whose handle matches (used by `monitor_query_modes`). 
@@ -120,14 +162,69 @@ pub fn modes_for_object(object: iddcx::IDDCX_MONITOR) -> Option> { .map(|m| m.modes.clone()) } +/// The OS target id stamped on the monitor whose handle matches (used by `assign_swap_chain` to name the +/// shared-ring objects). `None` if the monitor isn't found. +pub fn target_id_for_object(object: iddcx::IDDCX_MONITOR) -> Option { + MONITOR_MODES + .lock() + .ok()? + .iter() + .find(|m| m.object == Some(object)) + .map(|m| m.target_id) +} + +/// Install a swap-chain processor on the monitor whose handle matches, returning any PREVIOUS processor +/// for the caller to drop OUTSIDE the lock. Dropping a processor RAII-joins its worker thread, so it must +/// never happen while holding `MONITOR_MODES` (the worker would block the whole control plane / risk a +/// self-deadlock). If the monitor isn't found or the lock is poisoned, `proc` is returned as `Some(proc)`. +#[must_use] +pub fn set_swap_chain_processor( + object: iddcx::IDDCX_MONITOR, + proc: crate::swap_chain_processor::SwapChainProcessor, +) -> Option { + let Ok(mut lock) = MONITOR_MODES.lock() else { + return Some(proc); + }; + if let Some(m) = lock.iter_mut().find(|m| m.object == Some(object)) { + m.swap_chain_processor.replace(proc) + } else { + // No such monitor — hand `proc` back so the caller drops it (joins the worker) outside the lock. + Some(proc) + } +} + +/// Take (remove) the swap-chain processor from the monitor whose handle matches, returning it for the +/// caller to drop OUTSIDE the lock (see `set_swap_chain_processor`). `None` if none was installed. +#[must_use] +pub fn take_swap_chain_processor( + object: iddcx::IDDCX_MONITOR, +) -> Option { + MONITOR_MODES + .lock() + .ok()? + .iter_mut() + .find(|m| m.object == Some(object))? + .swap_chain_processor + .take() +} + /// `IOCTL_ADD`: create + arrive a virtual monitor at `width`x`height`@`refresh`.
Returns the OS /// `(target_id, adapter_luid_low, adapter_luid_high)` for the [`AddReply`](pf_vdisplay_proto::control::AddReply), /// or `None` on failure (no adapter yet / IddCx error). -pub fn create_monitor(session_id: u64, width: u32, height: u32, refresh: u32) -> Option<(u32, u32, i32)> { +pub fn create_monitor( + session_id: u64, + width: u32, + height: u32, + refresh: u32, +) -> Option<(u32, u32, i32)> { let adapter = crate::adapter::adapter()?; let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); - let mut modes = vec![Mode { width, height, refresh_rates: vec![refresh] }]; + let mut modes = vec![Mode { + width, + height, + refresh_rates: vec![refresh], + }]; modes.extend(default_modes()); // Register the (pending) monitor so the mode DDIs can find it by EDID-serial id before arrival. @@ -140,6 +237,7 @@ pub fn create_monitor(session_id: u64, width: u32, height: u32, refresh: u32) -> target_id: 0, adapter_luid_low: 0, adapter_luid_high: 0, + swap_chain_processor: None, created_at: Instant::now(), }); } else { @@ -168,7 +266,10 @@ pub fn create_monitor(session_id: u64, width: u32, height: u32, refresh: u32) -> attr.SynchronizationScope = wdk_sys::_WDF_SYNCHRONIZATION_SCOPE::WdfSynchronizationScopeInheritFromParent; - let create_in = iddcx::IDARG_IN_MONITORCREATE { ObjectAttributes: &raw mut attr, pMonitorInfo: &raw mut info }; + let create_in = iddcx::IDARG_IN_MONITORCREATE { + ObjectAttributes: &raw mut attr, + pMonitorInfo: &raw mut info, + }; let mut create_out: iddcx::IDARG_OUT_MONITORCREATE = unsafe { core::mem::zeroed() }; // SAFETY: adapter is a valid IddCx adapter; create_in points to valid local storage read synchronously. let st = unsafe { wdk_iddcx::IddCxMonitorCreate(adapter, &create_in, &mut create_out) }; @@ -210,12 +311,21 @@ pub fn create_monitor(session_id: u64, width: u32, height: u32, refresh: u32) -> /// `IOCTL_REMOVE`: depart + drop the monitor for `session_id`. Returns true if one was removed. 
pub fn remove_monitor(session_id: u64) -> bool { - let monitor = { - let Ok(mut lock) = MONITOR_MODES.lock() else { return false }; - let Some(pos) = lock.iter().position(|m| m.session_id == session_id) else { return false }; - let entry = lock.remove(pos); - entry.object + // Pull out the IddCx handle AND the swap-chain processor under the lock, but drop the processor + // (which RAII-joins its worker thread) only AFTER the lock guard is released — joining a worker + // while holding `MONITOR_MODES` would head-block the whole control plane / risk a self-deadlock. + let (monitor, processor) = { + let Ok(mut lock) = MONITOR_MODES.lock() else { + return false; + }; + let Some(pos) = lock.iter().position(|m| m.session_id == session_id) else { + return false; + }; + let mut entry = lock.remove(pos); + (entry.object, entry.swap_chain_processor.take()) }; + // Drop the worker FIRST (it joins + deletes the swap-chain), THEN depart the monitor. + drop(processor); if let Some(m) = monitor { // SAFETY: `m` is a live IddCx monitor handle; departure tears it down. unsafe { wdk_iddcx::IddCxMonitorDeparture(m) }; @@ -225,13 +335,28 @@ pub fn remove_monitor(session_id: u64) -> bool { /// `IOCTL_CLEAR_ALL`: depart + drop every monitor (host-startup orphan reap). pub fn clear_all() { - let monitors: Vec = { - let Ok(mut lock) = MONITOR_MODES.lock() else { return }; - lock.drain(..).filter_map(|m| m.object).collect() + // Drain every entry under the lock, keeping each (handle, processor); drop the processors (RAII-join + // their workers) only AFTER releasing the lock, then depart the monitors. See `remove_monitor`. + let mut drained: Vec<( + Option, + Option, + )> = { + let Ok(mut lock) = MONITOR_MODES.lock() else { + return; + }; + lock.drain(..) + .map(|mut m| (m.object, m.swap_chain_processor.take())) + .collect() }; - for m in monitors { - // SAFETY: `m` is a live IddCx monitor handle. 
- unsafe { wdk_iddcx::IddCxMonitorDeparture(m) }; + // Drop all workers FIRST (join + delete their swap-chains), THEN depart the monitors. + for (_, processor) in &mut drained { + drop(processor.take()); + } + for (object, _) in drained { + if let Some(m) = object { + // SAFETY: `m` is a live IddCx monitor handle. + unsafe { wdk_iddcx::IddCxMonitorDeparture(m) }; + } } } @@ -249,6 +374,15 @@ fn container_guid(id: u32) -> wdk_sys::GUID { Data1: 0x7066_7664u32.wrapping_add(id), Data2: 0x7044, Data3: 0x5350, - Data4: [0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6, (id >> 8) as u8, id as u8], + Data4: [ + 0xa1, + 0xb2, + 0xc3, + 0xd4, + 0xe5, + 0xf6, + (id >> 8) as u8, + id as u8, + ], } } diff --git a/packaging/windows/drivers/pf-vdisplay/src/swap_chain_processor.rs b/packaging/windows/drivers/pf-vdisplay/src/swap_chain_processor.rs new file mode 100644 index 0000000..57cc2fc --- /dev/null +++ b/packaging/windows/drivers/pf-vdisplay/src/swap_chain_processor.rs @@ -0,0 +1,303 @@ +//! The swap-chain processor (STEP 5): a worker thread that DRAINS the IddCx swap-chain so the virtual +//! monitor stays a usable display. +//! +//! The OS presents the composited desktop to the driver through a swap-chain; the driver MUST consume +//! it (acquire → finished-processing) or the monitor stalls. STEP 5 binds our render device to the +//! swap-chain (`IddCxSwapChainSetDevice`) and loops acquire/finish, discarding each frame. It does NOT +//! publish frames to the host — that is STEP 6 (the `CopyResource` of `out.MetaData.pSurface` into a +//! shared ring), deliberately omitted here. +//! +//! Ported from the proven oracle (`packaging/windows/vdisplay-driver/pf-vdisplay/src/ +//! swap_chain_processor.rs`) onto wdk-sys + wdk-iddcx. The oracle's `wdf_umdf`/`wdf_umdf_sys` are +//! replaced by `wdk_sys::iddcx::*` + the `wdk_iddcx` DDI wrappers. Those wrappers return a RAW +//! `NTSTATUS` (`i32`) that is HRESULT-shaped for the swap-chain DDIs, so we classify it by hand +//! 
(`hr >= 0` = success; `0x8000_000A` = E_PENDING; `hr < 0 && != E_PENDING` = error) rather than with +//! `nt_success`. The publisher + `render_luid_low/high` params are dropped (STEP 6). + +use std::{ + mem::size_of, + sync::{ + Arc, + atomic::{AtomicBool, Ordering}, + }, + thread::{self, JoinHandle}, + time::Duration, +}; + +use wdk_sys::iddcx::{ + IDARG_IN_RELEASEANDACQUIREBUFFER2, IDARG_IN_SWAPCHAINSETDEVICE, + IDARG_OUT_RELEASEANDACQUIREBUFFER2, IDDCX_SWAPCHAIN, +}; +// `HANDLE` is the shared wdk-sys typedef (`crate::types`) re-used by the iddcx bindings — take it from +// the crate root, which is guaranteed to export it (the iddcx module only re-exports it if bindgen +// re-declared it there). It is the same type as `IDARG_IN_SETSWAPCHAIN.hNextSurfaceAvailable`. +use wdk_sys::{HANDLE, NTSTATUS, WDFOBJECT, call_unsafe_wdf_function_binding}; +use windows::{ + Win32::{ + Foundation::HANDLE as WHANDLE, + Graphics::Dxgi::IDXGIDevice, + System::Threading::{ + AvRevertMmThreadCharacteristics, AvSetMmThreadCharacteristicsW, WaitForSingleObject, + }, + }, + core::{Interface, w}, +}; + +use crate::direct_3d_device::Direct3DDevice; + +/// E_PENDING — `ReleaseAndAcquireBuffer2` returns this (HRESULT-shaped) when the swap-chain is valid but +/// DWM has composed no new frame yet; wait on the surface-available event and retry. +const E_PENDING: u32 = 0x8000_000A; +/// `WAIT_TIMEOUT` from `WaitForSingleObject` (defined locally to avoid pulling a windows-crate constant +/// type into the comparison — the raw `WAIT_EVENT.0` is just a `u32`). +const WAIT_TIMEOUT_U32: u32 = 0x0000_0102; + +/// HRESULT-shaped success test for the swap-chain DDIs (raw `NTSTATUS`/HRESULT: success iff non-negative). +#[inline] +fn hr_success(hr: NTSTATUS) -> bool { + hr >= 0 +} + +/// A minimal newtype to move a raw pointer / handle across the thread boundary. 
The wrapped value is a +/// raw IddCx swap-chain handle or an event HANDLE (both raw pointers, framework-managed) — sending them +/// to the worker is sound because only this thread touches them and the framework synchronises lifetime. +struct Sendable(T); +// SAFETY: see the type doc — the wrapped raw handle is owned by the worker for its lifetime. +unsafe impl Send for Sendable {} + +pub struct SwapChainProcessor { + terminate: Arc, + thread: Option>, +} + +// SAFETY: Raw ptr is managed by external library; access is serialised by the worker thread + the +// terminate flag. +unsafe impl Send for SwapChainProcessor {} +unsafe impl Sync for SwapChainProcessor {} + +impl SwapChainProcessor { + pub fn new() -> Self { + Self { + terminate: Arc::new(AtomicBool::new(false)), + thread: None, + } + } + + pub fn run( + &mut self, + swap_chain: IDDCX_SWAPCHAIN, + device: Arc, + available_buffer_event: HANDLE, + target_id: u32, + ) { + let available_buffer_event = Sendable(available_buffer_event); + let swap_chain = Sendable(swap_chain); + let terminate = self.terminate.clone(); + + let join_handle = thread::spawn(move || { + // Rust 2021 disjoint closure captures would otherwise grab the raw `swap_chain.0` / + // `available_buffer_event.0` FIELDS directly (defeating the `Sendable` Send wrapper, since the + // inner `*mut IDDCX_SWAPCHAIN__` / `HANDLE` are `!Send`). Rebind the WHOLE wrappers here so the + // closure captures them as `Sendable<_>` (which IS `Send`), then unwrap from the locals. + let swap_chain = swap_chain; + let available_buffer_event = available_buffer_event; + // It is very important to prioritize this thread by making use of the Multimedia Scheduler + // Service. It will intelligently prioritize the thread for improved throughput in high + // CPU-load scenarios. 
+ let mut av_task = 0u32; + let res = unsafe { AvSetMmThreadCharacteristicsW(w!("Distribution"), &mut av_task) }; + let Ok(av_handle) = res else { + dbglog!("[pf-vd] swap-chain: failed to prioritize thread: {res:?}"); + return; + }; + + Self::run_core( + swap_chain.0, + &device, + available_buffer_event.0, + &terminate, + target_id, + ); + + dbglog!( + "[pf-vd] swap-chain run_core RETURNED (target={target_id}) — deleting swap-chain, device drops next" + ); + + // Delete the swap-chain WDF object BEFORE the `Arc` drops (the swap-chain + // referenced our device). `WdfObjectDelete` takes a WDFOBJECT. + // SAFETY: `swap_chain` is a live IddCx swap-chain handle; we own the sole reference here and + // the drain loop has exited. + unsafe { + call_unsafe_wdf_function_binding!(WdfObjectDelete, swap_chain.0 as WDFOBJECT); + } + + // Revert the thread to normal once it's done. + let res = unsafe { AvRevertMmThreadCharacteristics(av_handle) }; + if let Err(e) = res { + dbglog!("[pf-vd] swap-chain: failed to revert prioritized thread: {e:?}"); + } + }); + + self.thread = Some(join_handle); + } + + fn run_core( + swap_chain: IDDCX_SWAPCHAIN, + device: &Direct3DDevice, + available_buffer_event: HANDLE, + terminate: &AtomicBool, + target_id: u32, + ) { + // SetDevice fails (0x887A0026, FACILITY_DXGI) when the monitor briefly flaps INACTIVE during + // topology activation — the OS unassigns + re-assigns the swap-chain, and a fresh run_core thread + // can lose the race to the unassign. Retry briefly so a stable re-assign binds the device instead + // of giving up on the first transient failure. `terminate` (set when the OS unassigns + drops the + // processor) breaks us out promptly. + // + // Cast to IDXGIDevice ONCE and BORROW it to the swap-chain across all retries. 
Re-casting + + // `into_raw()`'ing on EVERY attempt — and a flapping monitor fails several attempts per session — + // orphans an IDXGIDevice reference per failure, pinning the D3D device (and its ~dozen worker + // threads + tens of MB of VRAM) so it is NEVER freed when the processor drops. `as_raw()` keeps + // our single reference (released right after the loop); IddCx AddRefs its own on success, and + // `device` keeps the object alive for the drain loop regardless. + let dxgi_device = match device.device.cast::() { + Ok(d) => d, + Err(e) => { + dbglog!("[pf-vd] swap-chain: failed to cast ID3D11Device to IDXGIDevice: {e:?}"); + return; + } + }; + // Built zeroed + field-assigned (driver style) — robust against a bindgen field-set difference. + let mut set_device: IDARG_IN_SWAPCHAINSETDEVICE = unsafe { core::mem::zeroed() }; + set_device.pDevice = dxgi_device.as_raw().cast(); + let mut set_ok = false; + let mut terminated = false; + for attempt in 0..60u32 { + if terminate.load(Ordering::Relaxed) { + dbglog!( + "[pf-vd] swap-chain run_core: terminated during SetDevice (attempt {attempt}, target={target_id})" + ); + terminated = true; + break; + } + // SAFETY: driver is loaded; `swap_chain` is valid; `set_device` points to valid local storage. + let hr = unsafe { wdk_iddcx::IddCxSwapChainSetDevice(swap_chain, &set_device) }; + if hr_success(hr) { + set_ok = true; + dbglog!( + "[pf-vd] swap-chain run_core: SetDevice OK (target={target_id}, attempt={attempt}) — entering drain loop" + ); + break; + } + if attempt == 0 { + dbglog!( + "[pf-vd] swap-chain run_core: SetDevice attempt 0 failed ({hr:#x}) — retrying up to 60x@50ms (monitor may be flapping)" + ); + } + thread::sleep(Duration::from_millis(50)); + } + // Release our borrowed device reference — IddCx holds its own now, or we gave up. (Explicit drop + // so NLL can't release it mid-loop while the swap-chain still references the raw ptr.) 
+ drop(dxgi_device); + if !set_ok { + if !terminated { + dbglog!( + "[pf-vd] swap-chain run_core: SetDevice never succeeded after retries (target={target_id}) — giving up" + ); + } + return; + } + + let mut logged_pending = false; + let mut logged_frame = false; + loop { + // Check terminate at the TOP, every iteration. The success branch below does NOT re-check it, + // so during a CONTINUOUS frame burst (DWM rendering the freshly-activated desktop) a thread the + // OS unassigns — or that the processor is dropping — never sees the flag and loops on, pinning + // its D3D device (and ~36 NVIDIA worker threads). That is THE reconnect leak; it only + // reproduced at full speed (E_PENDING gaps DO check terminate and masked it under a debugger). + // Without this, `SwapChainProcessor::drop`'s join can also block until the burst ends. + if terminate.load(Ordering::Relaxed) { + break; + } + + // ...Buffer2 is required once CAN_PROCESS_FP16 is set. AcquireSystemMemoryBuffer=FALSE keeps + // the GPU surface (out.MetaData.pSurface). STEP 5 only drains — it does NOT publish the + // surface (STEP 6 will). Built zeroed + field-assigned (driver style) so a bindgen field-set + // difference can't break a positional struct literal. + let mut in_args: IDARG_IN_RELEASEANDACQUIREBUFFER2 = unsafe { core::mem::zeroed() }; + #[allow(clippy::cast_possible_truncation)] + { + in_args.Size = size_of::() as u32; + } + in_args.AcquireSystemMemoryBuffer = 0; + // `core::mem::zeroed()` (not `::default()`) — consistent with every other IddCx out-struct + // in this driver, and robust whether or not bindgen derives `Default` for this type (its + // `MetaData` field carries a raw `pSurface` pointer + union which can suppress the derive). + let mut buffer: IDARG_OUT_RELEASEANDACQUIREBUFFER2 = unsafe { core::mem::zeroed() }; + // SAFETY: driver is loaded; `swap_chain` is valid; in/out point to valid local storage. 
+ let hr: NTSTATUS = unsafe { + wdk_iddcx::IddCxSwapChainReleaseAndAcquireBuffer2( + swap_chain, + &mut in_args, + &mut buffer, + ) + }; + + if (hr as u32) == E_PENDING { + if !logged_pending { + dbglog!( + "[pf-vd] swap-chain run_core: E_PENDING (target={target_id}) — swap-chain valid but DWM has composed NO frame yet" + ); + logged_pending = true; + } + // SAFETY: `available_buffer_event` is the framework-provided surface-available event. + let wait_result = + unsafe { WaitForSingleObject(WHANDLE(available_buffer_event.cast()), 16).0 }; + + // thread requested an end + if terminate.load(Ordering::Relaxed) { + break; + } + + // WAIT_OBJECT_0 | WAIT_TIMEOUT + if matches!(wait_result, 0 | WAIT_TIMEOUT_U32) { + // We have a new buffer (or timed out), so try the AcquireBuffer again. + continue; + } + + // The wait was cancelled or something unexpected happened. + break; + } else if hr_success(hr) { + if !logged_frame { + dbglog!( + "[pf-vd] swap-chain run_core: FIRST FRAME acquired (target={target_id}) — DWM IS compositing the virtual display!" + ); + logged_frame = true; + } + // STEP 6 publishes `buffer.MetaData.pSurface` into the shared ring HERE (the surface is + // valid until the next ReleaseAndAcquire). STEP 5 only drains, so we immediately finish + // the frame. + // SAFETY: driver is loaded; `swap_chain` is valid. + let hr = unsafe { wdk_iddcx::IddCxSwapChainFinishedProcessingFrame(swap_chain) }; + if !hr_success(hr) { + break; + } + } else { + // The swap-chain was likely abandoned (e.g. DXGI_ERROR_ACCESS_LOST) — exit the loop. + break; + } + } + } +} + +impl Drop for SwapChainProcessor { + fn drop(&mut self) { + if let Some(handle) = self.thread.take() { + // signal the worker to end + self.terminate.store(true, Ordering::Relaxed); + // wait until the worker is finished (it deletes the swap-chain object before returning) + let _ = handle.join(); + } + } +}