diff --git a/Cargo.lock b/Cargo.lock index aeef7c6..c83919b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2688,6 +2688,7 @@ dependencies = [ "wayland-protocols-wlr", "wayland-scanner", "windows 0.62.2 (registry+https://github.com/rust-lang/crates.io-index)", + "windows-service", "x509-parser", "xkbcommon", ] @@ -4325,6 +4326,12 @@ dependencies = [ "safe_arch", ] +[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + [[package]] name = "winapi" version = "0.3.9" @@ -4557,6 +4564,17 @@ dependencies = [ "windows-link 0.2.1 (git+https://github.com/microsoft/windows-rs?rev=b4129fcc1ae81eec8bf1217539883db821bca3a1)", ] +[[package]] +name = "windows-service" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24d6bcc7f734a4091ecf8d7a64c5f7d7066f45585c1861eba06449909609c8a" +dependencies = [ + "bitflags", + "widestring", + "windows-sys 0.52.0", +] + [[package]] name = "windows-strings" version = "0.5.1" diff --git a/crates/punktfunk-host/Cargo.toml b/crates/punktfunk-host/Cargo.toml index f8393e2..bcff7be 100644 --- a/crates/punktfunk-host/Cargo.toml +++ b/crates/punktfunk-host/Cargo.toml @@ -144,7 +144,22 @@ windows = { version = "0.62", features = [ # Force-composed-flip overlay: a topmost layered window on the Winlogon desktop disqualifies the # secure desktop's fullscreen independent-flip so Desktop Duplication can capture it. "Win32_System_LibraryLoader", + # VirtualProtect — for the inline patch of the win32u GPU-preference shim (Apollo's MinHook port: + # the hybrid-GPU output-reparenting hook that keeps Desktop Duplication stable on a 4090+iGPU box). + # See capture/dxgi.rs `install_gpu_pref_hook`. No trampoline (we fully replace the fn) → no detour + # crate / no C length-disassembler dep; a 12-byte absolute-jmp prologue patch suffices. 
+ "Win32_System_Memory", + # Per-monitor-v2 DPI awareness — IDXGIOutput5::DuplicateOutput1 (the modern capture path Apollo + # uses; FP16/format-list, robust to overlay/format churn) requires the process to be DPI-aware. + "Win32_UI_HiDpi", + # Windows service supervisor (src/service.rs): a kill-on-close job object so a service crash never + # orphans the SYSTEM host it launched into the interactive session. + "Win32_System_JobObjects", ] } +# The SCM plumbing for the `service` subcommand (define_windows_service! / dispatcher / control +# handler / ServiceManager install). Wraps the Win32 service API; the supervision loop itself uses +# the `windows` crate above. +windows-service = "0.7" # Software H.264 encoder (GPU-less path + NVENC fallback). The default `source` feature statically # compiles OpenH264 (BSD-2) — no system lib, builds on MSVC; nasm on PATH adds the SIMD fast path. openh264 = "0.9" diff --git a/crates/punktfunk-host/src/capture.rs b/crates/punktfunk-host/src/capture.rs index a964da8..52accf5 100644 --- a/crates/punktfunk-host/src/capture.rs +++ b/crates/punktfunk-host/src/capture.rs @@ -258,6 +258,16 @@ pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result) } +/// `PUNKTFUNK_NO_WGC=1` forces the pure single-process DDA (Desktop Duplication) path everywhere: it +/// skips WGC in [`capture_virtual_output`] AND bypasses the two-process secure-desktop relay (so even a +/// SYSTEM host captures in-process via DDA, the way Apollo does — one capturer for the normal AND the +/// secure desktop). For bringing DDA up to parity / validating it on its own; all the WGC code stays +/// compiled and comes back the moment the flag is unset. 
+#[cfg(target_os = "windows")] +pub(crate) fn wgc_disabled() -> bool { + std::env::var_os("PUNKTFUNK_NO_WGC").is_some() +} + #[cfg(target_os = "windows")] pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result> { let target = vout.win_capture.clone().ok_or_else(|| { @@ -275,7 +285,7 @@ pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result); } diff --git a/crates/punktfunk-host/src/capture/composed_flip.rs b/crates/punktfunk-host/src/capture/composed_flip.rs index 6e1a3f2..73d00a2 100644 --- a/crates/punktfunk-host/src/capture/composed_flip.rs +++ b/crates/punktfunk-host/src/capture/composed_flip.rs @@ -17,7 +17,7 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use windows::core::{w, PCWSTR}; +use windows::core::w; use windows::Win32::Foundation::{HWND, LPARAM, LRESULT, WPARAM}; use windows::Win32::System::LibraryLoader::GetModuleHandleW; use windows::Win32::System::StationsAndDesktops::{ diff --git a/crates/punktfunk-host/src/capture/desktop_watch.rs b/crates/punktfunk-host/src/capture/desktop_watch.rs index c08a0a0..c1de933 100644 --- a/crates/punktfunk-host/src/capture/desktop_watch.rs +++ b/crates/punktfunk-host/src/capture/desktop_watch.rs @@ -45,24 +45,36 @@ impl DesktopWatcher { let _ = std::thread::Builder::new() .name("desktop-watch".into()) .spawn(move || { - let mut last = initial; + // Debounce: only publish a change after the raw reading has been stable for several + // polls. The input desktop flaps Default↔Winlogon transiently during a lock/UAC + // transition; publishing every flap makes the capture mux thrash (rebuild storms). 
+ const STABLE_POLLS: u32 = 4; // ~80ms + let mut published = initial; + let mut candidate = initial; + let mut stable = 0u32; while !st.load(Ordering::Relaxed) { let v = if unsafe { is_secure_desktop() } { DESKTOP_SECURE } else { DESKTOP_NORMAL }; - s.store(v, Ordering::Release); - if v != last { + if v == candidate { + stable = stable.saturating_add(1); + } else { + candidate = v; + stable = 1; + } + if stable >= STABLE_POLLS && candidate != published { + s.store(candidate, Ordering::Release); + published = candidate; tracing::info!( - desktop = if v == DESKTOP_SECURE { + desktop = if candidate == DESKTOP_SECURE { "Winlogon(secure)" } else { "Default" }, - "input desktop changed" + "input desktop changed (debounced)" ); - last = v; } std::thread::sleep(Duration::from_millis(20)); } @@ -89,7 +101,7 @@ impl Drop for DesktopWatcher { /// True if the current input desktop is "Winlogon" (the secure desktop). Best-effort: if the desktop /// can't be opened or named, report not-secure (the safe default — keep WGC/normal capture). 
-unsafe fn is_secure_desktop() -> bool { +pub(crate) unsafe fn is_secure_desktop() -> bool { let desk = match OpenInputDesktop( DESKTOP_CONTROL_FLAGS(0), false, diff --git a/crates/punktfunk-host/src/capture/dxgi.rs b/crates/punktfunk-host/src/capture/dxgi.rs index 25cad2a..33bf574 100644 --- a/crates/punktfunk-host/src/capture/dxgi.rs +++ b/crates/punktfunk-host/src/capture/dxgi.rs @@ -10,7 +10,7 @@ use super::{CapturedFrame, Capturer, FramePayload, PixelFormat}; use anyhow::{anyhow, bail, Context, Result}; use std::ffi::c_void; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use windows::core::{s, Interface, PCSTR}; use windows::Win32::Foundation::{HMODULE, LUID}; @@ -37,14 +37,15 @@ use windows::Win32::Graphics::Dxgi::Common::{ DXGI_SAMPLE_DESC, }; use windows::Win32::Graphics::Dxgi::{ - CreateDXGIFactory1, IDXGIAdapter1, IDXGIFactory1, IDXGIOutput1, IDXGIOutputDuplication, - IDXGIResource, DXGI_ERROR_ACCESS_LOST, DXGI_ERROR_DEVICE_REMOVED, DXGI_ERROR_DEVICE_RESET, - DXGI_ERROR_INVALID_CALL, DXGI_ERROR_WAIT_TIMEOUT, DXGI_OUTDUPL_DESC, DXGI_OUTDUPL_FRAME_INFO, + CreateDXGIFactory1, IDXGIAdapter1, IDXGIFactory1, IDXGIOutput1, IDXGIOutput5, + IDXGIOutputDuplication, IDXGIResource, DXGI_ERROR_ACCESS_LOST, DXGI_ERROR_DEVICE_REMOVED, + DXGI_ERROR_DEVICE_RESET, DXGI_ERROR_INVALID_CALL, DXGI_ERROR_MODE_CHANGE_IN_PROGRESS, + DXGI_ERROR_WAIT_TIMEOUT, DXGI_OUTDUPL_DESC, DXGI_OUTDUPL_FRAME_INFO, DXGI_OUTDUPL_POINTER_SHAPE_INFO, DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR, DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR, }; use windows::Win32::System::StationsAndDesktops::{ - OpenInputDesktop, SetThreadDesktop, DESKTOP_ACCESS_FLAGS, DESKTOP_CONTROL_FLAGS, + CloseDesktop, OpenInputDesktop, SetThreadDesktop, DESKTOP_ACCESS_FLAGS, DESKTOP_CONTROL_FLAGS, }; use windows::Win32::UI::WindowsAndMessaging::SetCursorPos; @@ -164,33 +165,113 @@ unsafe fn reopen_duplication( )> 
{ let (adapter, out) = find_output(gdi_name)?; let (dev, ctx) = make_device(&adapter)?; - let dupl = out - .DuplicateOutput(&dev) - .context("re-DuplicateOutput after ACCESS_LOST")?; + let dupl = duplicate_output(&out, &dev).context("re-DuplicateOutput after ACCESS_LOST")?; Ok((dev, ctx, out, dupl)) } +/// Create the output duplication. Prefer `IDXGIOutput5::DuplicateOutput1` with an explicit +/// format list — Apollo's path. The list is BGRA8-only for now (SDR; see the note in the body); +/// FP16-first HDR capture is a follow-up. It is far more robust to overlay/format changes than +/// legacy `DuplicateOutput` (which always tone-maps to 8-bit BGRA — the source of much of the +/// ACCESS_LOST churn). Requires the process to be per-monitor-v2 DPI aware (set at startup in +/// [`install_gpu_pref_hook`]). Falls back to legacy `DuplicateOutput` if Output5 is unavailable or +/// `DuplicateOutput1` fails. +unsafe fn duplicate_output( + output: &IDXGIOutput1, + device: &ID3D11Device, +) -> Result { + if let Ok(output5) = output.cast::() { + // BGRA8 only for now (SDR). NOTE: DuplicateOutput1 returns the FIRST format it can provide and + // DXGI will CONVERT to it — so listing FP16 first would hand back FP16 even on an SDR desktop, + // wrongly tripping the HDR path. Real HDR capture (FP16 first + IDXGIOutput6 colorspace + // detection to pick the path) is the follow-up once the churn is settled. + let formats = [DXGI_FORMAT_B8G8R8A8_UNORM]; + // RETRY DuplicateOutput1. The caller releases the OLD duplication (self.dupl = None) immediately + // before calling us, and the kernel-side teardown of that duplication is ASYNC — the FIRST + // DuplicateOutput1 right after can race it and return E_ACCESSDENIED ("output still duplicated") + // even though we dropped our only reference. 
A few short retries let the teardown finish so the + // ROBUST DuplicateOutput1 dup succeeds, instead of falling through to legacy DuplicateOutput, + // which "succeeds" into a fragile dup that churns ACCESS_LOST/MODE_CHANGE every few ms on this + // cross-GPU IDD. (This is why DuplicateOutput1 failed but the legacy call a beat later + // succeeded — pure timing. Apollo retries DuplicateOutput1 2x/200ms for the same reason.) + // Apollo waits 200 ms between DuplicateOutput1 attempts — the kernel-side teardown of the + // just-released duplication takes that long, so short (ms) waits aren't enough. Env-tunable so + // we can dial it without a rebuild: PUNKTFUNK_DUP_RETRY_MS (per-wait, default 200) × + // PUNKTFUNK_DUP_RETRY_N (attempts, default 1 = no retry; 6 → ~1 s worst case) before the legacy fallback. + let retry_ms: u64 = std::env::var("PUNKTFUNK_DUP_RETRY_MS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(200); + // Default 1 (no retry → immediate legacy fallback). On the secure desktop DuplicateOutput1 + // ALWAYS refuses (only LOGON_UI may use it), so retrying there just blocks the capture thread; + // and on the normal desktop the release-before-reduplicate + gentle recovery already keep the + // legacy dup stable. Raise PUNKTFUNK_DUP_RETRY_N only on a box where DuplicateOutput1 can win + // the old-dup-teardown race (then PUNKTFUNK_DUP_RETRY_MS sets the per-wait, default 200). 
+ let attempts: u64 = std::env::var("PUNKTFUNK_DUP_RETRY_N") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(1) + .max(1); + let mut last_err = None; + for attempt in 0..attempts { + match output5.DuplicateOutput1(device, 0, &formats) { + Ok(d) => { + if attempt > 0 { + tracing::debug!( + attempt, + "DuplicateOutput1 succeeded on retry (rode out old-dup teardown race)" + ); + } + return Ok(d); + } + Err(e) => { + last_err = Some(e); + if attempt + 1 < attempts { + std::thread::sleep(Duration::from_millis(retry_ms)); + } + } + } + } + if let Some(e) = last_err { + // Expected on the secure (Winlogon) desktop (DuplicateOutput1 is LOGON_UI-only) and fires + // once per gentle recovery there — throttle so a lock dwell doesn't flood the log. The + // legacy fallback below handles it; gentle recovery keeps it from churning. + static FALLBACKS: AtomicU64 = AtomicU64::new(0); + if FALLBACKS.fetch_add(1, Ordering::Relaxed) % 64 == 0 { + tracing::debug!( + error = %format!("{e:?}"), + "DuplicateOutput1 unavailable — using legacy DuplicateOutput (expected on the secure desktop)" + ); + } + } + } + output.DuplicateOutput(device).context("DuplicateOutput") +} + /// Park the cursor on a duplicated output. A blank virtual display emits NO Desktop Duplication /// frames until something changes; a pointer move IS a DDA "change", so this kicks the very first /// `AcquireNextFrame` loose — and lands the cursor on the display the client is viewing. Two moves /// to distinct points guarantee an actual move even if the cursor already sat at the center. -/// Follow the current input desktop so duplication spans the normal ↔ Winlogon (secure: login/UAC) -/// desktops. Opening the secure desktop requires SYSTEM; on a non-SYSTEM host this just fails on -/// Winlogon (capture freezes there) — which is why the host relaunches itself as SYSTEM. 
The HDESK -/// is intentionally leaked: it must stay open while it's the thread's desktop, and switches -/// (lock/unlock/UAC) are rare, so a few handles per session is fine. +/// Re-sync the calling (capture) thread to the CURRENT input desktop. MUST be called on EVERY recovery +/// — symmetrically for ENTERING and LEAVING the Winlogon (secure: lock/login/UAC) desktop. Gating it on +/// is_secure_desktop() (the old bug) re-attached only on the way IN, so on the way OUT the capture +/// thread stayed stuck on the gone Winlogon desktop and every rebuild failed → no frames → client +/// timeout → "display disconnected". Apollo calls its equivalent (syncThreadDesktop) before every +/// duplicate. Opening the secure desktop requires SYSTEM (the host relaunches itself as SYSTEM). +/// Matches Apollo by closing the handle right after SetThreadDesktop — the thread keeps the desktop via +/// an internal reference, so this does NOT leak even when called on every recovery. unsafe fn attach_input_desktop() { match OpenInputDesktop( DESKTOP_CONTROL_FLAGS(0), false, DESKTOP_ACCESS_FLAGS(0x1000_0000), // GENERIC_ALL ) { - Ok(desk) => match SetThreadDesktop(desk) { - Ok(()) => tracing::info!("attach_input_desktop: SetThreadDesktop OK"), - Err(e) => { - tracing::warn!(error = %format!("{e:?}"), "attach_input_desktop: SetThreadDesktop FAILED") + Ok(desk) => { + if let Err(e) = SetThreadDesktop(desk) { + tracing::warn!(error = %format!("{e:?}"), "attach_input_desktop: SetThreadDesktop FAILED"); } - }, + let _ = CloseDesktop(desk); + } Err(e) => { tracing::warn!(error = %format!("{e:?}"), "attach_input_desktop: OpenInputDesktop FAILED") } @@ -205,6 +286,122 @@ pub(crate) unsafe fn nudge_cursor_onto(output: &IDXGIOutput1) { } } +/// How many times DXGI has actually called our hooked `NtGdiDdDDIGetCachedHybridQueryValue`. 
If this +/// stays 0 while DDA churns with ACCESS_LOST, the hook is NOT on DXGI's GPU-preference path on this +/// build (so reparenting can't be the cause — look at composition/independent-flip instead). >0 with +/// continuing churn means the hook fires but reparenting isn't the trigger here. +static HYBRID_HOOK_HITS: AtomicU64 = AtomicU64::new(0); + +pub(crate) fn hybrid_hook_hits() -> u64 { + HYBRID_HOOK_HITS.load(Ordering::Relaxed) +} + +// kernel32 — declared directly so we don't pull the whole Win32_System_Diagnostics_Debug feature for +// one call. FlushInstructionCache serializes the i-cache after the inline patch: the patch is written +// on the main thread but DXGI runs the hooked export from the encode/worker thread (possibly a +// different core), so the "same-thread, no flush needed" assumption was wrong. +#[link(name = "kernel32")] +extern "system" { + fn FlushInstructionCache(h: *mut c_void, base: *const c_void, size: usize) -> i32; + fn GetCurrentProcess() -> *mut c_void; + fn SetThreadExecutionState(es_flags: u32) -> u32; +} +const ES_CONTINUOUS: u32 = 0x8000_0000; +const ES_SYSTEM_REQUIRED: u32 = 0x0000_0001; +const ES_DISPLAY_REQUIRED: u32 = 0x0000_0002; + +/// Replacement for `win32u.dll!NtGdiDdDDIGetCachedHybridQueryValue`: always report +/// `D3DKMT_GPU_PREFERENCE_STATE_UNSPECIFIED` (3). We fully replace the function (never call the +/// original), so no trampoline is needed. (Ported verbatim from Apollo's MinHook hook.) +unsafe extern "system" fn hybrid_query_hook(gpu_preference: *mut u32) -> i32 { + HYBRID_HOOK_HITS.fetch_add(1, Ordering::Relaxed); + if gpu_preference.is_null() { + return 0xC000_000Du32 as i32; // STATUS_INVALID_PARAMETER + } + *gpu_preference = 3; // D3DKMT_GPU_PREFERENCE_STATE_UNSPECIFIED + 0 // STATUS_SUCCESS +} + +/// Apollo's win32u GPU-preference hook, ported. 
On a HYBRID-GPU box DXGI resolves a GPU preference +/// (registry + power settings + the hybrid-adapter DDI) and REPARENTS outputs onto the chosen render +/// GPU — which constantly invalidates Desktop Duplication (DXGI_ERROR_ACCESS_LOST 0x887A0026, the +/// freeze/churn observed on the RTX 4090 + AMD iGPU box; `SET_RENDER_ADAPTER` is ignored there). Faking +/// a cached preference of UNSPECIFIED makes DXGI skip the resolution, so the output is NOT reparented +/// and DDA stays stable on one adapter (this is what makes Apollo's DDA work on this hardware). +/// Installed once, before the first DXGI factory/enumeration; lasts the process lifetime (like Apollo). +pub(crate) fn install_gpu_pref_hook() { + use std::sync::Once; + static HOOK: Once = Once::new(); + HOOK.call_once(|| unsafe { + use windows::Win32::System::LibraryLoader::{GetProcAddress, LoadLibraryA}; + use windows::Win32::System::Memory::{ + VirtualProtect, PAGE_EXECUTE_READWRITE, PAGE_PROTECTION_FLAGS, + }; + use windows::Win32::UI::HiDpi::{ + GetAwarenessFromDpiAwarenessContext, GetThreadDpiAwarenessContext, + SetProcessDpiAwarenessContext, DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2, + }; + // Per-monitor-v2 DPI awareness — REQUIRED for IDXGIOutput5::DuplicateOutput1 (without it the + // call returns E_ACCESSDENIED forever, forcing the legacy DuplicateOutput path). Matches + // Apollo's startup. SetProcessDpiAwarenessContext fails with E_ACCESS_DENIED if awareness was + // already set (manifest / earlier call) — log the outcome AND the effective awareness so a + // 100% DuplicateOutput1 E_ACCESSDENIED is diagnosable instead of silent. + match SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2) { + Ok(()) => tracing::info!("DPI awareness set: PER_MONITOR_AWARE_V2"), + Err(e) => tracing::warn!(error = %format!("{e:?}"), + "SetProcessDpiAwarenessContext failed (already set?) — DuplicateOutput1 may E_ACCESSDENIED"), + } + // 0=UNAWARE 1=SYSTEM 2=PER_MONITOR(_V2). 
DuplicateOutput1 needs 2. + let awareness = GetAwarenessFromDpiAwarenessContext(GetThreadDpiAwarenessContext()).0; + tracing::info!(awareness, "effective DPI awareness (need 2=PER_MONITOR for DuplicateOutput1)"); + let Ok(lib) = LoadLibraryA(s!("win32u.dll")) else { + tracing::warn!("GPU-pref hook: win32u.dll not loadable — skipping (DDA may churn on hybrid GPUs)"); + return; + }; + let Some(target) = GetProcAddress(lib, s!("NtGdiDdDDIGetCachedHybridQueryValue")) else { + tracing::warn!("GPU-pref hook: NtGdiDdDDIGetCachedHybridQueryValue not exported — skipping"); + return; + }; + let target = target as usize as *mut u8; + // x64 absolute jump to our replacement: `mov rax, imm64 ; jmp rax` (12 bytes). We never call the + // original, so no trampoline/relocation (hence no detour crate / C length-disassembler dep). + let hook = hybrid_query_hook as *const () as usize; + let mut patch = [0u8; 12]; + patch[0] = 0x48; + patch[1] = 0xB8; // mov rax, imm64 + patch[2..10].copy_from_slice(&hook.to_le_bytes()); + patch[10] = 0xFF; + patch[11] = 0xE0; // jmp rax + let mut old = PAGE_PROTECTION_FLAGS(0); + if VirtualProtect(target as *const c_void, 12, PAGE_EXECUTE_READWRITE, &mut old).is_err() { + tracing::warn!("GPU-pref hook: VirtualProtect failed — skipping"); + return; + } + std::ptr::copy_nonoverlapping(patch.as_ptr(), target, 12); + let mut restore = PAGE_PROTECTION_FLAGS(0); + let _ = VirtualProtect(target as *const c_void, 12, old, &mut restore); + // Serialize the i-cache: the patch is written here (main thread) but DXGI calls the export from + // the capture/encode worker thread — possibly a different core with a stale i-cache, in which + // case it would keep running the ORIGINAL function and DXGI would still reparent. (Apollo's + // MinHook does this flush internally; our hand-rolled patch must do it explicitly.) 
+ let _ = FlushInstructionCache(GetCurrentProcess(), target as *const c_void, 12); + // VERIFY the patch actually landed (CFG/hotpatch/short-stub could silently reject it). Read it + // back; an error! (not a cheery "installed") makes a dead hook obvious in the logs. + let mut readback = [0u8; 12]; + std::ptr::copy_nonoverlapping(target, readback.as_mut_ptr(), 12); + if readback == patch { + tracing::info!( + "GPU-pref hook installed + verified (win32u hybrid-query -> UNSPECIFIED): reparenting disabled" + ); + } else { + tracing::error!( + want = %format!("{patch:02x?}"), got = %format!("{readback:02x?}"), + "GPU-pref hook patch did NOT land — hook is DEAD (DXGI will still reparent → ACCESS_LOST churn)" + ); + } + }); +} + // DXGI Desktop Duplication deliberately EXCLUDES the hardware cursor from the captured surface (the // OS composites it separately). We capture the cursor shape/position from the frame info and blend it // back in — on the GPU for the zero-copy path (a CPU readback would stall the 240 fps pipeline). @@ -794,7 +991,12 @@ pub struct DuplCapturer { device: ID3D11Device, context: ID3D11DeviceContext, output: IDXGIOutput1, - dupl: IDXGIOutputDuplication, + /// The output duplication. `Option` so recovery can RELEASE it (set `None`) BEFORE re-duplicating: + /// DXGI permits only ONE `IDXGIOutputDuplication` per output, and a stale one (incl. an ACCESS_LOST + /// one) keeps holding the output, so a re-`DuplicateOutput1` returns E_ACCESSDENIED and legacy + /// `DuplicateOutput` returns a BORN-LOST dup — the storm. Apollo releases before re-duplicating; so + /// do we now. `None` only transiently during recovery (acquire routes None → recovery). + dupl: Option, /// The output's GDI name — re-resolved on ACCESS_LOST (a mode change can stale the cached handle). gdi_name: String, /// Stable SudoVDA target id, used to re-resolve `gdi_name` during recovery. 
@@ -842,11 +1044,22 @@ pub struct DuplCapturer { /// secure-desktop dwell where the output is gone) so we don't block the encode loop or hammer /// DuplicateOutput — between attempts the last good frame is repeated. `None` = never attempted. last_rebuild: Option, + /// Throttle for ALL ACCESS_LOST recovery attempts (cheap re-duplicate + full rebuild). A + /// constantly-invalidated duplication (HDR overlay/MPO churn) would otherwise spin recovery and + /// starve the encode thread; cap attempts to ~one per 5 ms and repeat the last frame between them. + last_recover: Option, /// True once at least one real frame has been produced. After that, a frame drought (e.g. a long /// secure-desktop dwell with nothing rendering to the virtual output) must never fatally end the /// session — `next_frame` keeps repeating the last/seeded frame instead of erroring on its /// deadline. The deadline stays fatal only *before* the first frame (a genuine startup misconfig). ever_got_frame: bool, + /// Consecutive rebuilds that produced a BORN-LOST duplication (created OK, but its first + /// AcquireNextFrame instantly returned ACCESS_LOST). On the NORMAL desktop this is the hybrid + /// reparent/flip storm — once it persists, `acquire` returns Err so the m3 loop cold-rebuilds the + /// whole pipeline (new device/output) instead of spinning on a dead dup forever (the bug where the + /// stream froze on the last frame). Reset to 0 by any real frame. NOT armed on the secure + /// (Winlogon) desktop, where a long static dwell is legitimate and must never end the session. + consecutive_born_lost: u32, /// GPU cursor overlay (rebuilt on device recreate). `None` until the first composite. 
cursor: Option, /// Last cursor shape, decomposed into alpha + XOR layers (kept device-independent so it survives @@ -869,6 +1082,39 @@ impl DuplCapturer { keepalive: Box, ) -> Result { unsafe { + // Stop DXGI hybrid-GPU output reparenting BEFORE we create the factory / enumerate outputs + // (the cause of the 0x887A0026 ACCESS_LOST churn on this hybrid box: RTX 4090 + AMD iGPU). + install_gpu_pref_hook(); + // Force PER-MONITOR-AWARE-V2 on THIS (capture) thread. IDXGIOutput5::DuplicateOutput1 + // REQUIRES V2 — without it the call returns E_ACCESSDENIED forever (the 4370x failures + // measured live), forcing the legacy DuplicateOutput fallback which yields a BORN-LOST + // duplication on this box → the ACCESS_LOST storm. SetProcessDpiAwarenessContext failed at + // startup ("already set" — a manifest/runtime locked the process to a LOWER awareness, and + // GetAwarenessFromDpiAwarenessContext can't tell V1 from V2: it reports 2 for both). The + // per-THREAD override works regardless of the process default, so DuplicateOutput1 can + // succeed (the working dup Apollo gets). Must run on the capture thread before any DXGI use. + { + use windows::Win32::UI::HiDpi::{ + AreDpiAwarenessContextsEqual, GetThreadDpiAwarenessContext, + SetThreadDpiAwarenessContext, DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2, + }; + let prev = SetThreadDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2); + let is_v2 = AreDpiAwarenessContextsEqual( + GetThreadDpiAwarenessContext(), + DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2, + ) + .as_bool(); + tracing::info!( + set_ok = !prev.0.is_null(), + thread_is_v2 = is_v2, + "capture thread DPI awareness -> PER_MONITOR_AWARE_V2 (required for DuplicateOutput1)" + ); + } + // Keep the IDD (SudoVDA) virtual display awake for the capture lifetime: an idle indirect + // display can be power-gated, which invalidates the duplication (a contributor to the + // "freezes randomly while streaming" loss). Restored to ES_CONTINUOUS on Drop. 
(Apollo does + // this too.) Must run on the capture thread (this one owns the capturer). + SetThreadExecutionState(ES_CONTINUOUS | ES_DISPLAY_REQUIRED | ES_SYSTEM_REQUIRED); let factory: IDXGIFactory1 = CreateDXGIFactory1().context("CreateDXGIFactory1")?; // 1) Find the output (monitor) whose GDI DeviceName matches, across ALL adapters. On a // real-GPU box the SudoVDA virtual monitor's DXGI output is enumerated under the GPU that @@ -969,16 +1215,20 @@ impl DuplCapturer { let device = device.context("null D3D11 device")?; let context = context.context("null D3D11 context")?; // 3) duplicate the output. Attach to the current input desktop first (as SYSTEM this can - // be the Winlogon secure desktop) so a session that starts at the lock/login screen works, - // and re-assert display isolation at OPEN time (not just in recovery): a lock/UAC switch can - // re-attach a physical monitor and route the secure desktop THERE, leaving our virtual - // output perpetually idle/lost — re-isolating forces the secure desktop back onto it. Cheap - // + idempotent (a no-op when nothing else is attached). + // be the Winlogon secure desktop) so a session that starts at the lock/login screen works. + // The SudoVDA is kept the sole desktop via the CCD isolation in sudovda::create_monitor + // (registry-persisted), so the secure desktop has nowhere to render but the output we + // capture — no per-open re-isolation needed. attach_input_desktop(); - crate::vdisplay::sudovda::reassert_isolation(&target.gdi_name); - let dupl = output - .DuplicateOutput(&device) + let dupl = duplicate_output(&output, &device) .context("DuplicateOutput (already duplicated by another app?)")?; + // Did DXGI actually call our win32u GPU-pref hook during factory/device/dupl creation? hits==0 + // here means the hook is NOT on DXGI's reparenting path on this build → reparenting can't be + // the churn cause (look at independent-flip/composition instead). Diagnostic only. 
+ tracing::debug!( + hook_hits = hybrid_hook_hits(), + "win32u GPU-pref hook call count after open" + ); // Kick the first frame loose: a blank virtual display is otherwise change-less. nudge_cursor_onto(&output); let dd: DXGI_OUTDUPL_DESC = dupl.GetDesc(); @@ -1016,7 +1266,7 @@ impl DuplCapturer { device, context, output, - dupl, + dupl: Some(dupl), target_id: target.target_id, gdi_name: target.gdi_name, width, @@ -1040,7 +1290,9 @@ impl DuplCapturer { hdr10_out: None, hdr_conv: None, last_rebuild: None, + last_recover: None, ever_got_frame: false, + consecutive_born_lost: 0, cursor: None, cursor_shape: None, cursor_pos: (0, 0), @@ -1220,16 +1472,15 @@ impl DuplCapturer { let mut buf = vec![0u8; info.PointerShapeBufferSize as usize]; let mut required = 0u32; let mut si = DXGI_OUTDUPL_POINTER_SHAPE_INFO::default(); - if self - .dupl - .GetFramePointerShape( + if self.dupl.as_ref().is_some_and(|d| { + d.GetFramePointerShape( info.PointerShapeBufferSize, buf.as_mut_ptr() as *mut c_void, &mut required, &mut si, ) .is_ok() - { + }) { if let Some(shape) = convert_pointer_shape(&buf, &si) { tracing::info!( shape_type = si.Type, @@ -1250,12 +1501,6 @@ impl DuplCapturer { /// HDR graphics white (PUNKTFUNK_HDR_CURSOR_NITS, default 203, per BT.2408) so it isn't ~2.5× /// too dim; SDR composites the raw cursor in the display's native sRGB space. unsafe fn composite_cursor_gpu(&mut self, gpu: &ID3D11Texture2D, hdr: bool) -> Result<()> { - // Diagnostic kill-switch: skip the GPU cursor composite entirely (PUNKTFUNK_NO_CURSOR=1) to - // isolate its cost on the 3D engine. The per-frame render-target view + draw to the 5K target - // is the suspect for the high 3D usage under heavy desktop change. - if std::env::var_os("PUNKTFUNK_NO_CURSOR").is_some() { - return Ok(()); - } self.dbg_cursor += 1; if self.dbg_cursor % 240 == 1 { tracing::debug!( @@ -1350,10 +1595,14 @@ impl DuplCapturer { /// (like recreate_dupl) so a born-lost one is rejected rather than adopted. 
unsafe fn try_reduplicate(&mut self) -> bool { if self.holding_frame { - let _ = self.dupl.ReleaseFrame(); + let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame()); self.holding_frame = false; } - let dupl = match self.output.DuplicateOutput(&self.device) { + // RELEASE the old duplication FIRST (drop it → frees the output) before re-duplicating. DXGI + // allows one duplication per output; leaving the stale one alive is exactly why DuplicateOutput1 + // returned E_ACCESSDENIED and the legacy fallback produced a born-lost dup. + self.dupl = None; + let dupl = match duplicate_output(&self.output, &self.device) { Ok(d) => d, Err(_) => return false, }; @@ -1361,10 +1610,15 @@ impl DuplCapturer { // + CAPTURE the frame: a born-lost duplication returns ACCESS_LOST immediately; alive-but-idle // waits the full 16ms. On a real frame we present it (so a static desktop keeps a real // last_present instead of the discarded one); idle keeps the existing last_present. - self.dupl = dupl; + self.dupl = Some(dupl); let mut info = DXGI_OUTDUPL_FRAME_INFO::default(); let mut res: Option = None; - match self.dupl.AcquireNextFrame(16, &mut info, &mut res) { + match self + .dupl + .as_ref() + .unwrap() + .AcquireNextFrame(16, &mut info, &mut res) + { Ok(()) => { self.update_cursor(&info); if let Some(r) = res { @@ -1388,7 +1642,7 @@ impl DuplCapturer { /// frame and retries on a throttle, so the session survives an arbitrarily long secure visit. 
unsafe fn recreate_dupl(&mut self) -> Result<()> { if self.holding_frame { - let _ = self.dupl.ReleaseFrame(); + let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame()); self.holding_frame = false; } // The SudoVDA output's GDI name can CHANGE across a secure-desktop topology rebuild — @@ -1396,12 +1650,20 @@ impl DuplCapturer { if let Some(n) = crate::vdisplay::sudovda::resolve_gdi_name(self.target_id) { self.gdi_name = n; } + // Re-sync the capture thread to the CURRENT input desktop on EVERY rebuild — symmetric for + // ENTERING and LEAVING the secure (Winlogon) desktop. This is the fix for "UAC/lock appears + // fine but breaks the instant you click out of it": leaving secure used to skip this (it was + // gated on is_secure_desktop()), stranding the thread on the gone Winlogon desktop. Cheap + + // leak-free (attach_input_desktop closes its handle). Apollo (syncThreadDesktop) does the same. + // We do NOT re-isolate the display on recovery: the CCD isolation from create_monitor is + // registry-persisted, and a CCD topology mutation here would itself invalidate the freshly-rebuilt + // duplication → a self-feeding ACCESS_LOST storm (200 rebuilds/session observed before this). attach_input_desktop(); - // Re-route the secure (Winlogon) desktop back to the virtual output. The lock/UAC switch can - // re-attach a physical monitor so the secure desktop lands there and our virtual output goes - // perpetually ACCESS_LOST; re-isolating (as a fresh session's `create` does) is the delta that - // makes in-session recovery work like a reconnect. Idempotent/cheap when already isolated. - crate::vdisplay::sudovda::reassert_isolation(&self.gdi_name); + // RELEASE the old duplication FIRST (frees the output). reopen_duplication creates a NEW device + // and re-DuplicateOutputs the output; if the stale duplication is still alive it holds the output + // and the new one is born-lost / E_ACCESSDENIED. 
(On reopen failure self.dupl stays None and + // acquire's None-guard re-drives recovery.) + self.dupl = None; let (dev, ctx, out, dupl) = reopen_duplication(&self.gdi_name)?; // Err → caller repeats + retries // (The born-lost guard is now the capture-acquire at the end: we adopt, then grab the current @@ -1428,7 +1690,7 @@ impl DuplCapturer { self.device = dev; self.context = ctx; self.output = out; - self.dupl = dupl; + self.dupl = Some(dupl); self.gpu_copy = None; // stale: belonged to the old device self.cursor = None; // shaders/textures belonged to the old device; rebuilt on demand self.last_present = None; // belonged to the old device; reseeded below @@ -1450,7 +1712,12 @@ impl DuplCapturer { nudge_cursor_onto(&self.output); // kick a change so a static desktop yields its first frame let mut info = DXGI_OUTDUPL_FRAME_INFO::default(); let mut res: Option = None; - let captured = match self.dupl.AcquireNextFrame(120, &mut info, &mut res) { + let captured = match self + .dupl + .as_ref() + .unwrap() + .AcquireNextFrame(120, &mut info, &mut res) + { Ok(()) => { self.update_cursor(&info); match res { @@ -1481,13 +1748,21 @@ impl DuplCapturer { tracing::warn!(error = %format!("{e:#}"), "seed black frame after recovery failed"); } } + // Track the born-lost storm: a rebuild that grabbed a real frame clears it; one that came back + // born-lost (created OK, first AcquireNextFrame == ACCESS_LOST) advances it. `acquire` uses this + // to escape to a full pipeline cold-rebuild on the normal desktop instead of spinning forever. + if captured { + self.consecutive_born_lost = 0; + } else { + self.consecutive_born_lost = self.consecutive_born_lost.saturating_add(1); + } Ok(()) } /// Acquire one frame: `Some` on a fresh image, `None` on timeout (no change → caller reuses last). 
unsafe fn acquire(&mut self) -> Result> { if self.holding_frame { - let _ = self.dupl.ReleaseFrame(); + let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame()); self.holding_frame = false; } let mut info = DXGI_OUTDUPL_FRAME_INFO::default(); @@ -1497,18 +1772,27 @@ impl DuplCapturer { } else { self.timeout_ms }; - match self.dupl.AcquireNextFrame(timeout, &mut info, &mut res) { + // If a prior recovery released the old duplication but couldn't create a new one yet (output + // gone during a secure dwell, etc.), self.dupl is None — synthesize ACCESS_LOST so we flow into + // the recovery path below instead of panicking. + let acq = match self.dupl.as_ref() { + Some(d) => d.AcquireNextFrame(timeout, &mut info, &mut res), + None => Err(windows::core::Error::from_hresult(DXGI_ERROR_ACCESS_LOST)), + }; + match acq { Ok(()) => { if self.first_frame { tracing::info!(w = self.width, h = self.height, "DXGI first frame acquired"); self.first_frame = false; } + self.consecutive_born_lost = 0; // a real frame breaks the born-lost storm self.update_cursor(&info); } Err(e) if e.code() == DXGI_ERROR_WAIT_TIMEOUT => { self.dbg_timeouts += 1; if self.dbg_timeouts % 40 == 1 { - tracing::warn!( + // A static desktop produces no DDA frames, so timeouts are NORMAL idle, not an error. + tracing::debug!( timeouts = self.dbg_timeouts, first_frame = self.first_frame, "DXGI AcquireNextFrame timeout (no desktop change yet)" @@ -1516,6 +1800,20 @@ impl DuplCapturer { } return Ok(None); } + // MODE_CHANGE_IN_PROGRESS (0x887A0025) is TRANSIENT by design ("the call may succeed at a + // later attempt") — the display topology is mid-settle (e.g. just after the IDD's mode is + // applied). Do NOT recover/rebuild: a rebuild re-issues create()→set_active_mode, re-touching + // the topology and PERPETUATING the change (the storm we measured). Just repeat the last frame + // and wait it out, like a timeout. Throttled log so a genuinely stuck change stays visible. 
+ Err(e) if e.code() == DXGI_ERROR_MODE_CHANGE_IN_PROGRESS => { + self.dbg_timeouts += 1; + if self.dbg_timeouts % 120 == 1 { + tracing::warn!( + "DXGI mode change in progress (0x887A0025) — waiting for topology to settle" + ); + } + return Ok(None); + } // Recoverable losses, ALL handled by rebuilding the duplication (device + re-DuplicateOutput): // ACCESS_LOST — desktop switch (normal <-> Winlogon secure: lock/login/UAC) or mode change // INVALID_CALL — the secure->user-desktop switch (post-login) leaves the duplication in a @@ -1547,29 +1845,103 @@ impl DuplCapturer { "DXGI capture lost — recovering (cheap re-duplicate, full rebuild if output gone)" ); } + // GENTLE recovery. On the secure (Winlogon) desktop the duplication dies on EVERY + // independent-flip; a tight re-duplicate loop tears the duplication down + brings it up + // hundreds of times/sec — that release/recreate cycle is the real kernel stress (and it + // stalls the send thread long enough that the client times out → "display disconnected"). + // So instead of fighting it: cap recovery HARD and just repeat the last frame in between + // (no busy-spin, no per-flip teardown). The session stays alive across a secure dwell; the + // lock/UAC screen is frozen/laggy, then capture resumes cleanly when the desktop returns. + // Tunable: PUNKTFUNK_RECOVER_MS (cheap re-duplicate cadence, default 250) and + // PUNKTFUNK_REBUILD_MS (heavy new-device rebuild cadence, default 1500). + let recover_ms = std::env::var("PUNKTFUNK_RECOVER_MS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(250u64); + let now = Instant::now(); + if self + .last_recover + .is_some_and(|t| now.duration_since(t) < Duration::from_millis(recover_ms)) + { + return Ok(None); // repeat the last frame; do NOT tear down/recreate yet + } + self.last_recover = Some(now); if !device_dead && self.try_reduplicate() { - // Cheap recovery succeeded; the next acquire gets frames on the same device. 
+ // Cheap recovery succeeded (same device, no teardown of the device/monitor). self.first_frame = true; return Ok(None); } - // Output gone / device dead → full rebuild (new device), throttled. + // Heavy full rebuild (new device) — the costliest teardown/recreate, so throttle it the + // hardest. Only when the cheap re-duplicate keeps failing (genuine output/device loss). + let rebuild_ms = std::env::var("PUNKTFUNK_REBUILD_MS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(1500u64); let now = Instant::now(); - let due = self.last_rebuild.map_or(true, |t| { - now.duration_since(t) >= Duration::from_millis(250) - }); + let due = self + .last_rebuild + .is_none_or(|t| now.duration_since(t) >= Duration::from_millis(rebuild_ms)); if due { self.last_rebuild = Some(now); if self.recreate_dupl().is_ok() { self.first_frame = true; } - } else { - std::thread::sleep(Duration::from_millis(8)); + } + // Born-lost rebuilds (created OK, instant ACCESS_LOST) used to escalate to a full pipeline + // cold-rebuild here — but that re-issued vd.create()→set_active_mode (an audible PnP + // add/remove chime + a fresh topology mode change), which never converged and amplified + // the storm. With the topology fix (set_active_mode no longer promotes the IDD to PRIMARY + // by default) the born-lost storm is gone at its source; if one ever recurs, just keep + // repeating the last frame in-process — never tear the IDD down mid-session (Apollo never + // does). Throttled visibility only. + if self.consecutive_born_lost > 0 && self.consecutive_born_lost % 40 == 1 { + tracing::warn!( + consecutive = self.consecutive_born_lost, + "DDA born-lost rebuilds — repeating last frame in-process (no teardown)" + ); } return Ok(None); } Err(e) => return Err(e).context("AcquireNextFrame"), } let res = res.context("AcquireNextFrame: null resource")?; + // Detect a mode/format change on the hot path. The desktop can flip HDR<->SDR (FP16<->BGRA — + // e.g. 
the SudoVDA output dropping out of HDR for the secure desktop) or change resolution + // WITHOUT raising ACCESS_LOST; `hdr_fp16`/`width`/`height` would then be stale and + // `present_acquired` would CopyResource into a mismatched-format/size target — corruption, or + // the secure-desktop "works once, then HDR breaks" bug. Re-read the acquired texture's desc + // every frame (Apollo does this) and rebuild on a real change instead of presenting a + // mismatched frame. Throttled like the ACCESS_LOST path so a flapping toggle can't hammer + // DuplicateOutput. + if let Ok(tex) = res.cast::() { + let mut d = D3D11_TEXTURE2D_DESC::default(); + tex.GetDesc(&mut d); + // Only a real SIZE change is reliably detectable here. Format/HDR is NOT: legacy + // DuplicateOutput always hands back an 8-bit BGRA surface regardless of the output's FP16 + // scanout mode, so comparing the acquired-texture format against `hdr_fp16` (derived from + // the OUTDUPL ModeDesc) self-fires every frame → a rebuild storm. A genuine resolution + // change is caught here; a real HDR↔SDR toggle arrives as ACCESS_LOST → recreate_dupl + // re-detects it. (Genuine FP16 capture is a separate change: DuplicateOutput1.) 
+ if d.Width != self.width || d.Height != self.height { + tracing::info!( + old = format!("{}x{}", self.width, self.height), + new = format!("{}x{}", d.Width, d.Height), + "DXGI capture size changed mid-stream — rebuilding" + ); + let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame()); + let now = Instant::now(); + let due = self + .last_rebuild + .is_none_or(|t| now.duration_since(t) >= Duration::from_millis(250)); + if due { + self.last_rebuild = Some(now); + if self.recreate_dupl().is_ok() { + self.first_frame = true; + } + } + return Ok(None); + } + } Ok(Some(self.present_acquired(res)?)) } @@ -1590,7 +1962,7 @@ impl DuplCapturer { self.ensure_fp16_src()?; let src = self.fp16_src.clone().context("fp16 src texture")?; self.context.CopyResource(&src, &tex); - let _ = self.dupl.ReleaseFrame(); + let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame()); self.holding_frame = false; self.composite_cursor_gpu(&src, true)?; // onto the FP16 surface (HDR: decode + nits scale) self.ensure_hdr10_out()?; @@ -1628,7 +2000,7 @@ impl DuplCapturer { self.ensure_gpu_copy()?; let gpu = self.gpu_copy.clone().context("gpu copy texture")?; self.context.CopyResource(&gpu, &tex); - let _ = self.dupl.ReleaseFrame(); + let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame()); self.holding_frame = false; self.composite_cursor_gpu(&gpu, false)?; self.last_present = Some((gpu.clone(), PixelFormat::Bgra)); @@ -1655,7 +2027,7 @@ impl DuplCapturer { let src = std::slice::from_raw_parts(map.pData as *const u8, pitch * h); let mut tight = depad_bgra(src, pitch, w, h); self.context.Unmap(&staging, 0); - let _ = self.dupl.ReleaseFrame(); + let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame()); self.holding_frame = false; if self.cursor_visible { if let Some(shape) = &self.cursor_shape { @@ -1770,9 +2142,13 @@ impl Drop for DuplCapturer { fn drop(&mut self) { if self.holding_frame { unsafe { - let _ = self.dupl.ReleaseFrame(); + let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame()); } } + // Release the 
display/system-required execution state we took at open(). + unsafe { + SetThreadExecutionState(ES_CONTINUOUS); + } // _keepalive drops after, REMOVEing the SudoVDA monitor. } } diff --git a/crates/punktfunk-host/src/capture/wgc.rs b/crates/punktfunk-host/src/capture/wgc.rs index e84eb01..6d4c888 100644 --- a/crates/punktfunk-host/src/capture/wgc.rs +++ b/crates/punktfunk-host/src/capture/wgc.rs @@ -20,7 +20,7 @@ use super::dxgi::{ find_output, make_device, nudge_cursor_onto, D3d11Frame, HdrConverter, WinCaptureTarget, }; use super::{CapturedFrame, Capturer, FramePayload, PixelFormat}; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{bail, Context, Result}; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Condvar, Mutex}; use std::time::{Duration, Instant}; @@ -30,7 +30,6 @@ use windows::Graphics::Capture::{ Direct3D11CaptureFrame, Direct3D11CaptureFramePool, GraphicsCaptureItem, GraphicsCaptureSession, }; use windows::Graphics::DirectX::DirectXPixelFormat; -use windows::Graphics::SizeInt32; use windows::Win32::Foundation::{CloseHandle, HANDLE}; use windows::Win32::Graphics::Direct3D11::{ ID3D11Device, ID3D11DeviceContext, ID3D11RenderTargetView, ID3D11ShaderResourceView, diff --git a/crates/punktfunk-host/src/capture/wgc_relay.rs b/crates/punktfunk-host/src/capture/wgc_relay.rs index 54b2a37..ddb4436 100644 --- a/crates/punktfunk-host/src/capture/wgc_relay.rs +++ b/crates/punktfunk-host/src/capture/wgc_relay.rs @@ -15,7 +15,7 @@ use crate::capture::dxgi::WinCaptureTarget; use anyhow::{bail, Context, Result}; -use std::io::{BufRead, BufReader, Read, Write}; +use std::io::{BufRead, BufReader, Read}; use std::sync::mpsc::{Receiver, SyncSender}; use std::sync::Mutex; use windows::core::PWSTR; @@ -152,11 +152,12 @@ unsafe fn no_inherit(h: HANDLE) { let _ = SetHandleInformation(h, HANDLE_FLAG_INHERIT.0, HANDLE_FLAGS(0)); } -/// Build the helper's environment block: the user's block (so DLL/PATH/SystemRoot resolve) with this -/// (host) 
process's `PUNKTFUNK_*` vars overlaid, so the helper encodes with the SAME settings the -/// host runs with (`PUNKTFUNK_ENCODER=nvenc`, `PUNKTFUNK_ZEROCOPY`, …) instead of the user shell's. -/// Returns a UTF-16, double-null-terminated block suitable for `CREATE_UNICODE_ENVIRONMENT`. -unsafe fn merged_env_block(user_block: *const u16) -> Vec { +/// Build a child environment block: the target session's block (so DLL/PATH/SystemRoot resolve) with +/// this process's `PUNKTFUNK_*` vars overlaid, so the child runs with the SAME settings this process +/// has (`PUNKTFUNK_ENCODER=nvenc`, `PUNKTFUNK_ZEROCOPY`, …) instead of the target shell's. Returns a +/// UTF-16, double-null-terminated block suitable for `CREATE_UNICODE_ENVIRONMENT`. Shared by the WGC +/// helper spawn (here) and the Windows service launching the host into the active session. +pub(crate) unsafe fn merged_env_block(user_block: *const u16) -> Vec { // Parse the user block ("VAR=VALUE\0" … "\0") into entries. let mut entries: Vec = Vec::new(); if !user_block.is_null() { @@ -174,9 +175,10 @@ unsafe fn merged_env_block(user_block: *const u16) -> Vec { p = p.offset(len + 1); } } - // Drop any PUNKTFUNK_* the user block carried, then overlay this process's PUNKTFUNK_* vars. - entries.retain(|e| !e.split('=').next().unwrap_or("").starts_with("PUNKTFUNK_")); - for (k, v) in std::env::vars().filter(|(k, _)| k.starts_with("PUNKTFUNK_")) { + // Overlay "our" settings — PUNKTFUNK_* and RUST_LOG — dropping whatever the target block had. + let is_ours = |k: &str| k.starts_with("PUNKTFUNK_") || k == "RUST_LOG"; + entries.retain(|e| !is_ours(e.split('=').next().unwrap_or(""))); + for (k, v) in std::env::vars().filter(|(k, _)| is_ours(k)) { entries.push(format!("{k}={v}")); } // Serialize back to a UTF-16 double-null-terminated block. 
diff --git a/crates/punktfunk-host/src/gamestream/mod.rs b/crates/punktfunk-host/src/gamestream/mod.rs index 12ee98d..1016563 100644 --- a/crates/punktfunk-host/src/gamestream/mod.rs +++ b/crates/punktfunk-host/src/gamestream/mod.rs @@ -201,13 +201,25 @@ pub fn serve(mgmt: crate::mgmt::Options, native: Option) }) } -/// `~/.config/punktfunk`, created on demand — host identity + (later) pairing state live here. +/// The host config dir (host identity, pairing state, mgmt token, library) — created on demand. +/// Linux: `$XDG_CONFIG_HOME/punktfunk` or `~/.config/punktfunk`. Windows: `%ProgramData%\punktfunk` +/// (machine-wide — the SYSTEM service and the interactive user share ONE dir that survives logout). +/// `PUNKTFUNK_CONFIG_DIR` overrides on both platforms (used by the Windows service config / tests). pub(crate) fn config_dir() -> PathBuf { + if let Some(dir) = std::env::var_os("PUNKTFUNK_CONFIG_DIR").filter(|s| !s.is_empty()) { + return PathBuf::from(dir); + } + // Windows: %ProgramData% (e.g. C:\ProgramData\punktfunk) — machine-wide, SYSTEM-readable, + // persists across user logout, correct for a SYSTEM service. Falls back to %APPDATA% then CWD. + #[cfg(target_os = "windows")] + let base = std::env::var_os("ProgramData") + .or_else(|| std::env::var_os("APPDATA")) + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from(".")); + #[cfg(not(target_os = "windows"))] let base = std::env::var_os("XDG_CONFIG_HOME") .map(PathBuf::from) .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".config"))) - // Windows: %APPDATA% (e.g. C:\Users\X\AppData\Roaming) — cert/key/paired/uniqueid persist there. 
- .or_else(|| std::env::var_os("APPDATA").map(PathBuf::from)) .unwrap_or_else(|| PathBuf::from(".")); base.join("punktfunk") } diff --git a/crates/punktfunk-host/src/m3.rs b/crates/punktfunk-host/src/m3.rs index f0e0306..36d4aa1 100644 --- a/crates/punktfunk-host/src/m3.rs +++ b/crates/punktfunk-host/src/m3.rs @@ -2026,6 +2026,19 @@ fn virtual_stream( let (mut capturer, mut enc, mut frame, mut interval) = build_pipeline_with_retry(&mut vd, mode, bitrate_kbps, bit_depth)?; + // Windows single-process DDA path (PUNKTFUNK_NO_WGC=1): the SudoVDA virtual display, isolated as the + // SOLE active output, goes into fullscreen independent-flip (one plane on one display) which Desktop + // Duplication cannot capture → the born-lost ACCESS_LOST storm we measured on the RTX4090+iGPU box + // (hook verified-firing, DPI=2, yet 100% DuplicateOutput1 E_ACCESSDENIED + born-lost). A tiny topmost + // layered overlay disqualifies independent-flip and forces DWM composition, which DDA CAN capture. + // (Apollo never hits this because it runs WITH a physical monitor attached — multi-display is already + // DWM-composited; we isolate to sole-display, so we must force composition ourselves.) Unlike the WGC + // relay path — where WGC owns the normal desktop and the overlay is secure-only — here DDA owns the + // normal desktop too, so it must run unconditionally. Held for the session; Drop tears it down. + // Best-effort; disable with PUNKTFUNK_FORCE_COMPOSED=0. + #[cfg(target_os = "windows")] + let _composed_flip = crate::capture::composed_flip::ForceComposedFlip::start(); + let perf = std::env::var("PUNKTFUNK_PERF").is_ok(); // Microburst cap (applied in send_loop/paced_submit): a frame ≤ this bursts out immediately; // only a bigger frame's overflow is spread. PUNKTFUNK_PACE_BURST_KB overrides the 128 KB default. @@ -2266,10 +2279,12 @@ fn virtual_stream( /// Should this host take the two-process (SYSTEM host + user-session WGC helper) path? 
Yes when it's /// running as SYSTEM — the only account that can capture the secure desktop + drive SendInput on it, /// and the account under which in-process WGC won't activate. `PUNKTFUNK_FORCE_HELPER` forces it on -/// (for testing the relay as a normal user); `PUNKTFUNK_NO_HELPER` forces it off. +/// (for testing the relay as a normal user); `PUNKTFUNK_NO_HELPER` forces it off. `PUNKTFUNK_NO_WGC` +/// also forces it off — that mode runs pure single-process DDA (one capturer for the normal AND secure +/// desktop, Apollo-style), which has no WGC helper to relay. #[cfg(target_os = "windows")] fn should_use_helper() -> bool { - if std::env::var_os("PUNKTFUNK_NO_HELPER").is_some() { + if std::env::var_os("PUNKTFUNK_NO_HELPER").is_some() || crate::capture::wgc_disabled() { return false; } std::env::var_os("PUNKTFUNK_FORCE_HELPER").is_some() @@ -2329,6 +2344,20 @@ fn virtual_stream_relay( let target = vout.win_capture.clone().ok_or_else(|| { anyhow!("SudoVDA target not yet an active display (needs a WDDM GPU to activate it)") })?; + // Force the SudoVDA's advanced-color (HDR) state to MATCH the session bit depth BEFORE the WGC + // helper captures it. The advanced-color state PERSISTS on the monitor across sessions, so an + // 8-bit (SDR) session could otherwise inherit HDR left on by a prior 10-bit run (or our own + // earlier toggle) → the helper captures HDR FP16 while the encoder is 8-bit SDR → broken image. + // Runs on every build (initial + mode-switch + return-from-secure rebuild), keeping WGC's format + // consistent with the encoder. (HDR independent-flip on the secure desktop is handled separately + // by dropping to SDR for the DDA leg.) + #[cfg(target_os = "windows")] + unsafe { + if crate::vdisplay::sudovda::set_advanced_color(target.target_id, bit_depth >= 10) { + // Let the colorspace change settle before WGC creates its capture item / detects HDR. 
+ std::thread::sleep(std::time::Duration::from_millis(250)); + } + } let relay = HelperRelay::spawn( &target, (mode.width, mode.height, effective_hz), @@ -2526,24 +2555,65 @@ fn virtual_stream_relay( "two-process: source switch" ); if secure { - if dda.is_none() { - match open_dda(&target, cur_mode.width, cur_mode.height, effective_hz) { - Ok(p) => dda = Some(p), - Err(e) => { - tracing::error!(error = %format!("{e:#}"), - "two-process: DDA open failed — secure desktop will freeze on last frame"); - } + // SDR-while-secure (HDR sessions ONLY): drop the SudoVDA out of HDR so the secure + // (Winlogon) desktop renders SDR/composed — HDR fullscreen independent-flip is what made + // DDA storm ACCESS_LOST (black). For an SDR (8-bit) session the output is already SDR, so + // toggling is a needless topology change AND its matching restore on the way back would + // force the desktop into HDR the 8-bit encoder can't take (broken image). + if bit_depth >= 10 { + let toggled = unsafe { + crate::vdisplay::sudovda::set_advanced_color(target.target_id, false) + }; + if toggled { + std::thread::sleep(std::time::Duration::from_millis(250)); } } - if let Some(d) = dda.as_mut() { - d.enc.request_keyframe(); + dda = None; // reopen so we capture the (SDR) output + match open_dda(&target, cur_mode.width, cur_mode.height, effective_hz) { + Ok(mut p) => { + p.enc.request_keyframe(); + dda = Some(p); + } + Err(e) => { + tracing::error!(error = %format!("{e:#}"), + "two-process: DDA open failed — secure desktop will freeze on last frame"); + } } next = std::time::Instant::now(); } else { - // Returning to the helper: drain stale buffered AUs (encoded while we ignored it) and - // force a fresh IDR; await_idr then skips the stale deltas until that IDR arrives. - while relay.try_recv().is_ok() {} - relay.request_keyframe(); + // Returning to the normal desktop: RESUME from the still-alive WGC helper. 
Do NOT + // recreate the SudoVDA monitor or respawn the helper — build()'s vd.create() is an + // IOCTL_REMOVE+ADD of the monitor (the audible disconnect/connect chime + the + // teardown/recreate kernel stress that broke DDA, now applied to the mux). The monitor + + // helper persist for the WHOLE session; only the host-DDA leg opens (secure) and closes + // (normal). Apply the DDA learning here: reuse, don't tear down. + dda = None; // free the secure DDA encoder; the relay (helper) is the source again + while relay.try_recv().is_ok() {} // drop secure-dwell backlog + relay.request_keyframe(); // client decoder resumes on the helper's next IDR + if bit_depth >= 10 { + // HDR session ONLY: the secure switch dropped the SudoVDA to SDR for the DDA leg, so + // here we must restore HDR AND rebuild the helper so WGC re-detects the HDR + // colorspace. An SDR session never changed the colorspace → no rebuild, no recreate. + unsafe { + crate::vdisplay::sudovda::set_advanced_color(target.target_id, true); + } + match build(&mut vd, cur_mode) { + Ok((ka, rl, tg, hz)) => { + relay = rl; + _keepalive = ka; + target = tg; + effective_hz = hz; + interval = std::time::Duration::from_secs_f64(1.0 / hz.max(1) as f64); + } + Err(e) => { + tracing::error!(error = %format!("{e:#}"), + "two-process: helper rebuild on secure-exit failed"); + while relay.try_recv().is_ok() {} + relay.request_keyframe(); + } + } + } + next = std::time::Instant::now(); } } if want_kf { diff --git a/crates/punktfunk-host/src/main.rs b/crates/punktfunk-host/src/main.rs index 883efa3..9589883 100644 --- a/crates/punktfunk-host/src/main.rs +++ b/crates/punktfunk-host/src/main.rs @@ -31,6 +31,8 @@ mod mgmt_token; mod native_pairing; mod pipeline; mod pwinit; +#[cfg(target_os = "windows")] +mod service; mod vdisplay; #[cfg(target_os = "windows")] mod wgc_helper; @@ -43,13 +45,28 @@ use m0::{Options, Source}; use std::path::PathBuf; fn main() { - // Logs go to stderr so stdout stays machine-readable 
(`punktfunk-host openapi > spec.json`). - tracing_subscriber::fmt() - .with_env_filter( - tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()), - ) - .with_writer(std::io::stderr) - .init(); + let filter = + tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()); + // `service run` is launched by the SCM with no console — log to a file instead of stderr. + #[cfg(target_os = "windows")] + let service_run = { + let a: Vec = std::env::args().skip(1).take(2).collect(); + a.first().map(String::as_str) == Some("service") + && a.get(1).map(String::as_str) == Some("run") + }; + #[cfg(not(target_os = "windows"))] + let service_run = false; + + if service_run { + #[cfg(target_os = "windows")] + service::init_file_logging(filter); + } else { + // Logs go to stderr so stdout stays machine-readable (`punktfunk-host openapi > spec.json`). + tracing_subscriber::fmt() + .with_env_filter(filter) + .with_writer(std::io::stderr) + .init(); + } if let Err(e) = real_main() { tracing::error!("{e:#}"); @@ -75,6 +92,13 @@ fn real_main() -> Result<()> { punktfunk_core::ABI_VERSION ); + // Install Apollo's win32u GPU-preference hook BEFORE anything touches DXGI (the SudoVDA + // render-adapter selection creates a DXGI factory during virtual-display setup, well before + // capture). On a hybrid-GPU box this stops DXGI from reparenting the virtual output off the + // capture GPU — the ACCESS_LOST churn fix. Idempotent (Once); harmless on non-hybrid boxes. + #[cfg(target_os = "windows")] + crate::capture::dxgi::install_gpu_pref_hook(); + match args.first().map(String::as_str) { // GameStream host control plane (P1.1: mDNS + serverinfo) + management API, and (with // --native) the native punktfunk/1 host in the same process — the unified host. 
@@ -226,6 +250,11 @@ fn real_main() -> Result<()> { bit_depth: get("--bit-depth").and_then(|s| s.parse().ok()).unwrap_or(8), }) } + // Windows service control: install/uninstall/start/stop/status + the SCM `run` entry point. + // Replaces the ad-hoc launch chain — `service install` registers an auto-start SYSTEM service + // that launches the host into the active interactive session. + #[cfg(target_os = "windows")] + Some("service") => service::main(&args[1..]), Some("-h") | Some("--help") | Some("help") | None => { print_usage(); Ok(()) @@ -508,4 +537,12 @@ NOTES: Both 'serve --native' and 'm3-host' advertise the native service over mDNS (_punktfunk._udp) for client auto-discovery — 'punktfunk-client-rs --discover' lists them." ); + #[cfg(target_os = "windows")] + eprintln!( + "\nWINDOWS SERVICE (end-user deployment — replaces a manual launch):\n\ + \x20 punktfunk-host service install register an auto-start SYSTEM service + firewall rules\n\ + \x20 punktfunk-host service uninstall remove the service + firewall rules\n\ + \x20 punktfunk-host service start|stop|status\n\ + \x20 config: %ProgramData%\\punktfunk\\host.env" + ); } diff --git a/crates/punktfunk-host/src/service.rs b/crates/punktfunk-host/src/service.rs new file mode 100644 index 0000000..6ce5fc7 --- /dev/null +++ b/crates/punktfunk-host/src/service.rs @@ -0,0 +1,702 @@ +//! Windows service: a SYSTEM supervisor that launches the streaming host into the **active +//! interactive console session** and keeps it tracking session switches — the end-user replacement +//! for the ad-hoc PsExec / VBS / scheduled-task launch chain used during bring-up. +//! +//! Why a supervisor and not just "run the host as a service": the host must run **as SYSTEM in the +//! interactive session** (session 1+). Desktop Duplication of the secure (Winlogon/UAC/lock) desktop +//! and `SendInput` both need SYSTEM; capture and injection both need the *interactive* session, which +//! a plain session-0 service is not in. 
So this service (itself in session 0) never captures — it +//! duplicates its own LocalSystem token, retargets it to the active console session, and +//! `CreateProcessAsUserW`s the host there. This is the Sunshine/Apollo model. The host in turn spawns +//! the WGC helper into the *user* session (see `capture::wgc_relay`) — two nested launches. +//! +//! Subcommands (Windows only): +//! ```text +//! punktfunk-host service run SCM entry point (registered as binPath; not run by hand) +//! punktfunk-host service install register an auto-start LocalSystem service + firewall rules +//! punktfunk-host service uninstall stop + delete the service + remove firewall rules +//! punktfunk-host service start|stop|status convenience wrappers over the SCM +//! ``` +//! Config lives in `%ProgramData%\punktfunk\host.env` (the Windows analogue of `scripts/host.env`), +//! loaded into the service's environment and carried to the host child. Logs land in +//! `%ProgramData%\punktfunk\logs\`. + +use anyhow::{bail, Context, Result}; +use std::ffi::{c_void, OsString}; +use std::path::PathBuf; +use std::sync::atomic::{AtomicIsize, Ordering}; +use std::time::Duration; + +use windows::core::{PCWSTR, PWSTR}; +use windows::Win32::Foundation::{CloseHandle, HANDLE, WAIT_OBJECT_0}; +use windows::Win32::Security::{ + DuplicateTokenEx, SecurityImpersonation, SetTokenInformation, TokenPrimary, TokenSessionId, + SECURITY_ATTRIBUTES, TOKEN_ADJUST_DEFAULT, TOKEN_ADJUST_SESSIONID, TOKEN_ALL_ACCESS, + TOKEN_ASSIGN_PRIMARY, TOKEN_DUPLICATE, TOKEN_QUERY, +}; +use windows::Win32::Storage::FileSystem::{ + CreateFileW, FILE_APPEND_DATA, FILE_GENERIC_WRITE, FILE_SHARE_READ, FILE_SHARE_WRITE, + FILE_WRITE_DATA, OPEN_ALWAYS, +}; +use windows::Win32::System::Environment::{CreateEnvironmentBlock, DestroyEnvironmentBlock}; +use windows::Win32::System::JobObjects::{ + AssignProcessToJobObject, CreateJobObjectW, JobObjectExtendedLimitInformation, + SetInformationJobObject, JOBOBJECT_EXTENDED_LIMIT_INFORMATION, 
JOB_OBJECT_LIMIT_BREAKAWAY_OK, + JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, +}; +use windows::Win32::System::RemoteDesktop::WTSGetActiveConsoleSessionId; +use windows::Win32::System::Threading::{ + CreateEventW, CreateProcessAsUserW, GetCurrentProcess, OpenProcessToken, ResetEvent, SetEvent, + TerminateProcess, WaitForMultipleObjects, CREATE_NO_WINDOW, CREATE_UNICODE_ENVIRONMENT, + INFINITE, PROCESS_INFORMATION, STARTF_USESTDHANDLES, STARTUPINFOW, +}; + +/// SCM service name (the key under HKLM\SYSTEM\CurrentControlSet\Services). Stable identity. +const SERVICE_NAME: &str = "PunktfunkHost"; +const SERVICE_DISPLAY: &str = "punktfunk streaming host"; +const SERVICE_DESCRIPTION: &str = + "Low-latency desktop/game streaming host. Launches the punktfunk host into the active session."; + +/// The host subcommand the service launches, overridable via `PUNKTFUNK_HOST_CMD` in host.env. +/// `serve --native` runs the GameStream (Moonlight) host + the native punktfunk/1 QUIC host in one +/// process — the unified host an end user wants. +const DEFAULT_HOST_CMD: &str = "serve --native"; + +/// Event handles shared between the SCM control handler (which signals them) and the supervision loop +/// (which waits on them). Stored as raw `isize` so the `'static + Send` handler can reach them without +/// a non-`Send` `HANDLE` capture. Set once in `run_service`. +static STOP_EVENT: AtomicIsize = AtomicIsize::new(0); +static SESSION_EVENT: AtomicIsize = AtomicIsize::new(0); + +fn load_event(a: &AtomicIsize) -> HANDLE { + HANDLE(a.load(Ordering::Relaxed) as *mut c_void) +} + +/// Dispatch `service `. 
+pub fn main(args: &[String]) -> Result<()> { + match args.first().map(String::as_str) { + Some("run") => run(), + Some("install") => install(), + Some("uninstall") => uninstall(), + Some("start") => sc(&["start", SERVICE_NAME]), + Some("stop") => sc(&["stop", SERVICE_NAME]), + Some("status") => sc(&["query", SERVICE_NAME]), + _ => { + eprintln!( + "punktfunk-host service — Windows service control\n\n\ + USAGE:\n\ + \x20 punktfunk-host service install register the auto-start service + firewall rules\n\ + \x20 punktfunk-host service uninstall stop + remove the service + firewall rules\n\ + \x20 punktfunk-host service start start the service now\n\ + \x20 punktfunk-host service stop stop the service\n\ + \x20 punktfunk-host service status query the service\n\n\ + Config: %ProgramData%\\punktfunk\\host.env Logs: %ProgramData%\\punktfunk\\logs\\" + ); + Ok(()) + } + } +} + +// ── Logging ───────────────────────────────────────────────────────────────────────────────────── + +/// `%ProgramData%\punktfunk\logs\service.log` — the service's own (supervision) log. The host child's +/// stdout/stderr are redirected to `host.log` in the same dir. +pub fn service_log_path() -> PathBuf { + let dir = crate::gamestream::config_dir().join("logs"); + let _ = std::fs::create_dir_all(&dir); + dir.join("service.log") +} + +fn host_log_path() -> PathBuf { + let dir = crate::gamestream::config_dir().join("logs"); + let _ = std::fs::create_dir_all(&dir); + dir.join("host.log") +} + +/// Initialise tracing to the service log file (the SCM gives the service no console/stderr). Falls +/// back to stderr if the file can't be opened. Called from `main()` only for `service run`. 
+pub fn init_file_logging(filter: tracing_subscriber::EnvFilter) { + match std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(service_log_path()) + { + Ok(file) => { + tracing_subscriber::fmt() + .with_env_filter(filter) + .with_ansi(false) + .with_writer(move || file.try_clone().expect("clone service log handle")) + .init(); + } + Err(_) => { + tracing_subscriber::fmt() + .with_env_filter(filter) + .with_writer(std::io::stderr) + .init(); + } + } +} + +// ── host.env config ───────────────────────────────────────────────────────────────────────────── + +fn host_env_path() -> PathBuf { + crate::gamestream::config_dir().join("host.env") +} + +/// Load `%ProgramData%\punktfunk\host.env` (KEY=VALUE lines, `#` comments) into this process's +/// environment, so the host child inherits `PUNKTFUNK_*` / `RUST_LOG` via the merged env block. +fn load_host_env() { + let path = host_env_path(); + let Ok(contents) = std::fs::read_to_string(&path) else { + tracing::info!(path = %path.display(), "no host.env (using defaults)"); + return; + }; + let mut n = 0; + for line in contents.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + if let Some((k, v)) = line.split_once('=') { + let (k, v) = (k.trim(), v.trim().trim_matches('"')); + if !k.is_empty() { + std::env::set_var(k, v); + n += 1; + } + } + } + tracing::info!(path = %path.display(), vars = n, "loaded host.env"); +} + +// ── service run (SCM entry point) ──────────────────────────────────────────────────────────────── + +windows_service::define_windows_service!(ffi_service_main, service_main); + +fn run() -> Result<()> { + // Blocks until the service stops; the SCM then calls `service_main` on its own thread. + windows_service::service_dispatcher::start(SERVICE_NAME, ffi_service_main).map_err(|e| { + anyhow::anyhow!( + "service_dispatcher failed ({e}). 
`service run` is launched by the Service Control \ + Manager, not by hand — use `punktfunk-host service install` then `service start`." + ) + }) +} + +fn service_main(_args: Vec) { + if let Err(e) = run_service() { + tracing::error!("service exited with error: {e:#}"); + } +} + +fn run_service() -> Result<()> { + use windows_service::service::{ + ServiceControl, ServiceControlAccept, ServiceExitCode, ServiceState, ServiceStatus, + ServiceType, + }; + use windows_service::service_control_handler::{self, ServiceControlHandlerResult}; + + // Two manual-reset events: STOP (set once, never reset) and SESSION (set on a console + // connect/disconnect, reset by the supervisor after it reacts). + let stop = + unsafe { CreateEventW(None, true, false, PCWSTR::null()) }.context("CreateEvent stop")?; + let session = unsafe { CreateEventW(None, true, false, PCWSTR::null()) } + .context("CreateEvent session")?; + STOP_EVENT.store(stop.0 as isize, Ordering::Relaxed); + SESSION_EVENT.store(session.0 as isize, Ordering::Relaxed); + + // The control handler captures nothing — it reaches the events through the statics, so it stays + // `Fn + Send + 'static`. Session lock/unlock are handled inside the host (DesktopWatcher), so we + // only flag console connect/disconnect/logon — the events that change the active session. 
+ let handler = move |control| -> ServiceControlHandlerResult { + match control { + ServiceControl::Stop | ServiceControl::Preshutdown | ServiceControl::Shutdown => { + unsafe { SetEvent(load_event(&STOP_EVENT)) }.ok(); + ServiceControlHandlerResult::NoError + } + ServiceControl::SessionChange(param) => { + use windows_service::service::SessionChangeReason::*; + if matches!( + param.reason, + ConsoleConnect | ConsoleDisconnect | SessionLogon + ) { + unsafe { SetEvent(load_event(&SESSION_EVENT)) }.ok(); + } + ServiceControlHandlerResult::NoError + } + ServiceControl::Interrogate => ServiceControlHandlerResult::NoError, + _ => ServiceControlHandlerResult::NotImplemented, + } + }; + let status_handle = service_control_handler::register(SERVICE_NAME, handler) + .context("register service control handler")?; + + let accepted = ServiceControlAccept::STOP + | ServiceControlAccept::PRESHUTDOWN + | ServiceControlAccept::SESSION_CHANGE; + let running = ServiceStatus { + service_type: ServiceType::OWN_PROCESS, + current_state: ServiceState::Running, + controls_accepted: accepted, + exit_code: ServiceExitCode::Win32(0), + checkpoint: 0, + wait_hint: Duration::default(), + process_id: None, + }; + status_handle + .set_service_status(running.clone()) + .context("set RUNNING")?; + tracing::info!("punktfunk service started — supervising host in the active console session"); + + load_host_env(); + let result = supervise(stop, session); + + // Report STOPPED regardless of how supervise returned. + let _ = status_handle.set_service_status(ServiceStatus { + current_state: ServiceState::Stopped, + controls_accepted: ServiceControlAccept::empty(), + ..running + }); + unsafe { + let _ = CloseHandle(stop); + let _ = CloseHandle(session); + } + result +} + +/// The supervision loop: (re)launch the host into the active console session and wait on +/// [stop, session-change, child-exit], relaunching on child exit and on a console-session switch. 
+fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> { + let exe = std::env::current_exe().context("current_exe")?; + let host_cmd = std::env::var("PUNKTFUNK_HOST_CMD").unwrap_or_else(|_| DEFAULT_HOST_CMD.into()); + let cmdline = format!("\"{}\" {host_cmd}", exe.to_string_lossy()); + let workdir: Vec = exe + .parent() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default() + .encode_utf16() + .chain(std::iter::once(0)) + .collect(); + + // Kill-on-close job so a service crash never orphans the SYSTEM host; BREAKAWAY_OK lets the host + // still spawn the WGC helper. + let job = unsafe { make_job() }.context("create job object")?; + + let mut restarts: u32 = 0; + loop { + if wait_one(stop, 0) { + break; + } + let session = unsafe { WTSGetActiveConsoleSessionId() }; + if session == 0xFFFF_FFFF { + // No interactive session yet (boot / fully logged out). Wait, but wake on stop/session. + tracing::info!("no active console session — waiting"); + if wait_any(&[stop, session_ev], 3000) == Some(0) { + break; + } + unsafe { ResetEvent(session_ev) }.ok(); + continue; + } + + let pi = match unsafe { spawn_host(session, &cmdline, &workdir, job) } { + Ok(pi) => pi, + Err(e) => { + tracing::error!("failed to launch host into session {session}: {e:#}"); + if wait_one(stop, 3000) { + break; + } + continue; + } + }; + tracing::info!(pid = pi.dwProcessId, session, cmd = %host_cmd, "host launched"); + + // Wait on stop / session-change / child-exit. + let reason = wait_any(&[stop, session_ev, pi.hProcess], INFINITE); + match reason { + Some(0) => { + // Stop: terminate the child and exit. + unsafe { + let _ = TerminateProcess(pi.hProcess, 0); + let _ = CloseHandle(pi.hProcess); + let _ = CloseHandle(pi.hThread); + } + break; + } + Some(1) => { + // Session change: relaunch only if the active console session actually moved. 
+ unsafe { ResetEvent(session_ev) }.ok(); + let now = unsafe { WTSGetActiveConsoleSessionId() }; + if now != session { + tracing::info!( + old = session, + new = now, + "console session changed — relaunching host" + ); + unsafe { + let _ = TerminateProcess(pi.hProcess, 0); + let _ = CloseHandle(pi.hProcess); + let _ = CloseHandle(pi.hThread); + } + restarts = 0; + continue; + } + // Same session (e.g. a stray notification) — keep waiting on the same child. + let r = wait_any(&[stop, pi.hProcess], INFINITE); + unsafe { + let _ = TerminateProcess(pi.hProcess, 0); + let _ = CloseHandle(pi.hProcess); + let _ = CloseHandle(pi.hThread); + } + if r == Some(0) { + break; + } + // child exited → fall through to relaunch + } + _ => { + // Child exited on its own — relaunch (with a small crash-loop backoff). + tracing::warn!("host process exited — relaunching"); + unsafe { + let _ = CloseHandle(pi.hProcess); + let _ = CloseHandle(pi.hThread); + } + } + } + + restarts += 1; + let backoff = restarts.min(10) * 500; // 0.5s..5s + if wait_one(stop, backoff) { + break; + } + } + + unsafe { + // Dropping the job (KILL_ON_JOB_CLOSE) reaps any straggler in it. + let _ = CloseHandle(job); + } + tracing::info!("supervision loop ended"); + Ok(()) +} + +/// `true` if `h` is signalled within `ms`. +fn wait_one(h: HANDLE, ms: u32) -> bool { + unsafe { WaitForMultipleObjects(&[h], false, ms) == WAIT_OBJECT_0 } +} + +/// Wait on several handles; returns the index of the first signalled, or `None` on timeout. +fn wait_any(handles: &[HANDLE], ms: u32) -> Option { + let r = unsafe { WaitForMultipleObjects(handles, false, ms) }; + let idx = r.0.wrapping_sub(WAIT_OBJECT_0.0); + (idx < handles.len() as u32).then_some(idx as usize) +} + +/// A kill-on-close + breakaway-ok job object. 
+unsafe fn make_job() -> Result { + let job = CreateJobObjectW(None, PCWSTR::null()).context("CreateJobObjectW")?; + let mut info = JOBOBJECT_EXTENDED_LIMIT_INFORMATION::default(); + info.BasicLimitInformation.LimitFlags = + JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK; + SetInformationJobObject( + job, + JobObjectExtendedLimitInformation, + &info as *const _ as *const c_void, + std::mem::size_of::() as u32, + ) + .context("SetInformationJobObject")?; + Ok(job) +} + +/// Launch the host as SYSTEM into `session_id`'s interactive desktop. Returns the child handles. +unsafe fn spawn_host( + session_id: u32, + cmdline: &str, + workdir: &[u16], + job: HANDLE, +) -> Result { + // 1) A primary SYSTEM token retargeted to the active console session: duplicate THIS process's + // (LocalSystem) token, then set its session id. SYSTEM holds SE_TCB so SetTokenInformation + // (TokenSessionId) is permitted. + let mut proc_token = HANDLE::default(); + OpenProcessToken( + GetCurrentProcess(), + TOKEN_DUPLICATE + | TOKEN_QUERY + | TOKEN_ASSIGN_PRIMARY + | TOKEN_ADJUST_DEFAULT + | TOKEN_ADJUST_SESSIONID, + &mut proc_token, + ) + .context("OpenProcessToken (service must run as SYSTEM)")?; + + let mut primary = HANDLE::default(); + let dup = DuplicateTokenEx( + proc_token, + TOKEN_ALL_ACCESS, + None, + SecurityImpersonation, + TokenPrimary, + &mut primary, + ); + let _ = CloseHandle(proc_token); + dup.context("DuplicateTokenEx(TokenPrimary)")?; + + SetTokenInformation( + primary, + TokenSessionId, + &session_id as *const u32 as *const c_void, + std::mem::size_of::() as u32, + ) + .context("SetTokenInformation(TokenSessionId)")?; + + // 2) The session's environment block, merged with this process's PUNKTFUNK_*/RUST_LOG (so the + // host runs with host.env's settings, not a bare block). Same merge the WGC helper uses. 
+ let mut env_block: *mut c_void = std::ptr::null_mut(); + let _ = CreateEnvironmentBlock(&mut env_block, Some(primary), false); + let merged = crate::capture::wgc_relay::merged_env_block(env_block as *const u16); + if !env_block.is_null() { + let _ = DestroyEnvironmentBlock(env_block); + } + + // 3) Redirect the host's stdout+stderr to host.log (inheritable handle). + let log = open_log_handle(&host_log_path())?; + + let mut si = STARTUPINFOW { + cb: std::mem::size_of::() as u32, + dwFlags: STARTF_USESTDHANDLES, + hStdOutput: log, + hStdError: log, + ..Default::default() + }; + let mut desktop: Vec = "winsta0\\default\0".encode_utf16().collect(); + si.lpDesktop = PWSTR(desktop.as_mut_ptr()); + + let mut cmd: Vec = cmdline.encode_utf16().chain(std::iter::once(0)).collect(); + let cwd = (!workdir.is_empty()).then_some(PCWSTR(workdir.as_ptr())); + let mut pi = PROCESS_INFORMATION::default(); + + let created = CreateProcessAsUserW( + Some(primary), + None, + Some(PWSTR(cmd.as_mut_ptr())), + None, + None, + true, // inherit the log handle + CREATE_UNICODE_ENVIRONMENT | CREATE_NO_WINDOW, + Some(merged.as_ptr() as *const c_void), + cwd.unwrap_or(PCWSTR::null()), + &si, + &mut pi, + ); + + let _ = CloseHandle(log); // the child owns its inherited copy + let _ = CloseHandle(primary); + created.context("CreateProcessAsUserW(host)")?; + + // Best-effort: keep the host inside the kill-on-close job. + let _ = AssignProcessToJobObject(job, pi.hProcess); + Ok(pi) +} + +/// Open `path` for appending, as an INHERITABLE handle (so the child can use it as stdout/stderr). 
+unsafe fn open_log_handle(path: &std::path::Path) -> Result { + let wpath: Vec = path + .as_os_str() + .to_string_lossy() + .encode_utf16() + .chain(std::iter::once(0)) + .collect(); + let sa = SECURITY_ATTRIBUTES { + nLength: std::mem::size_of::() as u32, + lpSecurityDescriptor: std::ptr::null_mut(), + bInheritHandle: true.into(), + }; + // Append (no FILE_WRITE_DATA → all writes go to EOF), so each relaunch's OPEN_ALWAYS reopen + // accumulates instead of truncating from offset 0. This mirrors Rust's own `OpenOptions::append` + // access mask (FILE_GENERIC_WRITE minus WRITE_DATA, plus APPEND_DATA + SYNCHRONIZE/READ_CONTROL); + // bare FILE_APPEND_DATA alone produced a child handle that silently dropped writes. + let access = (FILE_GENERIC_WRITE.0 & !FILE_WRITE_DATA.0) | FILE_APPEND_DATA.0; + let h = CreateFileW( + PCWSTR(wpath.as_ptr()), + access, + FILE_SHARE_READ | FILE_SHARE_WRITE, + Some(&sa), + OPEN_ALWAYS, + windows::Win32::Storage::FileSystem::FILE_FLAGS_AND_ATTRIBUTES(0), + None, + ) + .context("CreateFileW(host.log)")?; + Ok(h) +} + +// ── install / uninstall ────────────────────────────────────────────────────────────────────────── + +fn install() -> Result<()> { + use windows_service::service::{ + ServiceAccess, ServiceErrorControl, ServiceInfo, ServiceStartType, ServiceType, + }; + use windows_service::service_manager::{ServiceManager, ServiceManagerAccess}; + + let exe = std::env::current_exe().context("current_exe")?; + let manager = ServiceManager::local_computer( + None::<&str>, + ServiceManagerAccess::CONNECT | ServiceManagerAccess::CREATE_SERVICE, + ) + .context("open Service Control Manager (run from an elevated/Administrator prompt)")?; + + let info = ServiceInfo { + name: OsString::from(SERVICE_NAME), + display_name: OsString::from(SERVICE_DISPLAY), + service_type: ServiceType::OWN_PROCESS, + start_type: ServiceStartType::AutoStart, + error_control: ServiceErrorControl::Normal, + executable_path: exe.clone(), + launch_arguments: 
vec![OsString::from("service"), OsString::from("run")], + dependencies: vec![], + account_name: None, // None = LocalSystem + account_password: None, + }; + + // Create, or reconfigure if it already exists (idempotent install/upgrade). + match manager.create_service(&info, ServiceAccess::CHANGE_CONFIG | ServiceAccess::START) { + Ok(svc) => { + let _ = svc.set_description(SERVICE_DESCRIPTION); + println!("Created service '{SERVICE_NAME}' (auto-start, LocalSystem)."); + } + Err(windows_service::Error::Winapi(e)) + if e.raw_os_error() == Some(1073 /* ERROR_SERVICE_EXISTS */) => + { + let svc = manager + .open_service(SERVICE_NAME, ServiceAccess::CHANGE_CONFIG) + .context("open existing service to reconfigure")?; + svc.change_config(&info) + .context("reconfigure existing service")?; + let _ = svc.set_description(SERVICE_DESCRIPTION); + println!("Reconfigured existing service '{SERVICE_NAME}'."); + } + Err(e) => return Err(e).context("create service"), + } + + ensure_default_host_env()?; + add_firewall_rules(); + + println!( + "\nInstalled. Config: {}\nLogs: {}\n\nStart now with: punktfunk-host service start", + host_env_path().display(), + crate::gamestream::config_dir().join("logs").display() + ); + Ok(()) +} + +fn uninstall() -> Result<()> { + use windows_service::service::ServiceAccess; + use windows_service::service_manager::{ServiceManager, ServiceManagerAccess}; + + let _ = sc(&["stop", SERVICE_NAME]); // best-effort stop first + let manager = ServiceManager::local_computer(None::<&str>, ServiceManagerAccess::CONNECT) + .context("open Service Control Manager (run elevated)")?; + let svc = manager + .open_service(SERVICE_NAME, ServiceAccess::DELETE) + .context("open service for delete")?; + svc.delete().context("delete service")?; + remove_firewall_rules(); + println!("Removed service '{SERVICE_NAME}' and its firewall rules."); + Ok(()) +} + +/// Write a default `host.env` if none exists, so a fresh install streams with NVENC out of the box. 
+fn ensure_default_host_env() -> Result<()> { + let path = host_env_path(); + if path.exists() { + return Ok(()); + } + if let Some(dir) = path.parent() { + std::fs::create_dir_all(dir).ok(); + } + let default = "# punktfunk host configuration (read by the Windows service).\n\ + # KEY=VALUE per line; '#' comments. Restart the service after editing:\n\ + # punktfunk-host service stop && punktfunk-host service start\n\ + \n\ + PUNKTFUNK_ENCODER=nvenc\n\ + PUNKTFUNK_VIDEO_SOURCE=virtual\n\ + PUNKTFUNK_SECURE_DDA=1\n\ + RUST_LOG=info\n\ + \n\ + # The host subcommand the service launches (default: serve --native).\n\ + # PUNKTFUNK_HOST_CMD=serve --native\n\ + \n\ + # Force a specific NVENC render GPU by name substring (multi-GPU boxes only):\n\ + # PUNKTFUNK_RENDER_ADAPTER=4090\n"; + std::fs::write(&path, default).with_context(|| format!("write {}", path.display()))?; + println!("Wrote default config: {}", path.display()); + Ok(()) +} + +// ── firewall + sc helpers ──────────────────────────────────────────────────────────────────────── + +/// Inbound firewall rules for the streaming ports (best-effort; logs but never fails the install). 
+fn add_firewall_rules() { + // (name suffix, protocol, ports) + let rules = [ + ("TCP", "TCP", "47984,47989,48010,47990"), + ("UDP", "UDP", "47998-48010,9777,5353"), + ]; + for (suffix, proto, ports) in rules { + let name = format!("punktfunk {suffix}"); + let ok = run_quiet( + "netsh", + &[ + "advfirewall", + "firewall", + "add", + "rule", + &format!("name={name}"), + "dir=in", + "action=allow", + &format!("protocol={proto}"), + &format!("localport={ports}"), + ], + ); + if ok { + println!("Firewall rule added: {name} ({ports})"); + } else { + eprintln!("warning: could not add firewall rule '{name}' (add it manually if needed)"); + } + } +} + +fn remove_firewall_rules() { + for suffix in ["TCP", "UDP"] { + let name = format!("punktfunk {suffix}"); + let _ = run_quiet( + "netsh", + &[ + "advfirewall", + "firewall", + "delete", + "rule", + &format!("name={name}"), + ], + ); + } +} + +/// Run an `sc.exe` command, passing its output through (used by start/stop/status). +fn sc(args: &[&str]) -> Result<()> { + let status = std::process::Command::new("sc") + .args(args) + .status() + .context("run sc.exe")?; + if !status.success() { + bail!("sc {} failed ({status})", args.join(" ")); + } + Ok(()) +} + +/// Run a command discarding output; return whether it succeeded. 
+fn run_quiet(cmd: &str, args: &[&str]) -> bool { + std::process::Command::new(cmd) + .args(args) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false) +} diff --git a/crates/punktfunk-host/src/vdisplay/sudovda.rs b/crates/punktfunk-host/src/vdisplay/sudovda.rs index 869b721..4f4db87 100644 --- a/crates/punktfunk-host/src/vdisplay/sudovda.rs +++ b/crates/punktfunk-host/src/vdisplay/sudovda.rs @@ -10,9 +10,9 @@ use std::ffi::c_void; use std::mem::size_of; use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; +use std::sync::{Arc, Mutex, Once}; use std::thread::{self, JoinHandle}; -use std::time::Duration; +use std::time::{Duration, Instant}; use anyhow::{Context, Result}; use windows::core::{GUID, PCWSTR}; @@ -22,16 +22,18 @@ use windows::Win32::Devices::DeviceAndDriverInstallation::{ SP_DEVICE_INTERFACE_DATA, SP_DEVICE_INTERFACE_DETAIL_DATA_W, }; use windows::Win32::Devices::Display::{ - DisplayConfigGetDeviceInfo, GetDisplayConfigBufferSizes, QueryDisplayConfig, - DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME, DISPLAYCONFIG_MODE_INFO, DISPLAYCONFIG_PATH_INFO, - DISPLAYCONFIG_SOURCE_DEVICE_NAME, QDC_ONLY_ACTIVE_PATHS, + DisplayConfigGetDeviceInfo, DisplayConfigSetDeviceInfo, GetDisplayConfigBufferSizes, + QueryDisplayConfig, SetDisplayConfig, DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME, + DISPLAYCONFIG_DEVICE_INFO_SET_ADVANCED_COLOR_STATE, DISPLAYCONFIG_MODE_INFO, + DISPLAYCONFIG_PATH_INFO, DISPLAYCONFIG_SET_ADVANCED_COLOR_STATE, + DISPLAYCONFIG_SOURCE_DEVICE_NAME, QDC_ONLY_ACTIVE_PATHS, SDC_ALLOW_CHANGES, SDC_APPLY, + SDC_USE_SUPPLIED_DISPLAY_CONFIG, }; use windows::Win32::Foundation::{CloseHandle, HANDLE, LUID}; use windows::Win32::Graphics::Gdi::{ - ChangeDisplaySettingsExW, EnumDisplayDevicesW, EnumDisplaySettingsW, CDS_GLOBAL, CDS_NORESET, - CDS_SET_PRIMARY, CDS_TEST, CDS_TYPE, CDS_UPDATEREGISTRY, DEVMODEW, DISPLAY_DEVICEW, - DISPLAY_DEVICE_ATTACHED_TO_DESKTOP, 
DISP_CHANGE_SUCCESSFUL, DM_BITSPERPEL, DM_DISPLAYFREQUENCY, - DM_PELSHEIGHT, DM_PELSWIDTH, DM_POSITION, ENUM_CURRENT_SETTINGS, ENUM_DISPLAY_SETTINGS_MODE, + ChangeDisplaySettingsExW, EnumDisplaySettingsW, CDS_TEST, CDS_UPDATEREGISTRY, DEVMODEW, + DISP_CHANGE_SUCCESSFUL, DM_BITSPERPEL, DM_DISPLAYFREQUENCY, DM_PELSHEIGHT, DM_PELSWIDTH, + ENUM_DISPLAY_SETTINGS_MODE, }; use windows::Win32::Storage::FileSystem::{ CreateFileW, FILE_FLAGS_AND_ATTRIBUTES, FILE_SHARE_READ, FILE_SHARE_WRITE, OPEN_EXISTING, @@ -54,10 +56,19 @@ const IOCTL_GET_WATCHDOG: u32 = ctl(0x803); const IOCTL_DRIVER_PING: u32 = ctl(0x888); const IOCTL_GET_VERSION: u32 = ctl(0x8FF); -// A fixed monitor identity. One session at a time today; Windows persists this monitor's layout -// across sessions by GUID, and REMOVE keys off it. (TODO: derive per-client when concurrent -// sessions land.) -const MONITOR_GUID: GUID = GUID::from_u128(0x70756E6B_7466_756E_6B30_000000000001); +/// A UNIQUE-per-session SudoVDA monitor GUID. The monitor is keyed by GUID for IOCTL_ADD/REMOVE, so a +/// FIXED GUID makes overlapping sessions (a client reconnecting after a freeze before the old session +/// has torn down, or genuine concurrent sessions) all map to the SAME monitor — then one session's +/// IOCTL_REMOVE on teardown tears the monitor down OUT FROM UNDER a still-live session ("display +/// disconnected" sound + freeze, even with no context change — observed live). Make it unique per +/// (process, session): base GUID with the low 48-bit node = (pid << 16 | session#). 
+fn next_monitor_guid() -> GUID { + use std::sync::atomic::AtomicU32; + static N: AtomicU32 = AtomicU32::new(0); + let n = N.fetch_add(1, Ordering::Relaxed) as u128; + let pid = std::process::id() as u128; + GUID::from_u128(0x70756E6B_7466_756E_6B30_000000000000u128 | (pid << 16) | (n & 0xFFFF)) +} #[repr(C)] #[derive(Clone, Copy)] @@ -133,7 +144,7 @@ unsafe fn resolve_render_adapter_luid() -> Option { continue; } let vram = d.DedicatedVideoMemory as u64; // SudoVDA software adapter ≈ 0 → loses to the dGPU - if best.as_ref().map_or(true, |(_, v, _)| vram > *v) { + if best.as_ref().is_none_or(|(_, v, _)| vram > *v) { best = Some((d.AdapterLuid, vram, name)); } } @@ -216,6 +227,55 @@ pub(crate) unsafe fn resolve_gdi_name(target_id: u32) -> Option { None } +/// Toggle the SudoVDA target's advanced-color (HDR) state via the CCD API. Disabling HDR while on the +/// secure (Winlogon) desktop makes it render SDR/composed so DXGI Desktop Duplication can capture it +/// (the HDR fullscreen independent-flip otherwise storms `ACCESS_LOST` → black); re-enable on return so +/// WGC keeps HDR on the normal desktop. Returns true on a successful `DisplayConfigSetDeviceInfo`. 
+pub(crate) unsafe fn set_advanced_color(target_id: u32, enable: bool) -> bool { + let mut np = 0u32; + let mut nm = 0u32; + if GetDisplayConfigBufferSizes(QDC_ONLY_ACTIVE_PATHS, &mut np, &mut nm).is_err() { + return false; + } + let mut paths = vec![DISPLAYCONFIG_PATH_INFO::default(); np as usize]; + let mut modes = vec![DISPLAYCONFIG_MODE_INFO::default(); nm as usize]; + if QueryDisplayConfig( + QDC_ONLY_ACTIVE_PATHS, + &mut np, + paths.as_mut_ptr(), + &mut nm, + modes.as_mut_ptr(), + None, + ) + .is_err() + { + return false; + } + for p in paths.iter().take(np as usize) { + if p.targetInfo.id == target_id { + let mut s = DISPLAYCONFIG_SET_ADVANCED_COLOR_STATE::default(); + s.header.r#type = DISPLAYCONFIG_DEVICE_INFO_SET_ADVANCED_COLOR_STATE; + s.header.size = size_of::() as u32; + s.header.adapterId = p.targetInfo.adapterId; + s.header.id = p.targetInfo.id; + s.Anonymous.value = enable as u32; // bit 0 = enableAdvancedColor + let rc = DisplayConfigSetDeviceInfo(&s.header); + tracing::info!( + target_id, + enable, + rc, + "SudoVDA set advanced-color (HDR) state" + ); + return rc == 0; + } + } + tracing::warn!( + target_id, + "set_advanced_color: target not found in active paths" + ); + false +} + /// Force the freshly-added SudoVDA monitor to the client's exact `WxH@Hz`. The ADD IOCTL only /// ADVERTISES the mode; Windows otherwise activates an IDD target at a 1280x720 default, so the /// ACTIVE mode (what DXGI Desktop Duplication captures) must be set explicitly. CDS_TEST first so a @@ -289,9 +349,15 @@ fn set_active_mode(gdi_name: &str, mode: Mode) { ); } + // Set ONLY this output's mode in place (size/refresh/bpp; NO DM_POSITION). Do NOT promote it to + // PRIMARY here and do NOT write a GLOBAL topology: promoting the IDD to primary at (0,0) while the + // box's leftover basic display is still active contests the topology and storms + // DXGI_ERROR_MODE_CHANGE_IN_PROGRESS (measured live). 
The IDD is made the sole → primary → + // DWM-composited display by the CCD isolation in create() (which deactivates the other display + // first), so a sole display is already primary and needs no CDS_SET_PRIMARY here. let dm = DEVMODEW { dmSize: size_of::() as u16, - dmFields: DM_PELSWIDTH | DM_PELSHEIGHT | DM_DISPLAYFREQUENCY | DM_BITSPERPEL | DM_POSITION, + dmFields: DM_PELSWIDTH | DM_PELSHEIGHT | DM_DISPLAYFREQUENCY | DM_BITSPERPEL, dmBitsPerPel: 32, dmPelsWidth: mode.width, dmPelsHeight: mode.height, @@ -316,10 +382,7 @@ fn set_active_mode(gdi_name: &str, mode: Mode) { PCWSTR(wname.as_ptr()), Some(&dm), None, - // Make it the PRIMARY display: a blank *extended* IDD output isn't composited by the DWM, - // so it produces no duplication frames. As primary it carries the shell/cursor → frames - // flow (this is what Apollo does). Position is (0,0) via DM_POSITION (zeroed by default). - CDS_UPDATEREGISTRY | CDS_GLOBAL | CDS_SET_PRIMARY, + CDS_UPDATEREGISTRY, None, ) }; @@ -341,101 +404,83 @@ fn set_active_mode(gdi_name: &str, mode: Mode) { } } -/// Detach every display except `keep_gdi_name`, leaving the SudoVDA virtual output as the ONLY -/// display. This is the SudoVDA/Apollo "isolate the virtual display" move and the key to capturing -/// the secure desktop: Windows renders the login / UAC (Winlogon) desktop on the physical/primary -/// display and resets the topology when it switches there — with a physical monitor still attached -/// (e.g. an LG TV), the login lands on it and our virtual output goes perpetually ACCESS_LOST. With -/// the physical detached and the change PERSISTED to the registry, Winlogon reads "only the virtual -/// is attached" and the secure desktop has nowhere to render but the output we capture. -/// -/// Returns the displays we detached plus their saved modes so teardown can restore them. 
-unsafe fn isolate_displays(keep_gdi_name: &str) -> Vec<(String, DEVMODEW)> { - let mut saved = Vec::new(); - let mut idx = 0u32; - loop { - let mut dd = DISPLAY_DEVICEW { - cb: size_of::() as u32, - ..Default::default() - }; - if !EnumDisplayDevicesW(PCWSTR::null(), idx, &mut dd, 0).as_bool() { - break; - } - idx += 1; - if (dd.StateFlags & DISPLAY_DEVICE_ATTACHED_TO_DESKTOP).0 == 0 { - continue; // not part of the desktop — nothing to detach - } - let name = String::from_utf16_lossy(&dd.DeviceName); - let name = name.trim_end_matches('\u{0}').to_string(); - if name == keep_gdi_name { - continue; // the virtual output we want to keep - } - // Save the current mode so the teardown can re-attach this display where it was. - let mut cur = DEVMODEW { - dmSize: size_of::() as u16, - ..Default::default() - }; - let wname: Vec = name.encode_utf16().chain(std::iter::once(0)).collect(); - if EnumDisplaySettingsW(PCWSTR(wname.as_ptr()), ENUM_CURRENT_SETTINGS, &mut cur).as_bool() { - saved.push((name.clone(), cur)); - } - // A 0x0 mode removes the display from the desktop. NORESET batches; we commit once below. - let off = DEVMODEW { - dmSize: size_of::() as u16, - dmFields: DM_POSITION | DM_PELSWIDTH | DM_PELSHEIGHT, - ..Default::default() - }; - let r = ChangeDisplaySettingsExW( - PCWSTR(wname.as_ptr()), - Some(&off), - None, - CDS_UPDATEREGISTRY | CDS_NORESET | CDS_GLOBAL, - None, - ); - tracing::info!("display isolate: detaching {name} (result={})", r.0); +/// Saved active display topology, for restoring on teardown. +type SavedConfig = (Vec, Vec); + +/// `DISPLAYCONFIG_PATH_ACTIVE` (wingdi.h) — the `flags` bit marking a path active. The `windows` crate +/// doesn't export it, so define it here. +const DISPLAYCONFIG_PATH_ACTIVE: u32 = 0x0000_0001; + +/// Robust display isolation via the CCD API. 
The naive GDI approach (EnumDisplayDevices + +/// ChangeDisplaySettings) MISSES displays on a hybrid box — an iGPU-attached physical monitor isn't +/// flagged `ATTACHED_TO_DESKTOP` in the GDI enum, so it's never detached and the secure desktop / +/// lock screen lands on IT while our virtual output freezes. `QueryDisplayConfig(QDC_ONLY_ACTIVE_PATHS)` +/// sees every active path; we deactivate all of them EXCEPT the SudoVDA target's, leaving the virtual +/// display as the sole desktop so ALL content (incl. Winlogon) renders to it. Apollo isolates the same +/// way (CCD). Returns the original active config to restore on teardown. +unsafe fn isolate_displays_ccd(keep_target_id: u32) -> Option { + let mut np = 0u32; + let mut nm = 0u32; + if GetDisplayConfigBufferSizes(QDC_ONLY_ACTIVE_PATHS, &mut np, &mut nm).is_err() { + return None; } - if !saved.is_empty() { - // Commit the batched detaches (NULL device + 0 flags applies the pending registry changes). - let _ = ChangeDisplaySettingsExW(PCWSTR::null(), None, None, CDS_TYPE(0), None); - tracing::info!( - "display isolate: {} display(s) detached — only {keep_gdi_name} remains", - saved.len() - ); + let mut paths = vec![DISPLAYCONFIG_PATH_INFO::default(); np as usize]; + let mut modes = vec![DISPLAYCONFIG_MODE_INFO::default(); nm as usize]; + if QueryDisplayConfig( + QDC_ONLY_ACTIVE_PATHS, + &mut np, + paths.as_mut_ptr(), + &mut nm, + modes.as_mut_ptr(), + None, + ) + .is_err() + { + return None; } - saved + paths.truncate(np as usize); + modes.truncate(nm as usize); + let saved = (paths.clone(), modes.clone()); + let mut others = 0u32; + for p in paths.iter_mut() { + if p.targetInfo.id == keep_target_id { + continue; + } + if p.flags & DISPLAYCONFIG_PATH_ACTIVE != 0 { + p.flags &= !DISPLAYCONFIG_PATH_ACTIVE; // mark this path inactive + others += 1; + } + } + if others == 0 { + tracing::info!("display isolate (CCD): SudoVDA target {keep_target_id} already the only active display"); + return Some(saved); + } + let rc 
= SetDisplayConfig( + Some(paths.as_slice()), + Some(modes.as_slice()), + SDC_APPLY | SDC_USE_SUPPLIED_DISPLAY_CONFIG | SDC_ALLOW_CHANGES, + ); + if rc == 0 { + tracing::info!("display isolate (CCD): deactivated {others} other display(s) — SudoVDA target {keep_target_id} is now the sole desktop"); + } else { + tracing::warn!("display isolate (CCD): SetDisplayConfig failed rc={rc:#x} (tried to deactivate {others} path(s))"); + } + Some(saved) } -/// Re-attach the displays [`isolate_displays`] detached, restoring each to its saved mode. Called on -/// teardown BEFORE the virtual output is removed, so there is always at least one display. -unsafe fn restore_displays(saved: &[(String, DEVMODEW)]) { - for (name, dm) in saved { - let wname: Vec = name.encode_utf16().chain(std::iter::once(0)).collect(); - let _ = ChangeDisplaySettingsExW( - PCWSTR(wname.as_ptr()), - Some(dm), - None, - CDS_UPDATEREGISTRY | CDS_NORESET | CDS_GLOBAL, - None, - ); - } - if !saved.is_empty() { - let _ = ChangeDisplaySettingsExW(PCWSTR::null(), None, None, CDS_TYPE(0), None); - tracing::info!("display isolate: restored {} display(s)", saved.len()); - } -} - -/// Re-detach physical displays so the secure (Winlogon) desktop keeps rendering to the virtual -/// output — for the in-session DXGI capture recovery (dxgi.rs `recreate_dupl`). The lock/UAC/login -/// switch can re-attach a physical monitor (the secure desktop then lands on IT and our virtual -/// output goes perpetually ACCESS_LOST — the "born-lost" storm); re-running the isolate routes the -/// secure desktop back to the virtual output, mirroring what a fresh session's `create` does (the -/// delta that makes a reconnect work where in-session recovery didn't). Idempotent + cheap: when -/// nothing besides `gdi_name` is attached, [`isolate_displays`] finds nothing to detach and commits -/// nothing — so this is safe to call on every throttled recovery tick (no display thrash). 
-pub(crate) fn reassert_isolation(gdi_name: &str) { - unsafe { - let _ = isolate_displays(gdi_name); +/// Restore the topology saved by [`isolate_displays_ccd`] (teardown, before the virtual output is +/// removed), re-activating the displays we deactivated. +unsafe fn restore_displays_ccd(saved: &SavedConfig) { + let (paths, modes) = saved; + if paths.is_empty() { + return; } + let rc = SetDisplayConfig( + Some(paths.as_slice()), + Some(modes.as_slice()), + SDC_APPLY | SDC_USE_SUPPLIED_DISPLAY_CONFIG | SDC_ALLOW_CHANGES, + ); + tracing::info!("display isolate (CCD): restored original topology rc={rc:#x}"); } unsafe fn open_device() -> Result { @@ -476,44 +521,64 @@ unsafe fn open_device() -> Result { Ok(handle) } -/// A live SudoVDA control handle. One per host; `create` adds/removes monitors on it. -pub struct SudoVdaDisplay { - device: HANDLE, - watchdog_s: u32, +// ── Host-level reference-counted SudoVDA monitor lifecycle ────────────────────────────────────── +// +// The virtual monitor is created on the first session and REUSED across sessions. When the last +// session disconnects the monitor LINGERS for a grace window (PUNKTFUNK_MONITOR_LINGER_MS, default +// 10 s): a reconnect within the window reuses it instantly (no new screen, no PnP connect/disconnect +// chime, no teardown/recreate kernel churn); after the window a background timer REMOVEs it so a +// physical-screen user gets their screen back. Overlapping sessions share one monitor via the +// refcount (teardown only at refs==0 + expired grace), so a stale session can never REMOVE a live +// session's monitor (the earlier collision). The control-device HANDLE is opened once and kept for +// the host lifetime — it's a handle, not a screen, so it creates no phantom display. + +/// The resources backing one live SudoVDA monitor (owned by [`MGR`], not by any session). 
+struct Monitor { + guid: GUID, + target_id: u32, + luid: LUID, + gdi_name: Option, + mode: Mode, + stop: Arc, + pinger: Option>, + ccd_saved: Option, } -// The HANDLE is a kernel object usable from any thread; we only ever issue serialized IOCTLs. -unsafe impl Send for SudoVdaDisplay {} +enum MgrState { + Idle, + Active { mon: Monitor, refs: u32 }, + Lingering { mon: Monitor, until: Instant }, +} + +struct Mgr { + /// Control-device handle (raw isize; `HANDLE` isn't `Send`). Opened once, kept for the host life. + device: Option, + watchdog_s: u32, + state: MgrState, +} + +static MGR: Mutex = Mutex::new(Mgr { + device: None, + watchdog_s: 3, + state: MgrState::Idle, +}); + +/// The Windows virtual-display backend. A marker — the monitor lifecycle lives in the global [`MGR`]. +pub struct SudoVdaDisplay; impl SudoVdaDisplay { pub fn new() -> Result { - let device = unsafe { open_device()? }; - let mut ver = [0u8; 4]; - if unsafe { ioctl(device, IOCTL_GET_VERSION, &[], &mut ver) }.is_ok() { - tracing::info!( - "SudoVDA protocol {}.{}.{} (test={})", - ver[0], - ver[1], - ver[2], - ver[3] - ); - } - let mut wd = [0u8; 8]; - let watchdog_s = if unsafe { ioctl(device, IOCTL_GET_WATCHDOG, &[], &mut wd) }.is_ok() { - u32::from_le_bytes([wd[0], wd[1], wd[2], wd[3]]).max(1) - } else { - 3 - }; - tracing::info!("SudoVDA watchdog timeout {watchdog_s}s"); - Ok(Self { device, watchdog_s }) + // Open the control device once (validates the driver is present) + log version/watchdog. + let mut g = MGR.lock().unwrap(); + mgr_ensure_device(&mut g)?; + Ok(Self) } } impl Drop for SudoVdaDisplay { fn drop(&mut self) { - unsafe { - let _ = CloseHandle(self.device); - } + // Nothing: the control device + monitor lifecycle are host-level (owned by MGR) and + // deliberately outlive any single session so a reconnect can reuse the monitor. 
} } @@ -523,25 +588,52 @@ impl VirtualDisplay for SudoVdaDisplay { } fn create(&mut self, mode: Mode) -> Result { + // Delegate to the host-level manager: create the monitor, reuse a lingering one on reconnect, + // or join the live one — and hand back a lease whose Drop releases the refcount. + mgr_acquire(mode) + } +} + +/// Create a fresh SudoVDA monitor at `mode` on the (host-level) control `device`. The old per-session +/// `create()` body, now owned by the manager: ADD the target, start the watchdog ping, resolve the +/// GDI name, force the client mode + (default) isolate to a sole composited display. Returns the +/// [`Monitor`] resources; the manager tracks its lifecycle (refcount + linger). +unsafe fn create_monitor(device: isize, mode: Mode, watchdog_s: u32) -> Result { + let dev = HANDLE(device as *mut c_void); + { let mut device_name = [0u8; 14]; let nm = b"punktfunk"; device_name[..nm.len()].copy_from_slice(nm); + // Fresh GUID per created monitor (the manager refcount, not the GUID, prevents the + // cross-session REMOVE collision now). + let session_guid = next_monitor_guid(); let add = AddParams { width: mode.width, height: mode.height, refresh: mode.refresh_hz, - guid: MONITOR_GUID, + guid: session_guid, device_name, serial: [0u8; 14], }; - // Pin the IDD's RENDER GPU to the NVENC/capture GPU (e.g. the 4090) BEFORE adding the target. - // On a multi-adapter box (SudoVDA IDD + discrete GPU) DXGI otherwise reparents the virtual - // output onto whichever GPU its hybrid-preference path resolves, which storms ACCESS_LOST - // (0x887A0026) on the secure/HDR desktop. Apollo's SET_RENDER_ADAPTER fixes this and MUST be - // issued before ADD. Best-effort: a driver that rejects it just keeps the default render GPU. - let pinned = unsafe { resolve_render_adapter_luid() }; + // SET_RENDER_ADAPTER is OPT-IN. 
Apollo runs with an EMPTY config and NEVER pins the render + // adapter, yet captures the SudoVDA cleanly at the client mode on the 4090 (verified live on + // this exact box: no ACCESS_LOST, no MODE_CHANGE storm). On this box our pin is IGNORED by the + // driver AND the IDD lands on a DIFFERENT adapter (0x23664) than the one its DXGI output is + // enumerated under (the 4090, where we make the capture device) — a cross-GPU mismatch that is + // the real source of the perpetual ACCESS_LOST + MODE_CHANGE_IN_PROGRESS storm. So default to + // NOT pinning — let the IDD use its natural adapter like Apollo. Opt in with + // PUNKTFUNK_RENDER_ADAPTER= only on a box that genuinely needs steering. + let pinned = if std::env::var("PUNKTFUNK_RENDER_ADAPTER").is_ok() { + unsafe { resolve_render_adapter_luid() } + } else { + tracing::info!( + "SudoVDA SET_RENDER_ADAPTER skipped (Apollo-parity: no render pin — avoids cross-GPU \ + mismatch; set PUNKTFUNK_RENDER_ADAPTER= to force a specific render GPU)" + ); + None + }; if let Some(luid) = pinned { - match unsafe { set_render_adapter(self.device, luid) } { + match unsafe { set_render_adapter(dev, luid) } { Ok(()) => tracing::info!( luid = format!("{:08x}:{:08x}", luid.HighPart, luid.LowPart), "SudoVDA SET_RENDER_ADAPTER: pinned IDD render GPU" @@ -554,7 +646,7 @@ impl VirtualDisplay for SudoVdaDisplay { std::slice::from_raw_parts(&add as *const _ as *const u8, size_of::()) }; let mut out = [0u8; size_of::()]; - unsafe { ioctl(self.device, IOCTL_ADD, add_bytes, &mut out) }.with_context(|| { + unsafe { ioctl(dev, IOCTL_ADD, add_bytes, &mut out) }.with_context(|| { format!( "SudoVDA ADD {}x{}@{}", mode.width, mode.height, mode.refresh_hz @@ -583,8 +675,8 @@ impl VirtualDisplay for SudoVdaDisplay { // Mandatory keepalive: ping inside the watchdog window or the driver tears all displays down. 
let stop = Arc::new(AtomicBool::new(false)); - let device_raw = self.device.0 as isize; - let interval = Duration::from_millis(self.watchdog_s as u64 * 1000 / 3); + let device_raw = device; + let interval = Duration::from_millis(watchdog_s as u64 * 1000 / 3); let stop_t = stop.clone(); let pinger = thread::spawn(move || { let h = HANDLE(device_raw as *mut c_void); @@ -607,14 +699,31 @@ impl VirtualDisplay for SudoVdaDisplay { break; } } - let mut isolated: Vec<(String, DEVMODEW)> = Vec::new(); + let mut ccd_saved: Option = None; match &gdi_name { Some(n) => { tracing::info!("SudoVDA target {} -> {n}", ao.target_id); // ADD only advertises the mode; force it active so DXGI captures the requested size. set_active_mode(n, mode); - // Detach every other display so the secure desktop (Winlogon/UAC) renders here too. - isolated = unsafe { isolate_displays(n) }; + // Make the SudoVDA the SOLE active display (default). On this box an EXTENDED + // (non-primary) IDD is NOT DWM-composited → Desktop Duplication gets a born-lost + // ACCESS_LOST (measured live: MODE_CHANGE storm fixed, but the extended IDD then + // born-lost). Apollo reaches the same end state ("Virtual Desktop: WxH" — the IDD is the + // whole desktop, hence primary + composited) via Windows AUTO-promoting the real WDDM + // display over the box's leftover 1024x768 basic display; Windows does NOT auto-promote + // for us, so we deactivate the other display(s) explicitly via the clean atomic CCD path. + // Deactivating FIRST means set_active_mode's primary-promotion has nothing to contest → + // no MODE_CHANGE_IN_PROGRESS storm (that storm came from promoting primary WHILE the + // basic display stayed active). Opt out with PUNKTFUNK_NO_ISOLATE=1 (a box with a real + // second monitor to keep live). The legacy GDI detach is skipped — it misses + // iGPU-attached monitors on a hybrid box and churns per-device; CCD is atomic. 
+ if std::env::var("PUNKTFUNK_NO_ISOLATE").is_err() { + ccd_saved = unsafe { isolate_displays_ccd(ao.target_id) }; + } else { + tracing::info!( + "display isolation skipped (PUNKTFUNK_NO_ISOLATE) — IDD stays extended" + ); + } thread::sleep(Duration::from_millis(1500)); // let the topology settle before capture opens } None => tracing::warn!( @@ -623,59 +732,50 @@ impl VirtualDisplay for SudoVdaDisplay { ), } - Ok(VirtualOutput { - node_id: 0, // unused on Windows; the capture target is the GDI name below - preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)), - win_capture: gdi_name - .clone() - .map(|n| crate::capture::dxgi::WinCaptureTarget { - adapter_luid: crate::capture::dxgi::pack_luid(ao.luid), - gdi_name: n, - // The SudoVDA target id is stable across secure-desktop topology rebuilds; the - // GDI name is NOT, so capture re-resolves the name from this on every recovery. - target_id: ao.target_id, - }), - keepalive: Box::new(SudoVdaKeepalive { - device: device_raw, - guid: MONITOR_GUID, - stop, - pinger: Some(pinger), - gdi_name, - isolated, - }), + Ok(Monitor { + guid: session_guid, + target_id: ao.target_id, + luid: ao.luid, + gdi_name, + mode, + stop, + pinger: Some(pinger), + ccd_saved, }) } } -/// RAII teardown: stop the ping thread, then REMOVE the monitor by its GUID. Does NOT close the -/// device handle — that belongs to [`SudoVdaDisplay`], which outlives the output. -struct SudoVdaKeepalive { - device: isize, - guid: GUID, - stop: Arc, - pinger: Option>, - #[allow(dead_code)] // consumed by the Windows capture backend (not yet wired) - gdi_name: Option, - /// Displays detached by [`isolate_displays`], restored here on teardown. - isolated: Vec<(String, DEVMODEW)>, -} +impl Monitor { + /// The capture target handed to a session (`None` until the GDI name resolves). 
+ fn target(&self) -> Option { + self.gdi_name + .clone() + .map(|n| crate::capture::dxgi::WinCaptureTarget { + adapter_luid: crate::capture::dxgi::pack_luid(self.luid), + gdi_name: n, + // target_id is stable across secure-desktop topology rebuilds; the GDI name is NOT, + // so capture re-resolves the name from this on every recovery. + target_id: self.target_id, + }) + } -impl Drop for SudoVdaKeepalive { - fn drop(&mut self) { + /// Stop the watchdog ping, re-attach the displays we detached, then REMOVE the monitor (by GUID). + /// `device` is the host-level control handle. Consumes the monitor. + unsafe fn teardown(mut self, device: isize) { self.stop.store(true, Ordering::Relaxed); if let Some(j) = self.pinger.take() { let _ = j.join(); } - // Re-attach the physical display(s) we detached BEFORE removing the virtual output, so the - // box is never left with zero displays. - unsafe { restore_displays(&self.isolated) }; + // Re-attach detached display(s) BEFORE the REMOVE so the box is never left with zero displays. + if let Some(saved) = &self.ccd_saved { + restore_displays_ccd(saved); + } let rp = RemoveParams { guid: self.guid }; - let rp_bytes = unsafe { - std::slice::from_raw_parts(&rp as *const _ as *const u8, size_of::()) - }; + let rp_bytes = + std::slice::from_raw_parts(&rp as *const _ as *const u8, size_of::()); let mut none: [u8; 0] = []; - let h = HANDLE(self.device as *mut c_void); - if let Err(e) = unsafe { ioctl(h, IOCTL_REMOVE, rp_bytes, &mut none) } { + let h = HANDLE(device as *mut c_void); + if let Err(e) = ioctl(h, IOCTL_REMOVE, rp_bytes, &mut none) { tracing::warn!("SudoVDA REMOVE failed: {e:#}"); } else { tracing::info!("SudoVDA monitor removed"); @@ -683,6 +783,178 @@ impl Drop for SudoVdaKeepalive { } } +/// Open the control device once + read version/watchdog; cache the handle (raw isize) in `g`. +fn mgr_ensure_device(g: &mut Mgr) -> Result { + if let Some(d) = g.device { + return Ok(d); + } + let device = unsafe { open_device()? 
}; + let mut ver = [0u8; 4]; + if unsafe { ioctl(device, IOCTL_GET_VERSION, &[], &mut ver) }.is_ok() { + tracing::info!( + "SudoVDA protocol {}.{}.{} (test={})", + ver[0], + ver[1], + ver[2], + ver[3] + ); + } + let mut wd = [0u8; 8]; + g.watchdog_s = if unsafe { ioctl(device, IOCTL_GET_WATCHDOG, &[], &mut wd) }.is_ok() { + u32::from_le_bytes([wd[0], wd[1], wd[2], wd[3]]).max(1) + } else { + 3 + }; + tracing::info!("SudoVDA watchdog timeout {}s", g.watchdog_s); + let raw = device.0 as isize; + g.device = Some(raw); + Ok(raw) +} + +/// Linger window before a session-less monitor is torn down. A reconnect within it reuses the +/// monitor (no new screen / PnP chime); after it the monitor is REMOVEd so a physical screen returns. +fn linger_ms() -> u64 { + std::env::var("PUNKTFUNK_MONITOR_LINGER_MS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(10_000) +} + +/// Acquire the shared monitor for a new session: join the live one (refcount++), reuse a lingering +/// one (reconfiguring if the client mode changed), or create one. The returned [`MonitorLease`] +/// releases the refcount on drop. +fn mgr_acquire(mode: Mode) -> Result { + ensure_linger_timer(); + let mut g = MGR.lock().unwrap(); + let device = mgr_ensure_device(&mut g)?; + let watchdog_s = g.watchdog_s; + + // A live monitor already exists — join it (refcount++). This covers a concurrent session AND the + // build-then-drop overlap of a mid-stream Reconfigure / secure-return (the new lease is taken while + // the old is still held). If the requested mode differs, reconfigure the shared monitor to it so a + // Reconfigure actually applies (one shared monitor → sessions necessarily share a mode). 
+ if let MgrState::Active { mon, refs } = &mut g.state { + *refs += 1; + let changed = mon.mode.width != mode.width + || mon.mode.height != mode.height + || mon.mode.refresh_hz != mode.refresh_hz; + if changed { + unsafe { mgr_reconfigure(mon, mode) }; + } + tracing::info!( + refs = *refs, + "SudoVDA monitor reused (concurrent / reconfigure session)" + ); + let pm = Some((mon.mode.width, mon.mode.height, mon.mode.refresh_hz)); + let target = mon.target(); + return Ok(VirtualOutput { + node_id: 0, + preferred_mode: pm, + win_capture: target, + keepalive: Box::new(MonitorLease), + }); + } + + // Idle or Lingering: repurpose/create a monitor → Active{refs:1}. + let mon = match std::mem::replace(&mut g.state, MgrState::Idle) { + MgrState::Lingering { mut mon, .. } => { + tracing::info!("SudoVDA monitor reused (reconnect within the linger window)"); + let changed = mon.mode.width != mode.width + || mon.mode.height != mode.height + || mon.mode.refresh_hz != mode.refresh_hz; + if changed { + unsafe { mgr_reconfigure(&mut mon, mode) }; + } + mon + } + MgrState::Idle => unsafe { create_monitor(device, mode, watchdog_s)? }, + MgrState::Active { .. } => unreachable!("handled above"), + }; + let pm = Some((mon.mode.width, mon.mode.height, mon.mode.refresh_hz)); + let target = mon.target(); + g.state = MgrState::Active { mon, refs: 1 }; + Ok(VirtualOutput { + node_id: 0, + preferred_mode: pm, + win_capture: target, + keepalive: Box::new(MonitorLease), + }) +} + +/// Re-apply a (possibly new) mode to a reused monitor on reconnect, re-resolving its GDI name. 
+unsafe fn mgr_reconfigure(mon: &mut Monitor, mode: Mode) { + tracing::info!( + old = format!( + "{}x{}@{}", + mon.mode.width, mon.mode.height, mon.mode.refresh_hz + ), + new = format!("{}x{}@{}", mode.width, mode.height, mode.refresh_hz), + "SudoVDA: reconfiguring reused monitor to the new client mode" + ); + if let Some(n) = resolve_gdi_name(mon.target_id) { + mon.gdi_name = Some(n); + } + if let Some(n) = &mon.gdi_name { + set_active_mode(n, mode); + } + mon.mode = mode; +} + +/// Release a session's hold: refcount-- ; when the last session leaves, LINGER before teardown. +fn mgr_release() { + let mut g = MGR.lock().unwrap(); + g.state = match std::mem::replace(&mut g.state, MgrState::Idle) { + MgrState::Active { mon, refs } if refs > 1 => MgrState::Active { + mon, + refs: refs - 1, + }, + MgrState::Active { mon, .. } => { + let ms = linger_ms(); + tracing::info!( + linger_ms = ms, + "SudoVDA: last session left — lingering before teardown" + ); + MgrState::Lingering { + mon, + until: Instant::now() + Duration::from_millis(ms), + } + } + other => other, + }; +} + +/// Background timer (started once): tear down a monitor that has lingered past its deadline (→ Idle), +/// so a physical-screen user gets their screen back after they stop streaming. +fn ensure_linger_timer() { + static TIMER: Once = Once::new(); + TIMER.call_once(|| { + let _ = thread::Builder::new() + .name("sudovda-linger".into()) + .spawn(|| loop { + thread::sleep(Duration::from_millis(500)); + let mut g = MGR.lock().unwrap(); + let due = matches!(&g.state, MgrState::Lingering { until, .. } if Instant::now() >= *until); + if due { + let device = g.device.unwrap_or(0); + if let MgrState::Lingering { mon, .. } = + std::mem::replace(&mut g.state, MgrState::Idle) + { + drop(g); // release the lock before the REMOVE IOCTL + display restore + unsafe { mon.teardown(device) }; + } + } + }); + }); +} + +/// A session's lease on the shared monitor. Drop releases the refcount (→ linger when it hits 0). 
+struct MonitorLease; +impl Drop for MonitorLease { + fn drop(&mut self) { + mgr_release(); + } +} + /// Readiness probe: can we open the SudoVDA control device? pub fn probe() -> Result<()> { let h = unsafe { open_device()? }; diff --git a/docs/windows-host.md b/docs/windows-host.md index 90ffbe5..768ee50 100644 --- a/docs/windows-host.md +++ b/docs/windows-host.md @@ -74,14 +74,26 @@ Driven by live testing with the native macOS client at the display's native **51 detaches other monitors so Winlogon renders to the virtual output) covers the case where a physical monitor is also attached. -### Running as SYSTEM, windowless (deployment) +### Running as SYSTEM (deployment) — the `PunktfunkHost` service To capture the secure desktop the host must run as **SYSTEM in the interactive Session 1** (a Session -0 service can't duplicate Session 1). Launch chain: a scheduled task (Interactive, Highest) → -`PsExec64 -s -i 1 -d wscript.exe launch.vbs` → `launch.vbs` runs `host-run.cmd` with a **hidden -window** (`WScript.Shell.Run …, 0`). This keeps the host off the captured desktop — no `cmd` windows -the user can see or accidentally close (which would kill the stream). `host-run.cmd` sets -`APPDATA=C:\Users\Public` (shared identity/pairing) + `PUNKTFUNK_ENCODER=nvenc` and runs `m3-host`. +0 service can't duplicate Session 1). The end-user deployment is the built-in Windows **service** +(`src/service.rs`) — see [`windows-service.md`](windows-service.md). One elevated command: + +```powershell +punktfunk-host service install # auto-start LocalSystem service + firewall rules + default host.env +punktfunk-host service start +``` + +The service runs in Session 0 but never captures: it duplicates its own LocalSystem token, retargets +it to the active console session, and `CreateProcessAsUserW`s the host there — supervising it across +exits and console-session switches (the Sunshine/Apollo model). 
Config lives in +`%ProgramData%\punktfunk\host.env`; logs in `%ProgramData%\punktfunk\logs\`. + +> **Old bring-up chain (debug only, superseded by the service):** a scheduled task (Interactive, +> Highest) → `PsExec64 -s -i 1 -d wscript.exe launch.vbs` → `host-run.cmd` (hidden window), with +> `APPDATA=C:\Users\Public` as the shared-identity hack. The service replaces all of this; the host +> now resolves its config dir to `%ProgramData%\punktfunk` directly (`PUNKTFUNK_CONFIG_DIR` overrides). ### Real-GPU test box (RTX 4090, `ssh "Enrico Bühler"@192.168.1.174`) diff --git a/docs/windows-service.md b/docs/windows-service.md new file mode 100644 index 0000000..ab94eb5 --- /dev/null +++ b/docs/windows-service.md @@ -0,0 +1,93 @@ +# Windows service (deployment) + +The `PunktfunkHost` Windows service is the end-user way to run the host on Windows. It replaces the +manual bring-up chain (a scheduled task → `PsExec64 -s -i 1` → `wscript launch.vbs` → `host-run.cmd`) +with one command, auto-start on boot, and supervision. + +## Install + +From an **elevated** (Administrator) prompt: + +```powershell +punktfunk-host service install # register auto-start LocalSystem service + firewall rules + default host.env +punktfunk-host service start # start it now (also starts automatically on every boot) +``` + +`service install` is idempotent — run it again after upgrading the exe to re-point the service at the +new binary. Register whatever location you keep the exe in (e.g. `C:\Program Files\punktfunk\`); the +service records the current exe path. 
+ +Other subcommands: + +```powershell +punktfunk-host service stop +punktfunk-host service status +punktfunk-host service uninstall # stop + delete the service + remove its firewall rules +``` + +## How it works + +The host must run **as SYSTEM in the interactive session** (Session 1+): Desktop Duplication of the +secure desktop (UAC / lock / login) and `SendInput` need SYSTEM, and capture/injection need the +interactive session, which a plain Session-0 service is not in. + +So the service (itself in Session 0) **never captures**. On start, and whenever the active console +session changes, it: + +1. resolves the active console session (`WTSGetActiveConsoleSessionId`), +2. duplicates its own LocalSystem token and retargets it to that session (`SetTokenInformation` + `TokenSessionId`), +3. launches the host there with `CreateProcessAsUserW` (`lpDesktop = winsta0\default`), +4. supervises it: relaunches on exit/crash (with backoff) and on a console connect/disconnect. + +A kill-on-close **job object** ensures a service crash never orphans the SYSTEM host. The host in turn +spawns the WGC helper into the *user* session (see [`windows-secure-desktop.md`](windows-secure-desktop.md)) +— two nested launches. Lock/unlock are handled inside the host (the `DesktopWatcher` DDA↔WGC mux), so +the service deliberately does **not** relaunch on lock/unlock — only on a real session switch. + +This is the same model Sunshine/Apollo use. + +## Configuration + +Config lives in **`%ProgramData%\punktfunk\host.env`** (KEY=VALUE lines, `#` comments). `service +install` writes a default if none exists. Template: [`scripts/windows/host.env.example`](../scripts/windows/host.env.example). 
+ +```ini +PUNKTFUNK_ENCODER=nvenc +PUNKTFUNK_VIDEO_SOURCE=virtual +PUNKTFUNK_SECURE_DDA=1 +RUST_LOG=info +# PUNKTFUNK_HOST_CMD=serve --native # the host subcommand the service launches (default) +``` + +The service loads these into its environment and carries `PUNKTFUNK_*` + `RUST_LOG` to the host child +(the same env-merge the WGC helper uses). Restart the service after editing: + +```powershell +punktfunk-host service stop; punktfunk-host service start +``` + +The host's identity (cert/pairing/mgmt token/library) also lives under `%ProgramData%\punktfunk` — a +machine-wide dir the SYSTEM service and the interactive user share, surviving user logout. +`PUNKTFUNK_CONFIG_DIR` overrides the location (both platforms; handy for tests). + +## Logs + +- `%ProgramData%\punktfunk\logs\service.log` — the service's own supervision log (spawn/exit/session + switches). +- `%ProgramData%\punktfunk\logs\host.log` — the host child's stdout/stderr. + +## Prerequisites + +- The host built with `--features nvenc` for NVENC (the driver ships `nvEncodeAPI64.dll`; no SDK + needed at runtime). Software encode otherwise. +- The **SudoVDA** indirect display driver installed (for `PUNKTFUNK_VIDEO_SOURCE=virtual`). +- **ViGEmBus** for virtual gamepads (optional). + +## Gotchas + +- `service install`/`uninstall` need an **elevated** prompt (the SCM rejects non-admin). +- `service run` is the SCM entry point — don't run it by hand (it errors with a hint). +- A **graceful** stop currently `TerminateProcess`es the host, so its RAII teardown (SudoVDA monitor + REMOVE) doesn't run; a stale virtual monitor can linger until the next start. A cooperative-stop + signal is a follow-up. diff --git a/scripts/windows/host.env.example b/scripts/windows/host.env.example new file mode 100644 index 0000000..48faf11 --- /dev/null +++ b/scripts/windows/host.env.example @@ -0,0 +1,36 @@ +# punktfunk host configuration (Windows) — read by the `PunktfunkHost` service. 
+# +# `punktfunk-host service install` writes a default copy of this to +# %ProgramData%\punktfunk\host.env +# Edit that file (not this one) and restart the service to apply: +# punktfunk-host service stop +# punktfunk-host service start +# +# Format: KEY=VALUE per line; '#' starts a comment. The service loads these into its environment +# and passes PUNKTFUNK_* and RUST_LOG through to the host it launches into the active session. + +# Hardware encode via NVENC (NVIDIA). The host must be the `--features nvenc` build. Falls back to +# the software encoder automatically if NVENC is unavailable. +PUNKTFUNK_ENCODER=nvenc + +# Video source: `virtual` creates a virtual display (SudoVDA) at the client's exact +# resolution + refresh (concurrent clients share it) — the flagship mode. Requires the SudoVDA indirect display driver installed. +PUNKTFUNK_VIDEO_SOURCE=virtual + +# Capture the secure desktop (UAC / lock / login) so the stream survives those transitions. +PUNKTFUNK_SECURE_DDA=1 + +# Log level (info | debug | trace). Logs land in %ProgramData%\punktfunk\logs\. +RUST_LOG=info + +# The host subcommand the service launches. Default: `serve --native` (GameStream/Moonlight + the +# native punktfunk/1 QUIC host in one process). Uncomment to override. +#PUNKTFUNK_HOST_CMD=serve --native + +# Multi-GPU boxes only: force the NVENC/Desktop-Duplication GPU by Description substring; setting it also opts in to pinning the virtual display's render GPU (skipped by default). Leave +# unset on single-GPU machines (the default auto-picks the discrete adapter). +#PUNKTFUNK_RENDER_ADAPTER=4090 + +# Keep the virtual display alive briefly after the last client disconnects so a quick reconnect reuses it +# (no display connect/disconnect chime). Default 10000 ms. +#PUNKTFUNK_MONITOR_LINGER_MS=10000