Merge remote-tracking branch 'origin/dda-parity'
apple / swift (push) Successful in 55s
windows-msix / package (push) Successful in 1m8s
audit / cargo-audit (push) Failing after 1m10s
windows / build (push) Successful in 1m5s
android / android (push) Successful in 3m33s
ci / web (push) Successful in 40s
ci / docs-site (push) Successful in 37s
ci / bench (push) Successful in 4m32s
ci / rust (push) Successful in 8m37s
decky / build-publish (push) Successful in 28s
deb / build-publish (push) Successful in 3m21s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 17s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 3m37s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 2m58s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 22s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m31s
flatpak / build-publish (push) Failing after 3m2s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 7m2s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Failing after 4m22s
docker / deploy-docs (push) Successful in 21s

This commit is contained in:
2026-06-16 18:55:52 +00:00
16 changed files with 1974 additions and 308 deletions
Generated
+18
View File
@@ -2688,6 +2688,7 @@ dependencies = [
"wayland-protocols-wlr",
"wayland-scanner",
"windows 0.62.2 (registry+https://github.com/rust-lang/crates.io-index)",
"windows-service",
"x509-parser",
"xkbcommon",
]
@@ -4325,6 +4326,12 @@ dependencies = [
"safe_arch",
]
[[package]]
name = "widestring"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471"
[[package]]
name = "winapi"
version = "0.3.9"
@@ -4557,6 +4564,17 @@ dependencies = [
"windows-link 0.2.1 (git+https://github.com/microsoft/windows-rs?rev=b4129fcc1ae81eec8bf1217539883db821bca3a1)",
]
[[package]]
name = "windows-service"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24d6bcc7f734a4091ecf8d7a64c5f7d7066f45585c1861eba06449909609c8a"
dependencies = [
"bitflags",
"widestring",
"windows-sys 0.52.0",
]
[[package]]
name = "windows-strings"
version = "0.5.1"
+15
View File
@@ -144,7 +144,22 @@ windows = { version = "0.62", features = [
# Force-composed-flip overlay: a topmost layered window on the Winlogon desktop disqualifies the
# secure desktop's fullscreen independent-flip so Desktop Duplication can capture it.
"Win32_System_LibraryLoader",
# VirtualProtect — for the inline patch of the win32u GPU-preference shim (Apollo's MinHook port:
# the hybrid-GPU output-reparenting hook that keeps Desktop Duplication stable on a 4090+iGPU box).
# See capture/dxgi.rs `install_gpu_pref_hook`. No trampoline (we fully replace the fn) → no detour
# crate / no C length-disassembler dep; a 12-byte absolute-jmp prologue patch suffices.
"Win32_System_Memory",
# Per-monitor-v2 DPI awareness — IDXGIOutput5::DuplicateOutput1 (the modern capture path Apollo
# uses; FP16/format-list, robust to overlay/format churn) requires the process to be DPI-aware.
"Win32_UI_HiDpi",
# Windows service supervisor (src/service.rs): a kill-on-close job object so a service crash never
# orphans the SYSTEM host it launched into the interactive session.
"Win32_System_JobObjects",
] }
# The SCM plumbing for the `service` subcommand (define_windows_service! / dispatcher / control
# handler / ServiceManager install). Wraps the Win32 service API; the supervision loop itself uses
# the `windows` crate above.
windows-service = "0.7"
# Software H.264 encoder (GPU-less path + NVENC fallback). The default `source` feature statically
# compiles OpenH264 (BSD-2) — no system lib, builds on MSVC; nasm on PATH adds the SIMD fast path.
openh264 = "0.9"
+11 -1
View File
@@ -258,6 +258,16 @@ pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result<Bo
linux::PortalCapturer::from_virtual_output(vout).map(|c| Box::new(c) as Box<dyn Capturer>)
}
/// Reports whether the `PUNKTFUNK_NO_WGC` switch is present in the environment.
///
/// When present, the pure single-process DDA (Desktop Duplication) path is forced everywhere: WGC is
/// skipped in [`capture_virtual_output`] AND the two-process secure-desktop relay is bypassed (so even
/// a SYSTEM host captures in-process via DDA, the way Apollo does — one capturer for the normal AND
/// the secure desktop). Intended for bringing DDA up to parity / validating it on its own; all the WGC
/// code stays compiled and comes back the moment the variable is unset.
///
/// Only the variable's PRESENCE is checked — its value is never inspected, so `PUNKTFUNK_NO_WGC=0`
/// disables WGC just like `PUNKTFUNK_NO_WGC=1`. Unset the variable to re-enable WGC.
#[cfg(target_os = "windows")]
pub(crate) fn wgc_disabled() -> bool {
    const FLAG: &str = "PUNKTFUNK_NO_WGC";
    let flag = std::env::var_os(FLAG);
    flag.is_some()
}
#[cfg(target_os = "windows")]
pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result<Box<dyn Capturer>> {
let target = vout.win_capture.clone().ok_or_else(|| {
@@ -275,7 +285,7 @@ pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result<Bo
let backend = std::env::var("PUNKTFUNK_CAPTURE")
.unwrap_or_default()
.to_ascii_lowercase();
if backend == "dda" || backend == "dxgi" {
if backend == "dda" || backend == "dxgi" || wgc_disabled() {
return dxgi::DuplCapturer::open(target, pref, keep)
.map(|c| Box::new(c) as Box<dyn Capturer>);
}
@@ -17,7 +17,7 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use windows::core::{w, PCWSTR};
use windows::core::w;
use windows::Win32::Foundation::{HWND, LPARAM, LRESULT, WPARAM};
use windows::Win32::System::LibraryLoader::GetModuleHandleW;
use windows::Win32::System::StationsAndDesktops::{
@@ -45,24 +45,36 @@ impl DesktopWatcher {
let _ = std::thread::Builder::new()
.name("desktop-watch".into())
.spawn(move || {
let mut last = initial;
// Debounce: only publish a change after the raw reading has been stable for several
// polls. The input desktop flaps Default↔Winlogon transiently during a lock/UAC
// transition; publishing every flap makes the capture mux thrash (rebuild storms).
const STABLE_POLLS: u32 = 4; // ~80ms
let mut published = initial;
let mut candidate = initial;
let mut stable = 0u32;
while !st.load(Ordering::Relaxed) {
let v = if unsafe { is_secure_desktop() } {
DESKTOP_SECURE
} else {
DESKTOP_NORMAL
};
s.store(v, Ordering::Release);
if v != last {
if v == candidate {
stable = stable.saturating_add(1);
} else {
candidate = v;
stable = 1;
}
if stable >= STABLE_POLLS && candidate != published {
s.store(candidate, Ordering::Release);
published = candidate;
tracing::info!(
desktop = if v == DESKTOP_SECURE {
desktop = if candidate == DESKTOP_SECURE {
"Winlogon(secure)"
} else {
"Default"
},
"input desktop changed"
"input desktop changed (debounced)"
);
last = v;
}
std::thread::sleep(Duration::from_millis(20));
}
@@ -89,7 +101,7 @@ impl Drop for DesktopWatcher {
/// True if the current input desktop is "Winlogon" (the secure desktop). Best-effort: if the desktop
/// can't be opened or named, report not-secure (the safe default — keep WGC/normal capture).
unsafe fn is_secure_desktop() -> bool {
pub(crate) unsafe fn is_secure_desktop() -> bool {
let desk = match OpenInputDesktop(
DESKTOP_CONTROL_FLAGS(0),
false,
+440 -64
View File
@@ -10,7 +10,7 @@
use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
use anyhow::{anyhow, bail, Context, Result};
use std::ffi::c_void;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use windows::core::{s, Interface, PCSTR};
use windows::Win32::Foundation::{HMODULE, LUID};
@@ -37,14 +37,15 @@ use windows::Win32::Graphics::Dxgi::Common::{
DXGI_SAMPLE_DESC,
};
use windows::Win32::Graphics::Dxgi::{
CreateDXGIFactory1, IDXGIAdapter1, IDXGIFactory1, IDXGIOutput1, IDXGIOutputDuplication,
IDXGIResource, DXGI_ERROR_ACCESS_LOST, DXGI_ERROR_DEVICE_REMOVED, DXGI_ERROR_DEVICE_RESET,
DXGI_ERROR_INVALID_CALL, DXGI_ERROR_WAIT_TIMEOUT, DXGI_OUTDUPL_DESC, DXGI_OUTDUPL_FRAME_INFO,
CreateDXGIFactory1, IDXGIAdapter1, IDXGIFactory1, IDXGIOutput1, IDXGIOutput5,
IDXGIOutputDuplication, IDXGIResource, DXGI_ERROR_ACCESS_LOST, DXGI_ERROR_DEVICE_REMOVED,
DXGI_ERROR_DEVICE_RESET, DXGI_ERROR_INVALID_CALL, DXGI_ERROR_MODE_CHANGE_IN_PROGRESS,
DXGI_ERROR_WAIT_TIMEOUT, DXGI_OUTDUPL_DESC, DXGI_OUTDUPL_FRAME_INFO,
DXGI_OUTDUPL_POINTER_SHAPE_INFO, DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR,
DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR,
};
use windows::Win32::System::StationsAndDesktops::{
OpenInputDesktop, SetThreadDesktop, DESKTOP_ACCESS_FLAGS, DESKTOP_CONTROL_FLAGS,
CloseDesktop, OpenInputDesktop, SetThreadDesktop, DESKTOP_ACCESS_FLAGS, DESKTOP_CONTROL_FLAGS,
};
use windows::Win32::UI::WindowsAndMessaging::SetCursorPos;
@@ -164,33 +165,113 @@ unsafe fn reopen_duplication(
)> {
let (adapter, out) = find_output(gdi_name)?;
let (dev, ctx) = make_device(&adapter)?;
let dupl = out
.DuplicateOutput(&dev)
.context("re-DuplicateOutput after ACCESS_LOST")?;
let dupl = duplicate_output(&out, &dev).context("re-DuplicateOutput after ACCESS_LOST")?;
Ok((dev, ctx, out, dupl))
}
/// Create the output duplication for `output` on `device`.
///
/// Prefers `IDXGIOutput5::DuplicateOutput1` with an explicit format list — Apollo's path, and far
/// more robust to overlay/format changes than legacy `DuplicateOutput` (the source of much of the
/// ACCESS_LOST churn). The format list is currently BGRA8 ONLY (SDR): `DuplicateOutput1` returns the
/// FIRST format it can provide and DXGI will CONVERT to it, so listing FP16 first would hand back
/// FP16 even on an SDR desktop, wrongly tripping the HDR path. Real HDR capture (FP16 first +
/// IDXGIOutput6 colorspace detection to pick the path) is the follow-up once the churn is settled.
///
/// Requires per-monitor-v2 DPI awareness (set at startup in [`install_gpu_pref_hook`]). Falls back to
/// legacy `DuplicateOutput` if Output5 is unavailable or every `DuplicateOutput1` attempt fails.
///
/// Env tunables (read on every call, so they can be dialled without a rebuild):
/// - `PUNKTFUNK_DUP_RETRY_N` — number of `DuplicateOutput1` attempts, default 1 (no retry →
///   immediate legacy fallback), clamped to at least 1.
/// - `PUNKTFUNK_DUP_RETRY_MS` — sleep between attempts in milliseconds, default 200 (the interval
///   Apollo waits). Only matters when `PUNKTFUNK_DUP_RETRY_N` > 1.
///
/// Unset or unparsable values silently use the default.
///
/// # Errors
/// Returns the legacy `DuplicateOutput` failure (context `"DuplicateOutput"`) when the fallback
/// fails too. A `DuplicateOutput1` failure alone is never returned — it is only logged (throttled).
unsafe fn duplicate_output(
    output: &IDXGIOutput1,
    device: &ID3D11Device,
) -> Result<IDXGIOutputDuplication> {
    /// Read a `u64` tunable from the environment; unset or unparsable values yield `default`.
    fn env_u64(name: &str, default: u64) -> u64 {
        std::env::var(name)
            .ok()
            .and_then(|s| s.parse().ok())
            .unwrap_or(default)
    }

    if let Ok(output5) = output.cast::<IDXGIOutput5>() {
        // BGRA8 only for now (SDR) — see the function docs for why FP16 must not be listed first.
        let formats = [DXGI_FORMAT_B8G8R8A8_UNORM];

        // Why retries exist at all: the caller releases the OLD duplication (self.dupl = None)
        // immediately before calling us, and the kernel-side teardown of that duplication is ASYNC —
        // the first DuplicateOutput1 right after can race it and return E_ACCESSDENIED ("output
        // still duplicated") even though we dropped our only reference. Apollo waits 200 ms between
        // attempts because the teardown takes that long; short (ms) waits aren't enough.
        //
        // Why the DEFAULT is a single attempt: on the secure desktop DuplicateOutput1 ALWAYS refuses
        // (only LOGON_UI may use it), so retrying there just blocks the capture thread; and on the
        // normal desktop the release-before-reduplicate + gentle recovery already keep the legacy
        // dup stable. Raise PUNKTFUNK_DUP_RETRY_N only on a box where DuplicateOutput1 can win the
        // old-dup-teardown race.
        let retry_ms = env_u64("PUNKTFUNK_DUP_RETRY_MS", 200);
        let attempts = env_u64("PUNKTFUNK_DUP_RETRY_N", 1).max(1);

        let mut last_err = None;
        for attempt in 0..attempts {
            match output5.DuplicateOutput1(device, 0, &formats) {
                Ok(d) => {
                    if attempt > 0 {
                        tracing::debug!(
                            attempt,
                            "DuplicateOutput1 succeeded on retry (rode out old-dup teardown race)"
                        );
                    }
                    return Ok(d);
                }
                Err(e) => last_err = Some(e),
            }
            // Sleep only BETWEEN attempts, never after the last one (go straight to the fallback).
            if attempt + 1 < attempts {
                std::thread::sleep(Duration::from_millis(retry_ms));
            }
        }

        if let Some(e) = last_err {
            // Expected on the secure (Winlogon) desktop (DuplicateOutput1 is LOGON_UI-only) and fires
            // once per gentle recovery there — throttle to every 64th occurrence so a lock dwell
            // doesn't flood the log. The legacy fallback below handles it.
            static FALLBACKS: AtomicU64 = AtomicU64::new(0);
            if FALLBACKS.fetch_add(1, Ordering::Relaxed) % 64 == 0 {
                tracing::debug!(
                    error = %format!("{e:?}"),
                    "DuplicateOutput1 unavailable — using legacy DuplicateOutput (expected on the secure desktop)"
                );
            }
        }
    }
    output.DuplicateOutput(device).context("DuplicateOutput")
}
/// Park the cursor on a duplicated output. A blank virtual display emits NO Desktop Duplication
/// frames until something changes; a pointer move IS a DDA "change", so this kicks the very first
/// `AcquireNextFrame` loose — and lands the cursor on the display the client is viewing. Two moves
/// to distinct points guarantee an actual move even if the cursor already sat at the center.
/// Follow the current input desktop so duplication spans the normal ↔ Winlogon (secure: login/UAC)
/// desktops. Opening the secure desktop requires SYSTEM; on a non-SYSTEM host this just fails on
/// Winlogon (capture freezes there) — which is why the host relaunches itself as SYSTEM. The HDESK
/// is intentionally leaked: it must stay open while it's the thread's desktop, and switches
/// (lock/unlock/UAC) are rare, so a few handles per session is fine.
/// Re-sync the calling (capture) thread to the CURRENT input desktop. MUST be called on EVERY recovery
/// — symmetrically for ENTERING and LEAVING the Winlogon (secure: lock/login/UAC) desktop. Gating it on
/// is_secure_desktop() (the old bug) re-attached only on the way IN, so on the way OUT the capture
/// thread stayed stuck on the gone Winlogon desktop and every rebuild failed → no frames → client
/// timeout → "display disconnected". Apollo calls its equivalent (syncThreadDesktop) before every
/// duplicate. Opening the secure desktop requires SYSTEM (the host relaunches itself as SYSTEM).
/// Matches Apollo by closing the handle right after SetThreadDesktop — the thread keeps the desktop via
/// an internal reference, so this does NOT leak even when called on every recovery.
unsafe fn attach_input_desktop() {
match OpenInputDesktop(
DESKTOP_CONTROL_FLAGS(0),
false,
DESKTOP_ACCESS_FLAGS(0x1000_0000), // GENERIC_ALL
) {
Ok(desk) => match SetThreadDesktop(desk) {
Ok(()) => tracing::info!("attach_input_desktop: SetThreadDesktop OK"),
Err(e) => {
tracing::warn!(error = %format!("{e:?}"), "attach_input_desktop: SetThreadDesktop FAILED")
Ok(desk) => {
if let Err(e) = SetThreadDesktop(desk) {
tracing::warn!(error = %format!("{e:?}"), "attach_input_desktop: SetThreadDesktop FAILED");
}
},
let _ = CloseDesktop(desk);
}
Err(e) => {
tracing::warn!(error = %format!("{e:?}"), "attach_input_desktop: OpenInputDesktop FAILED")
}
@@ -205,6 +286,122 @@ pub(crate) unsafe fn nudge_cursor_onto(output: &IDXGIOutput1) {
}
}
/// Running count of calls DXGI has made into our hooked `NtGdiDdDDIGetCachedHybridQueryValue`.
///
/// Diagnostic reading guide:
/// - stays 0 while DDA churns with ACCESS_LOST → the hook is NOT on DXGI's GPU-preference path on
///   this build, so reparenting can't be the cause (look at composition/independent-flip instead);
/// - >0 with continuing churn → the hook fires, but reparenting isn't the trigger here.
static HYBRID_HOOK_HITS: AtomicU64 = AtomicU64::new(0);

/// Current value of the hook-call counter (relaxed load; diagnostics only).
pub(crate) fn hybrid_hook_hits() -> u64 {
    let counter = &HYBRID_HOOK_HITS;
    counter.load(Ordering::Relaxed)
}
// kernel32 — declared directly so we don't pull the whole Win32_System_Diagnostics_Debug feature for
// one call. FlushInstructionCache serializes the i-cache after the inline patch: the patch is written
// on the main thread but DXGI runs the hooked export from the encode/worker thread (possibly a
// different core), so the "same-thread, no flush needed" assumption was wrong.
//
// The other two declarations ride along in the same block: GetCurrentProcess supplies the process
// handle passed to FlushInstructionCache, and SetThreadExecutionState is called with the ES_* flags
// below when a capturer is opened.
#[link(name = "kernel32")]
extern "system" {
// Called once after the 12-byte hook patch is written (process handle, patched address, length).
fn FlushInstructionCache(h: *mut c_void, base: *const c_void, size: usize) -> i32;
// Handle for the calling process; only used as the first argument of FlushInstructionCache here.
fn GetCurrentProcess() -> *mut c_void;
// Takes an OR-ed combination of the ES_* flags declared below.
fn SetThreadExecutionState(es_flags: u32) -> u32;
}
// Execution-state flags for SetThreadExecutionState. The capturer passes all three OR-ed together
// to keep the virtual display awake for the capture lifetime.
// NOTE(review): the call site's comment says the state is restored to ES_CONTINUOUS alone on Drop —
// that Drop impl is not visible from here; confirm.
const ES_CONTINUOUS: u32 = 0x8000_0000;
const ES_SYSTEM_REQUIRED: u32 = 0x0000_0001;
const ES_DISPLAY_REQUIRED: u32 = 0x0000_0002;
/// Stand-in for `win32u.dll!NtGdiDdDDIGetCachedHybridQueryValue` that unconditionally answers
/// `D3DKMT_GPU_PREFERENCE_STATE_UNSPECIFIED` (3).
///
/// The export is replaced wholesale (the original is never called), so no trampoline is needed.
/// (Ported verbatim from Apollo's MinHook hook.) Every invocation — including one with a null
/// pointer — bumps `HYBRID_HOOK_HITS` so the logs can show whether DXGI ever reaches the hook.
///
/// Returns `STATUS_SUCCESS` (0) after writing through `gpu_preference`, or
/// `STATUS_INVALID_PARAMETER` (0xC000000D) without writing when the pointer is null.
///
/// # Safety
/// `gpu_preference` must be null or valid and aligned for a `u32` write.
unsafe extern "system" fn hybrid_query_hook(gpu_preference: *mut u32) -> i32 {
    // NTSTATUS values handed back to the caller.
    const STATUS_SUCCESS: i32 = 0;
    const STATUS_INVALID_PARAMETER: i32 = 0xC000_000Du32 as i32;
    // D3DKMT_GPU_PREFERENCE_STATE_UNSPECIFIED
    const GPU_PREFERENCE_STATE_UNSPECIFIED: u32 = 3;

    HYBRID_HOOK_HITS.fetch_add(1, Ordering::Relaxed);
    if gpu_preference.is_null() {
        STATUS_INVALID_PARAMETER
    } else {
        gpu_preference.write(GPU_PREFERENCE_STATE_UNSPECIFIED);
        STATUS_SUCCESS
    }
}
/// Apollo's win32u GPU-preference hook, ported. On a HYBRID-GPU box DXGI resolves a GPU preference
/// (registry + power settings + the hybrid-adapter DDI) and REPARENTS outputs onto the chosen render
/// GPU — which constantly invalidates Desktop Duplication (DXGI_ERROR_ACCESS_LOST 0x887A0026, the
/// freeze/churn observed on the RTX 4090 + AMD iGPU box; `SET_RENDER_ADAPTER` is ignored there). Faking
/// a cached preference of UNSPECIFIED makes DXGI skip the resolution, so the output is NOT reparented
/// and DDA stays stable on one adapter (this is what makes Apollo's DDA work on this hardware).
/// Installed once, before the first DXGI factory/enumeration; lasts the process lifetime (like Apollo).
pub(crate) fn install_gpu_pref_hook() {
use std::sync::Once;
static HOOK: Once = Once::new();
HOOK.call_once(|| unsafe {
use windows::Win32::System::LibraryLoader::{GetProcAddress, LoadLibraryA};
use windows::Win32::System::Memory::{
VirtualProtect, PAGE_EXECUTE_READWRITE, PAGE_PROTECTION_FLAGS,
};
use windows::Win32::UI::HiDpi::{
GetAwarenessFromDpiAwarenessContext, GetThreadDpiAwarenessContext,
SetProcessDpiAwarenessContext, DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2,
};
// Per-monitor-v2 DPI awareness — REQUIRED for IDXGIOutput5::DuplicateOutput1 (without it the
// call returns E_ACCESSDENIED forever, forcing the legacy DuplicateOutput path). Matches
// Apollo's startup. SetProcessDpiAwarenessContext fails with E_ACCESS_DENIED if awareness was
// already set (manifest / earlier call) — log the outcome AND the effective awareness so a
// 100% DuplicateOutput1 E_ACCESSDENIED is diagnosable instead of silent.
match SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2) {
Ok(()) => tracing::info!("DPI awareness set: PER_MONITOR_AWARE_V2"),
Err(e) => tracing::warn!(error = %format!("{e:?}"),
"SetProcessDpiAwarenessContext failed (already set?) — DuplicateOutput1 may E_ACCESSDENIED"),
}
// 0=UNAWARE 1=SYSTEM 2=PER_MONITOR(_V2). DuplicateOutput1 needs 2.
let awareness = GetAwarenessFromDpiAwarenessContext(GetThreadDpiAwarenessContext()).0;
tracing::info!(awareness, "effective DPI awareness (need 2=PER_MONITOR for DuplicateOutput1)");
let Ok(lib) = LoadLibraryA(s!("win32u.dll")) else {
tracing::warn!("GPU-pref hook: win32u.dll not loadable — skipping (DDA may churn on hybrid GPUs)");
return;
};
let Some(target) = GetProcAddress(lib, s!("NtGdiDdDDIGetCachedHybridQueryValue")) else {
tracing::warn!("GPU-pref hook: NtGdiDdDDIGetCachedHybridQueryValue not exported — skipping");
return;
};
let target = target as usize as *mut u8;
// x64 absolute jump to our replacement: `mov rax, imm64 ; jmp rax` (12 bytes). We never call the
// original, so no trampoline/relocation (hence no detour crate / C length-disassembler dep).
let hook = hybrid_query_hook as *const () as usize;
let mut patch = [0u8; 12];
patch[0] = 0x48;
patch[1] = 0xB8; // mov rax, imm64
patch[2..10].copy_from_slice(&hook.to_le_bytes());
patch[10] = 0xFF;
patch[11] = 0xE0; // jmp rax
let mut old = PAGE_PROTECTION_FLAGS(0);
if VirtualProtect(target as *const c_void, 12, PAGE_EXECUTE_READWRITE, &mut old).is_err() {
tracing::warn!("GPU-pref hook: VirtualProtect failed — skipping");
return;
}
std::ptr::copy_nonoverlapping(patch.as_ptr(), target, 12);
let mut restore = PAGE_PROTECTION_FLAGS(0);
let _ = VirtualProtect(target as *const c_void, 12, old, &mut restore);
// Serialize the i-cache: the patch is written here (main thread) but DXGI calls the export from
// the capture/encode worker thread — possibly a different core with a stale i-cache, in which
// case it would keep running the ORIGINAL function and DXGI would still reparent. (Apollo's
// MinHook does this flush internally; our hand-rolled patch must do it explicitly.)
let _ = FlushInstructionCache(GetCurrentProcess(), target as *const c_void, 12);
// VERIFY the patch actually landed (CFG/hotpatch/short-stub could silently reject it). Read it
// back; an error! (not a cheery "installed") makes a dead hook obvious in the logs.
let mut readback = [0u8; 12];
std::ptr::copy_nonoverlapping(target, readback.as_mut_ptr(), 12);
if readback == patch {
tracing::info!(
"GPU-pref hook installed + verified (win32u hybrid-query -> UNSPECIFIED): reparenting disabled"
);
} else {
tracing::error!(
want = %format!("{patch:02x?}"), got = %format!("{readback:02x?}"),
"GPU-pref hook patch did NOT land — hook is DEAD (DXGI will still reparent → ACCESS_LOST churn)"
);
}
});
}
// DXGI Desktop Duplication deliberately EXCLUDES the hardware cursor from the captured surface (the
// OS composites it separately). We capture the cursor shape/position from the frame info and blend it
// back in — on the GPU for the zero-copy path (a CPU readback would stall the 240 fps pipeline).
@@ -794,7 +991,12 @@ pub struct DuplCapturer {
device: ID3D11Device,
context: ID3D11DeviceContext,
output: IDXGIOutput1,
dupl: IDXGIOutputDuplication,
/// The output duplication. `Option` so recovery can RELEASE it (set `None`) BEFORE re-duplicating:
/// DXGI permits only ONE `IDXGIOutputDuplication` per output, and a stale one (incl. an ACCESS_LOST
/// one) keeps holding the output, so a re-`DuplicateOutput1` returns E_ACCESSDENIED and legacy
/// `DuplicateOutput` returns a BORN-LOST dup — the storm. Apollo releases before re-duplicating; so
/// do we now. `None` only transiently during recovery (acquire routes None → recovery).
dupl: Option<IDXGIOutputDuplication>,
/// The output's GDI name — re-resolved on ACCESS_LOST (a mode change can stale the cached handle).
gdi_name: String,
/// Stable SudoVDA target id, used to re-resolve `gdi_name` during recovery.
@@ -842,11 +1044,22 @@ pub struct DuplCapturer {
/// secure-desktop dwell where the output is gone) so we don't block the encode loop or hammer
/// DuplicateOutput — between attempts the last good frame is repeated. `None` = never attempted.
last_rebuild: Option<Instant>,
/// Throttle for ALL ACCESS_LOST recovery attempts (cheap re-duplicate + full rebuild). A
/// constantly-invalidated duplication (HDR overlay/MPO churn) would otherwise spin recovery and
/// starve the encode thread; cap attempts to ~one per 5 ms and repeat the last frame between them.
last_recover: Option<Instant>,
/// True once at least one real frame has been produced. After that, a frame drought (e.g. a long
/// secure-desktop dwell with nothing rendering to the virtual output) must never fatally end the
/// session — `next_frame` keeps repeating the last/seeded frame instead of erroring on its
/// deadline. The deadline stays fatal only *before* the first frame (a genuine startup misconfig).
ever_got_frame: bool,
/// Consecutive rebuilds that produced a BORN-LOST duplication (created OK, but its first
/// AcquireNextFrame instantly returned ACCESS_LOST). On the NORMAL desktop this is the hybrid
/// reparent/flip storm — once it persists, `acquire` returns Err so the m3 loop cold-rebuilds the
/// whole pipeline (new device/output) instead of spinning on a dead dup forever (the bug where the
/// stream froze on the last frame). Reset to 0 by any real frame. NOT armed on the secure
/// (Winlogon) desktop, where a long static dwell is legitimate and must never end the session.
consecutive_born_lost: u32,
/// GPU cursor overlay (rebuilt on device recreate). `None` until the first composite.
cursor: Option<CursorCompositor>,
/// Last cursor shape, decomposed into alpha + XOR layers (kept device-independent so it survives
@@ -869,6 +1082,39 @@ impl DuplCapturer {
keepalive: Box<dyn Send>,
) -> Result<Self> {
unsafe {
// Stop DXGI hybrid-GPU output reparenting BEFORE we create the factory / enumerate outputs
// (the cause of the 0x887A0026 ACCESS_LOST churn on this hybrid box: RTX 4090 + AMD iGPU).
install_gpu_pref_hook();
// Force PER-MONITOR-AWARE-V2 on THIS (capture) thread. IDXGIOutput5::DuplicateOutput1
// REQUIRES V2 — without it the call returns E_ACCESSDENIED forever (the 4370x failures
// measured live), forcing the legacy DuplicateOutput fallback which yields a BORN-LOST
// duplication on this box → the ACCESS_LOST storm. SetProcessDpiAwarenessContext failed at
// startup ("already set" — a manifest/runtime locked the process to a LOWER awareness, and
// GetAwarenessFromDpiAwarenessContext can't tell V1 from V2: it reports 2 for both). The
// per-THREAD override works regardless of the process default, so DuplicateOutput1 can
// succeed (the working dup Apollo gets). Must run on the capture thread before any DXGI use.
{
use windows::Win32::UI::HiDpi::{
AreDpiAwarenessContextsEqual, GetThreadDpiAwarenessContext,
SetThreadDpiAwarenessContext, DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2,
};
let prev = SetThreadDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2);
let is_v2 = AreDpiAwarenessContextsEqual(
GetThreadDpiAwarenessContext(),
DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2,
)
.as_bool();
tracing::info!(
set_ok = !prev.0.is_null(),
thread_is_v2 = is_v2,
"capture thread DPI awareness -> PER_MONITOR_AWARE_V2 (required for DuplicateOutput1)"
);
}
// Keep the IDD (SudoVDA) virtual display awake for the capture lifetime: an idle indirect
// display can be power-gated, which invalidates the duplication (a contributor to the
// "freezes randomly while streaming" loss). Restored to ES_CONTINUOUS on Drop. (Apollo does
// this too.) Must run on the capture thread (this one owns the capturer).
SetThreadExecutionState(ES_CONTINUOUS | ES_DISPLAY_REQUIRED | ES_SYSTEM_REQUIRED);
let factory: IDXGIFactory1 = CreateDXGIFactory1().context("CreateDXGIFactory1")?;
// 1) Find the output (monitor) whose GDI DeviceName matches, across ALL adapters. On a
// real-GPU box the SudoVDA virtual monitor's DXGI output is enumerated under the GPU that
@@ -969,16 +1215,20 @@ impl DuplCapturer {
let device = device.context("null D3D11 device")?;
let context = context.context("null D3D11 context")?;
// 3) duplicate the output. Attach to the current input desktop first (as SYSTEM this can
// be the Winlogon secure desktop) so a session that starts at the lock/login screen works,
// and re-assert display isolation at OPEN time (not just in recovery): a lock/UAC switch can
// re-attach a physical monitor and route the secure desktop THERE, leaving our virtual
// output perpetually idle/lost — re-isolating forces the secure desktop back onto it. Cheap
// + idempotent (a no-op when nothing else is attached).
// be the Winlogon secure desktop) so a session that starts at the lock/login screen works.
// The SudoVDA is kept the sole desktop via the CCD isolation in sudovda::create_monitor
// (registry-persisted), so the secure desktop has nowhere to render but the output we
// capture — no per-open re-isolation needed.
attach_input_desktop();
crate::vdisplay::sudovda::reassert_isolation(&target.gdi_name);
let dupl = output
.DuplicateOutput(&device)
let dupl = duplicate_output(&output, &device)
.context("DuplicateOutput (already duplicated by another app?)")?;
// Did DXGI actually call our win32u GPU-pref hook during factory/device/dupl creation? hits==0
// here means the hook is NOT on DXGI's reparenting path on this build → reparenting can't be
// the churn cause (look at independent-flip/composition instead). Diagnostic only.
tracing::debug!(
hook_hits = hybrid_hook_hits(),
"win32u GPU-pref hook call count after open"
);
// Kick the first frame loose: a blank virtual display is otherwise change-less.
nudge_cursor_onto(&output);
let dd: DXGI_OUTDUPL_DESC = dupl.GetDesc();
@@ -1016,7 +1266,7 @@ impl DuplCapturer {
device,
context,
output,
dupl,
dupl: Some(dupl),
target_id: target.target_id,
gdi_name: target.gdi_name,
width,
@@ -1040,7 +1290,9 @@ impl DuplCapturer {
hdr10_out: None,
hdr_conv: None,
last_rebuild: None,
last_recover: None,
ever_got_frame: false,
consecutive_born_lost: 0,
cursor: None,
cursor_shape: None,
cursor_pos: (0, 0),
@@ -1220,16 +1472,15 @@ impl DuplCapturer {
let mut buf = vec![0u8; info.PointerShapeBufferSize as usize];
let mut required = 0u32;
let mut si = DXGI_OUTDUPL_POINTER_SHAPE_INFO::default();
if self
.dupl
.GetFramePointerShape(
if self.dupl.as_ref().is_some_and(|d| {
d.GetFramePointerShape(
info.PointerShapeBufferSize,
buf.as_mut_ptr() as *mut c_void,
&mut required,
&mut si,
)
.is_ok()
{
}) {
if let Some(shape) = convert_pointer_shape(&buf, &si) {
tracing::info!(
shape_type = si.Type,
@@ -1250,12 +1501,6 @@ impl DuplCapturer {
/// HDR graphics white (PUNKTFUNK_HDR_CURSOR_NITS, default 203, per BT.2408) so it isn't ~2.5×
/// too dim; SDR composites the raw cursor in the display's native sRGB space.
unsafe fn composite_cursor_gpu(&mut self, gpu: &ID3D11Texture2D, hdr: bool) -> Result<()> {
// Diagnostic kill-switch: skip the GPU cursor composite entirely (PUNKTFUNK_NO_CURSOR=1) to
// isolate its cost on the 3D engine. The per-frame render-target view + draw to the 5K target
// is the suspect for the high 3D usage under heavy desktop change.
if std::env::var_os("PUNKTFUNK_NO_CURSOR").is_some() {
return Ok(());
}
self.dbg_cursor += 1;
if self.dbg_cursor % 240 == 1 {
tracing::debug!(
@@ -1350,10 +1595,14 @@ impl DuplCapturer {
/// (like recreate_dupl) so a born-lost one is rejected rather than adopted.
unsafe fn try_reduplicate(&mut self) -> bool {
if self.holding_frame {
let _ = self.dupl.ReleaseFrame();
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
self.holding_frame = false;
}
let dupl = match self.output.DuplicateOutput(&self.device) {
// RELEASE the old duplication FIRST (drop it → frees the output) before re-duplicating. DXGI
// allows one duplication per output; leaving the stale one alive is exactly why DuplicateOutput1
// returned E_ACCESSDENIED and the legacy fallback produced a born-lost dup.
self.dupl = None;
let dupl = match duplicate_output(&self.output, &self.device) {
Ok(d) => d,
Err(_) => return false,
};
@@ -1361,10 +1610,15 @@ impl DuplCapturer {
// + CAPTURE the frame: a born-lost duplication returns ACCESS_LOST immediately; alive-but-idle
// waits the full 16ms. On a real frame we present it (so a static desktop keeps a real
// last_present instead of the discarded one); idle keeps the existing last_present.
self.dupl = dupl;
self.dupl = Some(dupl);
let mut info = DXGI_OUTDUPL_FRAME_INFO::default();
let mut res: Option<IDXGIResource> = None;
match self.dupl.AcquireNextFrame(16, &mut info, &mut res) {
match self
.dupl
.as_ref()
.unwrap()
.AcquireNextFrame(16, &mut info, &mut res)
{
Ok(()) => {
self.update_cursor(&info);
if let Some(r) = res {
@@ -1388,7 +1642,7 @@ impl DuplCapturer {
/// frame and retries on a throttle, so the session survives an arbitrarily long secure visit.
unsafe fn recreate_dupl(&mut self) -> Result<()> {
if self.holding_frame {
let _ = self.dupl.ReleaseFrame();
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
self.holding_frame = false;
}
// The SudoVDA output's GDI name can CHANGE across a secure-desktop topology rebuild —
@@ -1396,12 +1650,20 @@ impl DuplCapturer {
if let Some(n) = crate::vdisplay::sudovda::resolve_gdi_name(self.target_id) {
self.gdi_name = n;
}
// Re-sync the capture thread to the CURRENT input desktop on EVERY rebuild — symmetric for
// ENTERING and LEAVING the secure (Winlogon) desktop. This is the fix for "UAC/lock appears
// fine but breaks the instant you click out of it": leaving secure used to skip this (it was
// gated on is_secure_desktop()), stranding the thread on the gone Winlogon desktop. Cheap +
// leak-free (attach_input_desktop closes its handle). Apollo (syncThreadDesktop) does the same.
// We do NOT re-isolate the display on recovery: the CCD isolation from create_monitor is
// registry-persisted, and a CCD topology mutation here would itself invalidate the freshly-rebuilt
// duplication → a self-feeding ACCESS_LOST storm (200 rebuilds/session observed before this).
attach_input_desktop();
// Re-route the secure (Winlogon) desktop back to the virtual output. The lock/UAC switch can
// re-attach a physical monitor so the secure desktop lands there and our virtual output goes
// perpetually ACCESS_LOST; re-isolating (as a fresh session's `create` does) is the delta that
// makes in-session recovery work like a reconnect. Idempotent/cheap when already isolated.
crate::vdisplay::sudovda::reassert_isolation(&self.gdi_name);
// RELEASE the old duplication FIRST (frees the output). reopen_duplication creates a NEW device
// and re-DuplicateOutputs the output; if the stale duplication is still alive it holds the output
// and the new one is born-lost / E_ACCESSDENIED. (On reopen failure self.dupl stays None and
// acquire's None-guard re-drives recovery.)
self.dupl = None;
let (dev, ctx, out, dupl) = reopen_duplication(&self.gdi_name)?; // Err → caller repeats + retries
// (The born-lost guard is now the capture-acquire at the end: we adopt, then grab the current
@@ -1428,7 +1690,7 @@ impl DuplCapturer {
self.device = dev;
self.context = ctx;
self.output = out;
self.dupl = dupl;
self.dupl = Some(dupl);
self.gpu_copy = None; // stale: belonged to the old device
self.cursor = None; // shaders/textures belonged to the old device; rebuilt on demand
self.last_present = None; // belonged to the old device; reseeded below
@@ -1450,7 +1712,12 @@ impl DuplCapturer {
nudge_cursor_onto(&self.output); // kick a change so a static desktop yields its first frame
let mut info = DXGI_OUTDUPL_FRAME_INFO::default();
let mut res: Option<IDXGIResource> = None;
let captured = match self.dupl.AcquireNextFrame(120, &mut info, &mut res) {
let captured = match self
.dupl
.as_ref()
.unwrap()
.AcquireNextFrame(120, &mut info, &mut res)
{
Ok(()) => {
self.update_cursor(&info);
match res {
@@ -1481,13 +1748,21 @@ impl DuplCapturer {
tracing::warn!(error = %format!("{e:#}"), "seed black frame after recovery failed");
}
}
// Track the born-lost storm: a rebuild that grabbed a real frame clears it; one that came back
// born-lost (created OK, first AcquireNextFrame == ACCESS_LOST) advances it. `acquire` uses this
// to escape to a full pipeline cold-rebuild on the normal desktop instead of spinning forever.
if captured {
self.consecutive_born_lost = 0;
} else {
self.consecutive_born_lost = self.consecutive_born_lost.saturating_add(1);
}
Ok(())
}
/// Acquire one frame: `Some` on a fresh image, `None` on timeout (no change → caller reuses last).
unsafe fn acquire(&mut self) -> Result<Option<CapturedFrame>> {
if self.holding_frame {
let _ = self.dupl.ReleaseFrame();
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
self.holding_frame = false;
}
let mut info = DXGI_OUTDUPL_FRAME_INFO::default();
@@ -1497,18 +1772,27 @@ impl DuplCapturer {
} else {
self.timeout_ms
};
match self.dupl.AcquireNextFrame(timeout, &mut info, &mut res) {
// If a prior recovery released the old duplication but couldn't create a new one yet (output
// gone during a secure dwell, etc.), self.dupl is None — synthesize ACCESS_LOST so we flow into
// the recovery path below instead of panicking.
let acq = match self.dupl.as_ref() {
Some(d) => d.AcquireNextFrame(timeout, &mut info, &mut res),
None => Err(windows::core::Error::from_hresult(DXGI_ERROR_ACCESS_LOST)),
};
match acq {
Ok(()) => {
if self.first_frame {
tracing::info!(w = self.width, h = self.height, "DXGI first frame acquired");
self.first_frame = false;
}
self.consecutive_born_lost = 0; // a real frame breaks the born-lost storm
self.update_cursor(&info);
}
Err(e) if e.code() == DXGI_ERROR_WAIT_TIMEOUT => {
self.dbg_timeouts += 1;
if self.dbg_timeouts % 40 == 1 {
tracing::warn!(
// A static desktop produces no DDA frames, so timeouts are NORMAL idle, not an error.
tracing::debug!(
timeouts = self.dbg_timeouts,
first_frame = self.first_frame,
"DXGI AcquireNextFrame timeout (no desktop change yet)"
@@ -1516,6 +1800,20 @@ impl DuplCapturer {
}
return Ok(None);
}
// MODE_CHANGE_IN_PROGRESS (0x887A0025) is TRANSIENT by design ("the call may succeed at a
// later attempt") — the display topology is mid-settle (e.g. just after the IDD's mode is
// applied). Do NOT recover/rebuild: a rebuild re-issues create()→set_active_mode, re-touching
// the topology and PERPETUATING the change (the storm we measured). Just repeat the last frame
// and wait it out, like a timeout. Throttled log so a genuinely stuck change stays visible.
Err(e) if e.code() == DXGI_ERROR_MODE_CHANGE_IN_PROGRESS => {
self.dbg_timeouts += 1;
if self.dbg_timeouts % 120 == 1 {
tracing::warn!(
"DXGI mode change in progress (0x887A0025) — waiting for topology to settle"
);
}
return Ok(None);
}
// Recoverable losses, ALL handled by rebuilding the duplication (device + re-DuplicateOutput):
// ACCESS_LOST — desktop switch (normal <-> Winlogon secure: lock/login/UAC) or mode change
// INVALID_CALL — the secure->user-desktop switch (post-login) leaves the duplication in a
@@ -1547,29 +1845,103 @@ impl DuplCapturer {
"DXGI capture lost — recovering (cheap re-duplicate, full rebuild if output gone)"
);
}
// GENTLE recovery. On the secure (Winlogon) desktop the duplication dies on EVERY
// independent-flip; a tight re-duplicate loop tears the duplication down + brings it up
// hundreds of times/sec — that release/recreate cycle is the real kernel stress (and it
// stalls the send thread long enough that the client times out → "display disconnected").
// So instead of fighting it: cap recovery HARD and just repeat the last frame in between
// (no busy-spin, no per-flip teardown). The session stays alive across a secure dwell; the
// lock/UAC screen is frozen/laggy, then capture resumes cleanly when the desktop returns.
// Tunable: PUNKTFUNK_RECOVER_MS (cheap re-duplicate cadence, default 250) and
// PUNKTFUNK_REBUILD_MS (heavy new-device rebuild cadence, default 1500).
let recover_ms = std::env::var("PUNKTFUNK_RECOVER_MS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(250u64);
let now = Instant::now();
if self
.last_recover
.is_some_and(|t| now.duration_since(t) < Duration::from_millis(recover_ms))
{
return Ok(None); // repeat the last frame; do NOT tear down/recreate yet
}
self.last_recover = Some(now);
if !device_dead && self.try_reduplicate() {
// Cheap recovery succeeded; the next acquire gets frames on the same device.
// Cheap recovery succeeded (same device, no teardown of the device/monitor).
self.first_frame = true;
return Ok(None);
}
// Output gone / device dead → full rebuild (new device), throttled.
// Heavy full rebuild (new device) — the costliest teardown/recreate, so throttle it the
// hardest. Only when the cheap re-duplicate keeps failing (genuine output/device loss).
let rebuild_ms = std::env::var("PUNKTFUNK_REBUILD_MS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(1500u64);
let now = Instant::now();
let due = self.last_rebuild.map_or(true, |t| {
now.duration_since(t) >= Duration::from_millis(250)
});
let due = self
.last_rebuild
.is_none_or(|t| now.duration_since(t) >= Duration::from_millis(rebuild_ms));
if due {
self.last_rebuild = Some(now);
if self.recreate_dupl().is_ok() {
self.first_frame = true;
}
} else {
std::thread::sleep(Duration::from_millis(8));
}
// Born-lost rebuilds (created OK, instant ACCESS_LOST) used to escalate to a full pipeline
// cold-rebuild here — but that re-issued vd.create()→set_active_mode (an audible PnP
// add/remove chime + a fresh topology mode change), which never converged and amplified
// the storm. With the topology fix (set_active_mode no longer promotes the IDD to PRIMARY
// by default) the born-lost storm is gone at its source; if one ever recurs, just keep
// repeating the last frame in-process — never tear the IDD down mid-session (Apollo never
// does). Throttled visibility only.
if self.consecutive_born_lost > 0 && self.consecutive_born_lost % 40 == 1 {
tracing::warn!(
consecutive = self.consecutive_born_lost,
"DDA born-lost rebuilds — repeating last frame in-process (no teardown)"
);
}
return Ok(None);
}
Err(e) => return Err(e).context("AcquireNextFrame"),
}
let res = res.context("AcquireNextFrame: null resource")?;
// Detect a mode/format change on the hot path. The desktop can flip HDR<->SDR (FP16<->BGRA —
// e.g. the SudoVDA output dropping out of HDR for the secure desktop) or change resolution
// WITHOUT raising ACCESS_LOST; `hdr_fp16`/`width`/`height` would then be stale and
// `present_acquired` would CopyResource into a mismatched-format/size target — corruption, or
// the secure-desktop "works once, then HDR breaks" bug. Re-read the acquired texture's desc
// every frame (Apollo does this) and rebuild on a real change instead of presenting a
// mismatched frame. Throttled like the ACCESS_LOST path so a flapping toggle can't hammer
// DuplicateOutput.
if let Ok(tex) = res.cast::<ID3D11Texture2D>() {
let mut d = D3D11_TEXTURE2D_DESC::default();
tex.GetDesc(&mut d);
// Only a real SIZE change is reliably detectable here. Format/HDR is NOT: legacy
// DuplicateOutput always hands back an 8-bit BGRA surface regardless of the output's FP16
// scanout mode, so comparing the acquired-texture format against `hdr_fp16` (derived from
// the OUTDUPL ModeDesc) self-fires every frame → a rebuild storm. A genuine resolution
// change is caught here; a real HDR↔SDR toggle arrives as ACCESS_LOST → recreate_dupl
// re-detects it. (Genuine FP16 capture is a separate change: DuplicateOutput1.)
if d.Width != self.width || d.Height != self.height {
tracing::info!(
old = format!("{}x{}", self.width, self.height),
new = format!("{}x{}", d.Width, d.Height),
"DXGI capture size changed mid-stream — rebuilding"
);
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
let now = Instant::now();
let due = self
.last_rebuild
.is_none_or(|t| now.duration_since(t) >= Duration::from_millis(250));
if due {
self.last_rebuild = Some(now);
if self.recreate_dupl().is_ok() {
self.first_frame = true;
}
}
return Ok(None);
}
}
Ok(Some(self.present_acquired(res)?))
}
@@ -1590,7 +1962,7 @@ impl DuplCapturer {
self.ensure_fp16_src()?;
let src = self.fp16_src.clone().context("fp16 src texture")?;
self.context.CopyResource(&src, &tex);
let _ = self.dupl.ReleaseFrame();
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
self.holding_frame = false;
self.composite_cursor_gpu(&src, true)?; // onto the FP16 surface (HDR: decode + nits scale)
self.ensure_hdr10_out()?;
@@ -1628,7 +2000,7 @@ impl DuplCapturer {
self.ensure_gpu_copy()?;
let gpu = self.gpu_copy.clone().context("gpu copy texture")?;
self.context.CopyResource(&gpu, &tex);
let _ = self.dupl.ReleaseFrame();
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
self.holding_frame = false;
self.composite_cursor_gpu(&gpu, false)?;
self.last_present = Some((gpu.clone(), PixelFormat::Bgra));
@@ -1655,7 +2027,7 @@ impl DuplCapturer {
let src = std::slice::from_raw_parts(map.pData as *const u8, pitch * h);
let mut tight = depad_bgra(src, pitch, w, h);
self.context.Unmap(&staging, 0);
let _ = self.dupl.ReleaseFrame();
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
self.holding_frame = false;
if self.cursor_visible {
if let Some(shape) = &self.cursor_shape {
@@ -1770,9 +2142,13 @@ impl Drop for DuplCapturer {
fn drop(&mut self) {
if self.holding_frame {
unsafe {
let _ = self.dupl.ReleaseFrame();
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
}
}
// Release the display/system-required execution state we took at open().
unsafe {
SetThreadExecutionState(ES_CONTINUOUS);
}
// _keepalive drops after, REMOVEing the SudoVDA monitor.
}
}
+1 -2
View File
@@ -20,7 +20,7 @@ use super::dxgi::{
find_output, make_device, nudge_cursor_onto, D3d11Frame, HdrConverter, WinCaptureTarget,
};
use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
use anyhow::{anyhow, bail, Context, Result};
use anyhow::{bail, Context, Result};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Condvar, Mutex};
use std::time::{Duration, Instant};
@@ -30,7 +30,6 @@ use windows::Graphics::Capture::{
Direct3D11CaptureFrame, Direct3D11CaptureFramePool, GraphicsCaptureItem, GraphicsCaptureSession,
};
use windows::Graphics::DirectX::DirectXPixelFormat;
use windows::Graphics::SizeInt32;
use windows::Win32::Foundation::{CloseHandle, HANDLE};
use windows::Win32::Graphics::Direct3D11::{
ID3D11Device, ID3D11DeviceContext, ID3D11RenderTargetView, ID3D11ShaderResourceView,
+11 -9
View File
@@ -15,7 +15,7 @@
use crate::capture::dxgi::WinCaptureTarget;
use anyhow::{bail, Context, Result};
use std::io::{BufRead, BufReader, Read, Write};
use std::io::{BufRead, BufReader, Read};
use std::sync::mpsc::{Receiver, SyncSender};
use std::sync::Mutex;
use windows::core::PWSTR;
@@ -152,11 +152,12 @@ unsafe fn no_inherit(h: HANDLE) {
let _ = SetHandleInformation(h, HANDLE_FLAG_INHERIT.0, HANDLE_FLAGS(0));
}
/// Build the helper's environment block: the user's block (so DLL/PATH/SystemRoot resolve) with this
/// (host) process's `PUNKTFUNK_*` vars overlaid, so the helper encodes with the SAME settings the
/// host runs with (`PUNKTFUNK_ENCODER=nvenc`, `PUNKTFUNK_ZEROCOPY`, …) instead of the user shell's.
/// Returns a UTF-16, double-null-terminated block suitable for `CREATE_UNICODE_ENVIRONMENT`.
unsafe fn merged_env_block(user_block: *const u16) -> Vec<u16> {
/// Build a child environment block: the target session's block (so DLL/PATH/SystemRoot resolve) with
/// this process's `PUNKTFUNK_*` vars overlaid, so the child runs with the SAME settings this process
/// has (`PUNKTFUNK_ENCODER=nvenc`, `PUNKTFUNK_ZEROCOPY`, …) instead of the target shell's. Returns a
/// UTF-16, double-null-terminated block suitable for `CREATE_UNICODE_ENVIRONMENT`. Shared by the WGC
/// helper spawn (here) and the Windows service launching the host into the active session.
pub(crate) unsafe fn merged_env_block(user_block: *const u16) -> Vec<u16> {
// Parse the user block ("VAR=VALUE\0" … "\0") into entries.
let mut entries: Vec<String> = Vec::new();
if !user_block.is_null() {
@@ -174,9 +175,10 @@ unsafe fn merged_env_block(user_block: *const u16) -> Vec<u16> {
p = p.offset(len + 1);
}
}
// Drop any PUNKTFUNK_* the user block carried, then overlay this process's PUNKTFUNK_* vars.
entries.retain(|e| !e.split('=').next().unwrap_or("").starts_with("PUNKTFUNK_"));
for (k, v) in std::env::vars().filter(|(k, _)| k.starts_with("PUNKTFUNK_")) {
// Overlay "our" settings — PUNKTFUNK_* and RUST_LOG — dropping whatever the target block had.
let is_ours = |k: &str| k.starts_with("PUNKTFUNK_") || k == "RUST_LOG";
entries.retain(|e| !is_ours(e.split('=').next().unwrap_or("")));
for (k, v) in std::env::vars().filter(|(k, _)| is_ours(k)) {
entries.push(format!("{k}={v}"));
}
// Serialize back to a UTF-16 double-null-terminated block.
+15 -3
View File
@@ -201,13 +201,25 @@ pub fn serve(mgmt: crate::mgmt::Options, native: Option<crate::m3::NativeServe>)
})
}
/// `~/.config/punktfunk`, created on demand — host identity + (later) pairing state live here.
/// The host config dir (host identity, pairing state, mgmt token, library) — created on demand.
/// Linux: `$XDG_CONFIG_HOME/punktfunk` or `~/.config/punktfunk`. Windows: `%ProgramData%\punktfunk`
/// (machine-wide — the SYSTEM service and the interactive user share ONE dir that survives logout).
/// `PUNKTFUNK_CONFIG_DIR` overrides on both platforms (used by the Windows service config / tests).
pub(crate) fn config_dir() -> PathBuf {
if let Some(dir) = std::env::var_os("PUNKTFUNK_CONFIG_DIR").filter(|s| !s.is_empty()) {
return PathBuf::from(dir);
}
// Windows: %ProgramData% (e.g. C:\ProgramData\punktfunk) — machine-wide, SYSTEM-readable,
// persists across user logout, correct for a SYSTEM service. Falls back to %APPDATA% then CWD.
#[cfg(target_os = "windows")]
let base = std::env::var_os("ProgramData")
.or_else(|| std::env::var_os("APPDATA"))
.map(PathBuf::from)
.unwrap_or_else(|| PathBuf::from("."));
#[cfg(not(target_os = "windows"))]
let base = std::env::var_os("XDG_CONFIG_HOME")
.map(PathBuf::from)
.or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".config")))
// Windows: %APPDATA% (e.g. C:\Users\X\AppData\Roaming) — cert/key/paired/uniqueid persist there.
.or_else(|| std::env::var_os("APPDATA").map(PathBuf::from))
.unwrap_or_else(|| PathBuf::from("."));
base.join("punktfunk")
}
+85 -15
View File
@@ -2026,6 +2026,19 @@ fn virtual_stream(
let (mut capturer, mut enc, mut frame, mut interval) =
build_pipeline_with_retry(&mut vd, mode, bitrate_kbps, bit_depth)?;
// Windows single-process DDA path (PUNKTFUNK_NO_WGC=1): the SudoVDA virtual display, isolated as the
// SOLE active output, goes into fullscreen independent-flip (one plane on one display) which Desktop
// Duplication cannot capture → the born-lost ACCESS_LOST storm we measured on the RTX4090+iGPU box
// (hook verified-firing, DPI=2, yet 100% DuplicateOutput1 E_ACCESSDENIED + born-lost). A tiny topmost
// layered overlay disqualifies independent-flip and forces DWM composition, which DDA CAN capture.
// (Apollo never hits this because it runs WITH a physical monitor attached — multi-display is already
// DWM-composited; we isolate to sole-display, so we must force composition ourselves.) Unlike the WGC
// relay path — where WGC owns the normal desktop and the overlay is secure-only — here DDA owns the
// normal desktop too, so it must run unconditionally. Held for the session; Drop tears it down.
// Best-effort; disable with PUNKTFUNK_FORCE_COMPOSED=0.
#[cfg(target_os = "windows")]
let _composed_flip = crate::capture::composed_flip::ForceComposedFlip::start();
let perf = std::env::var("PUNKTFUNK_PERF").is_ok();
// Microburst cap (applied in send_loop/paced_submit): a frame ≤ this bursts out immediately;
// only a bigger frame's overflow is spread. PUNKTFUNK_PACE_BURST_KB overrides the 128 KB default.
@@ -2266,10 +2279,12 @@ fn virtual_stream(
/// Should this host take the two-process (SYSTEM host + user-session WGC helper) path? Yes when it's
/// running as SYSTEM — the only account that can capture the secure desktop + drive SendInput on it,
/// and the account under which in-process WGC won't activate. `PUNKTFUNK_FORCE_HELPER` forces it on
/// (for testing the relay as a normal user); `PUNKTFUNK_NO_HELPER` forces it off.
/// (for testing the relay as a normal user); `PUNKTFUNK_NO_HELPER` forces it off. `PUNKTFUNK_NO_WGC`
/// also forces it off — that mode runs pure single-process DDA (one capturer for the normal AND secure
/// desktop, Apollo-style), which has no WGC helper to relay.
#[cfg(target_os = "windows")]
fn should_use_helper() -> bool {
if std::env::var_os("PUNKTFUNK_NO_HELPER").is_some() {
if std::env::var_os("PUNKTFUNK_NO_HELPER").is_some() || crate::capture::wgc_disabled() {
return false;
}
std::env::var_os("PUNKTFUNK_FORCE_HELPER").is_some()
@@ -2329,6 +2344,20 @@ fn virtual_stream_relay(
let target = vout.win_capture.clone().ok_or_else(|| {
anyhow!("SudoVDA target not yet an active display (needs a WDDM GPU to activate it)")
})?;
// Force the SudoVDA's advanced-color (HDR) state to MATCH the session bit depth BEFORE the WGC
// helper captures it. The advanced-color state PERSISTS on the monitor across sessions, so an
// 8-bit (SDR) session could otherwise inherit HDR left on by a prior 10-bit run (or our own
// earlier toggle) → the helper captures HDR FP16 while the encoder is 8-bit SDR → broken image.
// Runs on every build (initial + mode-switch + return-from-secure rebuild), keeping WGC's format
// consistent with the encoder. (HDR independent-flip on the secure desktop is handled separately
// by dropping to SDR for the DDA leg.)
#[cfg(target_os = "windows")]
unsafe {
if crate::vdisplay::sudovda::set_advanced_color(target.target_id, bit_depth >= 10) {
// Let the colorspace change settle before WGC creates its capture item / detects HDR.
std::thread::sleep(std::time::Duration::from_millis(250));
}
}
let relay = HelperRelay::spawn(
&target,
(mode.width, mode.height, effective_hz),
@@ -2526,24 +2555,65 @@ fn virtual_stream_relay(
"two-process: source switch"
);
if secure {
if dda.is_none() {
match open_dda(&target, cur_mode.width, cur_mode.height, effective_hz) {
Ok(p) => dda = Some(p),
Err(e) => {
tracing::error!(error = %format!("{e:#}"),
"two-process: DDA open failed — secure desktop will freeze on last frame");
}
// SDR-while-secure (HDR sessions ONLY): drop the SudoVDA out of HDR so the secure
// (Winlogon) desktop renders SDR/composed — HDR fullscreen independent-flip is what made
// DDA storm ACCESS_LOST (black). For an SDR (8-bit) session the output is already SDR, so
// toggling is a needless topology change AND its matching restore on the way back would
// force the desktop into HDR the 8-bit encoder can't take (broken image).
if bit_depth >= 10 {
let toggled = unsafe {
crate::vdisplay::sudovda::set_advanced_color(target.target_id, false)
};
if toggled {
std::thread::sleep(std::time::Duration::from_millis(250));
}
}
if let Some(d) = dda.as_mut() {
d.enc.request_keyframe();
dda = None; // reopen so we capture the (SDR) output
match open_dda(&target, cur_mode.width, cur_mode.height, effective_hz) {
Ok(mut p) => {
p.enc.request_keyframe();
dda = Some(p);
}
Err(e) => {
tracing::error!(error = %format!("{e:#}"),
"two-process: DDA open failed — secure desktop will freeze on last frame");
}
}
next = std::time::Instant::now();
} else {
// Returning to the helper: drain stale buffered AUs (encoded while we ignored it) and
// force a fresh IDR; await_idr then skips the stale deltas until that IDR arrives.
while relay.try_recv().is_ok() {}
relay.request_keyframe();
// Returning to the normal desktop: RESUME from the still-alive WGC helper. Do NOT
// recreate the SudoVDA monitor or respawn the helper — build()'s vd.create() is an
// IOCTL_REMOVE+ADD of the monitor (the audible disconnect/connect chime + the
// teardown/recreate kernel stress that broke DDA, now applied to the mux). The monitor +
// helper persist for the WHOLE session; only the host-DDA leg opens (secure) and closes
// (normal). Apply the DDA learning here: reuse, don't tear down.
dda = None; // free the secure DDA encoder; the relay (helper) is the source again
while relay.try_recv().is_ok() {} // drop secure-dwell backlog
relay.request_keyframe(); // client decoder resumes on the helper's next IDR
if bit_depth >= 10 {
// HDR session ONLY: the secure switch dropped the SudoVDA to SDR for the DDA leg, so
// here we must restore HDR AND rebuild the helper so WGC re-detects the HDR
// colorspace. An SDR session never changed the colorspace → no rebuild, no recreate.
unsafe {
crate::vdisplay::sudovda::set_advanced_color(target.target_id, true);
}
match build(&mut vd, cur_mode) {
Ok((ka, rl, tg, hz)) => {
relay = rl;
_keepalive = ka;
target = tg;
effective_hz = hz;
interval = std::time::Duration::from_secs_f64(1.0 / hz.max(1) as f64);
}
Err(e) => {
tracing::error!(error = %format!("{e:#}"),
"two-process: helper rebuild on secure-exit failed");
while relay.try_recv().is_ok() {}
relay.request_keyframe();
}
}
}
next = std::time::Instant::now();
}
}
if want_kf {
+44 -7
View File
@@ -31,6 +31,8 @@ mod mgmt_token;
mod native_pairing;
mod pipeline;
mod pwinit;
#[cfg(target_os = "windows")]
mod service;
mod vdisplay;
#[cfg(target_os = "windows")]
mod wgc_helper;
@@ -43,13 +45,28 @@ use m0::{Options, Source};
use std::path::PathBuf;
fn main() {
// Logs go to stderr so stdout stays machine-readable (`punktfunk-host openapi > spec.json`).
tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()),
)
.with_writer(std::io::stderr)
.init();
let filter =
tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into());
// `service run` is launched by the SCM with no console — log to a file instead of stderr.
#[cfg(target_os = "windows")]
let service_run = {
let a: Vec<String> = std::env::args().skip(1).take(2).collect();
a.first().map(String::as_str) == Some("service")
&& a.get(1).map(String::as_str) == Some("run")
};
#[cfg(not(target_os = "windows"))]
let service_run = false;
if service_run {
#[cfg(target_os = "windows")]
service::init_file_logging(filter);
} else {
// Logs go to stderr so stdout stays machine-readable (`punktfunk-host openapi > spec.json`).
tracing_subscriber::fmt()
.with_env_filter(filter)
.with_writer(std::io::stderr)
.init();
}
if let Err(e) = real_main() {
tracing::error!("{e:#}");
@@ -75,6 +92,13 @@ fn real_main() -> Result<()> {
punktfunk_core::ABI_VERSION
);
// Install Apollo's win32u GPU-preference hook BEFORE anything touches DXGI (the SudoVDA
// render-adapter selection creates a DXGI factory during virtual-display setup, well before
// capture). On a hybrid-GPU box this stops DXGI from reparenting the virtual output off the
// capture GPU — the ACCESS_LOST churn fix. Idempotent (Once); harmless on non-hybrid boxes.
#[cfg(target_os = "windows")]
crate::capture::dxgi::install_gpu_pref_hook();
match args.first().map(String::as_str) {
// GameStream host control plane (P1.1: mDNS + serverinfo) + management API, and (with
// --native) the native punktfunk/1 host in the same process — the unified host.
@@ -226,6 +250,11 @@ fn real_main() -> Result<()> {
bit_depth: get("--bit-depth").and_then(|s| s.parse().ok()).unwrap_or(8),
})
}
// Windows service control: install/uninstall/start/stop/status + the SCM `run` entry point.
// Replaces the ad-hoc launch chain — `service install` registers an auto-start SYSTEM service
// that launches the host into the active interactive session.
#[cfg(target_os = "windows")]
Some("service") => service::main(&args[1..]),
Some("-h") | Some("--help") | Some("help") | None => {
print_usage();
Ok(())
@@ -508,4 +537,12 @@ NOTES:
Both 'serve --native' and 'm3-host' advertise the native service over mDNS
(_punktfunk._udp) for client auto-discovery — 'punktfunk-client-rs --discover' lists them."
);
#[cfg(target_os = "windows")]
eprintln!(
"\nWINDOWS SERVICE (end-user deployment — replaces a manual launch):\n\
\x20 punktfunk-host service install register an auto-start SYSTEM service + firewall rules\n\
\x20 punktfunk-host service uninstall remove the service + firewall rules\n\
\x20 punktfunk-host service start|stop|status\n\
\x20 config: %ProgramData%\\punktfunk\\host.env"
);
}
+702
View File
@@ -0,0 +1,702 @@
//! Windows service: a SYSTEM supervisor that launches the streaming host into the **active
//! interactive console session** and keeps it tracking session switches — the end-user replacement
//! for the ad-hoc PsExec / VBS / scheduled-task launch chain used during bring-up.
//!
//! Why a supervisor and not just "run the host as a service": the host must run **as SYSTEM in the
//! interactive session** (session 1+). Desktop Duplication of the secure (Winlogon/UAC/lock) desktop
//! and `SendInput` both need SYSTEM; capture and injection both need the *interactive* session, which
//! a plain session-0 service is not in. So this service (itself in session 0) never captures — it
//! duplicates its own LocalSystem token, retargets it to the active console session, and
//! `CreateProcessAsUserW`s the host there. This is the Sunshine/Apollo model. The host in turn spawns
//! the WGC helper into the *user* session (see `capture::wgc_relay`) — two nested launches.
//!
//! Subcommands (Windows only):
//! ```text
//! punktfunk-host service run SCM entry point (registered as binPath; not run by hand)
//! punktfunk-host service install register an auto-start LocalSystem service + firewall rules
//! punktfunk-host service uninstall stop + delete the service + remove firewall rules
//! punktfunk-host service start|stop|status convenience wrappers over the SCM
//! ```
//! Config lives in `%ProgramData%\punktfunk\host.env` (the Windows analogue of `scripts/host.env`),
//! loaded into the service's environment and carried to the host child. Logs land in
//! `%ProgramData%\punktfunk\logs\`.
use anyhow::{bail, Context, Result};
use std::ffi::{c_void, OsString};
use std::path::PathBuf;
use std::sync::atomic::{AtomicIsize, Ordering};
use std::time::Duration;
use windows::core::{PCWSTR, PWSTR};
use windows::Win32::Foundation::{CloseHandle, HANDLE, WAIT_OBJECT_0};
use windows::Win32::Security::{
DuplicateTokenEx, SecurityImpersonation, SetTokenInformation, TokenPrimary, TokenSessionId,
SECURITY_ATTRIBUTES, TOKEN_ADJUST_DEFAULT, TOKEN_ADJUST_SESSIONID, TOKEN_ALL_ACCESS,
TOKEN_ASSIGN_PRIMARY, TOKEN_DUPLICATE, TOKEN_QUERY,
};
use windows::Win32::Storage::FileSystem::{
CreateFileW, FILE_APPEND_DATA, FILE_GENERIC_WRITE, FILE_SHARE_READ, FILE_SHARE_WRITE,
FILE_WRITE_DATA, OPEN_ALWAYS,
};
use windows::Win32::System::Environment::{CreateEnvironmentBlock, DestroyEnvironmentBlock};
use windows::Win32::System::JobObjects::{
AssignProcessToJobObject, CreateJobObjectW, JobObjectExtendedLimitInformation,
SetInformationJobObject, JOBOBJECT_EXTENDED_LIMIT_INFORMATION, JOB_OBJECT_LIMIT_BREAKAWAY_OK,
JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
};
use windows::Win32::System::RemoteDesktop::WTSGetActiveConsoleSessionId;
use windows::Win32::System::Threading::{
CreateEventW, CreateProcessAsUserW, GetCurrentProcess, OpenProcessToken, ResetEvent, SetEvent,
TerminateProcess, WaitForMultipleObjects, CREATE_NO_WINDOW, CREATE_UNICODE_ENVIRONMENT,
INFINITE, PROCESS_INFORMATION, STARTF_USESTDHANDLES, STARTUPINFOW,
};
/// SCM service name (the key under HKLM\SYSTEM\CurrentControlSet\Services). Stable identity.
const SERVICE_NAME: &str = "PunktfunkHost";
const SERVICE_DISPLAY: &str = "punktfunk streaming host";
const SERVICE_DESCRIPTION: &str =
"Low-latency desktop/game streaming host. Launches the punktfunk host into the active session.";
/// The host subcommand the service launches, overridable via `PUNKTFUNK_HOST_CMD` in host.env.
/// `serve --native` runs the GameStream (Moonlight) host + the native punktfunk/1 QUIC host in one
/// process — the unified host an end user wants.
const DEFAULT_HOST_CMD: &str = "serve --native";
/// Event handles shared between the SCM control handler (which signals them) and the supervision loop
/// (which waits on them). Stored as raw `isize` so the `'static + Send` handler can reach them without
/// a non-`Send` `HANDLE` capture. Set once in `run_service`.
static STOP_EVENT: AtomicIsize = AtomicIsize::new(0);
static SESSION_EVENT: AtomicIsize = AtomicIsize::new(0);
/// Rebuild a `HANDLE` from the raw value held in one of the event atomics (`STOP_EVENT` /
/// `SESSION_EVENT`). The atomic stores the handle as an `isize`; this casts it back to the pointer
/// form that `HANDLE` wraps. While the atomic still holds its initial `0` the result is a null handle.
fn load_event(a: &AtomicIsize) -> HANDLE {
    let raw = a.load(Ordering::Relaxed);
    HANDLE(raw as *mut c_void)
}
/// Dispatch `service <sub>`.
pub fn main(args: &[String]) -> Result<()> {
match args.first().map(String::as_str) {
Some("run") => run(),
Some("install") => install(),
Some("uninstall") => uninstall(),
Some("start") => sc(&["start", SERVICE_NAME]),
Some("stop") => sc(&["stop", SERVICE_NAME]),
Some("status") => sc(&["query", SERVICE_NAME]),
_ => {
eprintln!(
"punktfunk-host service — Windows service control\n\n\
USAGE:\n\
\x20 punktfunk-host service install register the auto-start service + firewall rules\n\
\x20 punktfunk-host service uninstall stop + remove the service + firewall rules\n\
\x20 punktfunk-host service start start the service now\n\
\x20 punktfunk-host service stop stop the service\n\
\x20 punktfunk-host service status query the service\n\n\
Config: %ProgramData%\\punktfunk\\host.env Logs: %ProgramData%\\punktfunk\\logs\\"
);
Ok(())
}
}
}
// ── Logging ─────────────────────────────────────────────────────────────────────────────────────
/// Path of the service's own (supervision) log: `service.log` inside the `logs` directory under
/// the punktfunk config dir (`%ProgramData%\punktfunk\logs\service.log`). The host child's
/// stdout/stderr go to `host.log` next to it.
///
/// Creating the `logs` directory is attempted on every call; a failure is ignored here and
/// surfaces later when the file is opened.
pub fn service_log_path() -> PathBuf {
    let mut path = crate::gamestream::config_dir().join("logs");
    let _ = std::fs::create_dir_all(&path);
    path.push("service.log");
    path
}
/// Path of the host child's redirected stdout/stderr log: `host.log` inside the `logs` directory
/// under the punktfunk config dir. The directory is created best-effort on every call.
fn host_log_path() -> PathBuf {
    let mut path = crate::gamestream::config_dir().join("logs");
    let _ = std::fs::create_dir_all(&path);
    path.push("host.log");
    path
}
/// Initialise tracing to the service log file (the SCM gives the service no console/stderr). Falls
/// back to stderr if the file can't be opened. Called from `main()` only for `service run`.
pub fn init_file_logging(filter: tracing_subscriber::EnvFilter) {
match std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(service_log_path())
{
Ok(file) => {
tracing_subscriber::fmt()
.with_env_filter(filter)
.with_ansi(false)
.with_writer(move || file.try_clone().expect("clone service log handle"))
.init();
}
Err(_) => {
tracing_subscriber::fmt()
.with_env_filter(filter)
.with_writer(std::io::stderr)
.init();
}
}
}
// ── host.env config ─────────────────────────────────────────────────────────────────────────────
/// Location of the service configuration file: `host.env` directly inside the punktfunk config dir.
fn host_env_path() -> PathBuf {
    let config_root = crate::gamestream::config_dir();
    config_root.join("host.env")
}
/// Load `%ProgramData%\punktfunk\host.env` (KEY=VALUE lines, `#` comments) into this process's
/// environment, so the host child inherits `PUNKTFUNK_*` / `RUST_LOG` via the merged env block.
///
/// Parsing rules: blank lines and lines starting with `#` are skipped; each remaining line is split
/// at the first `=`; key and value are trimmed and surrounding `"` characters are stripped from the
/// value; lines without `=` or with an empty key are ignored.
///
/// Never fails: a missing file means "use defaults", an unreadable file is logged as a warning,
/// and entries that `std::env::set_var` would reject (embedded NUL) are skipped with a warning.
fn load_host_env() {
    let path = host_env_path();
    let contents = match std::fs::read_to_string(&path) {
        Ok(text) => text,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            tracing::info!(path = %path.display(), "no host.env (using defaults)");
            return;
        }
        Err(err) => {
            // Present but unreadable (permissions, not UTF-8, ...): say so instead of "no host.env".
            tracing::warn!(
                path = %path.display(),
                error = %err,
                "could not read host.env (using defaults)"
            );
            return;
        }
    };
    // Windows editors commonly prepend a UTF-8 byte-order mark. `trim()` does not remove it, so
    // without this the first key would silently become "\u{feff}KEY".
    let contents = contents.strip_prefix('\u{feff}').unwrap_or(&contents);
    let mut n = 0;
    for line in contents.lines() {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        let Some((k, v)) = line.split_once('=') else {
            continue;
        };
        let (k, v) = (k.trim(), v.trim().trim_matches('"'));
        if k.is_empty() {
            continue;
        }
        // `set_var` panics on a NUL in the key or value; one bad line must not take the service down.
        if k.contains('\0') || v.contains('\0') {
            tracing::warn!(key = k, "skipping host.env entry containing a NUL character");
            continue;
        }
        std::env::set_var(k, v);
        n += 1;
    }
    tracing::info!(path = %path.display(), vars = n, "loaded host.env");
}
// ── service run (SCM entry point) ────────────────────────────────────────────────────────────────
// Generates the `extern "system"` trampoline `ffi_service_main` that forwards to `service_main`.
windows_service::define_windows_service!(ffi_service_main, service_main);
/// `service run`: hand this process over to the service dispatcher.
///
/// The call blocks for the lifetime of the service; the dispatcher invokes `service_main` (through
/// `ffi_service_main`) on a thread of its own. Started by hand rather than by the Service Control
/// Manager, the dispatcher call fails and the error explains how to install/start the service.
fn run() -> Result<()> {
    let dispatched = windows_service::service_dispatcher::start(SERVICE_NAME, ffi_service_main);
    match dispatched {
        Ok(()) => Ok(()),
        Err(e) => Err(anyhow::anyhow!(
            "service_dispatcher failed ({e}). `service run` is launched by the Service Control \
Manager, not by hand — use `punktfunk-host service install` then `service start`."
        )),
    }
}
fn service_main(_args: Vec<OsString>) {
if let Err(e) = run_service() {
tracing::error!("service exited with error: {e:#}");
}
}
/// Body of the service: create the stop/session events, register the SCM control handler, report
/// RUNNING, load `host.env`, run the supervision loop, then report STOPPED and release the events.
///
/// STOPPED carries a non-zero service-specific exit code when the supervision loop failed, so the
/// SCM does not record a failed run as a clean stop.
///
/// # Errors
/// Fails if an event cannot be created, the control handler cannot be registered, RUNNING cannot be
/// reported, or `supervise` returns an error.
fn run_service() -> Result<()> {
    use windows_service::service::{
        ServiceControl, ServiceControlAccept, ServiceExitCode, ServiceState, ServiceStatus,
        ServiceType,
    };
    use windows_service::service_control_handler::{self, ServiceControlHandlerResult};
    // Two manual-reset events: STOP (set once, never reset) and SESSION (set on a console
    // connect/disconnect, reset by the supervisor after it reacts).
    let stop =
        unsafe { CreateEventW(None, true, false, PCWSTR::null()) }.context("CreateEvent stop")?;
    let session = unsafe { CreateEventW(None, true, false, PCWSTR::null()) }
        .context("CreateEvent session")?;
    // Published BEFORE the handler is registered, so the handler never observes an unset event.
    STOP_EVENT.store(stop.0 as isize, Ordering::Relaxed);
    SESSION_EVENT.store(session.0 as isize, Ordering::Relaxed);
    // The control handler captures nothing — it reaches the events through the statics, so it stays
    // `Fn + Send + 'static`. Session lock/unlock are handled inside the host (DesktopWatcher), so we
    // only flag console connect/disconnect/logon — the events that change the active session.
    let handler = move |control| -> ServiceControlHandlerResult {
        match control {
            ServiceControl::Stop | ServiceControl::Preshutdown | ServiceControl::Shutdown => {
                unsafe { SetEvent(load_event(&STOP_EVENT)) }.ok();
                ServiceControlHandlerResult::NoError
            }
            ServiceControl::SessionChange(param) => {
                use windows_service::service::SessionChangeReason::*;
                if matches!(
                    param.reason,
                    ConsoleConnect | ConsoleDisconnect | SessionLogon
                ) {
                    unsafe { SetEvent(load_event(&SESSION_EVENT)) }.ok();
                }
                ServiceControlHandlerResult::NoError
            }
            ServiceControl::Interrogate => ServiceControlHandlerResult::NoError,
            _ => ServiceControlHandlerResult::NotImplemented,
        }
    };
    let status_handle = service_control_handler::register(SERVICE_NAME, handler)
        .context("register service control handler")?;
    let accepted = ServiceControlAccept::STOP
        | ServiceControlAccept::PRESHUTDOWN
        | ServiceControlAccept::SESSION_CHANGE;
    let running = ServiceStatus {
        service_type: ServiceType::OWN_PROCESS,
        current_state: ServiceState::Running,
        controls_accepted: accepted,
        exit_code: ServiceExitCode::Win32(0),
        checkpoint: 0,
        wait_hint: Duration::default(),
        process_id: None,
    };
    status_handle
        .set_service_status(running.clone())
        .context("set RUNNING")?;
    tracing::info!("punktfunk service started — supervising host in the active console session");
    load_host_env();
    let result = supervise(stop, session);
    // Report STOPPED regardless of how supervise returned, but do not claim success on failure.
    let exit_code = if result.is_ok() {
        ServiceExitCode::Win32(0)
    } else {
        ServiceExitCode::ServiceSpecific(1)
    };
    let _ = status_handle.set_service_status(ServiceStatus {
        current_state: ServiceState::Stopped,
        controls_accepted: ServiceControlAccept::empty(),
        exit_code,
        ..running
    });
    // Unpublish the handles before closing them: a late control must not signal a handle value
    // that has been closed (and possibly reused for another object).
    STOP_EVENT.store(0, Ordering::Relaxed);
    SESSION_EVENT.store(0, Ordering::Relaxed);
    unsafe {
        let _ = CloseHandle(stop);
        let _ = CloseHandle(session);
    }
    result
}
/// The supervision loop: (re)launch the host into the active console session and wait on
/// [stop, session-change, child-exit], relaunching on child exit and on a console-session switch.
fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
let exe = std::env::current_exe().context("current_exe")?;
let host_cmd = std::env::var("PUNKTFUNK_HOST_CMD").unwrap_or_else(|_| DEFAULT_HOST_CMD.into());
let cmdline = format!("\"{}\" {host_cmd}", exe.to_string_lossy());
let workdir: Vec<u16> = exe
.parent()
.map(|p| p.to_string_lossy().into_owned())
.unwrap_or_default()
.encode_utf16()
.chain(std::iter::once(0))
.collect();
// Kill-on-close job so a service crash never orphans the SYSTEM host; BREAKAWAY_OK lets the host
// still spawn the WGC helper.
let job = unsafe { make_job() }.context("create job object")?;
let mut restarts: u32 = 0;
loop {
if wait_one(stop, 0) {
break;
}
let session = unsafe { WTSGetActiveConsoleSessionId() };
if session == 0xFFFF_FFFF {
// No interactive session yet (boot / fully logged out). Wait, but wake on stop/session.
tracing::info!("no active console session — waiting");
if wait_any(&[stop, session_ev], 3000) == Some(0) {
break;
}
unsafe { ResetEvent(session_ev) }.ok();
continue;
}
let pi = match unsafe { spawn_host(session, &cmdline, &workdir, job) } {
Ok(pi) => pi,
Err(e) => {
tracing::error!("failed to launch host into session {session}: {e:#}");
if wait_one(stop, 3000) {
break;
}
continue;
}
};
tracing::info!(pid = pi.dwProcessId, session, cmd = %host_cmd, "host launched");
// Wait on stop / session-change / child-exit.
let reason = wait_any(&[stop, session_ev, pi.hProcess], INFINITE);
match reason {
Some(0) => {
// Stop: terminate the child and exit.
unsafe {
let _ = TerminateProcess(pi.hProcess, 0);
let _ = CloseHandle(pi.hProcess);
let _ = CloseHandle(pi.hThread);
}
break;
}
Some(1) => {
// Session change: relaunch only if the active console session actually moved.
unsafe { ResetEvent(session_ev) }.ok();
let now = unsafe { WTSGetActiveConsoleSessionId() };
if now != session {
tracing::info!(
old = session,
new = now,
"console session changed — relaunching host"
);
unsafe {
let _ = TerminateProcess(pi.hProcess, 0);
let _ = CloseHandle(pi.hProcess);
let _ = CloseHandle(pi.hThread);
}
restarts = 0;
continue;
}
// Same session (e.g. a stray notification) — keep waiting on the same child.
let r = wait_any(&[stop, pi.hProcess], INFINITE);
unsafe {
let _ = TerminateProcess(pi.hProcess, 0);
let _ = CloseHandle(pi.hProcess);
let _ = CloseHandle(pi.hThread);
}
if r == Some(0) {
break;
}
// child exited → fall through to relaunch
}
_ => {
// Child exited on its own — relaunch (with a small crash-loop backoff).
tracing::warn!("host process exited — relaunching");
unsafe {
let _ = CloseHandle(pi.hProcess);
let _ = CloseHandle(pi.hThread);
}
}
}
restarts += 1;
let backoff = restarts.min(10) * 500; // 0.5s..5s
if wait_one(stop, backoff) {
break;
}
}
unsafe {
// Dropping the job (KILL_ON_JOB_CLOSE) reaps any straggler in it.
let _ = CloseHandle(job);
}
tracing::info!("supervision loop ended");
Ok(())
}
/// Wait up to `ms` milliseconds on a single handle; `true` only when the wait reports the handle
/// as signalled (any other outcome, including timeout, yields `false`).
fn wait_one(h: HANDLE, ms: u32) -> bool {
    let outcome = unsafe { WaitForMultipleObjects(&[h], false, ms) };
    outcome == WAIT_OBJECT_0
}
/// Wait (any-of) on `handles` for up to `ms` milliseconds.
///
/// Returns `Some(i)` when the wait result is `WAIT_OBJECT_0 + i` with `i` inside `handles`, and
/// `None` for every other result — timeout as well as any other value the wait can return.
fn wait_any(handles: &[HANDLE], ms: u32) -> Option<usize> {
    let raw = unsafe { WaitForMultipleObjects(handles, false, ms) }.0;
    // Wrapping subtraction maps results below WAIT_OBJECT_0 to huge offsets, which fail the bound.
    let offset = raw.wrapping_sub(WAIT_OBJECT_0.0);
    if offset < handles.len() as u32 {
        Some(offset as usize)
    } else {
        None
    }
}
/// A kill-on-close + breakaway-ok job object.
unsafe fn make_job() -> Result<HANDLE> {
let job = CreateJobObjectW(None, PCWSTR::null()).context("CreateJobObjectW")?;
let mut info = JOBOBJECT_EXTENDED_LIMIT_INFORMATION::default();
info.BasicLimitInformation.LimitFlags =
JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK;
SetInformationJobObject(
job,
JobObjectExtendedLimitInformation,
&info as *const _ as *const c_void,
std::mem::size_of::<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>() as u32,
)
.context("SetInformationJobObject")?;
Ok(job)
}
/// Launch the host as SYSTEM into `session_id`'s interactive desktop. Returns the child handles.
///
/// * `session_id` — target session, written into the duplicated token.
/// * `cmdline` — full command line (quoted executable + arguments).
/// * `workdir` — NUL-terminated UTF-16 working directory; empty means "inherit".
/// * `job` — job object the new process is assigned to (best-effort).
///
/// The caller owns `hProcess` / `hThread` in the returned `PROCESS_INFORMATION` and must close both.
///
/// # Errors
/// Fails if the token cannot be opened/duplicated/retargeted, the log file cannot be opened, or
/// `CreateProcessAsUserW` fails. Every handle acquired here is closed on every failure path — the
/// supervisor retries failed launches indefinitely, so a leak per attempt would accumulate.
///
/// # Safety
/// `workdir`, when non-empty, must be NUL-terminated. `job` must be a valid job handle.
unsafe fn spawn_host(
    session_id: u32,
    cmdline: &str,
    workdir: &[u16],
    job: HANDLE,
) -> Result<PROCESS_INFORMATION> {
    // 1) A primary SYSTEM token retargeted to the active console session: duplicate THIS process's
    //    (LocalSystem) token, then set its session id. SYSTEM holds SE_TCB so SetTokenInformation
    //    (TokenSessionId) is permitted.
    let mut proc_token = HANDLE::default();
    OpenProcessToken(
        GetCurrentProcess(),
        TOKEN_DUPLICATE
            | TOKEN_QUERY
            | TOKEN_ASSIGN_PRIMARY
            | TOKEN_ADJUST_DEFAULT
            | TOKEN_ADJUST_SESSIONID,
        &mut proc_token,
    )
    .context("OpenProcessToken (service must run as SYSTEM)")?;
    let mut primary = HANDLE::default();
    let dup = DuplicateTokenEx(
        proc_token,
        TOKEN_ALL_ACCESS,
        None,
        SecurityImpersonation,
        TokenPrimary,
        &mut primary,
    );
    let _ = CloseHandle(proc_token);
    dup.context("DuplicateTokenEx(TokenPrimary)")?;
    let retargeted = SetTokenInformation(
        primary,
        TokenSessionId,
        &session_id as *const u32 as *const c_void,
        std::mem::size_of::<u32>() as u32,
    );
    if retargeted.is_err() {
        let _ = CloseHandle(primary); // don't leak the duplicated token on failure
    }
    retargeted.context("SetTokenInformation(TokenSessionId)")?;
    // 2) The session's environment block, merged with this process's PUNKTFUNK_*/RUST_LOG (so the
    //    host runs with host.env's settings, not a bare block). Same merge the WGC helper uses.
    let mut env_block: *mut c_void = std::ptr::null_mut();
    let _ = CreateEnvironmentBlock(&mut env_block, Some(primary), false);
    let merged = crate::capture::wgc_relay::merged_env_block(env_block as *const u16);
    if !env_block.is_null() {
        let _ = DestroyEnvironmentBlock(env_block);
    }
    // 3) Redirect the host's stdout+stderr to host.log (inheritable handle).
    let log = open_log_handle(&host_log_path());
    if log.is_err() {
        let _ = CloseHandle(primary); // same: release the token if the log can't be opened
    }
    let log = log?;
    let mut si = STARTUPINFOW {
        cb: std::mem::size_of::<STARTUPINFOW>() as u32,
        dwFlags: STARTF_USESTDHANDLES,
        hStdOutput: log,
        hStdError: log,
        ..Default::default()
    };
    // `desktop` and `cmd` must outlive the CreateProcessAsUserW call: `si` / the call hold raw
    // pointers into them.
    let mut desktop: Vec<u16> = "winsta0\\default\0".encode_utf16().collect();
    si.lpDesktop = PWSTR(desktop.as_mut_ptr());
    let mut cmd: Vec<u16> = cmdline.encode_utf16().chain(std::iter::once(0)).collect();
    let cwd = (!workdir.is_empty()).then_some(PCWSTR(workdir.as_ptr()));
    let mut pi = PROCESS_INFORMATION::default();
    let created = CreateProcessAsUserW(
        Some(primary),
        None,
        Some(PWSTR(cmd.as_mut_ptr())),
        None,
        None,
        // Inherit the log handle. NOTE(review): with inheritance enabled, any other inheritable
        // handle open in this process is inherited too — keep service handles non-inheritable.
        true,
        CREATE_UNICODE_ENVIRONMENT | CREATE_NO_WINDOW,
        Some(merged.as_ptr() as *const c_void),
        cwd.unwrap_or(PCWSTR::null()),
        &si,
        &mut pi,
    );
    let _ = CloseHandle(log); // the child owns its inherited copy
    let _ = CloseHandle(primary);
    created.context("CreateProcessAsUserW(host)")?;
    // Best-effort: keep the host inside the kill-on-close job.
    let _ = AssignProcessToJobObject(job, pi.hProcess);
    Ok(pi)
}
/// Open `path` for appending, as an INHERITABLE handle (so the child can use it as stdout/stderr).
unsafe fn open_log_handle(path: &std::path::Path) -> Result<HANDLE> {
let wpath: Vec<u16> = path
.as_os_str()
.to_string_lossy()
.encode_utf16()
.chain(std::iter::once(0))
.collect();
let sa = SECURITY_ATTRIBUTES {
nLength: std::mem::size_of::<SECURITY_ATTRIBUTES>() as u32,
lpSecurityDescriptor: std::ptr::null_mut(),
bInheritHandle: true.into(),
};
// Append (no FILE_WRITE_DATA → all writes go to EOF), so each relaunch's OPEN_ALWAYS reopen
// accumulates instead of truncating from offset 0. This mirrors Rust's own `OpenOptions::append`
// access mask (FILE_GENERIC_WRITE minus WRITE_DATA, plus APPEND_DATA + SYNCHRONIZE/READ_CONTROL);
// bare FILE_APPEND_DATA alone produced a child handle that silently dropped writes.
let access = (FILE_GENERIC_WRITE.0 & !FILE_WRITE_DATA.0) | FILE_APPEND_DATA.0;
let h = CreateFileW(
PCWSTR(wpath.as_ptr()),
access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
Some(&sa),
OPEN_ALWAYS,
windows::Win32::Storage::FileSystem::FILE_FLAGS_AND_ATTRIBUTES(0),
None,
)
.context("CreateFileW(host.log)")?;
Ok(h)
}
// ── install / uninstall ──────────────────────────────────────────────────────────────────────────
fn install() -> Result<()> {
use windows_service::service::{
ServiceAccess, ServiceErrorControl, ServiceInfo, ServiceStartType, ServiceType,
};
use windows_service::service_manager::{ServiceManager, ServiceManagerAccess};
let exe = std::env::current_exe().context("current_exe")?;
let manager = ServiceManager::local_computer(
None::<&str>,
ServiceManagerAccess::CONNECT | ServiceManagerAccess::CREATE_SERVICE,
)
.context("open Service Control Manager (run from an elevated/Administrator prompt)")?;
let info = ServiceInfo {
name: OsString::from(SERVICE_NAME),
display_name: OsString::from(SERVICE_DISPLAY),
service_type: ServiceType::OWN_PROCESS,
start_type: ServiceStartType::AutoStart,
error_control: ServiceErrorControl::Normal,
executable_path: exe.clone(),
launch_arguments: vec![OsString::from("service"), OsString::from("run")],
dependencies: vec![],
account_name: None, // None = LocalSystem
account_password: None,
};
// Create, or reconfigure if it already exists (idempotent install/upgrade).
match manager.create_service(&info, ServiceAccess::CHANGE_CONFIG | ServiceAccess::START) {
Ok(svc) => {
let _ = svc.set_description(SERVICE_DESCRIPTION);
println!("Created service '{SERVICE_NAME}' (auto-start, LocalSystem).");
}
Err(windows_service::Error::Winapi(e))
if e.raw_os_error() == Some(1073 /* ERROR_SERVICE_EXISTS */) =>
{
let svc = manager
.open_service(SERVICE_NAME, ServiceAccess::CHANGE_CONFIG)
.context("open existing service to reconfigure")?;
svc.change_config(&info)
.context("reconfigure existing service")?;
let _ = svc.set_description(SERVICE_DESCRIPTION);
println!("Reconfigured existing service '{SERVICE_NAME}'.");
}
Err(e) => return Err(e).context("create service"),
}
ensure_default_host_env()?;
add_firewall_rules();
println!(
"\nInstalled. Config: {}\nLogs: {}\n\nStart now with: punktfunk-host service start",
host_env_path().display(),
crate::gamestream::config_dir().join("logs").display()
);
Ok(())
}
fn uninstall() -> Result<()> {
use windows_service::service::ServiceAccess;
use windows_service::service_manager::{ServiceManager, ServiceManagerAccess};
let _ = sc(&["stop", SERVICE_NAME]); // best-effort stop first
let manager = ServiceManager::local_computer(None::<&str>, ServiceManagerAccess::CONNECT)
.context("open Service Control Manager (run elevated)")?;
let svc = manager
.open_service(SERVICE_NAME, ServiceAccess::DELETE)
.context("open service for delete")?;
svc.delete().context("delete service")?;
remove_firewall_rules();
println!("Removed service '{SERVICE_NAME}' and its firewall rules.");
Ok(())
}
/// Write a default `host.env` if none exists, so a fresh install streams with NVENC out of the box.
fn ensure_default_host_env() -> Result<()> {
let path = host_env_path();
if path.exists() {
return Ok(());
}
if let Some(dir) = path.parent() {
std::fs::create_dir_all(dir).ok();
}
let default = "# punktfunk host configuration (read by the Windows service).\n\
# KEY=VALUE per line; '#' comments. Restart the service after editing:\n\
# punktfunk-host service stop && punktfunk-host service start\n\
\n\
PUNKTFUNK_ENCODER=nvenc\n\
PUNKTFUNK_VIDEO_SOURCE=virtual\n\
PUNKTFUNK_SECURE_DDA=1\n\
RUST_LOG=info\n\
\n\
# The host subcommand the service launches (default: serve --native).\n\
# PUNKTFUNK_HOST_CMD=serve --native\n\
\n\
# Force a specific NVENC render GPU by name substring (multi-GPU boxes only):\n\
# PUNKTFUNK_RENDER_ADAPTER=4090\n";
std::fs::write(&path, default).with_context(|| format!("write {}", path.display()))?;
println!("Wrote default config: {}", path.display());
Ok(())
}
// ── firewall + sc helpers ────────────────────────────────────────────────────────────────────────
/// Inbound firewall rules for the streaming ports (best-effort; logs but never fails the install).
fn add_firewall_rules() {
// (name suffix, protocol, ports)
let rules = [
("TCP", "TCP", "47984,47989,48010,47990"),
("UDP", "UDP", "47998-48010,9777,5353"),
];
for (suffix, proto, ports) in rules {
let name = format!("punktfunk {suffix}");
let ok = run_quiet(
"netsh",
&[
"advfirewall",
"firewall",
"add",
"rule",
&format!("name={name}"),
"dir=in",
"action=allow",
&format!("protocol={proto}"),
&format!("localport={ports}"),
],
);
if ok {
println!("Firewall rule added: {name} ({ports})");
} else {
eprintln!("warning: could not add firewall rule '{name}' (add it manually if needed)");
}
}
}
/// Delete the `punktfunk TCP` / `punktfunk UDP` firewall rules by name. Best-effort: the result of
/// each `netsh` call is ignored.
fn remove_firewall_rules() {
    ["TCP", "UDP"].iter().for_each(|suffix| {
        let selector = format!("name=punktfunk {suffix}");
        let _ = run_quiet(
            "netsh",
            &["advfirewall", "firewall", "delete", "rule", &selector],
        );
    });
}
/// Run an `sc.exe` command, passing its output through (used by start/stop/status).
fn sc(args: &[&str]) -> Result<()> {
let status = std::process::Command::new("sc")
.args(args)
.status()
.context("run sc.exe")?;
if !status.success() {
bail!("sc {} failed ({status})", args.join(" "));
}
Ok(())
}
/// Run `cmd` with `args`, sending its stdout and stderr to the null device.
///
/// Returns `true` only when the process could be spawned AND exited successfully; a spawn failure
/// (e.g. command not found) counts as `false`.
fn run_quiet(cmd: &str, args: &[&str]) -> bool {
    use std::process::{Command, Stdio};
    let outcome = Command::new(cmd)
        .args(args)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();
    matches!(outcome, Ok(status) if status.success())
}
+465 -193
View File
@@ -10,9 +10,9 @@
use std::ffi::c_void;
use std::mem::size_of;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::sync::{Arc, Mutex, Once};
use std::thread::{self, JoinHandle};
use std::time::Duration;
use std::time::{Duration, Instant};
use anyhow::{Context, Result};
use windows::core::{GUID, PCWSTR};
@@ -22,16 +22,18 @@ use windows::Win32::Devices::DeviceAndDriverInstallation::{
SP_DEVICE_INTERFACE_DATA, SP_DEVICE_INTERFACE_DETAIL_DATA_W,
};
use windows::Win32::Devices::Display::{
DisplayConfigGetDeviceInfo, GetDisplayConfigBufferSizes, QueryDisplayConfig,
DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME, DISPLAYCONFIG_MODE_INFO, DISPLAYCONFIG_PATH_INFO,
DISPLAYCONFIG_SOURCE_DEVICE_NAME, QDC_ONLY_ACTIVE_PATHS,
DisplayConfigGetDeviceInfo, DisplayConfigSetDeviceInfo, GetDisplayConfigBufferSizes,
QueryDisplayConfig, SetDisplayConfig, DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME,
DISPLAYCONFIG_DEVICE_INFO_SET_ADVANCED_COLOR_STATE, DISPLAYCONFIG_MODE_INFO,
DISPLAYCONFIG_PATH_INFO, DISPLAYCONFIG_SET_ADVANCED_COLOR_STATE,
DISPLAYCONFIG_SOURCE_DEVICE_NAME, QDC_ONLY_ACTIVE_PATHS, SDC_ALLOW_CHANGES, SDC_APPLY,
SDC_USE_SUPPLIED_DISPLAY_CONFIG,
};
use windows::Win32::Foundation::{CloseHandle, HANDLE, LUID};
use windows::Win32::Graphics::Gdi::{
ChangeDisplaySettingsExW, EnumDisplayDevicesW, EnumDisplaySettingsW, CDS_GLOBAL, CDS_NORESET,
CDS_SET_PRIMARY, CDS_TEST, CDS_TYPE, CDS_UPDATEREGISTRY, DEVMODEW, DISPLAY_DEVICEW,
DISPLAY_DEVICE_ATTACHED_TO_DESKTOP, DISP_CHANGE_SUCCESSFUL, DM_BITSPERPEL, DM_DISPLAYFREQUENCY,
DM_PELSHEIGHT, DM_PELSWIDTH, DM_POSITION, ENUM_CURRENT_SETTINGS, ENUM_DISPLAY_SETTINGS_MODE,
ChangeDisplaySettingsExW, EnumDisplaySettingsW, CDS_TEST, CDS_UPDATEREGISTRY, DEVMODEW,
DISP_CHANGE_SUCCESSFUL, DM_BITSPERPEL, DM_DISPLAYFREQUENCY, DM_PELSHEIGHT, DM_PELSWIDTH,
ENUM_DISPLAY_SETTINGS_MODE,
};
use windows::Win32::Storage::FileSystem::{
CreateFileW, FILE_FLAGS_AND_ATTRIBUTES, FILE_SHARE_READ, FILE_SHARE_WRITE, OPEN_EXISTING,
@@ -54,10 +56,19 @@ const IOCTL_GET_WATCHDOG: u32 = ctl(0x803);
const IOCTL_DRIVER_PING: u32 = ctl(0x888);
const IOCTL_GET_VERSION: u32 = ctl(0x8FF);
// A fixed monitor identity. One session at a time today; Windows persists this monitor's layout
// across sessions by GUID, and REMOVE keys off it. (TODO: derive per-client when concurrent
// sessions land.)
const MONITOR_GUID: GUID = GUID::from_u128(0x70756E6B_7466_756E_6B30_000000000001);
/// Produce a fresh SudoVDA monitor GUID for each call.
///
/// The monitor is keyed by GUID for IOCTL_ADD/REMOVE, so a FIXED GUID would make overlapping
/// sessions (a reconnect before the old session has torn down, or genuinely concurrent sessions)
/// address the SAME monitor — one session's REMOVE would then pull the monitor out from under a
/// still-live one. To avoid that, the GUID is the base value with its low 48 bits replaced by
/// `(pid << 16) | (call counter & 0xFFFF)`. The counter is process-wide and wraps after 65 536
/// calls.
fn next_monitor_guid() -> GUID {
    use std::sync::atomic::AtomicU32;
    /// Fixed upper 80 bits of every punktfunk monitor GUID (low 48 bits left zero).
    const BASE: u128 = 0x70756E6B_7466_756E_6B30_000000000000;
    static SESSION_SEQ: AtomicU32 = AtomicU32::new(0);

    let seq = u128::from(SESSION_SEQ.fetch_add(1, Ordering::Relaxed)) & 0xFFFF;
    let pid_bits = u128::from(std::process::id()) << 16;
    GUID::from_u128(BASE | pid_bits | seq)
}
#[repr(C)]
#[derive(Clone, Copy)]
@@ -133,7 +144,7 @@ unsafe fn resolve_render_adapter_luid() -> Option<LUID> {
continue;
}
let vram = d.DedicatedVideoMemory as u64; // SudoVDA software adapter ≈ 0 → loses to the dGPU
if best.as_ref().map_or(true, |(_, v, _)| vram > *v) {
if best.as_ref().is_none_or(|(_, v, _)| vram > *v) {
best = Some((d.AdapterLuid, vram, name));
}
}
@@ -216,6 +227,55 @@ pub(crate) unsafe fn resolve_gdi_name(target_id: u32) -> Option<String> {
None
}
/// Switch the advanced-color (HDR) state of the display target `target_id` on or off through the
/// CCD API.
///
/// Intended use (per the capture design): turn HDR off while on the secure (Winlogon) desktop so
/// it renders SDR/composed and DXGI Desktop Duplication can capture it, then turn it back on when
/// returning to the normal desktop so WGC keeps HDR.
///
/// Returns `true` only when the target is found among the active paths and
/// `DisplayConfigSetDeviceInfo` reports 0. Any query failure or a missing target yields `false`.
///
/// # Safety
/// Calls raw Win32 display-configuration APIs.
pub(crate) unsafe fn set_advanced_color(target_id: u32, enable: bool) -> bool {
    let (mut path_count, mut mode_count) = (0u32, 0u32);
    if GetDisplayConfigBufferSizes(QDC_ONLY_ACTIVE_PATHS, &mut path_count, &mut mode_count).is_err()
    {
        return false;
    }
    let mut paths = vec![DISPLAYCONFIG_PATH_INFO::default(); path_count as usize];
    let mut modes = vec![DISPLAYCONFIG_MODE_INFO::default(); mode_count as usize];
    let queried = QueryDisplayConfig(
        QDC_ONLY_ACTIVE_PATHS,
        &mut path_count,
        paths.as_mut_ptr(),
        &mut mode_count,
        modes.as_mut_ptr(),
        None,
    );
    if queried.is_err() {
        return false;
    }
    // Only the first `path_count` entries are valid after the query.
    let wanted = paths
        .iter()
        .take(path_count as usize)
        .find(|path| path.targetInfo.id == target_id);
    let Some(path) = wanted else {
        tracing::warn!(
            target_id,
            "set_advanced_color: target not found in active paths"
        );
        return false;
    };
    let mut state = DISPLAYCONFIG_SET_ADVANCED_COLOR_STATE::default();
    state.header.r#type = DISPLAYCONFIG_DEVICE_INFO_SET_ADVANCED_COLOR_STATE;
    state.header.size = size_of::<DISPLAYCONFIG_SET_ADVANCED_COLOR_STATE>() as u32;
    state.header.adapterId = path.targetInfo.adapterId;
    state.header.id = path.targetInfo.id;
    state.Anonymous.value = enable as u32; // bit 0 = enableAdvancedColor
    let rc = DisplayConfigSetDeviceInfo(&state.header);
    tracing::info!(
        target_id,
        enable,
        rc,
        "SudoVDA set advanced-color (HDR) state"
    );
    rc == 0
}
/// Force the freshly-added SudoVDA monitor to the client's exact `WxH@Hz`. The ADD IOCTL only
/// ADVERTISES the mode; Windows otherwise activates an IDD target at a 1280x720 default, so the
/// ACTIVE mode (what DXGI Desktop Duplication captures) must be set explicitly. CDS_TEST first so a
@@ -289,9 +349,15 @@ fn set_active_mode(gdi_name: &str, mode: Mode) {
);
}
// Set ONLY this output's mode in place (size/refresh/bpp; NO DM_POSITION). Do NOT promote it to
// PRIMARY here and do NOT write a GLOBAL topology: promoting the IDD to primary at (0,0) while the
// box's leftover basic display is still active contests the topology and storms
// DXGI_ERROR_MODE_CHANGE_IN_PROGRESS (measured live). The IDD is made the sole → primary →
// DWM-composited display by the CCD isolation in create() (which deactivates the other display
// first), so a sole display is already primary and needs no CDS_SET_PRIMARY here.
let dm = DEVMODEW {
dmSize: size_of::<DEVMODEW>() as u16,
dmFields: DM_PELSWIDTH | DM_PELSHEIGHT | DM_DISPLAYFREQUENCY | DM_BITSPERPEL | DM_POSITION,
dmFields: DM_PELSWIDTH | DM_PELSHEIGHT | DM_DISPLAYFREQUENCY | DM_BITSPERPEL,
dmBitsPerPel: 32,
dmPelsWidth: mode.width,
dmPelsHeight: mode.height,
@@ -316,10 +382,7 @@ fn set_active_mode(gdi_name: &str, mode: Mode) {
PCWSTR(wname.as_ptr()),
Some(&dm),
None,
// Make it the PRIMARY display: a blank *extended* IDD output isn't composited by the DWM,
// so it produces no duplication frames. As primary it carries the shell/cursor → frames
// flow (this is what Apollo does). Position is (0,0) via DM_POSITION (zeroed by default).
CDS_UPDATEREGISTRY | CDS_GLOBAL | CDS_SET_PRIMARY,
CDS_UPDATEREGISTRY,
None,
)
};
@@ -341,101 +404,83 @@ fn set_active_mode(gdi_name: &str, mode: Mode) {
}
}
/// Detach every display except `keep_gdi_name`, leaving the SudoVDA virtual output as the ONLY
/// display. This is the SudoVDA/Apollo "isolate the virtual display" move and the key to capturing
/// the secure desktop: Windows renders the login / UAC (Winlogon) desktop on the physical/primary
/// display and resets the topology when it switches there — with a physical monitor still attached
/// (e.g. an LG TV), the login lands on it and our virtual output goes perpetually ACCESS_LOST. With
/// the physical detached and the change PERSISTED to the registry, Winlogon reads "only the virtual
/// is attached" and the secure desktop has nowhere to render but the output we capture.
///
/// Returns the displays we detached plus their saved modes so teardown can restore them.
unsafe fn isolate_displays(keep_gdi_name: &str) -> Vec<(String, DEVMODEW)> {
let mut saved = Vec::new();
let mut idx = 0u32;
loop {
let mut dd = DISPLAY_DEVICEW {
cb: size_of::<DISPLAY_DEVICEW>() as u32,
..Default::default()
};
if !EnumDisplayDevicesW(PCWSTR::null(), idx, &mut dd, 0).as_bool() {
break;
}
idx += 1;
if (dd.StateFlags & DISPLAY_DEVICE_ATTACHED_TO_DESKTOP).0 == 0 {
continue; // not part of the desktop — nothing to detach
}
let name = String::from_utf16_lossy(&dd.DeviceName);
let name = name.trim_end_matches('\u{0}').to_string();
if name == keep_gdi_name {
continue; // the virtual output we want to keep
}
// Save the current mode so the teardown can re-attach this display where it was.
let mut cur = DEVMODEW {
dmSize: size_of::<DEVMODEW>() as u16,
..Default::default()
};
let wname: Vec<u16> = name.encode_utf16().chain(std::iter::once(0)).collect();
if EnumDisplaySettingsW(PCWSTR(wname.as_ptr()), ENUM_CURRENT_SETTINGS, &mut cur).as_bool() {
saved.push((name.clone(), cur));
}
// A 0x0 mode removes the display from the desktop. NORESET batches; we commit once below.
let off = DEVMODEW {
dmSize: size_of::<DEVMODEW>() as u16,
dmFields: DM_POSITION | DM_PELSWIDTH | DM_PELSHEIGHT,
..Default::default()
};
let r = ChangeDisplaySettingsExW(
PCWSTR(wname.as_ptr()),
Some(&off),
None,
CDS_UPDATEREGISTRY | CDS_NORESET | CDS_GLOBAL,
None,
);
tracing::info!("display isolate: detaching {name} (result={})", r.0);
/// Saved active display topology, for restoring on teardown.
type SavedConfig = (Vec<DISPLAYCONFIG_PATH_INFO>, Vec<DISPLAYCONFIG_MODE_INFO>);
/// `DISPLAYCONFIG_PATH_ACTIVE` (wingdi.h) — the `flags` bit marking a path active. The `windows` crate
/// doesn't export it, so define it here.
const DISPLAYCONFIG_PATH_ACTIVE: u32 = 0x0000_0001;
/// Robust display isolation via the CCD API. The naive GDI approach (EnumDisplayDevices +
/// ChangeDisplaySettings) MISSES displays on a hybrid box — an iGPU-attached physical monitor isn't
/// flagged `ATTACHED_TO_DESKTOP` in the GDI enum, so it's never detached and the secure desktop /
/// lock screen lands on IT while our virtual output freezes. `QueryDisplayConfig(QDC_ONLY_ACTIVE_PATHS)`
/// sees every active path; we deactivate all of them EXCEPT the SudoVDA target's, leaving the virtual
/// display as the sole desktop so ALL content (incl. Winlogon) renders to it. Apollo isolates the same
/// way (CCD). Returns the original active config to restore on teardown.
unsafe fn isolate_displays_ccd(keep_target_id: u32) -> Option<SavedConfig> {
let mut np = 0u32;
let mut nm = 0u32;
if GetDisplayConfigBufferSizes(QDC_ONLY_ACTIVE_PATHS, &mut np, &mut nm).is_err() {
return None;
}
if !saved.is_empty() {
// Commit the batched detaches (NULL device + 0 flags applies the pending registry changes).
let _ = ChangeDisplaySettingsExW(PCWSTR::null(), None, None, CDS_TYPE(0), None);
tracing::info!(
"display isolate: {} display(s) detached — only {keep_gdi_name} remains",
saved.len()
);
let mut paths = vec![DISPLAYCONFIG_PATH_INFO::default(); np as usize];
let mut modes = vec![DISPLAYCONFIG_MODE_INFO::default(); nm as usize];
if QueryDisplayConfig(
QDC_ONLY_ACTIVE_PATHS,
&mut np,
paths.as_mut_ptr(),
&mut nm,
modes.as_mut_ptr(),
None,
)
.is_err()
{
return None;
}
saved
paths.truncate(np as usize);
modes.truncate(nm as usize);
let saved = (paths.clone(), modes.clone());
let mut others = 0u32;
for p in paths.iter_mut() {
if p.targetInfo.id == keep_target_id {
continue;
}
if p.flags & DISPLAYCONFIG_PATH_ACTIVE != 0 {
p.flags &= !DISPLAYCONFIG_PATH_ACTIVE; // mark this path inactive
others += 1;
}
}
if others == 0 {
tracing::info!("display isolate (CCD): SudoVDA target {keep_target_id} already the only active display");
return Some(saved);
}
let rc = SetDisplayConfig(
Some(paths.as_slice()),
Some(modes.as_slice()),
SDC_APPLY | SDC_USE_SUPPLIED_DISPLAY_CONFIG | SDC_ALLOW_CHANGES,
);
if rc == 0 {
tracing::info!("display isolate (CCD): deactivated {others} other display(s) — SudoVDA target {keep_target_id} is now the sole desktop");
} else {
tracing::warn!("display isolate (CCD): SetDisplayConfig failed rc={rc:#x} (tried to deactivate {others} path(s))");
}
Some(saved)
}
/// Re-attach the displays [`isolate_displays`] detached, restoring each to its saved mode. Called on
/// teardown BEFORE the virtual output is removed, so there is always at least one display.
unsafe fn restore_displays(saved: &[(String, DEVMODEW)]) {
for (name, dm) in saved {
let wname: Vec<u16> = name.encode_utf16().chain(std::iter::once(0)).collect();
let _ = ChangeDisplaySettingsExW(
PCWSTR(wname.as_ptr()),
Some(dm),
None,
CDS_UPDATEREGISTRY | CDS_NORESET | CDS_GLOBAL,
None,
);
}
if !saved.is_empty() {
let _ = ChangeDisplaySettingsExW(PCWSTR::null(), None, None, CDS_TYPE(0), None);
tracing::info!("display isolate: restored {} display(s)", saved.len());
}
}
/// Re-detach physical displays so the secure (Winlogon) desktop keeps rendering to the virtual
/// output — for the in-session DXGI capture recovery (dxgi.rs `recreate_dupl`). The lock/UAC/login
/// switch can re-attach a physical monitor (the secure desktop then lands on IT and our virtual
/// output goes perpetually ACCESS_LOST — the "born-lost" storm); re-running the isolate routes the
/// secure desktop back to the virtual output, mirroring what a fresh session's `create` does (the
/// delta that makes a reconnect work where in-session recovery didn't). Idempotent + cheap: when
/// nothing besides `gdi_name` is attached, [`isolate_displays`] finds nothing to detach and commits
/// nothing — so this is safe to call on every throttled recovery tick (no display thrash).
pub(crate) fn reassert_isolation(gdi_name: &str) {
unsafe {
let _ = isolate_displays(gdi_name);
/// Restore the topology saved by [`isolate_displays_ccd`] (teardown, before the virtual output is
/// removed), re-activating the displays we deactivated.
///
/// Does nothing when the saved path list is empty. The `SetDisplayConfig` result is logged at
/// `info` on success (rc == 0) and at `warn` otherwise, so a failed restore is visible in the logs.
unsafe fn restore_displays_ccd(saved: &SavedConfig) {
    let (paths, modes) = saved;
    if paths.is_empty() {
        return;
    }
    let rc = SetDisplayConfig(
        Some(paths.as_slice()),
        Some(modes.as_slice()),
        SDC_APPLY | SDC_USE_SUPPLIED_DISPLAY_CONFIG | SDC_ALLOW_CHANGES,
    );
    if rc == 0 {
        tracing::info!("display isolate (CCD): restored original topology rc={rc:#x}");
    } else {
        // Best-effort: nothing more we can do here, but make the failure stand out.
        tracing::warn!("display isolate (CCD): restoring original topology failed rc={rc:#x}");
    }
}
unsafe fn open_device() -> Result<HANDLE> {
@@ -476,44 +521,64 @@ unsafe fn open_device() -> Result<HANDLE> {
Ok(handle)
}
/// A live SudoVDA control handle. One per host; `create` adds/removes monitors on it.
pub struct SudoVdaDisplay {
device: HANDLE,
watchdog_s: u32,
// ── Host-level reference-counted SudoVDA monitor lifecycle ──────────────────────────────────────
//
// The virtual monitor is created on the first session and REUSED across sessions. When the last
// session disconnects the monitor LINGERS for a grace window (PUNKTFUNK_MONITOR_LINGER_MS, default
// 10 s): a reconnect within the window reuses it instantly (no new screen, no PnP connect/disconnect
// chime, no teardown/recreate kernel churn); after the window a background timer REMOVEs it so a
// physical-screen user gets their screen back. Overlapping sessions share one monitor via the
// refcount (teardown only at refs==0 + expired grace), so a stale session can never REMOVE a live
// session's monitor (the earlier collision). The control-device HANDLE is opened once and kept for
// the host lifetime — it's a handle, not a screen, so it creates no phantom display.
/// The resources backing one live SudoVDA monitor (owned by [`MGR`], not by any session).
struct Monitor {
    /// GUID the monitor was ADDed under; teardown REMOVEs the monitor by this GUID.
    guid: GUID,
    /// Target id taken from the ADD reply; used to (re-)resolve the GDI name.
    target_id: u32,
    /// Adapter LUID taken from the ADD reply; packed into the capture target.
    luid: LUID,
    /// GDI device name of the output; `None` while it has not been resolved.
    gdi_name: Option<String>,
    /// Mode currently applied; compared against the mode a joining session requests.
    mode: Mode,
    /// Set to `true` on teardown to stop the watchdog ping thread.
    stop: Arc<AtomicBool>,
    /// Watchdog ping thread; taken and joined on teardown.
    pinger: Option<JoinHandle<()>>,
    /// Display topology saved by the CCD isolate step and restored on teardown; `None` when
    /// nothing was saved (e.g. isolation was skipped).
    ccd_saved: Option<SavedConfig>,
}
// The HANDLE is a kernel object usable from any thread; we only ever issue serialized IOCTLs.
unsafe impl Send for SudoVdaDisplay {}
/// Lifecycle state of the single host-level monitor.
enum MgrState {
    /// No monitor exists.
    Idle,
    /// A monitor is live and held by `refs` session lease(s).
    Active { mon: Monitor, refs: u32 },
    /// No session holds the monitor; it is kept until `until`, after which the linger timer tears
    /// it down unless a session re-acquires it first.
    Lingering { mon: Monitor, until: Instant },
}
/// Host-level manager state: the cached control-device handle plus the monitor lifecycle.
struct Mgr {
    /// Control-device handle (raw isize; `HANDLE` isn't `Send`). Opened once, kept for the host life.
    device: Option<isize>,
    /// Driver watchdog timeout in seconds (3 until/unless the driver reports its own value).
    watchdog_s: u32,
    /// Current monitor lifecycle state.
    state: MgrState,
}
/// The one process-wide manager. Starts with no device handle, the fallback 3 s watchdog and no
/// monitor; every access goes through this mutex.
static MGR: Mutex<Mgr> = Mutex::new(Mgr {
    device: None,
    watchdog_s: 3,
    state: MgrState::Idle,
});
/// The Windows virtual-display backend. A marker — the monitor lifecycle lives in the global [`MGR`].
pub struct SudoVdaDisplay;
impl SudoVdaDisplay {
pub fn new() -> Result<Self> {
let device = unsafe { open_device()? };
let mut ver = [0u8; 4];
if unsafe { ioctl(device, IOCTL_GET_VERSION, &[], &mut ver) }.is_ok() {
tracing::info!(
"SudoVDA protocol {}.{}.{} (test={})",
ver[0],
ver[1],
ver[2],
ver[3]
);
}
let mut wd = [0u8; 8];
let watchdog_s = if unsafe { ioctl(device, IOCTL_GET_WATCHDOG, &[], &mut wd) }.is_ok() {
u32::from_le_bytes([wd[0], wd[1], wd[2], wd[3]]).max(1)
} else {
3
};
tracing::info!("SudoVDA watchdog timeout {watchdog_s}s");
Ok(Self { device, watchdog_s })
// Open the control device once (validates the driver is present) + log version/watchdog.
let mut g = MGR.lock().unwrap();
mgr_ensure_device(&mut g)?;
Ok(Self)
}
}
impl Drop for SudoVdaDisplay {
fn drop(&mut self) {
unsafe {
let _ = CloseHandle(self.device);
}
// Nothing: the control device + monitor lifecycle are host-level (owned by MGR) and
// deliberately outlive any single session so a reconnect can reuse the monitor.
}
}
@@ -523,25 +588,52 @@ impl VirtualDisplay for SudoVdaDisplay {
}
fn create(&mut self, mode: Mode) -> Result<VirtualOutput> {
// Delegate to the host-level manager: create the monitor, reuse a lingering one on reconnect,
// or join the live one — and hand back a lease whose Drop releases the refcount.
mgr_acquire(mode)
}
}
/// Create a fresh SudoVDA monitor at `mode` on the (host-level) control `device`. The old per-session
/// `create()` body, now owned by the manager: ADD the target, start the watchdog ping, resolve the
/// GDI name, force the client mode + (default) isolate to a sole composited display. Returns the
/// [`Monitor`] resources; the manager tracks its lifecycle (refcount + linger).
unsafe fn create_monitor(device: isize, mode: Mode, watchdog_s: u32) -> Result<Monitor> {
let dev = HANDLE(device as *mut c_void);
{
let mut device_name = [0u8; 14];
let nm = b"punktfunk";
device_name[..nm.len()].copy_from_slice(nm);
// Fresh GUID per created monitor (the manager refcount, not the GUID, prevents the
// cross-session REMOVE collision now).
let session_guid = next_monitor_guid();
let add = AddParams {
width: mode.width,
height: mode.height,
refresh: mode.refresh_hz,
guid: MONITOR_GUID,
guid: session_guid,
device_name,
serial: [0u8; 14],
};
// Pin the IDD's RENDER GPU to the NVENC/capture GPU (e.g. the 4090) BEFORE adding the target.
// On a multi-adapter box (SudoVDA IDD + discrete GPU) DXGI otherwise reparents the virtual
// output onto whichever GPU its hybrid-preference path resolves, which storms ACCESS_LOST
// (0x887A0026) on the secure/HDR desktop. Apollo's SET_RENDER_ADAPTER fixes this and MUST be
// issued before ADD. Best-effort: a driver that rejects it just keeps the default render GPU.
let pinned = unsafe { resolve_render_adapter_luid() };
// SET_RENDER_ADAPTER is OPT-IN. Apollo runs with an EMPTY config and NEVER pins the render
// adapter, yet captures the SudoVDA cleanly at the client mode on the 4090 (verified live on
// this exact box: no ACCESS_LOST, no MODE_CHANGE storm). On this box our pin is IGNORED by the
// driver AND the IDD lands on a DIFFERENT adapter (0x23664) than the one its DXGI output is
// enumerated under (the 4090, where we make the capture device) — a cross-GPU mismatch that is
// the real source of the perpetual ACCESS_LOST + MODE_CHANGE_IN_PROGRESS storm. So default to
// NOT pinning — let the IDD use its natural adapter like Apollo. Opt in with
// PUNKTFUNK_RENDER_ADAPTER=<name substring> only on a box that genuinely needs steering.
let pinned = if std::env::var("PUNKTFUNK_RENDER_ADAPTER").is_ok() {
unsafe { resolve_render_adapter_luid() }
} else {
tracing::info!(
"SudoVDA SET_RENDER_ADAPTER skipped (Apollo-parity: no render pin — avoids cross-GPU \
mismatch; set PUNKTFUNK_RENDER_ADAPTER=<name> to force a specific render GPU)"
);
None
};
if let Some(luid) = pinned {
match unsafe { set_render_adapter(self.device, luid) } {
match unsafe { set_render_adapter(dev, luid) } {
Ok(()) => tracing::info!(
luid = format!("{:08x}:{:08x}", luid.HighPart, luid.LowPart),
"SudoVDA SET_RENDER_ADAPTER: pinned IDD render GPU"
@@ -554,7 +646,7 @@ impl VirtualDisplay for SudoVdaDisplay {
std::slice::from_raw_parts(&add as *const _ as *const u8, size_of::<AddParams>())
};
let mut out = [0u8; size_of::<AddOut>()];
unsafe { ioctl(self.device, IOCTL_ADD, add_bytes, &mut out) }.with_context(|| {
unsafe { ioctl(dev, IOCTL_ADD, add_bytes, &mut out) }.with_context(|| {
format!(
"SudoVDA ADD {}x{}@{}",
mode.width, mode.height, mode.refresh_hz
@@ -583,8 +675,8 @@ impl VirtualDisplay for SudoVdaDisplay {
// Mandatory keepalive: ping inside the watchdog window or the driver tears all displays down.
let stop = Arc::new(AtomicBool::new(false));
let device_raw = self.device.0 as isize;
let interval = Duration::from_millis(self.watchdog_s as u64 * 1000 / 3);
let device_raw = device;
let interval = Duration::from_millis(watchdog_s as u64 * 1000 / 3);
let stop_t = stop.clone();
let pinger = thread::spawn(move || {
let h = HANDLE(device_raw as *mut c_void);
@@ -607,14 +699,31 @@ impl VirtualDisplay for SudoVdaDisplay {
break;
}
}
let mut isolated: Vec<(String, DEVMODEW)> = Vec::new();
let mut ccd_saved: Option<SavedConfig> = None;
match &gdi_name {
Some(n) => {
tracing::info!("SudoVDA target {} -> {n}", ao.target_id);
// ADD only advertises the mode; force it active so DXGI captures the requested size.
set_active_mode(n, mode);
// Detach every other display so the secure desktop (Winlogon/UAC) renders here too.
isolated = unsafe { isolate_displays(n) };
// Make the SudoVDA the SOLE active display (default). On this box an EXTENDED
// (non-primary) IDD is NOT DWM-composited → Desktop Duplication gets a born-lost
// ACCESS_LOST (measured live: MODE_CHANGE storm fixed, but the extended IDD then
// born-lost). Apollo reaches the same end state ("Virtual Desktop: WxH" — the IDD is the
// whole desktop, hence primary + composited) via Windows AUTO-promoting the real WDDM
// display over the box's leftover 1024x768 basic display; Windows does NOT auto-promote
// for us, so we deactivate the other display(s) explicitly via the clean atomic CCD path.
// Deactivating FIRST means set_active_mode's primary-promotion has nothing to contest →
// no MODE_CHANGE_IN_PROGRESS storm (that storm came from promoting primary WHILE the
// basic display stayed active). Opt out with PUNKTFUNK_NO_ISOLATE=1 (a box with a real
// second monitor to keep live). The legacy GDI detach is skipped — it misses
// iGPU-attached monitors on a hybrid box and churns per-device; CCD is atomic.
if std::env::var("PUNKTFUNK_NO_ISOLATE").is_err() {
ccd_saved = unsafe { isolate_displays_ccd(ao.target_id) };
} else {
tracing::info!(
"display isolation skipped (PUNKTFUNK_NO_ISOLATE) — IDD stays extended"
);
}
thread::sleep(Duration::from_millis(1500)); // let the topology settle before capture opens
}
None => tracing::warn!(
@@ -623,59 +732,50 @@ impl VirtualDisplay for SudoVdaDisplay {
),
}
Ok(VirtualOutput {
node_id: 0, // unused on Windows; the capture target is the GDI name below
preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)),
win_capture: gdi_name
.clone()
.map(|n| crate::capture::dxgi::WinCaptureTarget {
adapter_luid: crate::capture::dxgi::pack_luid(ao.luid),
gdi_name: n,
// The SudoVDA target id is stable across secure-desktop topology rebuilds; the
// GDI name is NOT, so capture re-resolves the name from this on every recovery.
target_id: ao.target_id,
}),
keepalive: Box::new(SudoVdaKeepalive {
device: device_raw,
guid: MONITOR_GUID,
stop,
pinger: Some(pinger),
gdi_name,
isolated,
}),
Ok(Monitor {
guid: session_guid,
target_id: ao.target_id,
luid: ao.luid,
gdi_name,
mode,
stop,
pinger: Some(pinger),
ccd_saved,
})
}
}
/// RAII teardown: stop the ping thread, then REMOVE the monitor by its GUID. Does NOT close the
/// device handle — that belongs to [`SudoVdaDisplay`], which outlives the output.
struct SudoVdaKeepalive {
device: isize,
guid: GUID,
stop: Arc<AtomicBool>,
pinger: Option<JoinHandle<()>>,
#[allow(dead_code)] // consumed by the Windows capture backend (not yet wired)
gdi_name: Option<String>,
/// Displays detached by [`isolate_displays`], restored here on teardown.
isolated: Vec<(String, DEVMODEW)>,
}
impl Monitor {
/// The capture target handed to a session (`None` until the GDI name resolves).
fn target(&self) -> Option<crate::capture::dxgi::WinCaptureTarget> {
self.gdi_name
.clone()
.map(|n| crate::capture::dxgi::WinCaptureTarget {
adapter_luid: crate::capture::dxgi::pack_luid(self.luid),
gdi_name: n,
// target_id is stable across secure-desktop topology rebuilds; the GDI name is NOT,
// so capture re-resolves the name from this on every recovery.
target_id: self.target_id,
})
}
impl Drop for SudoVdaKeepalive {
fn drop(&mut self) {
/// Stop the watchdog ping, re-attach the displays we detached, then REMOVE the monitor (by GUID).
/// `device` is the host-level control handle. Consumes the monitor.
unsafe fn teardown(mut self, device: isize) {
self.stop.store(true, Ordering::Relaxed);
if let Some(j) = self.pinger.take() {
let _ = j.join();
}
// Re-attach the physical display(s) we detached BEFORE removing the virtual output, so the
// box is never left with zero displays.
unsafe { restore_displays(&self.isolated) };
// Re-attach detached display(s) BEFORE the REMOVE so the box is never left with zero displays.
if let Some(saved) = &self.ccd_saved {
restore_displays_ccd(saved);
}
let rp = RemoveParams { guid: self.guid };
let rp_bytes = unsafe {
std::slice::from_raw_parts(&rp as *const _ as *const u8, size_of::<RemoveParams>())
};
let rp_bytes =
std::slice::from_raw_parts(&rp as *const _ as *const u8, size_of::<RemoveParams>());
let mut none: [u8; 0] = [];
let h = HANDLE(self.device as *mut c_void);
if let Err(e) = unsafe { ioctl(h, IOCTL_REMOVE, rp_bytes, &mut none) } {
let h = HANDLE(device as *mut c_void);
if let Err(e) = ioctl(h, IOCTL_REMOVE, rp_bytes, &mut none) {
tracing::warn!("SudoVDA REMOVE failed: {e:#}");
} else {
tracing::info!("SudoVDA monitor removed");
@@ -683,6 +783,178 @@ impl Drop for SudoVdaKeepalive {
}
}
/// Return the cached control-device handle, opening the device on first use.
///
/// On the first call this opens the device, logs the driver's protocol version (when the query
/// succeeds), stores the driver's watchdog timeout in `g.watchdog_s` (at least 1 s; 3 s when the
/// query fails) and caches the handle in `g.device` as a raw `isize`.
///
/// # Errors
/// Propagates the error from opening the control device.
fn mgr_ensure_device(g: &mut Mgr) -> Result<isize> {
    if let Some(cached) = g.device {
        return Ok(cached);
    }

    let handle = unsafe { open_device()? };

    let mut version = [0u8; 4];
    let version_known = unsafe { ioctl(handle, IOCTL_GET_VERSION, &[], &mut version) }.is_ok();
    if version_known {
        tracing::info!(
            "SudoVDA protocol {}.{}.{} (test={})",
            version[0],
            version[1],
            version[2],
            version[3]
        );
    }

    let mut watchdog = [0u8; 8];
    let watchdog_known = unsafe { ioctl(handle, IOCTL_GET_WATCHDOG, &[], &mut watchdog) }.is_ok();
    g.watchdog_s = if watchdog_known {
        // First four bytes are the little-endian timeout; never let it drop below one second.
        u32::from_le_bytes([watchdog[0], watchdog[1], watchdog[2], watchdog[3]]).max(1)
    } else {
        3
    };
    tracing::info!("SudoVDA watchdog timeout {}s", g.watchdog_s);

    let raw_handle = handle.0 as isize;
    g.device = Some(raw_handle);
    Ok(raw_handle)
}
/// How long (in milliseconds) a monitor with no sessions is kept before teardown. A reconnect
/// inside the window reuses the monitor; once it elapses the monitor is REMOVEd.
///
/// Read from `PUNKTFUNK_MONITOR_LINGER_MS`; an unset or unparsable value yields 10 000 ms.
fn linger_ms() -> u64 {
    const DEFAULT_LINGER_MS: u64 = 10_000;
    match std::env::var("PUNKTFUNK_MONITOR_LINGER_MS") {
        Ok(raw) => raw.parse::<u64>().unwrap_or(DEFAULT_LINGER_MS),
        Err(_) => DEFAULT_LINGER_MS,
    }
}
/// Hand a new session its hold on the shared monitor.
///
/// Three cases, decided under the [`MGR`] lock:
/// * a monitor is already Active — join it (refcount + 1);
/// * a monitor is Lingering — take it over again;
/// * nothing exists — create one.
///
/// In the two reuse cases the monitor is reconfigured when the requested mode differs from the one
/// it currently has. The returned output carries a [`MonitorLease`] whose drop releases the hold.
///
/// # Errors
/// Fails when the control device cannot be opened or a new monitor cannot be created.
fn mgr_acquire(mode: Mode) -> Result<VirtualOutput> {
    ensure_linger_timer();
    let mut guard = MGR.lock().unwrap();
    let device = mgr_ensure_device(&mut guard)?;
    let watchdog_s = guard.watchdog_s;

    // Join path: a live monitor exists (a concurrent session, or the build-then-drop overlap of a
    // mid-stream reconfigure where the new lease is taken while the old one is still held). There
    // is only one shared monitor, so a differing mode is applied to it for everyone.
    if let MgrState::Active { mon, refs } = &mut guard.state {
        *refs += 1;
        let same_mode = mon.mode.width == mode.width
            && mon.mode.height == mode.height
            && mon.mode.refresh_hz == mode.refresh_hz;
        if !same_mode {
            unsafe { mgr_reconfigure(mon, mode) };
        }
        tracing::info!(
            refs = *refs,
            "SudoVDA monitor reused (concurrent / reconfigure session)"
        );
        let preferred = Some((mon.mode.width, mon.mode.height, mon.mode.refresh_hz));
        let capture = mon.target();
        return Ok(VirtualOutput {
            node_id: 0,
            preferred_mode: preferred,
            win_capture: capture,
            keepalive: Box::new(MonitorLease),
        });
    }

    // Idle or Lingering: take the state out (leaving Idle behind, which is also what remains if
    // creation fails) and end up Active with a single reference.
    let taken = std::mem::replace(&mut guard.state, MgrState::Idle);
    let mon = match taken {
        MgrState::Idle => unsafe { create_monitor(device, mode, watchdog_s)? },
        MgrState::Lingering {
            mon: mut lingering, ..
        } => {
            tracing::info!("SudoVDA monitor reused (reconnect within the linger window)");
            let same_mode = lingering.mode.width == mode.width
                && lingering.mode.height == mode.height
                && lingering.mode.refresh_hz == mode.refresh_hz;
            if !same_mode {
                unsafe { mgr_reconfigure(&mut lingering, mode) };
            }
            lingering
        }
        MgrState::Active { .. } => unreachable!("handled above"),
    };

    let preferred = Some((mon.mode.width, mon.mode.height, mon.mode.refresh_hz));
    let capture = mon.target();
    guard.state = MgrState::Active { mon, refs: 1 };
    Ok(VirtualOutput {
        node_id: 0,
        preferred_mode: preferred,
        win_capture: capture,
        keepalive: Box::new(MonitorLease),
    })
}
/// Switch a reused monitor to `mode`.
///
/// Refreshes the cached GDI name from the monitor's target id when that lookup succeeds (the old
/// name is kept otherwise), forces `mode` active on whichever name is known, and records `mode` on
/// the monitor. The recorded mode is updated even when no GDI name is available.
unsafe fn mgr_reconfigure(mon: &mut Monitor, mode: Mode) {
    let previous = format!(
        "{}x{}@{}",
        mon.mode.width, mon.mode.height, mon.mode.refresh_hz
    );
    let requested = format!("{}x{}@{}", mode.width, mode.height, mode.refresh_hz);
    tracing::info!(
        old = previous,
        new = requested,
        "SudoVDA: reconfiguring reused monitor to the new client mode"
    );

    if let Some(fresh_name) = resolve_gdi_name(mon.target_id) {
        mon.gdi_name = Some(fresh_name);
    }
    if let Some(name) = mon.gdi_name.as_ref() {
        set_active_mode(name, mode);
    }
    mon.mode = mode;
}
/// Give up one session's hold on the shared monitor.
///
/// With other holders remaining the refcount simply drops by one. When the last holder leaves,
/// the monitor moves to Lingering with a deadline of now + [`linger_ms`] instead of being torn
/// down at once. Idle and Lingering states are left untouched.
fn mgr_release() {
    let mut guard = MGR.lock().unwrap();
    let previous = std::mem::replace(&mut guard.state, MgrState::Idle);
    let next = match previous {
        MgrState::Active { mon, refs } => {
            if refs > 1 {
                MgrState::Active {
                    mon,
                    refs: refs - 1,
                }
            } else {
                let ms = linger_ms();
                tracing::info!(
                    linger_ms = ms,
                    "SudoVDA: last session left — lingering before teardown"
                );
                let deadline = Instant::now() + Duration::from_millis(ms);
                MgrState::Lingering {
                    mon,
                    until: deadline,
                }
            }
        }
        unchanged => unchanged,
    };
    guard.state = next;
}
/// Background timer (started once): tear down a monitor that has lingered past its deadline (→ Idle),
/// so a physical-screen user gets their screen back after they stop streaming.
fn ensure_linger_timer() {
static TIMER: Once = Once::new();
TIMER.call_once(|| {
let _ = thread::Builder::new()
.name("sudovda-linger".into())
.spawn(|| loop {
thread::sleep(Duration::from_millis(500));
let mut g = MGR.lock().unwrap();
let due = matches!(&g.state, MgrState::Lingering { until, .. } if Instant::now() >= *until);
if due {
let device = g.device.unwrap_or(0);
if let MgrState::Lingering { mon, .. } =
std::mem::replace(&mut g.state, MgrState::Idle)
{
drop(g); // release the lock before the REMOVE IOCTL + display restore
unsafe { mon.teardown(device) };
}
}
});
});
}
/// A session's lease on the shared monitor. Drop releases the refcount (→ linger when it hits 0).
///
/// Carries no data: the monitor itself lives in the global manager, and the lease only marks
/// that one session currently holds it.
struct MonitorLease;
impl Drop for MonitorLease {
    /// Releases this session's hold via [`mgr_release`].
    fn drop(&mut self) {
        mgr_release();
    }
}
/// Readiness probe: can we open the SudoVDA control device?
pub fn probe() -> Result<()> {
let h = unsafe { open_device()? };
+18 -6
View File
@@ -74,14 +74,26 @@ Driven by live testing with the native macOS client at the display's native **51
detaches other monitors so Winlogon renders to the virtual output) covers the case where a physical
monitor is also attached.
### Running as SYSTEM, windowless (deployment)
### Running as SYSTEM (deployment) — the `PunktfunkHost` service
To capture the secure desktop the host must run as **SYSTEM in the interactive Session 1** (a Session
0 service can't duplicate Session 1). Launch chain: a scheduled task (Interactive, Highest) →
`PsExec64 -s -i 1 -d wscript.exe launch.vbs` → `launch.vbs` runs `host-run.cmd` with a **hidden
window** (`WScript.Shell.Run …, 0`). This keeps the host off the captured desktop — no `cmd` windows
the user can see or accidentally close (which would kill the stream). `host-run.cmd` sets
`APPDATA=C:\Users\Public` (shared identity/pairing) + `PUNKTFUNK_ENCODER=nvenc` and runs `m3-host`.
0 service can't duplicate Session 1). The end-user deployment is the built-in Windows **service**
(`src/service.rs`) — see [`windows-service.md`](windows-service.md). One elevated command:
```powershell
punktfunk-host service install # auto-start LocalSystem service + firewall rules + default host.env
punktfunk-host service start
```
The service runs in Session 0 but never captures: it duplicates its own LocalSystem token, retargets
it to the active console session, and `CreateProcessAsUserW`s the host there — supervising it across
exits and console-session switches (the Sunshine/Apollo model). Config lives in
`%ProgramData%\punktfunk\host.env`; logs in `%ProgramData%\punktfunk\logs\`.
> **Old bring-up chain (debug only, superseded by the service):** a scheduled task (Interactive,
> Highest) → `PsExec64 -s -i 1 -d wscript.exe launch.vbs` → `host-run.cmd` (hidden window), with
> `APPDATA=C:\Users\Public` as the shared-identity hack. The service replaces all of this; the host
> now resolves its config dir to `%ProgramData%\punktfunk` directly (`PUNKTFUNK_CONFIG_DIR` overrides).
### Real-GPU test box (RTX 4090, `ssh "Enrico Bühler"@192.168.1.174`)
+93
View File
@@ -0,0 +1,93 @@
# Windows service (deployment)
The `PunktfunkHost` Windows service is the end-user way to run the host on Windows. It replaces the
manual bring-up chain (a scheduled task → `PsExec64 -s -i 1` → `wscript launch.vbs` → `host-run.cmd`)
with one command, auto-start on boot, and supervision.
## Install
From an **elevated** (Administrator) prompt:
```powershell
punktfunk-host service install # register auto-start LocalSystem service + firewall rules + default host.env
punktfunk-host service start # start it now (also starts automatically on every boot)
```
`service install` is idempotent — run it again after upgrading the exe to re-point the service at the
new binary. Register whatever location you keep the exe in (e.g. `C:\Program Files\punktfunk\`); the
service records the current exe path.
Other subcommands:
```powershell
punktfunk-host service stop
punktfunk-host service status
punktfunk-host service uninstall # stop + delete the service + remove its firewall rules
```
## How it works
The host must run **as SYSTEM in the interactive session** (Session 1+): Desktop Duplication of the
secure desktop (UAC / lock / login) and `SendInput` need SYSTEM, and capture/injection need the
interactive session, which a plain Session-0 service is not in.
So the service (itself in Session 0) **never captures**. On start, and whenever the active console
session changes, it:
1. resolves the active console session (`WTSGetActiveConsoleSessionId`),
2. duplicates its own LocalSystem token and retargets it to that session (`SetTokenInformation`
   with `TokenSessionId`),
3. launches the host there with `CreateProcessAsUserW` (`lpDesktop = winsta0\default`),
4. supervises it: relaunches on exit/crash (with backoff) and on a console connect/disconnect.
A kill-on-close **job object** ensures a service crash never orphans the SYSTEM host. The host in turn
spawns the WGC helper into the *user* session (see [`windows-secure-desktop.md`](windows-secure-desktop.md))
— two nested launches. Lock/unlock are handled inside the host (the `DesktopWatcher` DDA↔WGC mux), so
the service deliberately does **not** relaunch on lock/unlock — only on a real session switch.
This is the same model Sunshine/Apollo use.
## Configuration
Config lives in **`%ProgramData%\punktfunk\host.env`** (KEY=VALUE lines, `#` comments). `service
install` writes a default if none exists. Template: [`scripts/windows/host.env.example`](../scripts/windows/host.env.example).
```ini
PUNKTFUNK_ENCODER=nvenc
PUNKTFUNK_VIDEO_SOURCE=virtual
PUNKTFUNK_SECURE_DDA=1
RUST_LOG=info
# PUNKTFUNK_HOST_CMD=serve --native # the host subcommand the service launches (default)
```
The service loads these into its environment and carries `PUNKTFUNK_*` + `RUST_LOG` to the host child
(the same env-merge the WGC helper uses). Restart the service after editing:
```powershell
punktfunk-host service stop; punktfunk-host service start
```
The host's identity (cert/pairing/mgmt token/library) also lives under `%ProgramData%\punktfunk` — a
machine-wide dir the SYSTEM service and the interactive user share, surviving user logout.
`PUNKTFUNK_CONFIG_DIR` overrides the location (both platforms; handy for tests).
## Logs
- `%ProgramData%\punktfunk\logs\service.log` — the service's own supervision log (spawn/exit/session
switches).
- `%ProgramData%\punktfunk\logs\host.log` — the host child's stdout/stderr.
## Prerequisites
- The host built with `--features nvenc` for NVENC (the driver ships `nvEncodeAPI64.dll`; no SDK
needed at runtime). Software encode otherwise.
- The **SudoVDA** indirect display driver installed (for `PUNKTFUNK_VIDEO_SOURCE=virtual`).
- **ViGEmBus** for virtual gamepads (optional).
## Gotchas
- `service install`/`uninstall` need an **elevated** prompt (the SCM rejects non-admin).
- `service run` is the SCM entry point — don't run it by hand (it errors with a hint).
- A **graceful** stop currently `TerminateProcess`es the host, so its RAII teardown (SudoVDA monitor
REMOVE) doesn't run; a stale virtual monitor can linger until the next start. A cooperative-stop
signal is a follow-up.
+36
View File
@@ -0,0 +1,36 @@
# punktfunk host configuration (Windows) — read by the `PunktfunkHost` service.
#
# `punktfunk-host service install` writes a default copy of this to
# %ProgramData%\punktfunk\host.env
# Edit that file (not this one) and restart the service to apply:
# punktfunk-host service stop
# punktfunk-host service start
#
# Format: KEY=VALUE per line; '#' starts a comment. The service loads these into its environment
# and passes PUNKTFUNK_* and RUST_LOG through to the host it launches into the active session.
# Hardware encode via NVENC (NVIDIA). The host must be the `--features nvenc` build. Falls back to
# the software encoder automatically if NVENC is unavailable.
PUNKTFUNK_ENCODER=nvenc
# Video source: `virtual` creates a per-client virtual display (SudoVDA) at the client's exact
# resolution + refresh — the flagship mode. Requires the SudoVDA indirect display driver installed.
PUNKTFUNK_VIDEO_SOURCE=virtual
# Capture the secure desktop (UAC / lock / login) so the stream survives those transitions.
PUNKTFUNK_SECURE_DDA=1
# Log level (info | debug | trace). Logs land in %ProgramData%\punktfunk\logs\.
RUST_LOG=info
# The host subcommand the service launches. Default: `serve --native` (GameStream/Moonlight + the
# native punktfunk/1 QUIC host in one process). Uncomment to override.
#PUNKTFUNK_HOST_CMD=serve --native
# Multi-GPU boxes only: force the NVENC/Desktop-Duplication GPU by Description substring. Leave
# unset on single-GPU machines (the default auto-picks the discrete adapter).
#PUNKTFUNK_RENDER_ADAPTER=4090
# Keep a per-client virtual display alive briefly after disconnect so a quick reconnect reuses it
# (no display connect/disconnect chime). Default 10000 ms.
#PUNKTFUNK_MONITOR_LINGER_MS=10000