fix(windows-host): cross-plane IDD serialization, linger-expiry race, second-host guard
Batch C of the audit's medium tier (M7+M8+M9): - M7: GameStream sessions now run the same begin_idd_setup dance as punktfunk/1 before creating the shared monitor. A GS connect could previously ADD/reconfigure the monitor while a native session was mid-build (and vice versa), and its sealed-channel delivery replaced the native ring (newest-wins) — each plane could freeze the other. GS has no cooperative stop plumbing, so it registers a flag nobody reads: a later session signals it, waits the 3 s grace, then force-preempts — the intended handover. - M8: the linger-expiry teardown now runs UNDER the state lock. Running it outside let a concurrent acquire see Idle and ADD+isolate while the old monitor's pinger-join / CCD-restore / REMOVE were still in flight — a failed or de-isolated session exactly at the expiry boundary. A racing acquire now waits the few teardown seconds instead. Lock order stays state → device everywhere; the pinger takes only the device lock. - M9: a named mutex (Global\punktfunk-vdisplay-manager) makes a SECOND host process fail its vdisplay open loudly instead of firing a startup CLEAR_ALL that razes the live host's monitors mid-stream (the admin footgun the shared watchdog then masked). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -269,6 +269,23 @@ fn open_gs_virtual_source(
|
||||
));
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
vd.set_launch_command(app.and_then(|a| a.cmd.clone()));
|
||||
// Serialize with the punktfunk/1 plane's IDD-push setup dance (Goal-1 §2.5). A GameStream
|
||||
// connect used to skip it entirely, so it could ADD/reconfigure the shared monitor while a
|
||||
// native session was mid-build (and vice versa), and its sealed-channel delivery would replace
|
||||
// the native session's ring (newest-wins) — each plane could freeze the other. GameStream has
|
||||
// no cooperative stop-flag plumbing, so it registers a flag nobody reads: a LATER session that
|
||||
// preempts this one signals it, waits the 3 s release grace, then force-preempts the monitor —
|
||||
// this session then fails on capture and tears down cleanly (the intended handover).
|
||||
#[cfg(target_os = "windows")]
|
||||
let _idd_setup_guard = matches!(
|
||||
crate::session_plan::CaptureBackend::resolve(),
|
||||
crate::session_plan::CaptureBackend::IddPush
|
||||
)
|
||||
.then(|| {
|
||||
crate::vdisplay::manager::vdm().begin_idd_setup(std::sync::Arc::new(
|
||||
std::sync::atomic::AtomicBool::new(false),
|
||||
))
|
||||
});
|
||||
let vout = vd
|
||||
.create(punktfunk_core::Mode {
|
||||
width: cfg.width,
|
||||
|
||||
@@ -16,15 +16,20 @@
|
||||
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
use std::os::windows::io::{AsRawHandle, OwnedHandle};
|
||||
use std::os::windows::io::{AsRawHandle, FromRawHandle, OwnedHandle};
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex, Once, OnceLock};
|
||||
use std::thread::{self, JoinHandle};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::Result;
|
||||
use windows::Win32::Foundation::{CloseHandle, HANDLE, LUID, WAIT_OBJECT_0};
|
||||
use windows::Win32::System::Threading::{OpenProcess, WaitForSingleObject, PROCESS_SYNCHRONIZE};
|
||||
use anyhow::{Context, Result};
|
||||
use windows::core::w;
|
||||
use windows::Win32::Foundation::{
|
||||
CloseHandle, GetLastError, ERROR_ALREADY_EXISTS, HANDLE, LUID, WAIT_OBJECT_0,
|
||||
};
|
||||
use windows::Win32::System::Threading::{
|
||||
CreateMutexW, OpenProcess, WaitForSingleObject, PROCESS_SYNCHRONIZE,
|
||||
};
|
||||
|
||||
use super::{Mode, VirtualOutput};
|
||||
use crate::win_display::{
|
||||
@@ -144,6 +149,9 @@ struct DeviceSlot {
|
||||
/// `CLEAR_ALL` (crashed-host orphan reap) runs only on the FIRST open of the process; a reopen
|
||||
/// races sessions this process still considers live and must not raze them.
|
||||
opened_once: bool,
|
||||
/// The cross-process single-instance mutex (`Global\punktfunk-vdisplay-manager`), acquired on
|
||||
/// the first open and held — never released — for the process lifetime.
|
||||
instance_guard: Option<OwnedHandle>,
|
||||
}
|
||||
|
||||
/// The host-lifetime virtual-display manager: the single owner of the monitor lifecycle.
|
||||
@@ -208,6 +216,33 @@ pub(crate) fn control_device_handle() -> Option<HANDLE> {
|
||||
/// next use reopens. The root `windows` error survives anyhow `.context` chains via `downcast_ref`.
|
||||
/// NOTE: 0x80070490 (ERROR_NOT_FOUND, the ADD slot-exhaustion wedge) is deliberately NOT here — it
|
||||
/// has its own reap-and-retry handling and the device is alive when it fires.
|
||||
/// The cross-process single-instance guard for pf-vdisplay management. A SECOND host process's
|
||||
/// first device open used to fire `IOCTL_CLEAR_ALL` and raze the live host's monitors mid-stream —
|
||||
/// an admin footgun (run `punktfunk-host serve` while the SCM service streams), masked afterwards
|
||||
/// because both processes' pings satisfy the shared driver watchdog. The named mutex makes the
|
||||
/// second process fail its vdisplay open LOUDLY instead. Held, never released, for the process
|
||||
/// lifetime; the OS reclaims it (and frees the name) when the process exits, however it exits.
|
||||
fn acquire_single_instance() -> Result<OwnedHandle> {
|
||||
// SAFETY: plain FFI create of a named mutex; the returned handle (checked by `?`) is solely
|
||||
// owned by the `OwnedHandle`, and `GetLastError` is read immediately after the create — the
|
||||
// documented ERROR_ALREADY_EXISTS protocol for pre-existing named objects.
|
||||
unsafe {
|
||||
let h = CreateMutexW(None, false, w!("Global\\punktfunk-vdisplay-manager"))
|
||||
.context("CreateMutexW(punktfunk-vdisplay single-instance guard)")?;
|
||||
let already = GetLastError() == ERROR_ALREADY_EXISTS;
|
||||
let owned = OwnedHandle::from_raw_handle(h.0 as _);
|
||||
if already {
|
||||
anyhow::bail!(
|
||||
"another punktfunk-host process is already managing pf-vdisplay on this machine — \
|
||||
refusing to touch the driver (a second manager's startup CLEAR_ALL would raze the \
|
||||
live host's monitors mid-stream). Stop the other instance (e.g. `punktfunk-host \
|
||||
service stop`) first."
|
||||
);
|
||||
}
|
||||
Ok(owned)
|
||||
}
|
||||
}
|
||||
|
||||
/// Best-effort "is this WUDFHost pid still alive?" — the monitor-liveness probe for the JOIN path.
|
||||
/// `OpenProcess` failing (pid reaped) or the process being signaled ⇒ dead. Pid reuse could
|
||||
/// theoretically alias a fresh process and read "alive"; the joining session then just retries into
|
||||
@@ -261,6 +296,9 @@ impl VirtualDisplayManager {
|
||||
return Ok(HANDLE(d.as_raw_handle()));
|
||||
}
|
||||
let reap = !slot.opened_once;
|
||||
if slot.instance_guard.is_none() {
|
||||
slot.instance_guard = Some(acquire_single_instance()?);
|
||||
}
|
||||
// SAFETY: `VdisplayDriver::open` is `unsafe` only because it issues SetupAPI + `DeviceIoControl`
|
||||
// FFI in the caller's apartment; the `device` mutex (held here) serializes it, so there is no
|
||||
// concurrent open. `open` has no handle precondition to uphold, and the `OwnedHandle` it
|
||||
@@ -770,35 +808,29 @@ impl VirtualDisplayManager {
|
||||
.name("vdisplay-linger".into())
|
||||
.spawn(move || loop {
|
||||
thread::sleep(Duration::from_millis(500));
|
||||
let due = {
|
||||
let g = self.state.lock().unwrap();
|
||||
matches!(&*g, MgrState::Lingering { until, .. } if Instant::now() >= *until)
|
||||
};
|
||||
if !due {
|
||||
continue;
|
||||
}
|
||||
let Some(dev) = self.device_handle() else {
|
||||
continue;
|
||||
};
|
||||
let taken = {
|
||||
let mut g = self.state.lock().unwrap();
|
||||
if matches!(&*g, MgrState::Lingering { until, .. } if Instant::now() >= *until) {
|
||||
if !matches!(&*g, MgrState::Lingering { until, .. } if Instant::now() >= *until)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if let MgrState::Lingering { mon, .. } =
|
||||
std::mem::replace(&mut *g, MgrState::Idle)
|
||||
{
|
||||
Some(mon)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
if let Some(mon) = taken {
|
||||
// SAFETY: `teardown` requires `dev` to be the live control handle; `dev` is from
|
||||
// `self.device_handle()` (the `Some` checked just above), i.e. the cached
|
||||
// `OwnedHandle` live for the process lifetime. `mon` was moved out of the
|
||||
// `Lingering` state under the `state` lock, so it is exclusively owned here.
|
||||
// Teardown UNDER the state lock. Dropping the lock first (the old shape) let a
|
||||
// concurrent `acquire` see Idle and run its ADD + CCD isolate while this
|
||||
// monitor's pinger-join / CCD-restore / REMOVE were still in flight — the late
|
||||
// restore then de-isolated (or the REMOVE churn-rejected) the fresh session at
|
||||
// the linger-expiry boundary. Holding the lock makes the racing acquire WAIT
|
||||
// the few teardown seconds instead of failing its session. Lock order stays
|
||||
// state → device (teardown's invalidate path), same as every other holder; the
|
||||
// pinger takes only the device lock — no inversion.
|
||||
// SAFETY: `teardown` requires a valid control handle; `dev` is from
|
||||
// `self.device_handle()` (cached handles are never closed — a dead one is
|
||||
// retired, kept alive; see `DeviceSlot`). `mon` was moved out of the replaced
|
||||
// state under the lock, so it is exclusively owned here.
|
||||
unsafe { self.teardown(dev, mon) };
|
||||
}
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user