diff --git a/crates/punktfunk-host/src/gamestream/stream.rs b/crates/punktfunk-host/src/gamestream/stream.rs index 50fb532..6e0ff79 100644 --- a/crates/punktfunk-host/src/gamestream/stream.rs +++ b/crates/punktfunk-host/src/gamestream/stream.rs @@ -286,13 +286,15 @@ fn open_gs_virtual_source( std::sync::atomic::AtomicBool::new(false), )) }); - let vout = vd - .create(punktfunk_core::Mode { + let vout = crate::vdisplay::registry::acquire( + &mut vd, + punktfunk_core::Mode { width: cfg.width, height: cfg.height, refresh_hz: cfg.fps, - }) - .context("create virtual output at client resolution")?; + }, + ) + .context("create virtual output at client resolution")?; // HDR: pass the negotiated `cfg.hdr` (client asked for HDR AND the host can deliver it). On the // Windows IDD-push path this proactively enables advanced color on the virtual display so a Main10 // PQ stream flows even from an SDR desktop; an already-HDR desktop streams PQ regardless (the diff --git a/crates/punktfunk-host/src/punktfunk1.rs b/crates/punktfunk-host/src/punktfunk1.rs index b0b593f..ef9f1ff 100644 --- a/crates/punktfunk-host/src/punktfunk1.rs +++ b/crates/punktfunk-host/src/punktfunk1.rs @@ -3508,7 +3508,11 @@ fn build_pipeline( bit_depth: u8, plan: crate::session_plan::SessionPlan, ) -> Result { - let vout = vd.create(mode).context("create virtual output")?; + // Acquire through the registry (design/display-management.md): on Linux this pools the display + // for keep-alive (reuse a kept one, or create + keep the backend's keepalive so it outlives the + // session per policy); on Windows it delegates to `vd.create` (the manager already leases). The + // returned `VirtualOutput`'s keepalive is a registry lease — the capturer holds it as before. + let vout = crate::vdisplay::registry::acquire(vd, mode).context("create virtual output")?; // The backend reports the refresh it actually achieved in `preferred_mode.2` (KWin may cap a // virtual output at 60 Hz if the custom-mode install was rejected). Pace the encoder + frame // clock to that, not the requested rate, so we don't emit phantom duplicate frames over a diff --git a/crates/punktfunk-host/src/vdisplay/registry.rs b/crates/punktfunk-host/src/vdisplay/registry.rs index 53cb189..42acf58 100644 --- a/crates/punktfunk-host/src/vdisplay/registry.rs +++ b/crates/punktfunk-host/src/vdisplay/registry.rs @@ -1,22 +1,34 @@ -//! Neutral **facade over the per-OS virtual-display lifecycle owners**, for the management API's -//! `/display/state` + `/display/release` (design: `design/display-management.md` §7). +//! Host-lifetime **virtual-display registry** (design: `design/display-management.md` §3/§7): the +//! owner of the display lifecycle, so a display can outlive the session that created it (keep-alive) +//! and the management API can list + release kept displays. //! -//! Windows already owns its display lifecycle in [`super::manager::VirtualDisplayManager`] (one -//! shared IddCx monitor, refcounted, lingering); this facade reads and controls it. Linux keep-alive -//! (a per-session output pool driven by [`super::lifecycle`]) lands in a following increment — it -//! needs on-glass validation on a GPU box, which the current headless VM can't provide — so until -//! then the Linux side reports no managed displays and release is a no-op. +//! **Windows** already owns its lifecycle in [`super::manager::VirtualDisplayManager`] (one shared +//! IddCx monitor, refcounted, lingering); [`acquire`] there is a pass-through to `vd.create` (the +//! manager does the leasing), and [`snapshot`]/[`release`] read/control it. //! -//! The lifecycle *state machine* ([`super::lifecycle::State`]) is the platform-neutral core both -//! sides converge on; Windows adopts it when its manager is refactored onto it (that unification is -//! deferred so the on-glass-validated Windows path stays untouched this stage). +//! **Linux** gains a per-session **pool** here, driven by the pure [`super::lifecycle`] machine. The +//! key enabling fact: KWin / Mutter / gamescope put their capture node on the *default* PipeWire +//! daemon (`VirtualOutput::remote_fd == None`), reachable by `node_id` alone — so keeping the +//! backend's keepalive alive keeps the node alive, and a reconnect just re-attaches a fresh PipeWire +//! consumer to the same `node_id`. No fd dup / re-open needed. wlroots (`remote_fd == Some`, the +//! sandboxed xdpw portal) can't be kept without re-opening the portal fd per attach, so it is passed +//! through unchanged (teardown-on-drop, today's behavior) until that fresh-portal-capture re-attach +//! lands — a runtime gate on `remote_fd.is_some()`. +//! +//! The ownership split: the session's capturer no longer owns the real keepalive — the registry does. +//! [`acquire`] hands the session a `VirtualOutput` whose `keepalive` is a lightweight, gen-stamped +//! `DisplayLease` (mirrors the Windows `MonitorLease`); dropping it releases the registry refcount, +//! and the lifecycle machine decides linger / teardown. `capture_virtual_output`'s signature is +//! unchanged — it just holds a lease instead of the real keepalive. + +use anyhow::Result; /// One live or kept virtual display, for the mgmt snapshot. #[derive(Clone, Debug)] pub struct DisplayInfo { - /// A stable-enough id for the `/display/release` slot argument (the backend's generation stamp). + /// A stable-enough id for the `/display/release` slot argument (the owner's generation stamp). pub slot: u64, - /// Backend name (`"pf-vdisplay"`, `"kwin"`, …). + /// Backend name (`"pf-vdisplay"`, `"kwin"`, `"mutter"`, …). pub backend: String, /// `(width, height, refresh_hz)`. pub mode: (u32, u32, u32), @@ -36,6 +48,27 @@ pub struct Snapshot { pub displays: Vec, } +/// Acquire a virtual display for a session: reuse a kept (lingering/pinned) display of the same +/// backend + mode if one exists, else create a fresh one. Returns a [`VirtualOutput`](super::VirtualOutput) +/// the capturer consumes as before — but its `keepalive` is a registry lease, so the *display* +/// outlives the capturer per the keep-alive policy. +/// +/// Windows delegates to the [`manager`](super::manager) via `vd.create` (unchanged); Linux uses the +/// pool below; other platforms pass through. +pub fn acquire( + vd: &mut Box, + mode: super::Mode, +) -> Result { + #[cfg(target_os = "linux")] + { + linux::acquire(vd, mode) + } + #[cfg(not(target_os = "linux"))] + { + vd.create(mode) + } +} + /// Snapshot the host's managed virtual displays. Cheap + side-effect-free (a state-lock read); /// safe per management request. pub fn snapshot() -> Snapshot { @@ -55,9 +88,14 @@ pub fn snapshot() -> Snapshot { .collect(); Snapshot { displays } } - #[cfg(not(target_os = "windows"))] + #[cfg(target_os = "linux")] + { + Snapshot { + displays: linux::snapshot(), + } + } + #[cfg(not(any(target_os = "windows", target_os = "linux")))] { - // Linux keep-alive pool: not yet (needs GPU-box validation) — no managed displays to report. Snapshot::default() } } @@ -66,15 +104,314 @@ pub fn snapshot() -> Snapshot { /// selects one by [`DisplayInfo::slot`]; `None` releases every kept display. Active displays are /// refused (releasing a display with live sessions is session management). Returns the number /// released. -pub fn release(_slot: Option) -> usize { +pub fn release(slot: Option) -> usize { #[cfg(target_os = "windows")] { // Windows manages a single shared monitor at Stage 1, so `slot` is moot — release the one // lingering monitor if present. (Multi-monitor gives `slot` meaning later.) + let _ = slot; usize::from(super::manager::force_release()) } - #[cfg(not(target_os = "windows"))] + #[cfg(target_os = "linux")] { + linux::force_release(slot) + } + #[cfg(not(any(target_os = "windows", target_os = "linux")))] + { + let _ = slot; 0 } } + +// --------------------------------------------------------------------------------------------- +// Linux keep-alive pool +// --------------------------------------------------------------------------------------------- + +#[cfg(target_os = "linux")] +mod linux { + use std::sync::atomic::{AtomicU64, Ordering}; + use std::sync::{Mutex, Once, OnceLock}; + use std::time::{Duration, Instant}; + + use anyhow::Result; + + use super::DisplayInfo; + use crate::vdisplay::lifecycle::{self, Acquire, Release}; + use crate::vdisplay::policy::{self, Linger}; + use crate::vdisplay::{Mode, VirtualDisplay, VirtualOutput}; + + /// One pooled display: the lifecycle state + the backend's REAL keepalive (kept alive here so the + /// compositor output — and thus its PipeWire `node_id` — survives past the session), plus the + /// capture coordinates a reconnecting session needs. + struct Entry { + life: lifecycle::State, + /// The backend's keepalive (KWin Wayland conn / Mutter D-Bus session / gamescope child). Its + /// `Drop` releases the compositor output — so it is dropped only on teardown/expiry. + keepalive: Box, + node_id: u32, + preferred_mode: Option<(u32, u32, u32)>, + mode: Mode, + backend: &'static str, + /// Generation stamp: a [`DisplayLease`] only releases if its gen still matches (a stale lease + /// — its entry was reused + re-stamped — is a no-op). + gen: u64, + } + + struct Reg { + entries: Mutex>, + gen: AtomicU64, + } + + static REG: OnceLock = OnceLock::new(); + + fn reg() -> &'static Reg { + REG.get_or_init(|| Reg { + entries: Mutex::new(Vec::new()), + gen: AtomicU64::new(1), + }) + } + + /// The linger resolution for Linux: the console policy's `keep_alive` when configured, else + /// **Immediate** (today's behavior — a Linux disconnect tears the output down at once). + fn linger() -> Linger { + policy::prefs() + .configured_effective() + .map(|e| e.keep_alive.linger()) + .unwrap_or(Linger::Immediate) + } + + /// Remove entries whose linger deadline has passed, returning them so the caller drops (tears + /// them down) *after* releasing the lock — a backend keepalive `Drop` (Mutter D-Bus Stop) can + /// block, and holding the pool lock across it would stall every other acquire/release. + fn take_expired(entries: &mut Vec, now: Instant) -> Vec { + let mut expired = Vec::new(); + let mut i = 0; + while i < entries.len() { + if entries[i].life.poll_expiry(now) { + expired.push(entries.remove(i)); + } else { + i += 1; + } + } + expired + } + + /// Background thread (started once): reap lingering displays past their deadline. + fn ensure_timer() { + static ONCE: Once = Once::new(); + ONCE.call_once(|| { + let _ = std::thread::Builder::new() + .name("vdisplay-linger".into()) + .spawn(|| loop { + std::thread::sleep(Duration::from_millis(500)); + let expired = { + let mut es = reg().entries.lock().unwrap(); + take_expired(&mut es, Instant::now()) + }; + for e in expired { + tracing::info!( + backend = e.backend, + "virtual display: linger expired — torn down" + ); + drop(e); // outside the lock + } + }); + }); + } + + /// Build the session-facing [`VirtualOutput`]: the kept node + a fresh gen-stamped lease. Only + /// the poolable (`remote_fd == None`) backends reach here, so `remote_fd` is always `None`. + fn output_for( + node_id: u32, + preferred_mode: Option<(u32, u32, u32)>, + gen: u64, + ) -> VirtualOutput { + VirtualOutput { + node_id, + remote_fd: None, + preferred_mode, + keepalive: Box::new(DisplayLease { gen }), + } + } + + pub(super) fn acquire(vd: &mut Box, mode: Mode) -> Result { + ensure_timer(); + let backend = vd.name(); + let r = reg(); + + // Reap expired first (drop outside the lock). + let expired = { + let mut es = r.entries.lock().unwrap(); + take_expired(&mut es, Instant::now()) + }; + drop(expired); + + // Reuse: a kept (lingering/pinned) display of the same backend + mode. A reconnecting session + // re-attaches a fresh PipeWire consumer to the still-live `node_id`. + { + let mut es = r.entries.lock().unwrap(); + if let Some(e) = es.iter_mut().find(|e| { + matches!( + e.life, + lifecycle::State::Lingering { .. } | lifecycle::State::Pinned + ) && e.backend == backend + && e.mode == mode + }) { + debug_assert_eq!(e.life.acquire(), Acquire::Reuse); + let gen = r.gen.fetch_add(1, Ordering::Relaxed); + e.gen = gen; + let out = output_for(e.node_id, e.preferred_mode, gen); + tracing::info!( + backend, + node_id = e.node_id, + "virtual display reused (keep-alive reconnect)" + ); + return Ok(out); + } + } + + // Create a fresh display (NOT under the lock — `vd.create` blocks + spawns threads). + let real = vd.create(mode)?; + + // wlroots (remote_fd = Some, sandboxed xdpw portal) can't be kept without re-opening the + // portal fd per attach — pass it through unchanged (capturer owns it, teardown on drop). The + // poolable backends put their node on the default daemon (remote_fd = None). + if real.remote_fd.is_some() { + tracing::debug!( + backend, + "virtual display not poolable (portal fd) — keep-alive off for this backend" + ); + return Ok(real); + } + + let node_id = real.node_id; + let preferred_mode = real.preferred_mode; + let gen = r.gen.fetch_add(1, Ordering::Relaxed); + let mut life = lifecycle::State::default(); + debug_assert_eq!(life.acquire(), Acquire::Create); + let entry = Entry { + life, + keepalive: real.keepalive, + node_id, + preferred_mode, + mode, + backend, + gen, + }; + r.entries.lock().unwrap().push(entry); + Ok(output_for(node_id, preferred_mode, gen)) + } + + /// The [`DisplayLease`] `Drop` path: release the session's hold on the pooled display. The + /// lifecycle machine decides linger / pin / teardown; a torn-down entry's keepalive drops *after* + /// the lock is released. + fn release(gen: u64) { + let Some(r) = REG.get() else { return }; + let linger = linger(); + let torn_down = { + let mut es = r.entries.lock().unwrap(); + let Some(idx) = es.iter().position(|e| e.gen == gen) else { + return; // stale lease (entry reused + re-stamped, or already gone) — no-op + }; + match es[idx].life.release(Instant::now(), linger) { + Release::Teardown | Release::Noop => Some(es.remove(idx)), + Release::Linger => { + tracing::info!( + backend = es[idx].backend, + "virtual display: last session left — lingering (keep-alive)" + ); + None + } + Release::Pin => { + tracing::info!( + backend = es[idx].backend, + "virtual display: last session left — pinned (keep-alive forever)" + ); + None + } + // Linux entries are single-session (refs == 1), so Decref never occurs; harmless. + Release::Decref => None, + } + }; + if let Some(e) = torn_down { + tracing::info!( + backend = e.backend, + "virtual display torn down (keep-alive off / released)" + ); + drop(e); // outside the lock — the keepalive Drop may block + } + } + + pub(super) fn snapshot() -> Vec { + let Some(r) = REG.get() else { + return Vec::new(); + }; + let now = Instant::now(); + r.entries + .lock() + .unwrap() + .iter() + .filter_map(|e| { + let (state, expires_in_ms, sessions) = match e.life { + lifecycle::State::Active { refs } => ("active", None, refs), + lifecycle::State::Lingering { until } => ( + "lingering", + Some(until.saturating_duration_since(now).as_millis() as u64), + 0, + ), + lifecycle::State::Pinned => ("pinned", None, 0), + // Idle entries are never stored (removed on teardown). + lifecycle::State::Idle => return None, + }; + Some(DisplayInfo { + slot: e.gen, + backend: e.backend.to_string(), + mode: (e.mode.width, e.mode.height, e.mode.refresh_hz), + state: state.to_string(), + expires_in_ms, + sessions, + client: None, + }) + }) + .collect() + } + + pub(super) fn force_release(slot: Option) -> usize { + let Some(r) = REG.get() else { return 0 }; + let released = { + let mut es = r.entries.lock().unwrap(); + let mut out = Vec::new(); + let mut i = 0; + while i < es.len() { + let selected = slot.is_none_or(|s| es[i].gen == s); + if selected && es[i].life.force_release() { + out.push(es.remove(i)); + } else { + i += 1; + } + } + out + }; + let n = released.len(); + for e in released { + tracing::info!( + backend = e.backend, + "virtual display released (mgmt /display/release)" + ); + drop(e); + } + n + } + + /// The session's refcount handle — the `keepalive` the capturer holds. `Drop` releases the + /// registry hold; a stale lease (its entry was reused + re-stamped, or torn down) is a no-op. + struct DisplayLease { + gen: u64, + } + + impl Drop for DisplayLease { + fn drop(&mut self) { + release(self.gen); + } + } +}