//! The dedicated video render thread: decoded frames flow session pump → bounded channel → here → //! `Presenter::present`. Presenting off the XAML thread means UI jank (layout, input, dialogs) //! never stalls video, and a filled present queue never blocks the UI thread — the two failure //! modes of the old present-from-`on_rendering` design. //! //! Pacing: block on the channel (the host paces the stream), then on the swapchain's //! frame-latency waitable (≤1 queued present — see `present.rs`), then drain to the NEWEST frame //! so a stream faster than the display drops backlog before any GPU work. The UI thread only //! writes panel size/DPI into [`RenderShared`] atomics; the loop applies them before the next //! draw (and redraws the held frame after a resize — fresh back buffers are blank). use crate::present::Presenter; use crate::session::{FrameRx, FrameTimes}; use crossbeam_channel::RecvTimeoutError; use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; use std::time::{Duration, Instant}; /// The last 1-second render window, published for the HUD (one render thread at a time): /// presents/s, frames skipped by the newest-wins drain, the end-to-end (capture→on-glass) /// p50/p95 and the `display` stage (decoded→displayed) p50, all stamped post-`Present()`, in µs. /// Zeroed when a render thread starts so a new session never shows the previous one's numbers. static PRESENT_FPS: AtomicU32 = AtomicU32::new(0); static PRESENT_SKIPPED: AtomicU32 = AtomicU32::new(0); static E2E_P50_US: AtomicU64 = AtomicU64::new(0); static E2E_P95_US: AtomicU64 = AtomicU64::new(0); static DISPLAY_P50_US: AtomicU64 = AtomicU64::new(0); /// The last render window's glass-side numbers (see the statics above) — the HUD's headline /// (end-to-end) and trailing stage (display) come from here. #[derive(Clone, Copy, Default, PartialEq)] pub struct PresentStats { /// Presents per second (includes resize redraws of a held frame). pub fps: u32, /// Frames dropped by the newest-wins drain this window (client-side pacing skips). pub skipped: u32, /// End-to-end capture→displayed p50, ms (host-clock corrected, measured directly). pub e2e_p50_ms: f32, /// End-to-end capture→displayed p95, ms. pub e2e_p95_ms: f32, /// `display` stage p50, ms: decoded → displayed, single-clock client-local. pub display_p50_ms: f32, } pub fn present_stats() -> PresentStats { PresentStats { fps: PRESENT_FPS.load(Ordering::Relaxed), skipped: PRESENT_SKIPPED.load(Ordering::Relaxed), e2e_p50_ms: E2E_P50_US.load(Ordering::Relaxed) as f32 / 1000.0, e2e_p95_ms: E2E_P95_US.load(Ordering::Relaxed) as f32 / 1000.0, display_p50_ms: DISPLAY_P50_US.load(Ordering::Relaxed) as f32 / 1000.0, } } /// UI-thread → render-thread state. Size is packed into ONE atomic (w<<32|h) so a resize never /// tears into a (new-width, old-height) pair. pub struct RenderShared { size_px: AtomicU64, dpi: AtomicU32, stop: AtomicBool, } impl RenderShared { pub fn new(width: u32, height: u32, dpi: u32) -> Arc { Arc::new(RenderShared { size_px: AtomicU64::new(pack(width, height)), dpi: AtomicU32::new(dpi), stop: AtomicBool::new(false), }) } pub fn set_size(&self, width: u32, height: u32) { self.size_px.store(pack(width, height), Ordering::Relaxed); } pub fn set_dpi(&self, dpi: u32) { self.dpi.store(dpi, Ordering::Relaxed); } fn snapshot(&self) -> (u32, u32, u32) { let s = self.size_px.load(Ordering::Relaxed); ((s >> 32) as u32, s as u32, self.dpi.load(Ordering::Relaxed)) } } fn pack(w: u32, h: u32) -> u64 { ((w as u64) << 32) | h as u64 } /// Handle owned by the stream page; stops + joins the thread on unmount (and on drop, so a /// navigation away can't leak a presenting thread). pub struct RenderThread { shared: Arc, join: Option>, } impl RenderThread { pub fn shared(&self) -> &Arc { &self.shared } pub fn stop_and_join(&mut self) { self.shared.stop.store(true, Ordering::SeqCst); if let Some(j) = self.join.take() { let _ = j.join(); } } } impl Drop for RenderThread { fn drop(&mut self) { self.stop_and_join(); } } /// Moves the presenter (COM interfaces, `!Send` by default) onto the render thread. Sound here: /// the shared device + immediate context are multithread-protected (see `crate::gpu`), D3D/DXGI /// objects are apartment-agile, and after this one handoff the swapchain/RTV/context calls happen /// on exactly the render thread — the same single-owner discipline as `SharedDevice`. struct SendPresenter(Presenter); unsafe impl Send for SendPresenter {} /// Spawn the render thread. `frames` carries `(frame, FrameTimes)`; `clock_offset_ns` maps our /// wall clock onto the host's so the end-to-end (capture→on-glass) number is cross-machine valid /// (same math as the pump's host+network stage). pub fn spawn( presenter: Presenter, frames: FrameRx, shared: Arc, clock_offset_ns: i64, ) -> RenderThread { let boxed = SendPresenter(presenter); let shared_w = shared.clone(); let join = std::thread::Builder::new() .name("pf-render".into()) .spawn(move || run(boxed, frames, shared_w, clock_offset_ns)) .expect("spawn render thread"); RenderThread { shared, join: Some(join), } } fn now_ns() -> u64 { std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.as_nanos() as u64) .unwrap_or(0) } /// The window DPI, polled ~1 Hz as belt-and-braces for a monitor move that changes DPI without a /// `SizeChanged` (same DIP size on both screens). `None` when the window isn't up (headless). fn poll_window_dpi() -> Option { use windows::Win32::UI::HiDpi::GetDpiForWindow; use windows::Win32::UI::WindowsAndMessaging::FindWindowW; unsafe { let hwnd = FindWindowW(None, windows::core::w!("Punktfunk")).ok()?; match GetDpiForWindow(hwnd) { 0 => None, d => Some(d), } } } fn run(presenter: SendPresenter, frames: FrameRx, shared: Arc, clock_offset_ns: i64) { let mut p = presenter.0; let mut applied = (0u32, 0u32, 0u32); // last (w, h, dpi) handed to the presenter let mut presented = 0u32; let mut dropped = 0u32; // 1 s tumbling windows: end-to-end (capture→displayed) and the display stage // (decoded→displayed), sampled post-Present. Percentiles only (spec: stats-unification.md). let mut e2e_us: Vec = Vec::with_capacity(256); let mut display_us: Vec = Vec::with_capacity(256); let mut window_start = Instant::now(); let mut last_dpi_poll = Instant::now(); PRESENT_FPS.store(0, Ordering::Relaxed); PRESENT_SKIPPED.store(0, Ordering::Relaxed); E2E_P50_US.store(0, Ordering::Relaxed); E2E_P95_US.store(0, Ordering::Relaxed); DISPLAY_P50_US.store(0, Ordering::Relaxed); loop { if shared.stop.load(Ordering::SeqCst) { break; } let first = match frames.recv_timeout(Duration::from_millis(50)) { Ok(f) => Some(f), Err(RecvTimeoutError::Timeout) => None, Err(RecvTimeoutError::Disconnected) => break, }; if last_dpi_poll.elapsed() >= Duration::from_secs(1) { last_dpi_poll = Instant::now(); if let Some(dpi) = poll_window_dpi() { shared.set_dpi(dpi); } } let snap = shared.snapshot(); let resized = snap != applied && snap.0 > 0 && snap.1 > 0; if resized { p.resize(snap.0, snap.1, snap.2); applied = snap; } if first.is_none() && !resized { continue; // nothing new to show — don't burn GPU re-presenting a static frame } // Throttle to the compositor: with ≤1 present outstanding this returns as DWM frees a // slot, and frames decoded meanwhile are drained below so the newest is what's drawn. if !p.wait_present_slot(1000) { tracing::debug!("frame-latency waitable timed out — presenting anyway"); } let mut newest = first; while let Ok(f) = frames.try_recv() { if newest.is_some() { dropped += 1; } newest = Some(f); } // The session pump is the sole 0xCE consumer and stashes the latest here (rare updates). if let Some(meta) = *crate::present::LATEST_HDR_META.lock().unwrap() { p.set_hdr_metadata(meta); } let times: Option = newest.as_ref().map(|(_, t)| *t); p.present(newest.map(|(f, _)| f)); presented += 1; if let Some(t) = times { // The `displayed` point: post-Present() on this thread (the honest best-effort // presentation instant on Windows — endpoint label `capture→on-glass`). let displayed_ns = now_ns(); // End-to-end = capture → displayed, host-clock corrected, measured directly // (never the sum of stage percentiles). Clamped (0, 10 s). let e2e = (displayed_ns as i128 + clock_offset_ns as i128 - t.pts_ns as i128).max(0) as u64; if e2e > 0 && e2e < 10_000_000_000 { e2e_us.push(e2e / 1000); } // `display` stage = decoded → displayed, single-clock client-local. let disp = displayed_ns.saturating_sub(t.decoded_ns); if disp < 10_000_000_000 { display_us.push(disp / 1000); } } if window_start.elapsed() >= Duration::from_secs(1) { e2e_us.sort_unstable(); display_us.sort_unstable(); let p50 = |v: &[u64]| v.get(v.len() / 2).copied().unwrap_or(0); // p95 = sorted[min(len*95/100, len-1)] — the empty-window case falls to 0 via `get`. let p95 = |v: &[u64]| { v.get((v.len() * 95 / 100).min(v.len().saturating_sub(1))) .copied() .unwrap_or(0) }; tracing::debug!( presented, dropped, e2e_p50_us = p50(&e2e_us), e2e_p95_us = p95(&e2e_us), display_p50_us = p50(&display_us), "render window" ); PRESENT_FPS.store(presented, Ordering::Relaxed); PRESENT_SKIPPED.store(dropped, Ordering::Relaxed); E2E_P50_US.store(p50(&e2e_us), Ordering::Relaxed); E2E_P95_US.store(p95(&e2e_us), Ordering::Relaxed); DISPLAY_P50_US.store(p50(&display_us), Ordering::Relaxed); window_start = Instant::now(); presented = 0; dropped = 0; e2e_us.clear(); display_us.clear(); } } tracing::info!("render thread exiting"); }