feat(clients): unified stats vocabulary across every client + Moonlight comparison docs
One stat model everywhere (design/stats-unification.md): four measurement points (capture/received/decoded/displayed), three stages that tile the interval exactly, and a HUD that shows the addition explicitly — end-to-end 14.2 ms p50 · 19.8 p95 · capture→on-glass = host+network 9.8 + decode 2.1 + display 2.3 replacing each client's ad-hoc mix of overlapping absolutes (the Apple HUD's three arrow lines that looked sequential but weren't), mean-vs-median decode times (Windows/Linux), missing same-host-clock flags (Windows/Linux), and three different names for the same capture→received measurement (probe's "reassembled", Apple/Android's "client", Windows/Linux's post-decode "lat"). Per client: Apple threads receivedNs through the VT decode via the frame refcon bit pattern so the decode stage exists at all (stage-1 fallback honestly degrades to a capture→received headline); Windows carries FrameTimes through the existing frame channel to the render thread and adds e2e p50/p95 post-Present; Linux stamps received at AU pop and rides decoded_ns on DecodedFrame to the paintable-set site; Android pairs receipt stamps with MediaCodec output buffers via the codec's pts round-trip (JNI stats array 14→16 doubles, indexes 0-13 unchanged). fps now uniformly counts received AUs; lost/(received+lost) per window, hidden at zero. docs-site gains "Understanding the Stats Overlay": what each line means, why the equation only approximately sums (percentiles), and a line-by-line Moonlight/Sunshine matrix — including that Moonlight has no end-to-end number and its "network latency" is an ENet control RTT, so punktfunk's headline must not be compared against any single Moonlight line. Verified here: linux client + probe + core check/clippy/fmt green, android native cargo-ndk arm64 check green. Pending: Windows CI + on-glass, swift test on the mac, on-device Android. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -45,18 +45,40 @@ pub struct SessionParams {
|
||||
pub connect_timeout: Duration,
|
||||
}
|
||||
|
||||
/// The session pump's share of the unified stats window (design/stats-unification.md):
|
||||
/// stream facts plus the two stages measured before the presenter. The frame consumer in
|
||||
/// `ui_stream` contributes the `display` stage and the end-to-end percentiles.
|
||||
#[derive(Clone, Copy, Default)]
|
||||
pub struct Stats {
|
||||
/// AUs received (reassembled) per second, actual-elapsed-time denominator.
|
||||
pub fps: f32,
|
||||
/// Received payload bytes × 8 / elapsed (goodput, excludes FEC overhead).
|
||||
pub mbps: f32,
|
||||
/// p50 `host+network` stage: capture → received, host-clock corrected (ms).
|
||||
pub host_net_ms: f32,
|
||||
/// p50 `decode` stage: received → decoded, single-clock client-local (ms).
|
||||
pub decode_ms: f32,
|
||||
/// Median capture→decoded latency over the last window (host-clock corrected).
|
||||
pub latency_ms: f32,
|
||||
/// Unrecoverable network frame drops this window, and their share of
|
||||
/// received+lost (%). The OSD renders the counter line only when nonzero.
|
||||
pub lost: u32,
|
||||
pub lost_pct: f32,
|
||||
/// The decode path frames actually took this window (`"vaapi"`/`"software"`, empty
|
||||
/// until the first frame) — the OSD's trailing tag; tracks a mid-session fallback.
|
||||
pub decoder: &'static str,
|
||||
}
|
||||
|
||||
/// Sort a window of µs samples in place and return `(p50, p95)` per the spec's index
/// rules (`sorted[len/2]`, `sorted[min(len*95/100, len-1)]`); an empty window reads 0.
///
/// The slice is left sorted ascending as a side effect, so callers that reuse the
/// buffer must not rely on insertion order afterwards.
///
/// Returns `(0, 0)` for an empty window rather than panicking, so a stats window with
/// no samples simply renders zeros.
pub fn window_percentiles(samples: &mut [u64]) -> (u64, u64) {
    if samples.is_empty() {
        return (0, 0);
    }
    samples.sort_unstable();
    let len = samples.len();
    // Upper-median for even lengths: index len/2, not the mean of the two middles.
    let p50 = samples[len / 2];
    // Integer index rule; the `min` keeps tiny windows (len*95/100 == len) in bounds.
    let p95 = samples[(len * 95 / 100).min(len - 1)];
    (p50, p95)
}
|
||||
|
||||
pub enum SessionEvent {
|
||||
Connected {
|
||||
connector: Arc<NativeClient>,
|
||||
@@ -219,13 +241,17 @@ fn pump(
|
||||
let mut window_start = Instant::now();
|
||||
let mut frames_n = 0u32;
|
||||
let mut bytes_n = 0u64;
|
||||
let mut decode_us_sum = 0u64;
|
||||
let mut lat_us: Vec<u64> = Vec::with_capacity(256);
|
||||
// Stage windows (µs samples): `host+network` = capture→received (host-clock
|
||||
// corrected), `decode` = received→decoded (client-local). p50 per 1 s window.
|
||||
let mut hostnet_us: Vec<u64> = Vec::with_capacity(256);
|
||||
let mut decode_us: Vec<u64> = Vec::with_capacity(256);
|
||||
// What actually decoded the last frame — a VAAPI failure demotes mid-session, so
|
||||
// this is read off each frame's image variant rather than fixed at startup.
|
||||
let mut dec_path: &'static str = "";
|
||||
// Loss recovery: watch the host→client unrecoverable-drop count and ask for an IDR when it climbs.
|
||||
let mut last_dropped = connector.frames_dropped();
|
||||
// The stats window keeps its own drop cursor — the OSD shows the per-window delta.
|
||||
let mut window_dropped = last_dropped;
|
||||
let mut last_kf_req: Option<Instant> = None;
|
||||
|
||||
let end: Option<String> = loop {
|
||||
@@ -237,7 +263,11 @@ fn pump(
|
||||
// every ~8–16 ms at 60–120 Hz anyway, so this rarely times out mid-stream).
|
||||
match connector.next_frame(Duration::from_millis(20)) {
|
||||
Ok(frame) => {
|
||||
let t0 = Instant::now();
|
||||
// The `received` point: AU fully reassembled, in hand, before decode.
|
||||
let received_ns = now_ns();
|
||||
// fps / goodput count every received AU (spec), decoded or not.
|
||||
frames_n += 1;
|
||||
bytes_n += frame.data.len() as u64;
|
||||
match decoder.decode(&frame.data) {
|
||||
Ok(Some(image)) => {
|
||||
total_frames += 1;
|
||||
@@ -252,18 +282,21 @@ fn pump(
|
||||
};
|
||||
tracing::info!(width = w, height = h, path, "first frame decoded");
|
||||
}
|
||||
// Latency: our wall clock expressed in the host's capture clock,
|
||||
// minus the host-stamped capture pts (same math as client-rs).
|
||||
let lat = (now_ns() as i128 + clock_offset as i128 - frame.pts_ns as i128)
|
||||
// The `decoded` point — travels with the frame so the presenter
|
||||
// can measure its `display` stage against it.
|
||||
let decoded_ns = now_ns();
|
||||
// `host+network` stage: received expressed in the host's capture
|
||||
// clock, minus the host-stamped capture pts (clamped (0, 10 s)).
|
||||
let hn = (received_ns as i128 + clock_offset as i128 - frame.pts_ns as i128)
|
||||
.max(0) as u64;
|
||||
if lat > 0 && lat < 10_000_000_000 {
|
||||
lat_us.push(lat / 1000);
|
||||
if hn > 0 && hn < 10_000_000_000 {
|
||||
hostnet_us.push(hn / 1000);
|
||||
}
|
||||
decode_us_sum += t0.elapsed().as_micros() as u64;
|
||||
frames_n += 1;
|
||||
bytes_n += frame.data.len() as u64;
|
||||
// `decode` stage: received→decoded, single clock, no skew.
|
||||
decode_us.push(decoded_ns.saturating_sub(received_ns) / 1000);
|
||||
let _ = frame_tx.force_send(DecodedFrame {
|
||||
pts_ns: frame.pts_ns,
|
||||
decoded_ns,
|
||||
image,
|
||||
});
|
||||
}
|
||||
@@ -295,30 +328,36 @@ fn pump(
|
||||
|
||||
if window_start.elapsed() >= Duration::from_secs(1) {
|
||||
let secs = window_start.elapsed().as_secs_f32();
|
||||
lat_us.sort_unstable();
|
||||
let p50 = lat_us.get(lat_us.len() / 2).copied().unwrap_or(0);
|
||||
let (hn_p50, _) = window_percentiles(&mut hostnet_us);
|
||||
let (dec_p50, _) = window_percentiles(&mut decode_us);
|
||||
let lost = dropped.saturating_sub(window_dropped) as u32;
|
||||
window_dropped = dropped;
|
||||
tracing::debug!(
|
||||
fps = frames_n,
|
||||
lat_p50_us = p50,
|
||||
hostnet_p50_us = hn_p50,
|
||||
decode_p50_us = dec_p50,
|
||||
lost,
|
||||
total_frames,
|
||||
"stream window"
|
||||
);
|
||||
let _ = ev_tx.try_send(SessionEvent::Stats(Stats {
|
||||
fps: frames_n as f32 / secs,
|
||||
mbps: bytes_n as f32 * 8.0 / 1e6 / secs,
|
||||
decode_ms: if frames_n > 0 {
|
||||
decode_us_sum as f32 / frames_n as f32 / 1000.0
|
||||
host_net_ms: hn_p50 as f32 / 1000.0,
|
||||
decode_ms: dec_p50 as f32 / 1000.0,
|
||||
lost,
|
||||
lost_pct: if lost > 0 {
|
||||
lost as f32 * 100.0 / (frames_n + lost) as f32
|
||||
} else {
|
||||
0.0
|
||||
},
|
||||
latency_ms: p50 as f32 / 1000.0,
|
||||
decoder: dec_path,
|
||||
}));
|
||||
window_start = Instant::now();
|
||||
frames_n = 0;
|
||||
bytes_n = 0;
|
||||
decode_us_sum = 0;
|
||||
lat_us.clear();
|
||||
hostnet_us.clear();
|
||||
decode_us.clear();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -31,33 +31,63 @@ use std::time::{Duration, Instant};
|
||||
pub struct StreamPage {
|
||||
pub page: adw::NavigationPage,
|
||||
stats_label: gtk::Label,
|
||||
/// Median capture→paintable-set latency (ms) over the frame consumer's last 1 s
|
||||
/// window — written there, folded into the OSD on each `Stats` event.
|
||||
present_ms: Rc<Cell<f32>>,
|
||||
/// The frame consumer's share of the stats window (end-to-end percentiles + the
|
||||
/// `display` stage) — written there each 1 s window, folded into the OSD on each
|
||||
/// `Stats` event.
|
||||
presented: Rc<PresentedStats>,
|
||||
/// The stream is HDR (PQ) right now — set by the frame consumer from each frame's
|
||||
/// signaling (the host can flip SDR↔HDR mid-session, in-band).
|
||||
hdr: Rc<Cell<bool>>,
|
||||
/// `clock_offset_ns == 0`: the skew handshake didn't run (or same host) — the
|
||||
/// end-to-end line carries the `(same-host clock)` flag (spec clock rules).
|
||||
same_host: bool,
|
||||
/// `W×H@Hz` for the OSD's first line — fixed at connect, per-session.
|
||||
mode_line: String,
|
||||
}
|
||||
|
||||
/// Presenter-side window results (design/stats-unification.md): end-to-end =
|
||||
/// capture→displayed measured directly (p50 + p95), `display` stage = decoded→displayed
|
||||
/// p50. All ms, refreshed once per 1 s window by the frame consumer.
|
||||
#[derive(Default)]
|
||||
struct PresentedStats {
|
||||
e2e_p50_ms: Cell<f32>,
|
||||
e2e_p95_ms: Cell<f32>,
|
||||
display_ms: Cell<f32>,
|
||||
}
|
||||
|
||||
impl StreamPage {
|
||||
/// Render the canonical unified-stats OSD (design/stats-unification.md — Linux
|
||||
/// endpoint is paintable-set, headline reads `capture→displayed`).
|
||||
pub fn update_stats(&self, s: Stats) {
|
||||
let mut line = format!(
|
||||
"{:.0} fps · {:.1} Mbit/s · dec {:.1} ms · lat {:.1} ms · present {:.1} ms",
|
||||
s.fps,
|
||||
s.mbps,
|
||||
s.decode_ms,
|
||||
s.latency_ms,
|
||||
self.present_ms.get()
|
||||
);
|
||||
let mut line1 = format!("{} · {:.0} fps · {:.1} Mb/s", self.mode_line, s.fps, s.mbps);
|
||||
// Which decoder actually ran this window (vaapi/software) — tracks a fallback.
|
||||
if !s.decoder.is_empty() {
|
||||
line.push_str(" · ");
|
||||
line.push_str(s.decoder);
|
||||
line1.push_str(" · ");
|
||||
line1.push_str(s.decoder);
|
||||
}
|
||||
if self.hdr.get() {
|
||||
line.push_str(" · HDR");
|
||||
line1.push_str(" · HDR");
|
||||
}
|
||||
self.stats_label.set_text(&line);
|
||||
let mut text = format!(
|
||||
"{line1}\n\
|
||||
end-to-end {:.1} ms p50 · {:.1} p95 · capture→displayed{}\n\
|
||||
= host+network {:.1} + decode {:.1} + display {:.1}",
|
||||
self.presented.e2e_p50_ms.get(),
|
||||
self.presented.e2e_p95_ms.get(),
|
||||
if self.same_host {
|
||||
" (same-host clock)"
|
||||
} else {
|
||||
""
|
||||
},
|
||||
s.host_net_ms,
|
||||
s.decode_ms,
|
||||
self.presented.display_ms.get(),
|
||||
);
|
||||
// Counters — only rendered when nonzero this window.
|
||||
if s.lost > 0 {
|
||||
text.push_str(&format!("\nlost {} ({:.1}%)", s.lost, s.lost_pct));
|
||||
}
|
||||
self.stats_label.set_text(&text);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -206,6 +236,13 @@ pub fn new(args: StreamPageArgs) -> StreamPage {
|
||||
let w = build_widgets(&window, &title, chromeless, pad_connected);
|
||||
w.stats_label.set_visible(show_stats);
|
||||
|
||||
// OSD line-1 facts, fixed for the session (the mode is negotiated per-session).
|
||||
let mode = connector.mode();
|
||||
let mode_line = format!("{}×{}@{}", mode.width, mode.height, mode.refresh_hz);
|
||||
// Offset 0 = the host didn't answer the skew handshake / same host — flagged on the
|
||||
// end-to-end line so an uncorrected cross-machine number is never shown silently.
|
||||
let same_host = clock_offset_ns == 0;
|
||||
|
||||
let capture = Rc::new(Capture {
|
||||
connector,
|
||||
window: window.clone(),
|
||||
@@ -218,13 +255,13 @@ pub fn new(args: StreamPageArgs) -> StreamPage {
|
||||
held_buttons: RefCell::new(HashSet::new()),
|
||||
});
|
||||
|
||||
let present_ms = Rc::new(Cell::new(0.0f32));
|
||||
let presented = Rc::new(PresentedStats::default());
|
||||
let hdr = Rc::new(Cell::new(false));
|
||||
spawn_frame_consumer(
|
||||
&w.picture,
|
||||
frames,
|
||||
clock_offset_ns,
|
||||
present_ms.clone(),
|
||||
presented.clone(),
|
||||
hdr.clone(),
|
||||
);
|
||||
attach_keyboard(&w.overlay, &window, &capture, &stop, &w.stats_label);
|
||||
@@ -248,8 +285,10 @@ pub fn new(args: StreamPageArgs) -> StreamPage {
|
||||
StreamPage {
|
||||
page: w.page,
|
||||
stats_label: w.stats_label,
|
||||
present_ms,
|
||||
presented,
|
||||
hdr,
|
||||
same_host,
|
||||
mode_line,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -456,12 +495,13 @@ fn attach_edge_reveal(
|
||||
/// then draws whatever paintable is current on its own frame clock. Ends itself when the
|
||||
/// channel closes or the picture is gone.
|
||||
///
|
||||
/// Also the capture→present-ish measurement point: at each paintable set the frame's
|
||||
/// host capture pts is compared against the local wall clock expressed in the host clock
|
||||
/// (`clock_offset_ns`, same math as the session's decode latency). This is
|
||||
/// capture→paintable-SET — GTK's own present adds one compositor cycle after this. The
|
||||
/// 1 s p50 lands on the stats OSD (via `present_ms`) and in a "present window" debug
|
||||
/// line for headless validation.
|
||||
/// Also the `displayed` measurement point (design/stats-unification.md): each paintable
|
||||
/// set stamps the local wall clock, yielding end-to-end = capture→displayed (host-clock
|
||||
/// corrected via `clock_offset_ns`, p50+p95, measured directly) and the client-local
|
||||
/// `display` stage = decoded→displayed. This is capture→paintable-SET — GTK's own
|
||||
/// present adds one compositor cycle after this. The 1 s window results land on the
|
||||
/// stats OSD (via `PresentedStats`) and in a "present window" debug line for headless
|
||||
/// validation.
|
||||
/// One-entry cache of `ColorDesc` → `GdkColorState` (signaling changes at most on an
|
||||
/// SDR↔HDR flip, never per frame).
|
||||
#[derive(Default)]
|
||||
@@ -516,7 +556,7 @@ fn spawn_frame_consumer(
|
||||
picture: &gtk::Picture,
|
||||
frames: async_channel::Receiver<DecodedFrame>,
|
||||
clock_offset_ns: i64,
|
||||
present_ms: Rc<Cell<f32>>,
|
||||
presented_stats: Rc<PresentedStats>,
|
||||
hdr: Rc<Cell<bool>>,
|
||||
) {
|
||||
let picture = picture.downgrade();
|
||||
@@ -528,7 +568,10 @@ fn spawn_frame_consumer(
|
||||
let mut yuv_state = ColorStateCache::default();
|
||||
let mut rgb_state = ColorStateCache::default();
|
||||
glib::spawn_future_local(async move {
|
||||
let mut win_lat_us: Vec<u64> = Vec::with_capacity(256);
|
||||
// Window samples (µs): end-to-end capture→displayed (host-clock corrected) and
|
||||
// the client-local display stage decoded→displayed.
|
||||
let mut win_e2e_us: Vec<u64> = Vec::with_capacity(256);
|
||||
let mut win_disp_us: Vec<u64> = Vec::with_capacity(256);
|
||||
let mut win_start = Instant::now();
|
||||
while let Ok(f) = frames.recv().await {
|
||||
let Some(picture) = picture.upgrade() else {
|
||||
@@ -601,26 +644,34 @@ fn spawn_frame_consumer(
|
||||
}
|
||||
}
|
||||
}
|
||||
// Capture→paintable-set latency, host-clock corrected (same math and sanity
|
||||
// bound as the session's decode-latency window).
|
||||
// The `displayed` stamp: end-to-end = capture→displayed host-clock corrected
|
||||
// (same clamp as the session's stage windows); display = decoded→displayed,
|
||||
// single clock, no skew.
|
||||
if presented {
|
||||
let lat = (crate::session::now_ns() as i128 + clock_offset_ns as i128
|
||||
- f.pts_ns as i128)
|
||||
.max(0) as u64;
|
||||
if lat > 0 && lat < 10_000_000_000 {
|
||||
win_lat_us.push(lat / 1000);
|
||||
let displayed_ns = crate::session::now_ns();
|
||||
let e2e = (displayed_ns as i128 + clock_offset_ns as i128 - f.pts_ns as i128).max(0)
|
||||
as u64;
|
||||
if e2e > 0 && e2e < 10_000_000_000 {
|
||||
win_e2e_us.push(e2e / 1000);
|
||||
}
|
||||
win_disp_us.push(displayed_ns.saturating_sub(f.decoded_ns) / 1000);
|
||||
}
|
||||
if win_start.elapsed() >= Duration::from_secs(1) {
|
||||
win_lat_us.sort_unstable();
|
||||
let p50 = win_lat_us.get(win_lat_us.len() / 2).copied().unwrap_or(0);
|
||||
let frames = win_e2e_us.len();
|
||||
let (e2e_p50, e2e_p95) = crate::session::window_percentiles(&mut win_e2e_us);
|
||||
let (disp_p50, _) = crate::session::window_percentiles(&mut win_disp_us);
|
||||
tracing::debug!(
|
||||
frames = win_lat_us.len(),
|
||||
present_p50_us = p50,
|
||||
frames,
|
||||
e2e_p50_us = e2e_p50,
|
||||
e2e_p95_us = e2e_p95,
|
||||
display_p50_us = disp_p50,
|
||||
"present window"
|
||||
);
|
||||
present_ms.set(p50 as f32 / 1000.0);
|
||||
win_lat_us.clear();
|
||||
presented_stats.e2e_p50_ms.set(e2e_p50 as f32 / 1000.0);
|
||||
presented_stats.e2e_p95_ms.set(e2e_p95 as f32 / 1000.0);
|
||||
presented_stats.display_ms.set(disp_p50 as f32 / 1000.0);
|
||||
win_e2e_us.clear();
|
||||
win_disp_us.clear();
|
||||
win_start = Instant::now();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,11 +24,15 @@ use std::os::fd::RawFd;
|
||||
use std::ptr;
|
||||
|
||||
/// One decoded frame headed for the presenter, carrying the host capture timestamp so the
|
||||
/// UI can measure capture→paintable-set latency at the moment it presents.
|
||||
/// UI can measure capture→displayed latency at the moment it presents.
|
||||
pub struct DecodedFrame {
|
||||
/// Host-clock capture pts (ns) of the AU this image decoded from — compare against
|
||||
/// the local wall clock + `clock_offset_ns` at paintable-set time.
|
||||
pub pts_ns: u64,
|
||||
/// Local wall clock (ns) when the decoder emitted this image — the `decoded`
|
||||
/// measurement point (design/stats-unification.md); the presenter subtracts it from
|
||||
/// its paintable-set stamp for the client-local `display` stage.
|
||||
pub decoded_ns: u64,
|
||||
pub image: DecodedImage,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user