Files
punktfunk/clients/android/native/src/stats.rs
T
enricobuehler 69609945a3 feat(clients): host/network split in every stats HUD (stats phase 2, client side)
Consumes the 0xCF host-timing plane (449a67c) on all four GUI clients: each
keeps a bounded pending ring of receipt samples keyed by pts, matches the
host's per-AU capture→sent reports against it, and the HUD equation becomes

  = host 3.1 + network 6.7 + decode 2.1 + display 2.3

falling back to the combined `= host+network …` term whenever no timing
matched the window (old host / datagram loss) — same total, one split
fewer, never a misleading zero. Apple additionally gains the split as the
only equation line under the stage-1 fallback presenter (receipt is
presenter-independent), a `nextHostTiming` wrapper with its own plane lock,
and a unit-tested `HostNetworkSplitter`; Android extends the JNI stats
array 16→18 doubles (0–15 unchanged); Windows/Linux thread the split
through `Stats` into the HUD and the headless/debug logs.

Docs updated: design/stats-unification.md Phase 2 → implemented (wire
format, fallback semantics), and the docs-site matrix's Sunshine "Host
processing latency" row is now a direct match (ours includes the paced
send; avg vs p50).

Verified here: linux client clippy -D warnings green on the live tree,
windows stub check + hand-verified diff, android cargo-ndk arm64 check
green, apple loopback test extended (needs the rebuilt xcframework + swift
test on the mac). On-glass: pending on all platforms.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-03 21:31:49 +00:00

227 lines
9.9 KiB
Rust

//! Live decode stats for the on-stream HUD, following the unified stats spec
//! (`design/stats-unification.md`): FPS, receive throughput, and the Android v1 stage split —
//! headline `end-to-end` = capture→decoded (p50/p95) tiled by `host+network` = capture→received
//! and `decode` = received→decoded (stage p50s). When the host emits per-AU 0xCF host timings, the
//! `host+network` term further splits into `host` + `network` (Phase 2, `note_host_split`); an old
//! host emits none and the combined term stands. The decode thread is the sole writer
//! (`note_received` per access unit at receipt, `note_decoded` per decoder output buffer); the JNI
//! accessor `nativeVideoStats` drains a snapshot ~1 Hz and resets the window. Sampling is gated on
//! the HUD actually being visible (`set_enabled`, driven by `nativeSetVideoStatsEnabled`) so the
//! hidden steady state costs one relaxed atomic load per frame.
//! Pure `std` so it compiles on the host build too (the decode thread is android-only, but
//! `SessionHandle` holds the shared handle unconditionally).
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;
use std::time::Instant;
/// Rolling per-window accumulator. Rates are computed over the actual elapsed wall-time at drain
/// (robust to poll jitter), so a poll that lands at 0.9 s or 1.1 s still reports the right FPS.
pub struct VideoStats {
/// HUD gate: the samplers run on the per-frame decode path, so while the overlay is hidden
/// they (and the caller's latency computation — see `enabled`) early-out on this flag alone.
/// Off until Kotlin shows the HUD.
enabled: AtomicBool,
inner: Mutex<Inner>,
}
struct Inner {
window_start: Instant,
frames: u64,
bytes: u64,
/// `end-to-end` = capture→decoded latency samples for this window, in microseconds
/// (skew-corrected clock base).
e2e_us: Vec<u64>,
/// `host+network` stage = capture→received samples, in microseconds (skew-corrected).
hostnet_us: Vec<u64>,
/// Phase-2 split of `host+network` (design/stats-unification.md Phase 2), fed only when the
/// host emits per-AU 0xCF timings: `host` = the host's own capture→sent duration, µs.
host_us: Vec<u64>,
/// The matching `network` term, µs: capture→received minus the host's capture→sent
/// (wire + reassembly). Always pushed in lockstep with `host_us`.
net_us: Vec<u64>,
/// `decode` stage = received→decoded samples, in microseconds (client-local, single clock).
decode_us: Vec<u64>,
/// Whether the host answered the clock-skew handshake (latency is cross-machine valid).
skew_corrected: bool,
}
/// A drained, computed view of one window. `lat_valid` is false when no in-range end-to-end sample
/// landed (then the latency figures are 0 and the HUD hides the latency lines, exactly like the
/// Apple client).
pub struct Snapshot {
pub fps: f64,
pub mbps: f64,
/// Headline `end-to-end` (capture→decoded) percentiles, ms.
pub e2e_p50_ms: f64,
pub e2e_p95_ms: f64,
/// Stage p50s (ms): `host+network` (capture→received) and `decode` (received→decoded).
pub hostnet_p50_ms: f64,
pub decode_p50_ms: f64,
/// Phase-2 `host` / `network` split p50s (ms) — 0.0 when no 0xCF timing matched this window
/// (old host / no samples yet), in which case the HUD keeps the combined `host+network` term.
pub host_p50_ms: f64,
pub net_p50_ms: f64,
pub lat_valid: bool,
pub skew_corrected: bool,
}
/// Percentile over a sorted-in-place µs sample vec, in ms. 0.0 when empty.
fn pctl_ms(sorted_us: &[u64], p: f64) -> f64 {
if sorted_us.is_empty() {
return 0.0;
}
let n = sorted_us.len();
sorted_us[((n as f64 * p) as usize).min(n - 1)] as f64 / 1000.0
}
impl VideoStats {
pub fn new() -> VideoStats {
VideoStats {
enabled: AtomicBool::new(false),
inner: Mutex::new(Inner {
window_start: Instant::now(),
frames: 0,
bytes: 0,
e2e_us: Vec::with_capacity(256),
hostnet_us: Vec::with_capacity(256),
host_us: Vec::with_capacity(256),
net_us: Vec::with_capacity(256),
decode_us: Vec::with_capacity(256),
skew_corrected: false,
}),
}
}
/// Whether the HUD wants samples. The decode thread checks this BEFORE building a latency
/// sample, so the per-frame wall-clock reads are skipped too while hidden.
// Read only by the android-only decode thread; unreferenced on the host build — expected.
#[cfg_attr(not(target_os = "android"), allow(dead_code))]
pub fn enabled(&self) -> bool {
self.enabled.load(Ordering::Relaxed)
}
/// Toggle sampling. Enabling resets the window, so the first HUD poll after a show never mixes
/// in counters (or a window start) from before the overlay was visible.
pub fn set_enabled(&self, on: bool) {
let was = self.enabled.swap(on, Ordering::Relaxed);
if on && !was {
let mut g = self
.inner
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
g.window_start = Instant::now();
g.frames = 0;
g.bytes = 0;
g.e2e_us.clear();
g.hostnet_us.clear();
g.host_us.clear();
g.net_us.clear();
g.decode_us.clear();
}
}
/// Record one received access unit: its wire size and (if in range) its capture→received
/// `host+network` stage sample. Receipt is the fps/goodput counting point per the spec.
// Driven only by the android-only decode thread; unreferenced on the host build — expected.
#[cfg_attr(not(target_os = "android"), allow(dead_code))]
pub fn note_received(&self, bytes: usize, hostnet_us: Option<u64>, skew_corrected: bool) {
if !self.enabled.load(Ordering::Relaxed) {
return; // HUD hidden — skip the lock (the caller already skipped the clock read)
}
// Poison-proof: this runs per-frame on the decode thread, which has no catch_unwind —
// a panic elsewhere must not turn every later lock into a second panic (the counters
// stay consistent regardless).
let mut g = self
.inner
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
g.frames += 1;
g.bytes += bytes as u64;
g.skew_corrected = skew_corrected;
if let Some(l) = hostnet_us {
g.hostnet_us.push(l);
}
}
/// Record one matched host/network split sample (Phase 2): the host's reported capture→sent
/// duration and our capture→received minus it, both µs — one pair per AU whose 0xCF host
/// timing arrived and matched by pts. An old host emits none, leaving the vecs empty and the
/// snapshot p50s at 0 (HUD keeps the combined `host+network` term).
// Driven only by the android-only decode thread; unreferenced on the host build — expected.
#[cfg_attr(not(target_os = "android"), allow(dead_code))]
pub fn note_host_split(&self, host_us: u64, net_us: u64) {
if !self.enabled.load(Ordering::Relaxed) {
return; // HUD hidden — skip the lock
}
// Poison-proof for the same reason as `note_received`.
let mut g = self
.inner
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
g.host_us.push(host_us);
g.net_us.push(net_us);
}
/// Record one decoded output frame: its capture→decoded `end-to-end` sample and its
/// received→decoded `decode` stage sample (either may be absent — e.g. the receipt stamp for
/// this pts predates the HUD being shown).
// Driven only by the android-only decode thread; unreferenced on the host build — expected.
#[cfg_attr(not(target_os = "android"), allow(dead_code))]
pub fn note_decoded(&self, e2e_us: Option<u64>, decode_us: Option<u64>) {
if !self.enabled.load(Ordering::Relaxed) {
return; // HUD hidden — skip the lock (the caller already skipped the clock read)
}
// Poison-proof for the same reason as `note_received`.
let mut g = self
.inner
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
if let Some(l) = e2e_us {
g.e2e_us.push(l);
}
if let Some(l) = decode_us {
g.decode_us.push(l);
}
}
/// Compute the window's rates + latency percentiles, then reset for the next window.
pub fn drain(&self) -> Snapshot {
// Poison-proof for the same reason as `note_received` — a poisoned window still drains
// fine.
let mut g = self
.inner
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let elapsed = g.window_start.elapsed().as_secs_f64().max(1e-3);
let fps = g.frames as f64 / elapsed;
let mbps = g.bytes as f64 * 8.0 / 1_000_000.0 / elapsed;
g.e2e_us.sort_unstable();
g.hostnet_us.sort_unstable();
g.host_us.sort_unstable();
g.net_us.sort_unstable();
g.decode_us.sort_unstable();
let snap = Snapshot {
fps,
mbps,
e2e_p50_ms: pctl_ms(&g.e2e_us, 0.50),
e2e_p95_ms: pctl_ms(&g.e2e_us, 0.95),
hostnet_p50_ms: pctl_ms(&g.hostnet_us, 0.50),
decode_p50_ms: pctl_ms(&g.decode_us, 0.50),
host_p50_ms: pctl_ms(&g.host_us, 0.50),
net_p50_ms: pctl_ms(&g.net_us, 0.50),
lat_valid: !g.e2e_us.is_empty(),
skew_corrected: g.skew_corrected,
};
g.window_start = Instant::now();
g.frames = 0;
g.bytes = 0;
g.e2e_us.clear();
g.hostnet_us.clear();
g.host_us.clear();
g.net_us.clear();
g.decode_us.clear();
snap
}
}