feat(clients): host/network split in every stats HUD (stats phase 2, client side)
Consumes the 0xCF host-timing plane (449a67c) on all four GUI clients: each
keeps a bounded pending ring of receipt samples keyed by pts, matches the
host's per-AU capture→sent reports against it, and the HUD equation becomes
= host 3.1 + network 6.7 + decode 2.1 + display 2.3
falling back to the combined `= host+network …` term whenever no timing
matched the window (old host / datagram loss) — same total, one split
fewer, never a misleading zero. Apple additionally gains the split as the
only equation line under the stage-1 fallback presenter (receipt is
presenter-independent), a `nextHostTiming` wrapper with its own plane lock,
and a unit-tested `HostNetworkSplitter`; Android extends the JNI stats
array 16→18 doubles (0–15 unchanged); Windows/Linux thread the split
through `Stats` into the HUD and the headless/debug logs.
Docs updated: design/stats-unification.md Phase 2 → implemented (wire
format, fallback semantics), and the docs-site matrix's Sunshine "Host
processing latency" row is now a direct match (ours includes the paced
send; avg vs p50).
Verified here: linux client clippy -D warnings green on the live tree,
windows stub check + hand-verified diff, android cargo-ndk arm64 check
green, apple loopback test extended (needs the rebuilt xcframework + swift
test on the mac). On-glass: pending on all platforms.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -25,6 +25,11 @@ use std::time::{Duration, Instant};
|
||||
/// flight, so anything beyond this is stale (codec flushed / HUD toggled) and gets evicted.
|
||||
const IN_FLIGHT_CAP: usize = 64;
|
||||
|
||||
/// Cap on received AUs awaiting their 0xCF host timing (Phase 2 host/network split): the timing
|
||||
/// datagram trails its AU by at most the wire, so a match lands within a frame or two — anything
|
||||
/// this deep is a lost datagram (or an old host that never sends any) and gets evicted.
/// Eviction is oldest-first: the decode loop's receipt path pops the front of the pending ring as
/// soon as a push takes it past this length, so the ring settles at no more than this many entries.
|
||||
const PENDING_SPLIT_CAP: usize = 256;
|
||||
|
||||
/// The decode loop. Runs on the `pf-decode` thread until `shutdown` is set or the session closes.
|
||||
pub fn run(
|
||||
client: Arc<NativeClient>,
|
||||
@@ -155,6 +160,11 @@ pub fn run(
|
||||
// point (output-buffer dequeue — MediaCodec round-trips presentationTimeUs) can be paired back
|
||||
// to its receipt for the `decode` stage. Only fed while the HUD is visible.
|
||||
let mut in_flight: VecDeque<(u64, i128)> = VecDeque::new();
|
||||
// Phase-2 host/network split (design/stats-unification.md): received AUs awaiting their 0xCF
|
||||
// host timing, as (pts_ns, capture→received µs). The timings are drained non-blockingly right
|
||||
// where receipts are recorded and matched by pts; `network = hostnet − host` (saturating).
|
||||
// Only fed while the HUD is visible; an old host never sends a 0xCF, so entries just age out.
|
||||
let mut pending_split: VecDeque<(u64, u64)> = VecDeque::new();
|
||||
// The dataspace we've signalled on the Surface so far (None = default/SDR). Set reactively once
|
||||
// the decoder reports an HDR stream (see `drain`); avoids re-applying every format event.
|
||||
let mut applied_ds: Option<DataSpace> = None;
|
||||
@@ -190,6 +200,26 @@ pub fn run(
|
||||
if in_flight.len() > IN_FLIGHT_CAP {
|
||||
in_flight.pop_front(); // stale — codec never echoed it back
|
||||
}
|
||||
// Phase-2 split: park this AU's capture→received sample, then match any
|
||||
// 0xCF host timings that have arrived — host = the host's own
|
||||
// capture→sent, network = our capture→received minus it (per-frame
|
||||
// tiling; saturating in case of clock jitter).
|
||||
if let Some(hostnet_us) = lat_us {
|
||||
pending_split.push_back((frame.pts_ns, hostnet_us));
|
||||
if pending_split.len() > PENDING_SPLIT_CAP {
|
||||
pending_split.pop_front(); // 0xCF lost / old host — evict
|
||||
}
|
||||
}
|
||||
while let Ok(t) = client.next_host_timing(Duration::ZERO) {
|
||||
if let Some(i) = pending_split.iter().position(|&(p, _)| p == t.pts_ns)
|
||||
{
|
||||
let (_, hostnet_us) = pending_split.remove(i).unwrap();
|
||||
stats.note_host_split(
|
||||
t.host_us as u64,
|
||||
hostnet_us.saturating_sub(t.host_us as u64),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
pending = Some(frame);
|
||||
}
|
||||
|
||||
@@ -73,13 +73,16 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeStopVideo(
|
||||
}
|
||||
|
||||
/// `NativeBridge.nativeVideoStats(handle): DoubleArray?` — drain ~1 s of decode stats for the HUD
|
||||
/// (unified stats spec, `design/stats-unification.md`). Returns 16 doubles
|
||||
/// (unified stats spec, `design/stats-unification.md`). Returns 18 doubles
|
||||
/// `[fps, mbps, e2eP50Ms, e2eP95Ms, latValid, skewCorrected, width, height, refreshHz, framesLost,
|
||||
/// bitDepth, colorPrimaries, colorTransfer, chromaFormatIdc, hostNetP50Ms, decodeP50Ms]`
|
||||
/// (the two flags are 1.0/0.0; indexes 0–13 match the previous 14-double layout with the latency
|
||||
/// pair re-based from capture→received to the end-to-end capture→decoded headline; the two stage
|
||||
/// p50s tiling it — `host+network` = capture→received, `decode` = received→decoded — are appended
|
||||
/// at the end), or `null` when no decode thread is running. Poll ~1 Hz from the UI; each call
|
||||
/// bitDepth, colorPrimaries, colorTransfer, chromaFormatIdc, hostNetP50Ms, decodeP50Ms, hostP50Ms,
|
||||
/// netP50Ms]`
|
||||
/// (the two flags are 1.0/0.0; indexes 0–15 match the previous 16-double layout — 0–13 the original
|
||||
/// 14-double one with the latency pair re-based to the end-to-end capture→decoded headline, 14/15
|
||||
/// the stage p50s tiling it: `host+network` = capture→received, `decode` = received→decoded; 16/17
|
||||
/// are the Phase-2 split of the `host+network` term from the per-AU 0xCF host timings — `host` =
|
||||
/// the host's capture→sent, `network` = the remainder — both 0.0 when no timing matched this
|
||||
/// window, e.g. an old host or a lost 0xCF datagram), or `null` when no decode thread is running. Poll ~1 Hz from the UI; each call
|
||||
/// resets the measurement window. Not android-gated — pure `jni` + connector reads, so it links on
|
||||
/// the host build too (Kotlin only ever calls it on device).
|
||||
#[no_mangle]
|
||||
@@ -100,7 +103,7 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeVideoStats(
|
||||
let snap = h.stats.drain();
|
||||
let mode = h.client.mode();
|
||||
let color = h.client.color;
|
||||
let buf: [f64; 16] = [
|
||||
let buf: [f64; 18] = [
|
||||
snap.fps,
|
||||
snap.mbps,
|
||||
snap.e2e_p50_ms,
|
||||
@@ -122,6 +125,10 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeVideoStats(
|
||||
// Stage p50s tiling the end-to-end headline (appended to keep 0–13 index-compatible).
|
||||
snap.hostnet_p50_ms,
|
||||
snap.decode_p50_ms,
|
||||
// Phase-2 host/network split of the `host+network` stage (0xCF host timings): 0.0
|
||||
// when no timing matched this window (old host) — the HUD keeps the combined term.
|
||||
snap.host_p50_ms,
|
||||
snap.net_p50_ms,
|
||||
];
|
||||
let arr = match env.new_double_array(buf.len() as jsize) {
|
||||
Ok(a) => a,
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
//! Live decode stats for the on-stream HUD, following the unified stats spec
|
||||
//! (`design/stats-unification.md`): FPS, receive throughput, and the Android v1 stage split —
|
||||
//! headline `end-to-end` = capture→decoded (p50/p95) tiled by `host+network` = capture→received
|
||||
//! and `decode` = received→decoded (stage p50s). The decode thread is the sole writer
|
||||
//! and `decode` = received→decoded (stage p50s). When the host emits per-AU 0xCF host timings, the
|
||||
//! `host+network` term further splits into `host` + `network` (Phase 2, `note_host_split`); an old
|
||||
//! host emits none and the combined term stands. The decode thread is the sole writer
|
||||
//! (`note_received` per access unit at receipt, `note_decoded` per decoder output buffer); the JNI
|
||||
//! accessor `nativeVideoStats` drains a snapshot ~1 Hz and resets the window. Sampling is gated on
|
||||
//! the HUD actually being visible (`set_enabled`, driven by `nativeSetVideoStatsEnabled`) so the
|
||||
@@ -32,6 +34,12 @@ struct Inner {
|
||||
e2e_us: Vec<u64>,
|
||||
/// `host+network` stage = capture→received samples, in microseconds (skew-corrected).
|
||||
hostnet_us: Vec<u64>,
|
||||
/// Phase-2 split of `host+network` (design/stats-unification.md Phase 2), fed only when the
|
||||
/// host emits per-AU 0xCF timings: `host` = the host's own capture→sent duration, µs.
|
||||
host_us: Vec<u64>,
|
||||
/// The matching `network` term, µs: capture→received minus the host's capture→sent
|
||||
/// (wire + reassembly). Always pushed in lockstep with `host_us`.
|
||||
net_us: Vec<u64>,
|
||||
/// `decode` stage = received→decoded samples, in microseconds (client-local, single clock).
|
||||
decode_us: Vec<u64>,
|
||||
/// Whether the host answered the clock-skew handshake (latency is cross-machine valid).
|
||||
@@ -50,6 +58,10 @@ pub struct Snapshot {
|
||||
/// Stage p50s (ms): `host+network` (capture→received) and `decode` (received→decoded).
|
||||
pub hostnet_p50_ms: f64,
|
||||
pub decode_p50_ms: f64,
|
||||
/// Phase-2 `host` / `network` split p50s (ms) — 0.0 when no 0xCF timing matched this window
|
||||
/// (old host / no samples yet), in which case the HUD keeps the combined `host+network` term.
|
||||
pub host_p50_ms: f64,
|
||||
pub net_p50_ms: f64,
|
||||
pub lat_valid: bool,
|
||||
pub skew_corrected: bool,
|
||||
}
|
||||
@@ -73,6 +85,8 @@ impl VideoStats {
|
||||
bytes: 0,
|
||||
e2e_us: Vec::with_capacity(256),
|
||||
hostnet_us: Vec::with_capacity(256),
|
||||
host_us: Vec::with_capacity(256),
|
||||
net_us: Vec::with_capacity(256),
|
||||
decode_us: Vec::with_capacity(256),
|
||||
skew_corrected: false,
|
||||
}),
|
||||
@@ -101,6 +115,8 @@ impl VideoStats {
|
||||
g.bytes = 0;
|
||||
g.e2e_us.clear();
|
||||
g.hostnet_us.clear();
|
||||
g.host_us.clear();
|
||||
g.net_us.clear();
|
||||
g.decode_us.clear();
|
||||
}
|
||||
}
|
||||
@@ -128,6 +144,25 @@ impl VideoStats {
|
||||
}
|
||||
}
|
||||
|
||||
/// Record one matched host/network split sample (Phase 2): the host's reported capture→sent
|
||||
/// duration and our capture→received minus it, both µs — one pair per AU whose 0xCF host
|
||||
/// timing arrived and matched by pts. An old host emits none, leaving the vecs empty and the
|
||||
/// snapshot p50s at 0 (HUD keeps the combined `host+network` term).
///
/// `host_us` and `net_us` are stored exactly as given — nothing is validated or clamped here (the
/// decode loop derives `net_us` with a saturating subtraction before calling). Both values are
/// pushed under one lock acquisition, so the two sample vecs always stay the same length. The
/// call is a no-op while sampling is disabled.
|
||||
// Driven only by the android-only decode thread; unreferenced on the host build — expected.
|
||||
#[cfg_attr(not(target_os = "android"), allow(dead_code))]
|
||||
pub fn note_host_split(&self, host_us: u64, net_us: u64) {
|
||||
if !self.enabled.load(Ordering::Relaxed) {
|
||||
return; // HUD hidden — skip the lock
|
||||
}
|
||||
// Poison-proof for the same reason as `note_received`.
// `PoisonError::into_inner` hands back the guard even if another holder panicked, so a poisoned
// mutex never stops sampling.
|
||||
let mut g = self
|
||||
.inner
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
// Lockstep push: the snapshot takes each vec's p50 independently, so the pair must land together.
g.host_us.push(host_us);
|
||||
g.net_us.push(net_us);
|
||||
}
|
||||
|
||||
/// Record one decoded output frame: its capture→decoded `end-to-end` sample and its
|
||||
/// received→decoded `decode` stage sample (either may be absent — e.g. the receipt stamp for
|
||||
/// this pts predates the HUD being shown).
|
||||
@@ -163,6 +198,8 @@ impl VideoStats {
|
||||
let mbps = g.bytes as f64 * 8.0 / 1_000_000.0 / elapsed;
|
||||
g.e2e_us.sort_unstable();
|
||||
g.hostnet_us.sort_unstable();
|
||||
g.host_us.sort_unstable();
|
||||
g.net_us.sort_unstable();
|
||||
g.decode_us.sort_unstable();
|
||||
let snap = Snapshot {
|
||||
fps,
|
||||
@@ -171,6 +208,8 @@ impl VideoStats {
|
||||
e2e_p95_ms: pctl_ms(&g.e2e_us, 0.95),
|
||||
hostnet_p50_ms: pctl_ms(&g.hostnet_us, 0.50),
|
||||
decode_p50_ms: pctl_ms(&g.decode_us, 0.50),
|
||||
host_p50_ms: pctl_ms(&g.host_us, 0.50),
|
||||
net_p50_ms: pctl_ms(&g.net_us, 0.50),
|
||||
lat_valid: !g.e2e_us.is_empty(),
|
||||
skew_corrected: g.skew_corrected,
|
||||
};
|
||||
@@ -179,6 +218,8 @@ impl VideoStats {
|
||||
g.bytes = 0;
|
||||
g.e2e_us.clear();
|
||||
g.hostnet_us.clear();
|
||||
g.host_us.clear();
|
||||
g.net_us.clear();
|
||||
g.decode_us.clear();
|
||||
snap
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user