feat(clients): host/network split in every stats HUD (stats phase 2, client side)

Consumes the 0xCF host-timing plane (449a67c) on all four GUI clients: each
keeps a bounded pending ring of receipt samples keyed by pts, matches the
host's per-AU capture→sent reports against it, and the HUD equation becomes

  = host 3.1 + network 6.7 + decode 2.1 + display 2.3

falling back to the combined `= host+network …` term whenever no timing
matched the window (old host / datagram loss) — same total, one split
fewer, never a misleading zero. Apple additionally gains the split as the
only equation line under the stage-1 fallback presenter (receipt is
presenter-independent), a `nextHostTiming` wrapper with its own plane lock,
and a unit-tested `HostNetworkSplitter`; Android extends the JNI stats
array 16→18 doubles (0–15 unchanged); Windows/Linux thread the split
through `Stats` into the HUD and the headless/debug logs.

Docs updated: design/stats-unification.md Phase 2 → implemented (wire
format, fallback semantics), and the docs-site matrix's Sunshine "Host
processing latency" row is now a direct match (caveats: ours includes the
paced send, and Sunshine reports an average where we report a p50).

Verified here: linux client clippy -D warnings green on the live tree,
windows stub check + hand-verified diff, android cargo-ndk arm64 check
green, apple loopback test extended (needs the rebuilt xcframework + swift
test on the mac). On-glass: pending on all platforms.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-03 21:31:49 +00:00
parent 8470419433
commit 69609945a3
19 changed files with 610 additions and 59 deletions
+17 -7
View File
@@ -175,7 +175,8 @@ fn fmt_uptime(secs: u32) -> String {
/// The streaming HUD overlay (top-right), unified stats vocabulary (design/stats-unification.md):
/// a chip row (mode · codec · decode path · HDR), a stream line (received fps · goodput ·
/// presenter fps), the end-to-end headline (capture→on-glass p50/p95, host-clock corrected), the
/// stage equation (= host+network + decode + display, stage p50s), a session line
/// stage equation (= host + network + decode + display when the host reports 0xCF timings, else
/// the combined = host+network + decode + display; stage p50s), a session line
/// (host · time · loss/skips), and the shortcut hints. Layered over the `SwapChainPanel` in the
/// same grid cell.
fn hud_overlay(hud: &HudSample, mode: Option<Mode>, host: &str) -> Element {
@@ -212,12 +213,21 @@ fn hud_overlay(hud: &HudSample, mode: Option<Mode>, host: &str) -> Element {
if stats.same_host {
e2e_line.push_str(" (same-host clock)");
}
// The equation: the three stages tile the headline interval per frame; the window p50s only
// approximately sum (percentiles aren't additive).
let stage_line = format!(
"= host+network {:.1} + decode {:.1} + display {:.1}",
stats.hostnet_ms, stats.decode_ms, present.display_p50_ms
);
// The equation: the stages tile the headline interval per frame; the window p50s only
// approximately sum (percentiles aren't additive). With per-AU 0xCF host timings the opaque
// `host+network` term splits into `host` (host capture→sent) + `network` (the remainder);
// an old host emits none and the combined term stays.
let stage_line = if stats.split {
format!(
"= host {:.1} + network {:.1} + decode {:.1} + display {:.1}",
stats.host_ms, stats.net_ms, stats.decode_ms, present.display_p50_ms
)
} else {
format!(
"= host+network {:.1} + decode {:.1} + display {:.1}",
stats.hostnet_ms, stats.decode_ms, present.display_p50_ms
)
};
let mut session_bits: Vec<String> = Vec::new();
if !host.is_empty() {
session_bits.push(host.to_string());
+12
View File
@@ -238,6 +238,18 @@ fn run_headless_cli(args: &[String], identity: (String, String)) {
session::SessionEvent::Connected {
mode, fingerprint, ..
} => tracing::info!(?mode, fp = %trust::hex(&fingerprint), "connected"),
// With per-AU 0xCF host timings the combined host+network stage splits into
// host (capture→sent on the host) + net; an old host emits none → combined only.
session::SessionEvent::Stats(s) if s.split => tracing::info!(
fps = format!("{:.0}", s.fps),
mbps = format!("{:.1}", s.mbps),
decode_p50_ms = format!("{:.2}", s.decode_ms),
hostnet_p50_ms = format!("{:.2}", s.hostnet_ms),
host_p50_ms = format!("{:.2}", s.host_ms),
net_p50_ms = format!("{:.2}", s.net_ms),
frames_seen,
"stats"
),
session::SessionEvent::Stats(s) => tracing::info!(
fps = format!("{:.0}", s.fps),
mbps = format!("{:.1}", s.mbps),
+44
View File
@@ -55,6 +55,15 @@ pub struct Stats {
/// `host+network` stage p50 over the last 1 s window: capture (`pts_ns`) → received,
/// host-clock corrected via `clock_offset_ns`.
pub hostnet_ms: f32,
/// `host` stage p50 (host capture→sent, from the per-AU 0xCF host-timing plane). Valid only
/// when `split` — an old host emits no 0xCF and the HUD keeps the combined stage.
pub host_ms: f32,
/// `network` stage p50 (`hostnet − host`, tiled per frame before taking the percentile).
/// Valid only when `split`.
pub net_ms: f32,
/// True when any 0xCF host timings matched received AUs this window — the HUD then renders
/// `host + network` instead of the combined `host+network` term.
pub split: bool,
/// True when `clock_offset_ns == 0` (host didn't answer the skew handshake / same host) —
/// the HUD appends `(same-host clock)` to the end-to-end line.
pub same_host: bool,
@@ -330,6 +339,12 @@ fn pump(
// 1 s tumbling stage windows (spec: design/stats-unification.md — percentiles, never means).
let mut hostnet_us: Vec<u64> = Vec::with_capacity(256);
let mut decode_us: Vec<u64> = Vec::with_capacity(256);
// Host/network split (Phase 2): received AUs awaiting their 0xCF host timing, `(pts_ns,
// hostnet_us)`, matched as the datagrams arrive. Bounded — an old host never sends any.
let mut pending_split: std::collections::VecDeque<(u64, u64)> =
std::collections::VecDeque::with_capacity(256);
let mut host_us_w: Vec<u64> = Vec::with_capacity(256);
let mut net_us_w: Vec<u64> = Vec::with_capacity(256);
let mut pcm = vec![0f32; 5760 * channels as usize]; // scratch: max Opus frame (120 ms) × channels
// Loss recovery: watch the host→client unrecoverable-drop count and ask for an IDR when it climbs.
let mut last_dropped = connector.frames_dropped();
@@ -352,6 +367,11 @@ fn pump(
.max(0) as u64;
if hostnet > 0 && hostnet < 10_000_000_000 {
hostnet_us.push(hostnet / 1000);
// Remember this AU for the 0xCF match below (host/network split).
pending_split.push_back((frame.pts_ns, hostnet / 1000));
if pending_split.len() > 256 {
pending_split.pop_front();
}
}
// A D3D11VA→software demotion (see `Decoder::decode`) starts a FRESH decoder that
// has none of the stream's parameter sets; under infinite GOP it would sit on
@@ -440,15 +460,34 @@ fn pump(
*crate::present::LATEST_HDR_META.lock().unwrap() = Some(meta);
}
// Drain the per-AU host-timing plane (0xCF) and match by pts: `host` = the host's own
// capture→sent, `network` = our capture→received minus it — the two tile per frame
// (design/stats-unification.md Phase 2). An old host never emits any; `split` stays false
// and the HUD keeps the combined `host+network` stage.
while let Ok(t) = connector.next_host_timing(Duration::ZERO) {
if let Some(i) = pending_split.iter().position(|(p, _)| *p == t.pts_ns) {
let (_, hn_us) = pending_split.remove(i).unwrap();
host_us_w.push(t.host_us as u64);
net_us_w.push(hn_us.saturating_sub(t.host_us as u64));
}
}
if window_start.elapsed() >= Duration::from_secs(1) {
let secs = window_start.elapsed().as_secs_f32();
hostnet_us.sort_unstable();
decode_us.sort_unstable();
host_us_w.sort_unstable();
net_us_w.sort_unstable();
let p50 = |v: &[u64]| v.get(v.len() / 2).copied().unwrap_or(0);
let (hostnet_p50, decode_p50) = (p50(&hostnet_us), p50(&decode_us));
let (host_p50, net_p50) = (p50(&host_us_w), p50(&net_us_w));
let split = !host_us_w.is_empty();
tracing::debug!(
fps = frames_n,
hostnet_p50_us = hostnet_p50,
host_p50_us = host_p50,
net_p50_us = net_p50,
split,
decode_p50_us = decode_p50,
total_frames,
"stream window"
@@ -458,6 +497,9 @@ fn pump(
mbps: bytes_n as f32 * 8.0 / 1e6 / secs,
decode_ms: decode_p50 as f32 / 1000.0,
hostnet_ms: hostnet_p50 as f32 / 1000.0,
host_ms: host_p50 as f32 / 1000.0,
net_ms: net_p50 as f32 / 1000.0,
split,
same_host: clock_offset == 0,
hardware,
hdr,
@@ -470,6 +512,8 @@ fn pump(
bytes_n = 0;
hostnet_us.clear();
decode_us.clear();
host_us_w.clear();
net_us_w.clear();
}
};