feat(clients): unified stats vocabulary across every client + Moonlight comparison docs

One stat model everywhere (design/stats-unification.md): four measurement
points (capture/received/decoded/displayed), three stages that tile the
interval exactly, and a HUD that shows the addition explicitly —

  end-to-end 14.2 ms p50 · 19.8 p95 · capture→on-glass
  = host+network 9.8 + decode 2.1 + display 2.3

replacing each client's ad-hoc mix of overlapping absolutes (the Apple HUD's
three arrow lines that looked sequential but weren't), mean-vs-median decode
times (Windows/Linux), missing same-host-clock flags (Windows/Linux), and
three different names for the same capture→received measurement (probe's
"reassembled", Apple/Android's "client", Windows/Linux's post-decode "lat").

Per client: Apple threads receivedNs through the VT decode via the frame
refcon bit pattern so the decode stage exists at all (stage-1 fallback
honestly degrades to a capture→received headline); Windows carries
FrameTimes through the existing frame channel to the render thread and adds
e2e p50/p95 post-Present; Linux stamps received at AU pop and rides
decoded_ns on DecodedFrame to the paintable-set site; Android pairs receipt
stamps with MediaCodec output buffers via the codec's pts round-trip (JNI
stats array 14→16 doubles, indexes 0-13 unchanged). fps now uniformly counts
received AUs; lost/(received+lost) per window, hidden at zero.

docs-site gains "Understanding the Stats Overlay": what each line means, why
the equation only approximately sums (percentiles), and a line-by-line
Moonlight/Sunshine matrix — including that Moonlight has no end-to-end
number and its "network latency" is an ENet control RTT, so punktfunk's
headline must not be compared against any single Moonlight line.

Verified here: linux client + probe + core check/clippy/fmt green, android
native cargo-ndk arm64 check green. Pending: Windows CI + on-glass, swift
test on the mac, on-device Android.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-03 21:01:29 +00:00
parent c7630ff5dc
commit 09a5957c6d
38 changed files with 1122 additions and 380 deletions
@@ -1,6 +1,10 @@
// Unit tests for LatencyMeter: percentiles, the skew-corrected flag, reset-on-drain, and the
// absurd-value guard. Latencies are constructed by stamping a pts a known interval in the past, so
// the result is that interval plus the (tiny) clock advance between reads — asserted with tolerance.
// Unit tests for LatencyMeter (one instance per unified-stats stage — see
// design/stats-unification.md): percentiles, the skew-corrected flag, reset-on-drain, the
// absurd-value guard, and the explicit-instant stage form (record(ptsNs:atNs:offsetNs:), used for
// the client-local decode/display stages and the at-present end-to-end stamp). Receipt-path
// latencies are constructed by stamping a pts a known interval in the past, so the result is that
// interval plus the (tiny) clock advance between reads — asserted with tolerance; the explicit
// form is exact.
import Foundation
import XCTest
@@ -38,6 +42,26 @@ final class LatencyMeterTests: XCTestCase {
XCTAssertEqual(m.drain()?.skewCorrected, true)
}
/// The explicit-instant form of `record` is fed both the start and the end stamp, so the
/// resulting sample is asserted without any tolerance.
func testExplicitStageRecordIsExact() {
    // Model a client-local stage (decode: received→decoded): the stage's start instant is
    // passed as ptsNs, its end instant as atNs, and the offset is 0.
    let stageStartNs: Int64 = 1_000_000_000_000
    let stageEndNs = stageStartNs + 3_000_000  // 3 ms after the start stamp
    let meter = LatencyMeter()
    meter.record(ptsNs: UInt64(stageStartNs), atNs: stageEndNs, offsetNs: 0)

    guard let window = meter.drain() else {
        XCTFail("expected a sample")
        return
    }
    XCTAssertEqual(window.count, 1)
    XCTAssertEqual(window.p50Ms, 3.0, "explicit instants make the sample exact")
    XCTAssertFalse(window.skewCorrected, "local stages record with offset 0")
}
/// A stage whose start stamp is missing (0) or lies after its end stamp must not pollute the
/// window: after recording both cases, draining is expected to yield nothing.
func testExplicitStageDropsNonPositiveInterval() {
    let meter = LatencyMeter()
    let stageEndNs: Int64 = 1_000_000_000_000
    // First start stamp: 0 ("start unknown") → interval > 10 s → expected to be dropped.
    // Second start stamp: 1 ns past the end → negative interval → expected to be dropped.
    let startStamps: [UInt64] = [0, UInt64(stageEndNs + 1)]
    for startNs in startStamps {
        meter.record(ptsNs: startNs, atNs: stageEndNs, offsetNs: 0)
    }
    XCTAssertNil(meter.drain())
}
func testDropsAbsurdValues() {
let m = LatencyMeter()
let now = nowRealtimeNs()
@@ -31,7 +31,7 @@ final class Stage444Tests: XCTestCase {
let data = Data(Probe444Blobs.au444_8bit)
let format = try XCTUnwrap(
AnnexB.formatDescription(fromIDR: data, codec: .hevc), "the 4:4:4 blob must yield a format description")
let au = AccessUnit(data: data, ptsNs: 7_000_000, frameIndex: 0, flags: 0)
let au = AccessUnit(data: data, ptsNs: 7_000_000, frameIndex: 0, flags: 0, receivedNs: 0)
let box = FrameBox()
let done = DispatchSemaphore(value: 0)
@@ -38,7 +38,8 @@ final class VideoToolboxRoundTripTests: XCTestCase {
XCTAssertEqual(AnnexB.avcc(from: annexB, codec: .hevc), avccSample)
// 3) Sample buffer → real decoder → pixels.
let au = AccessUnit(data: annexB, ptsNs: 1_000_000, frameIndex: 0, flags: 0)
let au = AccessUnit(
data: annexB, ptsNs: 1_000_000, frameIndex: 0, flags: 0, receivedNs: 0)
let sample = try XCTUnwrap(AnnexB.sampleBuffer(au: au, format: rebuilt, codec: .hevc))
var session: VTDecompressionSession?
@@ -67,13 +68,14 @@ final class VideoToolboxRoundTripTests: XCTestCase {
}
/// Stage-2 decode half: the same known IDR through `VideoDecoder` — assert its async output
/// callback fires with a CVPixelBuffer of the right dimensions, the pts round-trips, and
/// decode-completion is stamped.
/// callback fires with a CVPixelBuffer of the right dimensions, the pts and the receipt stamp
/// round-trip (the latter rides the frame refcon), and decode-completion is stamped.
func testVideoDecoderAsyncCallbackDeliversPixels() throws {
let (formatDesc, avccSample) = try encodeOneHEVCKeyframe()
let annexB = try annexBAU(formatDesc: formatDesc, avccSample: avccSample)
let format = try XCTUnwrap(AnnexB.formatDescription(fromIDR: annexB, codec: .hevc))
let au = AccessUnit(data: annexB, ptsNs: 42_000_000, frameIndex: 0, flags: 0)
let au = AccessUnit(
data: annexB, ptsNs: 42_000_000, frameIndex: 0, flags: 0, receivedNs: 41_000_000)
let box = FrameBox()
let done = DispatchSemaphore(value: 0)
@@ -100,6 +102,8 @@ final class VideoToolboxRoundTripTests: XCTestCase {
XCTAssertEqual(CVPixelBufferGetWidth(ready.pixelBuffer), width)
XCTAssertEqual(CVPixelBufferGetHeight(ready.pixelBuffer), height)
XCTAssertEqual(ready.ptsNs, 42_000_000, "pts round-trips through the decoder")
XCTAssertEqual(
ready.receivedNs, 41_000_000, "receivedNs round-trips through the frame refcon")
XCTAssertGreaterThan(ready.decodedNs, 0, "decode-completion is stamped")
}