feat(clients): unified stats vocabulary across every client + Moonlight comparison docs
One stat model everywhere (design/stats-unification.md): four measurement points (capture/received/decoded/displayed), three stages that tile the interval exactly, and a HUD that shows the addition explicitly — end-to-end 14.2 ms p50 · 19.8 p95 · capture→on-glass = host+network 9.8 + decode 2.1 + display 2.3 replacing each client's ad-hoc mix of overlapping absolutes (the Apple HUD's three arrow lines that looked sequential but weren't), mean-vs-median decode times (Windows/Linux), missing same-host-clock flags (Windows/Linux), and three different names for the same capture→received measurement (probe's "reassembled", Apple/Android's "client", Windows/Linux's post-decode "lat"). Per client: Apple threads receivedNs through the VT decode via the frame refcon bit pattern so the decode stage exists at all (stage-1 fallback honestly degrades to a capture→received headline); Windows carries FrameTimes through the existing frame channel to the render thread and adds e2e p50/p95 post-Present; Linux stamps received at AU pop and rides decoded_ns on DecodedFrame to the paintable-set site; Android pairs receipt stamps with MediaCodec output buffers via the codec's pts round-trip (JNI stats array 14→16 doubles, indexes 0-13 unchanged). fps now uniformly counts received AUs; lost/(received+lost) per window, hidden at zero. docs-site gains "Understanding the Stats Overlay": what each line means, why the equation only approximately sums (percentiles), and a line-by-line Moonlight/Sunshine matrix — including that Moonlight has no end-to-end number and its "network latency" is an ENet control RTT, so punktfunk's headline must not be compared against any single Moonlight line. Verified here: linux client + probe + core check/clippy/fmt green, android native cargo-ndk arm64 check green. Pending: Windows CI + on-glass, swift test on the mac, on-device Android. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,10 @@ import VideoToolbox
|
||||
public struct ReadyFrame: @unchecked Sendable {
|
||||
/// Host capture clock (the AU's pts), in nanoseconds.
|
||||
public let ptsNs: UInt64
|
||||
/// Client `CLOCK_REALTIME` instant the AU was received (`AccessUnit.receivedNs`, threaded
|
||||
/// through the decode via the frame refcon), in nanoseconds. 0 when unknown (a caller that
|
||||
/// didn't stamp receipt) — the decode-stage meter then drops the sample via its sanity guard.
|
||||
public let receivedNs: Int64
|
||||
/// Client `CLOCK_REALTIME` instant decode completed, in nanoseconds.
|
||||
public let decodedNs: Int64
|
||||
/// The decoded image — 8-bit NV12 biplanar (SDR) or 10-bit P010 biplanar (HDR), Metal-compatible.
|
||||
@@ -25,13 +29,16 @@ public struct ReadyFrame: @unchecked Sendable {
|
||||
}
|
||||
|
||||
/// The C output callback can't capture context, so VideoToolbox hands it the refcon we set at
|
||||
/// session creation — a pointer back to the owning `VideoDecoder`.
|
||||
/// session creation — a pointer back to the owning `VideoDecoder`. The per-frame refcon carries
|
||||
/// the AU's `receivedNs` as a pointer bit pattern (a scalar smuggled through the C void*, never
|
||||
/// dereferenced) so the decode stage can be computed against decode-completion.
|
||||
private let decoderOutputCallback: VTDecompressionOutputCallback = {
|
||||
refcon, _, status, _, imageBuffer, pts, _ in
|
||||
refcon, frameRefcon, status, _, imageBuffer, pts, _ in
|
||||
guard let refcon else { return }
|
||||
let receivedNs = frameRefcon.map { Int64(Int(bitPattern: $0)) } ?? 0
|
||||
Unmanaged<VideoDecoder>.fromOpaque(refcon)
|
||||
.takeUnretainedValue()
|
||||
.handleDecoded(status: status, imageBuffer: imageBuffer, pts: pts)
|
||||
.handleDecoded(status: status, imageBuffer: imageBuffer, pts: pts, receivedNs: receivedNs)
|
||||
}
|
||||
|
||||
/// Owns a `VTDecompressionSession` rebuilt whenever the format description changes (every IDR /
|
||||
@@ -112,7 +119,9 @@ public final class VideoDecoder: @unchecked Sendable {
|
||||
session,
|
||||
sampleBuffer: sample,
|
||||
flags: [._EnableAsynchronousDecompression],
|
||||
frameRefcon: nil,
|
||||
// The AU's receipt instant rides through as a bit pattern (nil for 0 — the output
|
||||
// callback maps that back to 0); the callback needs it to stamp the decode stage.
|
||||
frameRefcon: UnsafeMutableRawPointer(bitPattern: Int(au.receivedNs)),
|
||||
infoFlagsOut: &infoOut)
|
||||
lock.unlock()
|
||||
if status != noErr {
|
||||
@@ -218,8 +227,11 @@ public final class VideoDecoder: @unchecked Sendable {
|
||||
return true
|
||||
}
|
||||
|
||||
/// VT thread. Stamp decode-completion and enqueue, or report the error.
|
||||
fileprivate func handleDecoded(status: OSStatus, imageBuffer: CVImageBuffer?, pts: CMTime) {
|
||||
/// VT thread. Stamp decode-completion and enqueue, or report the error. `receivedNs` is the
|
||||
/// AU's receipt instant threaded through the frame refcon (0 = unknown).
|
||||
fileprivate func handleDecoded(
|
||||
status: OSStatus, imageBuffer: CVImageBuffer?, pts: CMTime, receivedNs: Int64
|
||||
) {
|
||||
guard status == noErr, let imageBuffer else {
|
||||
onDecodeError(status)
|
||||
return
|
||||
@@ -242,6 +254,8 @@ public final class VideoDecoder: @unchecked Sendable {
|
||||
|| fmt == kCVPixelFormatType_444YpCbCr10BiPlanarVideoRange
|
||||
|| fmt == kCVPixelFormatType_444YpCbCr10BiPlanarFullRange
|
||||
onDecoded(
|
||||
ReadyFrame(ptsNs: ptsNs, decodedNs: decodedNs, pixelBuffer: imageBuffer, isHDR: isHDR))
|
||||
ReadyFrame(
|
||||
ptsNs: ptsNs, receivedNs: receivedNs, decodedNs: decodedNs,
|
||||
pixelBuffer: imageBuffer, isHDR: isHDR))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user