feat(clients): host/network split in every stats HUD (stats phase 2, client side)

Consumes the 0xCF host-timing plane (449a67c) on all four GUI clients: each
keeps a bounded pending ring of receipt samples keyed by pts, matches the
host's per-AU capture→sent reports against it, and the HUD equation becomes

  = host 3.1 + network 6.7 + decode 2.1 + display 2.3

falling back to the combined `= host+network …` term whenever no timing
matched the window (old host / datagram loss) — same total, one split
fewer, never a misleading zero. Apple additionally gains the split as the
only equation line under the stage-1 fallback presenter (receipt is
presenter-independent), a `nextHostTiming` wrapper with its own plane lock,
and a unit-tested `HostNetworkSplitter`; Android extends the JNI stats
array 16→18 doubles (0–15 unchanged); Windows/Linux thread the split
through `Stats` into the HUD and the headless/debug logs.

Docs updated: design/stats-unification.md Phase 2 → implemented (wire
format, fallback semantics), and the docs-site matrix's Sunshine "Host
processing latency" row is now a direct match (ours includes the paced
send; avg vs p50).

Verified here: linux client clippy -D warnings green on the live tree,
windows stub check + hand-verified diff, android cargo-ndk arm64 check
green, apple loopback test extended (needs the rebuilt xcframework + swift
test on the mac). On-glass: pending on all platforms.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-03 21:31:49 +00:00
parent 8470419433
commit 69609945a3
19 changed files with 610 additions and 59 deletions
@@ -326,9 +326,14 @@ struct ContentView: View {
onCaptureChange: { [weak model] captured in
model?.mouseCaptured = captured
},
onFrame: { [meter = model.meter, latency = model.latency, offset = conn.clockOffsetNs] au in
onFrame: { [meter = model.meter, latency = model.latency,
split = model.latencySplit, offset = conn.clockOffsetNs] au in
meter.note(byteCount: au.data.count)
latency.record(ptsNs: au.ptsNs, offsetNs: offset)
// The same receipt, keyed by pts, awaiting its 0xCF host timing (the
// host/network split drained by the 1 s stats tick).
split.recordReceipt(
ptsNs: au.ptsNs, receivedNs: au.receivedNs, offsetNs: offset)
},
onSessionEnd: { [weak model] in
Task { @MainActor in model?.sessionEnded() }
@@ -69,6 +69,14 @@ final class SessionModel: ObservableObject {
@Published var hostNetworkP95Ms = 0.0
@Published var hostNetworkValid = false
@Published var hostNetworkSkewCorrected = false
/// Phase 2 of the same stage: `host+network` split into its two terms via the host's per-AU
/// 0xCF timing reports (host = capture→fully-sent as the host measured it, network = the
/// remainder), matched to receipts by pts in `latencySplit`. `splitValid` is false whenever
/// no timing matched in the window — an old host that never emits the plane, or heavy 0xCF
/// loss — and the HUD then falls back to the combined `host+network` term.
@Published var hostP50Ms = 0.0
@Published var networkP50Ms = 0.0
@Published var splitValid = false
/// End-to-end = capture→on-glass, measured directly per frame (never summed from the stages) —
/// the HUD headline. Only the stage-2 presenter can stamp it (it owns decode + a
/// CAMetalLayer/display-link present); stays invalid under stage-1, where the layer presents
@@ -96,6 +104,10 @@ final class SessionModel: ObservableObject {
/// Capture→received (the host+network stage), fed per AU at receipt by the stream view's
/// onFrame under both presenters.
let latency = LatencyMeter()
/// The host/network split of that same stage: onFrame also records (pts, interval) receipts
/// here, and the 1 s stats tick drains the connection's 0xCF host timings into it under
/// both presenters (the receipt path is presenter-independent).
let latencySplit = HostNetworkSplitter()
/// The stage-2 meters, passed to StreamView: end-to-end (capture→on-glass, stamped at
/// present), decode (received→decoded), display (decoded→on-glass).
let endToEnd = LatencyMeter()
@@ -296,6 +308,7 @@ final class SessionModel: ObservableObject {
fps = 0
mbps = 0
hostNetworkValid = false
splitValid = false
endToEndValid = false
decodeValid = false
displayValid = false
@@ -341,6 +354,7 @@ final class SessionModel: ObservableObject {
private func startStatsTimer() {
lastFramesDropped = 0 // a fresh connection's cumulative drop counter starts at 0
latencySplit.reset() // no stale receipts/samples from a previous session
let timer = Timer(timeInterval: 1.0, repeats: true) { [weak self] _ in
guard let self else { return }
Task { @MainActor in
@@ -364,6 +378,25 @@ final class SessionModel: ObservableObject {
} else {
self.hostNetworkValid = false
}
// Phase 2: drain the window's per-AU host timings (0xCF) into the splitter —
// non-blocking, bounded (a 240 fps window is ~240 reports; the cap only guards
// a pathological burst). `try?` flattens (SE-0230); a throw (.closed during
// teardown) just ends the drain. An old host never emits any — splitValid stays
// false and the HUD keeps the combined host+network term.
if let conn = self.connection {
var burst = 0
while burst < 1024, let t = try? conn.nextHostTiming(timeoutMs: 0) {
self.latencySplit.noteHostTiming(ptsNs: t.ptsNs, hostUs: t.hostUs)
burst += 1
}
}
if let s = self.latencySplit.drain() {
self.hostP50Ms = s.hostP50Ms
self.networkP50Ms = s.networkP50Ms
self.splitValid = true
} else {
self.splitValid = false
}
if let e = self.endToEnd.drain() {
self.endToEndP50Ms = e.p50Ms
self.endToEndP95Ms = e.p95Ms
@@ -26,20 +26,34 @@ struct StreamHUDView: View {
Text("end-to-end \(model.endToEndP50Ms, specifier: "%.1f") ms p50 · \(model.endToEndP95Ms, specifier: "%.1f") p95 · capture→on-glass\(model.endToEndSkewCorrected ? "" : " (same-host clock)")")
.font(.system(.caption2, design: .monospaced))
.foregroundStyle(.secondary)
// The equation: the three stages tiling the headline interval (per-window p50s —
// they only approximately sum to the directly-measured total).
// The equation: the stages tiling the headline interval (per-window p50s —
// they only approximately sum to the directly-measured total). With a host
// that reports per-AU timings (0xCF) the first term splits into host + network
// (phase 2); an old host keeps the combined term.
if model.hostNetworkValid && model.decodeValid && model.displayValid {
Text("= host+network \(model.hostNetworkP50Ms, specifier: "%.1f") + decode \(model.decodeP50Ms, specifier: "%.1f") + display \(model.displayP50Ms, specifier: "%.1f")")
.font(.system(.caption2, design: .monospaced))
.foregroundStyle(.secondary)
if model.splitValid {
Text("= host \(model.hostP50Ms, specifier: "%.1f") + network \(model.networkP50Ms, specifier: "%.1f") + decode \(model.decodeP50Ms, specifier: "%.1f") + display \(model.displayP50Ms, specifier: "%.1f")")
.font(.system(.caption2, design: .monospaced))
.foregroundStyle(.secondary)
} else {
Text("= host+network \(model.hostNetworkP50Ms, specifier: "%.1f") + decode \(model.decodeP50Ms, specifier: "%.1f") + display \(model.displayP50Ms, specifier: "%.1f")")
.font(.system(.caption2, design: .monospaced))
.foregroundStyle(.secondary)
}
}
} else if model.hostNetworkValid {
// Stage-1 fallback presenter: the layer decodes + presents internally with no
// per-frame stamp, so the honest headline ends at receipt and there is no
// equation line (host+network is the whole measured interval).
// per-frame stamp, so the honest headline ends at receipt. The host/network
// split still applies there (receipt is presenter-independent) — it becomes the
// only equation line; without it, host+network IS the whole measured interval.
Text("capture→received \(model.hostNetworkP50Ms, specifier: "%.1f") ms p50 · \(model.hostNetworkP95Ms, specifier: "%.1f") p95\(model.hostNetworkSkewCorrected ? "" : " (same-host clock)")")
.font(.system(.caption2, design: .monospaced))
.foregroundStyle(.secondary)
if model.splitValid {
Text("= host \(model.hostP50Ms, specifier: "%.1f") + network \(model.networkP50Ms, specifier: "%.1f")")
.font(.system(.caption2, design: .monospaced))
.foregroundStyle(.secondary)
}
}
if model.lostFrames > 0 {
// Unrecoverable network drops this window; hidden while the link is clean.
@@ -83,6 +83,9 @@ public final class PunktfunkConnection {
/// Same role for the feedback drain thread (rumble + HID-output — two core planes,
/// drained sequentially by one thread).
private let feedbackLock = NSLock()
/// Same role for the host-timing (0xCF) puller — its own plane in the core, drained
/// non-blockingly by the app's 1 s stats tick (never contends with the blocking pullers).
private let statsLock = NSLock()
/// Negotiated session mode (host-confirmed).
public private(set) var width: UInt32 = 0
@@ -665,6 +668,40 @@ public final class PunktfunkConnection {
}
}
/// One per-AU host-timing report (0xCF): the host's capture→fully-sent duration for the
/// access unit whose `AccessUnit.ptsNs` equals `ptsNs` exactly. The stats consumer derives
/// `network = (receivedNs + clockOffsetNs - ptsNs) - hostUs` (the nanosecond interval is
/// converted to µs before `hostUs` is subtracted) — the host/network split of the HUD's
/// `host+network` stage (design/stats-unification.md Phase 2).
public struct HostTiming: Sendable, Equatable {
/// The AU's capture stamp (host capture clock — matches the AU's `ptsNs`).
public let ptsNs: UInt64
/// Host capture→sent duration, µs.
public let hostUs: UInt32
}
/// Pulls the next per-AU host-timing report (0xCF) from the core.
///
/// Best-effort plane: an older host never emits any report — keep showing the combined
/// `host+network` stage in that case. Meant to be drained non-blockingly (`timeoutMs: 0`)
/// from ONE stats consumer; the plane has its own lock, separate from the other pullers.
///
/// - Parameter timeoutMs: Wait budget passed through to the core call, in milliseconds.
/// - Returns: The next report, or `nil` when the core has none within the timeout.
/// - Throws: `PunktfunkClientError.closed` when there is no live handle or the core reports
///   the session closed; `PunktfunkClientError.status` for any other status code.
public func nextHostTiming(timeoutMs: UInt32 = 0) throws -> HostTiming? {
    // Held across the whole core call; `close()` acquires this same lock before it clears
    // the handle, so the handle stays valid for as long as we are inside the pull.
    statsLock.lock()
    defer { statsLock.unlock() }

    guard let live = liveHandle() else {
        throw PunktfunkClientError.closed
    }

    var report = PunktfunkHostTiming()
    let status = punktfunk_connection_next_host_timing(live, &report, timeoutMs)

    switch status {
    case statusOK:
        // Copy the C struct's fields into the Swift value type.
        return HostTiming(ptsNs: report.pts_ns, hostUs: report.host_us)
    case statusNoFrame:
        // Nothing queued within the timeout — not an error.
        return nil
    case statusClosed:
        throw PunktfunkClientError.closed
    default:
        throw PunktfunkClientError.status(status)
    }
}
/// Send one input event (delivered to the host as a QUIC datagram). Thread-safe;
/// silently dropped after close.
public func send(_ event: PunktfunkInputEvent) {
@@ -684,10 +721,12 @@ public final class PunktfunkConnection {
pumpLock.lock() // pullers exit at their next poll boundary, releasing these
audioLock.lock()
feedbackLock.lock()
statsLock.lock()
abiLock.lock()
let h = handle
handle = nil
abiLock.unlock()
statsLock.unlock()
feedbackLock.unlock()
audioLock.unlock()
pumpLock.unlock()
@@ -0,0 +1,88 @@
// Splits the unified stats model's `host+network` stage (capture→received) into its `host`
// (capture→fully-sent, reported per AU by the host on the 0xCF plane) and `network`
// (the remainder) terms — design/stats-unification.md Phase 2.
//
// Receipt samples are recorded per frame from the pump path; host timings are matched to them
// by exact pts (the 0xCF datagram carries the AU's own `pts_ns`). Best-effort by construction:
// a lost 0xCF datagram, an FEC-dropped AU, or an old host that never emits the plane simply
// contributes no split sample — the HUD then keeps the combined `host+network` line. NSLock
// rather than an actor — the receipt writer is the non-async pump path (same pattern as
// LatencyMeter/FrameMeter).
import Foundation
/// Samples the `host` and `network` halves of the `host+network` stage, frame by frame.
///
/// Three entry points feed it: `recordReceipt` when an AU is received (its pts plus the
/// combined capture→received interval), `noteHostTiming` for each drained 0xCF report, and
/// `drain` once per stats window to collect the p50s. Receipts wait in a bounded, drop-oldest
/// ring, so a host that never sends timings costs a fixed ~4 KB instead of unbounded growth.
public final class HostNetworkSplitter: @unchecked Sendable {
    /// A received AU still waiting for its 0xCF host timing: its pts and the combined
    /// capture→received interval in µs.
    private typealias Receipt = (ptsNs: UInt64, combinedUs: Int64)

    /// Ring capacity: ~1 s of frames at 240 fps. A receipt older than that can no longer
    /// expect its timing to show up.
    private static let pendingCap = 256

    /// Guards every mutable property below (the receipt writer is not async).
    private let lock = NSLock()
    private var pending: [Receipt] = []
    /// Matched samples of the current window, µs. The two arrays always grow together.
    private var hostUsSamples: [Int64] = []
    private var networkUsSamples: [Int64] = []

    public init() {}

    /// Records one frame at receipt.
    ///
    /// - Parameters:
    ///   - ptsNs: The AU's pts, on the host capture clock.
    ///   - receivedNs: The client receipt instant (`AccessUnit.receivedNs`).
    ///   - offsetNs: The connect-time host↔client clock offset (0 = uncorrected).
    ///
    /// Intervals outside (0, 10 s) are discarded up front, so an absurd sample never sits in
    /// the ring waiting for a match.
    public func recordReceipt(ptsNs: UInt64, receivedNs: Int64, offsetNs: Int64) {
        // Wrapping arithmetic: garbage stamps must end up rejected by the range check below,
        // never trap.
        let combinedNs = receivedNs &+ offsetNs &- Int64(bitPattern: ptsNs)
        guard (1..<10_000_000_000).contains(combinedNs) else { return }

        lock.lock()
        defer { lock.unlock() }
        pending.append((ptsNs: ptsNs, combinedUs: combinedNs / 1000))
        // Drop-oldest once the ring is over capacity.
        let overflow = pending.count - Self.pendingCap
        if overflow > 0 {
            pending.removeFirst(overflow)
        }
    }

    /// Pairs one host timing (0xCF) with the receipt carrying the same pts.
    ///
    /// `host` is the reported capture→sent duration; `network` is the combined interval minus
    /// it, floored at 0 so a slightly-off skew offset cannot yield a negative wire time. A
    /// timing with no waiting receipt is ignored, and a matched receipt is consumed, so a
    /// duplicate timing adds no second sample.
    public func noteHostTiming(ptsNs: UInt64, hostUs: UInt32) {
        lock.lock()
        defer { lock.unlock() }

        if let slot = pending.firstIndex(where: { $0.ptsNs == ptsNs }) {
            let matched = pending.remove(at: slot)
            let reportedUs = Int64(hostUs)
            hostUsSamples.append(reportedUs)
            networkUsSamples.append(max(0, matched.combinedUs - reportedUs))
        }
    }

    /// One window's result: the p50 of each term in milliseconds, and how many matched
    /// samples produced them.
    public struct Split: Sendable {
        public let hostP50Ms: Double
        public let networkP50Ms: Double
        public let count: Int
    }

    /// Returns the p50s of the samples matched since the previous drain and clears the window.
    ///
    /// Only matched samples are cleared; waiting receipts stay, because their timing may still
    /// arrive before the next drain. Returns `nil` when nothing matched — the caller then
    /// falls back to the combined stage.
    public func drain() -> Split? {
        let window = takeSortedWindow()
        guard !window.host.isEmpty else { return nil }
        return Split(
            hostP50Ms: Self.medianMs(ofSortedUs: window.host),
            networkP50Ms: Self.medianMs(ofSortedUs: window.network),
            count: window.host.count)
    }

    /// Drops all state — waiting receipts and the current window — so a fresh connection
    /// starts clean.
    public func reset() {
        lock.lock()
        defer { lock.unlock() }
        pending = []
        hostUsSamples = []
        networkUsSamples = []
    }

    /// Snapshots both sample arrays in sorted order and empties them (capacity kept), all
    /// under the lock.
    private func takeSortedWindow() -> (host: [Int64], network: [Int64]) {
        lock.lock()
        defer { lock.unlock() }
        let window = (host: hostUsSamples.sorted(), network: networkUsSamples.sorted())
        hostUsSamples.removeAll(keepingCapacity: true)
        networkUsSamples.removeAll(keepingCapacity: true)
        return window
    }

    /// The element at index `count / 2` of a non-empty ascending array, converted µs → ms.
    private static func medianMs(ofSortedUs sorted: [Int64]) -> Double {
        let middle = min(sorted.count / 2, sorted.count - 1)
        return Double(sorted[middle]) / 1000.0
    }
}
@@ -0,0 +1,107 @@
// Unit tests for HostNetworkSplitter (the host/network split of the unified stats model's
// host+network stage — design/stats-unification.md Phase 2): pts matching, the per-frame
// tiling arithmetic (network = combined − host, floored at 0), drain/reset semantics, the
// bounded pending ring, and the absurd-receipt clamp. All samples use explicit instants, so
// the expectations are exact.
import Foundation
import XCTest
@testable import PunktfunkKit
final class HostNetworkSplitterTests: XCTestCase {
    /// An arbitrary host-capture pts (ns), far from zero like a real CLOCK_REALTIME stamp.
    private let basePts: UInt64 = 1_000_000_000_000

    /// Feeds `splitter` a receipt whose skew-corrected capture→received interval is exactly
    /// `combinedMs`: the receipt instant is pulled back by `offsetNs`, which the splitter's
    /// own offset correction then adds back.
    private func addReceipt(to splitter: HostNetworkSplitter, pts: UInt64, combinedMs: Int64,
                            offsetNs: Int64 = 0) {
        let intervalNs = combinedMs * 1_000_000
        let receivedNs = Int64(pts) + intervalNs - offsetNs
        splitter.recordReceipt(ptsNs: pts, receivedNs: receivedNs, offsetNs: offsetNs)
    }

    func testEmptyDrainIsNil() {
        let splitter = HostNetworkSplitter()
        XCTAssertNil(splitter.drain())
    }

    func testMatchSplitsCombinedIntoHostAndNetwork() {
        let splitter = HostNetworkSplitter()
        // capture→received is 8 ms, of which the host claims 3 ms for itself.
        addReceipt(to: splitter, pts: basePts, combinedMs: 8)
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 3_000)

        guard let split = splitter.drain() else {
            return XCTFail("expected a matched sample")
        }
        XCTAssertEqual(split.count, 1)
        XCTAssertEqual(split.hostP50Ms, 3.0)
        XCTAssertEqual(split.networkP50Ms, 5.0, "the two terms tile the combined interval")
        XCTAssertNil(splitter.drain(), "drain resets the window")
    }

    func testSkewOffsetAppliesToTheCombinedInterval() {
        let splitter = HostNetworkSplitter()
        // The client clock runs 2 ms behind the host, so the raw difference alone reads 6 ms.
        addReceipt(to: splitter, pts: basePts, combinedMs: 8, offsetNs: 2_000_000)
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 3_000)

        XCTAssertEqual(splitter.drain()?.networkP50Ms, 5.0)
    }

    func testUnmatchedTimingIsSkipped() {
        let splitter = HostNetworkSplitter()
        addReceipt(to: splitter, pts: basePts, combinedMs: 8)
        // A timing for an AU that was never received (FEC-dropped) must not invent a sample.
        splitter.noteHostTiming(ptsNs: basePts + 1, hostUs: 3_000)

        XCTAssertNil(splitter.drain())
    }

    func testReceiptSurvivesADrainUntilItsTimingArrives() {
        let splitter = HostNetworkSplitter()
        addReceipt(to: splitter, pts: basePts, combinedMs: 8)
        XCTAssertNil(splitter.drain(), "no timing matched yet")

        // The timing shows up one tick late — the receipt is still there to match it.
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 3_000)
        XCTAssertEqual(splitter.drain()?.hostP50Ms, 3.0)
    }

    func testEachReceiptMatchesOnce() {
        let splitter = HostNetworkSplitter()
        addReceipt(to: splitter, pts: basePts, combinedMs: 8)
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 3_000)
        // A duplicate 0xCF for the same pts finds no receipt left — no second sample.
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 3_000)

        XCTAssertEqual(splitter.drain()?.count, 1)
    }

    func testNetworkFlooredAtZero() {
        let splitter = HostNetworkSplitter()
        // With a slightly-off skew offset, host_us can exceed the combined interval.
        addReceipt(to: splitter, pts: basePts, combinedMs: 2)
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 3_000)

        guard let split = splitter.drain() else {
            return XCTFail("expected a sample")
        }
        XCTAssertEqual(split.hostP50Ms, 3.0)
        XCTAssertEqual(split.networkP50Ms, 0.0)
    }

    func testPendingRingDropsOldest() {
        let splitter = HostNetworkSplitter()
        // 300 receipts into a ring of 256 — the earliest ones fall out.
        for step in UInt64(0)..<300 {
            addReceipt(to: splitter, pts: basePts + step, combinedMs: 8)
        }

        // The very first receipt was evicted, so its timing matches nothing.
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 3_000)
        XCTAssertNil(splitter.drain())

        // The newest receipt is still waiting.
        splitter.noteHostTiming(ptsNs: basePts + 299, hostUs: 3_000)
        XCTAssertEqual(splitter.drain()?.count, 1)
    }

    func testAbsurdReceiptsAreDropped() {
        let splitter = HostNetworkSplitter()
        // Received "before" capture (a clock step), and an interval beyond 10 s (garbage
        // pts/offset): neither receipt may be kept.
        addReceipt(to: splitter, pts: basePts, combinedMs: -1)
        addReceipt(to: splitter, pts: basePts + 1, combinedMs: 20_000)
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 1_000)
        splitter.noteHostTiming(ptsNs: basePts + 1, hostUs: 1_000)

        XCTAssertNil(splitter.drain())
    }

    func testResetForgetsPendingReceipts() {
        let splitter = HostNetworkSplitter()
        addReceipt(to: splitter, pts: basePts, combinedMs: 8)
        splitter.reset()
        splitter.noteHostTiming(ptsNs: basePts, hostUs: 3_000)

        XCTAssertNil(
            splitter.drain(), "a fresh session must not match a previous session's receipts")
    }
}
@@ -25,12 +25,18 @@ final class LoopbackIntegrationTests: XCTestCase {
XCTAssertEqual(conn.resolvedBitrateKbps, 50_000)
// Pull 25 synthetic frames and byte-verify the documented pattern:
// u32 LE frame index, then data[i] = (idx as u8) &+ (i as u8).
// u32 LE frame index, then data[i] = (idx as u8) &+ (i as u8). Alongside, drain the
// per-AU host-timing plane (0xCF) the way the app's stats tick does — the connector
// ORs VIDEO_CAP_HOST_TIMING in unconditionally and the synthetic host stamps one
// report per AU, so the pts correlation must hold end to end through the xcframework.
var got = 0
var lastIndex: UInt32 = 0
var receivedPts = Set<UInt64>()
var timings: [PunktfunkConnection.HostTiming] = []
let deadline = Date().addingTimeInterval(30)
while got < 25 {
XCTAssertLessThan(Date(), deadline, "timed out after \(got) frames")
while let t = try conn.nextHostTiming(timeoutMs: 0) { timings.append(t) }
guard let au = try conn.nextAU(timeoutMs: 2000) else { continue }
let idx = au.data.prefix(4).reversed().reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
for (i, byte) in au.data.enumerated().dropFirst(4) {
@@ -41,10 +47,22 @@ final class LoopbackIntegrationTests: XCTestCase {
}
}
XCTAssertGreaterThan(au.ptsNs, 0)
receivedPts.insert(au.ptsNs)
lastIndex = idx
got += 1
}
XCTAssertGreaterThanOrEqual(lastIndex, 24)
// Belt-and-braces: the last frame's timing lands just after its AU — give it a bounded
// grace drain (the stream keeps running, so this must not loop on fresh timings).
var grace = 0
while grace < 64, !timings.contains(where: { receivedPts.contains($0.ptsNs) }),
let t = try conn.nextHostTiming(timeoutMs: 100) {
timings.append(t)
grace += 1
}
XCTAssertTrue(
timings.contains { receivedPts.contains($0.ptsNs) },
"no 0xCF host timing matched a received AU's pts (got \(timings.count) timings)")
// Input goes the other way (enqueue-only; the host logs the count on close)
// including the touch kinds, gamepad events, the rich-input plane (DualSense