diff --git a/clients/apple/README.md b/clients/apple/README.md index c40720a..4b5a9de 100644 --- a/clients/apple/README.md +++ b/clients/apple/README.md @@ -61,7 +61,12 @@ What's here, all compiled and tested on macOS (Xcode 26.5 / Swift 6.3): trust-on-first-use fingerprint prompt over the live-but-blurred stream, and SPAKE2 PIN pairing (`PairSheet`, from a host card's context menu or the trust prompt; `ClientIdentityStore` keeps the client identity in the Keychain and presents it on - every connect) — then pinned reconnects, fps/Mb-s HUD. Settings also picks the HOST + every connect) — then pinned reconnects, fps/Mb-s HUD + a **capture→client-receipt latency** + line (`LatencyMeter`, p50/p95): the AU `pts_ns` (host capture clock) to the instant the client + received it, **skew-corrected** across machines via `PunktfunkConnection.clockOffsetNs` (the + connect-time wall-clock handshake, `punktfunk_connection_clock_offset_ns`). It excludes the + layer's decode+present (stage-1 `AVSampleBufferDisplayLayer` has no per-frame present callback); + true decode→present awaits the stage-2 presenter. 
Settings also picks the HOST compositor (KWin/wlroots/Mutter/gamescope, default automatic — the host honors it only if that backend is available there) and has a **Controllers** section: every detected controller (capability glyphs, battery, "In use" badge), which one to forward diff --git a/clients/apple/Sources/PunktfunkClient/ContentView.swift b/clients/apple/Sources/PunktfunkClient/ContentView.swift index 9d6ba61..fc31f57 100644 --- a/clients/apple/Sources/PunktfunkClient/ContentView.swift +++ b/clients/apple/Sources/PunktfunkClient/ContentView.swift @@ -478,7 +478,10 @@ struct ContentView: View { onCaptureChange: { [weak model] captured in model?.mouseCaptured = captured }, - onFrame: { [meter = model.meter] au in meter.note(byteCount: au.data.count) }, + onFrame: { [meter = model.meter, latency = model.latency, offset = conn.clockOffsetNs] au in + meter.note(byteCount: au.data.count) + latency.record(ptsNs: au.ptsNs, offsetNs: offset) + }, onSessionEnd: { [weak model] in Task { @MainActor in model?.sessionEnded() } } @@ -499,6 +502,14 @@ struct ContentView: View { Text("\(conn.width)×\(conn.height)@\(conn.refreshHz) \(model.fps) fps \(model.mbps, specifier: "%.1f") Mb/s") .font(.system(.caption, design: .monospaced)) } + if model.latencyValid { + // Capture→client-receipt (skew-corrected); excludes the layer's decode+present — + // see LatencyMeter. "(same-host)" when the host didn't answer the skew handshake. + Text("capture→client \(model.latencyP50Ms, specifier: "%.1f")/\(model.latencyP95Ms, specifier: "%.1f") ms p50/p95" + + (model.latencySkewCorrected ? "" : " (same-host)")) + .font(.system(.caption2, design: .monospaced)) + .foregroundStyle(.secondary) + } // While captured the cursor is hidden+frozen, so the button is keyboard-only // (⌘⎋ or Cmd+Tab release the cursor; released, it's clickable again). 
#if os(macOS) diff --git a/clients/apple/Sources/PunktfunkClient/SessionModel.swift b/clients/apple/Sources/PunktfunkClient/SessionModel.swift index 1265534..7527e0c 100644 --- a/clients/apple/Sources/PunktfunkClient/SessionModel.swift +++ b/clients/apple/Sources/PunktfunkClient/SessionModel.swift @@ -53,11 +53,20 @@ final class SessionModel: ObservableObject { @Published var fps = 0 @Published var mbps = 0.0 @Published var totalFrames = 0 + /// Capture→client-receipt latency (ms), skew-corrected across machines via the connect-time + /// clock offset — p50/p95 for the HUD. `latencyValid` is false until the first sample drains + /// (and whenever no host frames arrived in the last interval). `latencySkewCorrected` = the host + /// answered the skew handshake (the number is cross-machine valid, not just same-host). + @Published var latencyP50Ms = 0.0 + @Published var latencyP95Ms = 0.0 + @Published var latencyValid = false + @Published var latencySkewCorrected = false /// Mirrors StreamView's capture state (it owns the input capture; this drives the /// HUD's "click to capture" / "⌘⎋ releases" hint). @Published var mouseCaptured = false let meter = FrameMeter() + let latency = LatencyMeter() private var statsTimer: Timer? private var audio: SessionAudio? private var gamepadCapture: GamepadCapture? @@ -165,6 +174,7 @@ final class SessionModel: ObservableObject { phase = .idle fps = 0 mbps = 0 + latencyValid = false mouseCaptured = false } @@ -211,6 +221,14 @@ final class SessionModel: ObservableObject { self.fps = frames self.mbps = Double(bytes) * 8 / 1_000_000 self.totalFrames = total + if let lat = self.latency.drain() { + self.latencyP50Ms = lat.p50Ms + self.latencyP95Ms = lat.p95Ms + self.latencySkewCorrected = lat.skewCorrected + self.latencyValid = true + } else { + self.latencyValid = false + } } } // .common so the HUD keeps updating during window drags / menu tracking. 
diff --git a/clients/apple/Sources/PunktfunkKit/LatencyMeter.swift b/clients/apple/Sources/PunktfunkKit/LatencyMeter.swift new file mode 100644 index 0000000..1bbf371 --- /dev/null +++ b/clients/apple/Sources/PunktfunkKit/LatencyMeter.swift @@ -0,0 +1,71 @@ +// Per-frame latency sampler for the live HUD: records capture->client-receipt latency and drains +// percentiles on demand. NSLock rather than an actor — the writer is the non-async pump/arrival +// path (same pattern as the app's FrameMeter). + +import Foundation + +/// Samples the **capture->client-receipt** latency of each access unit and reports percentiles. +/// +/// The latency is `now - pts_ns`, where `pts_ns` is the host's capture wall clock (the AU's pts) and +/// `now` is the client's `CLOCK_REALTIME` instant the AU was received, shifted by the connect-time +/// **clock-skew offset** (`PunktfunkConnection.clockOffsetNs`, host minus client) so the difference +/// is valid across machines. `offsetNs == 0` means an old host that didn't answer the skew handshake +/// (or genuinely synced clocks) — the number is then only meaningful same-host. +/// +/// SCOPE (stage-1 presenter): this covers host capture -> encode -> FEC -> network -> reassembly -> +/// decrypt -> handed to the presenter. It does **not** include the on-device VideoToolbox decode or +/// the `AVSampleBufferDisplayLayer` present — that layer decodes and presents compressed samples +/// internally with no per-frame callback. True decode->present (the full glass-to-glass) needs the +/// stage-2 presenter (`VTDecompressionSession` decode-completion + `CAMetalLayer`/display-link +/// present); this meter is the substrate it will extend. +public final class LatencyMeter: @unchecked Sendable { + private let lock = NSLock() + private var samplesUs: [Int64] = [] + private var skewCorrected = false + + public init() {} + + /// Record one frame at receipt. 
`ptsNs` is the host capture clock (the AU's pts); `offsetNs` is + /// the host-minus-client clock offset from the skew handshake (0 = uncorrected / old host). + public func record(ptsNs: UInt64, offsetNs: Int64) { + var ts = timespec() + clock_gettime(CLOCK_REALTIME, &ts) + let nowNs = Int64(ts.tv_sec) * 1_000_000_000 + Int64(ts.tv_nsec) + let latNs = nowNs &+ offsetNs &- Int64(bitPattern: ptsNs) + // Drop absurd values outside (0, 10 s): a clock step, a wildly wrong offset, or garbage pts. + guard latNs > 0, latNs < 10_000_000_000 else { return } + lock.lock() + samplesUs.append(latNs / 1000) + if offsetNs != 0 { skewCorrected = true } + lock.unlock() + } + + public struct Stats: Sendable { + public let p50Ms: Double + public let p95Ms: Double + public let p99Ms: Double + public let count: Int + /// True if any sample accepted in the window had a non-zero skew offset (a host that answered + /// the handshake) — i.e. the numbers are cross-machine valid, not just same-host. + public let skewCorrected: Bool + } + + /// Percentiles over the samples accepted since the last drain; resets the window and the + /// skew-corrected flag. `nil` when no sample was accepted in the interval. + public func drain() -> Stats? 
{ + lock.lock() + let sorted = samplesUs.sorted() + let corrected = skewCorrected + samplesUs.removeAll(keepingCapacity: true) + skewCorrected = false + lock.unlock() + guard !sorted.isEmpty else { return nil } + func pct(_ p: Double) -> Double { + let i = min(Int(Double(sorted.count) * p), sorted.count - 1) + return Double(sorted[i]) / 1000.0 // us -> ms + } + return Stats( + p50Ms: pct(0.50), p95Ms: pct(0.95), p99Ms: pct(0.99), + count: sorted.count, skewCorrected: corrected) + } +} diff --git a/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift b/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift index 93d47fa..3d2a50d 100644 --- a/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift +++ b/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift @@ -195,6 +195,13 @@ public final class PunktfunkConnection { /// DualSense feedback. public private(set) var resolvedGamepad: GamepadType = .auto + /// Host clock minus client clock (nanoseconds), from the connect-time wall-clock skew handshake + /// (`punktfunk_connection_clock_offset_ns`). Add it to a local `CLOCK_REALTIME` instant to + /// express that instant in the host's capture clock — the clock each `AccessUnit.ptsNs` is + /// stamped in — so a glass-to-glass latency (present/enqueue time minus `ptsNs`) is valid across + /// machines. `0` = no correction (an older host that didn't answer, or synchronized clocks). + public private(set) var clockOffsetNs: Int64 = 0 + /// Connect and start a session at the requested mode (the host creates a native virtual /// output at exactly this size/refresh). Blocks up to `timeoutMs`. /// @@ -251,6 +258,9 @@ public final class PunktfunkConnection { var gp: UInt32 = 0 _ = punktfunk_connection_gamepad(handle, &gp) resolvedGamepad = GamepadType(rawValue: gp) ?? 
.auto + var offset: Int64 = 0 + _ = punktfunk_connection_clock_offset_ns(handle, &offset) + clockOffsetNs = offset } /// Ask the host to switch the live session to a new mode (window resized) — no diff --git a/clients/apple/Tests/PunktfunkKitTests/LatencyMeterTests.swift b/clients/apple/Tests/PunktfunkKitTests/LatencyMeterTests.swift new file mode 100644 index 0000000..c276747 --- /dev/null +++ b/clients/apple/Tests/PunktfunkKitTests/LatencyMeterTests.swift @@ -0,0 +1,50 @@ +// Unit tests for LatencyMeter: percentiles, the skew-corrected flag, reset-on-drain, and the +// absurd-value guard. Latencies are constructed by stamping a pts a known interval in the past, so +// the result is that interval plus the (tiny) clock advance between reads — asserted with tolerance. + +import Foundation +import XCTest + +@testable import PunktfunkKit + +final class LatencyMeterTests: XCTestCase { + private func nowRealtimeNs() -> UInt64 { + var ts = timespec() + clock_gettime(CLOCK_REALTIME, &ts) + return UInt64(ts.tv_sec) * 1_000_000_000 + UInt64(ts.tv_nsec) + } + + func testEmptyDrainIsNil() { + XCTAssertNil(LatencyMeter().drain()) + } + + func testRecordsPercentilesAndResets() { + let m = LatencyMeter() + let now = nowRealtimeNs() + // Each frame "captured" 5 ms ago, no skew offset → latency ≈ 5 ms. 
+ for _ in 0..<50 { m.record(ptsNs: now - 5_000_000, offsetNs: 0) } + guard let s = m.drain() else { return XCTFail("expected samples") } + XCTAssertEqual(s.count, 50) + XCTAssertFalse(s.skewCorrected, "offset 0 ⇒ not skew-corrected") + XCTAssertEqual(s.p50Ms, 5.0, accuracy: 2.0) + XCTAssertGreaterThanOrEqual(s.p99Ms, s.p50Ms) + XCTAssertNil(m.drain(), "drain resets the window") + } + + func testSkewCorrectedFlagSetByNonZeroOffset() { + let m = LatencyMeter() + let now = nowRealtimeNs() + m.record(ptsNs: now - 1_000_000, offsetNs: 250_000) // 1 ms ago, +0.25 ms offset + XCTAssertEqual(m.drain()?.skewCorrected, true) + } + + func testDropsAbsurdValues() { + let m = LatencyMeter() + let now = nowRealtimeNs() + // pts 1 s in the future → negative latency → dropped. + m.record(ptsNs: now + 1_000_000_000, offsetNs: 0) + // pts absurdly far in the past → > 10 s latency → dropped. + m.record(ptsNs: now - 20_000_000_000, offsetNs: 0) + XCTAssertNil(m.drain()) + } +} diff --git a/docs-site/content/docs/roadmap.md b/docs-site/content/docs/roadmap.md index 392096f..245f1e7 100644 --- a/docs-site/content/docs/roadmap.md +++ b/docs-site/content/docs/roadmap.md @@ -310,9 +310,13 @@ buffer; `sendmmsg`/`recvmmsg` batching; the capture-timestamp anchor placement. (`quic::clock_sync` → `ClockSkew`) used by both the reference client and the **embeddable connector** — `NativeClient` runs it at connect and exposes the offset over the C ABI (`punktfunk_connection_clock_offset_ns`), so the Apple client can convert a present instant to the - host clock. **Remaining for true glass-to-glass**: (1) the **Apple client present-stamp** - (decode→present) — Swift: stamp `AVSampleBufferDisplayLayer`/presenter time, add the C-ABI offset, - subtract the AU `pts_ns`; (2) the host **render→capture** term (PipeWire buffer presentation + host clock. 
The Apple client now consumes that offset: `PunktfunkConnection.clockOffsetNs` + + `LatencyMeter` surface a **capture→client-receipt** (skew-corrected) p50/p95 in the HUD — the first + cross-machine latency the real Apple client reports. **Remaining for *true* glass-to-glass**: + (1) the **decode→present** tail — the stage-1 `AVSampleBufferDisplayLayer` decodes+presents + compressed samples internally with no per-frame callback, so it needs the **stage-2 presenter** + (`VTDecompressionSession` decode-completion timestamp + `CAMetalLayer`/display-link present) to + stamp on-glass present time; (2) the host **render→capture** term (PipeWire buffer presentation timestamp vs our capture stamp). `tools/latency-probe` is still the cross-machine orchestrator. - **Bigger bets (ordered, deferred — need real-NIC/GPU/Mac validation):** 1. **CUDA stream+event** to drop one of two redundant `cuCtxSynchronize` in `submit_cuda` (keep the diff --git a/docs-site/content/docs/status.md b/docs-site/content/docs/status.md index 30596ce..7021aa4 100644 --- a/docs-site/content/docs/status.md +++ b/docs-site/content/docs/status.md @@ -29,6 +29,11 @@ All three appliances advertise over mDNS (`_punktfunk._udp`) and require PIN pai ## Progress log ### 2026-06-12 +- **Apple client latency HUD** — `PunktfunkConnection.clockOffsetNs` (from the C-ABI getter) + + `LatencyMeter` surface a skew-corrected **capture→client-receipt** p50/p95 in the macOS HUD: the + first cross-machine latency the real Apple client reports. (Stage-1 `AVSampleBufferDisplayLayer` + has no present callback, so decode→present is excluded — that needs the stage-2 presenter.) + Needs an `xcframework` rebuild + `swift test` on the Mac to validate. - **Skew handshake in the connector + C ABI** — `quic::clock_sync` is now a shared core helper used by both the reference client and `NativeClient`; the connector runs it at connect and exposes the host clock offset over the C ABI (`punktfunk_connection_clock_offset_ns`). 
This is the substrate