Files
punktfunk/clients/apple/Tests/PunktfunkKitTests/RemoteFirstLightTests.swift
T
enricobuehler b26f138699
ci / rust (push) Has been cancelled
feat(apple): session audio — host playback + mic uplink, device pickers in Settings
Both directions of the audio plane, on CoreAudio's built-in Opus codec
(kAudioFormatOpus — no bundled libopus; OpusCodec.swift, round trip unit-tested):

- Playback: a drain thread pulls nextAudio() packets, decodes, and writes a priming
  jitter ring feeding an AVAudioSourceNode (~20 ms prefill, adaptive to the device's
  render quantum so large-buffer devices don't oscillate prime/dropout; a high-water
  clamp sheds stall backlog so one network hiccup can't permanently lag audio behind
  video; underrun re-primes — one dip, not sustained crackle).
- Mic: a second engine taps the input device, resamples to 48 kHz stereo, Opus-encodes
  20 ms chunks and sendMic()s them into the host's virtual PipeWire source. Permission
  via AVCaptureDevice (NSMicrophoneUsageDescription added to the Xcode target).
- Settings: Speaker + Microphone pickers (CoreAudio HAL enumeration, persisted by
  device UID — "System default" leaves the engine unpinned so it follows macOS device
  changes) and a "Send microphone" toggle (default on). Applies from the next session.
- Audio starts with streaming, never during the trust prompt (no host sound — and no
  mic uplink — before the user trusted the host); teardown stops audio before close().

Adversarial-review fixes baked in: stop() and the dangling mic-permission callback
share one lock+flag protocol (no hot mic with no owner), the connect-success handler
bails when the attempt was abandoned mid-handshake (no session/mic for a dead window),
SessionAudio gets a deinit backstop (a dropped instance can't pin the connection via
its drain thread), and the render scratch buffer is block-owned (was leaked per
session).

Verified live against the box: remote test decodes 100 host Opus packets to PCM and
the host opens its virtual mic on the first uplinked frame ("punktfunk/1 virtual mic
ready"); on-glass session runs with both engines up.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 09:39:15 +02:00

176 lines
8.1 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// First light, headless: the full client pipeline against a REAL remote host — QUIC
// handshake over the LAN, NVENC HEVC AUs through FEC + AES-GCM, AnnexB conversion, and a
// real VTDecompressionSession turning them into pixels. Everything the GUI does except
// putting the layer on glass.
//
// Run (host side, on the Linux box):
// PUNKTFUNK_COMPOSITOR=gamescope PUNKTFUNK_GAMESCOPE_APP=vkcube PUNKTFUNK_ZEROCOPY=1 \
// punktfunk-host m3-host --source virtual --seconds 120
// Then here:
// PUNKTFUNK_REMOTE_HOST=192.168.1.70 swift test --filter RemoteFirstLightTests
import AVFoundation
import CoreMedia
import VideoToolbox
import XCTest
@testable import PunktfunkKit
/// Opt-in integration tests that talk to a real remote punktfunk host.
///
/// Every test skips (via `XCTSkip`) unless `PUNKTFUNK_REMOTE_HOST` is set in the
/// environment; `PUNKTFUNK_REMOTE_PORT` optionally overrides the port.
final class RemoteFirstLightTests: XCTestCase {
    /// Port dialled when `PUNKTFUNK_REMOTE_PORT` is unset or does not parse as a `UInt16`.
    private static let defaultPort: UInt16 = 9777

    /// Resolves the port to dial.
    ///
    /// - Parameter env: The process environment to read `PUNKTFUNK_REMOTE_PORT` from.
    /// - Returns: The parsed port, or `defaultPort` when the variable is missing or invalid.
    private func remotePort(from env: [String: String]) -> UInt16 {
        env["PUNKTFUNK_REMOTE_PORT"].flatMap(UInt16.init) ?? Self.defaultPort
    }

    /// The pairing ceremony over the real LAN, exactly as the app runs it: fresh identity,
    /// SPAKE2 with the host's arming PIN, then a pinned + identified session. Needs the
    /// host armed (--allow-pairing) and its logged PIN in PUNKTFUNK_REMOTE_PIN. Heads-up:
    /// every run durably adds one throwaway "remote-test" identity to the host's
    /// ~/.config/punktfunk/punktfunk1-paired.json — prune those entries at will.
    ///
    /// - Throws: `XCTSkip` when the host or PIN variable is missing; otherwise any error
    ///   thrown by identity generation, pairing, connecting, or pulling access units.
    func testRemotePairingThenPinnedStream() throws {
        let env = ProcessInfo.processInfo.environment
        guard let host = env["PUNKTFUNK_REMOTE_HOST"], let pin = env["PUNKTFUNK_REMOTE_PIN"]
        else {
            throw XCTSkip("set PUNKTFUNK_REMOTE_HOST + PUNKTFUNK_REMOTE_PIN "
                + "(host armed with --allow-pairing)")
        }
        let port = remotePort(from: env)

        let identity = try generateIdentity()
        let fingerprint = try pair(
            host: host, port: port, identity: identity, pin: pin, name: "remote-test")
        XCTAssertEqual(fingerprint.count, 32)

        // Reconnect pinned to the fingerprint pairing returned, presenting the same identity.
        let conn = try PunktfunkConnection(
            host: host, port: port, width: 1280, height: 720, refreshHz: 60,
            pinSHA256: fingerprint, identity: identity)
        defer { conn.close() }
        XCTAssertEqual(conn.hostFingerprint, fingerprint)

        // Ten access units within 20 s is enough to show the pinned session streams.
        var got = 0
        let deadline = Date().addingTimeInterval(20)
        while got < 10, Date() < deadline {
            if try conn.nextAU(timeoutMs: 2000) != nil { got += 1 }
        }
        XCTAssertGreaterThanOrEqual(got, 10, "paired + pinned session must stream")
    }

    /// Audio both ways against the real host: uplink an encoded tone (mic path minus the
    /// mic) — the host logs "punktfunk/1 virtual mic ready" on first frame — then drain
    /// the Opus plane and decode it to PCM (host speaker path minus the speaker).
    ///
    /// - Throws: `XCTSkip` when `PUNKTFUNK_REMOTE_HOST` is unset; otherwise any error from
    ///   connecting, encoding, receiving or decoding, or a failed buffer unwrap.
    func testRemoteAudioBothDirections() throws {
        let env = ProcessInfo.processInfo.environment
        guard let host = env["PUNKTFUNK_REMOTE_HOST"] else {
            throw XCTSkip("set PUNKTFUNK_REMOTE_HOST (and start m3-host --source virtual there)")
        }
        let port = remotePort(from: env)
        let conn = try PunktfunkConnection(
            host: host, port: port, width: 1280, height: 720, refreshHz: 60)
        defer { conn.close() }

        // Mic uplink: 2 s of 440 Hz tone (the host's mic service opens its virtual
        // source on the first frame — check its log).
        let encoder = try OpusEncoder()
        // XCTUnwrap instead of `!`: a failed allocation fails this test rather than
        // crashing the whole test process.
        let chunk = try XCTUnwrap(
            AVAudioPCMBuffer(
                pcmFormat: encoder.pcmFormat, frameCapacity: OpusEncoder.framesPerPacket),
            "could not allocate the mic PCM buffer")
        let channelCount = Int(chunk.format.channelCount)
        let isInterleaved = chunk.format.isInterleaved
        let framesPerChunk = Int(OpusEncoder.framesPerPacket)
        var phase: Float = 0
        let step = 2 * Float.pi * 440 / 48_000
        var seq: UInt32 = 0
        for _ in 0..<100 {
            // Reset the length every pass before refilling the buffer.
            chunk.frameLength = OpusEncoder.framesPerPacket
            let channels = try XCTUnwrap(
                chunk.floatChannelData, "encoder PCM format must be 32-bit float")
            for frame in 0..<framesPerChunk {
                let sample = sin(phase) * 0.25
                phase += step
                // Write the same sample to every channel, honouring the buffer layout:
                // interleaved formats share one strided buffer, deinterleaved formats
                // have one buffer per channel. For interleaved stereo this is exactly
                // `p[f * 2]` / `p[f * 2 + 1]`.
                for channel in 0..<channelCount {
                    if isInterleaved {
                        channels[0][frame * channelCount + channel] = sample
                    } else {
                        channels[channel][frame] = sample
                    }
                }
            }
            for packet in try encoder.encode(chunk) {
                // 20 ms per packet, expressed in nanoseconds.
                conn.sendMic(packet, seq: seq, ptsNs: UInt64(seq) * 20_000_000)
                seq &+= 1
            }
        }
        XCTAssertGreaterThanOrEqual(seq, 95, "mic encoder must emit ~one packet per chunk")

        // Downlink: pull host audio packets and decode them (the host streams its sink
        // monitor — silence still produces packets).
        let decoder = try OpusDecoder(framesPerPacket: 240)
        let pcm = try XCTUnwrap(
            AVAudioPCMBuffer(pcmFormat: decoder.pcmFormat, frameCapacity: 5760),
            "could not allocate the decode PCM buffer")
        var packets = 0
        var decodedFrames = 0
        let deadline = Date().addingTimeInterval(10)
        while packets < 100, Date() < deadline {
            guard let pkt = try conn.nextAudio(timeoutMs: 1000) else { continue }
            packets += 1
            let frames = try decoder.decode(pkt.data, into: pcm)
            decodedFrames += Int(frames)
        }
        XCTAssertGreaterThanOrEqual(packets, 100, "host audio plane must deliver")
        // 100 packets × 5 ms × 48 kHz = 24000 frames.
        XCTAssertGreaterThan(decodedFrames, 20_000, "host packets must decode to PCM")
    }

    /// Video first light: connects to the host, turns each received access unit into a
    /// sample buffer via `AnnexB`, and decodes it with a `VTDecompressionSession` until
    /// 60 frames have produced an image buffer or 30 s have elapsed.
    ///
    /// - Throws: `XCTSkip` when `PUNKTFUNK_REMOTE_HOST` is unset; otherwise any error from
    ///   connecting or pulling access units.
    func testRemoteStreamDecodesToPixels() throws {
        let env = ProcessInfo.processInfo.environment
        guard let host = env["PUNKTFUNK_REMOTE_HOST"] else {
            throw XCTSkip("set PUNKTFUNK_REMOTE_HOST (and start m3-host --source virtual there)")
        }
        let port = remotePort(from: env)
        // PUNKTFUNK_REMOTE_COMPOSITOR (e.g. kwin or gamescope) asks the host for a specific
        // backend (verify in its log: "punktfunk/1 virtual display compositor=").
        let compositor = env["PUNKTFUNK_REMOTE_COMPOSITOR"]
            .flatMap(PunktfunkConnection.Compositor.init(name:)) ?? .auto
        let width: UInt32 = 1280
        let height: UInt32 = 720
        let conn = try PunktfunkConnection(
            host: host, port: port, width: width, height: height, refreshHz: 60,
            compositor: compositor)
        defer { conn.close() }
        XCTAssertEqual(conn.width, width)
        XCTAssertEqual(conn.height, height)

        var format: CMVideoFormatDescription?
        var decoder: VTDecompressionSession?
        defer {
            if let session = decoder { VTDecompressionSessionInvalidate(session) }
        }
        var received = 0
        var decoded = 0
        // Optional rather than a 0 sentinel, so a genuine first pts of 0 is kept.
        var firstPtsNs: UInt64?
        var lastPtsNs: UInt64 = 0
        let deadline = Date().addingTimeInterval(30)
        while decoded < 60, Date() < deadline {
            guard let au = try conn.nextAU(timeoutMs: 2000) else { continue }
            received += 1
            if firstPtsNs == nil { firstPtsNs = au.ptsNs }
            lastPtsNs = au.ptsNs

            // An AU that yields a format description refreshes `format`; the decoder
            // itself is only created once, from the first such AU.
            if let f = AnnexB.formatDescription(fromIDR: au.data) {
                format = f
                if decoder == nil {
                    let dims = CMVideoFormatDescriptionGetDimensions(f)
                    XCTAssertEqual(UInt32(dims.width), width)
                    XCTAssertEqual(UInt32(dims.height), height)
                    var session: VTDecompressionSession?
                    XCTAssertEqual(
                        VTDecompressionSessionCreate(
                            allocator: nil, formatDescription: f, decoderSpecification: nil,
                            imageBufferAttributes: nil, outputCallback: nil,
                            decompressionSessionOut: &session),
                        noErr)
                    decoder = session
                }
            }

            // Nothing to decode until a format and a session exist.
            guard let f = format, let dec = decoder,
                  let sample = AnnexB.sampleBuffer(au: au, format: f)
            else { continue }

            // `gotPixels` is read right after the call, so this relies on the output
            // handler having run by then (no asynchronous-decode flag is passed).
            var gotPixels = false
            VTDecompressionSessionDecodeFrame(
                dec, sampleBuffer: sample, flags: [], infoFlagsOut: nil
            ) { status, _, imageBuffer, _, _ in
                gotPixels = status == noErr && imageBuffer != nil
            }
            if gotPixels { decoded += 1 }
        }
        XCTAssertGreaterThanOrEqual(decoded, 60, "decoded \(decoded)/\(received) received AUs")
        // The host stamps pts with its capture wall clock — 60 frames should span ~1 s.
        let spanMs = Double(lastPtsNs &- (firstPtsNs ?? lastPtsNs)) / 1_000_000
        print("first light: \(decoded) frames decoded, \(received) received, pts span \(Int(spanMs)) ms")
    }
}