feat(apple): session audio — host playback + mic uplink, device pickers in Settings
ci / rust (push) Has been cancelled

Both directions of the audio plane, on CoreAudio's built-in Opus codec
(kAudioFormatOpus — no bundled libopus; OpusCodec.swift, round trip unit-tested):

- Playback: a drain thread pulls nextAudio() packets, decodes, and writes a priming
  jitter ring feeding an AVAudioSourceNode (~20 ms prefill, adaptive to the device's
  render quantum so large-buffer devices don't oscillate prime/dropout; a high-water
  clamp sheds stall backlog so one network hiccup can't permanently lag audio behind
  video; underrun re-primes — one dip, not sustained crackle).
- Mic: a second engine taps the input device, resamples to 48 kHz stereo, Opus-encodes
  20 ms chunks and sendMic()s them into the host's virtual PipeWire source. Permission
  via AVCaptureDevice (NSMicrophoneUsageDescription added to the Xcode target).
- Settings: Speaker + Microphone pickers (CoreAudio HAL enumeration, persisted by
  device UID — "System default" leaves the engine unpinned so it follows macOS device
  changes) and a "Send microphone" toggle (default on). Applies from the next session.
- Audio starts with streaming, never during the trust prompt (no host sound — and no
  mic uplink — before the user trusted the host); teardown stops audio before close().

Adversarial-review fixes baked in: stop() and the dangling mic-permission callback
share one lock+flag protocol (no hot mic with no owner), the connect-success handler
bails when the attempt was abandoned mid-handshake (no session/mic for a dead window),
SessionAudio gets a deinit backstop (a dropped instance can't pin the connection via
its drain thread), and the render scratch buffer is block-owned (was leaked per
session).

Verified live against the box: remote test decodes 100 host Opus packets to PCM and
the host opens its virtual mic on the first uplinked frame ("punktfunk/1 virtual mic
ready"); on-glass session runs with both engines up.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-11 09:39:08 +02:00
parent 2372b02620
commit b26f138699
9 changed files with 840 additions and 8 deletions
@@ -0,0 +1,83 @@
// The Opus codec through CoreAudio (kAudioFormatOpus): a real encode/decode round
// trip. This is the load-bearing assumption of the whole audio feature (no bundled
// libopus) — if AVAudioConverter can't handle raw Opus packets, fail HERE, not in the
// app.
import AVFoundation
import XCTest
@testable import PunktfunkKit
final class OpusCodecTests: XCTestCase {
    /// Encode a 440 Hz stereo tone, decode it back, and require the result to be
    /// recognizably the same signal (Opus is lossy — check correlation, not bytes).
    ///
    /// - Throws: Whatever `OpusEncoder` / `OpusDecoder` throw on construction,
    ///   encode or decode, or an `XCTUnwrap` failure if a PCM buffer cannot be
    ///   allocated.
    func testEncodeDecodeRoundTripPreservesTone() throws {
        let encoder = try OpusEncoder()
        let decoder = try OpusDecoder(framesPerPacket: UInt32(OpusEncoder.framesPerPacket))
        let pcmFormat = encoder.pcmFormat
        let frames = OpusEncoder.framesPerPacket

        var packets: [Data] = []
        var phase: Float = 0
        let step = 2 * Float.pi * 440 / 48_000
        // 50 packets = 1 s of tone.
        for _ in 0..<50 {
            // XCTUnwrap turns an allocation failure into a test failure instead of
            // a process-killing trap.
            let buf = try XCTUnwrap(AVAudioPCMBuffer(pcmFormat: pcmFormat, frameCapacity: frames))
            buf.frameLength = frames
            let p = try XCTUnwrap(buf.floatChannelData)[0] // interleaved: one plane, L R L R
            for f in 0..<Int(frames) {
                let s = sin(phase) * 0.5
                phase += step
                p[f * 2] = s
                p[f * 2 + 1] = s
            }
            packets.append(contentsOf: try encoder.encode(buf))
        }
        XCTAssertGreaterThanOrEqual(packets.count, 45, "encoder must emit ~one packet per buffer")
        XCTAssertTrue(packets.allSatisfy { !$0.isEmpty })

        var decoded: [Float] = []
        // 5760 frames = 120 ms at 48 kHz: generous headroom for a single packet.
        let out = try XCTUnwrap(AVAudioPCMBuffer(pcmFormat: decoder.pcmFormat, frameCapacity: 5760))
        for packet in packets {
            let n = try decoder.decode(packet, into: out)
            let p = try XCTUnwrap(out.floatChannelData)[0]
            for f in 0..<Int(n) {
                decoded.append(p[f * 2]) // left channel
            }
        }
        XCTAssertGreaterThan(decoded.count, 40_000, "~1 s of 48 kHz audio back out")

        // The decoded signal must contain a strong 440 Hz component: correlate against
        // quadrature reference tones (phase-agnostic), skipping the codec warm-up.
        let skip = 4800
        // XCTest assertions do not halt the test by default, so without this guard a
        // short decode would trap on the slice below and take the whole test process
        // down. The count assertion above has already recorded the failure.
        guard decoded.count > skip else { return }

        var inPhase: Float = 0
        var quadrature: Float = 0
        var energy: Float = 0
        for (i, s) in decoded[skip...].enumerated() {
            let t = Float(i) * step
            inPhase += s * sin(t)
            quadrature += s * cos(t)
            energy += s * s
        }
        let n = Float(decoded.count - skip)
        let correlation = (inPhase * inPhase + quadrature * quadrature).squareRoot() / n
        let rms = (energy / n).squareRoot()
        XCTAssertGreaterThan(rms, 0.2, "decoded audio is not silence")
        // A clean sine at amplitude a yields correlation a/2 (0.25 here); noise tends to 0.
        XCTAssertGreaterThan(correlation, 0.15, "440 Hz tone must survive the round trip")
    }

    /// A decoder built for 240-frame packets (5 ms at 48 kHz, the host audio plane's
    /// packet size) must tolerate degenerate input: an empty (DTX) packet decodes to
    /// 0 frames, and a 2000-byte all-zero packet throws instead of crashing.
    ///
    /// NOTE(review): this test does not feed real encoder output through the
    /// 240-frame decoder; only the two edge cases above are exercised.
    ///
    /// - Throws: If the decoder cannot be constructed or the PCM buffer cannot be
    ///   allocated.
    func testDecoderHandlesDTXAndOversizedPackets() throws {
        let decoder = try OpusDecoder(framesPerPacket: 240)
        let out = try XCTUnwrap(AVAudioPCMBuffer(pcmFormat: decoder.pcmFormat, frameCapacity: 5760))
        XCTAssertEqual(try decoder.decode(Data(), into: out), 0, "DTX decodes to silence/0")
        XCTAssertThrowsError(
            try decoder.decode(Data(repeating: 0, count: 2000), into: out),
            "oversized packet must throw, not crash")
    }
}
@@ -9,6 +9,7 @@
// Then here:
// PUNKTFUNK_REMOTE_HOST=192.168.1.70 swift test --filter RemoteFirstLightTests
import AVFoundation
import CoreMedia
import VideoToolbox
import XCTest
@@ -47,6 +48,61 @@ final class RemoteFirstLightTests: XCTestCase {
XCTAssertGreaterThanOrEqual(got, 10, "paired + pinned session must stream")
}
/// Audio both ways against the real host: drain the Opus plane and decode it to PCM
/// (host speaker path minus the speaker), and uplink an encoded tone (mic path
/// minus the mic) — the host logs "punktfunk/1 virtual mic ready" on the first frame.
func testRemoteAudioBothDirections() throws {
    // Opt-in test: skipped unless a remote host is configured via the environment.
    let env = ProcessInfo.processInfo.environment
    guard let host = env["PUNKTFUNK_REMOTE_HOST"] else {
        throw XCTSkip("set PUNKTFUNK_REMOTE_HOST (and start m3-host --source virtual there)")
    }
    let port = env["PUNKTFUNK_REMOTE_PORT"].flatMap(UInt16.init) ?? 9777
    let conn = try PunktfunkConnection(
        host: host, port: port, width: 1280, height: 720, refreshHz: 60)
    defer { conn.close() }

    // Mic uplink: 2 s of 440 Hz tone (the host's mic service opens its virtual
    // source on the first frame — check its log).
    let encoder = try OpusEncoder()
    // XCTUnwrap: a failed allocation fails the test instead of trapping the process.
    let chunk = try XCTUnwrap(
        AVAudioPCMBuffer(
            pcmFormat: encoder.pcmFormat, frameCapacity: OpusEncoder.framesPerPacket))
    var phase: Float = 0
    let step = 2 * Float.pi * 440 / 48_000
    var seq: UInt32 = 0
    for _ in 0..<100 {
        chunk.frameLength = OpusEncoder.framesPerPacket
        let p = try XCTUnwrap(chunk.floatChannelData)[0]
        // Same sample written to both interleaved slots (index 2f and 2f + 1).
        for f in 0..<Int(OpusEncoder.framesPerPacket) {
            let s = sin(phase) * 0.25
            phase += step
            p[f * 2] = s
            p[f * 2 + 1] = s
        }
        for packet in try encoder.encode(chunk) {
            // Timestamps advance 20 ms (20_000_000 ns) per packet sent.
            conn.sendMic(packet, seq: seq, ptsNs: UInt64(seq) * 20_000_000)
            seq &+= 1
        }
    }
    XCTAssertGreaterThanOrEqual(seq, 95, "mic encoder must emit ~one packet per chunk")

    // Downlink: pull host audio packets and decode them (the host streams its sink
    // monitor — silence still produces packets).
    let decoder = try OpusDecoder(framesPerPacket: 240)
    let pcm = try XCTUnwrap(
        AVAudioPCMBuffer(pcmFormat: decoder.pcmFormat, frameCapacity: 5760))
    var packets = 0
    var decodedFrames = 0
    // Bound the wait: give up after 10 s even if fewer than 100 packets arrived.
    let deadline = Date().addingTimeInterval(10)
    while packets < 100, Date() < deadline {
        // A nil result (nothing within the 1 s timeout) just retries until the deadline.
        guard let pkt = try conn.nextAudio(timeoutMs: 1000) else { continue }
        packets += 1
        decodedFrames += Int(try decoder.decode(pkt.data, into: pcm))
    }
    XCTAssertGreaterThanOrEqual(packets, 100, "host audio plane must deliver")
    // 100 packets × 5 ms × 48 kHz = 24000 frames.
    XCTAssertGreaterThan(decodedFrames, 20_000, "host packets must decode to PCM")
}
func testRemoteStreamDecodesToPixels() throws {
let env = ProcessInfo.processInfo.environment
guard let host = env["PUNKTFUNK_REMOTE_HOST"] else {