feat(apple): session audio — host playback + mic uplink, device pickers in Settings
ci / rust (push) Has been cancelled
ci / rust (push) Has been cancelled
Both directions of the audio plane, on CoreAudio's built-in Opus codec
(kAudioFormatOpus — no bundled libopus; OpusCodec.swift, round trip unit-tested):
- Playback: a drain thread pulls nextAudio() packets, decodes, and writes a priming
jitter ring feeding an AVAudioSourceNode (~20 ms prefill, adaptive to the device's
render quantum so large-buffer devices don't oscillate prime/dropout; a high-water
clamp sheds stall backlog so one network hiccup can't permanently lag audio behind
video; underrun re-primes — one dip, not sustained crackle).
- Mic: a second engine taps the input device, resamples to 48 kHz stereo, Opus-encodes
20 ms chunks and sendMic()s them into the host's virtual PipeWire source. Permission
via AVCaptureDevice (NSMicrophoneUsageDescription added to the Xcode target).
- Settings: Speaker + Microphone pickers (CoreAudio HAL enumeration, persisted by
device UID — "System default" leaves the engine unpinned so it follows macOS device
changes) and a "Send microphone" toggle (default on). Applies from the next session.
- Audio starts with streaming, never during the trust prompt (no host sound — and no
mic uplink — before the user trusted the host); teardown stops audio before close().
Adversarial-review fixes baked in: stop() and the dangling mic-permission callback
share one lock+flag protocol (no hot mic with no owner), the connect-success handler
bails when the attempt was abandoned mid-handshake (no session/mic for a dead window),
SessionAudio gets a deinit backstop (a dropped instance can't pin the connection via
its drain thread), and the render scratch buffer is block-owned (was leaked per
session).
Verified live against the box: remote test decodes 100 host Opus packets to PCM and
the host opens its virtual mic on the first uplinked frame ("punktfunk/1 virtual mic
ready"); on-glass session runs with both engines up.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,140 @@
|
||||
// Opus ⇄ PCM through CoreAudio's built-in codec (kAudioFormatOpus, macOS 10.13+ / iOS
// 11+) — no bundled libopus. The host's audio plane is raw Opus packets (48 kHz stereo,
// one frame per packet); AVAudioConverter handles them as single-packet
// AVAudioCompressedBuffers with explicit packet descriptions.
//
// Both classes are single-threaded by contract (one per direction, owned by their
// drain/capture pipelines).
|
||||
|
||||
import AVFoundation
|
||||
|
||||
/// Failures reported by the CoreAudio-backed Opus encoder and decoder in this file.
enum OpusCodecError: Error {
    /// CoreAudio rejected the Opus stream format or had no converter for it.
    case unavailable

    /// A conversion could not be carried out; the associated string is a
    /// human-readable description of what went wrong.
    case convertFailed(String)
}
|
||||
|
||||
/// 48 kHz stereo float32 interleaved — the PCM side of both converters and the layout
/// of the playback ring buffer.
///
/// - Returns: The PCM format, or `nil` if `AVAudioFormat` could not be created.
func opusPCMFormat() -> AVAudioFormat? {
    AVAudioFormat(
        commonFormat: .pcmFormatFloat32,
        sampleRate: 48_000,
        channels: 2,
        interleaved: true)
}
|
||||
|
||||
/// The compressed side: raw Opus, `framesPerPacket` nominal samples per packet at 48 kHz
/// (240 = the host's 5 ms audio plane; 960 = the 20 ms packets the encoder emits).
///
/// - Parameter framesPerPacket: Nominal number of sample frames carried by one packet.
/// - Returns: The Opus format, or `nil` if `AVAudioFormat` could not be created from
///   the stream description.
private func opusFormat(framesPerPacket: UInt32) -> AVAudioFormat? {
    // The per-packet and per-frame byte sizes stay 0: packet sizes are not fixed here,
    // each packet's size is supplied through its packet description instead.
    var desc = AudioStreamBasicDescription(
        mSampleRate: 48_000,
        mFormatID: kAudioFormatOpus,
        mFormatFlags: 0,
        mBytesPerPacket: 0,
        mFramesPerPacket: framesPerPacket,
        mBytesPerFrame: 0,
        mChannelsPerFrame: 2,
        mBitsPerChannel: 0,
        mReserved: 0)
    return AVAudioFormat(streamDescription: &desc)
}
|
||||
|
||||
/// Decodes raw Opus packets to PCM through an `AVAudioConverter`.
///
/// One packet is decoded per call through a single reusable compressed input buffer,
/// so an instance must only be used from one thread at a time.
final class OpusDecoder {
    /// Largest Opus packet, in bytes, that the reusable input buffer can hold.
    private static let maximumPacketSize = 1500

    private let converter: AVAudioConverter
    /// Reused for every packet: one packet slot of `maximumPacketSize` bytes.
    private let inBuf: AVAudioCompressedBuffer
    private let opus: AVAudioFormat
    /// The PCM format produced by `decode(_:into:)`.
    let pcmFormat: AVAudioFormat

    /// Creates a decoder for packets of a fixed nominal duration.
    ///
    /// - Parameter framesPerPacket: the sender's packet duration in samples
    ///   (host audio = 240).
    /// - Throws: `OpusCodecError.unavailable` if either format or the converter
    ///   could not be created.
    init(framesPerPacket: UInt32) throws {
        guard let pcm = opusPCMFormat(),
              let opus = opusFormat(framesPerPacket: framesPerPacket),
              let converter = AVAudioConverter(from: opus, to: pcm)
        else { throw OpusCodecError.unavailable }
        self.converter = converter
        self.opus = opus
        self.pcmFormat = pcm
        inBuf = AVAudioCompressedBuffer(
            format: opus, packetCapacity: 1, maximumPacketSize: Self.maximumPacketSize)
    }

    /// Decode one Opus packet into `out` (whose format must be `pcmFormat`).
    ///
    /// - Parameters:
    ///   - packet: One raw Opus packet. Empty packets (DTX) decode to 0 frames.
    ///   - out: Destination buffer; its `frameLength` is reset before decoding.
    /// - Returns: The number of frames written to `out`.
    /// - Throws: `OpusCodecError.convertFailed` if the packet does not fit the input
    ///   buffer, the input buffer has no packet descriptions, or the converter
    ///   reports an error.
    func decode(_ packet: Data, into out: AVAudioPCMBuffer) throws -> AVAudioFrameCount {
        guard !packet.isEmpty else { return 0 }
        guard packet.count <= Int(inBuf.maximumPacketSize) else {
            throw OpusCodecError.convertFailed("packet larger than maximumPacketSize")
        }
        // Report a missing description table as an error instead of force-unwrapping.
        guard let descriptions = inBuf.packetDescriptions else {
            throw OpusCodecError.convertFailed("compressed buffer has no packet descriptions")
        }

        // Stage the packet as the single entry of the reusable compressed buffer.
        let byteCount = UInt32(packet.count)
        packet.copyBytes(
            to: inBuf.data.assumingMemoryBound(to: UInt8.self), count: packet.count)
        inBuf.byteLength = byteCount
        inBuf.packetCount = 1
        descriptions[0] = AudioStreamPacketDescription(
            mStartOffset: 0, mVariableFramesInPacket: 0, mDataByteSize: byteCount)

        out.frameLength = 0
        // The input block hands the staged packet over once, then reports that no
        // more data is available for this call.
        var fed = false
        var convError: NSError?
        let status = converter.convert(to: out, error: &convError) { [inBuf] _, outStatus in
            if fed {
                outStatus.pointee = .noDataNow
                return nil
            }
            fed = true
            outStatus.pointee = .haveData
            return inBuf
        }
        if status == .error {
            throw OpusCodecError.convertFailed(convError?.localizedDescription ?? "decode")
        }
        return out.frameLength
    }
}
|
||||
|
||||
/// Encodes PCM to raw Opus packets through an `AVAudioConverter`.
///
/// Output is collected in a single reusable compressed buffer, so an instance must
/// only be used from one thread at a time.
final class OpusEncoder {
    /// The encoder's packet duration: 960 samples = 20 ms, CoreAudio's default Opus
    /// framing. The host's mic service decodes any Opus frame size up to 120 ms.
    static let framesPerPacket: AVAudioFrameCount = 960

    /// Bit rate requested from the converter, in bits per second.
    private static let bitRate = 96_000
    /// Per-packet byte budget of the reusable output buffer.
    private static let maximumPacketSize = 1500
    /// Number of packet slots in the reusable output buffer.
    private static let packetCapacity: AVAudioPacketCount = 4

    private let converter: AVAudioConverter
    /// Reused for every `encode(_:)` call; reset at the start of each call.
    private let outBuf: AVAudioCompressedBuffer
    /// The PCM format `encode(_:)` expects as input.
    let pcmFormat: AVAudioFormat

    /// Creates an encoder emitting `framesPerPacket`-sample packets.
    ///
    /// - Throws: `OpusCodecError.unavailable` if either format or the converter
    ///   could not be created.
    init() throws {
        guard let pcm = opusPCMFormat(),
              let opus = opusFormat(framesPerPacket: UInt32(Self.framesPerPacket)),
              let converter = AVAudioConverter(from: pcm, to: opus)
        else { throw OpusCodecError.unavailable }
        converter.bitRate = Self.bitRate
        self.converter = converter
        self.pcmFormat = pcm
        outBuf = AVAudioCompressedBuffer(
            format: opus,
            packetCapacity: Self.packetCapacity,
            maximumPacketSize: Self.maximumPacketSize)
    }

    /// Encode exactly `framesPerPacket` frames of `pcmFormat` audio.
    ///
    /// - Parameter pcm: The PCM input, handed to the converter once.
    /// - Returns: The encoded packets (normally one); empty if the converter produced
    ///   no packets or the output buffer exposes no packet descriptions.
    /// - Throws: `OpusCodecError.convertFailed` if the converter reports an error or
    ///   describes a packet that lies outside the output buffer.
    func encode(_ pcm: AVAudioPCMBuffer) throws -> [Data] {
        outBuf.byteLength = 0
        outBuf.packetCount = 0

        // The input block hands `pcm` over once, then reports that no more data is
        // available for this call.
        var fed = false
        var convError: NSError?
        let status = converter.convert(to: outBuf, error: &convError) { _, outStatus in
            if fed {
                outStatus.pointee = .noDataNow
                return nil
            }
            fed = true
            outStatus.pointee = .haveData
            return pcm
        }
        if status == .error {
            throw OpusCodecError.convertFailed(convError?.localizedDescription ?? "encode")
        }

        let packetCount = Int(outBuf.packetCount)
        guard packetCount > 0, let descs = outBuf.packetDescriptions else { return [] }

        // Validate every descriptor against the buffer's allocated size before reading
        // raw memory, so a bad offset or size becomes an error, not an out-of-bounds read.
        let capacity = Int(outBuf.byteCapacity)
        var packets: [Data] = []
        packets.reserveCapacity(packetCount)
        for index in 0..<packetCount {
            let desc = descs[index]
            let offset = Int(desc.mStartOffset)
            let size = Int(desc.mDataByteSize)
            guard offset >= 0, offset <= capacity, size <= capacity - offset else {
                throw OpusCodecError.convertFailed("packet description outside encoded data")
            }
            packets.append(Data(bytes: outBuf.data.advanced(by: offset), count: size))
        }
        return packets
    }
}
|
||||
Reference in New Issue
Block a user