42d1c74663
apple / swift (push) Successful in 1m5s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Has been cancelled
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Has been cancelled
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Has been cancelled
android / android (push) Has been cancelled
apple / screenshots (push) Has been cancelled
ci / web (push) Has been cancelled
ci / docs-site (push) Has been cancelled
ci / bench (push) Has been cancelled
ci / rust (push) Has been cancelled
deb / build-publish (push) Has been cancelled
decky / build-publish (push) Has been cancelled
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Has been cancelled
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Has been cancelled
docker / deploy-docs (push) Has been cancelled
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Has been cancelled
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Has been cancelled
release / apple (push) Has been cancelled
The mic uplink handed the host pure digital silence on a multi-channel interface: AVAudioConverter's N→stereo downmix takes channels 0/1, but a pro interface puts the mic on ONE higher discrete channel. Fold the input to a mono bus ourselves instead — pick the mic's channel (or sum all) and resample that to the encoder's 48 kHz stereo, so the silent 0/1 downmix never happens. - New "Microphone channel" setting (macOS): Auto (sum every channel — a lone hot mic passes at full level) or pin 1-based channel N. Picker appears only for multi-channel devices, driven by the device's input channel count. - Diagnostics that make this class of failure self-naming next session: log the actual live capture device + format + fold mode, warn on a silent UID fallback, and a one-shot silence tripwire on the EXTRACTED signal (WARN on 10 s of zeros, else peak dBFS). - foldToMono extracted as a pure, unit-tested helper (pin / sum-clamp x interleaved / deinterleaved / mono / out-of-range). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
550 lines
26 KiB
Swift
550 lines
26 KiB
Swift
// Session audio, both directions:
|
||
//
|
||
// host → speaker: a drain thread pulls already-decoded PCM (nextAudioPcm, its own plane in the
// core — Opus is decoded in-core via libopus multistream) and writes it into a jitter ring; an
|
||
// AVAudioSourceNode pulls from the ring (silence on underrun with re-priming, so a
|
||
// network gap costs one dip, not permanent crackle).
|
||
//
|
||
// mic → host: a second AVAudioEngine taps the input device, folds it to one mono bus (the
|
||
// chosen channel of a multi-channel interface, or a sum of all channels), resamples to 48 kHz
|
||
// stereo, slices 20 ms chunks, Opus-encodes, and sendMic()s each packet — the host feeds them
|
||
// into a virtual PipeWire source.
|
||
//
|
||
// Devices are chosen by UID ("" = system default: the engine is then never pinned to a
|
||
// concrete device and follows default-device changes). Two engines, not one — a single
|
||
// AVAudioEngine ties input+output to one aggregate clock, separate engines keep
|
||
// arbitrary mic/speaker combinations trivial.
|
||
|
||
import AVFoundation
|
||
import os
|
||
|
||
private let log = Logger(subsystem: "io.unom.punktfunk", category: "audio")
|
||
|
||
/// Render-block-owned scratch storage: freed exactly when the closure (and thus the
/// last possible render call) is released — never racing CoreAudio.
///
/// The storage is zero-filled up front so that a short or skipped ring read can never expose
/// uninitialized heap contents to the output device.
private final class ScratchBuffer {
    /// Largest render quantum the render block accepts.
    static let maxFrames = 8192
    /// Widest supported layout (7.1).
    static let maxChannels = 8
    /// Total sample capacity: `maxFrames` × `maxChannels` interleaved floats.
    static let capacity = maxFrames * maxChannels

    /// Base of the interleaved sample storage (`capacity` floats, initially all zero).
    let ptr: UnsafeMutablePointer<Float>

    init() {
        ptr = UnsafeMutablePointer<Float>.allocate(capacity: Self.capacity)
        ptr.initialize(repeating: 0, count: Self.capacity)
    }

    deinit {
        ptr.deinitialize(count: Self.capacity)
        ptr.deallocate()
    }
}
|
||
|
||
public final class SessionAudio {
|
||
private let connection: PunktfunkConnection
|
||
private let flag = StopFlag()
|
||
private let drainDone = DispatchSemaphore(value: 0)
|
||
/// Owns the engine handles + drainStarted, paired with `flag`: stop() sets the flag
|
||
/// BEFORE taking the engines, every publisher re-checks the flag under this lock
|
||
/// after publishing-side work — so a startCapture racing stop() (the mic-permission
|
||
/// callback arrives whenever the user clicks the prompt) can never leave a hot
|
||
/// microphone with no owner.
|
||
private let stateLock = NSLock()
|
||
private var playbackEngine: AVAudioEngine?
|
||
private var captureEngine: AVAudioEngine?
|
||
private var drainStarted = false
|
||
#if !os(macOS)
|
||
/// AVAudioSession `setCategory`/`setActive` are synchronous and block on the audio server, so
|
||
/// they must not run on the main thread (UI stall — AVFoundation warns about it). PROCESS-WIDE
|
||
/// (static) so every SessionAudio shares one serial queue: the AVAudioSession is a process
|
||
/// singleton, and across a reconnect the old session's deactivate must be ordered before the
|
||
/// new session's activate (a per-instance queue would let them race and leave the new session's
|
||
/// audio deactivated). stop() enqueues its deactivate promptly so it lands before the next
|
||
/// session's activate.
|
||
private static let sessionQueue = DispatchQueue(label: "io.unom.punktfunk.audio.session")
|
||
#endif
|
||
|
||
/// Creates the audio side of one session. Only stores the connection — no engine, thread or
/// audio session is touched until `start(...)` is called.
/// - Parameter connection: The session connection audio is pulled from and mic packets are
///   sent on.
public init(connection: PunktfunkConnection) {
    self.connection = connection
}
|
||
|
||
/// Safety net for an owner that releases us without calling stop(): raising the stop flag
/// lets the drain thread (it holds the connection strongly, never self) leave its loop
/// within one poll timeout. Tearing the engines down remains stop()'s job.
deinit {
    flag.stop()
}
|
||
|
||
/// Starts playback and — when enabled and authorized — the mic uplink.
///
/// An empty UID selects the system default device; on iOS the UIDs are ignored altogether
/// because routing is AVAudioSession-managed. On macOS the engines come up synchronously on
/// the caller's (main) thread. On iOS/tvOS this call is ASYNCHRONOUS: the AVAudioSession is
/// activated off the main thread and the engines start on a later main-queue hop (skipped if
/// the stop flag was raised meanwhile), so playback is live shortly after return, not on
/// return. The mic may start later still while the permission prompt is pending.
/// - Parameters:
///   - speakerUID: Output device UID, or "" for the system default.
///   - micUID: Input device UID, or "" for the system default.
///   - micChannel: 0 = Auto (sum all input channels), n ≥ 1 = pin 1-based input channel n.
///   - micEnabled: Whether the mic uplink should be started at all.
public func start(speakerUID: String, micUID: String, micChannel: Int, micEnabled: Bool) {
    #if os(macOS)
    // macOS has no AVAudioSession: bring the engines up right here, on the caller's thread.
    startEngines(
        speakerUID: speakerUID, micUID: micUID, micChannel: micChannel, micEnabled: micEnabled)
    #else
    // Session configuration + activation block on the audio server, so they run on the shared
    // serial session queue. Engine routing/format depend on the active session, hence the
    // engines are built afterwards, back on the main queue. A stop() that slips in between is
    // caught by the flag check.
    Self.sessionQueue.async { [weak self] in
        guard let owner = self else { return }
        owner.activateAudioSession(micEnabled: micEnabled)
        DispatchQueue.main.async { [weak owner] in
            guard let owner, !owner.flag.isStopped else { return }
            owner.startEngines(
                speakerUID: speakerUID, micUID: micUID, micChannel: micChannel,
                micEnabled: micEnabled)
        }
    }
    #endif
}
|
||
|
||
#if !os(macOS)
|
||
/// Configures and activates the shared AVAudioSession. Route and policy live in the session
/// rather than per engine: playback only, or play-and-record with Bluetooth A2DP allowed when
/// the mic is enabled (iOS). A failure is logged and otherwise ignored, leaving the session
/// defaults in place. Runs on `sessionQueue`.
/// - Parameter micEnabled: Selects `.playAndRecord` instead of `.playback` on iOS.
private func activateAudioSession(micEnabled: Bool) {
    #if os(iOS)
    // .defaultToSpeaker: .playAndRecord otherwise routes to the iPhone EARPIECE; it only
    // affects the built-in route (headphones/BT still win).
    let category: AVAudioSession.Category = micEnabled ? .playAndRecord : .playback
    let options: AVAudioSession.CategoryOptions =
        micEnabled ? [.allowBluetoothA2DP, .defaultToSpeaker] : []
    #else  // tvOS — no app-accessible mic
    let category: AVAudioSession.Category = .playback
    let options: AVAudioSession.CategoryOptions = []
    #endif

    let session = AVAudioSession.sharedInstance()
    do {
        try session.setCategory(category, mode: .default, options: options)
        try session.setActive(true)
    } catch {
        log.warning("AVAudioSession setup failed: \(error.localizedDescription)")
    }
}
|
||
#endif
|
||
|
||
/// Build + start the playback engine, then the mic uplink when it is enabled and authorized.
/// Runs on the main thread (engine setup); on iOS/tvOS the session is already active by the
/// time this runs.
///
/// Mic authorization is resolved here: already authorized → start capture now; undetermined →
/// prompt, and start capture from the callback if granted and not stopped meanwhile; anything
/// else → log and continue playback-only. A refusal at the prompt is logged the same way as a
/// pre-existing denial, so a missing uplink is always explained in the log.
/// - Parameters:
///   - speakerUID: Output device UID, or "" for the system default.
///   - micUID: Input device UID, or "" for the system default.
///   - micChannel: 0 = Auto, n ≥ 1 = pin 1-based input channel n.
///   - micEnabled: Whether to attempt the mic uplink at all.
private func startEngines(
    speakerUID: String, micUID: String, micChannel: Int, micEnabled: Bool
) {
    startPlayback(speakerUID: speakerUID)
    #if os(tvOS)
    // No app-accessible microphone input on tvOS — playback only.
    #else
    guard micEnabled else { return }
    switch AVCaptureDevice.authorizationStatus(for: .audio) {
    case .authorized:
        startCapture(micUID: micUID, micChannel: micChannel)
    case .notDetermined:
        AVCaptureDevice.requestAccess(for: .audio) { [weak self] granted in
            guard granted else {
                // The user just declined the prompt — say so, like the already-denied path.
                log.warning("microphone access denied — mic uplink disabled (System Settings → Privacy)")
                return
            }
            // The callback arrives on an arbitrary queue whenever the user answers; hop to
            // main and re-check the stop flag before touching the engine.
            DispatchQueue.main.async {
                guard let self, !self.flag.isStopped else { return }
                self.startCapture(micUID: micUID, micChannel: micChannel)
            }
        }
    default:
        log.warning("microphone access denied — mic uplink disabled (System Settings → Privacy)")
    }
    #endif
}
|
||
|
||
/// Stops both directions. Callable from any thread; waits (bounded) for the drain thread to
/// finish so the caller can close the connection immediately afterwards.
public func stop() {
    // Raise the flag BEFORE taking the engines: publishers re-check it under `stateLock`
    // after their own setup, so a start racing this call tears its engine down itself.
    flag.stop()

    stateLock.lock()
    let (capture, playback, wasDraining) = (captureEngine, playbackEngine, drainStarted)
    captureEngine = nil
    playbackEngine = nil
    drainStarted = false
    stateLock.unlock()

    if let capture {
        capture.inputNode.removeTap(onBus: 0)
        capture.stop()
    }
    playback?.stop()

    #if !os(macOS)
    // Hand the session back so audio we interrupted (Music, podcasts) gets its resume cue.
    // setActive is synchronous/blocking like activation, so it goes on the shared serial
    // session queue (off the main thread). It is enqueued HERE — engines already stopped, and
    // BEFORE the drain wait below — so across a reconnect it lands ahead of the next session's
    // activate on that queue (a deferred deactivate could otherwise deactivate the new
    // session). Fire-and-forget.
    Self.sessionQueue.async {
        do {
            try AVAudioSession.sharedInstance().setActive(
                false, options: .notifyOthersOnDeactivation)
        } catch {
            log.warning("AVAudioSession deactivation failed: \(error.localizedDescription)")
        }
    }
    #endif

    // Only wait when a drain thread was actually started; the wait is bounded at 400 ms.
    guard wasDraining else { return }
    _ = drainDone.wait(timeout: .now() + .milliseconds(400))
}
|
||
|
||
// MARK: - Playback (host → speaker)
|
||
|
||
/// Builds and starts the playback engine: a jitter ring fed by the drain thread, pulled by an
/// AVAudioSourceNode that deinterleaves the wire-order samples into the engine's per-channel
/// buffers. On any setup failure the error is logged and audio stays disabled.
/// - Parameter speakerUID: Output device UID (macOS only), or "" for the system default.
private func startPlayback(speakerUID: String) {
    // Build the playback layout from the host-RESOLVED channel count (never the request):
    // 2 = stereo / 6 = 5.1 / 8 = 7.1, canonical wire order FL FR FC LFE RL RR SL SR.
    let channels = Int(connection.resolvedAudioChannels)
    // The render scratch buffer holds 8192 frames × 8 channels; a count outside 1...8 can
    // neither be laid out nor rendered without overrunning it, so refuse it up front.
    guard (1...8).contains(channels) else {
        log.error("could not build \(channels)-channel audio format — audio disabled")
        return
    }
    // 1 s interleaved capacity, ~20 ms prefill (four 5 ms host packets of jitter absorption
    // before the first sample plays), both scaled by the channel count.
    let ring = AudioRing(
        capacity: 48_000 * channels, prefill: 960 * channels, channels: channels)

    let engine = AVAudioEngine()
    #if os(macOS)
    if !speakerUID.isEmpty {
        if let dev = AudioDevices.deviceID(forUID: speakerUID),
           let unit = engine.outputNode.audioUnit {
            if !Self.setDevice(dev, on: unit) {
                log.error("could not select speaker \(speakerUID) — using default")
            }
        } else {
            log.warning("speaker \(speakerUID) not present — using default")
        }
    }
    #endif

    // Engine-native deinterleaved float; the render block deinterleaves from the ring. Surround
    // uses an explicit wire-order channel layout; the mixer downmixes to the output device when
    // it has fewer speakers (e.g. an iPhone's stereo built-ins). (Explicit if/else rather than
    // map/flatMap so it's correct whether the channelLayout initializer is failable or not.)
    var format: AVAudioFormat?
    if channels == 2 {
        format = AVAudioFormat(standardFormatWithSampleRate: 48_000, channels: 2)
    } else if let layout = wireChannelLayout(channels: channels) {
        format = AVAudioFormat(standardFormatWithSampleRate: 48_000, channelLayout: layout)
    }
    guard let format else {
        log.error("could not build \(channels)-channel audio format — audio disabled")
        return
    }

    let scratch = ScratchBuffer()  // block-owned; freed with the closure
    let source = AVAudioSourceNode(format: format) { _, _, frameCount, abl -> OSStatus in
        let frames = Int(frameCount)
        guard frames <= 8192 else { return kAudioUnitErr_TooManyFramesToProcess }
        // Always consume from the ring so its read position keeps pace with the device clock.
        ring.read(into: scratch.ptr, count: frames * channels)
        let buffers = UnsafeMutableAudioBufferListPointer(abl)
        guard buffers.count >= channels else {
            // Unexpected buffer layout: hand back explicit silence instead of returning noErr
            // over buffers we never wrote.
            for buffer in buffers {
                if let raw = buffer.mData { memset(raw, 0, Int(buffer.mDataByteSize)) }
            }
            return noErr
        }
        // Deinterleave the wire-order interleaved ring into the engine's per-channel buses.
        for ch in 0..<channels {
            if let dst = buffers[ch].mData?.assumingMemoryBound(to: Float.self) {
                for f in 0..<frames { dst[f] = scratch.ptr[f * channels + ch] }
            }
        }
        return noErr
    }
    engine.attach(source)
    engine.connect(source, to: engine.mainMixerNode, format: format)
    engine.prepare()
    do {
        try engine.start()
    } catch {
        log.error("playback engine failed to start: \(error.localizedDescription)")
        return
    }

    stateLock.lock()
    if flag.isStopped {
        stateLock.unlock()
        engine.stop()  // stop() already ran — don't strand a started engine
        return
    }
    playbackEngine = engine
    stateLock.unlock()
    startDrain(into: ring)
}
|
||
|
||
/// Spawns the drain thread that pulls decoded PCM from the connection and writes it into
/// `ring` until the stop flag is raised or the connection throws.
///
/// The thread captures the connection, flag and semaphore — not `self` — and signals
/// `drainDone` on exit so stop() can wait for it.
/// - Parameter ring: The jitter ring the playback render block reads from.
private func startDrain(into ring: AudioRing) {
    stateLock.lock()
    if flag.isStopped {
        // stop() already ran (it raises the flag before taking the lock): it saw
        // drainStarted == false and will not wait, so don't spawn a thread or leave a stale
        // drainStarted behind.
        stateLock.unlock()
        return
    }
    drainStarted = true
    stateLock.unlock()

    let thread = Thread { [connection, flag, drainDone] in
        defer { drainDone.signal() }
        // Decode happens IN-CORE (libopus multistream) — AudioToolbox's Opus path is
        // stereo-only — and is handed back as interleaved f32 PCM in wire channel order.
        while !flag.isStopped {
            let pcm: PunktfunkConnection.AudioPCM?
            do {
                pcm = try connection.nextAudioPcm(timeoutMs: 100)
            } catch {
                break  // session closed
            }
            guard let pcm, pcm.frameCount > 0 else { continue }
            let sampleCount = pcm.frameCount * pcm.channels
            pcm.samples.withUnsafeBufferPointer { p in
                // Only hand the ring as many samples as the buffer really holds; a packet whose
                // header disagrees with its payload is dropped rather than read out of bounds.
                if let base = p.baseAddress, Int(sampleCount) <= p.count {
                    ring.write(base, count: sampleCount)
                }
            }
        }
    }
    thread.name = "punktfunk-audio"
    thread.qualityOfService = .userInteractive
    thread.start()
}
|
||
|
||
// MARK: - Mic (mic → host)
|
||
|
||
#if !os(tvOS)
|
||
/// Builds and starts the mic uplink: taps the input device, folds every buffer to one mono
/// bus, resamples it to the encoder's format, slices encoder-sized chunks, Opus-encodes them
/// and `sendMic`s each packet.
///
/// Every setup failure logs its own reason and leaves the uplink disabled. If stop() ran while
/// the engine was starting, the engine is torn down here since nobody else owns it.
/// - Parameters:
///   - micUID: Input device UID (macOS only), or "" for the system default.
///   - micChannel: 0 = Auto (sum every input channel), n ≥ 1 = pin 1-based input channel n;
///     an out-of-range channel falls back to Auto with a warning.
private func startCapture(micUID: String, micChannel: Int) {
    let engine = AVAudioEngine()
    let input = engine.inputNode
    #if os(macOS)
    if !micUID.isEmpty {
        if let dev = AudioDevices.deviceID(forUID: micUID), let unit = input.audioUnit {
            if !Self.setDevice(dev, on: unit) {
                log.error("could not select microphone \(micUID) — using default")
            }
        } else {
            log.warning("microphone \(micUID) not present — using default")
        }
    }
    #endif

    let inFormat = input.outputFormat(forBus: 0)
    guard inFormat.sampleRate > 0, inFormat.channelCount > 0 else {
        log.error("no usable input device — mic uplink disabled")
        return
    }

    // Multi-channel-interface handling. A pro interface exposes N discrete inputs with the mic
    // on ONE of them, but AVAudioConverter's N→stereo downmix takes channels 0/1 — dead
    // silence when the mic sits higher up (the classic "host receives zeros"). So we fold the
    // input to a single mono bus OURSELVES and resample that. micChannel: 0 = Auto (sum every
    // channel — a lone hot mic passes at full level), n≥1 pins 1-based input channel n.
    let inChannels = Int(inFormat.channelCount)
    let pinnedChannel: Int? = {
        guard micChannel >= 1 else { return nil }
        let idx = micChannel - 1
        guard idx < inChannels else {
            log.warning(
                "mic channel \(micChannel) out of range (device has \(inChannels)) — mixing all")
            return nil
        }
        return idx
    }()
    let channelPlan = pinnedChannel.map { "channel \($0 + 1)/\(inChannels)" }
        ?? (inChannels > 1 ? "mix \(inChannels)ch→mono" : "mono")

    // Name the device we're ACTUALLY recording from + its format + how we fold it, once per
    // session. This single line localizes the whole class of "host receives silence" failures
    // that otherwise need a host-side tone injection to pin down: a UID that silently fell back
    // to the default, the wrong device being live, or the wrong channel picked.
    #if os(macOS)
    if let unit = input.audioUnit, let live = Self.currentDevice(of: unit),
       let dev = AudioDevices.describe(live) {
        if !micUID.isEmpty, dev.uid != micUID {
            log.warning("""
                mic selection not honored — requested \(micUID) but capturing from \
                \(dev.name) [\(dev.uid)]; the device's UID likely changed (replug) — \
                reselect it in Settings
                """)
        }
        log.info("""
            mic capture: \(dev.name) [\(dev.uid)] — \(Int(inFormat.sampleRate)) Hz, \
            \(inChannels) ch, \(channelPlan)
            """)
    } else {
        log.info("""
            mic capture: <device unavailable> — \(Int(inFormat.sampleRate)) Hz, \
            \(inChannels) ch, \(channelPlan)
            """)
    }
    #else
    log.info(
        "mic capture: \(Int(inFormat.sampleRate)) Hz, \(inChannels) ch, \(channelPlan)")
    #endif

    // Encode a single mono bus (folded from `inFormat` in the tap): the resampler goes
    // mono@inputSR → the encoder's 48 kHz stereo, so it handles both the rate change and the
    // mono→stereo duplication, and the wrong-channel downmix never happens.
    // Each prerequisite reports its own failure so the log names the piece that is missing.
    guard let monoFormat = AVAudioFormat(
        commonFormat: .pcmFormatFloat32, sampleRate: inFormat.sampleRate,
        channels: 1, interleaved: false)
    else {
        log.error("could not build mono capture format — mic uplink disabled")
        return
    }
    guard let encoder = try? OpusEncoder() else {
        log.error("Opus encoder unavailable — mic uplink disabled")
        return
    }
    guard let resampler = AVAudioConverter(from: monoFormat, to: encoder.pcmFormat),
          let chunk = AVAudioPCMBuffer(
              pcmFormat: encoder.pcmFormat, frameCapacity: OpusEncoder.framesPerPacket)
    else {
        log.error("could not build mic resampler — mic uplink disabled")
        return
    }

    // Tap-thread-confined state: resample into `staging`, accumulate in `fifo`,
    // slice packet-sized chunks for the encoder.
    var fifo: [Float] = []
    fifo.reserveCapacity(48_000)
    var seq: UInt32 = 0
    let connection = connection
    let flag = flag
    // Output frames produced per input frame; fixed for the lifetime of the tap.
    let resampleRatio = 48_000 / inFormat.sampleRate

    // Silence tripwire (tap-confined): a "recording" app can be handed pure digital zeros —
    // a zeroed input-volume slider, a stale TCC grant, a muted device, OR the wrong channel
    // picked — and everything downstream looks alive while the host gets silence. Track the
    // peak of the EXTRACTED mono bus over the first ~10 s (not the raw device — a mic present
    // on a channel we didn't grab must still read as silence) and emit exactly ONE verdict.
    let silenceWindow = Int(inFormat.sampleRate * 10)
    let deviceLabel = micUID.isEmpty ? "default input" : micUID
    var framesInspected = 0
    var inputPeak: Float = 0
    var levelReported = false

    input.installTap(onBus: 0, bufferSize: 2048, format: inFormat) { buffer, _ in
        if flag.isStopped { return }
        let frames = Int(buffer.frameLength)
        guard frames > 0, let src = buffer.floatChannelData,
              let mono = AVAudioPCMBuffer(
                  pcmFormat: monoFormat, frameCapacity: buffer.frameLength),
              let dst = mono.floatChannelData?[0]
        else { return }
        mono.frameLength = buffer.frameLength

        // Fold the multi-channel input down to the one mono bus we encode.
        Self.foldToMono(
            input: src, frames: frames, channels: Int(buffer.format.channelCount),
            interleaved: buffer.format.isInterleaved, pinned: pinnedChannel, out: dst)

        if !levelReported {
            var localPeak: Float = 0
            for i in 0..<frames where abs(dst[i]) > localPeak { localPeak = abs(dst[i]) }
            if localPeak > inputPeak { inputPeak = localPeak }
            framesInspected += frames
            if framesInspected >= silenceWindow {
                levelReported = true
                if inputPeak == 0 {
                    log.warning("""
                        mic uplink has been pure digital SILENCE for 10 s (\(deviceLabel), \
                        \(channelPlan)) — check the input level (System Settings → Sound → \
                        Input), Privacy & Security → Microphone, and the Microphone channel in \
                        Settings; the host is receiving zeros
                        """)
                } else {
                    let dbfs = 20 * log10(inputPeak)
                    log.info("""
                        mic uplink OK — peak \(String(format: "%.1f", dbfs)) dBFS over first \
                        10 s (\(deviceLabel), \(channelPlan))
                        """)
                }
            }
        }

        // Room for the resampled buffer plus slack for the converter's internal rounding.
        let outCapacity = AVAudioFrameCount(
            (Double(frames) * resampleRatio).rounded(.up) + 64)
        guard let staging = AVAudioPCMBuffer(
            pcmFormat: encoder.pcmFormat, frameCapacity: outCapacity)
        else { return }
        // Feed `mono` exactly once per convert call, then report "no data now" so the
        // converter keeps its state for the next tap buffer.
        var fed = false
        var convError: NSError?
        let status = resampler.convert(to: staging, error: &convError) { _, outStatus in
            if fed {
                outStatus.pointee = .noDataNow
                return nil
            }
            fed = true
            outStatus.pointee = .haveData
            return mono
        }
        guard status != .error, let p = staging.floatChannelData?[0] else { return }
        // × 2: both stereo samples of each frame are read from the first channel pointer.
        fifo.append(contentsOf: UnsafeBufferPointer(
            start: p, count: Int(staging.frameLength) * 2))

        let samplesPerChunk = Int(OpusEncoder.framesPerPacket) * 2
        while fifo.count >= samplesPerChunk {
            chunk.frameLength = OpusEncoder.framesPerPacket
            fifo.withUnsafeBufferPointer { pending in
                if let chunkSamples = chunk.floatChannelData?[0],
                   let base = pending.baseAddress {
                    chunkSamples.update(from: base, count: samplesPerChunk)
                }
            }
            fifo.removeFirst(samplesPerChunk)
            guard let packets = try? encoder.encode(chunk) else { continue }
            for packet in packets {
                connection.sendMic(
                    packet, seq: seq, ptsNs: DispatchTime.now().uptimeNanoseconds)
                seq &+= 1
            }
        }
    }

    engine.prepare()
    do {
        try engine.start()
    } catch {
        log.error("capture engine failed to start: \(error.localizedDescription)")
        input.removeTap(onBus: 0)
        return
    }

    stateLock.lock()
    if flag.isStopped {
        // stop() ran while we were starting (the permission prompt resolves at the
        // user's leisure) — tear the engine down ourselves, nobody else owns it now.
        stateLock.unlock()
        input.removeTap(onBus: 0)
        engine.stop()
        return
    }
    captureEngine = engine
    stateLock.unlock()
    log.info("mic uplink started (\(deviceLabel))")
}
|
||
|
||
/// Fold `channels` of input down to a single mono bus in `out` (`frames` samples long).
///
/// `input` follows the `floatChannelData` layout: when `interleaved`, one buffer strided by
/// the channel count; otherwise one buffer per channel.
///
/// `pinned` (0-based) copies exactly that channel — the fix for a mic on one input of a
/// multi-channel interface. `nil`, or any index outside `0..<channels`, sums every channel
/// clamped to [-1, 1], so a lone hot channel still passes at full level instead of the silent
/// 0/1 the default N→stereo downmix would grab.
///
/// Degenerate sizes are handled without touching memory they don't describe: `frames <= 0`
/// writes nothing, and `channels <= 0` writes `frames` samples of silence without reading
/// `input`. Pure + `internal` for unit testing the index math.
/// - Parameters:
///   - input: Channel buffer pointer(s) in `floatChannelData` layout.
///   - frames: Number of sample frames to fold.
///   - channels: Number of input channels.
///   - interleaved: Whether `input[0]` holds all channels interleaved.
///   - pinned: 0-based channel to copy, or `nil` to sum all channels.
///   - out: Destination for `frames` mono samples.
static func foldToMono(
    input: UnsafePointer<UnsafeMutablePointer<Float>>, frames: Int, channels: Int,
    interleaved: Bool, pinned: Int?, out: UnsafeMutablePointer<Float>
) {
    guard frames > 0 else { return }
    guard channels > 0 else {
        // Nothing to read from — emit silence rather than dereferencing `input`.
        for i in 0..<frames { out[i] = 0 }
        return
    }

    // A pin is honored only when it names a real channel; a negative or too-large index
    // falls through to the sum-all path instead of indexing out of bounds.
    if let ch = pinned, (0..<channels).contains(ch) {
        if interleaved {
            let d = input[0]
            for i in 0..<frames { out[i] = d[i * channels + ch] }
        } else {
            let d = input[ch]
            for i in 0..<frames { out[i] = d[i] }
        }
        return
    }

    if interleaved {
        let d = input[0]
        for i in 0..<frames {
            var s: Float = 0
            for c in 0..<channels { s += d[i * channels + c] }
            out[i] = max(-1, min(1, s))
        }
    } else {
        let first = input[0]
        for i in 0..<frames { out[i] = first[i] }
        // A single deinterleaved channel is passed through untouched (no sum, no clamp).
        guard channels > 1 else { return }
        for c in 1..<channels {
            let d = input[c]
            for i in 0..<frames { out[i] += d[i] }
        }
        for i in 0..<frames { out[i] = max(-1, min(1, out[i])) }
    }
}
|
||
#endif
|
||
|
||
#if os(macOS)
|
||
/// Points `unit` at the CoreAudio device `id` by setting
/// `kAudioOutputUnitProperty_CurrentDevice` (global scope, element 0).
/// - Returns: `true` when the property call reported `noErr`.
private static func setDevice(_ id: AudioDeviceID, on unit: AudioUnit) -> Bool {
    var target = id
    let byteCount = UInt32(MemoryLayout<AudioDeviceID>.size)
    let status = AudioUnitSetProperty(
        unit, kAudioOutputUnitProperty_CurrentDevice, kAudioUnitScope_Global, 0,
        &target, byteCount)
    return status == noErr
}
|
||
|
||
/// Reads back the device the audio unit is currently bound to — the definitive answer to
/// "what are we actually capturing from". This exposes a selection that succeeded on paper
/// but silently fell back to the system default (a stale/changed UID, a device that vanished
/// between resolve and start).
/// - Returns: The live device ID, or `nil` when the query failed or reported device 0
///   (meaning we couldn't tell).
private static func currentDevice(of unit: AudioUnit) -> AudioDeviceID? {
    var liveID: AudioDeviceID = 0
    var byteCount = UInt32(MemoryLayout<AudioDeviceID>.size)
    let status = AudioUnitGetProperty(
        unit, kAudioOutputUnitProperty_CurrentDevice, kAudioUnitScope_Global, 0,
        &liveID, &byteCount)
    if status != noErr || liveID == 0 { return nil }
    return liveID
}
|
||
#endif
|
||
}
|