Merge remote-tracking branch 'origin/main'
android / android (push) Has been cancelled
apple / swift (push) Has been cancelled
apple / screenshots (push) Has been cancelled
ci / docs-site (push) Has been cancelled
ci / bench (push) Has been cancelled
ci / rust (push) Has been cancelled
ci / web (push) Has been cancelled
deb / build-publish (push) Has been cancelled
decky / build-publish (push) Has been cancelled
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Has been cancelled
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Has been cancelled
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Has been cancelled
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Has been cancelled
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Has been cancelled
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Has been cancelled
windows-host / package (push) Has been cancelled
windows-msix / package (arm64, C:\Users\Public\ffmpeg-arm64, aarch64-pc-windows-msvc, C:\t-a64) (push) Has been cancelled
windows-msix / package (x64, C:\Users\Public\ffmpeg, x86_64-pc-windows-msvc, C:\t) (push) Has been cancelled
docker / deploy-docs (push) Has been cancelled
flatpak / build-publish (push) Has been cancelled
release / apple (push) Has been cancelled
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Has been cancelled
android / android (push) Has been cancelled
apple / swift (push) Has been cancelled
apple / screenshots (push) Has been cancelled
ci / docs-site (push) Has been cancelled
ci / bench (push) Has been cancelled
ci / rust (push) Has been cancelled
ci / web (push) Has been cancelled
deb / build-publish (push) Has been cancelled
decky / build-publish (push) Has been cancelled
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Has been cancelled
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Has been cancelled
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Has been cancelled
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Has been cancelled
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Has been cancelled
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Has been cancelled
windows-host / package (push) Has been cancelled
windows-msix / package (arm64, C:\Users\Public\ffmpeg-arm64, aarch64-pc-windows-msvc, C:\t-a64) (push) Has been cancelled
windows-msix / package (x64, C:\Users\Public\ffmpeg, x86_64-pc-windows-msvc, C:\t) (push) Has been cancelled
docker / deploy-docs (push) Has been cancelled
flatpak / build-publish (push) Has been cancelled
release / apple (push) Has been cancelled
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Has been cancelled
This commit is contained in:
@@ -338,6 +338,7 @@ final class SessionModel: ObservableObject {
|
||||
audio.start(
|
||||
speakerUID: defaults.string(forKey: DefaultsKey.speakerUID) ?? "",
|
||||
micUID: defaults.string(forKey: DefaultsKey.micUID) ?? "",
|
||||
micChannel: defaults.integer(forKey: DefaultsKey.micChannel),
|
||||
micEnabled: defaults.object(forKey: DefaultsKey.micEnabled) as? Bool ?? true)
|
||||
self.audio = audio
|
||||
// Gamepads: forward GamepadManager's active controller as pad 0 and render the
|
||||
|
||||
@@ -208,6 +208,17 @@ extension SettingsView {
|
||||
}
|
||||
}
|
||||
.disabled(!micEnabled)
|
||||
// Multi-channel interfaces only: the mic sits on ONE discrete input, so let the user
|
||||
// pick it. Auto sums every channel (a lone hot mic still passes at full level).
|
||||
if micChannelCount > 1 {
|
||||
Picker("Microphone channel", selection: $micChannel) {
|
||||
Text("Auto (all channels)").tag(0)
|
||||
ForEach(1...micChannelCount, id: \.self) { ch in
|
||||
Text("Channel \(ch)").tag(ch)
|
||||
}
|
||||
}
|
||||
.disabled(!micEnabled)
|
||||
}
|
||||
#endif
|
||||
} header: {
|
||||
Text("Audio")
|
||||
|
||||
@@ -61,8 +61,12 @@ struct SettingsView: View {
|
||||
#if os(macOS)
|
||||
@AppStorage(DefaultsKey.speakerUID) var speakerUID = ""
|
||||
@AppStorage(DefaultsKey.micUID) var micUID = ""
|
||||
@AppStorage(DefaultsKey.micChannel) var micChannel = 0
|
||||
@State var outputDevices: [AudioDevice] = []
|
||||
@State var inputDevices: [AudioDevice] = []
|
||||
// Input channels of the selected mic — drives the "Microphone channel" picker, which only
|
||||
// appears for a multi-channel interface (>1). 0 until the Audio tab loads it.
|
||||
@State var micChannelCount = 0
|
||||
#endif
|
||||
|
||||
#if os(iOS)
|
||||
@@ -115,6 +119,12 @@ struct SettingsView: View {
|
||||
.onAppear {
|
||||
outputDevices = AudioDevices.outputs()
|
||||
inputDevices = AudioDevices.inputs()
|
||||
micChannelCount = AudioDevices.inputChannelCount(forUID: micUID)
|
||||
}
|
||||
.onChange(of: micUID) { _, newUID in
|
||||
// A different mic → different channel count; drop a now-out-of-range pin to Auto.
|
||||
micChannelCount = AudioDevices.inputChannelCount(forUID: newUID)
|
||||
if micChannel > micChannelCount { micChannel = 0 }
|
||||
}
|
||||
.tabItem { Label("Audio", systemImage: "speaker.wave.2") }
|
||||
|
||||
|
||||
@@ -33,6 +33,49 @@ public enum AudioDevices {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports how many input channels the microphone identified by `uid` exposes.
///
/// An empty `uid` selects the system default input device; any other value is resolved
/// through `deviceID(forUID:)`. The count drives the "Microphone channel" picker, which is
/// only shown for multi-channel interfaces.
///
/// - Parameter uid: UID of the chosen input device, or `""` for the system default input.
/// - Returns: The device's total input channel count, or 0 when no device can be resolved.
public static func inputChannelCount(forUID uid: String) -> Int {
    let resolved: AudioDeviceID?
    if uid.isEmpty {
        resolved = defaultInputDevice()
    } else {
        resolved = deviceID(forUID: uid)
    }
    guard let device = resolved else { return 0 }
    return channelCount(device, scope: kAudioObjectPropertyScopeInput)
}
|
||||
|
||||
/// Asks the audio system object for the current default input device.
///
/// - Returns: The default input's `AudioDeviceID`, or `nil` when the property read fails or
///   reports device id 0.
private static func defaultInputDevice() -> AudioDeviceID? {
    var query = AudioObjectPropertyAddress(
        mSelector: kAudioHardwarePropertyDefaultInputDevice,
        mScope: kAudioObjectPropertyScopeGlobal,
        mElement: kAudioObjectPropertyElementMain)
    var device = AudioDeviceID(0)
    var byteCount = UInt32(MemoryLayout<AudioDeviceID>.size)
    let status = AudioObjectGetPropertyData(
        AudioObjectID(kAudioObjectSystemObject), &query, 0, nil, &byteCount, &device)
    // A failed read and a zero id both mean "no default input available".
    if status != noErr || device == 0 { return nil }
    return device
}
|
||||
|
||||
/// Total channel count of a device in one scope: the sum of `mNumberChannels` over every
/// buffer in the device's `kAudioDevicePropertyStreamConfiguration` buffer list.
///
/// - Parameters:
///   - id: The device to query.
///   - scope: Which side of the device to count (e.g. `kAudioObjectPropertyScopeInput`).
/// - Returns: The summed channel count, or 0 when the size query fails, reports zero bytes,
///   or the data query fails.
private static func channelCount(
    _ id: AudioDeviceID, scope: AudioObjectPropertyScope
) -> Int {
    var query = AudioObjectPropertyAddress(
        mSelector: kAudioDevicePropertyStreamConfiguration,
        mScope: scope,
        mElement: kAudioObjectPropertyElementMain)

    // The buffer list is variable-length, so ask for its byte size before fetching it.
    var byteCount: UInt32 = 0
    let sizeStatus = AudioObjectGetPropertyDataSize(id, &query, 0, nil, &byteCount)
    if sizeStatus != noErr || byteCount == 0 { return 0 }

    let storage = UnsafeMutableRawPointer.allocate(
        byteCount: Int(byteCount), alignment: MemoryLayout<AudioBufferList>.alignment)
    defer { storage.deallocate() }

    if AudioObjectGetPropertyData(id, &query, 0, nil, &byteCount, storage) != noErr {
        return 0
    }

    let buffers = UnsafeMutableAudioBufferListPointer(
        storage.assumingMemoryBound(to: AudioBufferList.self))
    var total = 0
    for buffer in buffers {
        total += Int(buffer.mNumberChannels)
    }
    return total
}
|
||||
|
||||
private static func all() -> [AudioDeviceID] {
|
||||
var address = AudioObjectPropertyAddress(
|
||||
mSelector: kAudioHardwarePropertyDevices,
|
||||
@@ -62,7 +105,8 @@ public enum AudioDevices {
|
||||
return AudioObjectGetPropertyDataSize(id, &address, 0, nil, &size) == noErr && size > 0
|
||||
}
|
||||
|
||||
private static func describe(_ id: AudioDeviceID) -> AudioDevice? {
|
||||
/// UID + human name for a live AudioDeviceID (nil if either property is unreadable).
|
||||
static func describe(_ id: AudioDeviceID) -> AudioDevice? {
|
||||
guard let uid = stringProperty(id, kAudioDevicePropertyDeviceUID),
|
||||
let name = stringProperty(id, kAudioObjectPropertyName)
|
||||
else { return nil }
|
||||
|
||||
@@ -5,9 +5,10 @@
|
||||
// AVAudioSourceNode pulls from the ring (silence on underrun with re-priming, so a
|
||||
// network gap costs one dip, not permanent crackle).
|
||||
//
|
||||
// mic → host: a second AVAudioEngine taps the input device, resamples to 48 kHz
|
||||
// stereo, slices 20 ms chunks, Opus-encodes, and sendMic()s each packet — the host
|
||||
// feeds them into a virtual PipeWire source.
|
||||
// mic → host: a second AVAudioEngine taps the input device, folds it to one mono bus (the
|
||||
// chosen channel of a multi-channel interface, or a sum of all channels), resamples to 48 kHz
|
||||
// stereo, slices 20 ms chunks, Opus-encodes, and sendMic()s each packet — the host feeds them
|
||||
// into a virtual PipeWire source.
|
||||
//
|
||||
// Devices are chosen by UID ("" = system default: the engine is then never pinned to a
|
||||
// concrete device and follows default-device changes). Two engines, not one — a single
|
||||
@@ -68,10 +69,11 @@ public final class SessionAudio {
|
||||
/// ASYNCHRONOUS: it activates the AVAudioSession off the main thread, then starts the engines on
|
||||
/// a later main-queue hop (gated by `!flag.isStopped`) — so playback is live shortly after, not
|
||||
/// on return. The mic may start later still if the permission prompt is pending.
|
||||
public func start(speakerUID: String, micUID: String, micEnabled: Bool) {
|
||||
public func start(speakerUID: String, micUID: String, micChannel: Int, micEnabled: Bool) {
|
||||
#if os(macOS)
|
||||
// No AVAudioSession on macOS — start the engines directly (caller's thread, as before).
|
||||
startEngines(speakerUID: speakerUID, micUID: micUID, micEnabled: micEnabled)
|
||||
startEngines(
|
||||
speakerUID: speakerUID, micUID: micUID, micChannel: micChannel, micEnabled: micEnabled)
|
||||
#else
|
||||
// Configure + activate the session OFF the main thread (it blocks on the audio server),
|
||||
// then start the engines back on the main thread once it's active — engine routing/format
|
||||
@@ -81,7 +83,9 @@ public final class SessionAudio {
|
||||
self.activateAudioSession(micEnabled: micEnabled)
|
||||
DispatchQueue.main.async { [weak self] in
|
||||
guard let self, !self.flag.isStopped else { return }
|
||||
self.startEngines(speakerUID: speakerUID, micUID: micUID, micEnabled: micEnabled)
|
||||
self.startEngines(
|
||||
speakerUID: speakerUID, micUID: micUID, micChannel: micChannel,
|
||||
micEnabled: micEnabled)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -115,7 +119,9 @@ public final class SessionAudio {
|
||||
|
||||
/// Build + start the playback engine (and the mic uplink when enabled + authorized). Main
|
||||
/// thread (engine setup); on iOS/tvOS the session is already active by the time this runs.
|
||||
private func startEngines(speakerUID: String, micUID: String, micEnabled: Bool) {
|
||||
private func startEngines(
|
||||
speakerUID: String, micUID: String, micChannel: Int, micEnabled: Bool
|
||||
) {
|
||||
startPlayback(speakerUID: speakerUID)
|
||||
#if os(tvOS)
|
||||
// No app-accessible microphone input on tvOS — playback only.
|
||||
@@ -123,12 +129,12 @@ public final class SessionAudio {
|
||||
guard micEnabled else { return }
|
||||
switch AVCaptureDevice.authorizationStatus(for: .audio) {
|
||||
case .authorized:
|
||||
startCapture(micUID: micUID)
|
||||
startCapture(micUID: micUID, micChannel: micChannel)
|
||||
case .notDetermined:
|
||||
AVCaptureDevice.requestAccess(for: .audio) { [weak self] granted in
|
||||
DispatchQueue.main.async {
|
||||
guard let self, granted, !self.flag.isStopped else { return }
|
||||
self.startCapture(micUID: micUID)
|
||||
self.startCapture(micUID: micUID, micChannel: micChannel)
|
||||
}
|
||||
}
|
||||
default:
|
||||
@@ -280,7 +286,7 @@ public final class SessionAudio {
|
||||
// MARK: - Mic (mic → host)
|
||||
|
||||
#if !os(tvOS)
|
||||
private func startCapture(micUID: String) {
|
||||
private func startCapture(micUID: String, micChannel: Int) {
|
||||
let engine = AVAudioEngine()
|
||||
let input = engine.inputNode
|
||||
#if os(macOS)
|
||||
@@ -300,8 +306,63 @@ public final class SessionAudio {
|
||||
log.error("no usable input device — mic uplink disabled")
|
||||
return
|
||||
}
|
||||
guard let encoder = try? OpusEncoder(),
|
||||
let resampler = AVAudioConverter(from: inFormat, to: encoder.pcmFormat),
|
||||
|
||||
// Multi-channel-interface handling. A pro interface exposes N discrete inputs with the mic
|
||||
// on ONE of them, but AVAudioConverter's N→stereo downmix takes channels 0/1 — dead
|
||||
// silence when the mic sits higher up (the classic "host receives zeros"). So we fold the
|
||||
// input to a single mono bus OURSELVES and resample that. micChannel: 0 = Auto (sum every
|
||||
// channel — a lone hot mic passes at full level), n≥1 pins 1-based input channel n.
|
||||
let inChannels = Int(inFormat.channelCount)
|
||||
let pinnedChannel: Int? = {
|
||||
guard micChannel >= 1 else { return nil }
|
||||
let idx = micChannel - 1
|
||||
guard idx < inChannels else {
|
||||
log.warning(
|
||||
"mic channel \(micChannel) out of range (device has \(inChannels)) — mixing all")
|
||||
return nil
|
||||
}
|
||||
return idx
|
||||
}()
|
||||
let channelPlan = pinnedChannel.map { "channel \($0 + 1)/\(inChannels)" }
|
||||
?? (inChannels > 1 ? "mix \(inChannels)ch→mono" : "mono")
|
||||
|
||||
// Name the device we're ACTUALLY recording from + its format + how we fold it, once per
|
||||
// session. This single line localizes the whole class of "host receives silence" failures
|
||||
// that otherwise need a host-side tone injection to pin down: a UID that silently fell back
|
||||
// to the default, the wrong device being live, or the wrong channel picked.
|
||||
#if os(macOS)
|
||||
if let unit = input.audioUnit, let live = Self.currentDevice(of: unit),
|
||||
let dev = AudioDevices.describe(live) {
|
||||
if !micUID.isEmpty, dev.uid != micUID {
|
||||
log.warning("""
|
||||
mic selection not honored — requested \(micUID) but capturing from \
|
||||
\(dev.name) [\(dev.uid)]; the device's UID likely changed (replug) — \
|
||||
reselect it in Settings
|
||||
""")
|
||||
}
|
||||
log.info("""
|
||||
mic capture: \(dev.name) [\(dev.uid)] — \(Int(inFormat.sampleRate)) Hz, \
|
||||
\(inChannels) ch, \(channelPlan)
|
||||
""")
|
||||
} else {
|
||||
log.info("""
|
||||
mic capture: <device unavailable> — \(Int(inFormat.sampleRate)) Hz, \
|
||||
\(inChannels) ch, \(channelPlan)
|
||||
""")
|
||||
}
|
||||
#else
|
||||
log.info(
|
||||
"mic capture: \(Int(inFormat.sampleRate)) Hz, \(inChannels) ch, \(channelPlan)")
|
||||
#endif
|
||||
|
||||
// Encode a single mono bus (folded from `inFormat` in the tap): the resampler goes
|
||||
// mono@inputSR → the encoder's 48 kHz stereo, so it handles both the rate change and the
|
||||
// mono→stereo duplication, and the wrong-channel downmix never happens.
|
||||
guard let monoFormat = AVAudioFormat(
|
||||
commonFormat: .pcmFormatFloat32, sampleRate: inFormat.sampleRate,
|
||||
channels: 1, interleaved: false),
|
||||
let encoder = try? OpusEncoder(),
|
||||
let resampler = AVAudioConverter(from: monoFormat, to: encoder.pcmFormat),
|
||||
let chunk = AVAudioPCMBuffer(
|
||||
pcmFormat: encoder.pcmFormat, frameCapacity: OpusEncoder.framesPerPacket)
|
||||
else {
|
||||
@@ -317,11 +378,59 @@ public final class SessionAudio {
|
||||
let connection = connection
|
||||
let flag = flag
|
||||
|
||||
// Silence tripwire (tap-confined): a "recording" app can be handed pure digital zeros —
|
||||
// a zeroed input-volume slider, a stale TCC grant, a muted device, OR the wrong channel
|
||||
// picked — and everything downstream looks alive while the host gets silence. Track the
|
||||
// peak of the EXTRACTED mono bus over the first ~10 s (not the raw device — a mic present
|
||||
// on a channel we didn't grab must still read as silence) and emit exactly ONE verdict.
|
||||
// This is the log line whose absence made the last occurrence take a host-side tone.
|
||||
let silenceWindow = Int(inFormat.sampleRate * 10)
|
||||
let deviceLabel = micUID.isEmpty ? "default input" : micUID
|
||||
var framesInspected = 0
|
||||
var inputPeak: Float = 0
|
||||
var levelReported = false
|
||||
|
||||
input.installTap(onBus: 0, bufferSize: 2048, format: inFormat) { buffer, _ in
|
||||
if flag.isStopped { return }
|
||||
let frames = Int(buffer.frameLength)
|
||||
guard frames > 0, let src = buffer.floatChannelData,
|
||||
let mono = AVAudioPCMBuffer(
|
||||
pcmFormat: monoFormat, frameCapacity: buffer.frameLength),
|
||||
let dst = mono.floatChannelData?[0]
|
||||
else { return }
|
||||
mono.frameLength = buffer.frameLength
|
||||
|
||||
// Fold the multi-channel input down to the one mono bus we encode.
|
||||
Self.foldToMono(
|
||||
input: src, frames: frames, channels: Int(buffer.format.channelCount),
|
||||
interleaved: buffer.format.isInterleaved, pinned: pinnedChannel, out: dst)
|
||||
|
||||
if !levelReported {
|
||||
var localPeak: Float = 0
|
||||
for i in 0..<frames where abs(dst[i]) > localPeak { localPeak = abs(dst[i]) }
|
||||
if localPeak > inputPeak { inputPeak = localPeak }
|
||||
framesInspected += frames
|
||||
if framesInspected >= silenceWindow {
|
||||
levelReported = true
|
||||
if inputPeak == 0 {
|
||||
log.warning("""
|
||||
mic uplink has been pure digital SILENCE for 10 s (\(deviceLabel), \
|
||||
\(channelPlan)) — check the input level (System Settings → Sound → \
|
||||
Input), Privacy & Security → Microphone, and the Microphone channel in \
|
||||
Settings; the host is receiving zeros
|
||||
""")
|
||||
} else {
|
||||
let dbfs = 20 * log10(inputPeak)
|
||||
log.info("""
|
||||
mic uplink OK — peak \(String(format: "%.1f", dbfs)) dBFS over first \
|
||||
10 s (\(deviceLabel), \(channelPlan))
|
||||
""")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let ratio = 48_000 / inFormat.sampleRate
|
||||
let outCapacity = AVAudioFrameCount(
|
||||
(Double(buffer.frameLength) * ratio).rounded(.up) + 64)
|
||||
let outCapacity = AVAudioFrameCount((Double(frames) * ratio).rounded(.up) + 64)
|
||||
guard let staging = AVAudioPCMBuffer(
|
||||
pcmFormat: encoder.pcmFormat, frameCapacity: outCapacity)
|
||||
else { return }
|
||||
@@ -334,7 +443,7 @@ public final class SessionAudio {
|
||||
}
|
||||
fed = true
|
||||
outStatus.pointee = .haveData
|
||||
return buffer
|
||||
return mono
|
||||
}
|
||||
guard status != .error, let p = staging.floatChannelData?[0] else { return }
|
||||
fifo.append(contentsOf: UnsafeBufferPointer(
|
||||
@@ -378,6 +487,42 @@ public final class SessionAudio {
|
||||
stateLock.unlock()
|
||||
log.info("mic uplink started (\(micUID.isEmpty ? "default input" : micUID))")
|
||||
}
|
||||
|
||||
/// Fold `channels` of float input down to a single mono bus in `out` (`frames` samples long).
///
/// `input` follows the `floatChannelData` layout: when `interleaved` is true, `input[0]` is
/// one buffer strided by `channels`; otherwise there is one buffer per channel.
///
/// - Parameters:
///   - input: Table of channel buffers (only `input[0]` is read when `interleaved`).
///   - frames: Number of frames to produce; nothing is written when this is `<= 0`.
///   - channels: Number of input channels. When `<= 0` there is nothing to read, so `out` is
///     filled with silence instead of indexing `input`.
///   - interleaved: Whether `input[0]` holds all channels interleaved.
///   - pinned: 0-based channel to copy verbatim — the fix for a mic on one input of a
///     multi-channel interface. `nil`, a negative value, or a value `>= channels` sums every
///     channel instead, clamped to [-1, 1], so a lone hot channel still passes at full level.
///   - out: Destination buffer with room for at least `frames` samples.
///
/// Pure + `internal` for unit testing the index math.
static func foldToMono(
    input: UnsafePointer<UnsafeMutablePointer<Float>>, frames: Int, channels: Int,
    interleaved: Bool, pinned: Int?, out: UnsafeMutablePointer<Float>
) {
    // `0..<frames` traps for a negative count; with nothing to produce, write nothing.
    guard frames > 0 else { return }

    // `1..<channels` traps for channels == 0 and `input[0]` would be invalid: emit silence.
    guard channels > 0 else {
        for i in 0..<frames { out[i] = 0 }
        return
    }

    // A pin is honoured only when it names a real channel; a negative pin would index before
    // the buffer, so it takes the same "sum everything" fallback as a too-large pin.
    if let ch = pinned, ch >= 0, ch < channels {
        if interleaved {
            let d = input[0]
            for i in 0..<frames { out[i] = d[i * channels + ch] }
        } else {
            let d = input[ch]
            for i in 0..<frames { out[i] = d[i] }
        }
        return
    }

    if interleaved {
        let d = input[0]
        for i in 0..<frames {
            var s: Float = 0
            for c in 0..<channels { s += d[i * channels + c] }
            out[i] = max(-1, min(1, s))
        }
    } else {
        // Seed with channel 0, then accumulate the rest; a plain mono device is passed
        // through untouched (no clamp).
        let d0 = input[0]
        for i in 0..<frames { out[i] = d0[i] }
        for c in 1..<channels {
            let d = input[c]
            for i in 0..<frames { out[i] += d[i] }
        }
        if channels > 1 {
            for i in 0..<frames { out[i] = max(-1, min(1, out[i])) }
        }
    }
}
|
||||
#endif
|
||||
|
||||
#if os(macOS)
|
||||
@@ -387,5 +532,18 @@ public final class SessionAudio {
|
||||
unit, kAudioOutputUnitProperty_CurrentDevice, kAudioUnitScope_Global, 0,
|
||||
&dev, UInt32(MemoryLayout<AudioDeviceID>.size)) == noErr
|
||||
}
|
||||
|
||||
/// Reads `kAudioOutputUnitProperty_CurrentDevice` back from `unit` — the device the unit is
/// actually bound to, as opposed to the one that was requested. Comparing the two exposes a
/// selection that silently ended up on another device (a stale or changed UID, or a device
/// that vanished between resolve and start).
///
/// - Parameter unit: The audio unit to query.
/// - Returns: The live `AudioDeviceID`, or `nil` when the read fails or reports device id 0.
private static func currentDevice(of unit: AudioUnit) -> AudioDeviceID? {
    var device = AudioDeviceID(0)
    var byteCount = UInt32(MemoryLayout<AudioDeviceID>.size)
    let result = AudioUnitGetProperty(
        unit, kAudioOutputUnitProperty_CurrentDevice, kAudioUnitScope_Global, 0,
        &device, &byteCount)
    // Either an error or the zero id means we could not tell which device is live.
    if result != noErr || device == 0 { return nil }
    return device
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -24,6 +24,12 @@ public enum DefaultsKey {
|
||||
public static let micEnabled = "punktfunk.micEnabled"
|
||||
public static let speakerUID = "punktfunk.speakerUID"
|
||||
public static let micUID = "punktfunk.micUID"
|
||||
/// macOS: which input channel of the chosen mic device feeds the host. 0 = "Auto" (sum every
|
||||
/// channel to mono — a mic on a single input of a multi-channel interface passes at full
|
||||
/// level); n≥1 pins 1-based input channel n. Multi-channel interfaces expose the mic on ONE
|
||||
/// discrete channel, and the default N→stereo downmix grabs channels 0/1 (silence when the mic
|
||||
/// is higher up), so we fold to mono ourselves. Only meaningful for multi-channel devices.
|
||||
public static let micChannel = "punktfunk.micChannel"
|
||||
public static let presenter = "punktfunk.presenter"
|
||||
/// Request a 10-bit BT.2020 PQ (HDR10) stream. On by default; only takes effect when the host
|
||||
/// has HDR content AND this display supports HDR — otherwise the stream stays 8-bit SDR.
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
// Multi-channel input → mono fold (SessionAudio.foldToMono): the fix for a mic on one channel of
|
||||
// a multi-channel interface. AVAudioConverter's default N→stereo downmix grabs channels 0/1 — dead
|
||||
// silence when the mic sits higher up — so we fold ourselves. This pins the fiddly bits (the
|
||||
// interleaved stride, channel pinning, the sum-clamp) against regressions without needing hardware.
|
||||
|
||||
#if !os(tvOS)
|
||||
import XCTest
|
||||
|
||||
@testable import PunktfunkKit
|
||||
|
||||
/// Regression tests for `SessionAudio.foldToMono`: channel pinning in both buffer layouts,
/// the all-channel sum, its clamp, mono pass-through, and the out-of-range pin fallback.
final class AudioChannelFoldTests: XCTestCase {
    /// Runs `foldToMono` over channel data given as `[[Float]]` and returns the mono result.
    ///
    /// `planes` mirrors the two `floatChannelData` layouts:
    /// - deinterleaved: one inner array per channel, each `frames` long;
    /// - interleaved: a single inner array already interleaved (c0f0, c1f0, …), with the
    ///   real channel count supplied through `channels`.
    private func fold(
        _ planes: [[Float]], frames: Int, channels: Int, interleaved: Bool, pinned: Int?
    ) -> [Float] {
        // One C buffer per plane, bulk-copied from the Swift array.
        var planeBuffers: [UnsafeMutablePointer<Float>] = []
        planeBuffers.reserveCapacity(planes.count)
        for samples in planes {
            let storage = UnsafeMutablePointer<Float>.allocate(capacity: samples.count)
            storage.initialize(from: samples, count: samples.count)
            planeBuffers.append(storage)
        }

        // A table of pointers to those buffers — the shape of floatChannelData.
        let pointerTable = UnsafeMutablePointer<UnsafeMutablePointer<Float>>.allocate(
            capacity: planeBuffers.count)
        pointerTable.initialize(from: planeBuffers, count: planeBuffers.count)

        let mono = UnsafeMutablePointer<Float>.allocate(capacity: frames)
        defer {
            for storage in planeBuffers { storage.deallocate() }
            pointerTable.deallocate()
            mono.deallocate()
        }

        SessionAudio.foldToMono(
            input: pointerTable, frames: frames, channels: channels,
            interleaved: interleaved, pinned: pinned, out: mono)
        return Array(UnsafeBufferPointer(start: mono, count: frames))
    }

    // The exact fix: a mic on a HIGH channel (not 0/1) is copied verbatim when pinned.
    func testPinsHigherChannelDeinterleaved() {
        let silent: [Float] = [0, 0, 0]
        let mic: [Float] = [0.1, 0.2, 0.3]
        let folded = fold(
            [silent, silent, mic, silent],
            frames: 3, channels: 4, interleaved: false, pinned: 2)
        XCTAssertEqual(folded, [0.1, 0.2, 0.3])
    }

    // Same signal in the interleaved layout [c0f0,c1f0,c2f0,c3f0, c0f1,…]; guards the
    // `i*ch + c` stride.
    func testPinsHigherChannelInterleaved() {
        let frame0: [Float] = [0, 0, 0.1, 0]
        let frame1: [Float] = [0, 0, 0.2, 0]
        let frame2: [Float] = [0, 0, 0.3, 0]
        let folded = fold(
            [frame0 + frame1 + frame2],
            frames: 3, channels: 4, interleaved: true, pinned: 2)
        XCTAssertEqual(folded, [0.1, 0.2, 0.3])
    }

    // Auto (pinned: nil): a lone hot channel amid silence passes at FULL level.
    func testAutoSumsAllChannelsSoALoneMicSurvives() {
        let quiet: [Float] = [0, 0]
        let hot: [Float] = [0.4, -0.4]
        let folded = fold(
            [quiet, hot, quiet],
            frames: 2, channels: 3, interleaved: false, pinned: nil)
        XCTAssertEqual(folded, [0.4, -0.4])
    }

    // Two simultaneously hot channels sum past the unit range and must be clamped.
    func testAutoSumClampsToUnitRange() {
        let first: [Float] = [0.8, -0.8]
        let second: [Float] = [0.9, -0.9]
        let folded = fold(
            [first, second],
            frames: 2, channels: 2, interleaved: false, pinned: nil)
        XCTAssertEqual(folded, [1.0, -1.0])
    }

    // A plain mono device passes through untouched (no clamp, no attenuation).
    func testMonoIsIdentity() {
        let signal: [Float] = [0.25, -0.5, 0.75]
        let folded = fold(
            [signal], frames: 3, channels: 1, interleaved: false, pinned: nil)
        XCTAssertEqual(folded, [0.25, -0.5, 0.75])
    }

    // Belt-and-suspenders: the tap already guards the pin, but the setting is persisted, so
    // foldToMono's own `ch < channels` check must sum instead of reading past the buffer.
    func testOutOfRangePinFallsBackToSum() {
        let quiet: [Float] = [0, 0]
        let hot: [Float] = [0.3, 0.3]
        let folded = fold(
            [quiet, hot],
            frames: 2, channels: 2, interleaved: false, pinned: 2)
        XCTAssertEqual(folded, [0.3, 0.3])
    }
}
|
||||
#endif
|
||||
+32
-10
@@ -41,7 +41,7 @@
|
||||
//! Usage: `punktfunk-probe [--connect HOST:PORT] [--mode WxHxFPS] [--remode WxHxFPS:SECS]
|
||||
//! [--out FILE] [--bitrate KBPS] [--codec auto|h264|hevc|av1] [--audio-channels 2|6|8]
|
||||
//! [--launch APP] [--name NAME] [--speed-test KBPS:MS]
|
||||
//! [--input-test | --mic-test | --touch-test | --rich-input-test]
|
||||
//! [--input-test | --mic-test [--mic-burst] | --touch-test | --rich-input-test]
|
||||
//! [--pin HEX | --pair PIN] [--compositor NAME] [--gamepad NAME] | --discover [SECS]`
|
||||
//! Env: `PUNKTFUNK_CLIENT_10BIT=1` / `PUNKTFUNK_CLIENT_444=1` advertise the 10-bit / 4:4:4 caps.
|
||||
|
||||
@@ -65,6 +65,9 @@ struct Args {
|
||||
input_test: bool,
|
||||
/// `--mic-test` — stream a synthetic 440 Hz tone as the mic uplink (proves the mic path).
|
||||
mic_test: bool,
|
||||
/// `--mic-burst` — pace the mic-test like a real client's input tap (2× 20 ms per 40 ms),
|
||||
/// the arrival shape that exercises host-side jitter buffering.
|
||||
mic_burst: bool,
|
||||
/// `--touch-test` — drag a synthetic finger in a circle (proves the touch path).
|
||||
touch_test: bool,
|
||||
/// `--rich-input-test` — drive the DualSense touchpad + motion over 0xCC (host needs
|
||||
@@ -205,6 +208,7 @@ fn parse_args() -> Args {
|
||||
out: get("--out").map(String::from),
|
||||
input_test: argv.iter().any(|a| a == "--input-test"),
|
||||
mic_test: argv.iter().any(|a| a == "--mic-test"),
|
||||
mic_burst: argv.iter().any(|a| a == "--mic-burst"),
|
||||
touch_test: argv.iter().any(|a| a == "--touch-test"),
|
||||
rich_input_test: argv.iter().any(|a| a == "--rich-input-test"),
|
||||
pin,
|
||||
@@ -740,9 +744,16 @@ async fn session(args: Args) -> Result<()> {
|
||||
});
|
||||
}
|
||||
|
||||
// Mic plane: stream a synthetic 440 Hz tone as the mic uplink (0xCB), Opus-encoded 5 ms
|
||||
// stereo frames — proves client→host mic passthrough end to end without a real microphone
|
||||
// (the host decodes it into its virtual PipeWire source; record that source to hear the tone).
|
||||
// Mic plane: stream a synthetic 440 Hz tone as the mic uplink (0xCB) — proves client→host
|
||||
// mic passthrough end to end without a real microphone (the host decodes it into its virtual
|
||||
// source; record that source to hear the tone). Two pacing modes:
|
||||
// default — Opus 5 ms frames on a steady 5 ms tick (smooth arrival).
|
||||
// --mic-burst — two 20 ms Opus frames back-to-back every 40 ms, replicating a real
|
||||
// client's input-tap cadence (the Mac client's AVAudioEngine tap yields
|
||||
// ~2048-frame buffers → two packets per ~42 ms). This is the arrival
|
||||
// pattern that exposed the Windows host's missing jitter buffer (constant
|
||||
// crackle, 2026-07-03): a steady 5 ms stream never trips it. Record the
|
||||
// host mic and count silence gaps to regression-test host-side buffering.
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
if args.mic_test {
|
||||
tracing::warn!("--mic-test requires Linux (libopus) — skipped");
|
||||
@@ -750,6 +761,7 @@ async fn session(args: Args) -> Result<()> {
|
||||
#[cfg(target_os = "linux")]
|
||||
if args.mic_test {
|
||||
let conn2 = conn.clone();
|
||||
let burst = args.mic_burst;
|
||||
tokio::spawn(async move {
|
||||
let mut enc =
|
||||
match opus::Encoder::new(48_000, opus::Channels::Stereo, opus::Application::Voip) {
|
||||
@@ -760,15 +772,23 @@ async fn session(args: Args) -> Result<()> {
|
||||
}
|
||||
};
|
||||
let _ = enc.set_bitrate(opus::Bitrate::Bits(64_000));
|
||||
tracing::info!("mic-test: streaming a 440 Hz tone as the mic uplink");
|
||||
// Frame size + tick per pacing mode; `per_tick` packets are sent back-to-back.
|
||||
let (frame, tick_ms, per_tick) = if burst {
|
||||
(960usize, 40u64, 2u32) // 2× 20 ms every 40 ms — the bursty real-client shape
|
||||
} else {
|
||||
(240usize, 5u64, 1u32) // 5 ms frames on a smooth tick
|
||||
};
|
||||
tracing::info!(burst, "mic-test: streaming a 440 Hz tone as the mic uplink");
|
||||
let mut phase = 0.0f32;
|
||||
let step = 2.0 * std::f32::consts::PI * 440.0 / 48_000.0;
|
||||
let mut pcm = [0f32; 240 * 2]; // 5 ms stereo
|
||||
let mut pcm = vec![0f32; frame * 2];
|
||||
let mut out = [0u8; 4000];
|
||||
let mut interval = tokio::time::interval(std::time::Duration::from_millis(5));
|
||||
for seq in 0u32.. {
|
||||
let mut interval = tokio::time::interval(std::time::Duration::from_millis(tick_ms));
|
||||
let mut seq = 0u32;
|
||||
'stream: loop {
|
||||
interval.tick().await;
|
||||
for f in 0..240 {
|
||||
for _ in 0..per_tick {
|
||||
for f in 0..frame {
|
||||
let s = (phase.sin()) * 0.25;
|
||||
phase += step;
|
||||
if phase > std::f32::consts::PI * 2.0 {
|
||||
@@ -780,9 +800,11 @@ async fn session(args: Args) -> Result<()> {
|
||||
if let Ok(n) = enc.encode_float(&pcm, &mut out) {
|
||||
let d = punktfunk_core::quic::encode_mic_datagram(seq, now_ns(), &out[..n]);
|
||||
if conn2.send_datagram(d.into()).is_err() {
|
||||
break;
|
||||
break 'stream;
|
||||
}
|
||||
}
|
||||
seq = seq.wrapping_add(1);
|
||||
}
|
||||
}
|
||||
tracing::info!("mic-test: done");
|
||||
});
|
||||
|
||||
@@ -19,10 +19,11 @@
|
||||
//! returns `false` and the pump reopens (re-planning, so endpoint churn re-resolves). Before this
|
||||
//! existed, the first device change silently killed mic passthrough for the rest of the host's life.
|
||||
//!
|
||||
//! `push` enqueues decoded interleaved-f32 PCM into a bounded ring (drop-oldest beyond ~80 ms so mic
|
||||
//! latency stays bounded); a dedicated COM-apartment thread renders it event-driven, filling silence
|
||||
//! when the client isn't talking. WASAPI objects are `!Send`, so they live entirely on that thread
|
||||
//! (mirrors `WasapiLoopbackCapturer`).
|
||||
//! `push` enqueues decoded interleaved-f32 PCM into a bounded ring (drop-oldest beyond ~120 ms so
|
||||
//! mic latency stays bounded); a dedicated COM-apartment thread renders it event-driven through an
|
||||
//! adaptive jitter buffer (prime → hold → re-prime, see the render loop — clients arrive in bursts,
|
||||
//! the device pulls per-period), filling silence when the client isn't talking. WASAPI objects are
|
||||
//! `!Send`, so they live entirely on that thread (mirrors `WasapiLoopbackCapturer`).
|
||||
|
||||
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it.
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
@@ -40,8 +41,17 @@ use wasapi::{Direction, SampleType, StreamMode, WaveFormat};
|
||||
const CHANNELS: u32 = 2;
|
||||
/// 48 kHz stereo f32: 2 channels * 4 bytes.
|
||||
const BLOCK_ALIGN: usize = 2 * 4;
|
||||
/// Bound the inject queue at ~80 ms so the passed-through mic stays low-latency (drop oldest beyond).
|
||||
const MAX_QUEUE_BYTES: usize = (SAMPLE_RATE as usize * 80 / 1000) * BLOCK_ALIGN;
|
||||
/// Jitter-buffer priming depth (~48 ms): the render loop emits pure silence until this much PCM
|
||||
/// is queued, then plays from the cushion. Clients deliver mic audio in BURSTS (the Mac client's
|
||||
/// input tap yields ~two 20 ms Opus packets every ~42 ms) while WASAPI pulls a small block every
|
||||
/// device period (~10 ms) — with no cushion the queue sits near-empty and most periods insert
|
||||
/// mid-stream silence: the "crackling mic" (heard live, Mac → Windows host 2026-07-03; the Linux
|
||||
/// backend's process callback primes the same way and the identical stream was clean there). The
|
||||
/// depth must cover the worst inter-burst gap (~42 ms), so ~48 ms with re-prime on a full drain.
|
||||
const PRIME_BYTES: usize = (SAMPLE_RATE as usize * 48 / 1000) * BLOCK_ALIGN;
|
||||
/// Bound the inject queue at ~120 ms so the passed-through mic stays low-latency (drop oldest
|
||||
/// beyond): the priming cushion (~48 ms) plus arrival-burst headroom.
|
||||
const MAX_QUEUE_BYTES: usize = (SAMPLE_RATE as usize * 120 / 1000) * BLOCK_ALIGN;
|
||||
|
||||
pub struct WasapiVirtualMic {
|
||||
queue: Arc<Mutex<VecDeque<u8>>>,
|
||||
@@ -299,7 +309,17 @@ fn render_thread(
|
||||
|
||||
// Any error below (endpoint invalidated/removed, engine restart) propagates out of the loop,
|
||||
// ending the thread — the `alive` flag flips in the spawn wrapper and the pump reopens.
|
||||
//
|
||||
// Adaptive jitter buffer (mirrors the Linux backend's process callback): clients push mic
|
||||
// audio in bursts on their own clock while the device pulls a block every period from an
|
||||
// independent clock, so a greedy per-period drain leaves the queue near-empty and pads most
|
||||
// periods with mid-stream silence — audible as constant crackling. Instead: emit silence
|
||||
// until [`PRIME_BYTES`] is buffered, then play from the cushion (zero-filling only a
|
||||
// momentary shortfall), and re-prime only after a genuine FULL drain (the client went quiet —
|
||||
// between talk spurts the cushion rebuilds, and [`VirtualMic::discard`] resets it across
|
||||
// session gaps).
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
let mut primed = false;
|
||||
while !stop.load(Ordering::Relaxed) {
|
||||
// The device signals when it wants more data; finite timeout keeps `stop` responsive.
|
||||
if h_event.wait_for_event(100).is_err() {
|
||||
@@ -315,14 +335,22 @@ fn render_thread(
|
||||
if buf.len() < need {
|
||||
buf.resize(need, 0);
|
||||
}
|
||||
// Silence base; overwrite with queued mic PCM (zero-pad the tail when the client is quiet).
|
||||
// Silence base; overwrite with queued mic PCM once the cushion is primed.
|
||||
buf[..need].fill(0);
|
||||
{
|
||||
let mut q = queue.lock().unwrap();
|
||||
if !primed && q.len() >= PRIME_BYTES {
|
||||
primed = true;
|
||||
}
|
||||
if primed {
|
||||
let n = q.len().min(need);
|
||||
for (i, b) in q.drain(..n).enumerate() {
|
||||
buf[i] = b;
|
||||
}
|
||||
if q.is_empty() {
|
||||
primed = false; // fully drained — re-prime before producing again
|
||||
}
|
||||
}
|
||||
}
|
||||
render_client
|
||||
.write_to_device(space, &buf[..need], None)
|
||||
|
||||
Reference in New Issue
Block a user