From 00acf5e44edcf277bf6f8b33194aa7bb96081ae3 Mon Sep 17 00:00:00 2001
From: enricobuehler <buehler@unom.io>
Date: Fri, 3 Jul 2026 22:31:18 +0000
Subject: [PATCH 1/3] =?UTF-8?q?fix(host/audio):=20WASAPI=20virtual=20mic?=
 =?UTF-8?q?=20=E2=80=94=20port=20the=20priming=20jitter=20buffer=20(crackl?=
 =?UTF-8?q?ing=20fix)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mac → Windows mic passthrough crackled heavily while the identical
stream was clean on the Linux host. Cause: clients push mic audio in
BURSTS on their own clock (the Mac input tap yields ~two 20 ms Opus
packets every ~42 ms) while the WASAPI render loop pulled a block every
~10 ms device period and greedily drained whatever was queued, padding
the rest with zeros — the queue sat near-empty and most periods
inserted mid-stream silence. The Linux backend has absorbed this since
day one with its priming jitter buffer; the WASAPI loop had none.

Port the same semantics: emit silence until ~48 ms is buffered (covers
the worst inter-burst gap), then play from the cushion (zero-filling
only a momentary shortfall), re-prime only after a genuine full drain
(client went quiet). Queue cap raised 80 → 120 ms for burst headroom;
steady-state added latency ≈ the 48 ms cushion.

Diagnosed live on .173: probe tone recording from CABLE Output proved
the endpoint wiring, then the burst-vs-period math explained the
crackle. Build-verified on Windows; on-glass listen pending.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .../src/audio/windows/wasapi_mic.rs           | 48 +++++++++++++++----
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/crates/punktfunk-host/src/audio/windows/wasapi_mic.rs b/crates/punktfunk-host/src/audio/windows/wasapi_mic.rs
index cfb8f8b..1bf825f 100644
--- a/crates/punktfunk-host/src/audio/windows/wasapi_mic.rs
+++ b/crates/punktfunk-host/src/audio/windows/wasapi_mic.rs
@@ -19,10 +19,11 @@
 //! returns `false` and the pump reopens (re-planning, so endpoint churn re-resolves). Before this
 //! existed, the first device change silently killed mic passthrough for the rest of the host's life.
 //!
-//! `push` enqueues decoded interleaved-f32 PCM into a bounded ring (drop-oldest beyond ~80 ms so mic
-//! latency stays bounded); a dedicated COM-apartment thread renders it event-driven, filling silence
-//! when the client isn't talking. WASAPI objects are `!Send`, so they live entirely on that thread
-//! (mirrors `WasapiLoopbackCapturer`).
+//! `push` enqueues decoded interleaved-f32 PCM into a bounded ring (drop-oldest beyond ~120 ms so
+//! mic latency stays bounded); a dedicated COM-apartment thread renders it event-driven through a
+//! priming jitter buffer (prime → hold → re-prime, see the render loop — clients arrive in bursts,
+//! the device pulls per-period), filling silence when the client isn't talking. WASAPI objects are
+//! `!Send`, so they live entirely on that thread (mirrors `WasapiLoopbackCapturer`).
 
 // Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it.
 #![deny(clippy::undocumented_unsafe_blocks)]
@@ -40,8 +41,17 @@ use wasapi::{Direction, SampleType, StreamMode, WaveFormat};
 const CHANNELS: u32 = 2;
 /// 48 kHz stereo f32: 2 channels * 4 bytes.
 const BLOCK_ALIGN: usize = 2 * 4;
-/// Bound the inject queue at ~80 ms so the passed-through mic stays low-latency (drop oldest beyond).
-const MAX_QUEUE_BYTES: usize = (SAMPLE_RATE as usize * 80 / 1000) * BLOCK_ALIGN;
+/// Jitter-buffer priming depth (~48 ms): the render loop emits pure silence until this much PCM
+/// is queued, then plays from the cushion. Clients deliver mic audio in BURSTS (the Mac client's
+/// input tap yields ~two 20 ms Opus packets every ~42 ms) while WASAPI pulls a small block every
+/// device period (~10 ms) — with no cushion the queue sits near-empty and most periods insert
+/// mid-stream silence: the "crackling mic" (heard live, Mac → Windows host 2026-07-03; the Linux
+/// backend's process callback primes the same way and the identical stream was clean there). The
+/// depth must cover the worst inter-burst gap (~42 ms), so ~48 ms with re-prime on a full drain.
+const PRIME_BYTES: usize = (SAMPLE_RATE as usize * 48 / 1000) * BLOCK_ALIGN;
+/// Bound the inject queue at ~120 ms so the passed-through mic stays low-latency (drop oldest
+/// beyond): the priming cushion (~48 ms) plus arrival-burst headroom.
+const MAX_QUEUE_BYTES: usize = (SAMPLE_RATE as usize * 120 / 1000) * BLOCK_ALIGN;
 
 pub struct WasapiVirtualMic {
     queue: Arc<Mutex<VecDeque<u8>>>,
@@ -299,7 +309,17 @@ fn render_thread(
 
     // Any error below (endpoint invalidated/removed, engine restart) propagates out of the loop,
     // ending the thread — the `alive` flag flips in the spawn wrapper and the pump reopens.
+    //
+    // Priming jitter buffer (mirrors the Linux backend's process callback): clients push mic
+    // audio in bursts on their own clock while the device pulls a block every period from an
+    // independent clock, so a greedy per-period drain leaves the queue near-empty and pads most
+    // periods with mid-stream silence — audible as constant crackling. Instead: emit silence
+    // until [`PRIME_BYTES`] is buffered, then play from the cushion (zero-filling only a
+    // momentary shortfall), and re-prime only after a genuine FULL drain (the client went quiet —
+    // between talk spurts the cushion rebuilds, and [`VirtualMic::discard`] resets it across
+    // session gaps).
     let mut buf: Vec<u8> = Vec::new();
+    let mut primed = false;
     while !stop.load(Ordering::Relaxed) {
         // The device signals when it wants more data; finite timeout keeps `stop` responsive.
         if h_event.wait_for_event(100).is_err() {
@@ -315,13 +335,21 @@ fn render_thread(
         if buf.len() < need {
             buf.resize(need, 0);
         }
-        // Silence base; overwrite with queued mic PCM (zero-pad the tail when the client is quiet).
+        // Silence base; overwrite with queued mic PCM once the cushion is primed.
         buf[..need].fill(0);
         {
             let mut q = queue.lock().unwrap();
-            let n = q.len().min(need);
-            for (i, b) in q.drain(..n).enumerate() {
-                buf[i] = b;
+            if !primed && q.len() >= PRIME_BYTES {
+                primed = true;
+            }
+            if primed {
+                let n = q.len().min(need);
+                for (i, b) in q.drain(..n).enumerate() {
+                    buf[i] = b;
+                }
+                if q.is_empty() {
+                    primed = false; // fully drained — re-prime before producing again
+                }
             }
         }
         render_client

From 136f6e8f0eb402b8e3ae0c4be7d890b26219ccff Mon Sep 17 00:00:00 2001
From: enricobuehler <buehler@unom.io>
Date: Fri, 3 Jul 2026 22:35:49 +0000
Subject: [PATCH 2/3] =?UTF-8?q?feat(probe):=20--mic-burst=20=E2=80=94=20re?=
 =?UTF-8?q?al-client=20mic=20pacing=20for=20jitter-buffer=20regression=20t?=
 =?UTF-8?q?ests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The steady 5 ms mic-test cadence never trips host-side buffering bugs:
the WASAPI crackle (fixed in the previous commit) only reproduced under
a real client's bursty input tap. --mic-burst paces the tone the same
way (two 20 ms Opus packets every 40 ms), so recording the host mic and
counting silence gaps regression-tests the jitter buffer headlessly.
Validated against the fixed Windows host on the lab box: 15 s of bursty
tone, zero mid-stream gaps >=3 ms (gaps confined to the first 40 ms
priming window).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 clients/probe/src/main.rs | 62 ++++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/clients/probe/src/main.rs b/clients/probe/src/main.rs
index 9b39e93..7bf17ff 100644
--- a/clients/probe/src/main.rs
+++ b/clients/probe/src/main.rs
@@ -41,7 +41,7 @@
 //! Usage: `punktfunk-probe [--connect HOST:PORT] [--mode WxHxFPS] [--remode WxHxFPS:SECS]
 //!         [--out FILE] [--bitrate KBPS] [--codec auto|h264|hevc|av1] [--audio-channels 2|6|8]
 //!         [--launch APP] [--name NAME] [--speed-test KBPS:MS]
-//!         [--input-test | --mic-test | --touch-test | --rich-input-test]
+//!         [--input-test | --mic-test [--mic-burst] | --touch-test | --rich-input-test]
 //!         [--pin HEX | --pair PIN] [--compositor NAME] [--gamepad NAME] | --discover [SECS]`
 //! Env: `PUNKTFUNK_CLIENT_10BIT=1` / `PUNKTFUNK_CLIENT_444=1` advertise the 10-bit / 4:4:4 caps.
 
@@ -65,6 +65,9 @@ struct Args {
     input_test: bool,
     /// `--mic-test` — stream a synthetic 440 Hz tone as the mic uplink (proves the mic path).
     mic_test: bool,
+    /// `--mic-burst` — pace the mic-test like a real client's input tap (2× 20 ms per 40 ms),
+    /// the arrival shape that exercises host-side jitter buffering.
+    mic_burst: bool,
     /// `--touch-test` — drag a synthetic finger in a circle (proves the touch path).
     touch_test: bool,
     /// `--rich-input-test` — drive the DualSense touchpad + motion over 0xCC (host needs
@@ -205,6 +208,7 @@ fn parse_args() -> Args {
         out: get("--out").map(String::from),
         input_test: argv.iter().any(|a| a == "--input-test"),
         mic_test: argv.iter().any(|a| a == "--mic-test"),
+        mic_burst: argv.iter().any(|a| a == "--mic-burst"),
         touch_test: argv.iter().any(|a| a == "--touch-test"),
         rich_input_test: argv.iter().any(|a| a == "--rich-input-test"),
         pin,
@@ -740,9 +744,16 @@ async fn session(args: Args) -> Result<()> {
         });
     }
 
-    // Mic plane: stream a synthetic 440 Hz tone as the mic uplink (0xCB), Opus-encoded 5 ms
-    // stereo frames — proves client→host mic passthrough end to end without a real microphone
-    // (the host decodes it into its virtual PipeWire source; record that source to hear the tone).
+    // Mic plane: stream a synthetic 440 Hz tone as the mic uplink (0xCB) — proves client→host
+    // mic passthrough end to end without a real microphone (the host decodes it into its virtual
+    // source; record that source to hear the tone). Two pacing modes:
+    //   default      — Opus 5 ms frames on a steady 5 ms tick (smooth arrival).
+    //   --mic-burst  — two 20 ms Opus frames back-to-back every 40 ms, replicating a real
+    //                  client's input-tap cadence (the Mac client's AVAudioEngine tap yields
+    //                  ~2048-frame buffers → two packets per ~42 ms). This is the arrival
+    //                  pattern that exposed the Windows host's missing jitter buffer (constant
+    //                  crackle, 2026-07-03): a steady 5 ms stream never trips it. Record the
+    //                  host mic and count silence gaps to regression-test host-side buffering.
     #[cfg(not(target_os = "linux"))]
     if args.mic_test {
         tracing::warn!("--mic-test requires Linux (libopus) — skipped");
@@ -750,6 +761,7 @@ async fn session(args: Args) -> Result<()> {
     #[cfg(target_os = "linux")]
     if args.mic_test {
         let conn2 = conn.clone();
+        let burst = args.mic_burst;
         tokio::spawn(async move {
             let mut enc =
                 match opus::Encoder::new(48_000, opus::Channels::Stereo, opus::Application::Voip) {
@@ -760,28 +772,38 @@ async fn session(args: Args) -> Result<()> {
                     }
                 };
             let _ = enc.set_bitrate(opus::Bitrate::Bits(64_000));
-            tracing::info!("mic-test: streaming a 440 Hz tone as the mic uplink");
+            // Frame size + tick per pacing mode; `per_tick` packets are sent back-to-back.
+            let (frame, tick_ms, per_tick) = if burst {
+                (960usize, 40u64, 2u32) // 2× 20 ms every 40 ms — the bursty real-client shape
+            } else {
+                (240usize, 5u64, 1u32) // 5 ms frames on a smooth tick
+            };
+            tracing::info!(burst, "mic-test: streaming a 440 Hz tone as the mic uplink");
             let mut phase = 0.0f32;
             let step = 2.0 * std::f32::consts::PI * 440.0 / 48_000.0;
-            let mut pcm = [0f32; 240 * 2]; // 5 ms stereo
+            let mut pcm = vec![0f32; frame * 2];
             let mut out = [0u8; 4000];
-            let mut interval = tokio::time::interval(std::time::Duration::from_millis(5));
-            for seq in 0u32.. {
+            let mut interval = tokio::time::interval(std::time::Duration::from_millis(tick_ms));
+            let mut seq = 0u32;
+            'stream: loop {
                 interval.tick().await;
-                for f in 0..240 {
-                    let s = (phase.sin()) * 0.25;
-                    phase += step;
-                    if phase > std::f32::consts::PI * 2.0 {
-                        phase -= std::f32::consts::PI * 2.0;
+                for _ in 0..per_tick {
+                    for f in 0..frame {
+                        let s = (phase.sin()) * 0.25;
+                        phase += step;
+                        if phase > std::f32::consts::PI * 2.0 {
+                            phase -= std::f32::consts::PI * 2.0;
+                        }
+                        pcm[f * 2] = s;
+                        pcm[f * 2 + 1] = s;
                     }
-                    pcm[f * 2] = s;
-                    pcm[f * 2 + 1] = s;
-                }
-                if let Ok(n) = enc.encode_float(&pcm, &mut out) {
-                    let d = punktfunk_core::quic::encode_mic_datagram(seq, now_ns(), &out[..n]);
-                    if conn2.send_datagram(d.into()).is_err() {
-                        break;
+                    if let Ok(n) = enc.encode_float(&pcm, &mut out) {
+                        let d = punktfunk_core::quic::encode_mic_datagram(seq, now_ns(), &out[..n]);
+                        if conn2.send_datagram(d.into()).is_err() {
+                            break 'stream;
+                        }
                     }
+                    seq = seq.wrapping_add(1);
                 }
             }
             tracing::info!("mic-test: done");

From 42d1c746635409bd5857448bd7e2cf2df16e5096 Mon Sep 17 00:00:00 2001
From: enricobuehler <buehler@unom.io>
Date: Sat, 4 Jul 2026 00:36:24 +0200
Subject: [PATCH 3/3] fix(apple-client/audio): capture the right channel of a
 multi-channel mic + diagnostics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mic uplink handed the host pure digital silence on a multi-channel
interface: AVAudioConverter's N→stereo downmix takes channels 0/1, but a
pro interface puts the mic on ONE higher discrete channel. Fold the input
to a mono bus ourselves instead — pick the mic's channel (or sum all) and
resample that to the encoder's 48 kHz stereo, so the silent 0/1 downmix
never happens.

- New "Microphone channel" setting (macOS): Auto (sum every channel — a
  lone hot mic passes at full level) or pin 1-based channel N. Picker
  appears only for multi-channel devices, driven by the device's input
  channel count.
- Diagnostics that make this class of failure self-naming next session:
  log the actual live capture device + format + fold mode, warn on a
  silent UID fallback, and a one-shot silence tripwire on the EXTRACTED
  signal (WARN on 10 s of zeros, else peak dBFS).
- foldToMono extracted as a pure, unit-tested helper (pin / sum-clamp x
  interleaved / deinterleaved / mono / out-of-range).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../Session/SessionModel.swift                |   1 +
 .../Settings/SettingsView+Sections.swift      |  11 +
 .../Settings/SettingsView.swift               |  10 +
 .../PunktfunkKit/Audio/AudioDevices.swift     |  46 ++++-
 .../PunktfunkKit/Audio/SessionAudio.swift     | 188 ++++++++++++++++--
 .../PunktfunkKit/Support/DefaultsKeys.swift   |   6 +
 .../AudioChannelFoldTests.swift               |  93 +++++++++
 7 files changed, 339 insertions(+), 16 deletions(-)
 create mode 100644 clients/apple/Tests/PunktfunkKitTests/AudioChannelFoldTests.swift

diff --git a/clients/apple/Sources/PunktfunkClient/Session/SessionModel.swift b/clients/apple/Sources/PunktfunkClient/Session/SessionModel.swift
index c57c158..599b2f5 100644
--- a/clients/apple/Sources/PunktfunkClient/Session/SessionModel.swift
+++ b/clients/apple/Sources/PunktfunkClient/Session/SessionModel.swift
@@ -338,6 +338,7 @@ final class SessionModel: ObservableObject {
         audio.start(
             speakerUID: defaults.string(forKey: DefaultsKey.speakerUID) ?? "",
             micUID: defaults.string(forKey: DefaultsKey.micUID) ?? "",
+            micChannel: defaults.integer(forKey: DefaultsKey.micChannel),
             micEnabled: defaults.object(forKey: DefaultsKey.micEnabled) as? Bool ?? true)
         self.audio = audio
         // Gamepads: forward GamepadManager's active controller as pad 0 and render the
diff --git a/clients/apple/Sources/PunktfunkClient/Settings/SettingsView+Sections.swift b/clients/apple/Sources/PunktfunkClient/Settings/SettingsView+Sections.swift
index 088fa63..baa2ef0 100644
--- a/clients/apple/Sources/PunktfunkClient/Settings/SettingsView+Sections.swift
+++ b/clients/apple/Sources/PunktfunkClient/Settings/SettingsView+Sections.swift
@@ -208,6 +208,17 @@ extension SettingsView {
                 }
             }
             .disabled(!micEnabled)
+            // Multi-channel interfaces only: the mic sits on ONE discrete input, so let the user
+            // pick it. Auto sums every channel (a lone hot mic still passes at full level).
+            if micChannelCount > 1 {
+                Picker("Microphone channel", selection: $micChannel) {
+                    Text("Auto (all channels)").tag(0)
+                    ForEach(1...micChannelCount, id: \.self) { ch in
+                        Text("Channel \(ch)").tag(ch)
+                    }
+                }
+                .disabled(!micEnabled)
+            }
             #endif
         } header: {
             Text("Audio")
diff --git a/clients/apple/Sources/PunktfunkClient/Settings/SettingsView.swift b/clients/apple/Sources/PunktfunkClient/Settings/SettingsView.swift
index 1f34761..d66ed1d 100644
--- a/clients/apple/Sources/PunktfunkClient/Settings/SettingsView.swift
+++ b/clients/apple/Sources/PunktfunkClient/Settings/SettingsView.swift
@@ -61,8 +61,12 @@ struct SettingsView: View {
     #if os(macOS)
     @AppStorage(DefaultsKey.speakerUID) var speakerUID = ""
     @AppStorage(DefaultsKey.micUID) var micUID = ""
+    @AppStorage(DefaultsKey.micChannel) var micChannel = 0
     @State var outputDevices: [AudioDevice] = []
     @State var inputDevices: [AudioDevice] = []
+    // Input channels of the selected mic — drives the "Microphone channel" picker, which only
+    // appears for a multi-channel interface (>1). 0 until the Audio tab loads it.
+    @State var micChannelCount = 0
     #endif
 
     #if os(iOS)
@@ -115,6 +119,12 @@ struct SettingsView: View {
             .onAppear {
                 outputDevices = AudioDevices.outputs()
                 inputDevices = AudioDevices.inputs()
+                micChannelCount = AudioDevices.inputChannelCount(forUID: micUID)
+            }
+            .onChange(of: micUID) { _, newUID in
+                // A different mic → different channel count; drop a now-out-of-range pin to Auto.
+                micChannelCount = AudioDevices.inputChannelCount(forUID: newUID)
+                if micChannel > micChannelCount { micChannel = 0 }
             }
             .tabItem { Label("Audio", systemImage: "speaker.wave.2") }
 
diff --git a/clients/apple/Sources/PunktfunkKit/Audio/AudioDevices.swift b/clients/apple/Sources/PunktfunkKit/Audio/AudioDevices.swift
index f2113af..4584fd9 100644
--- a/clients/apple/Sources/PunktfunkKit/Audio/AudioDevices.swift
+++ b/clients/apple/Sources/PunktfunkKit/Audio/AudioDevices.swift
@@ -33,6 +33,49 @@ public enum AudioDevices {
         }
     }
 
+    /// Input channel count of the mic the picker would use — the device with this UID, or the
+    /// system default input when `uid` is empty. 0 when it can't be resolved. Drives the
+    /// "Microphone channel" picker (only shown for multi-channel interfaces).
+    public static func inputChannelCount(forUID uid: String) -> Int {
+        let id = uid.isEmpty ? defaultInputDevice() : deviceID(forUID: uid)
+        guard let id else { return 0 }
+        return channelCount(id, scope: kAudioObjectPropertyScopeInput)
+    }
+
+    private static func defaultInputDevice() -> AudioDeviceID? {
+        var address = AudioObjectPropertyAddress(
+            mSelector: kAudioHardwarePropertyDefaultInputDevice,
+            mScope: kAudioObjectPropertyScopeGlobal,
+            mElement: kAudioObjectPropertyElementMain)
+        var dev = AudioDeviceID(0)
+        var size = UInt32(MemoryLayout<AudioDeviceID>.size)
+        guard AudioObjectGetPropertyData(
+            AudioObjectID(kAudioObjectSystemObject), &address, 0, nil, &size, &dev) == noErr,
+            dev != 0
+        else { return nil }
+        return dev
+    }
+
+    /// Sum of channels across the device's streams in `scope` (its total input/output channels).
+    private static func channelCount(
+        _ id: AudioDeviceID, scope: AudioObjectPropertyScope
+    ) -> Int {
+        var address = AudioObjectPropertyAddress(
+            mSelector: kAudioDevicePropertyStreamConfiguration,
+            mScope: scope,
+            mElement: kAudioObjectPropertyElementMain)
+        var size: UInt32 = 0
+        guard AudioObjectGetPropertyDataSize(id, &address, 0, nil, &size) == noErr, size > 0
+        else { return 0 }
+        let raw = UnsafeMutableRawPointer.allocate(
+            byteCount: Int(size), alignment: MemoryLayout<AudioBufferList>.alignment)
+        defer { raw.deallocate() }
+        guard AudioObjectGetPropertyData(id, &address, 0, nil, &size, raw) == noErr else { return 0 }
+        let abl = UnsafeMutableAudioBufferListPointer(
+            raw.assumingMemoryBound(to: AudioBufferList.self))
+        return abl.reduce(0) { $0 + Int($1.mNumberChannels) }
+    }
+
     private static func all() -> [AudioDeviceID] {
         var address = AudioObjectPropertyAddress(
             mSelector: kAudioHardwarePropertyDevices,
@@ -62,7 +105,8 @@ public enum AudioDevices {
         return AudioObjectGetPropertyDataSize(id, &address, 0, nil, &size) == noErr && size > 0
     }
 
-    private static func describe(_ id: AudioDeviceID) -> AudioDevice? {
+    /// UID + human name for a live AudioDeviceID (nil if either property is unreadable).
+    static func describe(_ id: AudioDeviceID) -> AudioDevice? {
         guard let uid = stringProperty(id, kAudioDevicePropertyDeviceUID),
               let name = stringProperty(id, kAudioObjectPropertyName)
         else { return nil }
diff --git a/clients/apple/Sources/PunktfunkKit/Audio/SessionAudio.swift b/clients/apple/Sources/PunktfunkKit/Audio/SessionAudio.swift
index 327e785..00727e3 100644
--- a/clients/apple/Sources/PunktfunkKit/Audio/SessionAudio.swift
+++ b/clients/apple/Sources/PunktfunkKit/Audio/SessionAudio.swift
@@ -5,9 +5,10 @@
 //   AVAudioSourceNode pulls from the ring (silence on underrun with re-priming, so a
 //   network gap costs one dip, not permanent crackle).
 //
-//   mic → host: a second AVAudioEngine taps the input device, resamples to 48 kHz
-//   stereo, slices 20 ms chunks, Opus-encodes, and sendMic()s each packet — the host
-//   feeds them into a virtual PipeWire source.
+//   mic → host: a second AVAudioEngine taps the input device, folds it to one mono bus (the
+//   chosen channel of a multi-channel interface, or a sum of all channels), resamples to 48 kHz
+//   stereo, slices 20 ms chunks, Opus-encodes, and sendMic()s each packet — the host feeds them
+//   into its virtual mic source (PipeWire on Linux, WASAPI on Windows).
 //
 // Devices are chosen by UID ("" = system default: the engine is then never pinned to a
 // concrete device and follows default-device changes). Two engines, not one — a single
@@ -68,10 +69,11 @@ public final class SessionAudio {
     /// ASYNCHRONOUS: it activates the AVAudioSession off the main thread, then starts the engines on
     /// a later main-queue hop (gated by `!flag.isStopped`) — so playback is live shortly after, not
     /// on return. The mic may start later still if the permission prompt is pending.
-    public func start(speakerUID: String, micUID: String, micEnabled: Bool) {
+    public func start(speakerUID: String, micUID: String, micChannel: Int, micEnabled: Bool) {
         #if os(macOS)
         // No AVAudioSession on macOS — start the engines directly (caller's thread, as before).
-        startEngines(speakerUID: speakerUID, micUID: micUID, micEnabled: micEnabled)
+        startEngines(
+            speakerUID: speakerUID, micUID: micUID, micChannel: micChannel, micEnabled: micEnabled)
         #else
         // Configure + activate the session OFF the main thread (it blocks on the audio server),
         // then start the engines back on the main thread once it's active — engine routing/format
@@ -81,7 +83,9 @@ public final class SessionAudio {
             self.activateAudioSession(micEnabled: micEnabled)
             DispatchQueue.main.async { [weak self] in
                 guard let self, !self.flag.isStopped else { return }
-                self.startEngines(speakerUID: speakerUID, micUID: micUID, micEnabled: micEnabled)
+                self.startEngines(
+                    speakerUID: speakerUID, micUID: micUID, micChannel: micChannel,
+                    micEnabled: micEnabled)
             }
         }
         #endif
@@ -115,7 +119,9 @@ public final class SessionAudio {
 
     /// Build + start the playback engine (and the mic uplink when enabled + authorized). Main
     /// thread (engine setup); on iOS/tvOS the session is already active by the time this runs.
-    private func startEngines(speakerUID: String, micUID: String, micEnabled: Bool) {
+    private func startEngines(
+        speakerUID: String, micUID: String, micChannel: Int, micEnabled: Bool
+    ) {
         startPlayback(speakerUID: speakerUID)
         #if os(tvOS)
         // No app-accessible microphone input on tvOS — playback only.
@@ -123,12 +129,12 @@ public final class SessionAudio {
         guard micEnabled else { return }
         switch AVCaptureDevice.authorizationStatus(for: .audio) {
         case .authorized:
-            startCapture(micUID: micUID)
+            startCapture(micUID: micUID, micChannel: micChannel)
         case .notDetermined:
             AVCaptureDevice.requestAccess(for: .audio) { [weak self] granted in
                 DispatchQueue.main.async {
                     guard let self, granted, !self.flag.isStopped else { return }
-                    self.startCapture(micUID: micUID)
+                    self.startCapture(micUID: micUID, micChannel: micChannel)
                 }
             }
         default:
@@ -280,7 +286,7 @@ public final class SessionAudio {
     // MARK: - Mic (mic → host)
 
     #if !os(tvOS)
-    private func startCapture(micUID: String) {
+    private func startCapture(micUID: String, micChannel: Int) {
         let engine = AVAudioEngine()
         let input = engine.inputNode
         #if os(macOS)
@@ -300,8 +306,63 @@ public final class SessionAudio {
             log.error("no usable input device — mic uplink disabled")
             return
         }
-        guard let encoder = try? OpusEncoder(),
-              let resampler = AVAudioConverter(from: inFormat, to: encoder.pcmFormat),
+
+        // Multi-channel-interface handling. A pro interface exposes N discrete inputs with the mic
+        // on ONE of them, but AVAudioConverter's N→stereo downmix takes channels 0/1 — dead
+        // silence when the mic sits higher up (the classic "host receives zeros"). So we fold the
+        // input to a single mono bus OURSELVES and resample that. micChannel: 0 = Auto (sum every
+        // channel — a lone hot mic passes at full level), n≥1 pins 1-based input channel n.
+        let inChannels = Int(inFormat.channelCount)
+        let pinnedChannel: Int? = {
+            guard micChannel >= 1 else { return nil }
+            let idx = micChannel - 1
+            guard idx < inChannels else {
+                log.warning(
+                    "mic channel \(micChannel) out of range (device has \(inChannels)) — mixing all")
+                return nil
+            }
+            return idx
+        }()
+        let channelPlan = pinnedChannel.map { "channel \($0 + 1)/\(inChannels)" }
+            ?? (inChannels > 1 ? "mix \(inChannels)ch→mono" : "mono")
+
+        // Name the device we're ACTUALLY recording from + its format + how we fold it, once per
+        // session. This single line localizes the whole class of "host receives silence" failures
+        // that otherwise need a host-side tone injection to pin down: a UID that silently fell back
+        // to the default, the wrong device being live, or the wrong channel picked.
+        #if os(macOS)
+        if let unit = input.audioUnit, let live = Self.currentDevice(of: unit),
+           let dev = AudioDevices.describe(live) {
+            if !micUID.isEmpty, dev.uid != micUID {
+                log.warning("""
+                    mic selection not honored — requested \(micUID) but capturing from \
+                    \(dev.name) [\(dev.uid)]; the device's UID likely changed (replug) — \
+                    reselect it in Settings
+                    """)
+            }
+            log.info("""
+                mic capture: \(dev.name) [\(dev.uid)] — \(Int(inFormat.sampleRate)) Hz, \
+                \(inChannels) ch, \(channelPlan)
+                """)
+        } else {
+            log.info("""
+                mic capture: <device unavailable> — \(Int(inFormat.sampleRate)) Hz, \
+                \(inChannels) ch, \(channelPlan)
+                """)
+        }
+        #else
+        log.info(
+            "mic capture: \(Int(inFormat.sampleRate)) Hz, \(inChannels) ch, \(channelPlan)")
+        #endif
+
+        // Encode a single mono bus (folded from `inFormat` in the tap): the resampler goes
+        // mono@inputSR → the encoder's 48 kHz stereo, so it handles both the rate change and the
+        // mono→stereo duplication, and the wrong-channel downmix never happens.
+        guard let monoFormat = AVAudioFormat(
+                  commonFormat: .pcmFormatFloat32, sampleRate: inFormat.sampleRate,
+                  channels: 1, interleaved: false),
+              let encoder = try? OpusEncoder(),
+              let resampler = AVAudioConverter(from: monoFormat, to: encoder.pcmFormat),
               let chunk = AVAudioPCMBuffer(
                   pcmFormat: encoder.pcmFormat, frameCapacity: OpusEncoder.framesPerPacket)
         else {
@@ -317,11 +378,59 @@ public final class SessionAudio {
         let connection = connection
         let flag = flag
 
+        // Silence tripwire (tap-confined): a "recording" app can be handed pure digital zeros —
+        // a zeroed input-volume slider, a stale TCC grant, a muted device, OR the wrong channel
+        // picked — and everything downstream looks alive while the host gets silence. Track the
+        // peak of the EXTRACTED mono bus over the first ~10 s (not the raw device — a mic present
+        // on a channel we didn't grab must still read as silence) and emit exactly ONE verdict.
+        // Last time, the absence of this log line forced a host-side tone injection to diagnose.
+        let silenceWindow = Int(inFormat.sampleRate * 10)
+        let deviceLabel = micUID.isEmpty ? "default input" : micUID
+        var framesInspected = 0
+        var inputPeak: Float = 0
+        var levelReported = false
+
         input.installTap(onBus: 0, bufferSize: 2048, format: inFormat) { buffer, _ in
             if flag.isStopped { return }
+            let frames = Int(buffer.frameLength)
+            guard frames > 0, let src = buffer.floatChannelData,
+                  let mono = AVAudioPCMBuffer(
+                      pcmFormat: monoFormat, frameCapacity: buffer.frameLength),
+                  let dst = mono.floatChannelData?[0]
+            else { return }
+            mono.frameLength = buffer.frameLength
+
+            // Fold the multi-channel input down to the one mono bus we encode.
+            Self.foldToMono(
+                input: src, frames: frames, channels: Int(buffer.format.channelCount),
+                interleaved: buffer.format.isInterleaved, pinned: pinnedChannel, out: dst)
+
+            if !levelReported {
+                var localPeak: Float = 0
+                for i in 0..<frames where abs(dst[i]) > localPeak { localPeak = abs(dst[i]) }
+                if localPeak > inputPeak { inputPeak = localPeak }
+                framesInspected += frames
+                if framesInspected >= silenceWindow {
+                    levelReported = true
+                    if inputPeak == 0 {
+                        log.warning("""
+                            mic uplink has been pure digital SILENCE for 10 s (\(deviceLabel), \
+                            \(channelPlan)) — check the input level (System Settings → Sound → \
+                            Input), Privacy & Security → Microphone, and the Microphone channel in \
+                            Settings; the host is receiving zeros
+                            """)
+                    } else {
+                        let dbfs = 20 * log10(inputPeak)
+                        log.info("""
+                            mic uplink OK — peak \(String(format: "%.1f", dbfs)) dBFS over first \
+                            10 s (\(deviceLabel), \(channelPlan))
+                            """)
+                    }
+                }
+            }
+
             let ratio = 48_000 / inFormat.sampleRate
-            let outCapacity = AVAudioFrameCount(
-                (Double(buffer.frameLength) * ratio).rounded(.up) + 64)
+            let outCapacity = AVAudioFrameCount((Double(frames) * ratio).rounded(.up) + 64)
             guard let staging = AVAudioPCMBuffer(
                 pcmFormat: encoder.pcmFormat, frameCapacity: outCapacity)
             else { return }
@@ -334,7 +443,7 @@ public final class SessionAudio {
                 }
                 fed = true
                 outStatus.pointee = .haveData
-                return buffer
+                return mono
             }
             guard status != .error, let p = staging.floatChannelData?[0] else { return }
             fifo.append(contentsOf: UnsafeBufferPointer(
@@ -378,6 +487,42 @@ public final class SessionAudio {
         stateLock.unlock()
         log.info("mic uplink started (\(micUID.isEmpty ? "default input" : micUID))")
     }
+
+    /// Fold `channels` of input down to the single mono bus `out` (`frames` samples long).
+    /// `input` uses the `floatChannelData` layout: `interleaved` → one buffer strided by channel
+    /// count; otherwise one buffer per channel. A `pinned` index in `0..<channels` copies exactly
+    /// that channel (a mic on one input of a multi-channel interface); `nil` — or a negative or
+    /// too-large pin, e.g. a stale persisted setting — sums every channel, clamped to [-1, 1].
+    /// `frames <= 0` is a no-op; `channels <= 0` writes silence. `internal` for unit tests.
+    static func foldToMono(
+        input: UnsafePointer<UnsafeMutablePointer<Float>>, frames: Int, channels: Int,
+        interleaved: Bool, pinned: Int?, out: UnsafeMutablePointer<Float>
+    ) {
+        // `0..<n` / `1..<n` trap when n is below the lower bound, so reject degenerate sizes first.
+        guard frames > 0 else { return }
+        guard channels > 0 else { for i in 0..<frames { out[i] = 0 }; return }
+        if let ch = pinned, ch >= 0, ch < channels {
+            // Interleaved: one buffer, step `channels`; planar: the pinned channel's own buffer.
+            let d = interleaved ? input[0] : input[ch]
+            let step = interleaved ? channels : 1, offset = interleaved ? ch : 0
+            for i in 0..<frames { out[i] = d[i * step + offset] }
+        } else if interleaved {
+            let d = input[0]
+            for i in 0..<frames {
+                var s: Float = 0
+                for c in 0..<channels { s += d[i * channels + c] }
+                out[i] = max(-1, min(1, s))
+            }
+        } else {
+            let d0 = input[0]
+            for i in 0..<frames { out[i] = d0[i] }
+            for c in 1..<channels {
+                let d = input[c]
+                for i in 0..<frames { out[i] += d[i] }
+            }
+            if channels > 1 { for i in 0..<frames { out[i] = max(-1, min(1, out[i])) } }
+        }
+    }
     #endif
 
     #if os(macOS)
@@ -387,5 +532,18 @@ public final class SessionAudio {
             unit, kAudioOutputUnitProperty_CurrentDevice, kAudioUnitScope_Global, 0,
             &dev, UInt32(MemoryLayout<AudioDeviceID>.size)) == noErr
     }
+
+    /// Ask the AUHAL which device it is really bound to. This is the ground truth for "what
+    /// are we capturing from": it exposes a selection that looked successful but quietly
+    /// landed on the system default (stale/changed UID, or a device that disappeared between
+    /// resolve and start). Returns `nil` when the query fails or reports device 0.
+    private static func currentDevice(of unit: AudioUnit) -> AudioDeviceID? {
+        var liveDevice = AudioDeviceID(0)
+        var byteCount = UInt32(MemoryLayout<AudioDeviceID>.size)
+        let queryOK = AudioUnitGetProperty(
+            unit, kAudioOutputUnitProperty_CurrentDevice, kAudioUnitScope_Global, 0,
+            &liveDevice, &byteCount) == noErr
+        return queryOK && liveDevice != 0 ? liveDevice : nil
+    }
     #endif
 }
diff --git a/clients/apple/Sources/PunktfunkKit/Support/DefaultsKeys.swift b/clients/apple/Sources/PunktfunkKit/Support/DefaultsKeys.swift
index 7a6ee1c..94656ae 100644
--- a/clients/apple/Sources/PunktfunkKit/Support/DefaultsKeys.swift
+++ b/clients/apple/Sources/PunktfunkKit/Support/DefaultsKeys.swift
@@ -24,6 +24,12 @@ public enum DefaultsKey {
     public static let micEnabled = "punktfunk.micEnabled"
     public static let speakerUID = "punktfunk.speakerUID"
     public static let micUID = "punktfunk.micUID"
+    /// macOS: which input channel of the chosen mic device feeds the host. 0 = "Auto" (sum every
+    /// channel to mono — a mic on a single input of a multi-channel interface passes at full
+    /// level); n≥1 pins 1-based input channel n. Multi-channel interfaces expose the mic on ONE
+    /// discrete channel, and the default N→stereo downmix grabs channels 0/1 (silence when the mic
+    /// is higher up), so we fold to mono ourselves. Only meaningful for multi-channel devices.
+    public static let micChannel = "punktfunk.micChannel"
     public static let presenter = "punktfunk.presenter"
     /// Request a 10-bit BT.2020 PQ (HDR10) stream. On by default; only takes effect when the host
     /// has HDR content AND this display supports HDR — otherwise the stream stays 8-bit SDR.
diff --git a/clients/apple/Tests/PunktfunkKitTests/AudioChannelFoldTests.swift b/clients/apple/Tests/PunktfunkKitTests/AudioChannelFoldTests.swift
new file mode 100644
index 0000000..ec8a21f
--- /dev/null
+++ b/clients/apple/Tests/PunktfunkKitTests/AudioChannelFoldTests.swift
@@ -0,0 +1,93 @@
+// Multi-channel input → mono fold (SessionAudio.foldToMono): the fix for a mic on one channel of
+// a multi-channel interface. AVAudioConverter's default N→stereo downmix grabs channels 0/1 — dead
+// silence when the mic sits higher up — so we fold ourselves. This pins the fiddly bits (the
+// interleaved stride, channel pinning, the sum-clamp) against regressions without needing hardware.
+
+#if !os(tvOS)
+import XCTest
+
+@testable import PunktfunkKit
+
+final class AudioChannelFoldTests: XCTestCase {
+    /// Run `foldToMono` on channel data written as `[[Float]]`, covering both layouts that
+    /// `floatChannelData` can take:
+    ///   - deinterleaved: one inner array per channel, each `frames` samples long.
+    ///   - interleaved: exactly one inner array holding c0f0, c1f0, … back to back; the true
+    ///     channel count travels in `channels`.
+    private func fold(
+        _ planes: [[Float]], frames: Int, channels: Int, interleaved: Bool, pinned: Int?
+    ) -> [Float] {
+        // Give every plane its own C buffer and collect the addresses in a pointer table —
+        // the same shape as floatChannelData.
+        let table = UnsafeMutablePointer<UnsafeMutablePointer<Float>>.allocate(
+            capacity: planes.count)
+        for (slot, plane) in planes.enumerated() {
+            let storage = UnsafeMutablePointer<Float>.allocate(capacity: plane.count)
+            storage.initialize(from: plane, count: plane.count)
+            table[slot] = storage
+        }
+        let mono = UnsafeMutablePointer<Float>.allocate(capacity: frames)
+        defer {
+            for slot in planes.indices { table[slot].deallocate() }
+            table.deallocate()
+            mono.deallocate()
+        }
+        SessionAudio.foldToMono(
+            input: table, frames: frames, channels: channels,
+            interleaved: interleaved, pinned: pinned, out: mono)
+        return Array(UnsafeBufferPointer(start: mono, count: frames))
+    }
+
+    // Pinned channel comes through verbatim: the real-world case of a mic on a HIGH input.
+    func testPinsHigherChannelDeinterleaved() {
+        let planes: [[Float]] = [[0, 0, 0], [0, 0, 0], [0.1, 0.2, 0.3], [0, 0, 0]]
+        let mono = fold(
+            planes, frames: 3, channels: 4, interleaved: false, pinned: 2)
+        XCTAssertEqual(mono, [0.1, 0.2, 0.3])
+    }
+
+    // Identical signal laid out interleaved ([c0f0,c1f0,c2f0,c3f0, c0f1,…]); exercises the
+    // `i*ch + c` stride.
+    func testPinsHigherChannelInterleaved() {
+        let frame0: [Float] = [0, 0, 0.1, 0]
+        let frame1: [Float] = [0, 0, 0.2, 0]
+        let frame2: [Float] = [0, 0, 0.3, 0]
+        let mono = fold(
+            [frame0 + frame1 + frame2], frames: 3, channels: 4, interleaved: true, pinned: 2)
+        XCTAssertEqual(mono, [0.1, 0.2, 0.3])
+    }
+
+    // Auto mode (`pinned: nil`): one live channel among silent ones keeps its FULL level.
+    func testAutoSumsAllChannelsSoALoneMicSurvives() {
+        let planes: [[Float]] = [[0, 0], [0.4, -0.4], [0, 0]]
+        let mono = fold(
+            planes, frames: 2, channels: 3, interleaved: false, pinned: nil)
+        XCTAssertEqual(mono, [0.4, -0.4])
+    }
+
+    // Two hot channels whose sum leaves the unit range get clamped; no wrap, no overflow.
+    func testAutoSumClampsToUnitRange() {
+        let planes: [[Float]] = [[0.8, -0.8], [0.9, -0.9]]
+        let mono = fold(
+            planes, frames: 2, channels: 2, interleaved: false, pinned: nil)
+        XCTAssertEqual(mono, [1.0, -1.0])
+    }
+
+    // An ordinary mono device goes straight through: not clamped, not attenuated.
+    func testMonoIsIdentity() {
+        let samples: [Float] = [0.25, -0.5, 0.75]
+        let mono = fold([samples], frames: 3, channels: 1, interleaved: false, pinned: nil)
+        XCTAssertEqual(mono, samples)
+    }
+
+    // Defense in depth: the tap validates the pin, but the setting is persisted, so
+    // foldToMono's own `ch < channels` check must also reject an out-of-range pin and sum
+    // the channels rather than read beyond the buffer.
+    func testOutOfRangePinFallsBackToSum() {
+        let planes: [[Float]] = [[0, 0], [0.3, 0.3]]
+        let mono = fold(
+            planes, frames: 2, channels: 2, interleaved: false, pinned: 2)
+        XCTAssertEqual(mono, [0.3, 0.3])
+    }
+}
+#endif