From a7daed5797dfd4baef89e3a42054760d53db227a Mon Sep 17 00:00:00 2001
From: enricobuehler
Date: Wed, 17 Jun 2026 17:15:41 +0000
Subject: [PATCH] =?UTF-8?q?feat(host/windows):=20client=E2=86=92host=20mic?=
 =?UTF-8?q?=20passthrough=20via=20a=20virtual=20audio=20device?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The host received the client's mic uplink (0xCB Opus) but dropped it on Windows ("requires Linux"). Windows has no user-mode way to CREATE a capture endpoint, so target an existing virtual audio device and write the decoded mic PCM into its RENDER endpoint — the device's CAPTURE endpoint then surfaces as a microphone host apps record from (the inverse of a virtual cable).

New audio::wasapi_mic::WasapiVirtualMic: finds the device by friendly-name (Steam Streaming Microphone / VB-Audio CABLE Input / VoiceMeeter / "virtual", override with PUNKTFUNK_MIC_DEVICE), opens a WASAPI shared event-driven RENDER client (48 kHz stereo f32, autoconvert), and a dedicated COM thread drains a bounded (~80 ms, drop-oldest) inject queue into that endpoint, filling with silence when the queue runs dry. open_virtual_mic() gets a Windows arm; mic_service_thread (Opus decode → push) now compiles for Windows too (opus is already a Windows dep). Clear error + install guidance when no virtual device is present.

The Linux/cross-platform side passes cargo check; the Windows path will be built and validated once the Windows box is back (the wasapi render API was cross-checked against the docs + the existing capture path).
Co-Authored-By: Claude Opus 4.8
---
 crates/punktfunk-host/src/audio.rs            |  15 +-
 crates/punktfunk-host/src/audio/wasapi_mic.rs | 235 ++++++++++++++++++
 crates/punktfunk-host/src/m3.rs               |  19 +-
 3 files changed, 256 insertions(+), 13 deletions(-)
 create mode 100644 crates/punktfunk-host/src/audio/wasapi_mic.rs

diff --git a/crates/punktfunk-host/src/audio.rs b/crates/punktfunk-host/src/audio.rs
index 171c203..e09b792 100644
--- a/crates/punktfunk-host/src/audio.rs
+++ b/crates/punktfunk-host/src/audio.rs
@@ -67,18 +67,27 @@ pub trait VirtualMic: Send {
     }
 }
 
-/// Open a virtual microphone PipeWire source with `channels` interleaved channels (1 or 2).
+/// Open a virtual microphone with `channels` interleaved channels. Linux (1 or 2): a PipeWire
+/// `Audio/Source`. Windows (stereo only): writes into an existing virtual audio device's render
+/// endpoint (whose capture endpoint apps see as a mic) — see [`wasapi_mic`].
 #[cfg(target_os = "linux")]
 pub fn open_virtual_mic(channels: u32) -> Result<Box<dyn VirtualMic>> {
     linux::PwMicSource::open(channels).map(|m| Box::new(m) as Box<dyn VirtualMic>)
 }
 
-#[cfg(not(target_os = "linux"))]
+#[cfg(target_os = "windows")]
+pub fn open_virtual_mic(channels: u32) -> Result<Box<dyn VirtualMic>> {
+    wasapi_mic::WasapiVirtualMic::open(channels).map(|m| Box::new(m) as Box<dyn VirtualMic>)
+}
+
+#[cfg(not(any(target_os = "linux", target_os = "windows")))]
 pub fn open_virtual_mic(_channels: u32) -> Result<Box<dyn VirtualMic>> {
-    anyhow::bail!("virtual mic requires Linux + PipeWire")
+    anyhow::bail!("virtual mic requires Linux + PipeWire or Windows + a virtual audio device")
 }
 
 #[cfg(target_os = "linux")]
 mod linux;
 #[cfg(target_os = "windows")]
 mod wasapi_cap;
+#[cfg(target_os = "windows")]
+mod wasapi_mic;
diff --git a/crates/punktfunk-host/src/audio/wasapi_mic.rs b/crates/punktfunk-host/src/audio/wasapi_mic.rs
new file mode 100644
index 0000000..9de0e5d
--- /dev/null
+++ b/crates/punktfunk-host/src/audio/wasapi_mic.rs
@@ -0,0 +1,235 @@
+//! WASAPI virtual microphone (Windows) — the inverse of [`super::wasapi_cap`]. Windows has no
+//! user-mode way to *create* a capture (microphone) endpoint, so we target an EXISTING virtual audio
+//! device and write the client's decoded mic PCM into that device's **render** endpoint; the device's
+//! **capture** endpoint then surfaces as a microphone that host apps can record from.
+//!
+//! Target device, by friendly-name substring (first match wins; override with `PUNKTFUNK_MIC_DEVICE`):
+//! "Steam Streaming Microphone" (ships with Steam Remote Play — exactly this purpose), VB-Audio
+//! "CABLE Input", VoiceMeeter, or anything with "virtual" in the name. If none is present we return an
+//! error with install guidance and the host runs without mic passthrough.
+//!
+//! `push` enqueues decoded interleaved-f32 PCM into a bounded ring (drop-oldest beyond ~80 ms so mic
+//! latency stays bounded); a dedicated COM-apartment thread renders it event-driven, filling silence
+//! when the client isn't talking. WASAPI objects are `!Send`, so they live entirely on that thread
+//! (mirrors `WasapiLoopbackCapturer`).
+
+use super::{VirtualMic, SAMPLE_RATE};
+use anyhow::{anyhow, Context, Result};
+use std::collections::VecDeque;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::mpsc::{sync_channel, SyncSender};
+use std::sync::{Arc, Mutex};
+use std::thread::{self, JoinHandle};
+use std::time::Duration;
+use wasapi::{Direction, SampleType, StreamMode, WaveFormat};
+
+const CHANNELS: u32 = 2;
+/// 48 kHz stereo f32: 2 channels * 4 bytes.
+const BLOCK_ALIGN: usize = 2 * 4;
+/// Bound the inject queue at ~80 ms so the passed-through mic stays low-latency (drop oldest beyond).
+const MAX_QUEUE_BYTES: usize = (SAMPLE_RATE as usize * 80 / 1000) * BLOCK_ALIGN;
+
+/// Render-endpoint friendly-name substrings (lowercased) we can write into so the device's capture
+/// endpoint becomes a host mic. Ordered by preference.
+const CANDIDATES: &[&str] = &[
+    "steam streaming microphone",
+    "cable input",
+    "voicemeeter input",
+    "voicemeeter aux input",
+    "virtual",
+];
+
+/// Handle to the WASAPI render worker that feeds the client's mic PCM into a virtual audio device.
+/// Dropping it raises `stop` and joins the worker thread.
+pub struct WasapiVirtualMic {
+    /// Interleaved little-endian f32 PCM bytes waiting for the render thread (capped by `push`).
+    queue: Arc<Mutex<VecDeque<u8>>>,
+    /// Raised on drop, or after a timed-out bring-up, to make the render thread exit.
+    stop: Arc<AtomicBool>,
+    /// Render thread handle; taken and joined in `Drop`.
+    join: Option<JoinHandle<()>>,
+}
+
+impl WasapiVirtualMic {
+    /// Spawn the render thread and wait up to 3 s for it to report the resolved device.
+    ///
+    /// # Errors
+    /// Fails if `channels` is not 2, if the thread cannot be spawned, if bring-up reports an error
+    /// (e.g. no virtual-mic device found), or if no report arrives within the timeout.
+    pub fn open(channels: u32) -> Result<Self> {
+        anyhow::ensure!(
+            channels == CHANNELS,
+            "virtual mic is stereo-only (got {channels})"
+        );
+        let queue = Arc::new(Mutex::new(VecDeque::<u8>::new()));
+        let stop = Arc::new(AtomicBool::new(false));
+        // Bring-up handshake: report the resolved device (or the error) before returning, so a missing
+        // virtual-mic device surfaces as Err (the caller retries with backoff) not a silent dead thread.
+        let (ready_tx, ready_rx) = sync_channel::<Result<String>>(1);
+        let (q, st) = (queue.clone(), stop.clone());
+        let join = thread::Builder::new()
+            .name("punktfunk-wasapi-mic".into())
+            .spawn(move || {
+                if let Err(e) = render_thread(q, st, ready_tx) {
+                    tracing::error!(error = %format!("{e:#}"), "wasapi virtual-mic thread failed");
+                }
+            })
+            .context("spawn wasapi mic thread")?;
+        match ready_rx.recv_timeout(Duration::from_secs(3)) {
+            Ok(Ok(name)) => {
+                tracing::info!(device = %name,
+                    "WASAPI virtual mic ready (client mic → this device's render endpoint)");
+                Ok(WasapiVirtualMic {
+                    queue,
+                    stop,
+                    join: Some(join),
+                })
+            }
+            Ok(Err(e)) => {
+                // The thread returns right after reporting a bring-up error, so reaping it is
+                // quick and keeps retries from piling up unjoined threads.
+                let _ = join.join();
+                Err(e)
+            }
+            Err(_) => {
+                // No report in time (or the thread died before reporting). It may still finish
+                // bring-up later, so raise `stop` to make it exit instead of rendering silence
+                // forever. Do not join: it may be stuck inside a WASAPI call.
+                stop.store(true, Ordering::SeqCst);
+                Err(anyhow!("wasapi virtual-mic init timed out"))
+            }
+        }
+    }
+}
+
+impl Drop for WasapiVirtualMic {
+    /// Signal the render thread to stop and wait for it to exit.
+    fn drop(&mut self) {
+        self.stop.store(true, Ordering::SeqCst);
+        if let Some(j) = self.join.take() {
+            let _ = j.join();
+        }
+    }
+}
+
+impl VirtualMic for WasapiVirtualMic {
+    /// Append `pcm` to the inject queue as little-endian f32 bytes. If the queue lock is poisoned
+    /// the samples are dropped silently.
+    fn push(&self, pcm: &[f32]) {
+        let Ok(mut q) = self.queue.lock() else {
+            return;
+        };
+        q.reserve(pcm.len() * 4);
+        for &s in pcm {
+            q.extend(s.to_le_bytes());
+        }
+        // Drop-oldest to keep latency bounded (mic is real-time; stale audio is worse than dropped).
+        if q.len() > MAX_QUEUE_BYTES {
+            let excess = q.len() - MAX_QUEUE_BYTES;
+            q.drain(..excess);
+        }
+    }
+    fn channels(&self) -> u32 {
+        CHANNELS
+    }
+}
+
+/// Resolve the virtual-mic target among the render endpoints by friendly name.
+///
+/// With a non-empty `PUNKTFUNK_MIC_DEVICE`, the first endpoint whose lowercased name contains that
+/// (trimmed, lowercased) substring wins. Otherwise the endpoint matching the earliest
+/// [`CANDIDATES`] entry wins regardless of enumeration order; ties go to the endpoint enumerated
+/// first.
+///
+/// # Errors
+/// Fails if enumeration fails or nothing matches. The "no match" error lists every endpoint name
+/// seen, so a missing device is diagnosable.
+fn find_device() -> Result<wasapi::Device> {
+    let enumerator = wasapi::DeviceEnumerator::new().context("DeviceEnumerator")?;
+    let collection = enumerator
+        .get_device_collection(&Direction::Render)
+        .context("render device collection")?;
+    let n = collection.get_nbr_devices().context("device count")?;
+    // An empty override would match every endpoint (`contains("")` is always true) and select
+    // whatever render endpoint enumerates first, so treat it as unset.
+    let want = std::env::var("PUNKTFUNK_MIC_DEVICE")
+        .ok()
+        .map(|s| s.trim().to_lowercase())
+        .filter(|s| !s.is_empty());
+    let mut names = Vec::new();
+    // Best match so far as (preference rank, device); a lower rank is preferred.
+    let mut best: Option<(usize, wasapi::Device)> = None;
+    for i in 0..n {
+        let Ok(dev) = collection.get_device_at_index(i) else {
+            continue;
+        };
+        let name = dev.get_friendlyname().unwrap_or_default();
+        let lname = name.to_lowercase();
+        let rank = match &want {
+            Some(w) => lname.contains(w.as_str()).then_some(0),
+            None => CANDIDATES.iter().position(|c| lname.contains(c)),
+        };
+        if let Some(rank) = rank {
+            // Strict `<` keeps the first-enumerated endpoint among equally ranked matches.
+            if best.as_ref().map_or(true, |(r, _)| rank < *r) {
+                best = Some((rank, dev));
+            }
+        }
+        names.push(name);
+    }
+    best.map(|(_, dev)| dev).ok_or_else(|| {
+        anyhow!(
+            "no virtual-mic device among render endpoints {names:?}. Install VB-Audio Virtual Cable \
+             or enable Steam Remote Play's microphone (Steam Streaming Microphone), or set \
+             PUNKTFUNK_MIC_DEVICE=<name substring>."
+        )
+    })
+}
+
+/// Body of the render thread: bring the stream up, report the outcome on `ready`, then feed the
+/// endpoint from `queue` until `stop` is raised.
+///
+/// Bring-up failures are reported through `ready` and the function then returns `Ok(())`; only
+/// errors from the running render loop are returned as `Err`.
+fn render_thread(
+    queue: Arc<Mutex<VecDeque<u8>>>,
+    stop: Arc<AtomicBool>,
+    ready: SyncSender<Result<String>>,
+) -> Result<()> {
+    if let Err(e) = wasapi::initialize_mta()
+        .ok()
+        .context("CoInitializeEx (MTA)")
+    {
+        let _ = ready.send(Err(e));
+        return Ok(());
+    }
+    // Open + start the render stream. The WASAPI objects must outlive the render loop, so the setup
+    // closure hands them back and this function owns them until the thread exits; on any failure
+    // report Err and exit.
+    let setup = (|| -> Result<(wasapi::AudioClient, wasapi::AudioRenderClient, wasapi::Handle, String)> {
+        let device = find_device()?;
+        let name = device.get_friendlyname().unwrap_or_else(|_| "virtual mic".into());
+        let mut audio_client = device.get_iaudioclient().context("IAudioClient")?;
+        // 48 kHz stereo f32; autoconvert lets WASAPI shared-mode SRC match the device mix format.
+        let desired = WaveFormat::new(
+            32,
+            32,
+            &SampleType::Float,
+            SAMPLE_RATE as usize,
+            CHANNELS as usize,
+            None,
+        );
+        let (default_period, _min) = audio_client.get_device_period().context("device period")?;
+        let mode = StreamMode::EventsShared {
+            autoconvert: true,
+            buffer_duration_hns: default_period,
+        };
+        audio_client
+            .initialize_client(&desired, &Direction::Render, &mode)
+            .context("initialize render client")?;
+        let h_event = audio_client.set_get_eventhandle().context("event handle")?;
+        let render_client = audio_client
+            .get_audiorenderclient()
+            .context("IAudioRenderClient")?;
+        // Pre-fill the whole buffer with silence so the stream starts cleanly (no startup glitch).
+        // Best-effort: a failed pre-fill is ignored and the stream is started anyway.
+        let buf_frames = audio_client.get_buffer_size().context("buffer size")? as usize;
+        let _ = render_client.write_to_device(buf_frames, &vec![0u8; buf_frames * BLOCK_ALIGN], None);
+        audio_client.start_stream().context("start render stream")?;
+        Ok((audio_client, render_client, h_event, name))
+    })();
+    let (audio_client, render_client, h_event, name) = match setup {
+        Ok(t) => t,
+        Err(e) => {
+            // Flatten the context chain into one message for the caller's error report.
+            let _ = ready.send(Err(anyhow!("{e:#}")));
+            return Ok(());
+        }
+    };
+    let _ = ready.send(Ok(name));
+
+    // Scratch buffer reused across iterations; it only ever grows.
+    let mut buf: Vec<u8> = Vec::new();
+    while !stop.load(Ordering::Relaxed) {
+        // The device signals when it wants more data; finite timeout keeps `stop` responsive.
+        if h_event.wait_for_event(100).is_err() {
+            continue;
+        }
+        let space = audio_client
+            .get_available_space_in_frames()
+            .context("available space")? as usize;
+        if space == 0 {
+            continue;
+        }
+        let need = space * BLOCK_ALIGN;
+        if buf.len() < need {
+            buf.resize(need, 0);
+        }
+        // Silence base; overwrite with queued mic PCM (zero-pad the tail when the client is quiet).
+        buf[..need].fill(0);
+        {
+            // `push` treats a poisoned lock as "drop the samples". Recover the guard here rather
+            // than panicking: the queue holds plain bytes, so a panic elsewhere cannot have left
+            // it in a state that is unsafe to read.
+            let mut q = queue.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
+            let n = q.len().min(need);
+            for (dst, src) in buf[..n].iter_mut().zip(q.drain(..n)) {
+                *dst = src;
+            }
+        }
+        render_client
+            .write_to_device(space, &buf[..need], None)
+            .context("write_to_device")?;
+    }
+    audio_client.stop_stream().ok();
+    Ok(())
+}
diff --git a/crates/punktfunk-host/src/m3.rs b/crates/punktfunk-host/src/m3.rs
index 05e1e2b..bed2fdd 100644
--- a/crates/punktfunk-host/src/m3.rs
+++ b/crates/punktfunk-host/src/m3.rs
@@ -201,7 +201,8 @@ pub(crate) async fn serve(opts: M3Options, np: Arc) -> Result<()>
     // wedged KWin's EIS setup ("EIS setup timed out"). Gamepads stay per-session (uinput).
     let injector = InjectorService::start();
     // One virtual microphone for the whole host lifetime (see MicService): the client's mic uplink
-    // (0xCB) is Opus-decoded and fed into a persistent PipeWire Audio/Source host apps record from.
+    // (0xCB) is Opus-decoded and fed into a persistent virtual mic host apps record from (Linux
+    // PipeWire Audio/Source; Windows a virtual audio device's render endpoint).
     let mic_service = MicService::start();
     // Host-lifetime worker that fires debounced TV-session restores (the managed gamescope path
     // restores the box's autologin gaming session on idle, not per-disconnect — see
@@ -1094,22 +1095,20 @@ impl MicService {
     }
 }
 
-/// Stub — mic passthrough needs Linux (PipeWire source + libopus); non-Linux dev builds
-/// drain and drop the frames (sessions still count the datagrams), same as when the
-/// source fails to open.
-#[cfg(not(target_os = "linux"))]
+/// Stub — mic passthrough needs a virtual-mic backend (Linux PipeWire source / Windows virtual audio
+/// device); other platforms drain and drop the frames (sessions still count the datagrams).
+#[cfg(not(any(target_os = "linux", target_os = "windows")))] fn mic_service_thread(rx: std::sync::mpsc::Receiver>) { - tracing::warn!( - "punktfunk/1 mic passthrough requires Linux (PipeWire + libopus) — frames dropped" - ); + tracing::warn!("punktfunk/1 mic passthrough unsupported on this platform — frames dropped"); for _ in rx {} } /// The host-lifetime mic worker: lazily open the virtual mic + decoder, then Opus-decode each /// forwarded frame and push the PCM into the source. Reopen (after [`INJECTOR_REOPEN_BACKOFF`]) /// on open failure or a decode error. Exits when every session sender and the service's own -/// sender drop (host shutdown), tearing the PipeWire source down. -#[cfg(target_os = "linux")] +/// sender drop (host shutdown), tearing the virtual mic down. Linux = PipeWire `Audio/Source`; +/// Windows = a virtual audio device's render endpoint (see `audio::wasapi_mic`). +#[cfg(any(target_os = "linux", target_os = "windows"))] fn mic_service_thread(rx: std::sync::mpsc::Receiver>) { let mut mic: Option> = None; let mut decoder: Option = None;