Files
punktfunk/crates/punktfunk-host/src/audio/linux/mod.rs
T
enricobuehler 3947d5b07a
apple / swift (push) Successful in 1m1s
ci / rust (push) Successful in 4m36s
ci / web (push) Successful in 48s
ci / docs-site (push) Successful in 55s
apple / screenshots (push) Successful in 5m9s
ci / bench (push) Successful in 4m36s
windows-host / package (push) Successful in 7m8s
windows-msix / package (arm64, C:\Users\Public\ffmpeg-arm64, aarch64-pc-windows-msvc, C:\t-a64) (push) Successful in 1m19s
release / apple (push) Successful in 9m52s
windows-msix / package (x64, C:\Users\Public\ffmpeg, x86_64-pc-windows-msvc, C:\t) (push) Successful in 1m16s
android / android (push) Successful in 3m21s
decky / build-publish (push) Successful in 11s
deb / build-publish (push) Successful in 2m45s
flatpak / build-publish (push) Successful in 4m11s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 8m48s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 8m35s
docker / deploy-docs (push) Successful in 18s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 5s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 4s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 5s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 8s
fix(host/audio): drive the Linux virtual mic with RT_PROCESS (was silent)
The punktfunk-mic PipeWire source connected without RT_PROCESS, so it ran as an
async/main-loop node. In the host's busy multi-stream graph (desktop audio + video
capture + the session) it never acquired a driver, stayed suspended, and its
process() callback never fired — every recorder reading the remote mic heard pure
silence (the long-standing "Linux host mic broken"). Connect the mic stream with
RT_PROCESS so it is a synchronous node that joins its consumer's driver group and
is actually driven.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 12:46:06 +00:00

482 lines
19 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! PipeWire audio capture of the default sink's monitor (system output).
//!
//! Connects to the user's PipeWire daemon (via `XDG_RUNTIME_DIR`, inherited from the Sway
//! session) and opens an input stream with `stream.capture.sink=true`, which routes the
//! default sink's monitor into us — no portal needed (unlike screen capture). The (`!Send`)
//! MainLoop/Stream live on a dedicated thread; interleaved `f32` chunks leave over a bounded
//! channel (dropped if the encoder falls behind, never blocking the PipeWire loop).
//!
//! The stream is opened at the *session's* channel count (2/6/8). If the sink has fewer
//! channels than requested, PipeWire's channel-mixer fills the extra positions with silence
//! (zero upmix), so a stereo desktop still produces a valid 5.1/7.1 capture. Dropping the
//! capturer quits the loop thread (via a `pipewire::channel` Terminate message), tearing the
//! stream down promptly — required so a surround session can replace a stereo capturer
//! without leaking a PipeWire consumer (see CLAUDE.md: a wedged link head-blocks the daemon).
use super::{AudioCapturer, VirtualMic, SAMPLE_RATE};
use anyhow::{anyhow, Context, Result};
use std::collections::VecDeque;
use std::sync::mpsc::{sync_channel, Receiver, RecvTimeoutError};
use std::thread;
use std::time::Duration;
/// Message asking the PipeWire loop thread to quit (sent from `Drop`).
struct Terminate;
pub struct PwAudioCapturer {
chunks: Receiver<Vec<f32>>,
channels: u32,
quit: pipewire::channel::Sender<Terminate>,
}
impl PwAudioCapturer {
pub fn open(channels: u32) -> Result<PwAudioCapturer> {
anyhow::ensure!(
matches!(channels, 1 | 2 | 6 | 8),
"unsupported audio channel count {channels} (want 2, 6 or 8)"
);
let (tx, rx) = sync_channel::<Vec<f32>>(64);
let (quit_tx, quit_rx) = pipewire::channel::channel::<Terminate>();
thread::Builder::new()
.name("punktfunk-pw-audio".into())
.spawn(move || {
if let Err(e) = pw_thread(tx, quit_rx, channels) {
tracing::error!(error = %format!("{e:#}"), "pipewire audio thread failed");
}
})
.context("spawn pipewire audio thread")?;
Ok(PwAudioCapturer {
chunks: rx,
channels,
quit: quit_tx,
})
}
}
impl Drop for PwAudioCapturer {
fn drop(&mut self) {
// Ask the loop thread to quit; the stream/core/loop unwind there (RAII). A failed
// send means the thread already exited — nothing to tear down.
let _ = self.quit.send(Terminate);
}
}
impl AudioCapturer for PwAudioCapturer {
fn next_chunk(&mut self) -> Result<Vec<f32>> {
match self.chunks.recv_timeout(Duration::from_secs(5)) {
Ok(c) => Ok(c),
// A quiet sink (paused game, idle desktop) is NOT a failure — return an empty chunk so the
// caller keeps the capturer alive. Only a dead capture thread is an Err (→ caller reopens).
Err(RecvTimeoutError::Timeout) => Ok(Vec::new()),
Err(RecvTimeoutError::Disconnected) => Err(anyhow!("pipewire audio thread ended")),
}
}
fn channels(&self) -> u32 {
self.channels
}
fn drain(&mut self) {
while self.chunks.try_recv().is_ok() {}
}
}
/// SPA channel position array for the GameStream surround order FL FR FC LFE RL RR [SL SR]
/// (= the PipeWire/PulseAudio default map for 6/8 channels, and the order Moonlight's
/// renderers expect — moonlight-common-c: "we use FL FR C LFE RL RR SL SR"). Values are
/// `enum spa_audio_channel` (spa/param/audio/raw.h): FL=3 FR=4 FC=5 LFE=6 SL=7 SR=8 RL=12
/// RR=13.
fn spa_positions(channels: u32) -> [u32; 64] {
const FL: u32 = 3;
const FR: u32 = 4;
const FC: u32 = 5;
const LFE: u32 = 6;
const SL: u32 = 7;
const SR: u32 = 8;
const RL: u32 = 12;
const RR: u32 = 13;
const MONO: u32 = 2;
let mut pos = [0u32; 64];
let order: &[u32] = match channels {
1 => &[MONO],
2 => &[FL, FR],
6 => &[FL, FR, FC, LFE, RL, RR],
8 => &[FL, FR, FC, LFE, RL, RR, SL, SR],
_ => unreachable!("validated in open()"),
};
pos[..order.len()].copy_from_slice(order);
pos
}
/// Virtual microphone: a PipeWire `Audio/Source` node host apps can record from. The host pushes
/// decoded client-mic PCM in; the loop thread's producer callback drains it (silence on
/// underrun) into PipeWire buffers. Mirrors [`PwAudioCapturer`] but inverted (Direction::Output).
pub struct PwMicSource {
pcm: std::sync::mpsc::SyncSender<Vec<f32>>,
channels: u32,
quit: pipewire::channel::Sender<Terminate>,
}
impl PwMicSource {
pub fn open(channels: u32) -> Result<PwMicSource> {
anyhow::ensure!(
matches!(channels, 1 | 2),
"virtual mic supports 1 or 2 channels, got {channels}"
);
let (pcm_tx, pcm_rx) = sync_channel::<Vec<f32>>(64);
let (quit_tx, quit_rx) = pipewire::channel::channel::<Terminate>();
thread::Builder::new()
.name("punktfunk-pw-mic".into())
.spawn(move || {
if let Err(e) = mic_pw_thread(pcm_rx, quit_rx, channels) {
tracing::error!(error = %format!("{e:#}"), "pipewire virtual-mic thread failed");
}
})
.context("spawn pipewire virtual-mic thread")?;
Ok(PwMicSource {
pcm: pcm_tx,
channels,
quit: quit_tx,
})
}
}
impl Drop for PwMicSource {
fn drop(&mut self) {
let _ = self.quit.send(Terminate);
}
}
impl VirtualMic for PwMicSource {
fn push(&self, pcm: &[f32]) {
let _ = self.pcm.try_send(pcm.to_vec()); // drop if the PipeWire side is behind
}
fn channels(&self) -> u32 {
self.channels
}
}
/// Producer-side state for the virtual-mic loop: incoming decoded PCM and a small ring buffer
/// the process callback drains into PipeWire buffers (capped, so latency stays bounded).
/// `primed` is a jitter buffer gate — see the process callback.
struct MicUserData {
rx: Receiver<Vec<f32>>,
ring: VecDeque<f32>,
channels: usize,
primed: bool,
}
fn mic_pw_thread(
pcm_rx: Receiver<Vec<f32>>,
quit_rx: pipewire::channel::Receiver<Terminate>,
channels: u32,
) -> Result<()> {
use pipewire as pw;
use pw::{properties::properties, spa};
use spa::param::audio::{AudioFormat, AudioInfoRaw};
use spa::pod::Pod;
crate::pwinit::ensure_init();
let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw mic MainLoop")?;
let context = pw::context::ContextRc::new(&mainloop, None).context("pw mic Context")?;
let core = context
.connect_rc(None)
.context("pw mic connect (is PipeWire running in this session?)")?;
let _quit_guard = quit_rx.attach(mainloop.loop_(), {
let mainloop = mainloop.clone();
move |_| mainloop.quit()
});
// media.class=Audio/Source advertises us as a microphone (a recordable source), NOT a
// playback stream — without it, Direction::Output + Playback would route to the speakers.
let stream = pw::stream::StreamBox::new(
&core,
"punktfunk-mic",
properties! {
*pw::keys::MEDIA_TYPE => "Audio",
*pw::keys::MEDIA_CLASS => "Audio/Source",
*pw::keys::NODE_NAME => "punktfunk-mic",
*pw::keys::NODE_DESCRIPTION => "Punktfunk Remote Microphone",
// ~5 ms quantum (one Opus frame) so recording apps get smooth low-latency chunks.
*pw::keys::NODE_LATENCY => "240/48000",
},
)
.context("pw mic Stream")?;
let ud = MicUserData {
rx: pcm_rx,
ring: VecDeque::new(),
channels: channels as usize,
primed: false,
};
let _listener = stream
.add_local_listener_with_user_data(ud)
.state_changed(|_s, _ud, old, new| {
tracing::info!(?old, ?new, "pipewire virtual-mic stream state");
})
.param_changed(|_s, _ud, id, param| {
let Some(param) = param else { return };
if id != pw::spa::param::ParamType::Format.as_raw() {
return;
}
let mut info = AudioInfoRaw::default();
if info.parse(param).is_ok() {
tracing::info!(
format = ?info.format(),
rate = info.rate(),
channels = info.channels(),
"virtual-mic format negotiated"
);
}
})
.process(|stream, ud| {
let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let Some(mut buffer) = stream.dequeue_buffer() else {
return;
};
// Pull all newly-decoded PCM into the ring.
while let Ok(frame) = ud.rx.try_recv() {
ud.ring.extend(frame);
}
let stride = 4 * ud.channels; // F32LE interleaved
let datas = buffer.datas_mut();
if datas.is_empty() {
return;
}
let data = &mut datas[0];
let want_frames = data.data().map(|s| s.len() / stride).unwrap_or(0);
let want = want_frames * ud.channels; // interleaved samples this quantum needs
static FIRST: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(true);
if FIRST.swap(false, std::sync::atomic::Ordering::Relaxed) {
tracing::info!(
quantum_frames = want_frames,
quantum_ms = want_frames as f32 / 48.0,
"virtual-mic consumer connected"
);
}
// Adaptive jitter buffer. The client pushes 5 ms frames; the recorder pulls a
// whole *quantum* (often 2043 ms) from an independent clock. A drain of one
// quantum must not outrun what's buffered, or every call underruns to silence
// (the original ~58% gaps). So prime to ~3 quanta before producing, hold there,
// and re-prime only after a genuine full drain (the client went quiet). The ring
// is capped at a few quanta so latency stays bounded.
let target = (3 * want).clamp(720 * ud.channels, 9600 * ud.channels);
while ud.ring.len() > target.max(want) + want {
ud.ring.pop_front(); // bound latency: drop the oldest beyond ~1 quantum slack
}
if !ud.primed && ud.ring.len() >= target {
ud.primed = true;
}
let n_frames = if let Some(slice) = data.data() {
for k in 0..want {
let s = if ud.primed {
ud.ring.pop_front().unwrap_or(0.0) // silence on a momentary underrun
} else {
0.0 // not yet primed — emit silence while the buffer fills
};
let off = k * 4;
slice[off..off + 4].copy_from_slice(&s.to_le_bytes());
}
want_frames
} else {
0
};
if ud.ring.is_empty() {
ud.primed = false; // fully drained — re-prime before producing again
}
let chunk = data.chunk_mut();
*chunk.offset_mut() = 0;
*chunk.stride_mut() = stride as _;
*chunk.size_mut() = (stride * n_frames) as _;
}));
if outcome.is_err() {
tracing::error!("panic in pipewire virtual-mic callback");
}
})
.register()
.context("register virtual-mic stream listener")?;
let mut info = AudioInfoRaw::new();
info.set_format(AudioFormat::F32LE);
info.set_rate(SAMPLE_RATE);
info.set_channels(channels);
info.set_position(spa_positions(channels));
let obj = pw::spa::pod::Object {
type_: pw::spa::utils::SpaTypes::ObjectParamFormat.as_raw(),
id: pw::spa::param::ParamType::EnumFormat.as_raw(),
properties: info.into(),
};
let values: Vec<u8> = pw::spa::pod::serialize::PodSerializer::serialize(
std::io::Cursor::new(Vec::new()),
&pw::spa::pod::Value::Object(obj),
)
.context("serialize mic format pod")?
.0
.into_inner();
let mut params = [Pod::from_bytes(&values).context("mic pod from bytes")?];
// RT_PROCESS: run the producer callback on PipeWire's realtime data loop, so the source is a
// *synchronous* graph node that joins its consumer's driver group and is actually driven. Without
// it the node is async/main-loop and, in the host's busy multi-stream graph (desktop-audio +
// video capture + the session), never acquires a driver — it stays suspended and its process()
// never fires, so every recorder hears pure silence (the long-standing "Linux host mic broken").
stream
.connect(
spa::utils::Direction::Output, // we PRODUCE samples (a source)
None,
pw::stream::StreamFlags::AUTOCONNECT
| pw::stream::StreamFlags::MAP_BUFFERS
| pw::stream::StreamFlags::RT_PROCESS,
&mut params,
)
.context("pw mic stream connect")?;
mainloop.run();
tracing::debug!("pipewire virtual-mic loop exited (source dropped)");
Ok(())
}
fn pw_thread(
tx: std::sync::mpsc::SyncSender<Vec<f32>>,
quit_rx: pipewire::channel::Receiver<Terminate>,
channels: u32,
) -> Result<()> {
use pipewire as pw;
use pw::{properties::properties, spa};
use spa::param::audio::{AudioFormat, AudioInfoRaw};
use spa::pod::Pod;
crate::pwinit::ensure_init();
let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw audio MainLoop")?;
let context = pw::context::ContextRc::new(&mainloop, None).context("pw audio Context")?;
let core = context
.connect_rc(None)
.context("pw audio connect (is PipeWire running in this session?)")?;
// Cross-thread teardown: the capturer's Drop sends Terminate; quit the loop here.
let _quit_guard = quit_rx.attach(mainloop.loop_(), {
let mainloop = mainloop.clone();
move |_| mainloop.quit()
});
let stream = pw::stream::StreamBox::new(
&core,
"punktfunk-audio",
properties! {
*pw::keys::MEDIA_TYPE => "Audio",
*pw::keys::MEDIA_CATEGORY => "Capture",
*pw::keys::MEDIA_ROLE => "Music",
// Capture the default sink's monitor (system output), not a microphone.
*pw::keys::STREAM_CAPTURE_SINK => "true",
// Ask for a ~5ms quantum (= one Opus frame) so buffers arrive smoothly rather than
// in large bursts the client's low-latency jitter buffer would hear as glitching.
*pw::keys::NODE_LATENCY => "240/48000",
},
)
.context("pw audio Stream")?;
let _listener = stream
.add_local_listener_with_user_data(tx)
.state_changed(|_s, _ud, old, new| {
tracing::info!(?old, ?new, "pipewire audio stream state");
})
.param_changed(|_stream, _tx, id, param| {
let Some(param) = param else { return };
if id != pw::spa::param::ParamType::Format.as_raw() {
return;
}
let mut info = AudioInfoRaw::default();
if info.parse(param).is_ok() {
tracing::info!(
format = ?info.format(),
rate = info.rate(),
channels = info.channels(),
"audio format negotiated"
);
}
})
.process(|stream, tx| {
let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let Some(mut buffer) = stream.dequeue_buffer() else {
return;
};
let datas = buffer.datas_mut();
if datas.is_empty() {
return;
}
let d = &mut datas[0];
let (offset, size) = {
let c = d.chunk();
(c.offset() as usize, c.size() as usize)
};
let Some(buf) = d.data() else { return };
if offset > buf.len() {
return;
}
let region = &buf[offset..(offset + size).min(buf.len())];
// Negotiated as F32LE; reinterpret the byte region as interleaved f32.
let n = region.len() / 4;
static FIRST: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(true);
if FIRST.swap(false, std::sync::atomic::Ordering::Relaxed) {
tracing::info!(samples = n, "audio first capture buffer");
}
let mut samples = Vec::with_capacity(n);
for i in 0..n {
let b = [
region[i * 4],
region[i * 4 + 1],
region[i * 4 + 2],
region[i * 4 + 3],
];
samples.push(f32::from_le_bytes(b));
}
let _ = tx.try_send(samples); // drop if the encoder is behind
}));
if outcome.is_err() {
tracing::error!("panic in pipewire audio callback — chunk dropped");
}
})
.register()
.context("register audio stream listener")?;
// Request F32LE, 48 kHz, at the session's channel count with explicit positions —
// PipeWire's channel-mixer up/downmixes the sink monitor to this layout.
let mut info = AudioInfoRaw::new();
info.set_format(AudioFormat::F32LE);
info.set_rate(SAMPLE_RATE);
info.set_channels(channels);
info.set_position(spa_positions(channels));
let obj = pw::spa::pod::Object {
type_: pw::spa::utils::SpaTypes::ObjectParamFormat.as_raw(),
id: pw::spa::param::ParamType::EnumFormat.as_raw(),
properties: info.into(),
};
let values: Vec<u8> = pw::spa::pod::serialize::PodSerializer::serialize(
std::io::Cursor::new(Vec::new()),
&pw::spa::pod::Value::Object(obj),
)
.context("serialize audio format pod")?
.0
.into_inner();
let mut params = [Pod::from_bytes(&values).context("audio pod from bytes")?];
stream
.connect(
spa::utils::Direction::Input,
None, // PW_ID_ANY — autoconnect to the default sink monitor
pw::stream::StreamFlags::AUTOCONNECT | pw::stream::StreamFlags::MAP_BUFFERS,
&mut params,
)
.context("pw audio stream connect")?;
mainloop.run();
tracing::debug!("pipewire audio loop exited (capturer dropped)");
Ok(())
}