fix(host/audio): mic pump — open handshake on Linux + rapid-death backoff
apple / swift (push) Successful in 1m8s
apple / screenshots (push) Successful in 5m18s
android / android (push) Successful in 3m21s
windows-host / package (push) Successful in 6m58s
ci / rust (push) Successful in 1m58s
ci / web (push) Successful in 50s
ci / docs-site (push) Successful in 1m1s
ci / bench (push) Successful in 4m49s
deb / build-publish (push) Successful in 4m37s
decky / build-publish (push) Successful in 14s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 6s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 5s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m17s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 9m59s
docker / deploy-docs (push) Successful in 18s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 9m34s

Found by a live boot-order test (host started before the user session's
PipeWire): PwMicSource::open returned Ok before the daemon connection was
attempted, so a PipeWire that wasn't running surfaced as an instantly-dead
instance instead of an open failure — and the pump churned
open→die→reopen at heartbeat rate (1 Hz "virtual mic ready" log spam)
instead of backing off.

- PwMicSource::open now has a bring-up handshake (mirrors the Windows
  backend): ready only after connect + stream connect succeed, so a
  down daemon is an open ERROR and the pump's backoff engages.
- The pump triages deaths: an instance that lived >= 5 s (a one-off
  daemon restart) reopens immediately with the backoff reset; one that
  died right after opening counts as a failed open and backs off
  (2 s → 60 s cap). New pump test rapid_death_backs_off.

Re-validated live: host started with PipeWire stopped → throttled
"unavailable" warns, zero churn; daemon started → mic node up on the
next retry; exactly one pump + one loop thread (no leak).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-03 20:58:06 +00:00
parent 2c7ded0f3c
commit c7630ff5dc
2 changed files with 319 additions and 238 deletions
+80 -22
View File
@@ -128,6 +128,10 @@ struct PumpTuning {
/// An uplink gap longer than this discards the backend's buffered audio before pushing the
/// next frame (a recorder must never hear a stale burst from before a mute/session end).
stale_gap: std::time::Duration,
/// A backend that dies before living this long counts as a FAILED open for backoff purposes
/// (an open that succeeds but dies instantly — e.g. a flapping daemon — must not churn at
/// heartbeat rate); one that lived longer resets the backoff.
stable_after: std::time::Duration,
}
const PUMP_TUNING: PumpTuning = PumpTuning {
@@ -135,6 +139,7 @@ const PUMP_TUNING: PumpTuning = PumpTuning {
backoff_cap: std::time::Duration::from_secs(60),
heartbeat: std::time::Duration::from_secs(1),
stale_gap: std::time::Duration::from_millis(600),
stable_after: std::time::Duration::from_secs(5),
};
/// Host-lifetime virtual-microphone pump: one thread owns the [`VirtualMic`] backend + an Opus
@@ -188,6 +193,26 @@ impl MicPump {
}
}
/// Waits out `dur` on the frame queue instead of sleeping blind: every frame that arrives on
/// `rx` during the wait is received and immediately discarded, so nothing queues up while the
/// backend is closed or being reopened.
///
/// Returns `true` once `dur` has fully elapsed, or `false` as soon as the channel reports that
/// all senders have been dropped (host shutdown). The deadline is checked before each receive,
/// so a zero `dur` returns `true` without touching the channel.
#[cfg_attr(not(any(target_os = "linux", target_os = "windows")), allow(dead_code))]
fn drain_sleep(rx: &std::sync::mpsc::Receiver<Vec<u8>>, dur: std::time::Duration) -> bool {
    use std::sync::mpsc::RecvTimeoutError;
    use std::time::{Duration, Instant};

    // A single blocking receive never waits longer than this slice.
    const POLL_SLICE: Duration = Duration::from_millis(250);

    let wake_at = Instant::now() + dur;
    loop {
        let remaining = wake_at.saturating_duration_since(Instant::now());
        if remaining.is_zero() {
            break true;
        }
        // A received frame and a timeout are handled the same way: the frame (if any) is
        // dropped and the wait goes on. Only a disconnected channel ends the wait early.
        if let Err(RecvTimeoutError::Disconnected) = rx.recv_timeout(remaining.min(POLL_SLICE)) {
            break false;
        }
    }
}
/// The pump loop. `opener` is injected so the tests can run the REAL loop against a mock
/// backend; production passes [`open_virtual_mic`].
#[cfg_attr(not(any(target_os = "linux", target_os = "windows")), allow(dead_code))]
@@ -200,9 +225,8 @@ where
let mut backoff = tuning.backoff_start;
let mut open_fails: u64 = 0;
'reopen: loop {
// Open phase — eager, from thread start. While closed, keep draining the queue so a
// reopen never replays a backlog of stale frames (and senders never see a wedged queue).
loop {
// Open phase — eager, from thread start.
let (mic, mut decoder) = loop {
let opened = opener().and_then(|m| {
let d = opus::Decoder::new(SAMPLE_RATE, opus::Channels::Stereo)
@@ -219,28 +243,20 @@ where
tracing::warn!(error = %format!("{e:#}"), attempts = open_fails,
"virtual mic unavailable — retrying with backoff");
}
let deadline = Instant::now() + backoff;
loop {
let left = deadline.saturating_duration_since(Instant::now());
if left.is_zero() {
break;
}
match rx.recv_timeout(left.min(std::time::Duration::from_millis(250))) {
Ok(_) => {} // drop frames while closed
Err(RecvTimeoutError::Timeout) => {} // keep waiting
Err(RecvTimeoutError::Disconnected) => return, // host shutdown
}
if !drain_sleep(&rx, backoff) {
return;
}
backoff = (backoff * 2).min(tuning.backoff_cap);
}
}
};
backoff = tuning.backoff_start;
open_fails = 0;
tracing::info!("virtual mic ready (host-lifetime)");
// Drop anything queued while (re)opening — it predates the backend.
// Drop anything queued while (re)opening — it predates the backend. (The backoff does
// NOT reset here: only an instance that proves stable resets it — see the death triage.)
while rx.try_recv().is_ok() {}
let opened_at = Instant::now();
// Pump phase — runs until the backend dies (break) or the host shuts down (return).
let mut decode_fails: u64 = 0;
let mut pcm = vec![0f32; 5760 * MIC_CHANNELS as usize]; // up to 120 ms scratch
let mut last_push = Instant::now();
@@ -258,7 +274,7 @@ where
let total = (samples_per_ch * MIC_CHANNELS as usize).min(pcm.len());
if !mic.push(&pcm[..total]) {
tracing::warn!("virtual mic backend died — reopening");
continue 'reopen;
break;
}
last_push = Instant::now();
decode_fails = 0;
@@ -277,7 +293,7 @@ where
Err(RecvTimeoutError::Timeout) => {
if !mic.alive() {
tracing::warn!("virtual mic backend died while idle — reopening");
continue 'reopen;
break;
}
}
Err(RecvTimeoutError::Disconnected) => {
@@ -286,6 +302,21 @@ where
}
}
}
// Death triage: an instance that lived is a one-off (PipeWire/audio-engine restart) —
// reopen immediately with the backoff reset. One that died right after opening is a
// failed open in disguise (flapping daemon, endpoint racing away): back off like the
// open loop, or the pump would churn open→die→reopen at heartbeat rate.
if opened_at.elapsed() >= tuning.stable_after {
backoff = tuning.backoff_start;
open_fails = 0;
} else {
open_fails += 1;
if !drain_sleep(&rx, backoff) {
return;
}
backoff = (backoff * 2).min(tuning.backoff_cap);
}
}
}
@@ -343,8 +374,10 @@ mod pump_tests {
}
/// Run the REAL pump loop against mock backends; `fail_first` opens fail before the first
/// success (exercises the eager retry/backoff path).
fn start(fail_first: usize) -> Harness {
/// success (exercises the eager retry/backoff path). `dead_on_arrival` opens every instance
/// pre-killed (exercises the rapid-death churn guard). `stable_after` mirrors the tuning
/// field (ZERO = every death counts as stable → immediate reopen, keeping tests fast).
fn start_tuned(fail_first: usize, dead_on_arrival: bool, stable_after: Duration) -> Harness {
let (tx, rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(MIC_QUEUE_CAP);
let opens = Arc::new(AtomicUsize::new(0));
let alive = Arc::new(Mutex::new(None::<Arc<AtomicBool>>));
@@ -361,6 +394,7 @@ mod pump_tests {
backoff_cap: Duration::from_millis(40),
heartbeat: Duration::from_millis(20),
stale_gap: Duration::from_millis(80),
stable_after,
};
let join = std::thread::spawn(move || {
pump_thread(
@@ -370,7 +404,7 @@ mod pump_tests {
if n < fail_first {
anyhow::bail!("backend not up yet (simulated)");
}
let a = Arc::new(AtomicBool::new(true));
let a = Arc::new(AtomicBool::new(!dead_on_arrival));
*alive2.lock().unwrap() = Some(a.clone());
Ok(Box::new(MockMic {
alive: a,
@@ -391,6 +425,10 @@ mod pump_tests {
}
}
/// Default harness: forwards to `start_tuned` with instances that open alive and a zero
/// stability threshold. `fail_first` is passed through unchanged.
fn start(fail_first: usize) -> Harness {
    let dead_on_arrival = false;
    let stable_after = Duration::ZERO;
    start_tuned(fail_first, dead_on_arrival, stable_after)
}
fn wait_until(what: &str, mut cond: impl FnMut() -> bool) {
for _ in 0..200 {
if cond() {
@@ -471,6 +509,26 @@ mod pump_tests {
h.join.join().unwrap();
}
/// Churn guard: when every instance dies right after it opens, the pump has to space its
/// reopen attempts out with BACKOFF. Retrying once per heartbeat would mean a flapping backend
/// (daemon up but dropping us instantly) keeps the pump cycling open→die→reopen forever.
#[test]
fn rapid_death_backs_off() {
    // Each instance is opened already dead, and the 10 s stability threshold is far longer
    // than the observation window, so every death is treated as a failed open. At one reopen
    // per 20 ms heartbeat the window would see ≈ 25 opens; the upper bound below sits well
    // under that.
    let observe_for = Duration::from_millis(500);
    let harness = start_tuned(0, true, Duration::from_secs(10));
    std::thread::sleep(observe_for);

    let opens = harness.opens.load(Ordering::SeqCst);
    // Lower bound: the pump has not given up. Upper bound: it is not churning.
    assert!(opens >= 2, "must keep retrying (got {opens})");
    assert!(
        opens <= 15,
        "must back off, not churn per heartbeat (got {opens})"
    );

    // Drop the sender, then wait for the pump thread to finish.
    drop(harness.tx);
    harness.join.join().unwrap();
}
/// An uplink gap discards buffered-stale audio before the next frame plays.
#[test]
fn discards_after_gap() {
+26 -3
View File
@@ -147,24 +147,32 @@ impl PwMicSource {
let (quit_tx, quit_rx) = pipewire::channel::channel::<Terminate>();
let alive = Arc::new(AtomicBool::new(true));
let flush = Arc::new(AtomicBool::new(false));
// Bring-up handshake (mirrors the Windows backend): a PipeWire that isn't running (host
// service started before the user session) must surface as an open ERROR — engaging the
// pump's backoff — not as an instantly-dead instance the pump would churn on.
let (ready_tx, ready_rx) = sync_channel::<Result<()>>(1);
let (alive_t, flush_t) = (alive.clone(), flush.clone());
thread::Builder::new()
.name("punktfunk-pw-mic".into())
.spawn(move || {
if let Err(e) = mic_pw_thread(pcm_rx, quit_rx, channels, flush_t) {
if let Err(e) = mic_pw_thread(pcm_rx, quit_rx, channels, flush_t, ready_tx) {
tracing::error!(error = %format!("{e:#}"), "pipewire virtual-mic thread failed");
}
// Whether a clean quit or a daemon death: this instance is done — the pump reopens.
alive_t.store(false, Ordering::Release);
})
.context("spawn pipewire virtual-mic thread")?;
Ok(PwMicSource {
match ready_rx.recv_timeout(Duration::from_secs(5)) {
Ok(Ok(())) => Ok(PwMicSource {
pcm: pcm_tx,
channels,
quit: quit_tx,
alive,
flush,
})
}),
Ok(Err(e)) => Err(e),
Err(_) => Err(anyhow!("pipewire virtual-mic init timed out")),
}
}
}
@@ -224,12 +232,17 @@ fn mic_pw_thread(
quit_rx: pipewire::channel::Receiver<Terminate>,
channels: u32,
flush: Arc<AtomicBool>,
ready: std::sync::mpsc::SyncSender<Result<()>>,
) -> Result<()> {
use pipewire as pw;
use pw::{properties::properties, spa};
use spa::param::audio::{AudioFormat, AudioInfoRaw};
use spa::pod::Pod;
// The PipeWire objects are lifetime-chained (guards borrow the mainloop/core), so setup and
// the blocking run share one frame; the IIFE lets every setup `?` funnel through the ready
// handshake below (mirrors the Windows render_thread).
let result = (|| -> Result<()> {
crate::pwinit::ensure_init();
let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw mic MainLoop")?;
let context = pw::context::ContextRc::new(&mainloop, None).context("pw mic Context")?;
@@ -452,9 +465,19 @@ fn mic_pw_thread(
)
.context("pw mic stream connect")?;
// Setup complete: the daemon connection and stream connect succeeded — report ready,
// then block until quit/death. (A PipeWire that isn't running never reaches this line;
// its connect error surfaces through the handshake as an OPEN failure, so the pump
// backs off instead of churning on instantly-dead instances.)
let _ = ready.send(Ok(()));
mainloop.run();
tracing::debug!("pipewire virtual-mic loop exited (source dropped)");
Ok(())
})();
if let Err(e) = &result {
let _ = ready.send(Err(anyhow!("{e:#}")));
}
result
}
fn pw_thread(