fix(host/audio): mic pump — open handshake on Linux + rapid-death backoff
apple / swift (push) Successful in 1m8s
apple / screenshots (push) Successful in 5m18s
android / android (push) Successful in 3m21s
windows-host / package (push) Successful in 6m58s
ci / rust (push) Successful in 1m58s
ci / web (push) Successful in 50s
ci / docs-site (push) Successful in 1m1s
ci / bench (push) Successful in 4m49s
deb / build-publish (push) Successful in 4m37s
decky / build-publish (push) Successful in 14s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 6s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 5s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m17s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 9m59s
docker / deploy-docs (push) Successful in 18s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 9m34s

Found by a live boot-order test (host started before the user session's
PipeWire): PwMicSource::open returned Ok before the daemon connection was
attempted, so a PipeWire that wasn't running surfaced as an instantly-dead
instance instead of an open failure — and the pump churned
open→die→reopen at heartbeat rate (1 Hz "virtual mic ready" log spam)
instead of backing off.

- PwMicSource::open now has a bring-up handshake (mirrors the Windows
  backend): ready only after connect + stream connect succeed, so a
  down daemon is an open ERROR and the pump's backoff engages.
- The pump triages deaths: an instance that lived >= 5 s (a one-off
  daemon restart) reopens immediately with the backoff reset; one that
  died right after opening counts as a failed open and backs off
  (2 s → 60 s cap). New pump test rapid_death_backs_off.

Re-validated live: host started with PipeWire stopped → throttled
"unavailable" warns, zero churn; daemon started → mic node up on the
next retry; exactly one pump + one loop thread (no leak).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-03 20:58:06 +00:00
parent 2c7ded0f3c
commit c7630ff5dc
2 changed files with 319 additions and 238 deletions
+80 -22
View File
@@ -128,6 +128,10 @@ struct PumpTuning {
/// An uplink gap longer than this discards the backend's buffered audio before pushing the
/// next frame (a recorder must never hear a stale burst from before a mute/session end).
stale_gap: std::time::Duration,
/// A backend that dies before living this long counts as a FAILED open for backoff purposes
/// (an open that succeeds but dies instantly — e.g. a flapping daemon — must not churn at
/// heartbeat rate); one that lived longer resets the backoff.
stable_after: std::time::Duration,
}
const PUMP_TUNING: PumpTuning = PumpTuning {
@@ -135,6 +139,7 @@ const PUMP_TUNING: PumpTuning = PumpTuning {
backoff_cap: std::time::Duration::from_secs(60),
heartbeat: std::time::Duration::from_secs(1),
stale_gap: std::time::Duration::from_millis(600),
stable_after: std::time::Duration::from_secs(5),
};
/// Host-lifetime virtual-microphone pump: one thread owns the [`VirtualMic`] backend + an Opus
@@ -188,6 +193,26 @@ impl MicPump {
}
}
/// Block for `dur`, discarding every frame that arrives on `rx` in the meantime.
///
/// Used while the backend is closed or reopening: frames are received and dropped so no stale
/// backlog builds up and senders never find the queue wedged. The wait is sliced into
/// receives of at most 250 ms each, re-checking the deadline between slices.
///
/// Returns `true` once the full duration has elapsed, or `false` as soon as the channel reports
/// that every sender is gone (host shutdown).
#[cfg_attr(not(any(target_os = "linux", target_os = "windows")), allow(dead_code))]
fn drain_sleep(rx: &std::sync::mpsc::Receiver<Vec<u8>>, dur: std::time::Duration) -> bool {
    use std::sync::mpsc::RecvTimeoutError;
    use std::time::{Duration, Instant};

    // Upper bound on a single blocking receive.
    const POLL_SLICE: Duration = Duration::from_millis(250);

    let wake_at = Instant::now() + dur;
    loop {
        let remaining = wake_at.saturating_duration_since(Instant::now());
        if remaining.is_zero() {
            return true;
        }
        // A received frame is dropped on the spot and a timeout just means "keep waiting";
        // only a disconnected channel ends the sleep early.
        if matches!(
            rx.recv_timeout(remaining.min(POLL_SLICE)),
            Err(RecvTimeoutError::Disconnected)
        ) {
            return false;
        }
    }
}
/// The pump loop. `opener` is injected so the tests can run the REAL loop against a mock
/// backend; production passes [`open_virtual_mic`].
#[cfg_attr(not(any(target_os = "linux", target_os = "windows")), allow(dead_code))]
@@ -200,9 +225,8 @@ where
let mut backoff = tuning.backoff_start;
let mut open_fails: u64 = 0;
'reopen: loop {
// Open phase — eager, from thread start. While closed, keep draining the queue so a
// reopen never replays a backlog of stale frames (and senders never see a wedged queue).
loop {
// Open phase — eager, from thread start.
let (mic, mut decoder) = loop {
let opened = opener().and_then(|m| {
let d = opus::Decoder::new(SAMPLE_RATE, opus::Channels::Stereo)
@@ -219,28 +243,20 @@ where
tracing::warn!(error = %format!("{e:#}"), attempts = open_fails,
"virtual mic unavailable — retrying with backoff");
}
let deadline = Instant::now() + backoff;
loop {
let left = deadline.saturating_duration_since(Instant::now());
if left.is_zero() {
break;
}
match rx.recv_timeout(left.min(std::time::Duration::from_millis(250))) {
Ok(_) => {} // drop frames while closed
Err(RecvTimeoutError::Timeout) => {} // keep waiting
Err(RecvTimeoutError::Disconnected) => return, // host shutdown
}
if !drain_sleep(&rx, backoff) {
return;
}
backoff = (backoff * 2).min(tuning.backoff_cap);
}
}
};
backoff = tuning.backoff_start;
open_fails = 0;
tracing::info!("virtual mic ready (host-lifetime)");
// Drop anything queued while (re)opening — it predates the backend.
// Drop anything queued while (re)opening — it predates the backend. (The backoff does
// NOT reset here: only an instance that proves stable resets it — see the death triage.)
while rx.try_recv().is_ok() {}
let opened_at = Instant::now();
// Pump phase — runs until the backend dies (break) or the host shuts down (return).
let mut decode_fails: u64 = 0;
let mut pcm = vec![0f32; 5760 * MIC_CHANNELS as usize]; // up to 120 ms scratch
let mut last_push = Instant::now();
@@ -258,7 +274,7 @@ where
let total = (samples_per_ch * MIC_CHANNELS as usize).min(pcm.len());
if !mic.push(&pcm[..total]) {
tracing::warn!("virtual mic backend died — reopening");
continue 'reopen;
break;
}
last_push = Instant::now();
decode_fails = 0;
@@ -277,7 +293,7 @@ where
Err(RecvTimeoutError::Timeout) => {
if !mic.alive() {
tracing::warn!("virtual mic backend died while idle — reopening");
continue 'reopen;
break;
}
}
Err(RecvTimeoutError::Disconnected) => {
@@ -286,6 +302,21 @@ where
}
}
}
// Death triage: an instance that lived is a one-off (PipeWire/audio-engine restart) —
// reopen immediately with the backoff reset. One that died right after opening is a
// failed open in disguise (flapping daemon, endpoint racing away): back off like the
// open loop, or the pump would churn open→die→reopen at heartbeat rate.
if opened_at.elapsed() >= tuning.stable_after {
backoff = tuning.backoff_start;
open_fails = 0;
} else {
open_fails += 1;
if !drain_sleep(&rx, backoff) {
return;
}
backoff = (backoff * 2).min(tuning.backoff_cap);
}
}
}
@@ -343,8 +374,10 @@ mod pump_tests {
}
/// Run the REAL pump loop against mock backends; `fail_first` opens fail before the first
/// success (exercises the eager retry/backoff path).
fn start(fail_first: usize) -> Harness {
/// success (exercises the eager retry/backoff path). `dead_on_arrival` opens every instance
/// pre-killed (exercises the rapid-death churn guard). `stable_after` mirrors the tuning
/// field (ZERO = every death counts as stable → immediate reopen, keeping tests fast).
fn start_tuned(fail_first: usize, dead_on_arrival: bool, stable_after: Duration) -> Harness {
let (tx, rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(MIC_QUEUE_CAP);
let opens = Arc::new(AtomicUsize::new(0));
let alive = Arc::new(Mutex::new(None::<Arc<AtomicBool>>));
@@ -361,6 +394,7 @@ mod pump_tests {
backoff_cap: Duration::from_millis(40),
heartbeat: Duration::from_millis(20),
stale_gap: Duration::from_millis(80),
stable_after,
};
let join = std::thread::spawn(move || {
pump_thread(
@@ -370,7 +404,7 @@ mod pump_tests {
if n < fail_first {
anyhow::bail!("backend not up yet (simulated)");
}
let a = Arc::new(AtomicBool::new(true));
let a = Arc::new(AtomicBool::new(!dead_on_arrival));
*alive2.lock().unwrap() = Some(a.clone());
Ok(Box::new(MockMic {
alive: a,
@@ -391,6 +425,10 @@ mod pump_tests {
}
}
/// Default harness: instances open alive, and a zero stability threshold makes every death
/// count as stable (immediate reopen), keeping the tests fast. `fail_first` is forwarded
/// unchanged to [`start_tuned`].
fn start(fail_first: usize) -> Harness {
    let dead_on_arrival = false;
    let stable_after = Duration::ZERO;
    start_tuned(fail_first, dead_on_arrival, stable_after)
}
fn wait_until(what: &str, mut cond: impl FnMut() -> bool) {
for _ in 0..200 {
if cond() {
@@ -471,6 +509,26 @@ mod pump_tests {
h.join.join().unwrap();
}
/// A backend whose instances die right after opening must be retried with BACKOFF rather
/// than at heartbeat rate; otherwise a flapping backend (daemon up but dropping us at once)
/// would keep the pump cycling open→die→reopen on every heartbeat.
#[test]
fn rapid_death_backs_off() {
    // Every instance opens already dead, and the stability threshold is far longer than the
    // observation window, so each death is triaged as a failed open. Without the guard the
    // pump would reopen about once per 20 ms heartbeat (≈ 25 opens in 500 ms); with the
    // capped backoff the count stays far lower, hence the ceiling asserted below.
    let never_stable = Duration::from_secs(10);
    let observe_for = Duration::from_millis(500);

    let harness = start_tuned(0, true, never_stable);
    std::thread::sleep(observe_for);

    let opens = harness.opens.load(Ordering::SeqCst);
    assert!(opens >= 2, "must keep retrying (got {opens})");
    assert!(
        opens <= 15,
        "must back off, not churn per heartbeat (got {opens})"
    );

    // Dropping the last sender signals host shutdown; the pump thread must then exit.
    drop(harness.tx);
    harness.join.join().unwrap();
}
/// An uplink gap discards buffered-stale audio before the next frame plays.
#[test]
fn discards_after_gap() {