fix(host/audio): mic pump — open handshake on Linux + rapid-death backoff
apple / swift (push) Successful in 1m8s
apple / screenshots (push) Successful in 5m18s
android / android (push) Successful in 3m21s
windows-host / package (push) Successful in 6m58s
ci / rust (push) Successful in 1m58s
ci / web (push) Successful in 50s
ci / docs-site (push) Successful in 1m1s
ci / bench (push) Successful in 4m49s
deb / build-publish (push) Successful in 4m37s
decky / build-publish (push) Successful in 14s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 6s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 5s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m17s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 9m59s
docker / deploy-docs (push) Successful in 18s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 9m34s
apple / swift (push) Successful in 1m8s
apple / screenshots (push) Successful in 5m18s
android / android (push) Successful in 3m21s
windows-host / package (push) Successful in 6m58s
ci / rust (push) Successful in 1m58s
ci / web (push) Successful in 50s
ci / docs-site (push) Successful in 1m1s
ci / bench (push) Successful in 4m49s
deb / build-publish (push) Successful in 4m37s
decky / build-publish (push) Successful in 14s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 6s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 5s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m17s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 9m59s
docker / deploy-docs (push) Successful in 18s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 9m34s
Found by a live boot-order test (host started before the user session's PipeWire): PwMicSource::open returned Ok before the daemon connection was attempted, so a PipeWire that wasn't running surfaced as an instantly-dead instance instead of an open failure — and the pump churned open→die→reopen at heartbeat rate (1 Hz "virtual mic ready" log spam) instead of backing off.

- PwMicSource::open now has a bring-up handshake (mirrors the Windows backend): ready only after connect + stream connect succeed, so a down daemon is an open ERROR and the pump's backoff engages.
- The pump triages deaths: an instance that lived >= 5 s (a one-off daemon restart) reopens immediately with the backoff reset; one that died right after opening counts as a failed open and backs off (2 s → 60 s cap). New pump test rapid_death_backs_off.

Re-validated live: host started with PipeWire stopped → throttled "unavailable" warns, zero churn; daemon started → mic node up on the next retry; exactly one pump + one loop thread (no leak).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -128,6 +128,10 @@ struct PumpTuning {
|
|||||||
/// An uplink gap longer than this discards the backend's buffered audio before pushing the
|
/// An uplink gap longer than this discards the backend's buffered audio before pushing the
|
||||||
/// next frame (a recorder must never hear a stale burst from before a mute/session end).
|
/// next frame (a recorder must never hear a stale burst from before a mute/session end).
|
||||||
stale_gap: std::time::Duration,
|
stale_gap: std::time::Duration,
|
||||||
|
/// A backend that dies before living this long counts as a FAILED open for backoff purposes
|
||||||
|
/// (an open that succeeds but dies instantly — e.g. a flapping daemon — must not churn at
|
||||||
|
/// heartbeat rate); one that lived longer resets the backoff.
|
||||||
|
stable_after: std::time::Duration,
|
||||||
}
|
}
|
||||||
|
|
||||||
const PUMP_TUNING: PumpTuning = PumpTuning {
|
const PUMP_TUNING: PumpTuning = PumpTuning {
|
||||||
@@ -135,6 +139,7 @@ const PUMP_TUNING: PumpTuning = PumpTuning {
|
|||||||
backoff_cap: std::time::Duration::from_secs(60),
|
backoff_cap: std::time::Duration::from_secs(60),
|
||||||
heartbeat: std::time::Duration::from_secs(1),
|
heartbeat: std::time::Duration::from_secs(1),
|
||||||
stale_gap: std::time::Duration::from_millis(600),
|
stale_gap: std::time::Duration::from_millis(600),
|
||||||
|
stable_after: std::time::Duration::from_secs(5),
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Host-lifetime virtual-microphone pump: one thread owns the [`VirtualMic`] backend + an Opus
|
/// Host-lifetime virtual-microphone pump: one thread owns the [`VirtualMic`] backend + an Opus
|
||||||
@@ -188,6 +193,26 @@ impl MicPump {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sleep for `dur` while draining (and dropping) queued frames, so a closed/reopening backend
/// never accumulates a stale backlog and senders never see a wedged queue.
///
/// Returns `true` once `dur` has elapsed, and `false` as soon as the channel is observed to be
/// disconnected (every sender is gone — host shutdown). The channel is polled at least once
/// even when `dur` is zero, so a shutdown is never reported as a completed sleep.
///
/// A `dur` too large to be represented as a deadline is treated as "wait until shutdown"
/// rather than panicking on `Instant` overflow.
#[cfg_attr(not(any(target_os = "linux", target_os = "windows")), allow(dead_code))]
fn drain_sleep(rx: &std::sync::mpsc::Receiver<Vec<u8>>, dur: std::time::Duration) -> bool {
    use std::sync::mpsc::{RecvTimeoutError, TryRecvError};

    // Upper bound on a single blocking wait, so the deadline is re-evaluated regularly.
    const POLL: std::time::Duration = std::time::Duration::from_millis(250);

    // Poll before looking at the clock: a zero (or already-expired) `dur` must still drop the
    // backlog and notice a disconnected channel instead of returning `true` blindly.
    loop {
        match rx.try_recv() {
            Ok(_) => {} // drop frames while closed
            Err(TryRecvError::Empty) => break,
            Err(TryRecvError::Disconnected) => return false, // host shutdown
        }
    }

    // `Instant + Duration` panics on overflow; `None` here means "no reachable deadline".
    let deadline = std::time::Instant::now().checked_add(dur);
    loop {
        let wait = match deadline {
            Some(at) => {
                let left = at.saturating_duration_since(std::time::Instant::now());
                if left.is_zero() {
                    return true;
                }
                left.min(POLL)
            }
            None => POLL,
        };
        match rx.recv_timeout(wait) {
            Ok(_) => {}                                          // drop frames while closed
            Err(RecvTimeoutError::Timeout) => {}                 // keep waiting
            Err(RecvTimeoutError::Disconnected) => return false, // host shutdown
        }
    }
}
|
||||||
|
|
||||||
/// The pump loop. `opener` is injected so the tests can run the REAL loop against a mock
|
/// The pump loop. `opener` is injected so the tests can run the REAL loop against a mock
|
||||||
/// backend; production passes [`open_virtual_mic`].
|
/// backend; production passes [`open_virtual_mic`].
|
||||||
#[cfg_attr(not(any(target_os = "linux", target_os = "windows")), allow(dead_code))]
|
#[cfg_attr(not(any(target_os = "linux", target_os = "windows")), allow(dead_code))]
|
||||||
@@ -200,9 +225,8 @@ where
|
|||||||
|
|
||||||
let mut backoff = tuning.backoff_start;
|
let mut backoff = tuning.backoff_start;
|
||||||
let mut open_fails: u64 = 0;
|
let mut open_fails: u64 = 0;
|
||||||
'reopen: loop {
|
loop {
|
||||||
// Open phase — eager, from thread start. While closed, keep draining the queue so a
|
// Open phase — eager, from thread start.
|
||||||
// reopen never replays a backlog of stale frames (and senders never see a wedged queue).
|
|
||||||
let (mic, mut decoder) = loop {
|
let (mic, mut decoder) = loop {
|
||||||
let opened = opener().and_then(|m| {
|
let opened = opener().and_then(|m| {
|
||||||
let d = opus::Decoder::new(SAMPLE_RATE, opus::Channels::Stereo)
|
let d = opus::Decoder::new(SAMPLE_RATE, opus::Channels::Stereo)
|
||||||
@@ -219,28 +243,20 @@ where
|
|||||||
tracing::warn!(error = %format!("{e:#}"), attempts = open_fails,
|
tracing::warn!(error = %format!("{e:#}"), attempts = open_fails,
|
||||||
"virtual mic unavailable — retrying with backoff");
|
"virtual mic unavailable — retrying with backoff");
|
||||||
}
|
}
|
||||||
let deadline = Instant::now() + backoff;
|
if !drain_sleep(&rx, backoff) {
|
||||||
loop {
|
return;
|
||||||
let left = deadline.saturating_duration_since(Instant::now());
|
|
||||||
if left.is_zero() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
match rx.recv_timeout(left.min(std::time::Duration::from_millis(250))) {
|
|
||||||
Ok(_) => {} // drop frames while closed
|
|
||||||
Err(RecvTimeoutError::Timeout) => {} // keep waiting
|
|
||||||
Err(RecvTimeoutError::Disconnected) => return, // host shutdown
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
backoff = (backoff * 2).min(tuning.backoff_cap);
|
backoff = (backoff * 2).min(tuning.backoff_cap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
backoff = tuning.backoff_start;
|
|
||||||
open_fails = 0;
|
|
||||||
tracing::info!("virtual mic ready (host-lifetime)");
|
tracing::info!("virtual mic ready (host-lifetime)");
|
||||||
// Drop anything queued while (re)opening — it predates the backend.
|
// Drop anything queued while (re)opening — it predates the backend. (The backoff does
|
||||||
|
// NOT reset here: only an instance that proves stable resets it — see the death triage.)
|
||||||
while rx.try_recv().is_ok() {}
|
while rx.try_recv().is_ok() {}
|
||||||
|
let opened_at = Instant::now();
|
||||||
|
|
||||||
|
// Pump phase — runs until the backend dies (break) or the host shuts down (return).
|
||||||
let mut decode_fails: u64 = 0;
|
let mut decode_fails: u64 = 0;
|
||||||
let mut pcm = vec![0f32; 5760 * MIC_CHANNELS as usize]; // up to 120 ms scratch
|
let mut pcm = vec![0f32; 5760 * MIC_CHANNELS as usize]; // up to 120 ms scratch
|
||||||
let mut last_push = Instant::now();
|
let mut last_push = Instant::now();
|
||||||
@@ -258,7 +274,7 @@ where
|
|||||||
let total = (samples_per_ch * MIC_CHANNELS as usize).min(pcm.len());
|
let total = (samples_per_ch * MIC_CHANNELS as usize).min(pcm.len());
|
||||||
if !mic.push(&pcm[..total]) {
|
if !mic.push(&pcm[..total]) {
|
||||||
tracing::warn!("virtual mic backend died — reopening");
|
tracing::warn!("virtual mic backend died — reopening");
|
||||||
continue 'reopen;
|
break;
|
||||||
}
|
}
|
||||||
last_push = Instant::now();
|
last_push = Instant::now();
|
||||||
decode_fails = 0;
|
decode_fails = 0;
|
||||||
@@ -277,7 +293,7 @@ where
|
|||||||
Err(RecvTimeoutError::Timeout) => {
|
Err(RecvTimeoutError::Timeout) => {
|
||||||
if !mic.alive() {
|
if !mic.alive() {
|
||||||
tracing::warn!("virtual mic backend died while idle — reopening");
|
tracing::warn!("virtual mic backend died while idle — reopening");
|
||||||
continue 'reopen;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(RecvTimeoutError::Disconnected) => {
|
Err(RecvTimeoutError::Disconnected) => {
|
||||||
@@ -286,6 +302,21 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Death triage: an instance that lived is a one-off (PipeWire/audio-engine restart) —
|
||||||
|
// reopen immediately with the backoff reset. One that died right after opening is a
|
||||||
|
// failed open in disguise (flapping daemon, endpoint racing away): back off like the
|
||||||
|
// open loop, or the pump would churn open→die→reopen at heartbeat rate.
|
||||||
|
if opened_at.elapsed() >= tuning.stable_after {
|
||||||
|
backoff = tuning.backoff_start;
|
||||||
|
open_fails = 0;
|
||||||
|
} else {
|
||||||
|
open_fails += 1;
|
||||||
|
if !drain_sleep(&rx, backoff) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
backoff = (backoff * 2).min(tuning.backoff_cap);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -343,8 +374,10 @@ mod pump_tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Run the REAL pump loop against mock backends; `fail_first` opens fail before the first
|
/// Run the REAL pump loop against mock backends; `fail_first` opens fail before the first
|
||||||
/// success (exercises the eager retry/backoff path).
|
/// success (exercises the eager retry/backoff path). `dead_on_arrival` opens every instance
|
||||||
fn start(fail_first: usize) -> Harness {
|
/// pre-killed (exercises the rapid-death churn guard). `stable_after` mirrors the tuning
|
||||||
|
/// field (ZERO = every death counts as stable → immediate reopen, keeping tests fast).
|
||||||
|
fn start_tuned(fail_first: usize, dead_on_arrival: bool, stable_after: Duration) -> Harness {
|
||||||
let (tx, rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(MIC_QUEUE_CAP);
|
let (tx, rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(MIC_QUEUE_CAP);
|
||||||
let opens = Arc::new(AtomicUsize::new(0));
|
let opens = Arc::new(AtomicUsize::new(0));
|
||||||
let alive = Arc::new(Mutex::new(None::<Arc<AtomicBool>>));
|
let alive = Arc::new(Mutex::new(None::<Arc<AtomicBool>>));
|
||||||
@@ -361,6 +394,7 @@ mod pump_tests {
|
|||||||
backoff_cap: Duration::from_millis(40),
|
backoff_cap: Duration::from_millis(40),
|
||||||
heartbeat: Duration::from_millis(20),
|
heartbeat: Duration::from_millis(20),
|
||||||
stale_gap: Duration::from_millis(80),
|
stale_gap: Duration::from_millis(80),
|
||||||
|
stable_after,
|
||||||
};
|
};
|
||||||
let join = std::thread::spawn(move || {
|
let join = std::thread::spawn(move || {
|
||||||
pump_thread(
|
pump_thread(
|
||||||
@@ -370,7 +404,7 @@ mod pump_tests {
|
|||||||
if n < fail_first {
|
if n < fail_first {
|
||||||
anyhow::bail!("backend not up yet (simulated)");
|
anyhow::bail!("backend not up yet (simulated)");
|
||||||
}
|
}
|
||||||
let a = Arc::new(AtomicBool::new(true));
|
let a = Arc::new(AtomicBool::new(!dead_on_arrival));
|
||||||
*alive2.lock().unwrap() = Some(a.clone());
|
*alive2.lock().unwrap() = Some(a.clone());
|
||||||
Ok(Box::new(MockMic {
|
Ok(Box::new(MockMic {
|
||||||
alive: a,
|
alive: a,
|
||||||
@@ -391,6 +425,10 @@ mod pump_tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn start(fail_first: usize) -> Harness {
|
||||||
|
start_tuned(fail_first, false, Duration::ZERO)
|
||||||
|
}
|
||||||
|
|
||||||
fn wait_until(what: &str, mut cond: impl FnMut() -> bool) {
|
fn wait_until(what: &str, mut cond: impl FnMut() -> bool) {
|
||||||
for _ in 0..200 {
|
for _ in 0..200 {
|
||||||
if cond() {
|
if cond() {
|
||||||
@@ -471,6 +509,26 @@ mod pump_tests {
|
|||||||
h.join.join().unwrap();
|
h.join.join().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A backend that dies right after opening must be retried on the BACKOFF schedule rather
/// than once per heartbeat — otherwise a flapping backend (daemon up but dropping us
/// instantly) would drive an endless open→die→reopen churn.
#[test]
fn rapid_death_backs_off() {
    // How long the pump is left running before the open count is sampled.
    const OBSERVE: Duration = Duration::from_millis(500);
    // Churning once per 20 ms heartbeat would give roughly 25 opens in the window; the
    // doubling backoff keeps the count well below this ceiling.
    const MAX_OPENS: usize = 15;

    // Every instance opens pre-killed, and the stability threshold is far above the window,
    // so each death is triaged as a failed open.
    let harness = start_tuned(0, true, Duration::from_secs(10));
    std::thread::sleep(OBSERVE);

    let opens = harness.opens.load(Ordering::SeqCst);
    assert!(opens >= 2, "must keep retrying (got {opens})");
    assert!(
        opens <= MAX_OPENS,
        "must back off, not churn per heartbeat (got {opens})"
    );

    // Dropping the only sender ends the pump thread; join to confirm it exits cleanly.
    drop(harness.tx);
    harness.join.join().unwrap();
}
|
||||||
|
|
||||||
/// An uplink gap discards buffered-stale audio before the next frame plays.
|
/// An uplink gap discards buffered-stale audio before the next frame plays.
|
||||||
#[test]
|
#[test]
|
||||||
fn discards_after_gap() {
|
fn discards_after_gap() {
|
||||||
|
|||||||
@@ -147,24 +147,32 @@ impl PwMicSource {
|
|||||||
let (quit_tx, quit_rx) = pipewire::channel::channel::<Terminate>();
|
let (quit_tx, quit_rx) = pipewire::channel::channel::<Terminate>();
|
||||||
let alive = Arc::new(AtomicBool::new(true));
|
let alive = Arc::new(AtomicBool::new(true));
|
||||||
let flush = Arc::new(AtomicBool::new(false));
|
let flush = Arc::new(AtomicBool::new(false));
|
||||||
|
// Bring-up handshake (mirrors the Windows backend): a PipeWire that isn't running (host
|
||||||
|
// service started before the user session) must surface as an open ERROR — engaging the
|
||||||
|
// pump's backoff — not as an instantly-dead instance the pump would churn on.
|
||||||
|
let (ready_tx, ready_rx) = sync_channel::<Result<()>>(1);
|
||||||
let (alive_t, flush_t) = (alive.clone(), flush.clone());
|
let (alive_t, flush_t) = (alive.clone(), flush.clone());
|
||||||
thread::Builder::new()
|
thread::Builder::new()
|
||||||
.name("punktfunk-pw-mic".into())
|
.name("punktfunk-pw-mic".into())
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
if let Err(e) = mic_pw_thread(pcm_rx, quit_rx, channels, flush_t) {
|
if let Err(e) = mic_pw_thread(pcm_rx, quit_rx, channels, flush_t, ready_tx) {
|
||||||
tracing::error!(error = %format!("{e:#}"), "pipewire virtual-mic thread failed");
|
tracing::error!(error = %format!("{e:#}"), "pipewire virtual-mic thread failed");
|
||||||
}
|
}
|
||||||
// Whether a clean quit or a daemon death: this instance is done — the pump reopens.
|
// Whether a clean quit or a daemon death: this instance is done — the pump reopens.
|
||||||
alive_t.store(false, Ordering::Release);
|
alive_t.store(false, Ordering::Release);
|
||||||
})
|
})
|
||||||
.context("spawn pipewire virtual-mic thread")?;
|
.context("spawn pipewire virtual-mic thread")?;
|
||||||
Ok(PwMicSource {
|
match ready_rx.recv_timeout(Duration::from_secs(5)) {
|
||||||
|
Ok(Ok(())) => Ok(PwMicSource {
|
||||||
pcm: pcm_tx,
|
pcm: pcm_tx,
|
||||||
channels,
|
channels,
|
||||||
quit: quit_tx,
|
quit: quit_tx,
|
||||||
alive,
|
alive,
|
||||||
flush,
|
flush,
|
||||||
})
|
}),
|
||||||
|
Ok(Err(e)) => Err(e),
|
||||||
|
Err(_) => Err(anyhow!("pipewire virtual-mic init timed out")),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -224,12 +232,17 @@ fn mic_pw_thread(
|
|||||||
quit_rx: pipewire::channel::Receiver<Terminate>,
|
quit_rx: pipewire::channel::Receiver<Terminate>,
|
||||||
channels: u32,
|
channels: u32,
|
||||||
flush: Arc<AtomicBool>,
|
flush: Arc<AtomicBool>,
|
||||||
|
ready: std::sync::mpsc::SyncSender<Result<()>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
use pipewire as pw;
|
use pipewire as pw;
|
||||||
use pw::{properties::properties, spa};
|
use pw::{properties::properties, spa};
|
||||||
use spa::param::audio::{AudioFormat, AudioInfoRaw};
|
use spa::param::audio::{AudioFormat, AudioInfoRaw};
|
||||||
use spa::pod::Pod;
|
use spa::pod::Pod;
|
||||||
|
|
||||||
|
// The PipeWire objects are lifetime-chained (guards borrow the mainloop/core), so setup and
|
||||||
|
// the blocking run share one frame; the IIFE lets every setup `?` funnel through the ready
|
||||||
|
// handshake below (mirrors the Windows render_thread).
|
||||||
|
let result = (|| -> Result<()> {
|
||||||
crate::pwinit::ensure_init();
|
crate::pwinit::ensure_init();
|
||||||
let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw mic MainLoop")?;
|
let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw mic MainLoop")?;
|
||||||
let context = pw::context::ContextRc::new(&mainloop, None).context("pw mic Context")?;
|
let context = pw::context::ContextRc::new(&mainloop, None).context("pw mic Context")?;
|
||||||
@@ -452,9 +465,19 @@ fn mic_pw_thread(
|
|||||||
)
|
)
|
||||||
.context("pw mic stream connect")?;
|
.context("pw mic stream connect")?;
|
||||||
|
|
||||||
|
// Setup complete: the daemon connection and stream connect succeeded — report ready,
|
||||||
|
// then block until quit/death. (A PipeWire that isn't running never reaches this line;
|
||||||
|
// its connect error surfaces through the handshake as an OPEN failure, so the pump
|
||||||
|
// backs off instead of churning on instantly-dead instances.)
|
||||||
|
let _ = ready.send(Ok(()));
|
||||||
mainloop.run();
|
mainloop.run();
|
||||||
tracing::debug!("pipewire virtual-mic loop exited (source dropped)");
|
tracing::debug!("pipewire virtual-mic loop exited (source dropped)");
|
||||||
Ok(())
|
Ok(())
|
||||||
|
})();
|
||||||
|
if let Err(e) = &result {
|
||||||
|
let _ = ready.send(Err(anyhow!("{e:#}")));
|
||||||
|
}
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pw_thread(
|
fn pw_thread(
|
||||||
|
|||||||
Reference in New Issue
Block a user