feat: mic passthrough — client microphone → host virtual PipeWire source
ci / rust (push) Has been cancelled

The inverse of the host→client audio path: the client's mic, Opus-encoded, rides a
new 0xCB QUIC datagram to the host, which decodes it into a virtual PipeWire
Audio/Source its apps can record from (voice chat, etc.).

Protocol (punktfunk-core):
- MIC_MAGIC 0xCB + encode/decode_mic_datagram (mirror of the 0xC9 audio datagram).
- NativeClient::send_mic(seq, pts_ns, opus) over a new outbound channel + worker task
  (mirror of send_input); C ABI punktfunk_connection_send_mic for native clients.

Host:
- audio::VirtualMic + PwMicSource: a PipeWire output stream tagged media.class=
  Audio/Source (Direction::Output) — a recordable microphone node, fed decoded PCM.
- MicService: host-lifetime owner of the source + Opus decoder (mirror of
  InjectorService / the audio capturer slot); lazily opened, persists across sessions,
  self-heals. The per-session datagram reader now demuxes 0xCB→mic / 0xC8→input over a
  single read_datagram loop (two loops would race).
- Adaptive jitter buffer in the producer: it primes to ~3 consumer quanta before emitting,
  so the mismatch between the 5 ms push cadence and the consumer's N ms pull cadence never
  causes an underrun — without it ~58% of output was silence; with it, the output is
  glitch-free across consumer quanta.

Client: punktfunk-client-rs --mic-test streams a synthetic 440 Hz Opus tone as the mic
uplink (opus dep added) for end-to-end validation without a real microphone.

Validated live on headless KWin: client tone → host source → pw-record shows the
punktfunk-mic Audio/Source node, 440 Hz dominant (Goertzel power 20.7 vs <0.001
elsewhere), RMS 0.179 ≈ the ideal 0.177, 0.3–0.4% silence at both 256 ms and 10 ms
consumer quanta. Tests +1 (mic datagram roundtrip); workspace green, clippy/fmt clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-10 22:15:07 +00:00
parent f3ff5f648a
commit 0755c823a5
10 changed files with 545 additions and 10 deletions
+36
View File
@@ -885,6 +885,42 @@ pub unsafe extern "C" fn punktfunk_connection_send_input(
})
}
/// Enqueue a single Opus-encoded microphone frame for delivery to the host as a QUIC datagram
/// (48 kHz; the host decodes it into a virtual microphone source its apps can record).
///
/// The call only enqueues and never blocks. `seq` and `pts_ns` are the caller's own counters;
/// the host uses them for diagnostics only. A zero `len` is accepted and sends an empty payload
/// (a DTX silence frame). The bytes are copied before returning, so the caller may reuse or
/// free its buffer immediately afterwards.
///
/// Returns `PunktfunkStatus::NullPointer` when `c` is null, or when `opus_data` is null while
/// `len` is non-zero; otherwise `PunktfunkStatus::Ok` or the status of the enqueue error.
///
/// # Safety
/// `c` is a valid connection handle; `opus_data` is valid for `len` bytes (or `len == 0`).
#[cfg(feature = "quic")]
#[no_mangle]
pub unsafe extern "C" fn punktfunk_connection_send_mic(
    c: *mut PunktfunkConnection,
    opus_data: *const u8,
    len: usize,
    seq: u32,
    pts_ns: u64,
) -> PunktfunkStatus {
    guard(|| {
        // SAFETY: the caller guarantees `c` is either null or a valid connection handle;
        // `as_ref` turns the null case into `None`.
        let conn = match unsafe { c.as_ref() } {
            Some(conn) => conn,
            None => return PunktfunkStatus::NullPointer,
        };
        // Own the payload: an empty frame never touches the pointer, a null pointer with a
        // non-zero length is rejected, anything else is copied out of the caller's buffer.
        let frame = match (len, opus_data.is_null()) {
            (0, _) => Vec::new(),
            (_, true) => return PunktfunkStatus::NullPointer,
            // SAFETY: `opus_data` is non-null here and the caller guarantees it is valid
            // for `len` bytes.
            (_, false) => unsafe { std::slice::from_raw_parts(opus_data, len) }.to_vec(),
        };
        conn.inner
            .send_mic(seq, pts_ns, frame)
            .map_or_else(|e| e.status(), |()| PunktfunkStatus::Ok)
    })
}
/// The currently active session mode — the Welcome's, until an accepted
/// [`punktfunk_connection_request_mode`] switches it. Safe any time after connect.
///
+26
View File
@@ -50,6 +50,8 @@ pub struct NativeClient {
audio: Receiver<AudioPacket>,
rumble: Receiver<(u16, u16, u16)>,
input_tx: tokio::sync::mpsc::UnboundedSender<InputEvent>,
/// Outbound mic frames `(seq, pts_ns, opus)` → encoded as 0xCB datagrams by the worker.
mic_tx: tokio::sync::mpsc::UnboundedSender<(u32, u64, Vec<u8>)>,
reconfig_tx: tokio::sync::mpsc::UnboundedSender<Mode>,
shutdown: Arc<AtomicBool>,
worker: Option<std::thread::JoinHandle<()>>,
@@ -85,6 +87,7 @@ impl NativeClient {
let (audio_tx, audio_rx) = std::sync::mpsc::sync_channel::<AudioPacket>(AUDIO_QUEUE);
let (rumble_tx, rumble_rx) = std::sync::mpsc::sync_channel::<(u16, u16, u16)>(RUMBLE_QUEUE);
let (input_tx, input_rx) = tokio::sync::mpsc::unbounded_channel::<InputEvent>();
let (mic_tx, mic_rx) = tokio::sync::mpsc::unbounded_channel::<(u32, u64, Vec<u8>)>();
let (reconfig_tx, reconfig_rx) = tokio::sync::mpsc::unbounded_channel::<Mode>();
let (ready_tx, ready_rx) = std::sync::mpsc::channel::<Result<(Mode, [u8; 32])>>();
let shutdown = Arc::new(AtomicBool::new(false));
@@ -118,6 +121,7 @@ impl NativeClient {
audio_tx,
rumble_tx,
input_rx,
mic_rx,
reconfig_rx,
ready_tx,
shutdown: shutdown_w,
@@ -140,6 +144,7 @@ impl NativeClient {
audio: audio_rx,
rumble: rumble_rx,
input_tx,
mic_tx,
reconfig_tx,
shutdown,
worker: Some(worker),
@@ -296,6 +301,16 @@ impl NativeClient {
pub fn send_input(&self, ev: &InputEvent) -> Result<()> {
self.input_tx.send(*ev).map_err(|_| PunktfunkError::Closed)
}
/// Hand one Opus mic frame to the worker, which ships it to the host as a 0xCB uplink
/// datagram (the inverse of [`next_audio`](Self::next_audio)); the host decodes it into a
/// virtual microphone source.
///
/// `seq`/`pts_ns` are the caller's own counters (the host uses them only for diagnostics).
/// Delivery is best-effort — like every datagram, it's dropped under loss; no retransmit.
///
/// Returns [`PunktfunkError::Closed`] when the outbound mic channel no longer has a receiver.
pub fn send_mic(&self, seq: u32, pts_ns: u64, opus: Vec<u8>) -> Result<()> {
    let frame = (seq, pts_ns, opus);
    match self.mic_tx.send(frame) {
        Ok(()) => Ok(()),
        Err(_) => Err(PunktfunkError::Closed),
    }
}
}
impl Drop for NativeClient {
@@ -318,6 +333,7 @@ struct WorkerArgs {
audio_tx: SyncSender<AudioPacket>,
rumble_tx: SyncSender<(u16, u16, u16)>,
input_rx: tokio::sync::mpsc::UnboundedReceiver<InputEvent>,
mic_rx: tokio::sync::mpsc::UnboundedReceiver<(u32, u64, Vec<u8>)>,
reconfig_rx: tokio::sync::mpsc::UnboundedReceiver<Mode>,
ready_tx: std::sync::mpsc::Sender<Result<(Mode, [u8; 32])>>,
shutdown: Arc<AtomicBool>,
@@ -338,6 +354,7 @@ async fn worker_main(args: WorkerArgs) {
audio_tx,
rumble_tx,
mut input_rx,
mut mic_rx,
mut reconfig_rx,
ready_tx,
shutdown,
@@ -429,6 +446,15 @@ async fn worker_main(args: WorkerArgs) {
}
});
// Mic task: embedder Opus mic frames → 0xCB uplink datagrams (best-effort, dropped on loss).
// Runs on its own spawned task with a clone of the connection, draining `mic_rx` until
// `recv()` yields `None`. Each `(seq, pts_ns, opus)` tuple is encoded with
// `encode_mic_datagram` and sent as one datagram.
let mic_conn = conn.clone();
tokio::spawn(async move {
while let Some((seq, pts_ns, opus)) = mic_rx.recv().await {
let d = crate::quic::encode_mic_datagram(seq, pts_ns, &opus);
// The send result is deliberately discarded: a failed send just drops this frame.
let _ = mic_conn.send_datagram(d.into());
}
});
// Control task: the handshake stream stays open for mid-stream renegotiation. One
// request at a time — write Reconfigure, await Reconfigured, publish the active mode.
{
+46 -2
View File
@@ -554,10 +554,14 @@ pub fn frame(payload: &[u8]) -> Vec<u8> {
}
/// Datagram wire tags. Video rides UDP; everything low-rate rides QUIC datagrams,
/// demultiplexed by the first byte: input = [`crate::input::INPUT_MAGIC`] (0xC8),
/// audio = [`AUDIO_MAGIC`], rumble = [`RUMBLE_MAGIC`].
/// demultiplexed by the first byte: input = [`crate::input::INPUT_MAGIC`] (0xC8, client→host),
/// audio = [`AUDIO_MAGIC`] (0xC9, host→client), rumble = [`RUMBLE_MAGIC`] (0xCA, host→client),
/// mic = [`MIC_MAGIC`] (0xCB, client→host).
pub const AUDIO_MAGIC: u8 = 0xC9;
pub const RUMBLE_MAGIC: u8 = 0xCA;
/// Microphone uplink: the client's mic, Opus-encoded, client → host (the inverse of
/// [`AUDIO_MAGIC`]). The host feeds it into a virtual PipeWire source so its apps can record it.
/// This is the first byte written by [`encode_mic_datagram`] and the tag that
/// [`decode_mic_datagram`] requires.
pub const MIC_MAGIC: u8 = 0xCB;
/// Audio datagram, host → client: `[0xC9][u32 seq LE][u64 pts_ns LE][opus payload]`.
/// One Opus frame per datagram (5 ms — well under any MTU); QUIC already encrypts.
@@ -600,6 +604,27 @@ pub fn decode_rumble_datagram(b: &[u8]) -> Option<(u16, u16, u16)> {
Some((u16at(1), u16at(3), u16at(5)))
}
/// Build a mic datagram (client → host): `[0xCB][u32 seq LE][u64 pts_ns LE][opus payload]`.
///
/// The layout matches [`encode_audio_datagram`], with [`MIC_MAGIC`] as the tag byte. Each
/// datagram carries exactly one Opus frame; `opus` may be empty, which yields a bare
/// 13-byte header.
pub fn encode_mic_datagram(seq: u32, pts_ns: u64, opus: &[u8]) -> Vec<u8> {
    let seq_le = seq.to_le_bytes();
    let pts_le = pts_ns.to_le_bytes();
    // Tag, sequence number, timestamp, then the payload, concatenated in wire order.
    [&[MIC_MAGIC][..], &seq_le[..], &pts_le[..], opus].concat()
}
/// Split a mic datagram into `(seq, pts_ns, opus payload)`.
///
/// Returns `None` when the input is shorter than the 13-byte header or does not start with
/// [`MIC_MAGIC`]. The payload borrows from `b` and may be empty.
pub fn decode_mic_datagram(b: &[u8]) -> Option<(u32, u64, &[u8])> {
    // Tag byte + u32 sequence number + u64 timestamp.
    const HEADER_LEN: usize = 1 + 4 + 8;
    if b.len() < HEADER_LEN {
        return None;
    }
    let (header, opus) = b.split_at(HEADER_LEN);
    if header[0] != MIC_MAGIC {
        return None;
    }
    // Both sub-slices have exactly the array's length, so these conversions cannot fail.
    let seq_le: [u8; 4] = header[1..5].try_into().ok()?;
    let pts_le: [u8; 8] = header[5..HEADER_LEN].try_into().ok()?;
    Some((u32::from_le_bytes(seq_le), u64::from_le_bytes(pts_le), opus))
}
/// Async framed-message IO over a quinn stream (`u16 LE length || payload`).
pub mod io {
/// Read one framed message (bounded at 64 KiB — control messages are tiny).
@@ -1178,6 +1203,25 @@ mod tests {
assert!(decode_rumble_datagram(&d[..6]).is_none());
}
/// A mic datagram survives an encode/decode round trip, is rejected when truncated, is never
/// mistaken for an audio datagram (nor the reverse), and may carry an empty (DTX) payload.
#[test]
fn mic_datagram_roundtrip_and_disjoint_from_audio() {
    let frame = [0x5Au8; 80];
    let wire = encode_mic_datagram(42, 9_999, &frame);

    // Round trip: tag byte first, then every field comes back unchanged.
    assert_eq!(wire[0], MIC_MAGIC);
    assert_eq!(
        decode_mic_datagram(&wire),
        Some((42, 9_999, &frame[..]))
    );

    // One byte short of the 13-byte header must be rejected.
    assert!(decode_mic_datagram(&wire[..12]).is_none());

    // Tag separation: a mic datagram is not an audio datagram and vice-versa.
    assert!(decode_audio_datagram(&wire).is_none());
    let audio_wire = encode_audio_datagram(1, 2, &frame);
    assert!(decode_mic_datagram(&audio_wire).is_none());

    // Empty payload (DTX) is legal.
    let dtx_wire = encode_mic_datagram(0, 0, &[]);
    let (_, _, dtx_payload) = decode_mic_datagram(&dtx_wire).unwrap();
    assert!(dtx_payload.is_empty());
}
#[test]
fn fingerprint_is_sha256_of_der() {
// Stable across calls, distinct for distinct certs.