Files
punktfunk/crates/punktfunk-client-windows/src/audio.rs
T
enricobuehler e4bdec97bd
apple / swift (push) Successful in 56s
android / android (push) Successful in 2m8s
audit / cargo-audit (push) Failing after 1m7s
ci / web (push) Successful in 32s
ci / docs-site (push) Successful in 30s
ci / bench (push) Successful in 1m32s
ci / rust (push) Failing after 3m31s
decky / build-publish (push) Successful in 13s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 5s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 3s
flatpak / build-publish (push) Successful in 4m10s
deb / build-publish (push) Successful in 6m14s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 5m25s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 5m12s
docker / deploy-docs (push) Successful in 18s
feat(windows-client): winit + D3D11 present, WASAPI render, input — builds live on MSVC
Builds on the prior headless scaffold (which was committed but never VM-built — its
audio.rs had two non-compiling wasapi calls). This makes the whole crate build + clippy
+ fmt + test green on x86_64-pc-windows-msvc and adds the windowed client.

- Fix audio.rs: `DeviceEnumerator::new()?.get_default_device(...)` (the free fn doesn't
  exist) and the 3-arg `write_to_device` (wasapi 0.23). WASAPI shared-mode event-driven
  render + mic capture now compile and link.
- present.rs: D3D11 renderer with WARP fallback (GPU-less dev box), runtime-compiled
  fullscreen-triangle shaders, dynamic RGBA video-texture upload, Contain-fit letterbox
  draw, and a flip-model swapchain on the window HWND.
- app.rs: winit 0.30 ApplicationHandler — present loop + Moonlight-style click-to-capture
  input (keyboard via the physical-KeyCode→VK keymap, absolute mouse, wheel, F11), held
  state flushed on release/focus-loss.
- keymap.rs: winit physical KeyCode → Windows VK (layout-independent positional mapping,
  the analogue of the Linux client's evdev table).
- main.rs: windowed default + `--headless` counting mode, `--discover` (mDNS list),
  `--pair PIN` (SPAKE2 ceremony), `--pin HEX`/known-host/TOFU trust, settings-backed
  CLI defaults.

UI decision: winit + raw D3D11 (the bootstrap doc's sanctioned fallback), confirmed by a
research pass — windows-rs "Reactor" ships no SwapChainPanel / SetSwapChain escape hatch,
so it can't host the presenter; winit+WARP validates on the GPU-less VM. Native-chrome
host-list/settings GUI + D3D11VA hardware decode + 10-bit/HDR present are follow-ups.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 21:59:40 +00:00

290 lines
11 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Audio: playback (decoded PCM → a WASAPI shared-mode render stream) and the microphone
//! uplink (WASAPI capture → Opus → 0xCB datagrams, the inverse of the host's virtual mic).
//!
//! The WASAPI analogue of the Linux client's PipeWire backend. Playback mirrors the host's
//! virtual-mic producer's adaptive jitter buffer: the session pump pushes 5 ms Opus-decoded
//! chunks on the network clock; the WASAPI render thread pulls whole event-driven quanta on
//! the device clock. Prime to ~3 quanta before producing, cap the ring so latency stays
//! bounded, re-prime after a real drain.
//!
//! WASAPI objects are COM-apartment-bound and not `Send`, so they live on a dedicated thread
//! (the same discipline as the host's `wasapi_cap`); only the channel + stop flag + join
//! handle cross the boundary.
use anyhow::{anyhow, Context, Result};
use punktfunk_core::client::NativeClient;
use std::collections::VecDeque;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{Receiver, SyncSender, TrySendError};
use std::sync::Arc;
use std::time::Duration;
use wasapi::{DeviceEnumerator, Direction, SampleType, StreamMode, WaveFormat};
const SAMPLE_RATE: usize = 48_000;
const CHANNELS: usize = 2;
/// 48 kHz stereo f32: 2 channels * 4 bytes = 8 bytes per frame.
const BLOCK_ALIGN: usize = CHANNELS * 4;
/// Mic frames are 20 ms (960 samples/channel) — any size ≤ 120 ms is fine host-side.
const MIC_FRAME: usize = 960;
pub struct AudioPlayer {
pcm_tx: SyncSender<Vec<f32>>,
stop: Arc<AtomicBool>,
thread: Option<std::thread::JoinHandle<()>>,
}
impl AudioPlayer {
/// Spawn the WASAPI render thread. Failure (no render endpoint on this box) is
/// survivable — the caller streams video-only.
pub fn spawn() -> Result<AudioPlayer> {
// 64 × 5 ms = 320 ms of slack between the pump and the WASAPI loop.
let (pcm_tx, pcm_rx) = std::sync::mpsc::sync_channel::<Vec<f32>>(64);
let stop = Arc::new(AtomicBool::new(false));
let (ready_tx, ready_rx) = std::sync::mpsc::sync_channel::<Result<()>>(1);
let stop_t = stop.clone();
let thread = std::thread::Builder::new()
.name("punktfunk-audio".into())
.spawn(move || {
if let Err(e) = render_thread(pcm_rx, stop_t, ready_tx) {
tracing::warn!(error = format!("{e:#}"), "audio playback thread ended");
}
})
.context("spawn audio thread")?;
match ready_rx.recv_timeout(Duration::from_secs(3)) {
Ok(Ok(())) => {
tracing::info!("WASAPI render: 48 kHz stereo f32 (default endpoint)");
Ok(AudioPlayer {
pcm_tx,
stop,
thread: Some(thread),
})
}
Ok(Err(e)) => Err(e),
Err(_) => Err(anyhow!(
"wasapi render init timed out (no render endpoint?)"
)),
}
}
/// Queue one interleaved-stereo f32 chunk. Drops the chunk if the WASAPI side is wedged
/// (the renderer conceals the gap; never block the session pump).
pub fn push(&self, pcm: Vec<f32>) {
if let Err(TrySendError::Disconnected(_)) = self.pcm_tx.try_send(pcm) {
// Thread already dead — Drop will reap it; nothing to do per-chunk.
}
}
}
impl Drop for AudioPlayer {
fn drop(&mut self) {
self.stop.store(true, Ordering::SeqCst);
if let Some(t) = self.thread.take() {
let _ = t.join();
}
}
}
fn render_thread(
pcm_rx: Receiver<Vec<f32>>,
stop: Arc<AtomicBool>,
ready: SyncSender<Result<()>>,
) -> Result<()> {
if let Err(e) = wasapi::initialize_mta()
.ok()
.context("CoInitializeEx (MTA)")
{
let _ = ready.send(Err(e));
return Ok(());
}
let res = (|| -> Result<()> {
let device = DeviceEnumerator::new()
.context("DeviceEnumerator")?
.get_default_device(&Direction::Render)
.context("default render endpoint")?;
let mut audio_client = device.get_iaudioclient().context("IAudioClient")?;
let desired = WaveFormat::new(32, 32, &SampleType::Float, SAMPLE_RATE, CHANNELS, None);
let (default_period, _min_period) =
audio_client.get_device_period().context("device period")?;
let mode = StreamMode::EventsShared {
autoconvert: true,
buffer_duration_hns: default_period,
};
audio_client
.initialize_client(&desired, &Direction::Render, &mode)
.context("initialize render client")?;
let h_event = audio_client.set_get_eventhandle().context("event handle")?;
let render_client = audio_client
.get_audiorenderclient()
.context("IAudioRenderClient")?;
audio_client.start_stream().context("start render stream")?;
let _ = ready.send(Ok(()));
// Adaptive jitter buffer, in f32-byte units (same shape as the host's virtual mic).
let mut ring: VecDeque<u8> = VecDeque::new();
let mut primed = false;
while !stop.load(Ordering::Relaxed) {
if h_event.wait_for_event(100).is_err() {
continue;
}
// Drain everything the pump has queued into the ring.
while let Ok(chunk) = pcm_rx.try_recv() {
for s in chunk {
ring.extend(s.to_le_bytes());
}
}
let avail_frames = audio_client
.get_available_space_in_frames()
.context("available space")? as usize;
if avail_frames == 0 {
continue;
}
let want_bytes = avail_frames * BLOCK_ALIGN;
// Prime to ~3 quanta; cap at ~1 quantum of slack beyond that; re-prime on drain.
let target = (3 * want_bytes).clamp(720 * BLOCK_ALIGN, 9600 * BLOCK_ALIGN);
while ring.len() > target.max(want_bytes) + want_bytes {
ring.pop_front();
}
if !primed && ring.len() >= target {
primed = true;
}
let mut out = vec![0u8; want_bytes];
if primed {
let n = ring.len().min(want_bytes);
for (dst, b) in out.iter_mut().zip(ring.drain(..n)) {
*dst = b;
}
}
if ring.is_empty() {
primed = false;
}
render_client
.write_to_device(avail_frames, &out, None)
.context("write_to_device")?;
}
audio_client.stop_stream().ok();
Ok(())
})();
if let Err(ref e) = res {
let _ = ready.send(Err(anyhow!("{e:#}")));
}
res
}
/// The microphone uplink: capture the default input device, Opus-encode 20 ms chunks, ship
/// them as 0xCB datagrams into the host's virtual mic source.
pub struct MicStreamer {
stop: Arc<AtomicBool>,
thread: Option<std::thread::JoinHandle<()>>,
}
impl MicStreamer {
pub fn spawn(connector: Arc<NativeClient>) -> Result<MicStreamer> {
let stop = Arc::new(AtomicBool::new(false));
let stop_t = stop.clone();
let thread = std::thread::Builder::new()
.name("punktfunk-mic".into())
.spawn(move || {
if let Err(e) = mic_thread(&connector, stop_t) {
tracing::warn!(error = format!("{e:#}"), "mic uplink thread ended");
}
})
.context("spawn mic thread")?;
Ok(MicStreamer {
stop,
thread: Some(thread),
})
}
}
impl Drop for MicStreamer {
fn drop(&mut self) {
self.stop.store(true, Ordering::SeqCst);
if let Some(t) = self.thread.take() {
let _ = t.join();
}
}
}
fn mic_thread(connector: &Arc<NativeClient>, stop: Arc<AtomicBool>) -> Result<()> {
wasapi::initialize_mta()
.ok()
.context("CoInitializeEx (MTA)")?;
let mut encoder = opus::Encoder::new(
SAMPLE_RATE as u32,
opus::Channels::Stereo,
opus::Application::Voip,
)
.map_err(|e| anyhow!("opus encoder: {e}"))?;
let _ = encoder.set_bitrate(opus::Bitrate::Bits(64_000));
let device = DeviceEnumerator::new()
.context("DeviceEnumerator")?
.get_default_device(&Direction::Capture)
.context("default capture endpoint (no microphone?)")?;
let mut audio_client = device.get_iaudioclient().context("IAudioClient")?;
let desired = WaveFormat::new(32, 32, &SampleType::Float, SAMPLE_RATE, CHANNELS, None);
let (default_period, _min_period) =
audio_client.get_device_period().context("device period")?;
let mode = StreamMode::EventsShared {
autoconvert: true,
buffer_duration_hns: default_period,
};
audio_client
.initialize_client(&desired, &Direction::Capture, &mode)
.context("initialize capture client")?;
let h_event = audio_client.set_get_eventhandle().context("event handle")?;
let capture_client = audio_client
.get_audiocaptureclient()
.context("IAudioCaptureClient")?;
audio_client
.start_stream()
.context("start capture stream")?;
let mut bytes: VecDeque<u8> = VecDeque::new();
let mut ring: VecDeque<f32> = VecDeque::new();
let mut out = vec![0u8; 4000];
let mut seq = 0u32;
while !stop.load(Ordering::Relaxed) {
if h_event.wait_for_event(100).is_err() {
continue;
}
loop {
match capture_client.get_next_packet_size() {
Ok(Some(0)) | Ok(None) => break,
Ok(Some(_n)) => {
capture_client
.read_from_device_to_deque(&mut bytes)
.context("read capture")?;
}
Err(e) => return Err(anyhow!("get_next_packet_size: {e}")),
}
}
let whole = (bytes.len() / 4) * 4;
for c in bytes.drain(..whole).collect::<Vec<u8>>().chunks_exact(4) {
ring.push_back(f32::from_le_bytes([c[0], c[1], c[2], c[3]]));
}
// Ship every complete 20 ms stereo frame.
while ring.len() >= MIC_FRAME * CHANNELS {
let pcm: Vec<f32> = ring.drain(..MIC_FRAME * CHANNELS).collect();
match encoder.encode_float(&pcm, &mut out) {
Ok(len) => {
let pts = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(0);
let _ = connector.send_mic(seq, pts, out[..len].to_vec());
seq = seq.wrapping_add(1);
}
Err(e) => tracing::debug!(error = %e, "opus mic encode"),
}
}
}
audio_client.stop_stream().ok();
Ok(())
}