fix(host/windows): don't 2-way-split-encode Main10 — it's SLOWER on Ada (fixes broken HDR animations)
apple / swift (push) Successful in 53s
audit / cargo-audit (push) Failing after 1m9s
android / android (push) Successful in 2m3s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 6s
ci / web (push) Successful in 29s
ci / docs-site (push) Successful in 29s
ci / bench (push) Successful in 1m31s
ci / rust (push) Successful in 4m26s
decky / build-publish (push) Successful in 11s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 3s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 5s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 3s
flatpak / build-publish (push) Successful in 3m34s
deb / build-publish (push) Successful in 6m55s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 5m25s
docker / deploy-docs (push) Successful in 18s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 5m10s

The "broken animations in HDR" was an encode-throughput cliff, not the ACCESS_LOST churn. Measured at
5120x1440@240 HEVC Main10 on the RTX 4090: forced 2-way split-encode = 7.6 ms/frame (~131 fps, well
over the 4.17 ms/240fps budget → choppy), while SINGLE engine = 2.8-3.9 ms/frame (~256-357 fps, fits
240). The split/merge overhead dominates for 10-bit; a single Ada NVENC engine already handles 5K@240
Main10 comfortably. So the split decision now forces DISABLE for Main10 (bit_depth >= 10), keeping the
existing forced-2 only for 8-bit above 1 Gpix/s. PUNKTFUNK_SPLIT_ENCODE still overrides. Added a
split-mode log line.

Validated live on the 4090: encode_us_p50 7.6 ms → 3.9 ms at 5K240 HDR with no env override.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 21:40:28 +00:00
parent b1e95a386f
commit b9f4cf1f3e
10 changed files with 1103 additions and 0 deletions
@@ -0,0 +1,285 @@
//! Audio: playback (decoded PCM → a WASAPI shared-mode render stream) and the microphone
//! uplink (WASAPI capture → Opus → 0xCB datagrams, the inverse of the host's virtual mic).
//!
//! The WASAPI analogue of the Linux client's PipeWire backend. Playback mirrors the host's
//! virtual-mic producer's adaptive jitter buffer: the session pump pushes 5 ms Opus-decoded
//! chunks on the network clock; the WASAPI render thread pulls whole event-driven quanta on
//! the device clock. Prime to ~3 quanta before producing, cap the ring so latency stays
//! bounded, re-prime after a real drain.
//!
//! WASAPI objects are COM-apartment-bound and not `Send`, so they live on a dedicated thread
//! (the same discipline as the host's `wasapi_cap`); only the channel + stop flag + join
//! handle cross the boundary.
use anyhow::{anyhow, Context, Result};
use punktfunk_core::client::NativeClient;
use std::collections::VecDeque;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{Receiver, SyncSender, TrySendError};
use std::sync::Arc;
use std::time::Duration;
use wasapi::{Direction, SampleType, StreamMode, WaveFormat};
const SAMPLE_RATE: usize = 48_000;
const CHANNELS: usize = 2;
/// 48 kHz stereo f32: 2 channels * 4 bytes = 8 bytes per frame.
const BLOCK_ALIGN: usize = CHANNELS * 4;
/// Mic frames are 20 ms (960 samples/channel) — any size ≤ 120 ms is fine host-side.
const MIC_FRAME: usize = 960;
pub struct AudioPlayer {
pcm_tx: SyncSender<Vec<f32>>,
stop: Arc<AtomicBool>,
thread: Option<std::thread::JoinHandle<()>>,
}
impl AudioPlayer {
/// Spawn the WASAPI render thread. Failure (no render endpoint on this box) is
/// survivable — the caller streams video-only.
pub fn spawn() -> Result<AudioPlayer> {
// 64 × 5 ms = 320 ms of slack between the pump and the WASAPI loop.
let (pcm_tx, pcm_rx) = std::sync::mpsc::sync_channel::<Vec<f32>>(64);
let stop = Arc::new(AtomicBool::new(false));
let (ready_tx, ready_rx) = std::sync::mpsc::sync_channel::<Result<()>>(1);
let stop_t = stop.clone();
let thread = std::thread::Builder::new()
.name("punktfunk-audio".into())
.spawn(move || {
if let Err(e) = render_thread(pcm_rx, stop_t, ready_tx) {
tracing::warn!(error = format!("{e:#}"), "audio playback thread ended");
}
})
.context("spawn audio thread")?;
match ready_rx.recv_timeout(Duration::from_secs(3)) {
Ok(Ok(())) => {
tracing::info!("WASAPI render: 48 kHz stereo f32 (default endpoint)");
Ok(AudioPlayer {
pcm_tx,
stop,
thread: Some(thread),
})
}
Ok(Err(e)) => Err(e),
Err(_) => Err(anyhow!(
"wasapi render init timed out (no render endpoint?)"
)),
}
}
/// Queue one interleaved-stereo f32 chunk. Drops the chunk if the WASAPI side is wedged
/// (the renderer conceals the gap; never block the session pump).
pub fn push(&self, pcm: Vec<f32>) {
if let Err(TrySendError::Disconnected(_)) = self.pcm_tx.try_send(pcm) {
// Thread already dead — Drop will reap it; nothing to do per-chunk.
}
}
}
impl Drop for AudioPlayer {
fn drop(&mut self) {
self.stop.store(true, Ordering::SeqCst);
if let Some(t) = self.thread.take() {
let _ = t.join();
}
}
}
fn render_thread(
pcm_rx: Receiver<Vec<f32>>,
stop: Arc<AtomicBool>,
ready: SyncSender<Result<()>>,
) -> Result<()> {
if let Err(e) = wasapi::initialize_mta()
.ok()
.context("CoInitializeEx (MTA)")
{
let _ = ready.send(Err(e));
return Ok(());
}
let res = (|| -> Result<()> {
let device =
wasapi::get_default_device(&Direction::Render).context("default render endpoint")?;
let mut audio_client = device.get_iaudioclient().context("IAudioClient")?;
let desired = WaveFormat::new(32, 32, &SampleType::Float, SAMPLE_RATE, CHANNELS, None);
let (default_period, _min_period) =
audio_client.get_device_period().context("device period")?;
let mode = StreamMode::EventsShared {
autoconvert: true,
buffer_duration_hns: default_period,
};
audio_client
.initialize_client(&desired, &Direction::Render, &mode)
.context("initialize render client")?;
let h_event = audio_client.set_get_eventhandle().context("event handle")?;
let render_client = audio_client
.get_audiorenderclient()
.context("IAudioRenderClient")?;
audio_client.start_stream().context("start render stream")?;
let _ = ready.send(Ok(()));
// Adaptive jitter buffer, in f32-byte units (same shape as the host's virtual mic).
let mut ring: VecDeque<u8> = VecDeque::new();
let mut primed = false;
while !stop.load(Ordering::Relaxed) {
if h_event.wait_for_event(100).is_err() {
continue;
}
// Drain everything the pump has queued into the ring.
while let Ok(chunk) = pcm_rx.try_recv() {
for s in chunk {
ring.extend(s.to_le_bytes());
}
}
let avail_frames = audio_client
.get_available_space_in_frames()
.context("available space")? as usize;
if avail_frames == 0 {
continue;
}
let want_bytes = avail_frames * BLOCK_ALIGN;
// Prime to ~3 quanta; cap at ~1 quantum of slack beyond that; re-prime on drain.
let target = (3 * want_bytes).clamp(720 * BLOCK_ALIGN, 9600 * BLOCK_ALIGN);
while ring.len() > target.max(want_bytes) + want_bytes {
ring.pop_front();
}
if !primed && ring.len() >= target {
primed = true;
}
let mut out = vec![0u8; want_bytes];
if primed {
let n = ring.len().min(want_bytes);
for (dst, b) in out.iter_mut().zip(ring.drain(..n)) {
*dst = b;
}
}
if ring.is_empty() {
primed = false;
}
render_client
.write_to_device(avail_frames, BLOCK_ALIGN, &out, None)
.context("write_to_device")?;
}
audio_client.stop_stream().ok();
Ok(())
})();
if let Err(ref e) = res {
let _ = ready.send(Err(anyhow!("{e:#}")));
}
res
}
/// The microphone uplink: capture the default input device, Opus-encode 20 ms chunks, ship
/// them as 0xCB datagrams into the host's virtual mic source.
pub struct MicStreamer {
stop: Arc<AtomicBool>,
thread: Option<std::thread::JoinHandle<()>>,
}
impl MicStreamer {
pub fn spawn(connector: Arc<NativeClient>) -> Result<MicStreamer> {
let stop = Arc::new(AtomicBool::new(false));
let stop_t = stop.clone();
let thread = std::thread::Builder::new()
.name("punktfunk-mic".into())
.spawn(move || {
if let Err(e) = mic_thread(&connector, stop_t) {
tracing::warn!(error = format!("{e:#}"), "mic uplink thread ended");
}
})
.context("spawn mic thread")?;
Ok(MicStreamer {
stop,
thread: Some(thread),
})
}
}
impl Drop for MicStreamer {
fn drop(&mut self) {
self.stop.store(true, Ordering::SeqCst);
if let Some(t) = self.thread.take() {
let _ = t.join();
}
}
}
fn mic_thread(connector: &Arc<NativeClient>, stop: Arc<AtomicBool>) -> Result<()> {
wasapi::initialize_mta()
.ok()
.context("CoInitializeEx (MTA)")?;
let mut encoder = opus::Encoder::new(
SAMPLE_RATE as u32,
opus::Channels::Stereo,
opus::Application::Voip,
)
.map_err(|e| anyhow!("opus encoder: {e}"))?;
let _ = encoder.set_bitrate(opus::Bitrate::Bits(64_000));
let device = wasapi::get_default_device(&Direction::Capture)
.context("default capture endpoint (no microphone?)")?;
let mut audio_client = device.get_iaudioclient().context("IAudioClient")?;
let desired = WaveFormat::new(32, 32, &SampleType::Float, SAMPLE_RATE, CHANNELS, None);
let (default_period, _min_period) =
audio_client.get_device_period().context("device period")?;
let mode = StreamMode::EventsShared {
autoconvert: true,
buffer_duration_hns: default_period,
};
audio_client
.initialize_client(&desired, &Direction::Capture, &mode)
.context("initialize capture client")?;
let h_event = audio_client.set_get_eventhandle().context("event handle")?;
let capture_client = audio_client
.get_audiocaptureclient()
.context("IAudioCaptureClient")?;
audio_client
.start_stream()
.context("start capture stream")?;
let mut bytes: VecDeque<u8> = VecDeque::new();
let mut ring: VecDeque<f32> = VecDeque::new();
let mut out = vec![0u8; 4000];
let mut seq = 0u32;
while !stop.load(Ordering::Relaxed) {
if h_event.wait_for_event(100).is_err() {
continue;
}
loop {
match capture_client.get_next_packet_size() {
Ok(Some(0)) | Ok(None) => break,
Ok(Some(_n)) => {
capture_client
.read_from_device_to_deque(&mut bytes)
.context("read capture")?;
}
Err(e) => return Err(anyhow!("get_next_packet_size: {e}")),
}
}
let whole = (bytes.len() / 4) * 4;
for c in bytes.drain(..whole).collect::<Vec<u8>>().chunks_exact(4) {
ring.push_back(f32::from_le_bytes([c[0], c[1], c[2], c[3]]));
}
// Ship every complete 20 ms stereo frame.
while ring.len() >= MIC_FRAME * CHANNELS {
let pcm: Vec<f32> = ring.drain(..MIC_FRAME * CHANNELS).collect();
match encoder.encode_float(&pcm, &mut out) {
Ok(len) => {
let pts = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(0);
let _ = connector.send_mic(seq, pts, out[..len].to_vec());
seq = seq.wrapping_add(1);
}
Err(e) => tracing::debug!(error = %e, "opus mic encode"),
}
}
}
audio_client.stop_stream().ok();
Ok(())
}