diff --git a/CLAUDE.md b/CLAUDE.md index c7e2d11..663dd5a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -346,7 +346,23 @@ FFI also link-needs `libGL`/`libgbm`/`libcuda` at build time). Env knobs: `PUNKT `PUNKTFUNK_COMPOSITOR=kwin|gamescope|mutter`, `PUNKTFUNK_ZEROCOPY=1`, `PUNKTFUNK_GAMESCOPE_APP=...`, `PUNKTFUNK_INPUT_BACKEND=...`, `PUNKTFUNK_PERF=1` (per-stage timing), `PUNKTFUNK_VIDEO_DROP=N` (FEC test), `PUNKTFUNK_FEC_PCT=N`, `PUNKTFUNK_DSCP=1` (opt-in DSCP/SO_PRIORITY media QoS on the data + -GameStream video/audio sockets; no-op on the wire on Windows without a qWAVE policy). +GameStream video/audio sockets; no-op on the wire on Windows without a qWAVE policy), +`PUNKTFUNK_444=1` (full-chroma HEVC 4:4:4, see below). + +**HEVC 4:4:4 (full chroma, Range Extensions)**: opt-in via `PUNKTFUNK_444`, negotiated like 10-bit — +the host emits 4:4:4 only when the client advertised `VIDEO_CAP_444` (wire bit `0x04` + ABI +`PUNKTFUNK_VIDEO_CAP_444`), the codec is HEVC, the session is single-process, **and** a GPU probe +(`encode::can_encode_444`, run before the Welcome) confirms support — else it resolves to 4:2:0 and +`Welcome::chroma_format` reflects the real value (honest downgrade; the client reads it via +`punktfunk_connection_chroma_format`). **punktfunk/1-native only** — GameStream/Moonlight stays 4:2:0 +(stock clients can't decode 4:4:4). **NVENC is the implemented path**: Linux `hevc_nvenc` feeds a +swscale'd `yuv444p` (RGB-in is always 4:2:0 — verified on the RTX 5070 Ti — so the session forces CPU +RGB capture for 4:4:4); Windows NVENC keeps ARGB input + FREXT profile + `chromaFormatIDC=3` and the +DDA capturer delivers RGB. VAAPI / AMF / QSV **decline** (probe returns false — no validated 4:4:4 +hardware in the lab; they'd produce 4:2:0). Software (openh264) is 4:2:0-only. Test with +`PUNKTFUNK_CLIENT_444=1 punktfunk-probe --out x.h265` then `ffprobe x.h265` (expect `pix_fmt yuv444p`). 
+*Linux NVENC mechanism validated on the RTX 5070 Ti (ffmpeg CLI); Windows NVENC + 10-bit-4:4:4 not yet +on-glass validated.* ## Conventions diff --git a/Cargo.lock b/Cargo.lock index 611cdbe..e145cda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2828,6 +2828,7 @@ dependencies = [ "fec-rs", "hmac", "libc", + "opus", "proptest", "quinn", "rand 0.9.4", @@ -2855,7 +2856,6 @@ dependencies = [ "anyhow", "ash", "ashpd", - "audiopus_sys", "axum", "axum-server", "base64", diff --git a/clients/android/app/src/main/kotlin/io/unom/punktfunk/ConnectScreen.kt b/clients/android/app/src/main/kotlin/io/unom/punktfunk/ConnectScreen.kt index a0c28ed..4ee6e8a 100644 --- a/clients/android/app/src/main/kotlin/io/unom/punktfunk/ConnectScreen.kt +++ b/clients/android/app/src/main/kotlin/io/unom/punktfunk/ConnectScreen.kt @@ -163,7 +163,7 @@ fun ConnectScreen(settings: Settings, onConnected: (Long) -> Unit) { targetHost, targetPort, w, h, hz, id.certPem, id.privateKeyPem, pinHex ?: "", settings.bitrateKbps, settings.compositor, gamepadPref, - hdrEnabled, + hdrEnabled, settings.audioChannels, ) } connecting = false diff --git a/clients/android/app/src/main/kotlin/io/unom/punktfunk/Settings.kt b/clients/android/app/src/main/kotlin/io/unom/punktfunk/Settings.kt index 85fb3eb..ab970e1 100644 --- a/clients/android/app/src/main/kotlin/io/unom/punktfunk/Settings.kt +++ b/clients/android/app/src/main/kotlin/io/unom/punktfunk/Settings.kt @@ -16,6 +16,9 @@ data class Settings( val bitrateKbps: Int = 0, val compositor: Int = 0, val gamepad: Int = 0, + /** Requested audio channel count: 2 (stereo), 6 (5.1) or 8 (7.1). The host clamps to what it + * can capture; the resolved count drives the decoder + AAudio layout. */ + val audioChannels: Int = 2, val micEnabled: Boolean = false, /** Show the live stats overlay (FPS / throughput / latency) during a stream. 
*/ val statsHudEnabled: Boolean = true, @@ -39,6 +42,7 @@ class SettingsStore(context: Context) { bitrateKbps = prefs.getInt(K_BITRATE, 0), compositor = prefs.getInt(K_COMPOSITOR, 0), gamepad = prefs.getInt(K_GAMEPAD, 0), + audioChannels = prefs.getInt(K_AUDIO_CH, 2), micEnabled = prefs.getBoolean(K_MIC, false), statsHudEnabled = prefs.getBoolean(K_HUD, true), trackpadMode = prefs.getBoolean(K_TRACKPAD, true), @@ -52,6 +56,7 @@ class SettingsStore(context: Context) { .putInt(K_BITRATE, s.bitrateKbps) .putInt(K_COMPOSITOR, s.compositor) .putInt(K_GAMEPAD, s.gamepad) + .putInt(K_AUDIO_CH, s.audioChannels) .putBoolean(K_MIC, s.micEnabled) .putBoolean(K_HUD, s.statsHudEnabled) .putBoolean(K_TRACKPAD, s.trackpadMode) @@ -65,6 +70,7 @@ class SettingsStore(context: Context) { const val K_BITRATE = "bitrate_kbps" const val K_COMPOSITOR = "compositor" const val K_GAMEPAD = "gamepad" + const val K_AUDIO_CH = "audio_channels" const val K_MIC = "mic_enabled" const val K_HUD = "stats_hud_enabled" const val K_TRACKPAD = "trackpad_mode" @@ -133,6 +139,13 @@ val REFRESH_OPTIONS = listOf( 240 to "240 Hz", ) +/** (channel count, label). 2 = stereo (default), 6 = 5.1, 8 = 7.1. */ +val AUDIO_CHANNEL_OPTIONS = listOf( + 2 to "Stereo", + 6 to "5.1 Surround", + 8 to "7.1 Surround", +) + /** (kbps, label). `0` = host default. 
*/ val BITRATE_OPTIONS = listOf( 0 to "Automatic", diff --git a/clients/android/app/src/main/kotlin/io/unom/punktfunk/SettingsScreen.kt b/clients/android/app/src/main/kotlin/io/unom/punktfunk/SettingsScreen.kt index 980541b..66da4fa 100644 --- a/clients/android/app/src/main/kotlin/io/unom/punktfunk/SettingsScreen.kt +++ b/clients/android/app/src/main/kotlin/io/unom/punktfunk/SettingsScreen.kt @@ -104,6 +104,12 @@ fun SettingsScreen(initial: Settings, onChange: (Settings) -> Unit, onBack: () - } SettingsGroup("Audio") { + SettingDropdown( + label = "Audio channels", + options = AUDIO_CHANNEL_OPTIONS, + selected = s.audioChannels, + ) { ch -> update(s.copy(audioChannels = ch)) } + ToggleRow( title = "Microphone", subtitle = "Send your mic to the host's virtual microphone", diff --git a/clients/android/kit/src/main/kotlin/io/unom/punktfunk/kit/NativeBridge.kt b/clients/android/kit/src/main/kotlin/io/unom/punktfunk/kit/NativeBridge.kt index 6be7552..898f3de 100644 --- a/clients/android/kit/src/main/kotlin/io/unom/punktfunk/kit/NativeBridge.kt +++ b/clients/android/kit/src/main/kotlin/io/unom/punktfunk/kit/NativeBridge.kt @@ -45,6 +45,7 @@ object NativeBridge { compositorPref: Int, gamepadPref: Int, hdrEnabled: Boolean, + audioChannels: Int, ): Long /** 64-hex SHA-256 of the cert the host presented on [handle]; valid after a successful connect. */ diff --git a/clients/android/native/src/audio.rs b/clients/android/native/src/audio.rs index ed7f76b..d70b942 100644 --- a/clients/android/native/src/audio.rs +++ b/clients/android/native/src/audio.rs @@ -1,7 +1,11 @@ //! Android audio playback (android-only): pull Opus packets from the connector, decode to -//! interleaved f32 stereo, and feed AAudio (LowLatency) via its realtime data callback through a -//! jitter ring. Mirrors [`crate::decode`]: one thread we own (the Opus decode producer) plus a -//! shutdown flag; the realtime callback thread is owned by AAudio. +//! 
interleaved f32 (stereo or 5.1/7.1 surround), and feed AAudio (LowLatency) via its realtime data +//! callback through a jitter ring. Mirrors [`crate::decode`]: one thread we own (the Opus decode +//! producer) plus a shutdown flag; the realtime callback thread is owned by AAudio. +//! +//! The layout is the host-RESOLVED channel count (`NativeClient::audio_channels`, negotiated at +//! connect), so an older/clamping host that can only capture stereo is decoded + played as stereo. +//! 2 = stereo / 6 = 5.1 / 8 = 7.1, in the canonical wire order FL FR FC LFE RL RR SL SR. //! //! The ring started as a port of `punktfunk-client-linux/src/audio.rs`, but AAudio — unlike //! PipeWire, which adaptively rate-matches the stream and absorbs a shallow buffer — hands us a raw @@ -26,36 +30,72 @@ use std::sync::mpsc::{sync_channel, Receiver, SyncSender, TrySendError}; use std::sync::Arc; use std::time::Duration; -const CHANNELS: usize = 2; const SAMPLE_RATE: i32 = 48_000; /// Decoded-chunk hand-off depth: 64 × 5 ms = 320 ms slack (matches the core's AUDIO_QUEUE). const RING_CHUNKS: usize = 64; -/// Opus decode scratch: worst-case 120 ms stereo frame (5760 samples/ch × 2 ch). -const PCM_SCRATCH: usize = 5760 * CHANNELS; -// --- Jitter-ring depths, in interleaved-f32 samples (all expressed in ms via `MS`). ----------- +// --- Jitter-ring depths, in MILLISECONDS (scaled to interleaved-f32 samples at runtime). -------- +// The channel count is negotiated, not a compile-time const, so these are kept in ms and multiplied +// by `ms` (interleaved-f32 samples per millisecond at the resolved layout) inside `start`. // Unlike the Linux client (PipeWire adaptively rate-matches the stream to the graph clock, masking // host↔DAC drift + a shallow ring), AAudio hands us a raw callback and we own the buffer: drift and // WiFi power-save bunching land as underruns/overflows = crackle. 
So Android runs a deliberately // deeper, smoothly-managed ring than Linux — keep the two clients' depths intentionally divergent. -/// Interleaved f32 samples per millisecond (48 kHz × 2 ch). -const MS: usize = (SAMPLE_RATE as usize / 1000) * CHANNELS; // 96 /// Prime/target floor: fill to ~40 ms before playing (and after a sustained drain). Deep enough to /// ride out WiFi arrival jitter + clock drift; the dominant Android-only anti-crackle lever. -const PRIME_FLOOR: usize = 40 * MS; +const PRIME_FLOOR_MS: usize = 40; /// Ceiling for the burst-scaled target (so a large quantum can't push the prime depth too high). -const PRIME_CEIL: usize = 80 * MS; +const PRIME_CEIL_MS: usize = 80; /// Drop-oldest headroom above the target before trimming — a ~80 ms band swallows an arrival burst /// without overflowing. -const JITTER_HEADROOM: usize = 80 * MS; +const JITTER_HEADROOM_MS: usize = 80; /// Hard latency bound: never let the ring exceed ~150 ms (the only thing that caps added latency). -const HARD_CAP: usize = 150 * MS; +const HARD_CAP_MS: usize = 150; /// Re-prime (go silent to refill) only after this many CONSECUTIVE empty callbacks, so one transient /// drain doesn't manufacture a fresh 40 ms silence (the old `if ring.is_empty()` re-primed instantly). const DEPRIME_AFTER_CALLBACKS: u32 = 5; /// Throttle the AAudio XRun-driven HW-buffer grow check (cheap, but no need to poll every quantum). const XRUN_CHECK_EVERY: u32 = 128; +/// Opus decoder for the audio plane: a plain stereo decoder (the validated path) or a multistream +/// decoder for 5.1/7.1, both behind one `decode_float`. Built from the host-RESOLVED channel count +/// via the shared layout table. Mirrors the Linux client's `AudioDec`. 
+enum AudioDec { + Stereo(opus::Decoder), + Surround(opus::MSDecoder), +} + +impl AudioDec { + fn new(channels: u8) -> Result { + if channels == 2 { + Ok(AudioDec::Stereo(opus::Decoder::new( + SAMPLE_RATE as u32, + opus::Channels::Stereo, + )?)) + } else { + let l = punktfunk_core::audio::layout_for(channels, false); + Ok(AudioDec::Surround(opus::MSDecoder::new( + SAMPLE_RATE as u32, + l.streams, + l.coupled, + l.mapping, + )?)) + } + } + + fn decode_float( + &mut self, + input: &[u8], + out: &mut [f32], + fec: bool, + ) -> Result { + match self { + AudioDec::Stereo(d) => d.decode_float(input, out, fec), + AudioDec::Surround(d) => d.decode_float(input, out, fec), + } + } +} + /// Diagnostics — written by the decode thread + the realtime callback, logged periodically. The /// audio analogue of the video `fed`/`rendered` counters (we can't "screenshot" sound). #[derive(Default)] @@ -74,9 +114,20 @@ pub struct AudioPlayback { } impl AudioPlayback { - /// Open AAudio (LowLatency, 48 kHz/stereo/f32) with a realtime callback draining a jitter ring, - /// then spawn the Opus decode thread. `None` on failure (the caller leaves video streaming). + /// Open AAudio (LowLatency, 48 kHz/f32, the host-resolved channel layout) with a realtime + /// callback draining a jitter ring, then spawn the Opus decode thread. `None` on failure (the + /// caller leaves video streaming). pub fn start(client: Arc) -> Option { + // Build playback from the host-RESOLVED channel count (never the request): 2 = stereo / + // 6 = 5.1 / 8 = 7.1, canonical wire order FL FR FC LFE RL RR SL SR. + let channels = punktfunk_core::audio::normalize_channels(client.audio_channels) as usize; + // Interleaved f32 samples per millisecond at this layout (48 kHz × channels); the ms- + // denominated jitter-ring depths scale by it. 
+ let ms = (SAMPLE_RATE as usize / 1000) * channels; + let prime_floor = PRIME_FLOOR_MS * ms; + let prime_ceil = PRIME_CEIL_MS * ms; + let jitter_headroom = JITTER_HEADROOM_MS * ms; + let hard_cap_max = HARD_CAP_MS * ms; let counters = Arc::new(Counters::default()); let (tx, rx) = sync_channel::>(RING_CHUNKS); // Recycle free-list: drained PCM buffers go BACK to the decode thread to be refilled, so the @@ -92,13 +143,13 @@ impl AudioPlayback { // before the trim below = the hard cap plus one full channel of 5 ms (480-f32) frames — the // punktfunk protocol always sends 5 ms Opus frames (host `audio_thread`); a larger frame // would force a one-time realloc, asserted (not silently corrupted) in `decode_loop`. - let mut ring: VecDeque = VecDeque::with_capacity(HARD_CAP + RING_CHUNKS * 5 * MS); + let mut ring: VecDeque = VecDeque::with_capacity(hard_cap_max + RING_CHUNKS * 5 * ms); let mut primed = false; let mut empties: u32 = 0; // consecutive empty callbacks (de-prime hysteresis) let mut cb_count: u32 = 0; // callbacks since open (throttles the XRun grow check) let mut last_xrun: i32 = 0; // last AAudio XRun count we grew the buffer for let callback = move |s: &AudioStream, data: *mut c_void, num_frames: i32| { - let want = num_frames as usize * CHANNELS; + let want = num_frames as usize * channels; // SAFETY: AAudio provides `num_frames * channel_count` F32 slots at `data`. let out = unsafe { std::slice::from_raw_parts_mut(data as *mut f32, want) }; // Drain decoded chunks into the ring WITHOUT freeing on the RT thread: `drain(..)` empties @@ -108,11 +159,11 @@ impl AudioPlayback { ring.extend(chunk.drain(..)); let _ = free_tx.try_send(chunk); } - // Jitter buffer: prime to ~40 ms (PRIME_FLOOR) before playing and after a sustained drain; + // Jitter buffer: prime to ~40 ms (prime_floor) before playing and after a sustained drain; // drop-oldest only above a wide ~120 ms band. 
Decoupled from the AAudio burst `want` (tiny // on the LowLatency MMAP path) so the depth doesn't collapse to a single quantum. - let target = (3 * want).clamp(PRIME_FLOOR, PRIME_CEIL); - let hard_cap = (target + JITTER_HEADROOM).min(HARD_CAP); + let target = (3 * want).clamp(prime_floor, prime_ceil); + let hard_cap = (target + jitter_headroom).min(hard_cap_max); while ring.len() > hard_cap { ring.pop_front(); } @@ -166,7 +217,11 @@ impl AudioPlayback { .ok()? .direction(AudioDirection::Output) .sample_rate(SAMPLE_RATE) - .channel_count(CHANNELS as i32) + // The wire order (FL FR FC LFE RL RR SL SR) is the standard AAudio/Android channel + // order, so this is an IDENTITY mapping — no permute. AAudio infers the 5.1/7.1 mask + // from `channel_count` (the ndk crate's builder exposes no setChannelMask); the host + // captures + Opus-encodes in exactly this order. + .channel_count(channels as i32) .format(AudioFormat::PCM_Float) .performance_mode(AudioPerformanceMode::LowLatency) .sharing_mode(AudioSharingMode::Shared) @@ -206,7 +261,7 @@ impl AudioPlayback { let sd = shutdown.clone(); let join = std::thread::Builder::new() .name("pf-audio".into()) - .spawn(move || decode_loop(client, tx, free_rx, sd, counters)) + .spawn(move || decode_loop(client, tx, free_rx, sd, counters, channels)) .ok(); Some(AudioPlayback { @@ -236,29 +291,34 @@ fn decode_loop( free_rx: Receiver>, shutdown: Arc, counters: Arc, + channels: usize, ) { - let mut dec = match opus::Decoder::new(SAMPLE_RATE as u32, opus::Channels::Stereo) { + // Interleaved f32 samples per millisecond at this layout — the ring's 5 ms reserve check below. + let ms = (SAMPLE_RATE as usize / 1000) * channels; + // Opus decode scratch: worst-case 120 ms frame (5760 samples/ch) × channels. 
+ let pcm_scratch = 5760 * channels; + let mut dec = match AudioDec::new(channels as u8) { Ok(d) => d, Err(e) => { log::error!("audio: opus decoder init: {e} — audio disabled"); return; } }; - let mut pcm = vec![0f32; PCM_SCRATCH]; + let mut pcm = vec![0f32; pcm_scratch]; let mut window_peak = 0f32; // loudest |sample| since the last log — tells a tone from silence while !shutdown.load(Ordering::Relaxed) { match client.next_audio(Duration::from_millis(5)) { Ok(pkt) => match dec.decode_float(&pkt.data, &mut pcm, false) { Ok(samples) => { - let n = samples * CHANNELS; + let n = samples * channels; for &s in &pcm[..n] { window_peak = window_peak.max(s.abs()); } - // The ring's pre-reservation in `start` assumes the protocol's 5 ms (≤480-f32) + // The ring's pre-reservation in `start` assumes the protocol's 5 ms (≤240-f32/ch) // frames; a larger frame would force a one-time realloc on the RT thread. Catch a // future host frame-size change here in debug, not as a silent audio glitch. debug_assert!( - n <= 5 * MS, + n <= 5 * ms, "audio frame {n} f32 exceeds the 5 ms ring reserve" ); let count = counters.opus_decoded.fetch_add(1, Ordering::Relaxed) + 1; @@ -266,7 +326,7 @@ fn decode_loop( // free-list is momentarily empty (startup / after a backpressure drop). let mut buf = free_rx .try_recv() - .unwrap_or_else(|_| Vec::with_capacity(PCM_SCRATCH)); + .unwrap_or_else(|_| Vec::with_capacity(pcm_scratch)); buf.clear(); buf.extend_from_slice(&pcm[..n]); match tx.try_send(buf) { diff --git a/clients/android/native/src/session.rs b/clients/android/native/src/session.rs index 28127e5..b77f9d3 100644 --- a/clients/android/native/src/session.rs +++ b/clients/android/native/src/session.rs @@ -140,10 +140,12 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeGenerateIde } /// `NativeBridge.nativeConnect(host, port, w, h, hz, certPem, keyPem, pinHex, bitrateKbps, -/// compositorPref, gamepadPref): Long`. 
`certPem`/`keyPem` empty = anonymous, else presented as the -/// persistent identity. `pinHex` empty = TOFU (read `nativeHostFingerprint` after), else 64-hex -/// SHA-256 to pin the host (mismatch → 0). `bitrateKbps` 0 = host default. `compositorPref`/ -/// `gamepadPref` are `CompositorPref`/`GamepadPref` wire bytes (0 = Auto; unknown → Auto). +/// compositorPref, gamepadPref, hdrEnabled, audioChannels): Long`. `certPem`/`keyPem` empty = +/// anonymous, else presented as the persistent identity. `pinHex` empty = TOFU (read +/// `nativeHostFingerprint` after), else 64-hex SHA-256 to pin the host (mismatch → 0). `bitrateKbps` +/// 0 = host default. `compositorPref`/`gamepadPref` are `CompositorPref`/`GamepadPref` wire bytes +/// (0 = Auto; unknown → Auto). `audioChannels` is the requested surround layout (2/6/8; normalized, +/// anything else → stereo) — the host clamps it and the resolved count drives playback. /// Returns an opaque handle, or 0 on failure (logged). #[no_mangle] #[allow(clippy::too_many_arguments)] @@ -162,6 +164,7 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeConnect<'lo compositor_pref: jint, gamepad_pref: jint, hdr_enabled: jboolean, + audio_channels: jint, ) -> jlong { let host: String = match env.get_string(&host) { Ok(s) => s.into(), @@ -213,6 +216,11 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeConnect<'lo } else { 0 }, + // Requested surround layout (2 = stereo / 6 = 5.1 / 8 = 7.1). The host clamps to what it can + // capture and echoes the resolved count in `connector.audio_channels`, which drives the + // decoder + AAudio layout (read in `crate::audio::AudioPlayback::start`). Anything else + // normalizes to stereo here. 
+ punktfunk_core::audio::normalize_channels(audio_channels.clamp(0, u8::MAX as jint) as u8), None, // launch: default app pin, // Some → Crypto on host-fp mismatch identity, // owned (cert, key) PEM, or None (anonymous) diff --git a/clients/apple/Sources/PunktfunkClient/ContentView.swift b/clients/apple/Sources/PunktfunkClient/ContentView.swift index 2ac8cb4..a3ef40a 100644 --- a/clients/apple/Sources/PunktfunkClient/ContentView.swift +++ b/clients/apple/Sources/PunktfunkClient/ContentView.swift @@ -25,6 +25,7 @@ struct ContentView: View { @AppStorage(DefaultsKey.compositor) private var compositor = 0 @AppStorage(DefaultsKey.gamepadType) private var gamepadType = 0 @AppStorage(DefaultsKey.bitrateKbps) private var bitrateKbps = 0 + @AppStorage(DefaultsKey.audioChannels) private var audioChannels = 2 @AppStorage(DefaultsKey.fullscreenWhileStreaming) private var fullscreenWhileStreaming = true @AppStorage(DefaultsKey.hudEnabled) private var hudEnabled = true @AppStorage(DefaultsKey.hudPlacement) private var hudPlacement = HUDPlacement.topTrailing.rawValue @@ -252,6 +253,7 @@ struct ContentView: View { setting: PunktfunkConnection.GamepadType( rawValue: UInt32(clamping: gamepadType)) ?? 
.auto), bitrateKbps: UInt32(clamping: bitrateKbps), + audioChannels: UInt8(clamping: audioChannels), launchID: launchID, allowTofu: host.pinnedSHA256 == nil) } @@ -351,6 +353,7 @@ struct ContentView: View { compositor: pref, gamepad: pad, bitrateKbps: bitrate, + audioChannels: UInt8(clamping: audioChannels), autoTrust: true) } } diff --git a/clients/apple/Sources/PunktfunkClient/SessionModel.swift b/clients/apple/Sources/PunktfunkClient/SessionModel.swift index 1949da5..f01f9a5 100644 --- a/clients/apple/Sources/PunktfunkClient/SessionModel.swift +++ b/clients/apple/Sources/PunktfunkClient/SessionModel.swift @@ -99,6 +99,7 @@ final class SessionModel: ObservableObject { compositor: PunktfunkConnection.Compositor = .auto, gamepad: PunktfunkConnection.GamepadType = .auto, bitrateKbps: UInt32 = 0, + audioChannels: UInt8 = 2, hdrEnabled: Bool = true, launchID: String? = nil, allowTofu: Bool = false, @@ -137,7 +138,7 @@ final class SessionModel: ObservableObject { width: width, height: height, refreshHz: hz, pinSHA256: pin, identity: identity, compositor: compositor, gamepad: gamepad, bitrateKbps: bitrateKbps, videoCaps: videoCaps, - launchID: launchID) } + audioChannels: audioChannels, launchID: launchID) } await MainActor.run { [weak self] in guard let self else { return } // The user may have abandoned this attempt (window closed, another host diff --git a/clients/apple/Sources/PunktfunkClient/SettingsView.swift b/clients/apple/Sources/PunktfunkClient/SettingsView.swift index 1197323..4292756 100644 --- a/clients/apple/Sources/PunktfunkClient/SettingsView.swift +++ b/clients/apple/Sources/PunktfunkClient/SettingsView.swift @@ -25,6 +25,7 @@ struct SettingsView: View { @AppStorage(DefaultsKey.libraryEnabled) private var libraryEnabled = false @AppStorage(DefaultsKey.fullscreenWhileStreaming) private var fullscreenWhileStreaming = true @AppStorage(DefaultsKey.micEnabled) private var micEnabled = true + @AppStorage(DefaultsKey.audioChannels) private var audioChannels = 
2 @AppStorage(DefaultsKey.hudEnabled) private var hudEnabled = true @AppStorage(DefaultsKey.hudPlacement) private var hudPlacement = HUDPlacement.topTrailing.rawValue @ObservedObject private var gamepads = GamepadManager.shared @@ -173,6 +174,10 @@ struct SettingsView: View { TVSelectionRow(title: "Stream mode", options: options, selection: modeTag) TVSelectionRow( title: "Bitrate", options: bitrateOptions, selection: $bitrateKbps) + TVSelectionRow( + title: "Audio channels", + options: [("Stereo", 2), ("5.1 Surround", 6), ("7.1 Surround", 8)], + selection: $audioChannels) if bitrateKbps > 1_000_000 { Label(Self.gigabitWarning, systemImage: "exclamationmark.triangle.fill") .font(.caption) @@ -271,6 +276,11 @@ struct SettingsView: View { @ViewBuilder private var audioSection: some View { Section { + Picker("Audio channels", selection: $audioChannels) { + Text("Stereo").tag(2) + Text("5.1 Surround").tag(6) + Text("7.1 Surround").tag(8) + } #if os(macOS) Picker("Speaker", selection: $speakerUID) { Text("System default").tag("") diff --git a/clients/apple/Sources/PunktfunkKit/DefaultsKeys.swift b/clients/apple/Sources/PunktfunkKit/DefaultsKeys.swift index 88b5733..1b9c281 100644 --- a/clients/apple/Sources/PunktfunkKit/DefaultsKeys.swift +++ b/clients/apple/Sources/PunktfunkKit/DefaultsKeys.swift @@ -15,6 +15,9 @@ public enum DefaultsKey { public static let gamepadType = "punktfunk.gamepadType" public static let gamepadID = "punktfunk.gamepadID" public static let bitrateKbps = "punktfunk.bitrateKbps" + /// Requested audio channel count: 2 (stereo), 6 (5.1) or 8 (7.1). The host clamps to what it + /// can capture; the resolved count drives the in-core decode + AVAudioEngine layout. 
+ public static let audioChannels = "punktfunk.audioChannels" public static let micEnabled = "punktfunk.micEnabled" public static let speakerUID = "punktfunk.speakerUID" public static let micUID = "punktfunk.micUID" diff --git a/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift b/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift index e44c01d..fa6b718 100644 --- a/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift +++ b/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift @@ -235,6 +235,12 @@ public final class PunktfunkConnection { /// drain `nextHdrMeta`. public var isHDR: Bool { colorTransfer == 16 || colorTransfer == 18 } + /// The audio channel count the host resolved for this session (the Welcome's echo of the + /// requested `audioChannels`, clamped to what the host can capture): `2` (stereo), `6` (5.1) + /// or `8` (7.1). Build the playback layout from THIS, never the request. `2` for an older host. + /// PCM from `nextAudioPcm` is interleaved in the canonical wire order FL FR FC LFE RL RR SL SR. + public private(set) var resolvedAudioChannels: UInt8 = 2 + /// Connect and start a session at the requested mode (the host creates a native virtual /// output at exactly this size/refresh). Blocks up to `timeoutMs`. /// @@ -264,6 +270,7 @@ public final class PunktfunkConnection { gamepad: GamepadType = .auto, bitrateKbps: UInt32 = 0, videoCaps: UInt8 = 0, + audioChannels: UInt8 = 2, launchID: String? 
= nil, timeoutMs: UInt32 = 10_000 ) throws { @@ -279,16 +286,16 @@ public final class PunktfunkConnection { withOptionalCString(launchID) { launch in if let pin = pinSHA256 { return pin.withUnsafeBytes { p in - punktfunk_connect_ex5( + punktfunk_connect_ex6( cs, port, width, height, refreshHz, compositor.rawValue, - gamepad.rawValue, bitrateKbps, videoCaps, launch, + gamepad.rawValue, bitrateKbps, videoCaps, audioChannels, launch, p.bindMemory(to: UInt8.self).baseAddress, &observed, cert, key, timeoutMs) } } - return punktfunk_connect_ex5( + return punktfunk_connect_ex6( cs, port, width, height, refreshHz, compositor.rawValue, - gamepad.rawValue, bitrateKbps, videoCaps, launch, + gamepad.rawValue, bitrateKbps, videoCaps, audioChannels, launch, nil, &observed, cert, key, timeoutMs) } } @@ -320,6 +327,9 @@ public final class PunktfunkConnection { colorMatrix = mtx colorFullRange = fullRange != 0 bitDepth = depth + var ac: UInt8 = 2 + _ = punktfunk_connection_audio_channels(handle, &ac) + resolvedAudioChannels = ac } /// A bandwidth speed-test measurement (see `startSpeedTest`). Partial until `done`. @@ -468,6 +478,50 @@ public final class PunktfunkConnection { } } + /// One decoded audio frame from `nextAudioPcm`: interleaved 32-bit float at 48 kHz, in the + /// canonical wire channel order FL FR FC LFE RL RR SL SR (the first `channels`). + public struct AudioPCM: Sendable { + /// Interleaved f32 samples (`frameCount * channels` long), wire channel order. + public let samples: [Float] + /// Samples per channel. + public let frameCount: Int + /// Channel count (2/6/8) — `resolvedAudioChannels`. + public let channels: Int + public let ptsNs: UInt64 + public let seq: UInt32 + } + + /// Pull the next audio frame, **decoded in-core** to interleaved f32 PCM — Apple's AudioToolbox + /// Opus path is stereo-only, so surround (and, for uniformity, stereo too) is decoded by the + /// Rust core (libopus multistream) and handed back as PCM. 
nil on timeout, throws `.closed` once + /// the session ended. Drain from a dedicated audio thread (do NOT also call `nextAudio` — they + /// share the underlying queue). The returned `samples` are copied out, so the buffer is owned. + public func nextAudioPcm(timeoutMs: UInt32 = 100) throws -> AudioPCM? { + audioLock.lock() + defer { audioLock.unlock() } + guard let h = liveHandle() else { throw PunktfunkClientError.closed } + + var out = PunktfunkAudioPcm() + let rc = punktfunk_connection_next_audio_pcm(h, &out, timeoutMs) + switch rc { + case statusOK: + let channels = Int(out.channels) + let total = Int(out.frame_count) * channels + guard let base = out.samples, total > 0 else { return nil } + // Copy: the pointer borrows connection memory only until the next PCM call. + let samples = Array(UnsafeBufferPointer(start: base, count: total)) + return AudioPCM( + samples: samples, frameCount: Int(out.frame_count), + channels: channels, ptsNs: out.pts_ns, seq: out.seq) + case statusNoFrame: + return nil + case statusClosed: + throw PunktfunkClientError.closed + default: + throw PunktfunkClientError.status(rc) + } + } + /// Pull the next force-feedback update for the GCController haptics engine: /// `(pad, lowFrequency, highFrequency)` with 0...0xFFFF amplitudes, (0, 0) = stop. /// Drain from the (single) feedback thread, alongside `nextHidOutput`. diff --git a/clients/apple/Sources/PunktfunkKit/SessionAudio.swift b/clients/apple/Sources/PunktfunkKit/SessionAudio.swift index ad254d4..7e10459 100644 --- a/clients/apple/Sources/PunktfunkKit/SessionAudio.swift +++ b/clients/apple/Sources/PunktfunkKit/SessionAudio.swift @@ -19,13 +19,13 @@ import os private let log = Logger(subsystem: "io.unom.punktfunk", category: "audio") -/// SPSC-ish jitter ring (interleaved stereo float), drain thread → render callback. -/// The unfair lock is held for microseconds; fine at render-callback rates. 
Priming: +/// SPSC-ish jitter ring (interleaved float, `channels` per frame), drain thread → render +/// callback. The unfair lock is held for microseconds; fine at render-callback rates. Priming: /// reads return silence until enough is buffered (at least `prefill`, and at least one /// packet more than the device's render quantum — large-buffer devices would otherwise /// chronically out-demand the prefill and oscillate prime → dropout → re-prime), and an /// underrun re-primes, concealing jitter as one short dip instead of sustained crackle. -/// All counts stay even (whole stereo frames), so L/R interleave can never flip. +/// All counts stay whole frames (multiples of `channels`), so the interleave can never slip. final class AudioRing: @unchecked Sendable { private var buf: [Float] private var readIdx = 0 @@ -34,12 +34,14 @@ final class AudioRing: @unchecked Sendable { private var renderQuantum = 0 private let prefill: Int private let highWater: Int + private let channels: Int private let lock = OSAllocatedUnfairLock() - /// `capacity`/`prefill` in samples (interleaved — 2 per frame, both must be even). - init(capacity: Int, prefill: Int) { + /// `capacity`/`prefill` in samples (interleaved — `channels` per frame, both whole frames). + init(capacity: Int, prefill: Int, channels: Int) { buf = [Float](repeating: 0, count: capacity) self.prefill = prefill + self.channels = channels highWater = prefill * 4 } @@ -74,8 +76,8 @@ final class AudioRing: @unchecked Sendable { renderQuantum = max(renderQuantum, count) let available = writeIdx - readIdx if !primed { - // 480 samples = one 5 ms host packet of slack beyond the device's demand. - if available >= max(prefill, renderQuantum + 480) { + // One 5 ms host packet (240 frames × channels) of slack beyond the device's demand. 
+ if available >= max(prefill, renderQuantum + 240 * channels) { primed = true } else { for i in 0...allocate(capacity: 8192 * 2) + // 8192 frames × up to 8 channels (7.1) — the render block caps `frames` at 8192. + let ptr = UnsafeMutablePointer.allocate(capacity: 8192 * 8) deinit { ptr.deallocate() } } +/// CoreAudio channel layout for the canonical wire order FL FR FC LFE RL RR [SL SR]. nil for +/// stereo (the standard layout is correct). For 5.1/7.1 we list explicit channel labels via +/// `kAudioChannelLayoutTag_UseChannelDescriptions` — preset tags (DTS_5_1 etc.) don't reliably +/// match Moonlight's order. NB the 7.1 mapping (verified against the WASAPI 0x63F + SPA orderings): +/// wire idx 4-5 = RL/RR = the WAVE *back* pair → LeftSurround/RightSurround; idx 6-7 = SL/SR = the +/// WAVE *side* pair → LeftSurroundDirect/RightSurroundDirect. (Using RearSurround* for 6-7 would +/// swap side/back vs the Windows/Linux clients.) +private func wireChannelLayout(channels: Int) -> AVAudioChannelLayout? 
{ + let labels: [AudioChannelLabel] + switch channels { + case 6: + labels = [ + kAudioChannelLabel_Left, kAudioChannelLabel_Right, kAudioChannelLabel_Center, + kAudioChannelLabel_LFEScreen, kAudioChannelLabel_LeftSurround, + kAudioChannelLabel_RightSurround, + ] + case 8: + labels = [ + kAudioChannelLabel_Left, kAudioChannelLabel_Right, kAudioChannelLabel_Center, + kAudioChannelLabel_LFEScreen, + kAudioChannelLabel_LeftSurround, kAudioChannelLabel_RightSurround, // wire RL/RR (back) + kAudioChannelLabel_LeftSurroundDirect, kAudioChannelLabel_RightSurroundDirect, // wire SL/SR (side) + ] + default: + return nil + } + let size = MemoryLayout.size + + (labels.count - 1) * MemoryLayout.stride + let raw = UnsafeMutableRawPointer.allocate(byteCount: size, alignment: 16) + defer { raw.deallocate() } + let layout = raw.bindMemory(to: AudioChannelLayout.self, capacity: 1) + layout.pointee.mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions + layout.pointee.mChannelBitmap = AudioChannelBitmap(rawValue: 0) + layout.pointee.mNumberChannelDescriptions = UInt32(labels.count) + let descs = UnsafeMutableBufferPointer( + start: &layout.pointee.mChannelDescriptions, count: labels.count) + for (i, lbl) in labels.enumerated() { + descs[i] = AudioChannelDescription( + mChannelLabel: lbl, mChannelFlags: AudioChannelFlags(rawValue: 0), + mCoordinates: (0, 0, 0)) + } + return AVAudioChannelLayout(layout: layout) +} + public final class SessionAudio { private let connection: PunktfunkConnection private let flag = StopFlag() @@ -229,9 +276,13 @@ public final class SessionAudio { // MARK: - Playback (host → speaker) private func startPlayback(speakerUID: String) { - // 1 s of interleaved stereo capacity, ~20 ms prefill: four 5 ms host packets of - // jitter absorption before the first sample plays. 
- let ring = AudioRing(capacity: 96_000, prefill: 1920) + // Build the playback layout from the host-RESOLVED channel count (never the request): + // 2 = stereo / 6 = 5.1 / 8 = 7.1, canonical wire order FL FR FC LFE RL RR SL SR. + let channels = Int(connection.resolvedAudioChannels) + // 1 s interleaved capacity, ~20 ms prefill (four 5 ms host packets of jitter absorption + // before the first sample plays), both scaled by the channel count. + let ring = AudioRing( + capacity: 48_000 * channels, prefill: 960 * channels, channels: channels) let engine = AVAudioEngine() #if os(macOS) @@ -247,21 +298,32 @@ public final class SessionAudio { } #endif - // Engine-native deinterleaved float; the render block deinterleaves from the ring. - guard let format = AVAudioFormat(standardFormatWithSampleRate: 48_000, channels: 2) - else { return } + // Engine-native deinterleaved float; the render block deinterleaves from the ring. Surround + // uses an explicit wire-order channel layout; the mixer downmixes to the output device when + // it has fewer speakers (e.g. an iPhone's stereo built-ins). (Explicit if/else rather than + // map/flatMap so it's correct whether the channelLayout initializer is failable or not.) + var format: AVAudioFormat? 
+ if channels == 2 { + format = AVAudioFormat(standardFormatWithSampleRate: 48_000, channels: 2) + } else if let layout = wireChannelLayout(channels: channels) { + format = AVAudioFormat(standardFormatWithSampleRate: 48_000, channelLayout: layout) + } + guard let format else { + log.error("could not build \(channels)-channel audio format — audio disabled") + return + } let scratch = ScratchBuffer() // block-owned; freed with the closure let source = AVAudioSourceNode(format: format) { _, _, frameCount, abl -> OSStatus in let frames = Int(frameCount) guard frames <= 8192 else { return kAudioUnitErr_TooManyFramesToProcess } - ring.read(into: scratch.ptr, count: frames * 2) + ring.read(into: scratch.ptr, count: frames * channels) let buffers = UnsafeMutableAudioBufferListPointer(abl) - if buffers.count >= 2, - let left = buffers[0].mData?.assumingMemoryBound(to: Float.self), - let right = buffers[1].mData?.assumingMemoryBound(to: Float.self) { - for f in 0..= channels { + for ch in 0.. 0, let p = pcm.floatChannelData?[0] { - ring.write(p, count: Int(frames) * 2) + guard let pcm, pcm.frameCount > 0 else { continue } + pcm.samples.withUnsafeBufferPointer { p in + if let base = p.baseAddress { + ring.write(base, count: pcm.frameCount * pcm.channels) } - } catch { - // One corrupt packet ≠ a dead stream; skip it. 
- log.warning("audio decode failed: \(error.localizedDescription)") } } } diff --git a/clients/linux/src/app.rs b/clients/linux/src/app.rs index 349b53a..a3277f9 100644 --- a/clients/linux/src/app.rs +++ b/clients/linux/src/app.rs @@ -452,6 +452,7 @@ fn speed_test(app: Rc, req: ConnectRequest) { GamepadPref::Auto, 0, // bitrate_kbps (host default) 0, // video_caps: the Linux client has no 10-bit/HDR present path yet + 2, // audio_channels: speed-test probe, stereo None, // launch: speed-test probe connect, no game pin, Some(identity), @@ -573,6 +574,7 @@ fn start_session(app: Rc, req: ConnectRequest, pin: Option<[u8; 32]>) { }, bitrate_kbps: s.bitrate_kbps, mic_enabled: s.mic_enabled, + audio_channels: s.audio_channels, pin, identity: app.identity.clone(), }; diff --git a/clients/linux/src/audio.rs b/clients/linux/src/audio.rs index f1bce4b..e22a8ca 100644 --- a/clients/linux/src/audio.rs +++ b/clients/linux/src/audio.rs @@ -27,16 +27,17 @@ pub struct AudioPlayer { } impl AudioPlayer { - /// Spawn the PipeWire playback thread. Failure (no PipeWire in the session) is - /// survivable — the caller streams video-only. - pub fn spawn() -> Result { + /// Spawn the PipeWire playback thread for `channels` (2/6/8, canonical wire order + /// FL FR FC LFE RL RR SL SR). Failure (no PipeWire in the session) is survivable — the + /// caller streams video-only. + pub fn spawn(channels: u32) -> Result { // 64 × 5 ms = 320 ms of slack between the pump and the PipeWire loop. let (pcm_tx, pcm_rx) = std::sync::mpsc::sync_channel::>(64); let (quit_tx, quit_rx) = pipewire::channel::channel::(); let thread = std::thread::Builder::new() .name("punktfunk-audio".into()) .spawn(move || { - if let Err(e) = pw_thread(pcm_rx, quit_rx) { + if let Err(e) = pw_thread(pcm_rx, quit_rx, channels as usize) { tracing::warn!(error = %e, "audio playback thread ended"); } }) @@ -48,8 +49,8 @@ impl AudioPlayer { }) } - /// Queue one interleaved-stereo f32 chunk. 
Drops the chunk if the PipeWire side is - /// wedged (the renderer conceals the gap; never block the session pump). + /// Queue one interleaved f32 chunk (in the session's channel layout). Drops the chunk if the + /// PipeWire side is wedged (the renderer conceals the gap; never block the session pump). pub fn push(&self, pcm: Vec) { if let Err(TrySendError::Disconnected(_)) = self.pcm_tx.try_send(pcm) { // Thread already dead — Drop will reap it; nothing to do per-chunk. @@ -71,11 +72,14 @@ struct PlayerData { rx: Receiver>, ring: VecDeque, primed: bool, + /// Interleaved channel count this stream was opened with (2/6/8). + channels: usize, } fn pw_thread( pcm_rx: Receiver>, quit_rx: pipewire::channel::Receiver, + channels: usize, ) -> Result<()> { use pipewire as pw; use pw::{properties::properties, spa}; @@ -115,6 +119,7 @@ fn pw_thread( rx: pcm_rx, ring: VecDeque::new(), primed: false, + channels, }; let _listener = stream @@ -130,19 +135,19 @@ fn pw_thread( while let Ok(chunk) = ud.rx.try_recv() { ud.ring.extend(chunk); } - let stride = 4 * CHANNELS; // F32LE interleaved + let stride = 4 * ud.channels; // F32LE interleaved let datas = buffer.datas_mut(); if datas.is_empty() { return; } let data = &mut datas[0]; let want_frames = data.data().map(|s| s.len() / stride).unwrap_or(0); - let want = want_frames * CHANNELS; + let want = want_frames * ud.channels; // Adaptive jitter buffer (same shape as the host's virtual mic): prime to // ~3 quanta, cap at ~1 quantum of slack beyond that, re-prime after a // genuine drain. 
- let target = (3 * want).clamp(720 * CHANNELS, 9600 * CHANNELS); + let target = (3 * want).clamp(720 * ud.channels, 9600 * ud.channels); while ud.ring.len() > target.max(want) + want { ud.ring.pop_front(); } @@ -182,7 +187,13 @@ fn pw_thread( let mut info = AudioInfoRaw::new(); info.set_format(AudioFormat::F32LE); info.set_rate(SAMPLE_RATE); - info.set_channels(CHANNELS as u32); + info.set_channels(channels as u32); + // Channel positions in canonical wire order (FL FR FC LFE RL RR SL SR) so PipeWire routes each + // slot to the matching speaker (and downmixes when the sink has fewer). Identity, no permute. + let order = punktfunk_core::audio::spa_positions(channels as u8); + let mut positions = [0u32; 64]; + positions[..order.len()].copy_from_slice(order); + info.set_position(positions); let obj = pw::spa::pod::Object { type_: pw::spa::utils::SpaTypes::ObjectParamFormat.as_raw(), id: pw::spa::param::ParamType::EnumFormat.as_raw(), diff --git a/clients/linux/src/session.rs b/clients/linux/src/session.rs index e620ae5..a32eee7 100644 --- a/clients/linux/src/session.rs +++ b/clients/linux/src/session.rs @@ -20,6 +20,8 @@ pub struct SessionParams { pub compositor: CompositorPref, pub gamepad: GamepadPref, pub bitrate_kbps: u32, + /// Requested audio channel count (2/6/8); the host echoes the resolved value. + pub audio_channels: u8, /// Stream the default microphone to the host's virtual mic source. pub mic_enabled: bool, /// Pinned host fingerprint; `None` = trust on first use (caller persists the observed one). @@ -83,6 +85,42 @@ fn now_ns() -> u64 { .unwrap_or(0) } +/// Opus decoder for the audio plane: a plain stereo decoder (the validated path) or a multistream +/// decoder for 5.1/7.1, both behind one `decode_float`. Built from the host-RESOLVED channel count +/// via the shared layout table. 
+enum AudioDec { + Stereo(opus::Decoder), + Surround(opus::MSDecoder), +} + +impl AudioDec { + fn new(channels: u8) -> Result { + if channels == 2 { + Ok(AudioDec::Stereo(opus::Decoder::new( + 48_000, + opus::Channels::Stereo, + )?)) + } else { + let l = punktfunk_core::audio::layout_for(channels, false); + Ok(AudioDec::Surround(opus::MSDecoder::new( + 48_000, l.streams, l.coupled, l.mapping, + )?)) + } + } + + fn decode_float( + &mut self, + input: &[u8], + out: &mut [f32], + fec: bool, + ) -> Result { + match self { + AudioDec::Stereo(d) => d.decode_float(input, out, fec), + AudioDec::Surround(d) => d.decode_float(input, out, fec), + } + } +} + fn pump( params: SessionParams, ev_tx: async_channel::Sender, @@ -96,7 +134,8 @@ fn pump( params.compositor, params.gamepad, params.bitrate_kbps, - 0, // video_caps: the Linux client has no 10-bit/HDR present path yet + 0, // video_caps: the Linux client has no 10-bit/HDR present path yet + params.audio_channels, None, // launch: the Linux client has no library picker yet params.pin, Some(params.identity), @@ -134,11 +173,14 @@ fn pump( } }; // Audio is best-effort: a session without it still streams. Gamepads are the - // app-lifetime service's job (the UI attaches it on Connected). - let player = audio::AudioPlayer::spawn() + // app-lifetime service's job (the UI attaches it on Connected). Build the decoder + playback + // from the host-RESOLVED channel count (never the request), so an older/clamping host that + // resolves stereo is decoded as stereo. 
+ let channels = connector.audio_channels; + let player = audio::AudioPlayer::spawn(channels as u32) .map_err(|e| tracing::warn!(error = %e, "audio disabled")) .ok(); - let mut opus_dec = opus::Decoder::new(48_000, opus::Channels::Stereo) + let mut opus_dec = AudioDec::new(channels) .map_err(|e| tracing::warn!(error = %e, "opus decoder failed — audio disabled")) .ok(); let _mic = params @@ -157,8 +199,8 @@ fn pump( let mut bytes_n = 0u64; let mut decode_us_sum = 0u64; let mut lat_us: Vec = Vec::with_capacity(256); - let mut pcm = vec![0f32; 5760 * 2]; // decode scratch: max Opus frame (120 ms stereo) - // Loss recovery: watch the host→client unrecoverable-drop count and ask for an IDR when it climbs. + let mut pcm = vec![0f32; 5760 * channels as usize]; // scratch: max Opus frame (120 ms) × channels + // Loss recovery: watch the host→client unrecoverable-drop count and ask for an IDR when it climbs. let mut last_dropped = connector.frames_dropped(); let mut last_kf_req: Option = None; @@ -221,7 +263,8 @@ fn pump( while let Ok(pkt) = connector.next_audio(Duration::ZERO) { if let (Some(player), Some(dec)) = (&player, opus_dec.as_mut()) { match dec.decode_float(&pkt.data, &mut pcm, false) { - Ok(samples) => player.push(pcm[..samples * 2].to_vec()), + // `samples` is per-channel; the interleaved frame is `samples * channels`. + Ok(samples) => player.push(pcm[..samples * channels as usize].to_vec()), Err(e) => tracing::debug!(error = %e, "opus decode"), } } diff --git a/clients/linux/src/trust.rs b/clients/linux/src/trust.rs index 87eb0bb..f95cbac 100644 --- a/clients/linux/src/trust.rs +++ b/clients/linux/src/trust.rs @@ -132,6 +132,9 @@ pub struct Settings { pub inhibit_shortcuts: bool, /// Stream the default microphone to the host's virtual mic source. pub mic_enabled: bool, + /// Requested audio channel count: 2 (stereo), 6 (5.1) or 8 (7.1). The host clamps to what it + /// can capture; the resolved count drives the decoder + playback layout. 
+ pub audio_channels: u8, } impl Default for Settings { @@ -145,6 +148,7 @@ impl Default for Settings { compositor: "auto".into(), inhibit_shortcuts: true, mic_enabled: false, + audio_channels: 2, } } } diff --git a/clients/linux/src/ui_settings.rs b/clients/linux/src/ui_settings.rs index 187e752..bc0974c 100644 --- a/clients/linux/src/ui_settings.rs +++ b/clients/linux/src/ui_settings.rs @@ -140,6 +140,16 @@ pub fn show( input.add(&inhibit_row); let audio = adw::PreferencesGroup::builder().title("Audio").build(); + let surround_row = adw::ComboRow::builder() + .title("Audio channels") + .subtitle("Request stereo or surround (the host downmixes if its output has fewer)") + .model(>k::StringList::new(&[ + "Stereo", + "5.1 Surround", + "7.1 Surround", + ])) + .build(); + audio.add(&surround_row); let mic_row = adw::SwitchRow::builder() .title("Stream microphone") .subtitle("Send the default input device to the host's virtual microphone") @@ -170,6 +180,11 @@ pub fn show( compositor_row.set_selected(comp_i as u32); inhibit_row.set_active(s.inhibit_shortcuts); mic_row.set_active(s.mic_enabled); + surround_row.set_selected(match s.audio_channels { + 6 => 1, + 8 => 2, + _ => 0, + }); } let dialog = adw::PreferencesDialog::new(); @@ -186,6 +201,11 @@ pub fn show( .to_string(); s.inhibit_shortcuts = inhibit_row.is_active(); s.mic_enabled = mic_row.is_active(); + s.audio_channels = match surround_row.selected() { + 1 => 6, + 2 => 8, + _ => 2, + }; s.save(); }); dialog.present(Some(parent)); diff --git a/clients/probe/Cargo.toml b/clients/probe/Cargo.toml index 9c780ab..9c84589 100644 --- a/clients/probe/Cargo.toml +++ b/clients/probe/Cargo.toml @@ -18,8 +18,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } # LAN host discovery (`--discover`): browse the native `_punktfunk._udp` mDNS service the host # advertises (same crate/version the host advertises with). mdns-sd = "0.20" - -# Linux-only: --mic-test's Opus encoder (libopus). 
The mic UPLINK itself is portable — -# only this synthetic-tone test rig needs the encoder. -[target.'cfg(target_os = "linux")'.dependencies] +# Opus: multistream DECODE of the host's audio plane (the surround validator) + `--mic-test`'s +# encoder. libopus is already in the graph via `punktfunk-core`'s quic feature; this exposes the +# name directly. Cross-platform (cmake-vendored), so the probe builds + validates everywhere. opus = "0.3" diff --git a/clients/probe/src/main.rs b/clients/probe/src/main.rs index aec9b02..cf1f7c7 100644 --- a/clients/probe/src/main.rs +++ b/clients/probe/src/main.rs @@ -78,6 +78,10 @@ struct Args { gamepad: GamepadPref, /// `--bitrate KBPS` — request this encoder bitrate (kilobits/s); 0 = host default. bitrate_kbps: u32, + /// `--audio-channels N` — request stereo (2), 5.1 (6) or 7.1 (8) audio; default 2. The probe + /// multistream-decodes the host's frames and asserts the per-channel sample count, so it's the + /// headless validator for the surround encode path. + audio_channels: u8, /// `--launch ID` — ask the host to launch a library title in this session (a store-qualified /// id from the host's `GET /api/v1/library`, e.g. `steam:570`). Host resolves it; `None` = none. launch: Option, @@ -201,6 +205,11 @@ fn parse_args() -> Args { compositor, gamepad, bitrate_kbps: get("--bitrate").and_then(|s| s.parse().ok()).unwrap_or(0), + audio_channels: punktfunk_core::audio::normalize_channels( + get("--audio-channels") + .and_then(|s| s.parse().ok()) + .unwrap_or(2), + ), launch: get("--launch").map(str::to_string), speed_test: get("--speed-test").and_then(|s| { let (kbps, ms) = s.split_once(':')?; @@ -385,13 +394,23 @@ async fn session(args: Args) -> Result<()> { // `--launch ID` — host resolves it against its own library and runs it this session. launch: args.launch.clone(), // This headless tool just dumps the bitstream (no decode), so it can always claim - // 10-bit support. 
Gated by env so latency runs stay on the 8-bit baseline: - // PUNKTFUNK_CLIENT_10BIT=1 advertises VIDEO_CAP_10BIT to exercise the host Main10 path. - video_caps: if std::env::var_os("PUNKTFUNK_CLIENT_10BIT").is_some() { - punktfunk_core::quic::VIDEO_CAP_10BIT - } else { - 0 + // 10-bit / 4:4:4 support. Gated by env so latency runs stay on the 8-bit 4:2:0 baseline: + // PUNKTFUNK_CLIENT_10BIT=1 advertises VIDEO_CAP_10BIT (host Main10 path); + // PUNKTFUNK_CLIENT_444=1 advertises VIDEO_CAP_444 (host HEVC 4:4:4 path) — verify the + // resulting chroma with `ffprobe` on the `--out` .h265. + video_caps: { + let mut caps = 0u8; + if std::env::var_os("PUNKTFUNK_CLIENT_10BIT").is_some() { + caps |= punktfunk_core::quic::VIDEO_CAP_10BIT; + } + if std::env::var_os("PUNKTFUNK_CLIENT_444").is_some() { + caps |= punktfunk_core::quic::VIDEO_CAP_444; + } + caps }, + // `--audio-channels` (default stereo); the probe multistream-decodes + validates the + // host's frames to exercise the surround encode path headlessly. + audio_channels: args.audio_channels, } .encode(), ) @@ -408,6 +427,8 @@ async fn session(args: Args) -> Result<()> { bit_depth = welcome.bit_depth, color = ?welcome.color, hdr = welcome.color.is_hdr(), + chroma_444 = welcome.chroma_format == punktfunk_core::quic::CHROMA_IDC_444, + chroma_format_idc = welcome.chroma_format, "session offer" ); @@ -830,13 +851,37 @@ async fn session(args: Args) -> Result<()> { hidout_pkts.clone(), ); let conn2 = conn.clone(); + // Build a multistream decoder for the host-RESOLVED layout so the probe actually decodes + // the surround stream (not just counts bytes) — the headless validator for the encode path. 
+ let audio_channels = welcome.audio_channels; tokio::spawn(async move { use std::sync::atomic::Ordering::Relaxed; let mut hdr_logged = false; + let layout = punktfunk_core::audio::layout_for(audio_channels, false); + let mut audio_dec = + opus::MSDecoder::new(48_000, layout.streams, layout.coupled, layout.mapping).ok(); + let mut pcm = vec![0f32; 5760 * audio_channels as usize]; + let mut audio_decoded_logged = false; while let Ok(d) = conn2.read_datagram().await { if let Some((_, _, opus)) = punktfunk_core::quic::decode_audio_datagram(&d) { a.fetch_add(1, Relaxed); ab.fetch_add(opus.len() as u64, Relaxed); + // Decode + validate: the per-channel sample count must be a legal Opus frame + // size; log the first success so a loopback test can assert surround decoded. + if let Some(dec) = audio_dec.as_mut() { + match dec.decode_float(opus, &mut pcm, false) { + Ok(samples) if !audio_decoded_logged => { + audio_decoded_logged = true; + tracing::info!( + channels = audio_channels, + samples_per_channel = samples, + "audio decoded (Opus multistream)" + ); + } + Ok(_) => {} + Err(e) => tracing::debug!(error = %e, "probe audio decode"), + } + } } else if punktfunk_core::quic::decode_rumble_datagram(&d).is_some() { r.fetch_add(1, Relaxed); } else if let Some(meta) = punktfunk_core::quic::decode_hdr_meta_datagram(&d) { diff --git a/clients/windows/src/app.rs b/clients/windows/src/app.rs index de91713..6bdb855 100644 --- a/clients/windows/src/app.rs +++ b/clients/windows/src/app.rs @@ -39,6 +39,9 @@ const DECODERS: &[(&str, &str)] = &[ ]; /// Bitrate presets in Mb/s; `0` = host default. const BITRATES_MBPS: &[u32] = &[0, 10, 20, 30, 50, 80, 150]; +/// Audio channel presets: `(channel count, display label)`. The host clamps to what it can +/// capture; the resolved count drives the decoder + WASAPI render layout. 
+const AUDIO_CHANNELS: &[(u8, &str)] = &[(2, "Stereo"), (6, "5.1 Surround"), (8, "7.1 Surround")]; #[derive(Clone, PartialEq)] enum Screen { @@ -598,6 +601,7 @@ fn connect( compositor: CompositorPref::Auto, gamepad: gamepad_pref, bitrate_kbps: s.bitrate_kbps, + audio_channels: s.audio_channels, mic_enabled: s.mic_enabled, hdr_enabled: s.hdr_enabled, decoder: DecoderPref::from_name(&s.decoder), @@ -886,6 +890,23 @@ fn settings_page(ctx: &Arc, set_screen: &AsyncSetState) -> Eleme s.save(); }) }; + let ac_i = AUDIO_CHANNELS + .iter() + .position(|&(v, _)| v == s.audio_channels) + .unwrap_or(0) as i32; + let ac_names: Vec = AUDIO_CHANNELS.iter().map(|&(_, l)| l.to_string()).collect(); + let channels_combo = { + let ctx = ctx.clone(); + ComboBox::new(ac_names) + .header("Audio channels") + .selected_index(ac_i) + .on_selection_changed(move |i: i32| { + let (v, _) = AUDIO_CHANNELS[(i.max(0) as usize).min(AUDIO_CHANNELS.len() - 1)]; + let mut s = ctx.settings.lock().unwrap(); + s.audio_channels = v; + s.save(); + }) + }; let header = grid(( text_block("Settings") @@ -934,8 +955,17 @@ fn settings_page(ctx: &Arc, set_screen: &AsyncSetState) -> Eleme .spacing(10.0), ); - let audio_card = - card(vstack((text_block("Audio").font_size(15.0).semibold(), mic_toggle)).spacing(10.0)); + let audio_card = card( + vstack(( + text_block("Audio").font_size(15.0).semibold(), + text_block("Request stereo or surround — the host downmixes if its output has fewer.") + .font_size(12.0) + .foreground(ThemeRef::SecondaryText), + channels_combo, + mic_toggle, + )) + .spacing(10.0), + ); page(vec![ header.into(), diff --git a/clients/windows/src/audio.rs b/clients/windows/src/audio.rs index 07a7dc6..9eac674 100644 --- a/clients/windows/src/audio.rs +++ b/clients/windows/src/audio.rs @@ -21,9 +21,9 @@ use std::time::Duration; use wasapi::{DeviceEnumerator, Direction, SampleType, StreamMode, WaveFormat}; const SAMPLE_RATE: usize = 48_000; +/// The microphone uplink stays stereo (the host's virtual 
mic is stereo). The render path is +/// multichannel — its channel count + block align are runtime, driven by the host-resolved layout. const CHANNELS: usize = 2; -/// 48 kHz stereo f32: 2 channels * 4 bytes = 8 bytes per frame. -const BLOCK_ALIGN: usize = CHANNELS * 4; /// Mic frames are 20 ms (960 samples/channel) — any size ≤ 120 ms is fine host-side. const MIC_FRAME: usize = 960; @@ -34,9 +34,10 @@ pub struct AudioPlayer { } impl AudioPlayer { - /// Spawn the WASAPI render thread. Failure (no render endpoint on this box) is - /// survivable — the caller streams video-only. - pub fn spawn() -> Result { + /// Spawn the WASAPI render thread for `channels` (2/6/8, canonical wire order + /// FL FR FC LFE RL RR SL SR). Failure (no render endpoint on this box) is survivable — the + /// caller streams video-only. + pub fn spawn(channels: u8) -> Result { // 64 × 5 ms = 320 ms of slack between the pump and the WASAPI loop. let (pcm_tx, pcm_rx) = std::sync::mpsc::sync_channel::>(64); let stop = Arc::new(AtomicBool::new(false)); @@ -45,14 +46,14 @@ impl AudioPlayer { let thread = std::thread::Builder::new() .name("punktfunk-audio".into()) .spawn(move || { - if let Err(e) = render_thread(pcm_rx, stop_t, ready_tx) { + if let Err(e) = render_thread(pcm_rx, stop_t, ready_tx, channels) { tracing::warn!(error = format!("{e:#}"), "audio playback thread ended"); } }) .context("spawn audio thread")?; match ready_rx.recv_timeout(Duration::from_secs(3)) { Ok(Ok(())) => { - tracing::info!("WASAPI render: 48 kHz stereo f32 (default endpoint)"); + tracing::info!(channels, "WASAPI render: 48 kHz f32 (default endpoint)"); Ok(AudioPlayer { pcm_tx, stop, @@ -66,8 +67,8 @@ impl AudioPlayer { } } - /// Queue one interleaved-stereo f32 chunk. Drops the chunk if the WASAPI side is wedged - /// (the renderer conceals the gap; never block the session pump). + /// Queue one interleaved f32 chunk (in the session's channel layout). 
Drops the chunk if the + /// WASAPI side is wedged (the renderer conceals the gap; never block the session pump). pub fn push(&self, pcm: Vec) { if let Err(TrySendError::Disconnected(_)) = self.pcm_tx.try_send(pcm) { // Thread already dead — Drop will reap it; nothing to do per-chunk. @@ -88,6 +89,7 @@ fn render_thread( pcm_rx: Receiver>, stop: Arc, ready: SyncSender>, + channels: u8, ) -> Result<()> { if let Err(e) = wasapi::initialize_mta() .ok() @@ -97,12 +99,26 @@ fn render_thread( return Ok(()); } let res = (|| -> Result<()> { + // F32LE interleaved: channels × 4 bytes/sample. Stereo (channels == 2) is byte-identical + // to the old fixed path (mask 0x3, block align 8). + let block_align = channels as usize * 4; let device = DeviceEnumerator::new() .context("DeviceEnumerator")? .get_default_device(&Direction::Render) .context("default render endpoint")?; let mut audio_client = device.get_iaudioclient().context("IAudioClient")?; - let desired = WaveFormat::new(32, 32, &SampleType::Float, SAMPLE_RATE, CHANNELS, None); + // The explicit dwChannelMask is the wire order (FL FR FC LFE RL RR SL SR); 5.1 = 0x3F, + // 7.1 = 0x63F. WASAPI delivers channels in ascending mask-bit order, which equals the wire + // order, so the render mapping is the identity — no permute. `autoconvert` (below) lets the + // audio engine downmix when the endpoint has fewer speakers. + let desired = WaveFormat::new( + 32, + 32, + &SampleType::Float, + SAMPLE_RATE, + channels as usize, + Some(punktfunk_core::audio::wasapi_channel_mask(channels)), + ); let (default_period, _min_period) = audio_client.get_device_period().context("device period")?; let mode = StreamMode::EventsShared { @@ -139,10 +155,10 @@ fn render_thread( if avail_frames == 0 { continue; } - let want_bytes = avail_frames * BLOCK_ALIGN; + let want_bytes = avail_frames * block_align; // Prime to ~3 quanta; cap at ~1 quantum of slack beyond that; re-prime on drain. 
- let target = (3 * want_bytes).clamp(720 * BLOCK_ALIGN, 9600 * BLOCK_ALIGN); + let target = (3 * want_bytes).clamp(720 * block_align, 9600 * block_align); while ring.len() > target.max(want_bytes) + want_bytes { ring.pop_front(); } diff --git a/clients/windows/src/session.rs b/clients/windows/src/session.rs index c0f2c2e..02aecf9 100644 --- a/clients/windows/src/session.rs +++ b/clients/windows/src/session.rs @@ -23,6 +23,8 @@ pub struct SessionParams { pub compositor: CompositorPref, pub gamepad: GamepadPref, pub bitrate_kbps: u32, + /// Requested audio channel count (2/6/8); the host echoes the resolved value. + pub audio_channels: u8, /// Stream the default microphone to the host's virtual mic source. pub mic_enabled: bool, /// Advertise 10-bit + HDR10 so the host may upgrade HDR content to a Main10/PQ stream. @@ -94,6 +96,42 @@ fn now_ns() -> u64 { .unwrap_or(0) } +/// Opus decoder for the audio plane: a plain stereo decoder (the validated path) or a multistream +/// decoder for 5.1/7.1, both behind one `decode_float`. Built from the host-RESOLVED channel count +/// via the shared layout table. 
+enum AudioDec { + Stereo(opus::Decoder), + Surround(opus::MSDecoder), +} + +impl AudioDec { + fn new(channels: u8) -> Result { + if channels == 2 { + Ok(AudioDec::Stereo(opus::Decoder::new( + 48_000, + opus::Channels::Stereo, + )?)) + } else { + let l = punktfunk_core::audio::layout_for(channels, false); + Ok(AudioDec::Surround(opus::MSDecoder::new( + 48_000, l.streams, l.coupled, l.mapping, + )?)) + } + } + + fn decode_float( + &mut self, + input: &[u8], + out: &mut [f32], + fec: bool, + ) -> Result { + match self { + AudioDec::Stereo(d) => d.decode_float(input, out, fec), + AudioDec::Surround(d) => d.decode_float(input, out, fec), + } + } +} + fn pump( params: SessionParams, ev_tx: async_channel::Sender, @@ -122,6 +160,7 @@ fn pump( } 0 }, + params.audio_channels, None, // launch: the Windows client has no library picker yet params.pin, Some(params.identity), @@ -161,11 +200,14 @@ fn pump( let mut hardware = decoder.is_hardware(); let mut hdr = false; // Audio is best-effort: a session without it still streams. Gamepads are the - // app-lifetime service's job (the UI attaches it on Connected). - let player = audio::AudioPlayer::spawn() + // app-lifetime service's job (the UI attaches it on Connected). Build the decoder + playback + // from the host-RESOLVED channel count (never the request), so an older/clamping host that + // resolves stereo is decoded as stereo. 
+ let channels = connector.audio_channels; + let player = audio::AudioPlayer::spawn(channels) .map_err(|e| tracing::warn!(error = %e, "audio disabled")) .ok(); - let mut opus_dec = opus::Decoder::new(48_000, opus::Channels::Stereo) + let mut opus_dec = AudioDec::new(channels) .map_err(|e| tracing::warn!(error = %e, "opus decoder failed — audio disabled")) .ok(); let _mic = params @@ -184,8 +226,8 @@ fn pump( let mut bytes_n = 0u64; let mut decode_us_sum = 0u64; let mut lat_us: Vec = Vec::with_capacity(256); - let mut pcm = vec![0f32; 5760 * 2]; // decode scratch: max Opus frame (120 ms stereo) - // Loss recovery: watch the host→client unrecoverable-drop count and ask for an IDR when it climbs. + let mut pcm = vec![0f32; 5760 * channels as usize]; // scratch: max Opus frame (120 ms) × channels + // Loss recovery: watch the host→client unrecoverable-drop count and ask for an IDR when it climbs. let mut last_dropped = connector.frames_dropped(); let mut last_kf_req: Option = None; @@ -253,7 +295,8 @@ fn pump( while let Ok(pkt) = connector.next_audio(Duration::ZERO) { if let (Some(player), Some(dec)) = (&player, opus_dec.as_mut()) { match dec.decode_float(&pkt.data, &mut pcm, false) { - Ok(samples) => player.push(pcm[..samples * 2].to_vec()), + // `samples` is per-channel; the interleaved frame is `samples * channels`. + Ok(samples) => player.push(pcm[..samples * channels as usize].to_vec()), Err(e) => tracing::debug!(error = %e, "opus decode"), } } diff --git a/clients/windows/src/trust.rs b/clients/windows/src/trust.rs index fa32df0..5d578ae 100644 --- a/clients/windows/src/trust.rs +++ b/clients/windows/src/trust.rs @@ -130,6 +130,9 @@ pub struct Settings { pub inhibit_shortcuts: bool, /// Stream the default microphone to the host's virtual mic source. pub mic_enabled: bool, + /// Requested audio channel count: 2 (stereo), 6 (5.1) or 8 (7.1). The host clamps to what it + /// can capture; the resolved count drives the decoder + WASAPI render layout. 
+ pub audio_channels: u8, /// Advertise 10-bit + HDR10 so the host upgrades HDR content to a Main10/PQ stream (the client /// presents it on a 10-bit ST.2084 swapchain). No effect on SDR content. pub hdr_enabled: bool, @@ -148,6 +151,7 @@ impl Default for Settings { compositor: "auto".into(), inhibit_shortcuts: true, mic_enabled: false, + audio_channels: 2, hdr_enabled: true, decoder: "auto".into(), } diff --git a/crates/punktfunk-core/Cargo.toml b/crates/punktfunk-core/Cargo.toml index 5cfc63c..e537099 100644 --- a/crates/punktfunk-core/Cargo.toml +++ b/crates/punktfunk-core/Cargo.toml @@ -19,7 +19,7 @@ crate-type = ["lib", "cdylib", "staticlib"] default = [] # Control-plane QUIC (pairing, config, reverse audio). tokio is permitted ONLY here, # never on the per-frame hot path. Off by default so the core stays runtime-free. -quic = ["dep:quinn", "dep:tokio", "dep:rustls", "dep:rcgen", "dep:rustls-pki-types", "dep:sha2", "dep:hmac", "dep:spake2"] +quic = ["dep:quinn", "dep:tokio", "dep:rustls", "dep:rcgen", "dep:rustls-pki-types", "dep:sha2", "dep:hmac", "dep:spake2", "dep:opus"] [dependencies] reed-solomon-simd = "3.1" # GF(2^16) Leopard-RS, SIMD, O(n log n) — the wall-breaker (P2) @@ -51,6 +51,12 @@ sha2 = { version = "0.10", optional = true } hmac = { version = "0.12", optional = true } spake2 = { version = "0.4", optional = true } tokio = { version = "1", optional = true, features = ["rt-multi-thread", "net", "sync", "macros"] } +# In-core Opus (multistream) DECODE for the C-ABI `punktfunk_connection_next_audio_pcm` path — +# used by embedders without a multistream-capable Opus decoder (Apple's AudioToolbox is +# stereo-only). The Rust clients link `opus` themselves and decode the raw `next_audio` frames, +# so this only matters when the connection API (quic) is built. Same libopus the host vendors; +# cargo unifies the build. Multistream API: `opus::MSDecoder` (lib.rs:1187). 
+opus = { version = "0.3", optional = true } # `libc` for batched UDP syscalls: `sendmmsg`/`recvmmsg` on Linux (the 1 Gbps+ lever) and the # `recv(MSG_DONTWAIT)` drain on the other unix (Apple/BSD) targets, which have no `recvmmsg` diff --git a/crates/punktfunk-core/src/abi.rs b/crates/punktfunk-core/src/abi.rs index 9ece706..6ef215d 100644 --- a/crates/punktfunk-core/src/abi.rs +++ b/crates/punktfunk-core/src/abi.rs @@ -467,6 +467,23 @@ pub struct PunktfunkConnection { last: std::sync::Mutex>, /// Same, for `punktfunk_connection_next_audio` (independent of the video slot). last_audio: std::sync::Mutex>, + /// Decode-in-core state for `punktfunk_connection_next_audio_pcm` (Apple / any embedder + /// without a multistream Opus decoder). The decoder is built lazily from the negotiated + /// `inner.audio_channels`; `pcm` is a fixed-capacity reusable buffer the returned pointer + /// borrows until the next PCM call (same contract as `last_audio`). + audio_pcm: std::sync::Mutex, +} + +/// Lazily-initialized in-core Opus decode state. A coupled-1-stream multistream decoder is +/// equivalent to a plain stereo decoder, so one [`opus::MSDecoder`] handles 2/6/8 channels. +#[cfg(feature = "quic")] +#[derive(Default)] +struct AudioPcmState { + decoder: Option, + /// Interleaved f32 PCM, wire channel order. Sized when the decoder is first built to the + /// largest legal Opus frame (120 ms @ 48 kHz = 5760 samples/ch) × the negotiated channel count, + /// so decode never reallocates (which would dangle the pointer handed to the embedder). + pcm: Vec, } /// `PunktfunkHidOutput::kind` — lightbar RGB (`r`/`g`/`b` valid). @@ -708,12 +725,18 @@ pub const PUNKTFUNK_VIDEO_CAP_10BIT: u8 = 0x01; /// Video-capability bit for [`punktfunk_connect_ex5`] (`video_caps`): the client can present /// BT.2020 PQ HDR10 (implies 10-bit). (Mirrors `quic::VIDEO_CAP_HDR`.)
pub const PUNKTFUNK_VIDEO_CAP_HDR: u8 = 0x02; +/// Video-capability bit for [`punktfunk_connect_ex5`] (`video_caps`): the client can decode a +/// full-chroma 4:4:4 HEVC stream (Range Extensions). The host emits 4:4:4 only when this is set, +/// the host opted in, the codec is HEVC, and the GPU supports it — else the stream stays 4:2:0 and +/// [`punktfunk_connection_chroma_format`] reports the real value. (Mirrors `quic::VIDEO_CAP_444`.) +pub const PUNKTFUNK_VIDEO_CAP_444: u8 = 0x04; // Keep the ABI cap bits in lockstep with the wire constants (compile-time guard against drift). #[cfg(feature = "quic")] const _: () = { assert!(PUNKTFUNK_VIDEO_CAP_10BIT == crate::quic::VIDEO_CAP_10BIT); assert!(PUNKTFUNK_VIDEO_CAP_HDR == crate::quic::VIDEO_CAP_HDR); + assert!(PUNKTFUNK_VIDEO_CAP_444 == crate::quic::VIDEO_CAP_444); }; // Keep the ABI gamepad constants in lockstep with the wire enum (compile-time guard against drift). @@ -980,6 +1003,58 @@ pub unsafe extern "C" fn punktfunk_connect_ex5( client_cert_pem: *const std::os::raw::c_char, client_key_pem: *const std::os::raw::c_char, timeout_ms: u32, +) -> *mut PunktfunkConnection { + // Delegate to the surround-aware variant requesting stereo (the pre-surround behaviour). + unsafe { + punktfunk_connect_ex6( + host, + port, + width, + height, + refresh_hz, + compositor, + gamepad, + bitrate_kbps, + video_caps, + 2, // audio_channels = stereo + launch_id, + pin_sha256, + observed_sha256_out, + client_cert_pem, + client_key_pem, + timeout_ms, + ) + } +} + +/// Like [`punktfunk_connect_ex5`], but additionally requests the audio channel count: +/// `2` (stereo, the default behaviour of every earlier variant), `6` (5.1) or `8` (7.1). The host +/// clamps the request to what it can actually capture and echoes the resolved count via +/// [`punktfunk_connection_audio_channels`]; the `0xC9` audio frames are Opus-(multi)stream encoded +/// for that layout. A client that wants surround calls this; everything else inherits stereo. 
+/// +/// # Safety +/// Same as [`punktfunk_connect`]. +#[cfg(feature = "quic")] +#[no_mangle] +#[allow(clippy::too_many_arguments)] +pub unsafe extern "C" fn punktfunk_connect_ex6( + host: *const std::os::raw::c_char, + port: u16, + width: u32, + height: u32, + refresh_hz: u32, + compositor: u32, + gamepad: u32, + bitrate_kbps: u32, + video_caps: u8, + audio_channels: u8, + launch_id: *const std::os::raw::c_char, + pin_sha256: *const u8, + observed_sha256_out: *mut u8, + client_cert_pem: *const std::os::raw::c_char, + client_key_pem: *const std::os::raw::c_char, + timeout_ms: u32, ) -> *mut PunktfunkConnection { let r = std::panic::catch_unwind(AssertUnwindSafe(|| { if host.is_null() { @@ -1029,6 +1104,7 @@ pub unsafe extern "C" fn punktfunk_connect_ex5( gamepad, bitrate_kbps, video_caps, + crate::audio::normalize_channels(audio_channels), launch, pin, identity, @@ -1045,6 +1121,7 @@ pub unsafe extern "C" fn punktfunk_connect_ex5( inner: c, last: std::sync::Mutex::new(None), last_audio: std::sync::Mutex::new(None), + audio_pcm: std::sync::Mutex::new(AudioPcmState::default()), })) } Err(_) => std::ptr::null_mut(), @@ -1250,6 +1327,121 @@ pub unsafe extern "C" fn punktfunk_connection_next_audio( }) } +/// Read the audio channel count the host resolved for this session (from its Welcome): `2` +/// (stereo), `6` (5.1) or `8` (7.1). `*out` is filled when non-NULL. The `0xC9` Opus frames are +/// (multistream-)encoded for this layout; an embedder decoding raw frames itself must build its +/// decoder from THIS value (see [`crate::audio::layout_for`]) — or use +/// [`punktfunk_connection_next_audio_pcm`], which decodes in-core. Available immediately after a +/// successful connect (it doesn't change without a reconfigure). +/// +/// # Safety +/// `c` is a valid connection handle; `out` is NULL or writable for one `u8`. 
+#[cfg(feature = "quic")] +#[no_mangle] +pub unsafe extern "C" fn punktfunk_connection_audio_channels( + c: *mut PunktfunkConnection, + out: *mut u8, +) -> PunktfunkStatus { + guard(|| { + let c = match unsafe { c.as_ref() } { + Some(c) => c, + None => return PunktfunkStatus::NullPointer, + }; + if !out.is_null() { + // SAFETY: `out` is non-null and the caller guarantees it is writable for one `u8`. + unsafe { *out = c.inner.audio_channels }; + } + PunktfunkStatus::Ok + }) +} + +/// One decoded audio frame from [`punktfunk_connection_next_audio_pcm`]: interleaved 32-bit +/// float PCM at 48 kHz, in the canonical wire channel order `FL FR FC LFE RL RR SL SR` (the +/// first `channels` of it). `samples` points at `frame_count * channels` floats and borrows +/// connection memory **until the next PCM call** on this handle. +#[cfg(feature = "quic")] +#[repr(C)] +pub struct PunktfunkAudioPcm { + /// Interleaved f32 samples (wire channel order), `frame_count * channels` long. + pub samples: *const f32, + /// Samples per channel in this frame. + pub frame_count: u32, + /// Channel count (2/6/8) — the negotiated [`punktfunk_connection_audio_channels`]. + pub channels: u8, + /// Source packet sequence number. + pub seq: u32, + /// Capture presentation timestamp (ns). + pub pts_ns: u64, +} + +/// Pull the next audio frame and **decode it in-core** to interleaved f32 PCM — for embedders +/// without a multistream-capable Opus decoder (e.g. Apple, whose AudioToolbox Opus path is +/// stereo-only). The decoder is built once from the negotiated channel count and handles 2/6/8 +/// channels (a 1-coupled-stream multistream decoder is exactly a stereo decoder). Same +/// timeout/closed semantics as [`punktfunk_connection_next_audio`]; `out->samples` borrows +/// connection memory until the next PCM call on this handle. Use EITHER this or +/// [`punktfunk_connection_next_audio`] on a given connection, from one dedicated audio thread — +/// not both (they share the underlying queue). 
+/// +/// # Safety +/// `c` is a valid connection handle; `out` is writable. At most one thread pulls audio. +#[cfg(feature = "quic")] +#[no_mangle] +pub unsafe extern "C" fn punktfunk_connection_next_audio_pcm( + c: *mut PunktfunkConnection, + out: *mut PunktfunkAudioPcm, + timeout_ms: u32, +) -> PunktfunkStatus { + guard(|| { + let c = match unsafe { c.as_ref() } { + Some(c) => c, + None => return PunktfunkStatus::NullPointer, + }; + if out.is_null() { + return PunktfunkStatus::NullPointer; + } + let channels = crate::audio::normalize_channels(c.inner.audio_channels); + let pkt = match c + .inner + .next_audio(std::time::Duration::from_millis(timeout_ms as u64)) + { + Ok(pkt) => pkt, + Err(e) => return e.status(), + }; + let mut state = c.audio_pcm.lock().unwrap(); + if state.decoder.is_none() { + let layout = crate::audio::layout_for(channels, false); + match opus::MSDecoder::new(48_000, layout.streams, layout.coupled, layout.mapping) { + Ok(d) => { + // Largest legal Opus frame is 120 ms = 5760 samples/ch. + state.pcm = vec![0f32; 5760 * channels as usize]; + state.decoder = Some(d); + } + Err(_) => return PunktfunkStatus::Unsupported, + } + } + let AudioPcmState { decoder, pcm } = &mut *state; + let dec = decoder.as_mut().unwrap(); + // `decode_float` divides the output buffer length by the channel count to get the + // per-channel capacity; an empty payload requests packet-loss concealment. + match dec.decode_float(&pkt.data, pcm, false) { + Ok(frame_count) => { + unsafe { + *out = PunktfunkAudioPcm { + samples: pcm.as_ptr(), + frame_count: frame_count as u32, + channels, + seq: pkt.seq, + pts_ns: pkt.pts_ns, + }; + } + PunktfunkStatus::Ok + } + Err(_) => PunktfunkStatus::BadPacket, + } + }) +} + /// Pull the next rumble (force-feedback) update, waiting up to `timeout_ms`. Amplitudes /// are 0..0xFFFF (`low` = low-frequency motor, `high` = high-frequency), `(0, 0)` = stop. /// Same timeout/closed semantics as [`punktfunk_connection_next_audio`]. 
@@ -1414,6 +1606,33 @@ pub unsafe extern "C" fn punktfunk_connection_color_info( }) } +/// Read the session's resolved chroma subsampling (from the host's Welcome) as the HEVC +/// `chroma_format_idc`: `1` = 4:2:0 (the default every pre-4:4:4 host produced), `3` = full-chroma +/// 4:4:4. `*out` is filled when non-NULL. The in-band SPS is authoritative; this lets the embedder +/// pre-size its decoder / pick a 4:4:4 pixel format up front. Available immediately after a +/// successful connect (it doesn't change without a reconfigure). +/// +/// # Safety +/// `c` is a valid connection handle; `out` is NULL or writable for one `u8`. +#[cfg(feature = "quic")] +#[no_mangle] +pub unsafe extern "C" fn punktfunk_connection_chroma_format( + c: *mut PunktfunkConnection, + out: *mut u8, +) -> PunktfunkStatus { + guard(|| { + let c = match unsafe { c.as_ref() } { + Some(c) => c, + None => return PunktfunkStatus::NullPointer, + }; + if !out.is_null() { + // SAFETY: `out` is non-null and the caller guarantees it is writable for one `u8`. + unsafe { *out = c.inner.chroma_format }; + } + PunktfunkStatus::Ok + }) +} + /// Send one input event to the host as a QUIC datagram (non-blocking enqueue). /// /// # Safety diff --git a/crates/punktfunk-core/src/audio.rs b/crates/punktfunk-core/src/audio.rs new file mode 100644 index 0000000..057250f --- /dev/null +++ b/crates/punktfunk-core/src/audio.rs @@ -0,0 +1,298 @@ +//! Shared audio layout: the single source of truth for Opus (multi)stream surround across the +//! host, the GameStream compatibility path, and every client decoder. +//! +//! **Canonical wire channel order** is `FL FR FC LFE RL RR SL SR` (the GameStream/Moonlight +//! order, and the PipeWire/PulseAudio default map for 6/8 channels). Every host capturer +//! delivers PCM in this order and every client decodes into it, so the Opus multistream +//! `mapping` is the **identity** (`[0, 1, …, channels-1]`) on both ends — punktfunk owns the +//! 
encoder and every decoder, so the GFE-style pre-rotation Moonlight needs over SDP +//! (`gamestream::audio::surround_params`) is a GameStream-only concern and never touches the +//! native `punktfunk/1` path. +//! +//! Channel counts the protocol negotiates: `2` (stereo), `6` (5.1) and `8` (7.1). Anything +//! else clamps to stereo ([`normalize_channels`]). + +/// Canonical wire channel positions; the index is the channel's slot in the interleaved PCM +/// frame. A count of N uses positions `0..N` (always a prefix of this 8-channel order). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum WirePos { + FrontLeft = 0, + FrontRight = 1, + FrontCenter = 2, + Lfe = 3, + RearLeft = 4, + RearRight = 5, + SideLeft = 6, + SideRight = 7, +} + +/// The full 8-channel wire order; the N-channel order is its first N entries. +pub const WIRE_ORDER_8: [WirePos; 8] = { + use WirePos::*; + [ + FrontLeft, + FrontRight, + FrontCenter, + Lfe, + RearLeft, + RearRight, + SideLeft, + SideRight, + ] +}; + +/// One Opus (multi)stream layout. `mapping` is the libopus multistream mapping we encode AND +/// decode with — identity, since punktfunk owns both ends. `streams`/`coupled` give the +/// normal-quality coupling (FL,FR)+(FC,LFE) [+(RL,RR) on 7.1] with the remaining channels as +/// mono streams; high quality is one mono stream per channel. Bitrates match Sunshine's +/// per-config values (stereo keeps punktfunk's live-validated 128 kbps). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct OpusLayout { + /// Interleaved channel count (2, 6 or 8). + pub channels: u8, + /// Number of Opus streams in the multistream packet. + pub streams: u8, + /// How many of those streams are coupled (stereo) pairs. + pub coupled: u8, + /// libopus multistream channel mapping — identity `[0, 1, …, channels-1]`. + pub mapping: &'static [u8], + /// Target Opus bitrate in bits/sec (hard CBR; constant packet size, which GameStream's + /// audio FEC relies on). 
+ pub bitrate: i32, +} + +/// Stereo: a plain coupled pair. The 128 kbps live-validated config. +pub const LAYOUT_STEREO: OpusLayout = OpusLayout { + channels: 2, + streams: 1, + coupled: 1, + mapping: &[0, 1], + bitrate: 128_000, +}; +/// 5.1 normal quality: (FL,FR)+(FC,LFE) coupled, RL+RR mono. +pub const LAYOUT_51: OpusLayout = OpusLayout { + channels: 6, + streams: 4, + coupled: 2, + mapping: &[0, 1, 2, 3, 4, 5], + bitrate: 256_000, +}; +/// 5.1 high quality: one mono stream per channel. +pub const LAYOUT_51_HQ: OpusLayout = OpusLayout { + channels: 6, + streams: 6, + coupled: 0, + mapping: &[0, 1, 2, 3, 4, 5], + bitrate: 1_536_000, +}; +/// 7.1 normal quality: (FL,FR)+(FC,LFE)+(RL,RR) coupled, SL+SR mono. +pub const LAYOUT_71: OpusLayout = OpusLayout { + channels: 8, + streams: 5, + coupled: 3, + mapping: &[0, 1, 2, 3, 4, 5, 6, 7], + bitrate: 450_000, +}; +/// 7.1 high quality: one mono stream per channel. +pub const LAYOUT_71_HQ: OpusLayout = OpusLayout { + channels: 8, + streams: 8, + coupled: 0, + mapping: &[0, 1, 2, 3, 4, 5, 6, 7], + bitrate: 2_048_000, +}; + +/// Pick the layout for a negotiated channel count. Unknown counts fall back to stereo (clients +/// only ever request 2/6/8). `high_quality` selects the uncoupled high-bitrate config. +pub fn layout_for(channels: u8, high_quality: bool) -> &'static OpusLayout { + match (channels, high_quality) { + (6, false) => &LAYOUT_51, + (6, true) => &LAYOUT_51_HQ, + (8, false) => &LAYOUT_71, + (8, true) => &LAYOUT_71_HQ, + _ => &LAYOUT_STEREO, + } +} + +/// Clamp an arbitrary (wire / requested) channel count to one the protocol negotiates. `0`, +/// absent, or any unsupported value becomes stereo. +pub fn normalize_channels(requested: u8) -> u8 { + match requested { + 6 => 6, + 8 => 8, + _ => 2, + } +} + +// ---- per-platform channel-layout helpers (pure data; no platform deps) -------------------- + +/// Windows `WAVEFORMATEXTENSIBLE.dwChannelMask` for the wire layout. 
+/// +/// NB 7.1 == `0x63F` (FL FR FC LFE **BL BR SL SR**), NOT `0xFF` — `0xFF` selects the +/// front-of-center pair FLC/FRC, the wrong speakers. WASAPI delivers channels in ascending +/// mask-bit order, which equals the wire order, so the decoded PCM needs no permutation. +pub const fn wasapi_channel_mask(channels: u8) -> u32 { + const FL: u32 = 0x1; + const FR: u32 = 0x2; + const FC: u32 = 0x4; + const LFE: u32 = 0x8; + const BL: u32 = 0x10; // back left (wire RL) + const BR: u32 = 0x20; // back right (wire RR) + const SL: u32 = 0x200; // side left + const SR: u32 = 0x400; // side right + match channels { + 6 => FL | FR | FC | LFE | BL | BR, // 0x3F + 8 => FL | FR | FC | LFE | BL | BR | SL | SR, // 0x63F + _ => FL | FR, // 0x3 (stereo) + } +} + +/// PipeWire / SPA `enum spa_audio_channel` positions in wire order — identical to the host +/// capture side (`punktfunk-host` `audio::linux::spa_positions`): FL=3 FR=4 FC=5 LFE=6 SL=7 +/// SR=8 RL=12 RR=13. Identity routing: the client sets these on its playback node so PipeWire +/// maps each wire slot to the matching speaker (and downmixes when the sink has fewer). +pub fn spa_positions(channels: u8) -> &'static [u32] { + const STEREO: [u32; 2] = [3, 4]; // FL FR + const C51: [u32; 6] = [3, 4, 5, 6, 12, 13]; // FL FR FC LFE RL RR + const C71: [u32; 8] = [3, 4, 5, 6, 12, 13, 7, 8]; // FL FR FC LFE RL RR SL SR + match channels { + 6 => &C51, + 8 => &C71, + _ => &STEREO, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn layout_table_is_consistent() { + for l in [ + &LAYOUT_STEREO, + &LAYOUT_51, + &LAYOUT_51_HQ, + &LAYOUT_71, + &LAYOUT_71_HQ, + ] { + // Mapping is identity and exactly `channels` entries long. + assert_eq!(l.mapping.len(), l.channels as usize); + for (i, &m) in l.mapping.iter().enumerate() { + assert_eq!(m as usize, i, "mapping must be identity for {l:?}"); + } + // libopus invariant: total channels == coupled*2 + (streams - coupled). 
+ assert_eq!( + l.coupled * 2 + (l.streams - l.coupled), + l.channels, + "stream/coupled accounting for {l:?}" + ); + assert!(l.coupled <= l.streams); + assert!(l.bitrate > 0); + } + } + + #[test] + fn layout_for_picks_expected() { + assert_eq!(layout_for(2, false), &LAYOUT_STEREO); + assert_eq!(layout_for(6, false), &LAYOUT_51); + assert_eq!(layout_for(6, true), &LAYOUT_51_HQ); + assert_eq!(layout_for(8, false), &LAYOUT_71); + assert_eq!(layout_for(8, true), &LAYOUT_71_HQ); + // Unknown / 0 → stereo. + assert_eq!(layout_for(0, false), &LAYOUT_STEREO); + assert_eq!(layout_for(3, false), &LAYOUT_STEREO); + assert_eq!(layout_for(7, true), &LAYOUT_STEREO); + } + + #[test] + fn normalize_clamps_to_negotiable() { + assert_eq!(normalize_channels(2), 2); + assert_eq!(normalize_channels(6), 6); + assert_eq!(normalize_channels(8), 8); + for bad in [0u8, 1, 3, 4, 5, 7, 9, 255] { + assert_eq!(normalize_channels(bad), 2, "{bad} must clamp to stereo"); + } + } + + #[test] + fn wasapi_masks_are_correct() { + assert_eq!(wasapi_channel_mask(2), 0x3); + assert_eq!(wasapi_channel_mask(6), 0x3F); + assert_eq!(wasapi_channel_mask(8), 0x63F); // NOT 0xFF + // Bit count must equal the channel count. + assert_eq!(wasapi_channel_mask(2).count_ones(), 2); + assert_eq!(wasapi_channel_mask(6).count_ones(), 6); + assert_eq!(wasapi_channel_mask(8).count_ones(), 8); + } + + #[test] + fn spa_positions_match_wire_order() { + assert_eq!(spa_positions(2), &[3, 4]); + assert_eq!(spa_positions(6), &[3, 4, 5, 6, 12, 13]); + assert_eq!(spa_positions(8), &[3, 4, 5, 6, 12, 13, 7, 8]); + assert_eq!(spa_positions(2).len(), 2); + assert_eq!(spa_positions(6).len(), 6); + assert_eq!(spa_positions(8).len(), 8); + } + + /// Real-libopus proof that the shared layout round-trips with channel identity: a tone fed + /// into wire channel N (host `opus::MSEncoder`) comes back out on channel N (client + /// `opus::MSDecoder`), for stereo / 5.1 / 7.1. 
This is the single guarantee the whole + /// feature rests on — encoder layout == decoder layout == identity mapping — so if a layout + /// constant is ever wrong, this fails. Gated on `quic` (where `opus` is a dependency). + #[cfg(feature = "quic")] + #[test] + fn multistream_layout_roundtrips_with_channel_identity() { + const SR: u32 = 48_000; + const SAMPLES: usize = 240; // 5 ms @ 48 kHz + for &channels in &[2u8, 6, 8] { + let l = layout_for(channels, false); + let ch = l.channels as usize; + let mut enc = opus::MSEncoder::new( + SR, + l.streams, + l.coupled, + l.mapping, + opus::Application::LowDelay, + ) + .expect("MSEncoder"); + enc.set_bitrate(opus::Bitrate::Bits(l.bitrate)).unwrap(); + enc.set_vbr(false).unwrap(); + let mut dec = + opus::MSDecoder::new(SR, l.streams, l.coupled, l.mapping).expect("MSDecoder"); + + for tone_ch in 0..ch { + let mut out = vec![0u8; 4000]; + let mut energy = vec![0f64; ch]; + // A few frames to clear the codec startup transient before measuring. + for f in 0..8 { + let mut frame = vec![0f32; SAMPLES * ch]; + for t in 0..SAMPLES { + let phase = (f * SAMPLES + t) as f32 * 440.0 * 2.0 * std::f32::consts::PI + / SR as f32; + frame[t * ch + tone_ch] = 0.5 * phase.sin(); + } + let n = enc.encode_float(&frame, &mut out).unwrap(); + let mut decoded = vec![0f32; SAMPLES * ch]; + let got = dec.decode_float(&out[..n], &mut decoded, false).unwrap(); + assert_eq!(got, SAMPLES, "{channels}ch frame size"); + if f >= 4 { + for t in 0..SAMPLES { + for (c, e) in energy.iter_mut().enumerate() { + *e += (decoded[t * ch + c] as f64).powi(2); + } + } + } + } + let loudest = (0..ch) + .max_by(|&a, &b| energy[a].total_cmp(&energy[b])) + .unwrap(); + assert_eq!( + loudest, tone_ch, + "{channels}ch: tone in channel {tone_ch} must come out on {tone_ch} (energies {energy:?})" + ); + } + } + } +} diff --git a/crates/punktfunk-core/src/client.rs b/crates/punktfunk-core/src/client.rs index c944265..1daf88a 100644 --- a/crates/punktfunk-core/src/client.rs +++ 
b/crates/punktfunk-core/src/client.rs @@ -40,8 +40,9 @@ enum CtrlRequest { /// mode, the host-resolved compositor backend, the host-resolved gamepad backend, the host's /// certificate fingerprint, the resolved encoder bitrate (kbps), and the host↔client clock offset /// (ns, host minus client; 0 = no skew correction / an old host that didn't answer the handshake). -/// The trailing `u8` is the resolved encode bit depth (8/10) and [`ColorInfo`] the resolved colour -/// signalling, both from the [`Welcome`]. +/// The trailing `u8`s are the resolved encode bit depth (8/10), the chroma `chroma_format_idc` +/// (1 = 4:2:0, 3 = 4:4:4), and the resolved audio channel count (2/6/8), with [`ColorInfo`] the +/// resolved colour signalling — all from the [`Welcome`]. type Negotiated = ( Mode, CompositorPref, @@ -51,6 +52,8 @@ type Negotiated = ( i64, u8, ColorInfo, + u8, + u8, ); /// Accumulated state of an in-flight / finished speed test. The data-plane pump mirrors the @@ -202,6 +205,17 @@ pub struct NativeClient { /// decoder/presenter from this. [`ColorInfo::SDR_BT709`] for an older host. The static HDR /// mastering metadata (when [`ColorInfo::is_hdr`]) arrives via [`NativeClient::next_hdr_meta`]. pub color: ColorInfo, + /// The chroma subsampling the host resolved for this session ([`Welcome::chroma_format`]), as the + /// HEVC `chroma_format_idc`: [`quic::CHROMA_IDC_420`] (4:2:0, the default / older host) or + /// [`quic::CHROMA_IDC_444`] (full-chroma 4:4:4). The in-band SPS is authoritative; this lets the + /// client pre-size its decoder. `CHROMA_IDC_420` for an older host that didn't report it. + pub chroma_format: u8, + /// The audio channel count the host resolved for this session ([`Welcome::audio_channels`]): + /// `2` (stereo), `6` (5.1) or `8` (7.1). The client MUST build its Opus (multistream) decoder + /// from this value (via [`crate::audio::layout_for`]) — never from its own request — so an older + /// host that omits it (→ `2`) yields working stereo. 
The `0xC9` audio frames are encoded with the + /// matching layout. + pub audio_channels: u8, } /// Pin the calling thread to the user-interactive QoS class on Apple targets. @@ -246,6 +260,9 @@ impl NativeClient { // VIDEO_CAP_HDR) — the host upgrades to a 10-bit / HDR encode only when the matching bit is // set. 0 = the 8-bit BT.709 stream every client understands. video_caps: u8, + // Requested audio channel count (2 = stereo / 6 = 5.1 / 8 = 7.1); the host clamps to what it + // can capture and echoes the result in [`NativeClient::audio_channels`]. + audio_channels: u8, launch: Option, pin: Option<[u8; 32]>, identity: Option<(String, String)>, @@ -298,6 +315,7 @@ impl NativeClient { gamepad, bitrate_kbps, video_caps, + audio_channels, launch, pin, identity, @@ -329,6 +347,8 @@ impl NativeClient { clock_offset_ns, bit_depth, color, + chroma_format, + audio_channels, ) = match ready_rx.recv_timeout(timeout) { Ok(Ok(t)) => t, Ok(Err(e)) => return Err(e), @@ -360,6 +380,8 @@ impl NativeClient { clock_offset_ns, bit_depth, color, + chroma_format, + audio_channels, }) } @@ -666,6 +688,7 @@ struct WorkerArgs { gamepad: GamepadPref, bitrate_kbps: u32, video_caps: u8, + audio_channels: u8, launch: Option, pin: Option<[u8; 32]>, identity: Option<(String, String)>, @@ -697,6 +720,7 @@ async fn worker_main(args: WorkerArgs) { gamepad, bitrate_kbps, video_caps, + audio_channels, launch, pin, identity, @@ -763,6 +787,8 @@ async fn worker_main(args: WorkerArgs) { // VIDEO_CAP_10BIT | VIDEO_CAP_HDR). The host only upgrades to a 10-bit / HDR encode // when the matching bit is set, so `0` stays an 8-bit BT.709 stream. video_caps, + // Requested surround channel count; the host echoes the resolved value in Welcome. 
+ audio_channels, } .encode(), ) @@ -834,6 +860,8 @@ async fn worker_main(args: WorkerArgs) { clock_offset_ns, welcome.bit_depth, welcome.color, + welcome.chroma_format, + welcome.audio_channels, )) }; @@ -850,6 +878,8 @@ async fn worker_main(args: WorkerArgs) { clock_offset_ns, bit_depth, color, + chroma_format, + audio_channels, ) = match setup.await { Ok(t) => t, Err(e) => { @@ -866,6 +896,8 @@ async fn worker_main(args: WorkerArgs) { clock_offset_ns, bit_depth, color, + chroma_format, + audio_channels, ))); // Input task: embedder events → QUIC datagrams. diff --git a/crates/punktfunk-core/src/lib.rs b/crates/punktfunk-core/src/lib.rs index 83acb23..5cabee2 100644 --- a/crates/punktfunk-core/src/lib.rs +++ b/crates/punktfunk-core/src/lib.rs @@ -25,6 +25,7 @@ #![forbid(unsafe_op_in_unsafe_fn)] pub mod abi; +pub mod audio; #[cfg(feature = "quic")] pub mod client; pub mod config; diff --git a/crates/punktfunk-core/src/quic.rs b/crates/punktfunk-core/src/quic.rs index 51407ab..cedb1d2 100644 --- a/crates/punktfunk-core/src/quic.rs +++ b/crates/punktfunk-core/src/quic.rs @@ -78,12 +78,33 @@ pub struct Hello { /// zero-length name/launch placeholder precedes it when those are absent so the offset stays /// deterministic. Omitted by older clients (decodes to `0`). pub video_caps: u8, + /// Requested audio channel count: `2` (stereo, default), `6` (5.1) or `8` (7.1). The host + /// resolves it against what it can capture and echoes the final count in + /// [`Welcome::audio_channels`], which is what both ends build their Opus (multistream) + /// codec from. Appended after `video_caps` as a single trailing byte; when it differs from + /// the stereo default the name/launch/video_caps placeholders are forced (0) so it lands at a + /// deterministic offset. Omitted by older clients / when `2` (decodes to `2`, i.e. stereo) so + /// the stereo wire form stays byte-identical to the pre-surround build. 
+ pub audio_channels: u8, } /// [`Hello::video_caps`] bit: the client can decode a 10-bit (Main10) HEVC stream. pub const VIDEO_CAP_10BIT: u8 = 0x01; /// [`Hello::video_caps`] bit: the client can present BT.2020 PQ HDR10 (implies 10-bit). pub const VIDEO_CAP_HDR: u8 = 0x02; +/// [`Hello::video_caps`] bit: the client can decode a full-chroma **4:4:4** HEVC stream (HEVC +/// Range Extensions / Rec.ITU-T H.265 `chroma_format_idc = 3`). The host emits 4:4:4 ONLY when this +/// bit is set, the host opted in (`PUNKTFUNK_444`), the codec is HEVC, **and** the GPU/driver +/// actually supports a 4:4:4 encode (probed) — otherwise the session stays 4:2:0 and +/// [`Welcome::chroma_format`] reflects the real resolved value. Independent of 10-bit/HDR (4:4:4 is a +/// chroma decision, bit depth is a depth decision; the two may combine where the hardware allows). +pub const VIDEO_CAP_444: u8 = 0x04; + +/// HEVC `chroma_format_idc` for 4:2:0 — what every pre-4:4:4 build produced and the back-compat +/// default when a peer omits [`Welcome::chroma_format`]. +pub const CHROMA_IDC_420: u8 = 1; +/// HEVC `chroma_format_idc` for full-chroma 4:4:4 (Range Extensions). +pub const CHROMA_IDC_444: u8 = 3; /// Per-session colour signalling (CICP / ITU-T H.273 code points) the host resolved for the /// encoded video, carried on [`Welcome`]. A client configures its decoder/presenter from these @@ -198,6 +219,22 @@ pub struct Welcome { /// [`ColorInfo::SDR_BT709`]. The client configures its decoder/presenter from this instead of /// guessing from the bitstream; the mastering metadata arrives separately on [`HDR_META_MAGIC`]. pub color: ColorInfo, + /// The chroma subsampling the host actually encodes at, as the HEVC `chroma_format_idc`: + /// [`CHROMA_IDC_420`] (4:2:0, default / older host) or [`CHROMA_IDC_444`] (full-chroma 4:4:4, + /// enabled only when the client advertised [`VIDEO_CAP_444`] *and* the host could open a real + /// 4:4:4 encode). 
The client sizes its decoder/surface pool from this; the in-band SPS carries + /// the authoritative value, so this is a hint (and the honest-downgrade channel — if the host + /// requested 4:4:4 but the GPU declined, this reads `CHROMA_IDC_420`). Appended after the colour + /// bytes as a single trailing byte; an older host that omits it decodes to [`CHROMA_IDC_420`]. + pub chroma_format: u8, + /// The audio channel count the host actually resolved and **will** send on the `0xC9` plane: + /// `2` (stereo, default), `6` (5.1) or `8` (7.1). Echoes [`Hello::audio_channels`] clamped to + /// what the host can capture (Linux PipeWire always synthesizes the count; Windows WASAPI + /// loopback is clamped to the render endpoint's mix-format channels). The client builds its Opus + /// (multistream) decoder from THIS value via [`crate::audio::layout_for`] — never from its own + /// request — so an older host that omits the byte (→ `2`) always yields working stereo. Appended + /// after `chroma_format` as a single trailing byte. + pub audio_channels: u8, } /// `client → host`: data plane is bound, begin streaming. @@ -630,10 +667,11 @@ impl Hello { // so a Hello with neither name nor launch stays byte-identical to the bitrate-era form // (26 bytes). When `launch` is present we must still emit name's length byte (0 for None) // so `launch` lands at a deterministic offset. - // `video_caps` is the last trailing field, after `launch`; when it's present (non-zero) - // the name/launch length bytes must still be emitted (0 for absent) so it lands at a + // `video_caps`/`audio_channels` are the trailing fields, after `launch`; when either is + // present (video_caps non-zero / audio_channels not stereo) the name/launch length bytes + // AND the video_caps byte must still be emitted (0 / 0) so the later byte lands at a // deterministic offset — the same discipline `launch` already imposes on `name`. 
- let need_placeholders = self.video_caps != 0; + let need_placeholders = self.video_caps != 0 || self.audio_channels != 2; match (&self.name, &self.launch) { (None, None) if !need_placeholders => {} (name, _) => { @@ -648,10 +686,15 @@ impl Hello { b.push(l.len() as u8); b.extend_from_slice(l.as_bytes()); } - // video_caps: single trailing byte. Last field. - if self.video_caps != 0 { + // video_caps: single trailing byte. Emitted when non-zero OR when audio_channels follows + // (so audio_channels lands at a deterministic offset right after it). + if self.video_caps != 0 || self.audio_channels != 2 { b.push(self.video_caps); } + // audio_channels: single trailing byte. Last field; omitted when stereo (default). + if self.audio_channels != 2 { + b.push(self.audio_channels); + } b } @@ -714,6 +757,15 @@ impl Hello { let launch_len = b.get(launch_off).copied().unwrap_or(0) as usize; b.get(launch_off + 1 + launch_len).copied().unwrap_or(0) }, + // Optional trailing audio-channel byte, one past video_caps. Absent on an older client + // → stereo. Normalized so a corrupt/unsupported value can't build a bad decoder. + audio_channels: { + let name_len = b.get(26).copied().unwrap_or(0) as usize; + let launch_off = 27 + name_len; + let launch_len = b.get(launch_off).copied().unwrap_or(0) as usize; + let video_caps_off = launch_off + 1 + launch_len; + crate::audio::normalize_channels(b.get(video_caps_off + 1).copied().unwrap_or(2)) + }, }) } } @@ -747,6 +799,10 @@ impl Welcome { b.push(self.color.transfer); b.push(self.color.matrix); b.push(self.color.full_range); + // Chroma subsampling at offset 64 — older clients stop before this → 4:2:0 (CHROMA_IDC_420). + b.push(self.chroma_format); + // Audio channel count at offset 65 — older clients stop before this → stereo (2). 
+ b.push(self.audio_channels); b } @@ -755,7 +811,8 @@ impl Welcome { // scheme[22] pct[23] max_data[24..26] shard[26..28] encrypt[28] key[29..45] // salt[45..49] frames[49..53] compositor[53] gamepad[54] bitrate_kbps[55..59] // bit_depth[59] color.primaries[60] color.transfer[61] color.matrix[62] color.range[63] - // (everything from compositor on is an optional trailing byte; an older host stops earlier). + // chroma_format[64] audio_channels[65] (everything from compositor on is an optional + // trailing byte; an older host stops earlier). if b.len() < 53 || &b[0..4] != MAGIC { return Err(PunktfunkError::InvalidArg("bad Welcome")); } @@ -812,6 +869,15 @@ impl Welcome { matrix: b.get(62).copied().unwrap_or(ColorInfo::MC_BT709), full_range: b.get(63).copied().unwrap_or(0), }, + // Optional trailing chroma byte — absent on an older host (or an explicit 0 / unknown + // value) → 4:2:0. Only `CHROMA_IDC_444` flips the client to a 4:4:4 decode. + chroma_format: match b.get(64).copied() { + Some(CHROMA_IDC_444) => CHROMA_IDC_444, + _ => CHROMA_IDC_420, + }, + // Optional trailing audio-channel byte — absent on an older host → stereo. Any + // non-{6,8} value normalizes to stereo so a corrupt byte never builds a bad decoder. 
+ audio_channels: crate::audio::normalize_channels(b.get(65).copied().unwrap_or(2)), }) } @@ -1809,6 +1875,8 @@ mod tests { bitrate_kbps: 50_000, bit_depth: 10, color: ColorInfo::HDR10_BT2020_PQ, + chroma_format: CHROMA_IDC_444, + audio_channels: 2, }; assert_eq!(Welcome::decode(&w.encode()).unwrap(), w); } @@ -1851,6 +1919,7 @@ mod tests { name: Some("Test Device".into()), launch: Some("steam:570".into()), video_caps: VIDEO_CAP_10BIT, + audio_channels: 2, }; assert_eq!(Hello::decode(&h.encode()).unwrap(), h); let s = Start { @@ -1930,6 +1999,7 @@ mod tests { name: None, launch: None, video_caps: 0, + audio_channels: 2, }; let enc = h.encode(); assert_eq!(enc.len(), 26); @@ -1969,9 +2039,11 @@ mod tests { bitrate_kbps: 120_000, bit_depth: 10, color: ColorInfo::HDR10_BT2020_PQ, + chroma_format: CHROMA_IDC_444, + audio_channels: 6, // 5.1 — exercises the non-default trailing byte }; let wenc = w.encode(); - assert_eq!(wenc.len(), 64); // 60 base + 4 colour bytes + assert_eq!(wenc.len(), 66); // 60 base + 4 colour + 1 chroma + 1 audio-channels byte let legacy_w = Welcome::decode(&wenc[..53]).unwrap(); assert_eq!(legacy_w.compositor, CompositorPref::Auto); assert_eq!(legacy_w.gamepad, GamepadPref::Auto); @@ -1991,13 +2063,29 @@ mod tests { let pre_color_w = Welcome::decode(&wenc[..60]).unwrap(); assert_eq!(pre_color_w.bit_depth, 10); assert_eq!(pre_color_w.color, ColorInfo::SDR_BT709); + assert_eq!(pre_color_w.chroma_format, CHROMA_IDC_420); // pre-chroma host → 4:2:0 assert_eq!(legacy_w.color, ColorInfo::SDR_BT709); + assert_eq!(legacy_w.chroma_format, CHROMA_IDC_420); + // A pre-chroma (64-byte) Welcome carries colour but no chroma/audio bytes → 4:2:0 + stereo. 
+ let pre_chroma_w = Welcome::decode(&wenc[..64]).unwrap(); + assert_eq!(pre_chroma_w.color, ColorInfo::HDR10_BT2020_PQ); + assert_eq!(pre_chroma_w.chroma_format, CHROMA_IDC_420); + assert_eq!(pre_chroma_w.audio_channels, 2); // audio byte (offset 65) absent → stereo + // A pre-audio (65-byte) Welcome carries chroma but no audio byte → 4:4:4 + stereo. + let pre_audio_w = Welcome::decode(&wenc[..65]).unwrap(); + assert_eq!(pre_audio_w.chroma_format, CHROMA_IDC_444); + assert_eq!(pre_audio_w.audio_channels, 2); assert_eq!(Welcome::decode(&wenc).unwrap().bitrate_kbps, 120_000); assert_eq!(Welcome::decode(&wenc).unwrap().bit_depth, 10); // full form carries it assert_eq!( Welcome::decode(&wenc).unwrap().color, ColorInfo::HDR10_BT2020_PQ ); + assert_eq!( + Welcome::decode(&wenc).unwrap().chroma_format, + CHROMA_IDC_444 + ); // full form carries 4:4:4 + assert_eq!(Welcome::decode(&wenc).unwrap().audio_channels, 6); // ...and 5.1 } #[test] @@ -2015,6 +2103,7 @@ mod tests { name: Some("Enrico's MacBook".into()), launch: None, video_caps: 0, + audio_channels: 2, }; let enc = base.encode(); assert_eq!( @@ -2062,6 +2151,7 @@ mod tests { name: None, launch: None, video_caps: 0, + audio_channels: 2, }; // launch alone (no name): a zero-length name placeholder keeps the offset deterministic. let with_launch = Hello { @@ -2268,6 +2358,7 @@ mod tests { name: None, launch: None, video_caps: 0, + audio_channels: 2, } .encode(); assert!(PairRequest::decode(&h).is_err(), "abi {abi} parsed as pair"); diff --git a/crates/punktfunk-core/tests/c_abi.rs b/crates/punktfunk-core/tests/c_abi.rs index 2cf0a0b..88d3d7a 100644 --- a/crates/punktfunk-core/tests/c_abi.rs +++ b/crates/punktfunk-core/tests/c_abi.rs @@ -13,8 +13,10 @@ use std::process::Command; fn native_libs() -> &'static [&'static str] { if cfg!(target_os = "macos") { // The workspace build unifies features into the staticlib, and `quic` pulls - // rustls's platform verifier → Security/CoreFoundation. 
+ // rustls's platform verifier → Security/CoreFoundation, plus libopus (the in-core + // `next_audio_pcm` decode path) which the `abi.rs` object references. &[ + "-lopus", "-liconv", "-lm", "-framework", @@ -23,7 +25,17 @@ fn native_libs() -> &'static [&'static str] { "CoreFoundation", ] } else if cfg!(target_os = "linux") { - &["-lgcc_s", "-lutil", "-lrt", "-lpthread", "-lm", "-ldl"] + // `-lopus`: the `quic` feature pulls in-core Opus decode (`next_audio_pcm`), whose + // symbols the linked `abi.rs` object references. Before `-lm` (opus needs libm). + &[ + "-lopus", + "-lgcc_s", + "-lutil", + "-lrt", + "-lpthread", + "-lm", + "-ldl", + ] } else { &[] } diff --git a/crates/punktfunk-host/Cargo.toml b/crates/punktfunk-host/Cargo.toml index 921b370..4a3a1a2 100644 --- a/crates/punktfunk-host/Cargo.toml +++ b/crates/punktfunk-host/Cargo.toml @@ -61,9 +61,10 @@ utoipa-scalar = { version = "0.3", features = ["axum"] } tower = { version = "0.5", features = ["util"] } http-body-util = "0.1" -# Opus stereo encode for the host->client audio plane. The `opus` crate vendors libopus via -# `audiopus_sys` (cmake-built from source — no system lib, no vcpkg), so it builds on Windows MSVC -# too (needs CMake + NASM, both on the box). Both platforms that have an audio-capture backend. +# Opus encode for the host->client audio plane — stereo (`opus::Encoder`) AND 5.1/7.1 surround +# (`opus::MSEncoder`, the safe multistream API the crate exposes; no `audiopus_sys` needed). The +# crate vendors libopus (cmake-built from source — no system lib, no vcpkg), so it builds on Windows +# MSVC too (needs CMake + NASM, both on the box). Both platforms that have an audio-capture backend. [target.'cfg(any(target_os = "linux", target_os = "windows"))'.dependencies] opus = "0.3" @@ -99,10 +100,6 @@ serde_json = "1" rusqlite = { version = "0.40", features = ["bundled"] } # Builds/validates the xkb keymap uploaded to the virtual keyboard + tracks modifier state. 
xkbcommon = "0.8" -# The safe `opus` crate is stereo-only; surround (5.1/7.1) needs the libopus *multistream* -# encoder (`opus_multistream_encoder_*`). `audiopus_sys` is the sys layer `opus` already -# vendors (same libopus link), so this adds bindings, not a second copy of the library. -audiopus_sys = "0.2" # libei (EI sender) for the portable input path on KWin/GNOME (RemoteDesktop portal). # The `tokio` feature wires reis's event stream into tokio's reactor. reis = { version = "0.6.1", features = ["tokio"] } diff --git a/crates/punktfunk-host/src/audio/windows/wasapi_cap.rs b/crates/punktfunk-host/src/audio/windows/wasapi_cap.rs index 92e1fd8..fcb84ab 100644 --- a/crates/punktfunk-host/src/audio/windows/wasapi_cap.rs +++ b/crates/punktfunk-host/src/audio/windows/wasapi_cap.rs @@ -1,7 +1,9 @@ //! WASAPI loopback capture of the default render endpoint (system output) — the Windows analogue -//! of the PipeWire sink-monitor backend. Delivers interleaved f32 PCM at 48 kHz stereo, ready for -//! the existing Opus path with NO resampling (WASAPI shared-mode autoconvert does any SRC). WASAPI -//! objects are COM-apartment-bound and not `Send`, so they live on a dedicated thread (mirrors +//! of the PipeWire sink-monitor backend. Delivers interleaved f32 PCM at 48 kHz in the requested +//! channel count (stereo / 5.1 / 7.1, canonical wire order FL FR FC LFE RL RR SL SR via the +//! explicit `dwChannelMask`), ready for the Opus path with NO resampling (WASAPI shared-mode +//! autoconvert does any SRC + up/downmix to the requested layout). WASAPI objects are +//! COM-apartment-bound and not `Send`, so they live on a dedicated thread (mirrors //! `linux::PwAudioCapturer`); only the channel + stop flag + join handle are in the struct. 
use super::{AudioCapturer, SAMPLE_RATE}; @@ -14,9 +16,6 @@ use std::thread::{self, JoinHandle}; use std::time::Duration; use wasapi::{DeviceEnumerator, Direction, SampleType, StreamMode, WaveFormat}; -// 48 kHz stereo 32-bit float: 2 channels * 4 bytes = 8 bytes per frame. -const BLOCK_ALIGN: usize = 2 * 4; - pub struct WasapiLoopbackCapturer { chunks: Receiver>, channels: u32, @@ -27,8 +26,8 @@ pub struct WasapiLoopbackCapturer { impl WasapiLoopbackCapturer { pub fn open(channels: u32) -> Result { anyhow::ensure!( - channels == 2, - "WASAPI loopback backend is stereo-only (got {channels})" + matches!(channels, 2 | 6 | 8), + "WASAPI loopback backend supports 2/6/8 channels (got {channels})" ); let (tx, rx) = sync_channel::>(64); let stop = Arc::new(AtomicBool::new(false)); @@ -39,7 +38,7 @@ impl WasapiLoopbackCapturer { let join = thread::Builder::new() .name("punktfunk-wasapi-audio".into()) .spawn(move || { - if let Err(e) = capture_thread(tx, stop_t, ready_tx) { + if let Err(e) = capture_thread(tx, stop_t, ready_tx, channels) { tracing::error!(error = format!("{e:#}"), "wasapi loopback thread failed"); } }) @@ -47,7 +46,8 @@ impl WasapiLoopbackCapturer { match ready_rx.recv_timeout(Duration::from_secs(3)) { Ok(Ok(())) => { tracing::info!( - "WASAPI loopback capture: 48 kHz stereo f32 (default render endpoint)" + channels, + "WASAPI loopback capture: 48 kHz f32 (default render endpoint)" ); Ok(WasapiLoopbackCapturer { chunks: rx, @@ -95,7 +95,10 @@ fn capture_thread( tx: SyncSender>, stop: Arc, ready: SyncSender>, + channels: u32, ) -> Result<()> { + // Interleaved f32: channels * 4 bytes per frame. + let block_align = channels as usize * 4; // COM must be initialized on THIS thread (MTA), before any device call. 
if let Err(e) = wasapi::initialize_mta() .ok() @@ -115,10 +118,20 @@ fn capture_thread( .get_default_device(&Direction::Render) .context("default render endpoint (loopback needs a render device)")?; let mut audio_client = device.get_iaudioclient().context("IAudioClient")?; - // 48 kHz stereo f32 interleaved; autoconvert lets WASAPI's shared-mode SRC match the engine - // mix format to ours, so we never resample in Rust. Loopback is implied by capturing a - // RENDER device with Direction::Capture in shared mode (wasapi sets STREAMFLAGS_LOOPBACK). - let desired = WaveFormat::new(32, 32, &SampleType::Float, SAMPLE_RATE as usize, 2, None); + // 48 kHz f32 interleaved in the requested channel layout; autoconvert lets WASAPI's + // shared-mode SRC match the engine mix format to ours (incl. up/downmix to the requested + // channel count), so we never resample/remix in Rust. The explicit dwChannelMask pins the + // wire order (FL FR FC LFE RL RR SL SR; 7.1 = 0x63F, not 0xFF). Loopback is implied by + // capturing a RENDER device with Direction::Capture in shared mode (STREAMFLAGS_LOOPBACK). 
+ let mask = punktfunk_core::audio::wasapi_channel_mask(channels as u8); + let desired = WaveFormat::new( + 32, + 32, + &SampleType::Float, + SAMPLE_RATE as usize, + channels as usize, + Some(mask), + ); let (default_period, _min_period) = audio_client.get_device_period().context("device period")?; let mode = StreamMode::EventsShared { @@ -154,7 +167,7 @@ fn capture_thread( Err(e) => return Err(anyhow!("get_next_packet_size: {e}")), } } - let whole = (bytes.len() / BLOCK_ALIGN) * BLOCK_ALIGN; + let whole = (bytes.len() / block_align) * block_align; if whole == 0 { continue; } diff --git a/crates/punktfunk-host/src/capture.rs b/crates/punktfunk-host/src/capture.rs index 79497d1..9be175a 100644 --- a/crates/punktfunk-host/src/capture.rs +++ b/crates/punktfunk-host/src/capture.rs @@ -62,6 +62,11 @@ pub struct OutputFormat { /// HDR: the capturer converts to 10-bit (IDD-push FP16 → `Rgb10a2`; the DDA secure-desktop HDR hint). /// `false` = 8-bit SDR. pub hdr: bool, + /// Full-chroma 4:4:4 session: the capturer must keep full chroma — deliver packed **RGB** + /// (`Bgra` / `Rgb10a2`), NOT the subsampled `Nv12`/`P010` the Windows video-engine path produces by + /// default — because 4:4:4 can only be recovered from a full-chroma source. NVENC then does the + /// RGB→YUV444 CSC at encode (chroma_format_idc=3). `false` on every 4:2:0 session. + pub chroma_444: bool, } impl OutputFormat { @@ -73,6 +78,8 @@ impl OutputFormat { OutputFormat { gpu: gpu_encode(), hdr, + // The GameStream + spike paths are always 4:2:0 (4:4:4 is punktfunk/1-native only). 
+ chroma_444: false, } } } @@ -361,13 +368,16 @@ pub fn open_portal_monitor() -> Result> { #[cfg(target_os = "linux")] pub fn capture_virtual_output( vout: crate::vdisplay::VirtualOutput, - _want: OutputFormat, + want: OutputFormat, _capture: crate::session_plan::CaptureBackend, ) -> Result> { - // The Linux host stays 8-bit (HDR is blocked upstream) and the portal negotiates its own format, so - // the `OutputFormat` is unused here; the capture backend is always the portal (the `CaptureBackend` - // arg is a Windows-only dispatch — ignored here). - linux::PortalCapturer::from_virtual_output(vout).map(|c| Box::new(c) as Box) + // The Linux host stays 8-bit (HDR is blocked upstream) and the portal negotiates its own pixel + // format, so only `want.gpu` is honored here: it gates GPU zero-copy capture (the capture backend + // is always the portal — the `CaptureBackend` arg is a Windows-only dispatch). `gpu = false` + // (a 4:4:4 NVENC session) forces the CPU mmap path so the encoder gets CPU-resident RGB to swscale + // into YUV444P — otherwise it would receive CUDA frames and bail. + linux::PortalCapturer::from_virtual_output(vout, want.gpu) + .map(|c| Box::new(c) as Box) } /// `PUNKTFUNK_NO_WGC=1` forces the pure single-process DDA (Desktop Duplication) path everywhere: it @@ -394,6 +404,14 @@ pub fn capture_virtual_output( })?; let pref = vout.preferred_mode; let keep = vout.keepalive; + // Full-chroma 4:4:4 needs a full-chroma RGB source. The IDD-push and WGC paths emit subsampled + // NV12/P010 by default, which can't reconstruct 4:4:4; route a 4:4:4 session to DDA, which delivers + // RGB (Bgra) when its `chroma_444` flag is set. (IDD-push/WGC 4:4:4 capture is a follow-up.) 
+ if want.chroma_444 && capture != CaptureBackend::Dda { + tracing::info!("4:4:4 session — using DDA capture (RGB source) instead of {capture:?}"); + return dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false, want.chroma_444) + .map(|c| Box::new(c) as Box); + } // P2 direct frame push (kill DDA): consume frames straight from the pf-vdisplay driver's shared // ring — no Desktop Duplication, no win32u reparenting hook. Resolved once in the `SessionPlan` // (was re-derived from `config().idd_push` here); `IddPush` takes the keepalive (owns the virtual @@ -414,8 +432,15 @@ pub fn capture_virtual_output( error = %format!("{e:#}"), "IDD-push open/attach failed — falling back to DDA" ); - return dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false) - .map(|c| Box::new(c) as Box); + return dxgi::DuplCapturer::open( + target, + pref, + keep, + want.gpu, + false, + want.chroma_444, + ) + .map(|c| Box::new(c) as Box); } } } @@ -426,7 +451,7 @@ pub fn capture_virtual_output( // chosen backend (it owns the SudoVDA keepalive), so there's no open-time auto-fallback. The // backend choice (`dda`/`dxgi`/`PUNKTFUNK_NO_WGC` → DDA, else WGC) is now resolved once in the plan. if capture == CaptureBackend::Dda { - return dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false) + return dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false, want.chroma_444) .map(|c| Box::new(c) as Box); } // WGC default, with a watchdog'd DDA fallback. 
WGC's Direct3D11CaptureFramePool::CreateFreeThreaded @@ -461,12 +486,12 @@ pub fn capture_virtual_output( } Ok(Err(e)) => { tracing::warn!(error = %format!("{e:#}"), "WGC open failed — falling back to DDA"); - dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false) + dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false, want.chroma_444) .map(|c| Box::new(c) as Box) } Err(_) => { tracing::warn!("WGC open timed out (CreateFreeThreaded hang on the virtual display) — falling back to DDA"); - dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false) + dxgi::DuplCapturer::open(target, pref, keep, want.gpu, false, want.chroma_444) .map(|c| Box::new(c) as Box) } } diff --git a/crates/punktfunk-host/src/capture/linux/mod.rs b/crates/punktfunk-host/src/capture/linux/mod.rs index 03c3ee8..5d22df5 100644 --- a/crates/punktfunk-host/src/capture/linux/mod.rs +++ b/crates/punktfunk-host/src/capture/linux/mod.rs @@ -89,21 +89,29 @@ impl PortalCapturer { node_id, "ScreenCast portal session started; connecting PipeWire" ); - Ok(spawn_pipewire(Some(fd), node_id, None)?.into_capturer(node_id, None)) + // This portal path (GameStream / monitor capture) is always 4:2:0, so allow zero-copy as before. + Ok(spawn_pipewire(Some(fd), node_id, None, true)?.into_capturer(node_id, None)) } /// Build a capturer from an already-created virtual output ([`crate::vdisplay::VirtualOutput`]): /// connect PipeWire to its node (`remote_fd` selects portal-remote vs. default-daemon) and /// take ownership of its keepalive so the output lives exactly as long as this capturer. This /// is how the client's requested resolution becomes the captured resolution without scaling. 
- pub fn from_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result { + /// `allow_zerocopy` mirrors [`OutputFormat::gpu`](crate::capture::OutputFormat): `false` forces the + /// CPU mmap path (a 4:4:4 NVENC session needs CPU-resident RGB), `true` keeps the GPU zero-copy + /// path subject to `PUNKTFUNK_ZEROCOPY`. + pub fn from_virtual_output( + vout: crate::vdisplay::VirtualOutput, + allow_zerocopy: bool, + ) -> Result { tracing::info!( node_id = vout.node_id, + allow_zerocopy, "connecting PipeWire to virtual output" ); let node_id = vout.node_id; Ok( - spawn_pipewire(vout.remote_fd, node_id, vout.preferred_mode)? + spawn_pipewire(vout.remote_fd, node_id, vout.preferred_mode, allow_zerocopy)? .into_capturer(node_id, Some(vout.keepalive)), ) } @@ -146,6 +154,12 @@ fn spawn_pipewire( fd: Option, node_id: u32, preferred: Option<(u32, u32, u32)>, + // Allow GPU zero-copy capture (dmabuf→CUDA/VA). `false` forces the CPU mmap path even when + // `PUNKTFUNK_ZEROCOPY` is set — a 4:4:4 NVENC session needs CPU-resident RGB (the encoder + // swscales RGB→YUV444P; `hevc_nvenc` can't 4:4:4 from a CUDA RGB surface), so the session plan + // passes `gpu = false` for it. Without this, a 4:4:4 session under `PUNKTFUNK_ZEROCOPY=1` would + // get CUDA frames and the encoder would bail (`want_444 && cuda`). + allow_zerocopy: bool, ) -> Result { // Frames flow from the pipewire thread over a small bounded channel. let (frame_tx, frame_rx) = sync_channel::(8); @@ -159,7 +173,7 @@ fn spawn_pipewire( // sender lives on the capturer and fires in its `Drop`. Absolute `::pipewire` path — the // inner `mod pipewire` shadows the crate name at this scope. 
let (quit_tx, quit_rx) = ::pipewire::channel::channel::<()>(); - let zerocopy = crate::zerocopy::enabled(); + let zerocopy = allow_zerocopy && crate::zerocopy::enabled(); let join = thread::Builder::new() .name("punktfunk-pipewire".into()) .spawn(move || { diff --git a/crates/punktfunk-host/src/capture/windows/dxgi.rs b/crates/punktfunk-host/src/capture/windows/dxgi.rs index 2f31972..1893452 100644 --- a/crates/punktfunk-host/src/capture/windows/dxgi.rs +++ b/crates/punktfunk-host/src/capture/windows/dxgi.rs @@ -2010,6 +2010,10 @@ pub struct DuplCapturer { /// first, retried (legacy DuplicateOutput can't capture HDR). Set for the secure-desktop DDA leg /// when the SudoVDA is in HDR; threaded into every (re)duplication incl. ACCESS_LOST recovery. want_hdr: bool, + /// Full-chroma 4:4:4 session: deliver packed RGB (`Bgra` SDR / `Rgb10a2` HDR) and SKIP the + /// video-engine RGB→YUV (NV12/P010) conversion — NVENC reconstructs 4:4:4 only from a full-chroma + /// source, so we hand it the RGB texture and it CSCs to YUV444 at encode (chroma_format_idc=3). + chroma_444: bool, /// HDR (scRGB FP16) capture state. Set when the duplication surface is `R16G16B16A16_FLOAT` /// (the desktop has HDR on). The frame can't be `CopyResource`d into a BGRA target, so the HDR /// path copies it into an FP16 SRV texture, composites the cursor, then runs [`HdrConverter`] to @@ -2087,6 +2091,8 @@ impl DuplCapturer { // stage 5) so the capturer never re-derives the encode backend itself. gpu: bool, want_hdr: bool, + // 4:4:4 session → deliver RGB, skip the NV12/P010 video-engine conversion (see the field doc). + chroma_444: bool, ) -> Result { // SAFETY: runs on the capture thread that will own this `DuplCapturer`. 
`install_gpu_pref_hook()` // and the DPI-context calls take by-value handles / no args and touch only thread/process state; @@ -2311,6 +2317,7 @@ impl DuplCapturer { gpu_copy: None, last_present: None, want_hdr, + chroma_444, hdr_fp16: is_hdr_init, hdr_meta: hdr_meta_init, fp16_src: None, @@ -3088,7 +3095,10 @@ impl DuplCapturer { // Video-engine path: scRGB FP16 → BT.2020 PQ P010 on the VIDEO engine (no 3D shader, and // NVENC encodes P010 natively). Fall back to the HdrConverter pixel shader (3D) only if the // video processor is unavailable. - if let Some(p010) = self.convert_to_yuv(&src, true) { + if let Some(p010) = (!self.chroma_444) + .then(|| self.convert_to_yuv(&src, true)) + .flatten() + { self.last_present = Some((p010.clone(), PixelFormat::P010)); return Ok(CapturedFrame { width: self.width, @@ -3148,7 +3158,10 @@ impl DuplCapturer { // conversion AND NVENC's encode stay OFF the 3D engine — the only way to keep up when a // game pins the 3D engine at ~100%. Fall back to handing NVENC the BGRA texture (it then // does RGB→YUV internally on the 3D/compute engine). - if let Some(nv12) = self.convert_to_yuv(&gpu, false) { + if let Some(nv12) = (!self.chroma_444) + .then(|| self.convert_to_yuv(&gpu, false)) + .flatten() + { self.last_present = Some((nv12.clone(), PixelFormat::Nv12)); return Ok(CapturedFrame { width: self.width, diff --git a/crates/punktfunk-host/src/config.rs b/crates/punktfunk-host/src/config.rs index 991dcc0..c67f326 100644 --- a/crates/punktfunk-host/src/config.rs +++ b/crates/punktfunk-host/src/config.rs @@ -7,7 +7,7 @@ //! **Goal-1 stages 1–2** (`design/windows-host-rewrite.md` §2.2): stage 1 stood this up; stage 2 migrated the //! genuinely-constant operator/dispatch knobs onto it (the dispatch-disagreement bug class: `idd_push`, //! `capture_backend`, `encoder_pref`, `render_adapter`, `no_wgc`, the vdisplay backend select — plus the -//! 
plan-named `secure_dda`/`idd_depth`/`zerocopy`/`ten_bit` and the multi-site `perf`/`compositor`/ +//! plan-named `secure_dda`/`idd_depth`/`zerocopy`/`ten_bit`/`four_four_four` and the multi-site `perf`/`compositor`/ //! `video_source`/`gamepad`). `SessionPlan` (stage 3) consumes it as the single owner of the //! capture/topology/encoder decision. //! @@ -63,6 +63,10 @@ pub struct HostConfig { pub zerocopy: bool, /// `PUNKTFUNK_10BIT` — host policy gate for HEVC Main10 (only honored when the client also advertised 10-bit). pub ten_bit: bool, + /// `PUNKTFUNK_444` — host policy gate for full-chroma HEVC 4:4:4 (Range Extensions). Honored only + /// when the client also advertised 4:4:4, the codec is HEVC, and the GPU/driver supports a 4:4:4 + /// encode (probed) — otherwise the session stays 4:2:0. Independent of `ten_bit` (chroma vs depth). + pub four_four_four: bool, /// `PUNKTFUNK_PERF` — per-stage timing instrumentation. pub perf: bool, /// `PUNKTFUNK_VIDEO_SOURCE` — GameStream video source select (`virtual` / `portal` / unset → synthetic). @@ -112,6 +116,7 @@ impl HostConfig { .unwrap_or(2), zerocopy: flag("PUNKTFUNK_ZEROCOPY"), ten_bit: flag("PUNKTFUNK_10BIT"), + four_four_four: flag("PUNKTFUNK_444"), perf: flag("PUNKTFUNK_PERF"), video_source: val("PUNKTFUNK_VIDEO_SOURCE"), compositor: val("PUNKTFUNK_COMPOSITOR"), diff --git a/crates/punktfunk-host/src/encode.rs b/crates/punktfunk-host/src/encode.rs index a463f6c..34781d0 100644 --- a/crates/punktfunk-host/src/encode.rs +++ b/crates/punktfunk-host/src/encode.rs @@ -29,6 +29,33 @@ pub enum Codec { Av1, } +/// Chroma subsampling the encoder emits, negotiated with the client (the `PUNKTFUNK_444` gate + the +/// client's `VIDEO_CAP_444` + a GPU probe). `Yuv420` is the universal default; `Yuv444` is HEVC-only, +/// native-protocol-only (GameStream stays 4:2:0), and the host only ever passes it after +/// [`can_encode_444`] confirmed the active backend supports it. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +pub enum ChromaFormat { + #[default] + Yuv420, + Yuv444, +} + +impl ChromaFormat { + /// The HEVC `chroma_format_idc` this maps to: `1` (4:2:0) or `3` (4:4:4). Also the wire value + /// echoed in [`punktfunk_core::quic::Welcome::chroma_format`]. + pub fn idc(self) -> u8 { + match self { + ChromaFormat::Yuv420 => punktfunk_core::quic::CHROMA_IDC_420, + ChromaFormat::Yuv444 => punktfunk_core::quic::CHROMA_IDC_444, + } + } + + /// True for full-chroma 4:4:4. + pub fn is_444(self) -> bool { + matches!(self, ChromaFormat::Yuv444) + } +} + impl Codec { /// The FFmpeg NVENC encoder name (selected by name, not codec id — the latter would /// pick the software encoder). @@ -89,6 +116,13 @@ pub struct EncoderCaps { /// When `false`, `set_hdr_meta` is a no-op and no in-band grade reaches the client. Only the /// Windows direct-NVENC path attaches it today. pub supports_hdr_metadata: bool, + /// The opened encoder is actually producing a full-chroma 4:4:4 (`chroma_format_idc = 3`) stream. + /// `false` on every 4:2:0 session (the default) and on a backend that declined 4:4:4. Set by the + /// NVENC backends (Linux + Windows). The chroma is committed to the wire (`Welcome::chroma_format`) + /// from the pre-open probe, so this is a *post-open cross-check*: the session glue logs loudly if + /// the encoder's real chroma disagrees with what was negotiated (the in-band SPS is authoritative + /// for the decoder either way). + pub chroma_444: bool, } /// A hardware encoder. One per session; runs on the encode thread. @@ -193,8 +227,21 @@ pub fn open_video( bitrate_bps: u64, cuda: bool, bit_depth: u8, + chroma: ChromaFormat, ) -> Result> { validate_dimensions(codec, width, height)?; + // 4:4:4 is HEVC-only. 
The negotiator should never pass `Yuv444` for another codec (it gates on + // `codec == H265`), but defend the contract here so a future caller can't silently emit a stream + // no decoder expects: a non-HEVC 4:4:4 request degrades to 4:2:0 with a warning. + let chroma = if chroma.is_444() && codec != Codec::H265 { + tracing::warn!( + ?codec, + "4:4:4 requested for a non-HEVC codec — encoding 4:2:0" + ); + ChromaFormat::Yuv420 + } else { + chroma + }; #[cfg(target_os = "linux")] { // Pick the GPU encode backend. NVIDIA → NVENC/CUDA (the original path, unchanged); @@ -203,8 +250,17 @@ pub fn open_video( // its errors crisply instead of silently trying the other). let pref = crate::config::config().encoder_pref.as_str(); let open_vaapi = || -> Result> { - vaapi::VaapiEncoder::open(codec, format, width, height, fps, bitrate_bps, bit_depth) - .map(|e| Box::new(e) as Box) + vaapi::VaapiEncoder::open( + codec, + format, + width, + height, + fps, + bitrate_bps, + bit_depth, + chroma, + ) + .map(|e| Box::new(e) as Box) }; match pref { "nvenc" | "nvidia" | "cuda" => open_nvenc_probed( @@ -216,6 +272,7 @@ pub fn open_video( bitrate_bps, cuda, bit_depth, + chroma, ), "vaapi" | "amd" | "intel" => open_vaapi(), "auto" | "" => { @@ -231,6 +288,7 @@ pub fn open_video( bitrate_bps, cuda, bit_depth, + chroma, ) } else { open_vaapi() @@ -260,6 +318,7 @@ pub fn open_video( fps, bitrate_bps, bit_depth, + chroma, ) .map(|e| Box::new(e) as Box) } @@ -289,6 +348,7 @@ pub fn open_video( fps, bitrate_bps, bit_depth, + chroma, ) .map(|e| Box::new(e) as Box) } @@ -333,6 +393,7 @@ pub fn open_video( bitrate_bps, cuda, bit_depth, + chroma, ); anyhow::bail!("video encode requires Linux or Windows") } @@ -355,6 +416,7 @@ fn open_nvenc_probed( bitrate_bps: u64, cuda: bool, bit_depth: u8, + chroma: ChromaFormat, ) -> Result> { const MIN_PROBE_BPS: u64 = 50_000_000; let mut candidates = vec![bitrate_bps]; @@ -369,7 +431,9 @@ fn open_nvenc_probed( } let mut last: Option = None; for (i, &b) in 
candidates.iter().enumerate() { - match linux::NvencEncoder::open(codec, format, width, height, fps, b, cuda, bit_depth) { + match linux::NvencEncoder::open( + codec, format, width, height, fps, b, cuda, bit_depth, chroma, + ) { Ok(enc) => { if i > 0 { tracing::warn!( @@ -446,6 +510,65 @@ pub fn vaapi_codec_support() -> CodecSupport { }) } +/// Whether the active GPU encode backend can actually produce a full-chroma **4:4:4** HEVC stream. +/// Resolved (and cached, once) *before* the Welcome so the host advertises the chroma it will really +/// encode — the honest-downgrade channel. 4:4:4 is HEVC-only; the probe opens a tiny encoder on the +/// active backend (NVENC FREXT is broad on NVIDIA, but VAAPI / AMF / QSV 4:4:4 is hardware-specific, +/// so it must be probed, never assumed). Non-HEVC codecs are always `false`. +#[cfg(any(target_os = "linux", target_os = "windows"))] +pub fn can_encode_444(codec: Codec) -> bool { + use std::sync::OnceLock; + if codec != Codec::H265 { + return false; + } + static CACHE: OnceLock = OnceLock::new(); + *CACHE.get_or_init(|| { + let supported = { + #[cfg(target_os = "linux")] + { + // Mirror open_video's backend dispatch: VAAPI (AMD/Intel) vs NVENC (NVIDIA). 
+ if linux_zero_copy_is_vaapi() { + vaapi::probe_can_encode_444(codec) + } else { + linux::probe_can_encode_444(codec) + } + } + #[cfg(target_os = "windows")] + { + match windows_resolved_backend() { + WindowsBackend::Nvenc => { + #[cfg(feature = "nvenc")] + { + nvenc::probe_can_encode_444(codec) + } + #[cfg(not(feature = "nvenc"))] + { + false + } + } + WindowsBackend::Amf | WindowsBackend::Qsv => { + #[cfg(feature = "amf-qsv")] + { + let vendor = match windows_resolved_backend() { + WindowsBackend::Qsv => ffmpeg_win::WinVendor::Qsv, + _ => ffmpeg_win::WinVendor::Amf, + }; + ffmpeg_win::probe_can_encode_444(vendor, codec) + } + #[cfg(not(feature = "amf-qsv"))] + { + false + } + } + WindowsBackend::Software => false, + } + } + }; + tracing::info!(supported, "HEVC 4:4:4 encode capability probed"); + supported + }) +} + // --------------------------------------------------------------------------------------------- // Windows backend selection (the analogue of the Linux nvidia_present / linux_zero_copy_is_vaapi // logic). NVIDIA → NVENC, AMD → AMF, Intel → QSV; `auto` (default) reads the DXGI adapter vendor. diff --git a/crates/punktfunk-host/src/encode/linux/mod.rs b/crates/punktfunk-host/src/encode/linux/mod.rs index ddd0fb3..d12a7ab 100644 --- a/crates/punktfunk-host/src/encode/linux/mod.rs +++ b/crates/punktfunk-host/src/encode/linux/mod.rs @@ -11,7 +11,7 @@ // Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). 
#![deny(clippy::undocumented_unsafe_blocks)] -use super::{Codec, EncodedFrame, Encoder}; +use super::{ChromaFormat, Codec, EncodedFrame, Encoder}; use crate::capture::{CapturedFrame, FramePayload, PixelFormat}; use anyhow::{anyhow, bail, Context, Result}; use ffmpeg::format::Pixel; @@ -19,9 +19,33 @@ use ffmpeg::util::frame::Video as VideoFrame; use ffmpeg::{codec, encoder, Dictionary, Packet, Rational}; use ffmpeg_next as ffmpeg; use std::os::raw::c_int; +use std::ptr; use ffmpeg::ffi; // = ffmpeg_sys_next +/// swscale: nearest-neighbour scaler flag (`SWS_POINT`). We never rescale (src dims == dst dims), so +/// the resampler choice only governs the colour-conversion path; POINT is the cheapest. +const SWS_POINT: c_int = 0x10; +/// swscale colorspace id for ITU-R BT.709 (`SWS_CS_ITU709`) — the CSC coefficients for our RGB→YUV. +const SWS_CS_ITU709: c_int = 1; + +/// The swscale *source* pixel format for a captured packed RGB/BGR layout (the real byte order, not +/// the NVENC-padded `*0` form). Used by the 4:4:4 RGB→YUV444P conversion path. Mirrors the VAAPI +/// CPU-input mapping; YUV/10-bit inputs can't feed this path (the 4:4:4 session forces packed RGB). +fn sws_src_pixel(format: PixelFormat) -> Result { + Ok(match format { + PixelFormat::Bgrx => Pixel::BGRZ, // bgr0 + PixelFormat::Rgbx => Pixel::RGBZ, // rgb0 + PixelFormat::Bgra => Pixel::BGRA, + PixelFormat::Rgba => Pixel::RGBA, + PixelFormat::Rgb => Pixel::RGB24, + PixelFormat::Bgr => Pixel::BGR24, + PixelFormat::Nv12 | PixelFormat::P010 | PixelFormat::Rgb10a2 => { + bail!("NVENC 4:4:4 CPU-input path supports packed RGB/BGR only; got {format:?}") + } + }) +} + /// `AVCUDADeviceContext` (libavutil/hwcontext_cuda.h) — not in the ffmpeg-sys bindings (the /// crate doesn't allowlist that header), so mirror its stable 3-pointer layout. We set the /// first field to *our* `CUcontext` so NVENC shares the context the EGL importer maps into. 
@@ -131,6 +155,10 @@ pub struct NvencEncoder { frame: Option, /// Zero-copy path: CUDA hwdevice/hwframes contexts (the encoder takes `AV_PIX_FMT_CUDA`). cuda: Option, + /// 4:4:4 path only: swscale context converting the captured packed RGB/BGR → planar YUV444P + /// (BT.709 limited) into [`Self::frame`], because `hevc_nvenc` only emits 4:4:4 from a YUV444 + /// *input* (RGB-in is always 4:2:0). `None` on the ordinary 4:2:0 RGB path. Freed in `Drop`. + sws_444: Option<*mut ffi::SwsContext>, src_format: PixelFormat, expand: bool, width: u32, @@ -142,10 +170,12 @@ pub struct NvencEncoder { force_kf: bool, } -// `CudaHw` holds raw `AVBufferRef`s; the encoder lives on a single thread. The CPU encoder is -// already `Send` via ffmpeg-next; assert it for the CUDA fields too. +// `CudaHw` holds raw `AVBufferRef`s and `sws_444` a raw `SwsContext`; the encoder lives on a single +// thread. The CPU encoder is already `Send` via ffmpeg-next; assert it for the raw fields too. // SAFETY: `NvencEncoder` owns an ffmpeg-next `Encoder`/`VideoFrame` (already `Send`) plus a `CudaHw` -// holding raw `AVBufferRef`s, which are not `Send` by default. The encoder is owned and driven by +// holding raw `AVBufferRef`s and an optional raw `SwsContext`, none of which are `Send` by default. +// The `SwsContext` is a self-contained swscale state object with no thread affinity, touched only +// through `&mut self` on the one encode thread. The encoder is owned and driven by // exactly ONE thread — the per-session encode thread it is moved to — and is only touched through // `&mut self` methods, so it is never aliased or accessed concurrently. The wrapped libav contexts // (and the shared `CUcontext` the `CudaHw` references) have no thread affinity, so transferring @@ -164,6 +194,7 @@ impl NvencEncoder { bitrate_bps: u64, cuda: bool, bit_depth: u8, + chroma: ChromaFormat, ) -> Result { // TODO(hdr): Linux 10-bit parity. 
Unlike the Windows raw-SDK path (which upconverts 8-bit // ARGB → Main10 via pixelBitDepthMinus8), libavcodec hevc_nvenc needs a 10-bit input pixel @@ -175,6 +206,18 @@ impl NvencEncoder { "Linux NVENC 10-bit not yet wired — encoding 8-bit" ); } + // Full-chroma 4:4:4 (HEVC Range Extensions). `hevc_nvenc` only emits 4:4:4 from a YUV444 + // *input* frame — feeding RGB always subsamples to 4:2:0 regardless of profile (verified on + // the RTX 5070 Ti). So a 4:4:4 session swscales the captured RGB → YUV444P (BT.709 limited) + // and feeds that with `profile=rext`. The negotiator gates this to HEVC + the single-process + // CPU-capture topology, so `cuda` must be false here; defend the contract. + let want_444 = chroma.is_444() && codec == Codec::H265; + if want_444 && cuda { + bail!( + "NVENC 4:4:4 needs CPU RGB frames (the session forces non-zero-copy capture for \ + 4:4:4); got a CUDA frame — capture/encoder negotiation mismatch" + ); + } ffmpeg::init().context("ffmpeg init")?; if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() { // SAFETY: `av_log_set_level` sets libav's global integer log level; `48` (= AV_LOG_DEBUG) @@ -185,7 +228,14 @@ impl NvencEncoder { let name = codec.nvenc_name(); let av_codec = encoder::find_by_name(name) .ok_or_else(|| anyhow!("{name} not built into libavcodec"))?; - let (nvenc_pixel, expand) = nvenc_input(format); + let (rgb_pixel, rgb_expand) = nvenc_input(format); + // 4:4:4 feeds NVENC a planar YUV444P frame we produce by swscale; the ordinary path feeds the + // captured RGB straight in and lets NVENC's internal CSC subsample to 4:2:0. 
+ let (nvenc_pixel, expand) = if want_444 { + (Pixel::YUV444P, false) + } else { + (rgb_pixel, rgb_expand) + }; let mut video = codec::context::Context::new_with_codec(av_codec) .encoder() @@ -234,12 +284,12 @@ impl NvencEncoder { (*video.as_mut_ptr()).gop_size = -1; } - // NV12 path: we did the RGB→YUV conversion ourselves as BT.709 *limited* range, so signal - // that in the bitstream VUI (colorspace/range/primaries/transfer) — otherwise the client - // decoder assumes a default and the picture comes out washed-out / wrong-contrast. The - // RGB-input paths leave these unset (NVENC's internal CSC writes its own VUI). Matches the - // Windows NV12 path's BT.709 limited-range signalling. - if matches!(format, PixelFormat::Nv12) { + // NV12 / 4:4:4 paths: we do the RGB→YUV conversion ourselves as BT.709 *limited* range + // (swscale), so signal that in the bitstream VUI (colorspace/range/primaries/transfer) — + // otherwise the client decoder assumes a default and the picture comes out washed-out / + // wrong-contrast. The RGB-input 4:2:0 path leaves these unset (NVENC's internal CSC writes + // its own VUI). Matches the Windows NV12 path's BT.709 limited-range signalling. + if matches!(format, PixelFormat::Nv12) || want_444 { // SAFETY: same `video` builder — `raw = video.as_mut_ptr()` is the non-null, properly- // aligned, sole-owned, not-yet-opened `AVCodecContext`. We set its four VUI colour enum // fields to valid `AVColorSpace`/`AVColorRange`/`AVColorPrimaries`/`AVColorTransfer- @@ -280,6 +330,45 @@ impl NvencEncoder { None }; + // 4:4:4: build the RGB→YUV444P swscale (BT.709 limited, no rescale). Mirrors the VAAPI CPU + // path's RGB→NV12 scaler, but the dst is full-chroma planar 4:4:4. + let sws_444 = if want_444 { + let src_av = pixel_to_av(sws_src_pixel(format)?); + // SAFETY: `sws_getContext` allocates a swscale context for the given src/dst dims + pixel + // formats. 
Both dims are the encoder's positive `width`/`height` as `c_int`; `src_av` is a + // valid `AVPixelFormat` (from the `sws_src_pixel`-validated, packed-RGB-only source), the + // dst is YUV444P. The trailing filter/param pointers are null = "use defaults" (documented + // as accepted). No Rust memory is borrowed; the returned pointer is null-checked below. + let sws = unsafe { + ffi::sws_getContext( + width as c_int, + height as c_int, + src_av, + width as c_int, + height as c_int, + ffi::AVPixelFormat::AV_PIX_FMT_YUV444P, + SWS_POINT, + ptr::null_mut(), + ptr::null_mut(), + ptr::null(), + ) + }; + if sws.is_null() { + bail!("sws_getContext(RGB→YUV444P) failed"); + } + // SAFETY: `sws` is the non-null context from the call above (null-checked). The ITU-709 + // coefficient table from `sws_getCoefficients` is a process-lifetime libswscale static, + // reused for src+dst matrices; `sws_setColorspaceDetails` only reads it and writes scalar + // CSC settings into `sws` (limited-range dst: dstRange = 0). No Rust memory is passed. + unsafe { + let cs709 = ffi::sws_getCoefficients(SWS_CS_ITU709); + ffi::sws_setColorspaceDetails(sws, cs709, 1, cs709, 0, 0, 1 << 16, 1 << 16); + } + Some(sws) + } else { + None + }; + // Low-latency NVENC tuning (plan §7 / linux-setup doc). let mut opts = Dictionary::new(); opts.set("preset", "p1"); // fastest @@ -288,6 +377,12 @@ impl NvencEncoder { opts.set("bf", "0"); opts.set("delay", "0"); opts.set("forced-idr", "1"); // RFI/request_keyframe → real IDR under the infinite GOP + if want_444 { + // HEVC Range Extensions — the profile that carries chroma_format_idc=3. With a YUV444P + // input `hevc_nvenc` auto-selects it, but pin it explicitly so the chroma is never silently + // dropped on a future libavcodec. + opts.set("profile", "rext"); + } // Split-frame encode across both NVENC engines (GB203 has 2) when the pixel rate exceeds // a single engine's HEVC capacity (~1 Gpix/s); e.g. 
5120x1440@240 = 1.77 Gpix/s needs it, @@ -321,6 +416,7 @@ impl NvencEncoder { enc, frame, cuda: cuda_hw, + sws_444, src_format: format, expand, width, @@ -333,6 +429,15 @@ impl NvencEncoder { } impl Encoder for NvencEncoder { + fn caps(&self) -> super::EncoderCaps { + super::EncoderCaps { + // 4:4:4 iff this session opened the RGB→YUV444P swscale path (FREXT). RFI/HDR-SEI stay + // unsupported on libavcodec NVENC (the trait defaults). + chroma_444: self.sws_444.is_some(), + ..super::EncoderCaps::default() + } + } + fn submit(&mut self, captured: &CapturedFrame) -> Result<()> { anyhow::ensure!( captured.width == self.width && captured.height == self.height, @@ -411,6 +516,47 @@ impl NvencEncoder { bytes.len(), src_row * h ); + // 4:4:4: swscale the packed RGB straight into the planar YUV444P input frame (BT.709 limited), + // then send it — no byte-expand. The 4:2:0 RGB path (below) feeds NVENC packed RGB directly. + if let Some(sws) = self.sws_444 { + let frame = self + .frame + .as_mut() + .context("CPU frame missing (encoder opened in CUDA mode)")?; + // SAFETY: `format == self.src_format` and `bytes.len() >= src_row * h` (the `ensure!`s + // above), so `sws_scale` reads `h` rows of `src_row` bytes from `src_data[0] = bytes` + // (packed RGB is single-plane; the other src planes are null/0) — all in bounds. `sws` is + // the non-null context built in `open`. The dst is `frame`'s underlying `AVFrame`: its + // `data`/`linesize` in-struct arrays were sized for YUV444P by `VideoFrame::new`, and the + // 3 planes are each `width`×`height`. All pointers are live locals for this synchronous + // call; the encoder runs only on this thread (`unsafe impl Send`), so no aliasing/race. 
+ unsafe { + let dst_av = frame.as_mut_ptr(); + let src_data: [*const u8; 4] = + [bytes.as_ptr(), ptr::null(), ptr::null(), ptr::null()]; + let src_stride: [c_int; 4] = [src_row as c_int, 0, 0, 0]; + let r = ffi::sws_scale( + sws, + src_data.as_ptr(), + src_stride.as_ptr(), + 0, + h as c_int, + (*dst_av).data.as_ptr(), + (*dst_av).linesize.as_ptr(), + ); + if r < 0 { + bail!("sws_scale(RGB→YUV444P) failed ({r})"); + } + } + frame.set_pts(Some(pts)); + frame.set_kind(if idr { + ffmpeg::picture::Type::I + } else { + ffmpeg::picture::Type::None + }); + self.enc.send_frame(frame).context("send_frame(444)")?; + return Ok(()); + } let frame = self .frame .as_mut() @@ -526,3 +672,51 @@ impl NvencEncoder { Ok(()) } } + +impl Drop for NvencEncoder { + fn drop(&mut self) { + if let Some(sws) = self.sws_444.take() { + // SAFETY: `sws` is the non-null `SwsContext` allocated by `sws_getContext` in `open` and + // owned exclusively by this encoder (taken out of the field so it can't be freed twice). + // `sws_freeContext` frees it; nothing else references it after this single-threaded drop. + unsafe { ffi::sws_freeContext(sws) }; + } + } +} + +/// Probe whether this NVIDIA GPU + driver + libavcodec can actually encode HEVC **4:4:4** (Range +/// Extensions). Opens a tiny real `hevc_nvenc` 4:4:4 session — the exact path [`NvencEncoder::open`] +/// takes for a live 4:4:4 stream — and reports whether it succeeded. HEVC-only; the result is cached +/// by the caller ([`crate::encode::can_encode_444`]). A GPU/driver/ffmpeg without RExt 4:4:4 fails +/// the open here, so the host resolves the session to 4:2:0 before the Welcome (honest downgrade). +pub fn probe_can_encode_444(codec: Codec) -> bool { + if codec != Codec::H265 { + return false; + } + if ffmpeg::init().is_err() { + return false; + } + // Quiet ffmpeg's open error on a GPU that lacks 4:4:4 — the probe failing is an expected outcome. 
+ // SAFETY: libav initialized above; `av_log_{get,set}_level` only read/write the global int level + // (no pointer args) and are always sound post-init. + let prev = unsafe { + let p = ffi::av_log_get_level(); + ffi::av_log_set_level(ffi::AV_LOG_FATAL); + p + }; + let ok = NvencEncoder::open( + codec, + PixelFormat::Bgra, + 640, + 480, + 30, + 2_000_000, + false, // CPU input (the 4:4:4 path never uses CUDA) + 8, + ChromaFormat::Yuv444, + ) + .is_ok(); + // SAFETY: restore the saved global log level (scalar arg, no pointers). + unsafe { ffi::av_log_set_level(prev) }; + ok +} diff --git a/crates/punktfunk-host/src/encode/linux/vaapi.rs b/crates/punktfunk-host/src/encode/linux/vaapi.rs index 459dd87..7115bdc 100644 --- a/crates/punktfunk-host/src/encode/linux/vaapi.rs +++ b/crates/punktfunk-host/src/encode/linux/vaapi.rs @@ -160,6 +160,18 @@ pub fn probe_can_encode(codec: Codec) -> bool { } } +/// Whether the active VAAPI GPU can encode HEVC **4:4:4** (Range Extensions). **Deferred in v1 — +/// always `false`.** VAAPI HEVC 4:4:4 encode is narrow and vendor-specific (the lab's AMD Phoenix1 / +/// RDNA3 exposes only `VAProfileHEVCMain`/`Main10` `EncSlice`, no `Main444`), and there is no +/// validated hardware to build + verify the 4:4:4 surface/profile path against. Returning `false` +/// keeps the negotiation honest: a VAAPI host resolves every session to 4:2:0 before the Welcome, so +/// the client never builds a 4:4:4 decoder it would only get 4:2:0 frames for. (Follow-up: implement +/// + validate on an Intel Arc / RDNA4-class box that advertises a HEVC 4:4:4 encode entrypoint.) +pub fn probe_can_encode_444(_codec: Codec) -> bool { + tracing::info!("VAAPI HEVC 4:4:4 encode is not implemented yet — declining (encoding 4:2:0)"); + false +} + /// Drain the encoder for one packet (shared poll logic). 
fn poll_encoder(enc: &mut encoder::video::Encoder, fps: u32) -> Result> { let mut pkt = Packet::empty(); @@ -848,6 +860,7 @@ pub struct VaapiEncoder { unsafe impl Send for VaapiEncoder {} impl VaapiEncoder { + #[allow(clippy::too_many_arguments)] pub fn open( codec: Codec, format: PixelFormat, @@ -856,10 +869,18 @@ impl VaapiEncoder { fps: u32, bitrate_bps: u64, bit_depth: u8, + chroma: super::ChromaFormat, ) -> Result { if bit_depth != 8 { tracing::warn!(bit_depth, "VAAPI 10-bit not yet wired — encoding 8-bit"); } + // VAAPI 4:4:4 is deferred (see `probe_can_encode_444`): no validated AMD/Intel hardware in the + // lab exposes a HEVC 4:4:4 encode entrypoint, and the probe returns false so the host never + // negotiates 4:4:4 for a VAAPI session. If a request slips through, fall back to 4:2:0 rather + // than emit an unverified stream — the host signalled 4:2:0 in the Welcome anyway. + if chroma.is_444() { + tracing::warn!("VAAPI 4:4:4 encode not implemented — encoding 4:2:0"); + } ffmpeg::init().context("ffmpeg init")?; if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() { // SAFETY: `av_log_set_level` sets libav's global integer log level; `48` (= AV_LOG_DEBUG) diff --git a/crates/punktfunk-host/src/encode/windows/ffmpeg_win.rs b/crates/punktfunk-host/src/encode/windows/ffmpeg_win.rs index b4543ac..8962ff8 100644 --- a/crates/punktfunk-host/src/encode/windows/ffmpeg_win.rs +++ b/crates/punktfunk-host/src/encode/windows/ffmpeg_win.rs @@ -31,7 +31,7 @@ // Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program). 
#![deny(clippy::undocumented_unsafe_blocks)] -use super::{Codec, EncodedFrame, Encoder}; +use super::{ChromaFormat, Codec, EncodedFrame, Encoder}; use crate::capture::{dxgi::D3d11Frame, CapturedFrame, FramePayload, PixelFormat}; use anyhow::{anyhow, bail, Context, Result}; use ffmpeg::format::Pixel; @@ -241,6 +241,18 @@ unsafe fn open_win_encoder( /// driver/runtime rejects codecs the video engine can't do (AV1 on pre-RDNA3 AMD / pre-Arc Intel, /// or HEVC on a very old part). Used to build the GameStream codec advertisement so a client never /// negotiates a codec the encoder can't open. Torn down immediately. +/// Whether the active AMD (AMF) / Intel (QSV) GPU can encode HEVC **4:4:4**. **Deferred in v1 — +/// always `false`.** AMF/QSV HEVC 4:4:4 encode is narrow (AMD RDNA3+, Intel Arc/Xe2+) and the +/// libavcodec profile/pixel-format incantation is vendor- and driver-specific — a wrong profile +/// `avcodec_open2` *silently* falls back to 4:2:0, so a positive probe would need a verify-by-frame, +/// and there is no AMD/Intel Windows box in the lab to build + validate that against. Returning +/// `false` keeps the negotiation honest: an AMF/QSV host resolves every session to 4:2:0 before the +/// Welcome. (Follow-up: implement + validate on an RDNA3+/Arc Windows box.) 
+pub fn probe_can_encode_444(_vendor: WinVendor, _codec: Codec) -> bool { + tracing::info!("AMF/QSV HEVC 4:4:4 encode is not implemented yet — declining (encoding 4:2:0)"); + false +} + pub fn probe_can_encode(vendor: WinVendor, codec: Codec) -> bool { if ffmpeg::init().is_err() { return false; @@ -1096,6 +1108,7 @@ pub struct FfmpegWinEncoder { unsafe impl Send for FfmpegWinEncoder {} impl FfmpegWinEncoder { + #[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)] pub fn open( vendor: WinVendor, @@ -1106,7 +1119,15 @@ impl FfmpegWinEncoder { fps: u32, bitrate_bps: u64, bit_depth: u8, + chroma: ChromaFormat, ) -> Result { + // AMF/QSV 4:4:4 is deferred (see `probe_can_encode_444`): no validated AMD/Intel Windows + // hardware in the lab, and the AMF/QSV HEVC 4:4:4 profile/format incantations are vendor- and + // driver-specific (a wrong profile silently encodes 4:2:0). The probe returns false so the host + // never negotiates 4:4:4 for an AMF/QSV session; if a request slips through, fall back to 4:2:0. + if chroma.is_444() { + tracing::warn!("AMF/QSV 4:4:4 encode not implemented — encoding 4:2:0"); + } ffmpeg::init().context("ffmpeg init")?; if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() { // SAFETY: `ffmpeg::init()` ran on the line above, so libav is initialised; `av_log_set_level` diff --git a/crates/punktfunk-host/src/encode/windows/nvenc.rs b/crates/punktfunk-host/src/encode/windows/nvenc.rs index d287118..529626e 100644 --- a/crates/punktfunk-host/src/encode/windows/nvenc.rs +++ b/crates/punktfunk-host/src/encode/windows/nvenc.rs @@ -16,7 +16,7 @@ // Every `unsafe` block / impl in this file carries a `// SAFETY:` proof; enforce it. 
#![deny(clippy::undocumented_unsafe_blocks)] -use super::{Codec, EncodedFrame, Encoder, EncoderCaps}; +use super::{ChromaFormat, Codec, EncodedFrame, Encoder, EncoderCaps}; use crate::capture::{CapturedFrame, FramePayload, PixelFormat}; use anyhow::{anyhow, bail, Context, Result}; use std::collections::{HashMap, VecDeque}; @@ -57,6 +57,15 @@ pub struct NvencD3d11Encoder { buffer_fmt: nv::NV_ENC_BUFFER_FORMAT, /// Encoded bit depth (8 or 10). 10 → HEVC Main10 (NVENC upconverts the 8-bit ARGB input). bit_depth: u8, + /// Full-chroma 4:4:4 (HEVC Range Extensions, `chroma_format_idc = 3`) requested for this session. + /// NVENC ingests the RGB (ARGB/ABGR10) input and CSCs it to YUV444 internally — the `FREXT` profile + /// + `chromaFormatIDC = 3` in the encode config carry the chroma. Gated on the GPU's + /// `NV_ENC_CAPS_SUPPORT_YUV444_ENCODE` (cleared in `query_caps` on a card that lacks it) and on an + /// RGB input format (NV12/P010 capture can't reconstruct 4:4:4). HEVC-only. + chroma_444: bool, + /// `NV_ENC_CAPS_SUPPORT_YUV444_ENCODE` from the caps probe — whether this GPU can 4:4:4 encode at + /// all. `chroma_444` is forced off when this is false (graceful downgrade to 4:2:0). + yuv444_supported: bool, /// HDR: the capturer is delivering BT.2020 PQ 10-bit (`PixelFormat::Rgb10a2`) frames. Sets the /// `ABGR10` input format + the BT.2020/PQ colour VUI. Derived per-frame from the capture format /// (HDR can toggle mid-session); a change re-inits the session. 
@@ -103,6 +112,7 @@ pub struct NvencD3d11Encoder { unsafe impl Send for NvencD3d11Encoder {} impl NvencD3d11Encoder { + #[allow(clippy::too_many_arguments)] pub fn open( codec: Codec, _format: PixelFormat, @@ -111,6 +121,7 @@ impl NvencD3d11Encoder { fps: u32, bitrate_bps: u64, bit_depth: u8, + chroma: ChromaFormat, ) -> Result { Ok(Self { encoder: ptr::null_mut(), @@ -122,6 +133,9 @@ impl NvencD3d11Encoder { bitrate_bps, buffer_fmt: nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB, bit_depth, + // 4:4:4 is HEVC-only; the GPU-support gate is applied in `query_caps`. + chroma_444: chroma.is_444() && codec == Codec::H265, + yuv444_supported: false, hdr: false, hdr_meta: None, regs: HashMap::new(), @@ -209,6 +223,7 @@ impl NvencD3d11Encoder { let wmax = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_WIDTH_MAX); let hmax = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_HEIGHT_MAX); let ten_bit = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_10BIT_ENCODE); + let yuv444 = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_YUV444_ENCODE); let rfi = self.get_cap( enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION, @@ -235,6 +250,13 @@ impl NvencD3d11Encoder { self.bit_depth = 8; self.hdr = false; } + // Same for 4:4:4: a card without YUV444 encode falls back to 4:2:0. (The host already probed + // this via `probe_can_encode_444` before the Welcome, so this is a belt-and-braces guard.) + self.yuv444_supported = yuv444 != 0; + if self.chroma_444 && !self.yuv444_supported { + tracing::warn!("NVENC: this GPU can't 4:4:4 encode — falling back to 4:2:0"); + self.chroma_444 = false; + } self.rfi_supported = rfi != 0; self.custom_vbv = custom_vbv != 0; tracing::info!( @@ -313,9 +335,31 @@ impl NvencD3d11Encoder { cfg.encodeCodecConfig.hevcConfig.tier = 1; cfg.encodeCodecConfig.hevcConfig.level = 0; - // 10-bit HEVC Main10 (HDR foundation): NVENC upconverts the 8-bit input; 8-bit leaves the - // preset default (Main) untouched. 
- if self.bit_depth == 10 { + // Chroma + bit depth. Full-chroma 4:4:4 (HEVC Range Extensions) takes precedence and composes + // with 10-bit (Main 4:4:4 10): NVENC ingests the RGB input (ARGB / ABGR10) and CSCs it to + // YUV444 internally when `chromaFormatIDC = 3` under the FREXT profile. Only valid on an RGB + // input — a subsampled NV12/P010 source can't reconstruct full chroma (so the capturer is + // forced to RGB for a 4:4:4 session, and we guard on the input format here too). + // + // ON-GLASS TODO (RTX box): confirm ARGB + chromaFormatIDC=3 + FREXT yields a *true* 4:4:4 + // stream. NVENC's RGB→YUV CSC is documented to honor chromaFormatIDC (unlike libavcodec's + // wrapper, which always subsamples RGB to 4:2:0 — hence the Linux path feeds planar YUV444 + // instead). If on-glass shows 4:2:0, the follow-up is a BGRA→AYUV shader feeding the native + // `NV_ENC_BUFFER_FORMAT_AYUV` 4:4:4 input format. + let rgb_input = matches!( + self.buffer_fmt, + nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB + | nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ABGR10 + ); + if self.chroma_444 && rgb_input { + cfg.profileGUID = nv::NV_ENC_HEVC_PROFILE_FREXT_GUID; + cfg.encodeCodecConfig.hevcConfig.set_chromaFormatIDC(3); + if self.bit_depth == 10 { + cfg.encodeCodecConfig.hevcConfig.set_pixelBitDepthMinus8(2); // Main 4:4:4 10 + } + } else if self.bit_depth == 10 { + // 10-bit HEVC Main10 (HDR foundation): NVENC upconverts the 8-bit input; 8-bit leaves the + // preset default (Main) untouched. cfg.profileGUID = nv::NV_ENC_HEVC_PROFILE_MAIN10_GUID; cfg.encodeCodecConfig.hevcConfig.set_pixelBitDepthMinus8(2); // 10 - 8 } @@ -787,6 +831,9 @@ impl Encoder for NvencD3d11Encoder { EncoderCaps { supports_rfi: self.rfi_supported, supports_hdr_metadata: self.hdr, + // Reflects what the session actually configured (cleared in `query_caps` if the GPU lacks + // YUV444 encode), so the glue can confirm 4:4:4 vs the negotiated request. 
+ chroma_444: self.chroma_444, } } @@ -904,3 +951,69 @@ impl Drop for NvencD3d11Encoder { unsafe { self.teardown() }; } } + +/// Probe whether the active NVIDIA GPU can encode HEVC **4:4:4** (`NV_ENC_CAPS_SUPPORT_YUV444_ENCODE`). +/// Creates a throwaway hardware D3D11 device + NVENC session, queries the cap, and tears down. HEVC-only; +/// the result is cached by the caller ([`crate::encode::can_encode_444`]) and read *before* the Welcome +/// so the host advertises the chroma it can really encode (honest downgrade to 4:2:0 on a card without it). +pub fn probe_can_encode_444(codec: Codec) -> bool { + use windows::Win32::Foundation::HMODULE; + use windows::Win32::Graphics::Direct3D::{D3D_DRIVER_TYPE_HARDWARE, D3D_FEATURE_LEVEL_11_0}; + use windows::Win32::Graphics::Direct3D11::{ + D3D11CreateDevice, D3D11_CREATE_DEVICE_BGRA_SUPPORT, D3D11_SDK_VERSION, + }; + if codec != Codec::H265 { + return false; + } + // SAFETY: a self-contained probe owning every handle it creates. `D3D11CreateDevice` (HARDWARE + // driver, NULL adapter) fills `device` or returns Err (→ false). `open_encode_session_ex` opens an + // NVENC session against that device's raw pointer (valid while `device` is held) or errors (→ false, + // tearing nothing down). `get_encode_caps` reads one scalar cap into `val` via the loaded API table. + // `destroy_encoder` frees the session exactly once; `device`/its context drop with the COM wrappers. + // No handle escapes this call and nothing runs concurrently. 
+ unsafe { + let mut device: Option = None; + if D3D11CreateDevice( + None, + D3D_DRIVER_TYPE_HARDWARE, + HMODULE::default(), + D3D11_CREATE_DEVICE_BGRA_SUPPORT, + Some(&[D3D_FEATURE_LEVEL_11_0]), + D3D11_SDK_VERSION, + Some(&mut device), + None, + None, + ) + .is_err() + { + return false; + } + let Some(device) = device else { return false }; + let mut params = nv::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS { + version: nv::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER, + deviceType: nv::NV_ENC_DEVICE_TYPE::NV_ENC_DEVICE_TYPE_DIRECTX, + device: device.as_raw(), + apiVersion: nv::NVENCAPI_VERSION, + ..Default::default() + }; + let mut enc: *mut c_void = ptr::null_mut(); + if (API.open_encode_session_ex)(&mut params, &mut enc) + .result_without_string() + .is_err() + { + return false; + } + let mut param = nv::NV_ENC_CAPS_PARAM { + version: nv::NV_ENC_CAPS_PARAM_VER, + capsToQuery: nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_YUV444_ENCODE, + reserved: [0; 62], + }; + let mut val: i32 = 0; + let ok = (API.get_encode_caps)(enc, nv::NV_ENC_CODEC_HEVC_GUID, &mut param, &mut val) + .result_without_string() + .is_ok() + && val != 0; + let _ = (API.destroy_encoder)(enc); + ok + } +} diff --git a/crates/punktfunk-host/src/gamestream/audio.rs b/crates/punktfunk-host/src/gamestream/audio.rs index 62fe3a1..52af188 100644 --- a/crates/punktfunk-host/src/gamestream/audio.rs +++ b/crates/punktfunk-host/src/gamestream/audio.rs @@ -41,8 +41,6 @@ type Aes128CbcEnc = cbc::Encryptor; /// `RTP_PAYLOAD_TYPE_FEC 127`). const AUDIO_PACKET_TYPE: u8 = 97; const AUDIO_FEC_PACKET_TYPE: u8 = 127; -/// Stereo Opus bitrate (unchanged from the live-validated stereo path). -const OPUS_BITRATE: i32 = 128_000; /// Audio FEC geometry (moonlight-common-c `RtpAudioQueue.h`: `RTPA_DATA_SHARDS 4`, /// `RTPA_FEC_SHARDS 2`). Blocks are aligned: the client synthesizes the block base as @@ -82,67 +80,20 @@ impl Default for AudioParams { } } -/// One Opus (multi)stream layout. 
Channel order is the GameStream/Moonlight order -/// FL FR FC LFE RL RR [SL SR]; `mapping` is the libopus multistream mapping we *encode* -/// with — identical to Sunshine's `audio.cpp stream_configs` (verified verbatim 2026-06-10): -/// identity mapping, so normal quality couples (FL,FR) and (FC,LFE) [+ (RL,RR) on 7.1] with -/// the remaining channels as mono streams; high quality is one mono stream per channel. -/// Bitrates are Sunshine's per-config values (stereo keeps punktfunk's existing 128 kbps). -pub struct OpusLayout { - pub channels: u8, - pub streams: u8, - pub coupled: u8, - pub mapping: &'static [u8], - pub bitrate: i32, -} - -pub const LAYOUT_STEREO: OpusLayout = OpusLayout { - channels: 2, - streams: 1, - coupled: 1, - mapping: &[0, 1], - bitrate: OPUS_BITRATE, -}; -pub const LAYOUT_51: OpusLayout = OpusLayout { - channels: 6, - streams: 4, - coupled: 2, - mapping: &[0, 1, 2, 3, 4, 5], - bitrate: 256_000, -}; -pub const LAYOUT_51_HQ: OpusLayout = OpusLayout { - channels: 6, - streams: 6, - coupled: 0, - mapping: &[0, 1, 2, 3, 4, 5], - bitrate: 1_536_000, -}; -pub const LAYOUT_71: OpusLayout = OpusLayout { - channels: 8, - streams: 5, - coupled: 3, - mapping: &[0, 1, 2, 3, 4, 5, 6, 7], - bitrate: 450_000, -}; -pub const LAYOUT_71_HQ: OpusLayout = OpusLayout { - channels: 8, - streams: 8, - coupled: 0, - mapping: &[0, 1, 2, 3, 4, 5, 6, 7], - bitrate: 2_048_000, +// The Opus surround layout table (channel order FL FR FC LFE RL RR [SL SR], identity mapping, +// Sunshine's per-config bitrates) now lives in `punktfunk_core::audio`, shared with the native +// `punktfunk/1` path and every client decoder. Re-export the pieces the GameStream module + its +// RTSP SDP (`rtsp.rs`) reference; the GFE-specific `surround_params` SDP rotation stays below. +pub use punktfunk_core::audio::{ + OpusLayout, LAYOUT_51, LAYOUT_51_HQ, LAYOUT_71, LAYOUT_71_HQ, LAYOUT_STEREO, }; -/// Pick the encoder layout for the negotiated session parameters. 
Unknown channel counts -/// fall back to stereo (the client can only request 2/6/8 — `AUDIO_CONFIGURATION_*` in +/// Pick the encoder layout for the negotiated session parameters. Thin wrapper over the shared +/// [`punktfunk_core::audio::layout_for`] keyed on this module's [`AudioParams`] (unknown channel +/// counts fall back to stereo; the client can only request 2/6/8 — `AUDIO_CONFIGURATION_*` in /// Limelight.h). pub fn layout_for(params: &AudioParams) -> &'static OpusLayout { - match (params.channels, params.high_quality) { - (6, false) => &LAYOUT_51, - (6, true) => &LAYOUT_51_HQ, - (8, false) => &LAYOUT_71, - (8, true) => &LAYOUT_71_HQ, - _ => &LAYOUT_STEREO, - } + punktfunk_core::audio::layout_for(params.channels, params.high_quality) } /// The `a=fmtp:97 surround-params=` digit string for a layout: channelCount, streams, @@ -345,21 +296,21 @@ fn run( } /// Opus encoder for one session: the plain stereo encoder (the live-validated path, byte -/// identical) or a libopus multistream encoder for 5.1/7.1. +/// identical) or the safe `opus::MSEncoder` multistream encoder for 5.1/7.1. Both are +/// cross-platform (Linux + Windows) — surround no longer needs `audiopus_sys`. #[cfg(any(target_os = "linux", target_os = "windows"))] enum SessionEncoder { Stereo(opus::Encoder), - // Surround needs the libopus *multistream* encoder via `audiopus_sys` (Linux-only dep). - #[cfg(target_os = "linux")] - Surround(MsEncoder), + Surround(opus::MSEncoder), } #[cfg(any(target_os = "linux", target_os = "windows"))] impl SessionEncoder { fn new(layout: &'static OpusLayout) -> Result { + // RESTRICTED_LOWDELAY (`opus::Application::LowDelay`) + hard CBR, matching Sunshine — CBR + // keeps the Opus packet size constant, which the GameStream audio FEC (equal-length shards) + // relies on, and the client asserts a constant per-stream TOC. 
if layout.channels == 2 { - // RESTRICTED_LOWDELAY + CBR, matching Sunshine — CBR keeps the Opus TOC byte - // constant, which the client asserts per stream. let mut enc = opus::Encoder::new( SAMPLE_RATE, opus::Channels::Stereo, @@ -370,138 +321,32 @@ impl SessionEncoder { enc.set_vbr(false).ok(); Ok(SessionEncoder::Stereo(enc)) } else { - #[cfg(target_os = "linux")] - { - Ok(SessionEncoder::Surround(MsEncoder::new(layout)?)) - } - #[cfg(not(target_os = "linux"))] - { - anyhow::bail!( - "surround audio ({} ch) needs the libopus multistream encoder (Linux only) — \ - use a stereo session", - layout.channels - ) - } + let mut enc = opus::MSEncoder::new( + SAMPLE_RATE, + layout.streams, + layout.coupled, + layout.mapping, + opus::Application::LowDelay, + ) + .map_err(|e| anyhow::anyhow!("create Opus multistream encoder: {e}"))?; + enc.set_bitrate(opus::Bitrate::Bits(layout.bitrate)).ok(); + enc.set_vbr(false).ok(); + Ok(SessionEncoder::Surround(enc)) } } - /// Encode one interleaved frame (`samples_per_channel * channels` f32s) into `out`, - /// returning the packet length. - fn encode_float( - &mut self, - frame: &[f32], - samples_per_channel: usize, - out: &mut [u8], - ) -> Result { - // `samples_per_channel` only feeds the multistream (surround) encoder; stereo infers it. - #[cfg(not(target_os = "linux"))] - let _ = samples_per_channel; + /// Encode one interleaved frame into `out`, returning the packet length. Both encoders infer + /// the per-channel sample count from `frame.len()` and their channel count. 
+ fn encode_float(&mut self, frame: &[f32], out: &mut [u8]) -> Result { match self { SessionEncoder::Stereo(enc) => enc.encode_float(frame, out).context("opus encode"), - #[cfg(target_os = "linux")] - SessionEncoder::Surround(enc) => enc.encode_float(frame, samples_per_channel, out), + SessionEncoder::Surround(enc) => enc + .encode_float(frame, out) + .context("opus multistream encode"), } } } -/// RAII wrapper for `OpusMSEncoder` (the safe `opus` crate is stereo-only; the multistream -/// API comes from `audiopus_sys`, the same libopus the crate already links). Configured like -/// the stereo path: RESTRICTED_LOWDELAY, hard CBR, per-layout bitrate. -#[cfg(target_os = "linux")] -struct MsEncoder { - st: std::ptr::NonNull, -} - -// SAFETY: `MsEncoder` owns a unique `OpusMSEncoder` via `NonNull` (it is neither `Clone` nor -// `Sync`, so the pointer is never aliased). libopus's multistream encoder state is a self-contained -// heap allocation with no thread-local or thread-affine state, so moving ownership to another thread -// is sound; every method takes `&mut self`, keeping access single-threaded at any instant. -#[cfg(target_os = "linux")] -unsafe impl Send for MsEncoder {} - -#[cfg(target_os = "linux")] -impl MsEncoder { - fn new(layout: &OpusLayout) -> Result { - use std::os::raw::c_int; - let mut err: c_int = 0; - // SAFETY: every scalar arg is a valid libopus input (sample rate, channel/stream/coupled - // counts, the RESTRICTED_LOWDELAY application constant). `layout.mapping.as_ptr()` addresses - // a 'static slice of exactly `layout.channels` bytes (every `OpusLayout` constant upholds - // that), which is the element count `opus_multistream_encoder_create` reads through it, and - // `&mut err` is a live local the call writes its status into. libopus copies the mapping into - // its own allocation, so the pointer need only be valid for the call; the returned pointer is - // null/`OPUS_OK`-checked below before any use. 
- let st = unsafe { - audiopus_sys::opus_multistream_encoder_create( - SAMPLE_RATE as i32, - layout.channels as c_int, - layout.streams as c_int, - layout.coupled as c_int, - layout.mapping.as_ptr(), - audiopus_sys::OPUS_APPLICATION_RESTRICTED_LOWDELAY, - &mut err, - ) - }; - let st = std::ptr::NonNull::new(st) - .filter(|_| err == audiopus_sys::OPUS_OK) - .ok_or_else(|| anyhow::anyhow!("opus_multistream_encoder_create failed ({err})"))?; - // SAFETY: `st` is the non-null encoder `opus_multistream_encoder_create` just returned, owned - // exclusively here. Each `opus_multistream_encoder_ctl` call passes a valid request constant - // with the single by-value `c_int` argument that request's variadic ABI expects - // (`OPUS_SET_BITRATE_REQUEST` → bitrate, `OPUS_SET_VBR_REQUEST` → 0). No pointer escapes the - // call and the encoder outlives it. - unsafe { - audiopus_sys::opus_multistream_encoder_ctl( - st.as_ptr(), - audiopus_sys::OPUS_SET_BITRATE_REQUEST, - layout.bitrate as c_int, - ); - audiopus_sys::opus_multistream_encoder_ctl( - st.as_ptr(), - audiopus_sys::OPUS_SET_VBR_REQUEST, - 0 as c_int, // hard CBR (constant packet size — also what audio FEC relies on) - ); - } - Ok(MsEncoder { st }) - } - - fn encode_float( - &mut self, - frame: &[f32], - samples_per_channel: usize, - out: &mut [u8], - ) -> Result { - // SAFETY: `self.st` is the live encoder from `new`. libopus reads `samples_per_channel * - // channels` f32s through `frame.as_ptr()`; every caller passes a `frame` of exactly that - // length together with the matching `samples_per_channel` (`audio_body`'s `frame_len = - // samples_per_channel * layout.channels`; the round-trip tests size identically), so the read - // stays in bounds. `out.as_mut_ptr()` is written for at most `out.len()` bytes, which is - // passed as the capacity bound. Both buffers are live locals outliving this synchronous call; - // the return value is range-checked before being used as a length. 
- let n = unsafe { - audiopus_sys::opus_multistream_encode_float( - self.st.as_ptr(), - frame.as_ptr(), - samples_per_channel as std::os::raw::c_int, - out.as_mut_ptr(), - out.len() as i32, - ) - }; - anyhow::ensure!(n > 0, "opus_multistream_encode_float failed ({n})"); - Ok(n as usize) - } -} - -#[cfg(target_os = "linux")] -impl Drop for MsEncoder { - fn drop(&mut self) { - // SAFETY: `self.st` is the encoder `opus_multistream_encoder_create` returned; this - // `MsEncoder` owns it uniquely and `drop` runs exactly once, so the destroy frees it once - // with no subsequent use. - unsafe { audiopus_sys::opus_multistream_encoder_destroy(self.st.as_ptr()) } - } -} - #[cfg(any(target_os = "linux", target_os = "windows"))] fn audio_body( cap: &mut dyn AudioCapturer, @@ -565,7 +410,7 @@ fn audio_body( *s = (*s * gain).clamp(-1.0, 1.0); } } - let n = enc.encode_float(&frame, samples_per_channel, &mut out)?; + let n = enc.encode_float(&frame, &mut out)?; // AES-128-CBC the Opus payload (RTP header stays plaintext). Per-packet IV = // BE32(rikeyid + seq) in [0..4], zero elsewhere; PKCS7 padding. let iv_seq = (rikeyid as u32).wrapping_add(seq as u32); @@ -775,41 +620,33 @@ mod tests { /// Real-codec proof of the 5.1 mapping math: encode with our encoder layout, decode with /// the mapping a stock Moonlight client derives from our advertised surround-params /// (parse → GFE swap), and verify a tone fed into each input channel comes out on the - /// same output channel. - #[cfg(target_os = "linux")] + /// same output channel. Cross-platform via the safe `opus` crate — this also guards the + /// (now un-gated) Windows GameStream surround build. #[test] fn multistream_51_roundtrip_channel_identity() { let layout = &LAYOUT_51; let samples = 240; // 5 ms let ch = layout.channels as usize; - // Client-side decoder mapping derived exactly as moonlight-common-c does. + // Client-side decoder mapping derived exactly as moonlight-common-c does (GFE swap). 
let s = surround_params(layout, false); let digits: Vec = s.bytes().map(|b| b - b'0').collect(); let client_mapping = client_swap(&digits[3..]); - let mut err = 0i32; - // SAFETY: scalar args are valid libopus inputs. `client_mapping.as_ptr()` addresses a - // `Vec` of exactly `ch` entries (derived from the advertised surround-params), which is - // the element count the decoder reads through it, and `&mut err` is a live local the call - // writes. The returned pointer is `OPUS_OK`/non-null-checked immediately below before use. - let dec = unsafe { - audiopus_sys::opus_multistream_decoder_create( - SAMPLE_RATE as i32, - ch as i32, - layout.streams as i32, - layout.coupled as i32, - client_mapping.as_ptr(), - &mut err, - ) - }; - assert_eq!(err, audiopus_sys::OPUS_OK); - assert!(!dec.is_null()); + let mut dec = + opus::MSDecoder::new(SAMPLE_RATE, layout.streams, layout.coupled, &client_mapping) + .expect("multistream decoder"); for tone_ch in 0..ch { - let mut enc = MsEncoder::new(layout).unwrap(); + let mut enc = opus::MSEncoder::new( + SAMPLE_RATE, + layout.streams, + layout.coupled, + layout.mapping, + opus::Application::LowDelay, + ) + .expect("multistream encoder"); let mut out = vec![0u8; 1400]; - let mut decoded = vec![0f32; samples * ch]; let mut energy = vec![0f64; ch]; // A few frames so the codec converges past its startup transient. for f in 0..8 { @@ -819,28 +656,15 @@ mod tests { / SAMPLE_RATE as f32; frame[t * ch + tone_ch] = 0.5 * phase.sin(); } - let n = enc.encode_float(&frame, samples, &mut out).unwrap(); + let n = enc.encode_float(&frame, &mut out).unwrap(); assert!(n > 0); - // SAFETY: `dec` is the non-null decoder asserted above. `out.as_ptr()` is read for - // the `n` encoded bytes just produced by `encode_float`; `decoded.as_mut_ptr()` is - // written for up to `samples * ch` f32s and `decoded` is exactly that long; `samples` - // is the per-channel frame size. 
All buffers are live locals outliving the call; the - // return is checked to equal `samples`. - let got = unsafe { - audiopus_sys::opus_multistream_decode_float( - dec, - out.as_ptr(), - n as i32, - decoded.as_mut_ptr(), - samples as i32, - 0, - ) - }; - assert_eq!(got as usize, samples); + let mut decoded = vec![0f32; samples * ch]; + let got = dec.decode_float(&out[..n], &mut decoded, false).unwrap(); + assert_eq!(got, samples); if f >= 4 { for t in 0..samples { - for c in 0..ch { - energy[c] += (decoded[t * ch + c] as f64).powi(2); + for (c, e) in energy.iter_mut().enumerate() { + *e += (decoded[t * ch + c] as f64).powi(2); } } } @@ -854,9 +678,6 @@ mod tests { (energies: {energy:?})" ); } - // SAFETY: `dec` is the decoder `opus_multistream_decoder_create` returned; the test owns it - // and destroys it exactly once here, after the final decode — no later use, no double free. - unsafe { audiopus_sys::opus_multistream_decoder_destroy(dec) }; } /// Live 5.1 capture → multistream encode → decode, against a real PipeWire session. @@ -869,7 +690,15 @@ mod tests { fn surround_capture_live() { let mut cap = crate::audio::open_audio_capture(6).expect("open 6ch capture"); let layout = &LAYOUT_51; - let mut enc = MsEncoder::new(layout).unwrap(); + let mut enc = opus::MSEncoder::new( + SAMPLE_RATE, + layout.streams, + layout.coupled, + layout.mapping, + opus::Application::LowDelay, + ) + .unwrap(); + enc.set_vbr(false).ok(); // hard CBR so packet sizes are constant (audio FEC relies on it) let mut out = vec![0u8; 1400]; let mut acc: Vec = Vec::new(); let frame_len = 240 * 6; @@ -880,49 +709,24 @@ mod tests { acc.extend_from_slice(&chunk); while acc.len() >= frame_len && packets < 100 { let frame: Vec = acc.drain(..frame_len).collect(); - let n = enc.encode_float(&frame, 240, &mut out).unwrap(); + let n = enc.encode_float(&frame, &mut out).unwrap(); sizes.insert(n); packets += 1; } } // Hard CBR: every multistream packet must be the same size (audio FEC relies on it). 
assert_eq!(sizes.len(), 1, "CBR sizes: {sizes:?}"); - // And a stock client's decoder must accept them. + // And a stock client's GFE-derived decoder must accept them. let s = surround_params(layout, false); let digits: Vec = s.bytes().map(|b| b - b'0').collect(); let client_mapping = client_swap(&digits[3..]); - let mut err = 0i32; - // SAFETY: scalar args are valid; `client_mapping.as_ptr()` addresses a 6-entry `Vec` - // (matches the 6-channel layout the decoder reads through it), alive past the call, and - // `&mut err` is a live local. The pointer is `OPUS_OK`-checked before use. - let dec = unsafe { - audiopus_sys::opus_multistream_decoder_create( - 48000, - 6, - layout.streams as i32, - layout.coupled as i32, - client_mapping.as_ptr(), - &mut err, - ) - }; - assert_eq!(err, audiopus_sys::OPUS_OK); + let mut dec = + opus::MSDecoder::new(SAMPLE_RATE, layout.streams, layout.coupled, &client_mapping) + .unwrap(); let mut pcm = vec![0f32; 240 * 6]; - // SAFETY: `dec` is the non-null decoder from create. `out.as_ptr()` is read for the CBR - // packet length passed in (`*sizes.first()`, a real encoded packet size in `out`); - // `pcm.as_mut_ptr()` is written for up to `240 * 6` f32s and `pcm` is exactly that long; - // `240` is the per-channel frame size. All buffers are live locals outliving the call. - let got = unsafe { - audiopus_sys::opus_multistream_decode_float( - dec, - out.as_ptr(), - *sizes.first().unwrap() as i32, - pcm.as_mut_ptr(), - 240, - 0, - ) - }; - // SAFETY: `dec` is owned by the test; destroyed exactly once here after the final decode. 
- unsafe { audiopus_sys::opus_multistream_decoder_destroy(dec) }; + let got = dec + .decode_float(&out[..*sizes.first().unwrap()], &mut pcm, false) + .unwrap(); assert_eq!(got, 240); } } diff --git a/crates/punktfunk-host/src/gamestream/stream.rs b/crates/punktfunk-host/src/gamestream/stream.rs index 01b0132..1696ca5 100644 --- a/crates/punktfunk-host/src/gamestream/stream.rs +++ b/crates/punktfunk-host/src/gamestream/stream.rs @@ -431,6 +431,9 @@ fn stream_body( cfg.bitrate_kbps as u64 * 1000, frame.is_cuda(), 8, // GameStream/Moonlight path: 8-bit (its own codec negotiation) + // GameStream/Moonlight stays 4:2:0 — stock Moonlight clients can't decode 4:4:4, and the + // protocol has no chroma negotiation. 4:4:4 is punktfunk/1-native only. + encode::ChromaFormat::Yuv420, ) .context("open video encoder for stream")?; // FEC overhead percent (Sunshine default 20). Override with PUNKTFUNK_FEC_PCT (0 = data-only). @@ -560,6 +563,7 @@ fn stream_body( cfg.bitrate_kbps as u64 * 1000, frame.is_cuda(), 8, + encode::ChromaFormat::Yuv420, // GameStream stays 4:2:0 ) .context("reopen encoder after rebuild")?; supports_rfi = enc.caps().supports_rfi; diff --git a/crates/punktfunk-host/src/punktfunk1.rs b/crates/punktfunk-host/src/punktfunk1.rs index 652dc8b..00d1542 100644 --- a/crates/punktfunk-host/src/punktfunk1.rs +++ b/crates/punktfunk-host/src/punktfunk1.rs @@ -355,6 +355,15 @@ fn resolve_bitrate_kbps(requested: u32) -> u32 { } } +/// Resolve the audio channel count the session will capture + encode from the client's request. +/// Normalizes to one of 2 (stereo) / 6 (5.1) / 8 (7.1); anything else (older client, garbage) +/// becomes stereo. Both backends can produce the requested count (PipeWire pads/upmixes positions, +/// WASAPI loopback up/downmixes via AUTOCONVERTPCM), so no capability clamp is needed here — the +/// surround channels just carry up/downmixed content when the host's sink has fewer real channels. 
+fn resolve_audio_channels(requested: u8) -> u8 { + punktfunk_core::audio::normalize_channels(requested) +} + /// Static FEC override: `PUNKTFUNK_FEC_PCT`, when set, PINS the recovery percent and DISABLES /// adaptive FEC — so a speed test / measurement keeps a fixed, known overhead. `None` ⇒ adaptive /// FEC (the host sizes recovery to the loss the client reports). `0` disables FEC entirely. @@ -623,6 +632,17 @@ async fn serve_session( "encoder bitrate" ); + // Resolve the audio channel count (client request → stereo / 5.1 / 7.1). The capturer opens + // at this count: PipeWire synthesizes the requested positions (padding with silence when the + // sink has fewer), WASAPI loopback up/downmixes via AUTOCONVERTPCM — so a client always gets + // the channels it asked for, and the Welcome echoes the value the audio thread will encode. + let audio_channels = resolve_audio_channels(hello.audio_channels); + tracing::info!( + requested = hello.audio_channels, + resolved = audio_channels, + "audio channels" + ); + // Resolve the encode bit depth: HEVC Main10 only when the client advertised it AND the host // opted in (PUNKTFUNK_10BIT). A client that can't decode 10-bit (caps bit clear, or an older // client) always gets the 8-bit stream. PUNKTFUNK_10BIT is the host policy gate until a @@ -642,6 +662,44 @@ async fn serve_session( "encode bit depth" ); + // Resolve the chroma subsampling: full-chroma HEVC 4:4:4 only when ALL of — the host opted in + // (PUNKTFUNK_444), the client advertised VIDEO_CAP_444, the session is single-process (the + // two-process WGC relay encodes 4:2:0 in v1), and the active GPU/driver actually supports a + // 4:4:4 encode (probed, cached). The native path always encodes HEVC. We resolve this BEFORE + // the Welcome so `chroma_format` reflects what we'll really emit — the honest-downgrade + // channel: if any gate fails the client is told 4:2:0 before it builds its decoder. 
The probe + // opens a tiny encoder; it runs only when both opt-ins are set and is cached after the first. + let host_wants_444 = crate::config::config().four_four_four; + let client_supports_444 = hello.video_caps & punktfunk_core::quic::VIDEO_CAP_444 != 0; + let single_process = crate::session_plan::resolve_topology() + == crate::session_plan::SessionTopology::SingleProcess; + // The GPU probe opens a real (tiny) encoder on first use, so run it off the reactor like the + // compositor probe above (blocking probes → spawn_blocking). Short-circuit so it only runs when + // the cheap gates already pass. The result is cached process-wide (a negative latches until + // restart — acceptable: a GPU either supports HEVC 4:4:4 or it doesn't, and a transient open + // failure here is rare since the session's own encoder isn't open yet). + let gpu_supports_444 = if host_wants_444 && client_supports_444 && single_process { + tokio::task::spawn_blocking(|| { + crate::encode::can_encode_444(crate::encode::Codec::H265) + }) + .await + .context("4:4:4 capability probe task")? + } else { + false + }; + let chroma = if gpu_supports_444 { + crate::encode::ChromaFormat::Yuv444 + } else { + crate::encode::ChromaFormat::Yuv420 + }; + tracing::info!( + chroma = ?chroma, + host_wants_444, + client_supports_444, + single_process, + "encode chroma" + ); + // Reserve a UDP port for the data plane (bind, read it back, rebind in UdpTransport). let probe = std::net::UdpSocket::bind("0.0.0.0:0")?; let udp_port = probe.local_addr()?.port(); @@ -691,6 +749,12 @@ async fn serve_session( } else { ColorInfo::SDR_BT709 }, + // The chroma the encoder will actually emit (resolved + GPU-probed above) — 4:4:4 only + // when every gate passed, else 4:2:0. The client sizes its decoder from this. + chroma_format: chroma.idc(), + // The resolved audio channel count the audio thread will capture + Opus-(multi)stream + // encode (2/6/8). The client builds its decoder from this echoed value. 
+ audio_channels, }; io::write_msg(&mut send, &welcome.encode()).await?; @@ -884,9 +948,10 @@ async fn serve_session( let conn = conn.clone(); let stop = stop.clone(); let cap = audio_cap.clone(); + let channels = welcome.audio_channels; std::thread::Builder::new() .name("punktfunk1-audio".into()) - .spawn(move || audio_thread(conn, stop, cap)) + .spawn(move || audio_thread(conn, stop, cap, channels)) .map_err(|e| tracing::error!(error = %e, "audio thread spawn failed — session continues without audio")) .ok() } else { @@ -946,6 +1011,13 @@ async fn serve_session( let launch_for_dp = hello.launch.clone(); let bitrate_kbps = welcome.bitrate_kbps; // resolved encoder bitrate (Hello clamped, or default) let bit_depth = welcome.bit_depth; // resolved encode bit depth (8, or 10 when negotiated) + // Resolved chroma — derive the typed value back from the wire byte the Welcome carried (so the + // session uses exactly what the client was told). `Yuv444` only when the handshake gate passed. + let chroma = if welcome.chroma_format == punktfunk_core::quic::CHROMA_IDC_444 { + crate::encode::ChromaFormat::Yuv444 + } else { + crate::encode::ChromaFormat::Yuv420 + }; let stop_stream = stop.clone(); let fec_target_dp = fec_target.clone(); // data-plane handle to the adaptive-FEC target let conn_stream = conn.clone(); // for sending the source's real HDR metadata (0xCE) mid-stream @@ -1005,6 +1077,7 @@ async fn serve_session( compositor, bitrate_kbps, bit_depth, + chroma, probe_rx, probe_result_tx, fec_target: fec_target_dp, @@ -1493,33 +1566,88 @@ fn input_thread( } } -/// The audio thread: desktop capture → Opus (48 kHz stereo, 5 ms, CBR — same tuning as the -/// GameStream path) → `AUDIO_MAGIC` datagrams. QUIC already encrypts; no extra layer. -/// The capturer comes from (and returns to) the persistent slot — see [`AudioCapSlot`]. 
+/// Opus encoder for the native audio plane: a plain stereo encoder (the live-validated, +/// byte-identical path) or a libopus *multistream* encoder for 5.1/7.1, both behind one +/// `encode_float`. Surround uses the safe `opus::MSEncoder` (no `audiopus_sys`). #[cfg(any(target_os = "linux", target_os = "windows"))] -fn audio_thread(conn: quinn::Connection, stop: Arc, audio_cap: AudioCapSlot) { - use crate::audio::{CHANNELS, SAMPLE_RATE}; +enum NativeAudioEnc { + Stereo(opus::Encoder), + Surround(opus::MSEncoder), +} + +#[cfg(any(target_os = "linux", target_os = "windows"))] +impl NativeAudioEnc { + /// Build the encoder for `channels` (2/6/8), hard-CBR + RESTRICTED_LOWDELAY like the + /// GameStream path; bitrate from the shared layout table (stereo keeps the validated 128 kbps). + fn new(channels: u8) -> Result { + if channels == 2 { + let mut e = opus::Encoder::new( + crate::audio::SAMPLE_RATE, + opus::Channels::Stereo, + opus::Application::LowDelay, + )?; + e.set_bitrate(opus::Bitrate::Bits(128_000)).ok(); + e.set_vbr(false).ok(); + Ok(NativeAudioEnc::Stereo(e)) + } else { + let l = punktfunk_core::audio::layout_for(channels, false); + let mut e = opus::MSEncoder::new( + crate::audio::SAMPLE_RATE, + l.streams, + l.coupled, + l.mapping, + opus::Application::LowDelay, + )?; + e.set_bitrate(opus::Bitrate::Bits(l.bitrate)).ok(); + e.set_vbr(false).ok(); + Ok(NativeAudioEnc::Surround(e)) + } + } + + fn encode_float(&mut self, frame: &[f32], out: &mut [u8]) -> Result { + match self { + NativeAudioEnc::Stereo(e) => e.encode_float(frame, out), + NativeAudioEnc::Surround(e) => e.encode_float(frame, out), + } + } +} + +/// The audio thread: desktop capture → Opus (48 kHz, 5 ms, CBR — same tuning as the GameStream +/// path) → `AUDIO_MAGIC` datagrams, at the negotiated `channels` (2 stereo / 6 = 5.1 / 8 = 7.1, +/// canonical wire order FL FR FC LFE RL RR SL SR). QUIC already encrypts; no extra layer. 
The +/// capturer comes from (and returns to) the persistent slot — see [`AudioCapSlot`]. +#[cfg(any(target_os = "linux", target_os = "windows"))] +fn audio_thread( + conn: quinn::Connection, + stop: Arc, + audio_cap: AudioCapSlot, + channels: u8, +) { + use crate::audio::SAMPLE_RATE; const FRAME_MS: usize = 5; const SAMPLES_PER_FRAME: usize = SAMPLE_RATE as usize * FRAME_MS / 1000; // 240 + let want = punktfunk_core::audio::normalize_channels(channels); + // Reuse the cached capturer ONLY when its channel count matches this session's; a stereo + // capturer left by a prior session must not feed a 5.1/7.1 session (the encoder + the client's + // decoder are sized for `want`, so a mismatched capturer would garble/desync the audio). let capturer = match audio_cap.lock().unwrap().take() { - Some(mut c) => { + Some(mut c) if c.channels() == want as u32 => { c.drain(); // discard audio captured between sessions c } - None => match crate::audio::open_audio_capture(CHANNELS as u32) { - Ok(c) => c, - Err(e) => { - tracing::warn!(error = %format!("{e:#}"), "punktfunk/1 audio unavailable — session continues without it"); - return; + prev => { + drop(prev); // wrong channel count (or none): clean teardown, open fresh at `want` + match crate::audio::open_audio_capture(want as u32) { + Ok(c) => c, + Err(e) => { + tracing::warn!(error = %format!("{e:#}"), "punktfunk/1 audio unavailable — session continues without it"); + return; + } } - }, + } }; - let mut enc = match opus::Encoder::new( - SAMPLE_RATE, - opus::Channels::Stereo, - opus::Application::LowDelay, - ) { + let mut enc = match NativeAudioEnc::new(want) { Ok(e) => e, Err(e) => { tracing::error!(error = %e, "opus encoder"); @@ -1527,12 +1655,11 @@ fn audio_thread(conn: quinn::Connection, stop: Arc, audio_cap: Audio return; } }; - enc.set_bitrate(opus::Bitrate::Bits(128_000)).ok(); - enc.set_vbr(false).ok(); - let frame_len = SAMPLES_PER_FRAME * CHANNELS; + let frame_len = SAMPLES_PER_FRAME * want as usize; let mut acc: 
Vec = Vec::with_capacity(frame_len * 4); - let mut opus_buf = vec![0u8; 1500]; + // Sized for the largest surround frame (7.1 HQ ≈ 1.3 KB at 5 ms); ample for normal quality. + let mut opus_buf = vec![0u8; 4096]; let mut seq: u32 = 0; // Reopen-with-backoff: hold the capturer in an Option so a mid-session capture-thread death // (device unplug, daemon restart) reopens instead of muting the rest of a multi-hour session. @@ -1542,14 +1669,17 @@ fn audio_thread(conn: quinn::Connection, stop: Arc, audio_cap: Audio // restart). The first open already happened above; failing THAT still ends the session quietly. let mut capturer = Some(capturer); let mut last_failed: Option = None; - tracing::info!("punktfunk/1 audio streaming (Opus 48 kHz stereo, 5 ms datagrams)"); + tracing::info!( + channels = want, + "punktfunk/1 audio streaming (Opus 48 kHz, 5 ms datagrams)" + ); 'session: while !stop.load(Ordering::SeqCst) { if capturer.is_none() { if last_failed.is_some_and(|t| t.elapsed() < INJECTOR_REOPEN_BACKOFF) { std::thread::sleep(std::time::Duration::from_millis(200)); continue; } - match crate::audio::open_audio_capture(CHANNELS as u32) { + match crate::audio::open_audio_capture(want as u32) { Ok(c) => { tracing::info!("punktfunk/1 audio capture reopened"); capturer = Some(c); @@ -1599,7 +1729,12 @@ fn audio_thread(conn: quinn::Connection, stop: Arc, audio_cap: Audio /// Stub — punktfunk/1 audio needs Linux (PipeWire capture + libopus); non-Linux dev builds /// run sessions without it, same as when the capturer fails to open. 
#[cfg(not(any(target_os = "linux", target_os = "windows")))] -fn audio_thread(_conn: quinn::Connection, _stop: Arc, _audio_cap: AudioCapSlot) { +fn audio_thread( + _conn: quinn::Connection, + _stop: Arc, + _audio_cap: AudioCapSlot, + _channels: u8, +) { tracing::warn!("punktfunk/1 audio requires Linux or Windows — session continues without it"); } @@ -2368,6 +2503,8 @@ struct SessionContext { bitrate_kbps: u32, /// Negotiated encode bit depth (8, or 10 = HEVC Main10). bit_depth: u8, + /// Negotiated chroma subsampling (4:2:0, or 4:4:4 when the client + host + GPU all support it). + chroma: crate::encode::ChromaFormat, /// Speed-test burst requests (see [`service_probes`]). probe_rx: std::sync::mpsc::Receiver, /// Speed-test results back to the control task. @@ -2398,7 +2535,7 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> { // path now reads this typed `SessionPlan` instead of re-deriving from config at each dispatch site // (the latent "capture and encode disagree on the backend" hazard, plan §2.4). `bit_depth` is the // only per-session input — capture/topology/encoder are otherwise pure functions of `HostConfig`. - let plan = crate::session_plan::SessionPlan::resolve(ctx.bit_depth); + let plan = crate::session_plan::SessionPlan::resolve(ctx.bit_depth, ctx.chroma); tracing::info!(?plan, "resolved session plan"); // Windows two-process secure-desktop path: when the host runs as SYSTEM (required for the secure // desktop + SendInput), WGC can't activate in-process, so we capture the normal desktop via a @@ -2420,6 +2557,8 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> { compositor, bitrate_kbps, bit_depth, + // The resolved chroma is already captured in `plan` (above); ignore the duplicate here. 
+ chroma: _, probe_rx, probe_result_tx, fec_target, @@ -2969,6 +3108,9 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> { compositor, bitrate_kbps, bit_depth, + // The two-process WGC relay encodes 4:2:0 in v1 — the handshake's `single_process` gate already + // forced `chroma` to Yuv420 for this topology, so the helper + secure-desktop DDA stay 4:2:0. + chroma: _, probe_rx, probe_result_tx, fec_target, @@ -3079,6 +3221,7 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> { // stage 5) so the DDA capturer doesn't re-derive it. crate::capture::gpu_encode(), hdr, + false, // the two-process relay path is 4:2:0 in v1 ) .context("open DDA for secure desktop")?; cap.set_active(true); @@ -3092,6 +3235,8 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> { bitrate_kbps as u64 * 1000, frame.is_cuda(), bit_depth, + // Secure-desktop DDA on the two-process relay path: 4:2:0 in v1 (matches the helper). + crate::encode::ChromaFormat::Yuv420, ) .context("open video encoder for DDA")?; Ok(DdaPipe { @@ -3491,6 +3636,9 @@ fn is_permanent_build_error(chain: &str) -> bool { "could not find output", // KWin < 6.5.6: createVirtualOutput unsupported "must be a node id", // PUNKTFUNK_GAMESCOPE_NODE not an integer "is it installed", // gamescope / kscreen-doctor not on PATH + // 4:4:4 NVENC got a CUDA frame — should never happen now the Linux capturer honors gpu=false, + // but fail fast instead of 8× retry (~90 s) rather than wedge the session if it ever recurs. 
+ "capture/encoder negotiation mismatch", ]; let lower = chain.to_ascii_lowercase(); PERMANENT.iter().any(|p| lower.contains(p)) @@ -3540,8 +3688,20 @@ fn build_pipeline( bitrate_kbps as u64 * 1000, frame.is_cuda(), bit_depth, + plan.chroma, ) .context("open video encoder")?; + // Post-open cross-check: the Welcome already committed `chroma_format` from the pre-open probe, so + // warn loudly if the encoder actually opened a different chroma than negotiated (the in-band SPS is + // authoritative for the decoder, but a mismatch means the probe and the live open disagreed). + let opened_444 = enc.caps().chroma_444; + if opened_444 != plan.chroma.is_444() { + tracing::warn!( + negotiated_444 = plan.chroma.is_444(), + opened_444, + "encoder chroma disagrees with the negotiated Welcome — the client was told the other value" + ); + } let interval = std::time::Duration::from_secs_f64(1.0 / effective_hz.max(1) as f64); Ok((capturer, enc, frame, interval)) } @@ -3980,6 +4140,7 @@ mod tests { GamepadPref::Auto, 0, 0, // video_caps + 2, // audio_channels (stereo) None, // launch None, Some((cert.clone(), key.clone())), @@ -4012,6 +4173,7 @@ mod tests { GamepadPref::Auto, 0, 0, // video_caps + 2, // audio_channels (stereo) None, // launch None, Some((cert, key)), @@ -4065,6 +4227,7 @@ mod tests { GamepadPref::Auto, 0, 0, // video_caps + 2, // audio_channels (stereo) None, // launch None, None, @@ -4090,6 +4253,7 @@ mod tests { GamepadPref::Auto, 0, 0, // video_caps + 2, // audio_channels (stereo) None, // launch Some(host_fp), Some((cert.clone(), key.clone())), diff --git a/crates/punktfunk-host/src/session_plan.rs b/crates/punktfunk-host/src/session_plan.rs index f94b608..7932797 100644 --- a/crates/punktfunk-host/src/session_plan.rs +++ b/crates/punktfunk-host/src/session_plan.rs @@ -106,17 +106,22 @@ pub struct SessionPlan { /// The IDD-push HDR hint (`bit_depth >= 10`) — the want-HDR flag the capturer was passed before. 
/// Non-IDD-push Windows backends ignore it and auto-detect HDR from the monitor; Linux is 8-bit. pub hdr: bool, + /// Handshake-negotiated chroma subsampling (4:2:0, or full-chroma 4:4:4 when the client + host + + /// GPU all support it). Resolved before the Welcome; `Yuv420` on every backend that declined it. + pub chroma: crate::encode::ChromaFormat, } impl SessionPlan { - /// Resolve the whole plan once from [`config`](crate::config) + the negotiated `bit_depth`. - pub fn resolve(bit_depth: u8) -> Self { + /// Resolve the whole plan once from [`config`](crate::config) + the negotiated `bit_depth` and + /// `chroma`. + pub fn resolve(bit_depth: u8, chroma: crate::encode::ChromaFormat) -> Self { SessionPlan { capture: CaptureBackend::resolve(), topology: resolve_topology(), encoder: resolve_encoder(), bit_depth, hdr: bit_depth >= 10, + chroma, } } @@ -124,9 +129,24 @@ impl SessionPlan { /// (no second backend probe), `hdr` from the plan. Handed into `capture::capture_virtual_output` so the /// capturer never re-derives the encode backend. pub fn output_format(&self) -> crate::capture::OutputFormat { + let gpu = self.encoder.is_gpu(); + // Linux NVENC 4:4:4: libavcodec `hevc_nvenc` only emits 4:4:4 from a YUV444 *input* frame — + // RGB-in is always subsampled to 4:2:0 (verified on the RTX 5070 Ti). So the encoder does an + // RGB→YUV444P swscale and needs CPU-resident RGB frames; force the zero-copy GPU capture off + // for a 4:4:4 NVENC session. (VAAPI 4:4:4, where the hardware supports it, keeps its dmabuf + // path via `scale_vaapi`; Windows NVENC ingests ARGB directly and stays GPU.) 
+ #[cfg(target_os = "linux")] + let gpu = { + let force_cpu_for_nvenc_444 = + self.chroma.is_444() && !crate::encode::linux_zero_copy_is_vaapi(); + gpu && !force_cpu_for_nvenc_444 + }; crate::capture::OutputFormat { - gpu: self.encoder.is_gpu(), + gpu, hdr: self.hdr, + // 4:4:4 needs a full-chroma source: on Windows this keeps the capturer on RGB (not the + // default NV12/P010 video-engine output) so NVENC can CSC to 4:4:4. + chroma_444: self.chroma.is_444(), } } } @@ -134,7 +154,7 @@ impl SessionPlan { /// Process topology. On Windows this is the former `punktfunk1::should_use_helper` logic verbatim; on /// every other platform the session is always single-process. #[cfg(target_os = "windows")] -fn resolve_topology() -> SessionTopology { +pub(crate) fn resolve_topology() -> SessionTopology { let cfg = crate::config::config(); // `NO_HELPER`/`NO_WGC` force single-process; IDD-push captures in-process in Session 0 (no helper); // otherwise the helper runs when forced or when we're SYSTEM (in-process WGC can't activate there). 
@@ -151,7 +171,7 @@ fn resolve_topology() -> SessionTopology { } #[cfg(not(target_os = "windows"))] -fn resolve_topology() -> SessionTopology { +pub(crate) fn resolve_topology() -> SessionTopology { SessionTopology::SingleProcess } diff --git a/crates/punktfunk-host/src/spike.rs b/crates/punktfunk-host/src/spike.rs index cdfa07f..c8c5173 100644 --- a/crates/punktfunk-host/src/spike.rs +++ b/crates/punktfunk-host/src/spike.rs @@ -109,7 +109,8 @@ pub fn run(opts: Options) -> Result<()> { opts.fps, opts.bitrate_bps, first.is_cuda(), - 8, // spike synthetic harness: 8-bit + 8, // spike synthetic harness: 8-bit + encode::ChromaFormat::Yuv420, // ...and 4:2:0 ) .context("open encoder")?; diff --git a/crates/punktfunk-host/src/windows/wgc_helper.rs b/crates/punktfunk-host/src/windows/wgc_helper.rs index 0fef4f2..4224e96 100644 --- a/crates/punktfunk-host/src/windows/wgc_helper.rs +++ b/crates/punktfunk-host/src/windows/wgc_helper.rs @@ -98,6 +98,9 @@ pub fn run(opts: HelperOptions) -> Result<()> { opts.bitrate_kbps as u64 * 1000, false, // not cuda opts.bit_depth, // 8, or 10 = Main10 (HDR auto-upgrades from the Rgb10a2 frame regardless) + // The two-process WGC relay helper encodes 4:2:0 in v1 (4:4:4 over the relay is a follow-up); + // the host gates 4:4:4 to the single-process topology. + encode::ChromaFormat::Yuv420, ) .context("open NVENC")?; diff --git a/include/punktfunk_core.h b/include/punktfunk_core.h index 09e7328..d963b57 100644 --- a/include/punktfunk_core.h +++ b/include/punktfunk_core.h @@ -94,6 +94,12 @@ // BT.2020 PQ HDR10 (implies 10-bit). (Mirrors `quic::VIDEO_CAP_HDR`.) #define PUNKTFUNK_VIDEO_CAP_HDR 2 +// Video-capability bit for [`punktfunk_connect_ex5`] (`video_caps`): the client can decode a +// full-chroma 4:4:4 HEVC stream (Range Extensions). 
The host emits 4:4:4 only when this is set, +// the host opted in, the codec is HEVC, and the GPU supports it — else the stream stays 4:2:0 and +// [`punktfunk_connection_chroma_format`] reports the real value. (Mirrors `quic::VIDEO_CAP_444`.) +#define PUNKTFUNK_VIDEO_CAP_444 4 + // 16-byte AEAD authentication tag appended by GCM. #define TAG_LEN 16 @@ -180,6 +186,27 @@ #define VIDEO_CAP_HDR 2 #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// [`Hello::video_caps`] bit: the client can decode a full-chroma **4:4:4** HEVC stream (HEVC +// Range Extensions / Rec.ITU-T H.265 `chroma_format_idc = 3`). The host emits 4:4:4 ONLY when this +// bit is set, the host opted in (`PUNKTFUNK_444`), the codec is HEVC, **and** the GPU/driver +// actually supports a 4:4:4 encode (probed) — otherwise the session stays 4:2:0 and +// [`Welcome::chroma_format`] reflects the real resolved value. Independent of 10-bit/HDR (4:4:4 is a +// chroma decision, bit depth is a depth decision; the two may combine where the hardware allows). +#define VIDEO_CAP_444 4 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// HEVC `chroma_format_idc` for 4:2:0 — what every pre-4:4:4 build produced and the back-compat +// default when a peer omits [`Welcome::chroma_format`]. +#define CHROMA_IDC_420 1 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// HEVC `chroma_format_idc` for full-chroma 4:4:4 (Range Extensions). +#define CHROMA_IDC_444 3 +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // Longest device name carried in a [`Hello`] (bytes of UTF-8; longer names are truncated on // encode, rejected on decode — a one-byte length prefix caps it at 255 anyway). @@ -498,6 +525,25 @@ typedef struct { } PunktfunkAudioPacket; #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// One decoded audio frame from [`punktfunk_connection_next_audio_pcm`]: interleaved 32-bit +// float PCM at 48 kHz, in the canonical wire channel order `FL FR FC LFE RL RR SL SR` (the +// first `channels` of it). 
`samples` points at `frame_count * channels` floats and borrows +// connection memory **until the next PCM call** on this handle. +typedef struct { + // Interleaved f32 samples (wire channel order), `frame_count * channels` long. + const float *samples; + // Samples per channel in this frame. + uint32_t frame_count; + // Channel count (2/6/8) — the negotiated [`punktfunk_connection_audio_channels`]. + uint8_t channels; + // Source packet sequence number. + uint32_t seq; + // Capture presentation timestamp (ns). + uint64_t pts_ns; +} PunktfunkAudioPcm; +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // One DualSense HID-output feedback event a game wrote to the host's virtual pad // ([`punktfunk_connection_next_hidout`]). `kind` selects which fields are meaningful — replay it @@ -832,6 +878,33 @@ PunktfunkConnection *punktfunk_connect_ex5(const char *host, uint32_t timeout_ms); #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Like [`punktfunk_connect_ex5`], but additionally requests the audio channel count: +// `2` (stereo, the default behaviour of every earlier variant), `6` (5.1) or `8` (7.1). The host +// clamps the request to what it can actually capture and echoes the resolved count via +// [`punktfunk_connection_audio_channels`]; the `0xC9` audio frames are Opus-(multi)stream encoded +// for that layout. A client that wants surround calls this; everything else inherits stereo. +// +// # Safety +// Same as [`punktfunk_connect`]. 
+PunktfunkConnection *punktfunk_connect_ex6(const char *host, + uint16_t port, + uint32_t width, + uint32_t height, + uint32_t refresh_hz, + uint32_t compositor, + uint32_t gamepad, + uint32_t bitrate_kbps, + uint8_t video_caps, + uint8_t audio_channels, + const char *launch_id, + const uint8_t *pin_sha256, + uint8_t *observed_sha256_out, + const char *client_cert_pem, + const char *client_key_pem, + uint32_t timeout_ms); +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // Generate a persistent client identity: a self-signed certificate + private key, both // PEM, NUL-terminated, written into the caller's buffers. Generate ONCE, store both @@ -897,6 +970,36 @@ PunktfunkStatus punktfunk_connection_next_audio(PunktfunkConnection *c, uint32_t timeout_ms); #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Read the audio channel count the host resolved for this session (from its Welcome): `2` +// (stereo), `6` (5.1) or `8` (7.1). `*out` is filled when non-NULL. The `0xC9` Opus frames are +// (multistream-)encoded for this layout; an embedder decoding raw frames itself must build its +// decoder from THIS value (see [`crate::audio::layout_for`]) — or use +// [`punktfunk_connection_next_audio_pcm`], which decodes in-core. Available immediately after a +// successful connect (it doesn't change without a reconfigure). +// +// # Safety +// `c` is a valid connection handle; `out` is NULL or writable for one `u8`. +PunktfunkStatus punktfunk_connection_audio_channels(PunktfunkConnection *c, uint8_t *out); +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Pull the next audio frame and **decode it in-core** to interleaved f32 PCM — for embedders +// without a multistream-capable Opus decoder (e.g. Apple, whose AudioToolbox Opus path is +// stereo-only). The decoder is built once from the negotiated channel count and handles 2/6/8 +// channels (a 1-coupled-stream multistream decoder is exactly a stereo decoder). 
Same +// timeout/closed semantics as [`punktfunk_connection_next_audio`]; `out->samples` borrows +// connection memory until the next PCM call on this handle. Use EITHER this or +// [`punktfunk_connection_next_audio`] on a given connection, from one dedicated audio thread — +// not both (they share the underlying queue). +// +// # Safety +// `c` is a valid connection handle; `out` is writable. At most one thread pulls audio. +PunktfunkStatus punktfunk_connection_next_audio_pcm(PunktfunkConnection *c, + PunktfunkAudioPcm *out, + uint32_t timeout_ms); +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // Pull the next rumble (force-feedback) update, waiting up to `timeout_ms`. Amplitudes // are 0..0xFFFF (`low` = low-frequency motor, `high` = high-frequency), `(0, 0)` = stop. @@ -960,6 +1063,18 @@ PunktfunkStatus punktfunk_connection_color_info(PunktfunkConnection *c, uint8_t *bit_depth); #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Read the session's resolved chroma subsampling (from the host's Welcome) as the HEVC +// `chroma_format_idc`: `1` = 4:2:0 (the default every pre-4:4:4 host produced), `3` = full-chroma +// 4:4:4. `*out` is filled when non-NULL. The in-band SPS is authoritative; this lets the embedder +// pre-size its decoder / pick a 4:4:4 pixel format up front. Available immediately after a +// successful connect (it doesn't change without a reconfigure). +// +// # Safety +// `c` is a valid connection handle; `out` is NULL or writable for one `u8`. +PunktfunkStatus punktfunk_connection_chroma_format(PunktfunkConnection *c, uint8_t *out); +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // Send one input event to the host as a QUIC datagram (non-blocking enqueue). 
// diff --git a/scripts/host.env.example b/scripts/host.env.example index 5e527d9..63ba9c1 100644 --- a/scripts/host.env.example +++ b/scripts/host.env.example @@ -48,6 +48,12 @@ PUNKTFUNK_ZEROCOPY=1 #PUNKTFUNK_INPUT_BACKEND=libei # wlr | libei | gamescope | uinput #PUNKTFUNK_FEC_PCT=20 # video FEC overhead percent #PUNKTFUNK_PERF=1 # per-stage timing logs +# Full-chroma 4:4:4 (HEVC Range Extensions) — sharper text/desktop, no chroma loss. Honored only on +# the punktfunk/1 native path when the client advertises 4:4:4 AND the GPU supports it (probed; else +# the session stays 4:2:0). HEVC-only; independent of 10-bit. NVENC (NVIDIA) is the validated path; +# VAAPI/AMF/QSV decline (4:2:0). GameStream/Moonlight always stays 4:2:0. +#PUNKTFUNK_444=1 +#PUNKTFUNK_10BIT=1 # HEVC Main10 / HDR (when the client advertises 10-bit) #RUST_LOG=info # Management API bearer token. The mgmt API is HTTPS + token-authenticated ALWAYS (even on # loopback); if unset it is auto-generated + persisted to ~/.config/punktfunk/mgmt-token (which the