From 9537efdcd54c546fb148c3fbd48b0d80da6cc756 Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Wed, 17 Jun 2026 00:17:58 +0200 Subject: [PATCH] feat(client/windows): HDR10 (BT.2020 PQ) decode + present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Light up the dormant 10-bit/HDR path end to end on the Windows client. - core: NativeClient::connect gains a video_caps param threaded into the Hello. The Windows client advertises VIDEO_CAP_10BIT | VIDEO_CAP_HDR; every other caller (the C ABI shim, Linux, Android, host test connects) passes 0, so the 8-bit BT.709 path is unchanged. The host already gates a Main10/PQ encode on these bits + PUNKTFUNK_10BIT. - video.rs: a PQ frame (color_trc == SMPTE2084) converts 10-bit YUV → X2BGR10 (== DXGI R10G10B10A2) with the BT.2020 matrix via sws_setColorspaceDetails; swscale applies only the matrix + range, so the PQ-encoded samples pass through untouched. - present.rs: on an HDR frame the swapchain flips in place (ResizeBuffers) to R10G10B10A2 + DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020 + HDR10 metadata; the passthrough shader is unchanged and the compositor maps PQ→display. Switched to ALPHA_MODE_IGNORE so the 2 padding/alpha bits of the 10-bit format (left 0 by the X2BGR10 upload) don't render transparent. SDR stays 8-bit B8G8R8A8. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/punktfunk-android/src/session.rs | 1 + crates/punktfunk-client-linux/src/app.rs | 3 +- crates/punktfunk-client-linux/src/session.rs | 1 + .../punktfunk-client-windows/src/present.rs | 98 ++++++++++++++++++- .../punktfunk-client-windows/src/session.rs | 6 +- crates/punktfunk-client-windows/src/video.rs | 60 +++++++++--- crates/punktfunk-core/src/abi.rs | 3 + crates/punktfunk-core/src/client.rs | 15 ++- crates/punktfunk-host/src/m3.rs | 4 + 9 files changed, 165 insertions(+), 26 deletions(-) diff --git a/crates/punktfunk-android/src/session.rs b/crates/punktfunk-android/src/session.rs index 98d2365..5b9b900 100644 --- a/crates/punktfunk-android/src/session.rs +++ b/crates/punktfunk-android/src/session.rs @@ -182,6 +182,7 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeConnect<'lo CompositorPref::from_u8(compositor_pref.clamp(0, u8::MAX as jint) as u8), GamepadPref::from_u8(gamepad_pref.clamp(0, u8::MAX as jint) as u8), bitrate_kbps.max(0) as u32, // 0 = host default + 0, // video_caps: 8-bit only on Android for now None, // launch: default app pin, // Some → Crypto on host-fp mismatch identity, // owned (cert, key) PEM, or None (anonymous) diff --git a/crates/punktfunk-client-linux/src/app.rs b/crates/punktfunk-client-linux/src/app.rs index 265dc5b..177f748 100644 --- a/crates/punktfunk-client-linux/src/app.rs +++ b/crates/punktfunk-client-linux/src/app.rs @@ -308,7 +308,8 @@ fn speed_test(app: Rc, req: ConnectRequest) { }, CompositorPref::Auto, GamepadPref::Auto, - 0, + 0, // bitrate_kbps (host default) + 0, // video_caps: the Linux client has no 10-bit/HDR present path yet None, // launch: speed-test probe connect, no game pin, Some(identity), diff --git a/crates/punktfunk-client-linux/src/session.rs b/crates/punktfunk-client-linux/src/session.rs index 28ac757..e620ae5 100644 --- a/crates/punktfunk-client-linux/src/session.rs +++ b/crates/punktfunk-client-linux/src/session.rs @@ -96,6 
+96,7 @@ fn pump( params.compositor, params.gamepad, params.bitrate_kbps, + 0, // video_caps: the Linux client has no 10-bit/HDR present path yet None, // launch: the Linux client has no library picker yet params.pin, Some(params.identity), diff --git a/crates/punktfunk-client-windows/src/present.rs b/crates/punktfunk-client-windows/src/present.rs index 5740b97..e72b708 100644 --- a/crates/punktfunk-client-windows/src/present.rs +++ b/crates/punktfunk-client-windows/src/present.rs @@ -5,8 +5,12 @@ //! The device prefers a hardware adapter and falls back to **WARP** (the GPU-less dev box runs //! the whole present path in software). The draw is a single full-screen triangle sampling the //! video texture; a letterbox is produced by clearing the back buffer black and setting the -//! viewport to the Contain-fit rect (no per-frame vertex buffer). SDR 8-bit path; the -//! 10-bit/HDR present (`R10G10B10A2` + `SetColorSpace1`) is a follow-up alongside P010 decode. +//! viewport to the Contain-fit rect (no per-frame vertex buffer). +//! +//! **HDR10**: when a frame is BT.2020 PQ (`CpuFrame::hdr`), the swapchain flips to +//! `R10G10B10A2` + `DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020` (+ HDR10 metadata) via +//! `ResizeBuffers`/`SetColorSpace1`; the decoded samples are already PQ-encoded so the shader is a +//! plain passthrough and the compositor maps PQ→display. SDR stays 8-bit B8G8R8A8. //! //! All `windows` types here come from the same windows-rs commit as `windows-reactor`, so the //! `IDXGISwapChain1` handed to `set_swap_chain` satisfies reactor's `windows_core::Interface`. @@ -50,6 +54,9 @@ pub struct Presenter { /// Panel (swapchain) size in pixels, updated on resize. panel_w: u32, panel_h: u32, + /// Whether the swapchain is currently in 10-bit HDR10 (R10G10B10A2 + ST.2084) mode; flipped + /// to match each frame's `hdr` flag. 
+ hdr: bool, } impl Presenter { @@ -69,6 +76,7 @@ impl Presenter { tex: None, panel_w: width.max(1), panel_h: height.max(1), + hdr: false, }) } @@ -100,6 +108,9 @@ impl Presenter { /// last texture (or black). Called from the reactor `on_rendering` per-frame callback. pub fn present(&mut self, frame: Option<&CpuFrame>) { if let Some(f) = frame { + if f.hdr != self.hdr { + self.set_hdr(f.hdr); + } if let Err(e) = self.upload(f) { tracing::warn!(error = %e, "frame upload failed"); } @@ -144,16 +155,74 @@ impl Presenter { } } + /// Switch the swapchain between 8-bit SDR (B8G8R8A8, sRGB/BT.709) and 10-bit HDR10 + /// (R10G10B10A2, ST.2084 PQ BT.2020). `ResizeBuffers` can change the back-buffer format in + /// place, so the panel binding (`set_swap_chain`) stays valid — no rebind needed. The decoded + /// samples are already PQ-encoded BT.2020 (see `video::convert`), so the colour space is all the + /// compositor needs to map them to the display. + fn set_hdr(&mut self, on: bool) { + self.rtv = None; // release back-buffer refs before ResizeBuffers + self.tex = None; // texture format changes (R10G10B10A2 vs R8G8B8A8) + let format = if on { + DXGI_FORMAT_R10G10B10A2_UNORM + } else { + DXGI_FORMAT_B8G8R8A8_UNORM + }; + unsafe { + if let Err(e) = self.swap.ResizeBuffers( + 0, + self.panel_w, + self.panel_h, + format, + DXGI_SWAP_CHAIN_FLAG(0), + ) { + tracing::warn!(error = %e, "ResizeBuffers for HDR switch failed"); + return; + } + let colorspace = if on { + DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020 + } else { + DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 + }; + if let Ok(sc3) = self.swap.cast::() { + // Only set a colour space the swapchain accepts for present (on an SDR desktop the + // DWM still tone-maps HDR10 → SDR, so leaving the default there is fine). 
+ if let Ok(support) = sc3.CheckColorSpaceSupport(colorspace) { + if support & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT.0 as u32 != 0 { + let _ = sc3.SetColorSpace1(colorspace); + } + } + } + if on { + if let Ok(sc4) = self.swap.cast::() { + let md = hdr10_metadata(); + let bytes = std::slice::from_raw_parts( + &md as *const DXGI_HDR_METADATA_HDR10 as *const u8, + std::mem::size_of::(), + ); + let _ = sc4.SetHDRMetaData(DXGI_HDR_METADATA_TYPE_HDR10, Some(bytes)); + } + } + } + self.hdr = on; + tracing::info!(hdr = on, "swapchain colour mode switched"); + } + fn upload(&mut self, frame: &CpuFrame) -> Result<()> { let (w, h) = (frame.width, frame.height); let need_new = !matches!(&self.tex, Some((_, _, tw, th)) if *tw == w && *th == h); if need_new { + let format = if self.hdr { + DXGI_FORMAT_R10G10B10A2_UNORM + } else { + DXGI_FORMAT_R8G8B8A8_UNORM + }; let desc = D3D11_TEXTURE2D_DESC { Width: w, Height: h, MipLevels: 1, ArraySize: 1, - Format: DXGI_FORMAT_R8G8B8A8_UNORM, + Format: format, SampleDesc: DXGI_SAMPLE_DESC { Count: 1, Quality: 0, @@ -191,7 +260,7 @@ impl Presenter { let row_bytes = (w as usize) * 4; for y in 0..h as usize { std::ptr::copy_nonoverlapping( - frame.rgba.as_ptr().add(y * src_pitch), + frame.pixels.as_ptr().add(y * src_pitch), dst.add(y * dst_pitch), row_bytes.min(src_pitch), ); @@ -273,7 +342,10 @@ fn create_composition_swapchain( BufferCount: 2, Scaling: DXGI_SCALING_STRETCH, SwapEffect: DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL, - AlphaMode: DXGI_ALPHA_MODE_PREMULTIPLIED, + // IGNORE (opaque), not PREMULTIPLIED: the video fills the panel and the HDR `X2BGR10` + // upload leaves the 2 padding/alpha bits 0 — premultiplied alpha would then make HDR frames + // transparent. Opaque is correct for a full-frame video surface either way. 
+ AlphaMode: DXGI_ALPHA_MODE_IGNORE, Flags: 0, }; unsafe { @@ -354,3 +426,19 @@ fn blob_bytes(blob: &ID3DBlob) -> &[u8] { std::slice::from_raw_parts(p, n) } } + +/// Generic HDR10 mastering metadata: BT.2020 primaries + D65 white (0.00002 units), a 1000-nit +/// mastering display, MaxCLL 1000 / MaxFALL 400. The protocol doesn't carry the stream's real +/// mastering metadata yet (host follow-up), so these are sane defaults the display tone-maps from. +fn hdr10_metadata() -> DXGI_HDR_METADATA_HDR10 { + DXGI_HDR_METADATA_HDR10 { + RedPrimary: [35400, 14600], + GreenPrimary: [8500, 39850], + BluePrimary: [6550, 2300], + WhitePoint: [15635, 16450], + MaxMasteringLuminance: 1000, + MinMasteringLuminance: 1, // 0.0001-nit units → 0.0001 nits + MaxContentLightLevel: 1000, + MaxFrameAverageLightLevel: 400, + } +} diff --git a/crates/punktfunk-client-windows/src/session.rs b/crates/punktfunk-client-windows/src/session.rs index 127e547..080fe20 100644 --- a/crates/punktfunk-client-windows/src/session.rs +++ b/crates/punktfunk-client-windows/src/session.rs @@ -30,7 +30,7 @@ pub struct SessionParams { pub identity: (String, String), } -#[derive(Clone, Copy, Default)] +#[derive(Clone, Copy, Default, PartialEq)] pub struct Stats { pub fps: f32, pub mbps: f32, @@ -99,6 +99,10 @@ fn pump( params.compositor, params.gamepad, params.bitrate_kbps, + // Advertise 10-bit + HDR10: the presenter handles BT.2020 PQ (R10G10B10A2) frames, so the + // host may upgrade HDR content to a Main10/PQ stream (it still only does so for actual HDR + // content with its own 10-bit gate). 8-bit SDR is unaffected. 
+ punktfunk_core::quic::VIDEO_CAP_10BIT | punktfunk_core::quic::VIDEO_CAP_HDR, None, // launch: the Windows client has no library picker yet params.pin, Some(params.identity), diff --git a/crates/punktfunk-client-windows/src/video.rs b/crates/punktfunk-client-windows/src/video.rs index afcf266..655e0d7 100644 --- a/crates/punktfunk-client-windows/src/video.rs +++ b/crates/punktfunk-client-windows/src/video.rs @@ -20,13 +20,17 @@ pub enum DecodedFrame { Cpu(CpuFrame), } -/// RGBA pixels for a D3D11 `R8G8B8A8_UNORM` texture upload (which takes a row pitch). +/// Packed 4-byte-per-pixel frame for a D3D11 texture upload (which takes a row pitch). The bytes +/// are `R8G8B8A8` for SDR and `X2BGR10` (== DXGI `R10G10B10A2`, R in the low 10 bits) for HDR. pub struct CpuFrame { pub width: u32, pub height: u32, - /// RGBA row stride in bytes (≥ width*4 — swscale pads rows for SIMD). + /// Row stride in bytes (≥ width*4 — swscale pads rows for SIMD). pub stride: usize, - pub rgba: Vec, + pub pixels: Vec, + /// BT.2020 PQ HDR10 frame: `pixels` is `X2BGR10` and the presenter switches to a 10-bit + /// R10G10B10A2 + ST.2084 swapchain. `false` = ordinary 8-bit BT.709 SDR. + pub hdr: bool, } pub struct Decoder { @@ -51,8 +55,9 @@ impl Decoder { struct SoftwareDecoder { decoder: ffmpeg::decoder::Video, - /// Rebuilt whenever the decoded format/size changes (mid-stream `Reconfigure`). - sws: Option<(scaling::Context, Pixel, u32, u32)>, + /// Rebuilt whenever the decoded format/size **or output format** changes (mid-stream + /// `Reconfigure`, or an SDR↔HDR flip): `(ctx, src_fmt, w, h, dst_fmt)`. 
+ sws: Option<(scaling::Context, Pixel, u32, u32, Pixel)>, } impl SoftwareDecoder { @@ -79,28 +84,53 @@ impl SoftwareDecoder { let mut frame = AvFrame::empty(); let mut out = None; while self.decoder.receive_frame(&mut frame).is_ok() { - out = Some(self.convert_rgba(&frame)?); + out = Some(self.convert(&frame)?); } Ok(out) } - fn convert_rgba(&mut self, frame: &AvFrame) -> Result { + /// Convert the decoded YUV frame to a packed 4-byte format the presenter uploads directly: + /// SDR → `RGBA` (BT.709), HDR (SMPTE ST.2084 / PQ transfer) → `X2BGR10` (10-bit, == DXGI + /// R10G10B10A2) using the BT.2020 matrix. For HDR the PQ-encoded values pass through unchanged + /// (swscale only applies the YUV→RGB matrix + range, never the transfer) — exactly what an + /// HDR10/ST.2084 swapchain wants. + fn convert(&mut self, frame: &AvFrame) -> Result { + use ffmpeg::color::TransferCharacteristic; let (fmt, w, h) = (frame.format(), frame.width(), frame.height()); - let rebuild = - !matches!(&self.sws, Some((_, f, sw, sh)) if *f == fmt && *sw == w && *sh == h); + let hdr = frame.color_transfer_characteristic() == TransferCharacteristic::SMPTE2084; + let dst = if hdr { Pixel::X2BGR10LE } else { Pixel::RGBA }; + let rebuild = !matches!(&self.sws, Some((_, f, sw, sh, d)) if *f == fmt && *sw == w && *sh == h && *d == dst); if rebuild { - let ctx = scaling::Context::get(fmt, w, h, Pixel::RGBA, w, h, scaling::Flags::POINT) + let mut ctx = scaling::Context::get(fmt, w, h, dst, w, h, scaling::Flags::POINT) .context("swscale context")?; - self.sws = Some((ctx, fmt, w, h)); + if hdr { + // BT.2020 non-constant-luminance YUV (limited range) → full-range RGB. swscale + // applies only the matrix + range here, so the samples stay PQ-encoded. 
+ unsafe { + let coef = ffmpeg::ffi::sws_getCoefficients(ffmpeg::ffi::SWS_CS_BT2020); + ffmpeg::ffi::sws_setColorspaceDetails( + ctx.as_mut_ptr(), + coef, + 0, // src range: limited (video) + coef, + 1, // dst range: full + 0, + 1 << 16, + 1 << 16, // brightness / contrast / saturation defaults (16.16) + ); + } + } + self.sws = Some((ctx, fmt, w, h, dst)); } let (sws, ..) = self.sws.as_mut().unwrap(); - let mut rgba = AvFrame::empty(); - sws.run(frame, &mut rgba).map_err(|e| anyhow!("sws: {e}"))?; + let mut conv = AvFrame::empty(); + sws.run(frame, &mut conv).map_err(|e| anyhow!("sws: {e}"))?; Ok(CpuFrame { width: w, height: h, - stride: rgba.stride(0), - rgba: rgba.data(0).to_vec(), + stride: conv.stride(0), + pixels: conv.data(0).to_vec(), + hdr, }) } } diff --git a/crates/punktfunk-core/src/abi.rs b/crates/punktfunk-core/src/abi.rs index 5c026df..6476d3c 100644 --- a/crates/punktfunk-core/src/abi.rs +++ b/crates/punktfunk-core/src/abi.rs @@ -891,6 +891,9 @@ pub unsafe extern "C" fn punktfunk_connect_ex4( pref, gamepad, bitrate_kbps, + // 8-bit only over the C ABI for now — the ABI doesn't yet carry the embedder's video + // caps (Apple/Android decode 8-bit). The native Windows client advertises 10-bit/HDR. + 0, launch, pin, identity, diff --git a/crates/punktfunk-core/src/client.rs b/crates/punktfunk-core/src/client.rs index eac4afc..49e9d86 100644 --- a/crates/punktfunk-core/src/client.rs +++ b/crates/punktfunk-core/src/client.rs @@ -196,6 +196,10 @@ impl NativeClient { compositor: CompositorPref, gamepad: GamepadPref, bitrate_kbps: u32, + // Client video capabilities advertised to the host (bitfield of quic::VIDEO_CAP_10BIT / + // VIDEO_CAP_HDR) — the host upgrades to a 10-bit / HDR encode only when the matching bit is + // set. 0 = the 8-bit BT.709 stream every client understands. 
+ video_caps: u8, launch: Option, pin: Option<[u8; 32]>, identity: Option<(String, String)>, @@ -245,6 +249,7 @@ impl NativeClient { compositor, gamepad, bitrate_kbps, + video_caps, launch, pin, identity, @@ -569,6 +574,7 @@ struct WorkerArgs { compositor: CompositorPref, gamepad: GamepadPref, bitrate_kbps: u32, + video_caps: u8, launch: Option, pin: Option<[u8; 32]>, identity: Option<(String, String)>, @@ -597,6 +603,7 @@ async fn worker_main(args: WorkerArgs) { compositor, gamepad, bitrate_kbps, + video_caps, launch, pin, identity, @@ -657,10 +664,10 @@ async fn worker_main(args: WorkerArgs) { name: None, // Library id to launch this session, if the embedder asked for one. launch: launch.clone(), - // TODO(hdr): advertise the embedder's real decode caps once the ABI carries them - // and the Apple/Linux clients decode 10-bit. 0 = 8-bit only — the host then never - // upgrades this connector's session to a stream it can't yet present. - video_caps: 0, + // The embedder's decode/present caps (e.g. the Windows client advertises + // VIDEO_CAP_10BIT | VIDEO_CAP_HDR). The host only upgrades to a 10-bit / HDR encode + // when the matching bit is set, so `0` stays an 8-bit BT.709 stream. + video_caps, } .encode(), ) diff --git a/crates/punktfunk-host/src/m3.rs b/crates/punktfunk-host/src/m3.rs index 2c030fe..32954ad 100644 --- a/crates/punktfunk-host/src/m3.rs +++ b/crates/punktfunk-host/src/m3.rs @@ -3180,6 +3180,7 @@ mod tests { CompositorPref::Auto, GamepadPref::Auto, 0, + 0, // video_caps None, // launch None, Some((cert.clone(), key.clone())), @@ -3211,6 +3212,7 @@ mod tests { CompositorPref::Auto, GamepadPref::Auto, 0, + 0, // video_caps None, // launch None, Some((cert, key)), @@ -3271,6 +3273,7 @@ mod tests { CompositorPref::Auto, GamepadPref::Auto, 0, + 0, // video_caps None, // launch None, None, @@ -3297,6 +3300,7 @@ mod tests { CompositorPref::Auto, GamepadPref::Auto, 0, + 0, // video_caps None, // launch Some(host_fp), Some((cert.clone(), key.clone())),