diff --git a/clients/android/native/src/decode.rs b/clients/android/native/src/decode.rs index e46012e..00b7985 100644 --- a/clients/android/native/src/decode.rs +++ b/clients/android/native/src/decode.rs @@ -52,6 +52,24 @@ pub fn run( format.set_i32("priority", 0); // 0 = realtime format.set_i32("operating-rate", mode.refresh_hz as i32); + // HDR static metadata (ST.2086 mastering + content light level): when an HDR session was + // negotiated, set KEY_HDR_STATIC_INFO so the display tone-maps from the source's real grade. + // MediaCodec wants it BEFORE configure(), and the host sends a 0xCE right after the handshake, + // so it's typically already queued; wait briefly otherwise. The Surface DataSpace (applied on + // OutputFormatChanged below) carries transfer/primaries regardless — this adds the luminance the + // tone-mapper needs. A non-HDR display still gets sensible SurfaceFlinger tone-mapping. + if client.color.is_hdr() { + match client.next_hdr_meta(Duration::from_millis(250)) { + Ok(meta) => { + format.set_buffer("hdr-static-info", &android_hdr_static_info(&meta)); + log::info!("decode: HDR static metadata applied (KEY_HDR_STATIC_INFO)"); + } + Err(_) => { + log::info!("decode: HDR session but no mastering metadata yet — DataSpace only") + } + } + } + if let Err(e) = codec.configure(&format, Some(&window), MediaCodecDirection::Decoder) { log::error!("decode: configure failed: {e}"); return; @@ -258,3 +276,27 @@ fn hdr_dataspace(codec: &MediaCodec) -> Option { _ => None, // SDR (BT.709 / SDR_VIDEO) or unspecified } } + +/// Serialize [`HdrMeta`](punktfunk_core::quic::HdrMeta) into Android's `KEY_HDR_STATIC_INFO` +/// (`hdr-static-info`) layout: a 25-byte CTA-861.3 / `HDRStaticInfo.Type1` blob — descriptor id 0, +/// then primaries in **R, G, B** order, white point, max/min display luminance, MaxCLL, MaxFALL, all +/// **little-endian** `u16`. 
Two conversions vs our wire form: HdrMeta stores primaries in ST.2086 +/// **G, B, R** order (reorder to R, G, B), and `max_display_mastering_luminance` is in 0.0001-cd/m² +/// units while Android wants **whole nits** (min stays 0.0001-nit). Chromaticities (1/50000) and +/// MaxCLL/MaxFALL (nits) match 1:1. +fn android_hdr_static_info(m: &punktfunk_core::quic::HdrMeta) -> [u8; 25] { + let [g, b_, r] = m.display_primaries; // ST.2086 G, B, R + let max_nits = (m.max_display_mastering_luminance / 10_000).min(u16::MAX as u32) as u16; + let min_units = m.min_display_mastering_luminance.min(u16::MAX as u32) as u16; + let fields: [u16; 12] = [ + r[0], r[1], g[0], g[1], b_[0], b_[1], // R, G, B primaries + m.white_point[0], m.white_point[1], // white point + max_nits, min_units, // max (nits) / min (0.0001-nit) display luminance + m.max_cll, m.max_fall, // MaxCLL / MaxFALL (nits) + ]; + let mut out = [0u8; 25]; // out[0] = 0 (Type 1 descriptor id), already zero + for (i, v) in fields.iter().enumerate() { + out[1 + i * 2..3 + i * 2].copy_from_slice(&v.to_le_bytes()); + } + out +} diff --git a/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift b/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift index d500f97..ed6cd4a 100644 --- a/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift +++ b/clients/apple/Sources/PunktfunkKit/PunktfunkConnection.swift @@ -214,6 +214,20 @@ public final class PunktfunkConnection { /// (20 000) when 0 was requested. `0` = an older host that didn't report it. public private(set) var resolvedBitrateKbps: UInt32 = 0 + /// The colour signalling the host actually encodes with (CICP code points): `colorPrimaries` + /// (1=BT.709, 9=BT.2020), `colorTransfer` (1=BT.709, 16=PQ, 18=HLG), `colorMatrix` + /// (1=BT.709, 9=BT.2020-NCL), `colorFullRange`. BT.709 limited SDR for an older host. Configure + /// the decoder/presenter from these; mastering metadata arrives via `nextHdrMeta`. 
+ public private(set) var colorPrimaries: UInt8 = 1 + public private(set) var colorTransfer: UInt8 = 1 + public private(set) var colorMatrix: UInt8 = 1 + public private(set) var colorFullRange: Bool = false + /// Encoded bit depth (8 or 10). + public private(set) var bitDepth: UInt8 = 8 + /// True when the negotiated stream is HDR (PQ or HLG transfer) — drive an HDR present path and + /// drain `nextHdrMeta`. + public var isHDR: Bool { colorTransfer == 16 || colorTransfer == 18 } + /// Connect and start a session at the requested mode (the host creates a native virtual /// output at exactly this size/refresh). Blocks up to `timeoutMs`. /// @@ -242,11 +256,14 @@ public final class PunktfunkConnection { compositor: Compositor = .auto, gamepad: GamepadType = .auto, bitrateKbps: UInt32 = 0, + videoCaps: UInt8 = 0, launchID: String? = nil, timeoutMs: UInt32 = 10_000 ) throws { if let pin = pinSHA256, pin.count != 32 { throw PunktfunkClientError.invalidPin } var observed = [UInt8](repeating: 0, count: 32) + // `videoCaps` advertises decode/present capability (PUNKTFUNK_VIDEO_CAP_10BIT | _HDR): the + // host upgrades to a 10-bit / BT.2020 PQ stream only when set. 0 = 8-bit BT.709 SDR. // `launchID` (a host library id like "steam:570") asks the host to launch that title in // the session; the host resolves it against its own library — nil = the host's default. 
handle = host.withCString { cs in @@ -255,16 +272,16 @@ public final class PunktfunkConnection { withOptionalCString(launchID) { launch in if let pin = pinSHA256 { return pin.withUnsafeBytes { p in - punktfunk_connect_ex4( + punktfunk_connect_ex5( cs, port, width, height, refreshHz, compositor.rawValue, - gamepad.rawValue, bitrateKbps, launch, + gamepad.rawValue, bitrateKbps, videoCaps, launch, p.bindMemory(to: UInt8.self).baseAddress, &observed, cert, key, timeoutMs) } } - return punktfunk_connect_ex4( + return punktfunk_connect_ex5( cs, port, width, height, refreshHz, compositor.rawValue, - gamepad.rawValue, bitrateKbps, launch, + gamepad.rawValue, bitrateKbps, videoCaps, launch, nil, &observed, cert, key, timeoutMs) } } @@ -289,6 +306,13 @@ public final class PunktfunkConnection { var br: UInt32 = 0 _ = punktfunk_connection_bitrate(handle, &br) resolvedBitrateKbps = br + var prim: UInt8 = 1, trc: UInt8 = 1, mtx: UInt8 = 1, fullRange: UInt8 = 0, depth: UInt8 = 8 + _ = punktfunk_connection_color_info(handle, &prim, &trc, &mtx, &fullRange, &depth) + colorPrimaries = prim + colorTransfer = trc + colorMatrix = mtx + colorFullRange = fullRange != 0 + bitDepth = depth } /// A bandwidth speed-test measurement (see `startSpeedTest`). Partial until `done`. @@ -508,6 +532,78 @@ public final class PunktfunkConnection { } } + /// Static HDR mastering metadata (SMPTE ST.2086 + content light level) the host sent for an HDR + /// session. Mirrors the wire/ABI `PunktfunkHdrMeta`; primaries are in ST.2086 **G, B, R** order, + /// 1/50000 units; mastering luminance in 0.0001 cd/m²; MaxCLL/MaxFALL in nits. 
+ public struct HdrMeta: Sendable, Equatable { + public let primariesX: [UInt16] // [green, blue, red] + public let primariesY: [UInt16] + public let whitePointX: UInt16 + public let whitePointY: UInt16 + public let maxMasteringLuminance: UInt32 // 0.0001 cd/m² + public let minMasteringLuminance: UInt32 // 0.0001 cd/m² + public let maxCLL: UInt16 + public let maxFALL: UInt16 + + /// The 24-byte `mastering_display_colour_volume` payload (big-endian, ST.2086 G,B,R) — pass + /// directly to `kCVImageBufferMasteringDisplayColorVolumeKey` or `CAEDRMetadata`'s displayInfo. + public func masteringDisplayColorVolume() -> Data { + var d = Data() + func be16(_ v: UInt16) { d.append(UInt8(v >> 8)); d.append(UInt8(v & 0xFF)) } + func be32(_ v: UInt32) { + d.append(UInt8((v >> 24) & 0xFF)); d.append(UInt8((v >> 16) & 0xFF)) + d.append(UInt8((v >> 8) & 0xFF)); d.append(UInt8(v & 0xFF)) + } + for i in 0..<3 { be16(primariesX[i]); be16(primariesY[i]) } // G, B, R + be16(whitePointX); be16(whitePointY) + be32(maxMasteringLuminance); be32(minMasteringLuminance) + return d + } + + /// The 4-byte `content_light_level_info` payload (big-endian: MaxCLL, MaxFALL) — for + /// `kCVImageBufferContentLightLevelInfoKey` or `CAEDRMetadata`'s contentInfo. + public func contentLightLevelInfo() -> Data { + var d = Data() + func be16(_ v: UInt16) { d.append(UInt8(v >> 8)); d.append(UInt8(v & 0xFF)) } + be16(maxCLL); be16(maxFALL) + return d + } + } + + /// Pull the next static HDR metadata update; nil on timeout, throws `.closed` once the session + /// ended. Drain from the feedback thread alongside `nextRumble`/`nextHidOutput`. Nothing arrives + /// unless `isHDR` — poll with a short timeout, never spin. + public func nextHdrMeta(timeoutMs: UInt32 = 0) throws -> HdrMeta? 
{ + feedbackLock.lock() + defer { feedbackLock.unlock() } + guard let h = liveHandle() else { throw PunktfunkClientError.closed } + + var out = PunktfunkHdrMeta() + let rc = punktfunk_connection_next_hdr_meta(h, &out, timeoutMs) + switch rc { + case statusOK: + // The fixed C `uint16_t[3]` arrays import as tuples — copy them out. + let px = withUnsafeBytes(of: out.display_primaries_x) { + Array($0.bindMemory(to: UInt16.self)) + } + let py = withUnsafeBytes(of: out.display_primaries_y) { + Array($0.bindMemory(to: UInt16.self)) + } + return HdrMeta( + primariesX: px, primariesY: py, + whitePointX: out.white_point_x, whitePointY: out.white_point_y, + maxMasteringLuminance: out.max_display_mastering_luminance, + minMasteringLuminance: out.min_display_mastering_luminance, + maxCLL: out.max_cll, maxFALL: out.max_fall) + case statusNoFrame: + return nil + case statusClosed: + throw PunktfunkClientError.closed + default: + throw PunktfunkClientError.status(rc) + } + } + /// Send one input event (delivered to the host as a QUIC datagram). Thread-safe; /// silently dropped after close. public func send(_ event: PunktfunkInputEvent) { diff --git a/clients/linux/src/video.rs b/clients/linux/src/video.rs index fa459c4..2f79e64 100644 --- a/clients/linux/src/video.rs +++ b/clients/linux/src/video.rs @@ -164,8 +164,27 @@ impl SoftwareDecoder { let rebuild = !matches!(&self.sws, Some((_, f, sw, sh)) if *f == fmt && *sw == w && *sh == h); if rebuild { - let ctx = scaling::Context::get(fmt, w, h, Pixel::RGBA, w, h, scaling::Flags::POINT) - .context("swscale context")?; + let mut ctx = + scaling::Context::get(fmt, w, h, Pixel::RGBA, w, h, scaling::Flags::POINT) + .context("swscale context")?; + // swscale defaults to BT.601 coefficients, but our SDR HEVC stream is BT.709 limited + // range (the host signals BT.709 in the VUI). Without this, YUV→RGB decodes with BT.601 + // and SDR colours shift (greens/reds off). Source = limited/studio YUV, destination = + // full-range RGB. 
Inverse of the host's RGB→YUV CSC (encode/vaapi.rs). + const SWS_CS_ITU709: i32 = 1; + unsafe { + let cs709 = ffmpeg::ffi::sws_getCoefficients(SWS_CS_ITU709); + ffmpeg::ffi::sws_setColorspaceDetails( + ctx.as_mut_ptr(), + cs709, // inv_table: source (YUV) coefficients — BT.709 + 0, // srcRange: 0 = limited/studio (MPEG) + cs709, // table: destination coefficients (ignored for RGB output) + 1, // dstRange: 1 = full-range RGB + 0, + 1 << 16, + 1 << 16, // brightness, contrast, saturation (defaults) + ); + } self.sws = Some((ctx, fmt, w, h)); } let (sws, ..) = self.sws.as_mut().unwrap(); diff --git a/clients/probe/src/main.rs b/clients/probe/src/main.rs index 8b1dabd..5d0cf0a 100644 --- a/clients/probe/src/main.rs +++ b/clients/probe/src/main.rs @@ -402,6 +402,9 @@ async fn session(args: Args) -> Result<()> { frames = welcome.frames, compositor = welcome.compositor.as_str(), gamepad = welcome.gamepad.as_str(), + bit_depth = welcome.bit_depth, + color = ?welcome.color, + hdr = welcome.color.is_hdr(), "session offer" ); @@ -826,12 +829,20 @@ async fn session(args: Args) -> Result<()> { let conn2 = conn.clone(); tokio::spawn(async move { use std::sync::atomic::Ordering::Relaxed; + let mut hdr_logged = false; while let Ok(d) = conn2.read_datagram().await { if let Some((_, _, opus)) = punktfunk_core::quic::decode_audio_datagram(&d) { a.fetch_add(1, Relaxed); ab.fetch_add(opus.len() as u64, Relaxed); } else if punktfunk_core::quic::decode_rumble_datagram(&d).is_some() { r.fetch_add(1, Relaxed); + } else if let Some(meta) = punktfunk_core::quic::decode_hdr_meta_datagram(&d) { + // HDR static metadata (0xCE). Log the first receipt so a loopback test can + // assert the host sent it for an HDR session. + if !hdr_logged { + hdr_logged = true; + tracing::info!(?meta, "HDR static metadata (0xCE)"); + } } else if let Some(hid) = punktfunk_core::quic::HidOutput::decode(&d) { // The DualSense feedback plane (lightbar / player LEDs / adaptive triggers). 
// Log the first few so a playtest can see triggers/LEDs arrive without spam. diff --git a/clients/windows/src/app.rs b/clients/windows/src/app.rs index 312270b..de91713 100644 --- a/clients/windows/src/app.rs +++ b/clients/windows/src/app.rs @@ -951,6 +951,11 @@ fn settings_page(ctx: &Arc, set_screen: &AsyncSetState) -> Eleme // --- stream page -------------------------------------------------------------------------- fn present_newest(ctx: &mut PresentCtx) { + // Apply the latest source HDR mastering metadata (from the session pump's 0xCE drain) before + // presenting — a cheap no-op in the presenter when unchanged. + if let Some(meta) = *crate::present::LATEST_HDR_META.lock().unwrap() { + ctx.presenter.set_hdr_metadata(meta); + } // Drain to the newest decoded frame (drop any backlog) and hand it to the presenter by value — // the GPU zero-copy path retains the decoder surface across re-presents, so ownership matters. let mut newest = None; diff --git a/clients/windows/src/present.rs b/clients/windows/src/present.rs index 953a206..4ad8630 100644 --- a/clients/windows/src/present.rs +++ b/clients/windows/src/present.rs @@ -119,8 +119,18 @@ pub struct Presenter { panel_h: u32, /// Whether the swapchain is currently in 10-bit HDR10 (R10G10B10A2 + ST.2084) mode. hdr: bool, + /// The source's static HDR mastering metadata received over the protocol (`0xCE`), applied via + /// `SetHDRMetaData` so the display tone-maps from the real grade instead of a generic 1000-nit + /// guess. `None` until the first update arrives; the generic baseline is used until then. + hdr_meta: Option, } +/// Latest source HDR mastering metadata, written by the session pump (`session.rs`, the sole +/// `next_hdr_meta` consumer) and read by `present_newest` on the UI thread — decoupled so the +/// presenter doesn't need the connector. One session at a time on the client, so a single slot. 
+pub static LATEST_HDR_META: std::sync::Mutex> = + std::sync::Mutex::new(None); + impl Presenter { /// Create the presenter on the process-wide shared D3D11 device (the one the decoder uses), plus /// the composition swapchain + shaders, sized to the panel. @@ -148,9 +158,23 @@ impl Presenter { panel_w: width.max(1), panel_h: height.max(1), hdr: false, + hdr_meta: None, }) } + /// Update the source HDR mastering metadata (from the `0xCE` plane). Stored for the next HDR + /// swapchain switch, and applied immediately if already presenting HDR. A no-op when unchanged + /// (so it's cheap to call every frame from the present loop). + pub fn set_hdr_metadata(&mut self, meta: punktfunk_core::quic::HdrMeta) { + if self.hdr_meta == Some(meta) { + return; + } + self.hdr_meta = Some(meta); + if self.hdr { + unsafe { self.apply_hdr_metadata() }; + } + } + /// The DXGI swapchain to hand to `SwapChainPanelHandle::set_swap_chain`. pub fn swap_chain(&self) -> &IDXGISwapChain1 { &self.swap @@ -350,25 +374,42 @@ impl Presenter { // DWM still tone-maps HDR10 → SDR, so leaving the default there is fine). if let Ok(support) = sc3.CheckColorSpaceSupport(colorspace) { if support & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT.0 as u32 != 0 { - let _ = sc3.SetColorSpace1(colorspace); + if let Err(e) = sc3.SetColorSpace1(colorspace) { + // A silent failure here presents PQ content as SDR gamma (crushed/dark) — + // surface it instead of swallowing it. + tracing::warn!(error = %e, ?colorspace, "SetColorSpace1 failed"); + } + } else if on { + tracing::warn!("swapchain rejects BT.2020 PQ present colour space (SDR display?) 
— DWM tone-maps"); } } } + self.hdr = on; if on { - if let Ok(sc4) = self.swap.cast::() { - let md = hdr10_metadata(); - let bytes = std::slice::from_raw_parts( - &md as *const DXGI_HDR_METADATA_HDR10 as *const u8, - std::mem::size_of::(), - ); - let _ = sc4.SetHDRMetaData(DXGI_HDR_METADATA_TYPE_HDR10, Some(bytes)); - } + self.apply_hdr_metadata(); } } - self.hdr = on; tracing::info!(hdr = on, "swapchain colour mode switched"); } + /// Push the current `DXGI_HDR_METADATA_HDR10` to the swapchain. Uses the source's received + /// mastering metadata when known, else a generic HDR10 baseline. Caller ensures HDR mode. + unsafe fn apply_hdr_metadata(&self) { + if let Ok(sc4) = self.swap.cast::() { + let md = self + .hdr_meta + .map(hdr_meta_to_dxgi) + .unwrap_or_else(generic_hdr10_metadata); + let bytes = std::slice::from_raw_parts( + &md as *const DXGI_HDR_METADATA_HDR10 as *const u8, + std::mem::size_of::(), + ); + if let Err(e) = sc4.SetHDRMetaData(DXGI_HDR_METADATA_TYPE_HDR10, Some(bytes)) { + tracing::warn!(error = %e, "SetHDRMetaData failed"); + } + } + } + fn upload(&mut self, frame: &crate::video::CpuFrame) -> Result<()> { let (w, h) = (frame.width, frame.height); let need_new = !matches!(&self.cpu_tex, Some((_, _, tw, th)) if *tw == w && *th == h); @@ -579,9 +620,8 @@ fn blob_bytes(blob: &ID3DBlob) -> &[u8] { } /// Generic HDR10 mastering metadata: BT.2020 primaries + D65 white, a 1000-nit mastering display, -/// MaxCLL 1000 / MaxFALL 400. The protocol doesn't carry the stream's real mastering metadata yet -/// (host follow-up), so these are sane defaults the display tone-maps from. -fn hdr10_metadata() -> DXGI_HDR_METADATA_HDR10 { +/// MaxCLL 1000 / MaxFALL 400. The fallback used only until the host's real `0xCE` metadata arrives. 
+fn generic_hdr10_metadata() -> DXGI_HDR_METADATA_HDR10 { DXGI_HDR_METADATA_HDR10 { RedPrimary: [35400, 14600], GreenPrimary: [8500, 39850], @@ -593,3 +633,22 @@ fn hdr10_metadata() -> DXGI_HDR_METADATA_HDR10 { MaxFrameAverageLightLevel: 400, } } + +/// Map the protocol's [`HdrMeta`](punktfunk_core::quic::HdrMeta) to `DXGI_HDR_METADATA_HDR10`. +/// Two careful conversions: HdrMeta stores primaries in **ST.2086 G,B,R order**, DXGI wants +/// **R,G,B**; and HdrMeta mastering luminance is in **0.0001-cd/m² units** while DXGI's +/// `MaxMasteringLuminance` is in **whole nits** (MinMasteringLuminance stays 0.0001-nit). Chromaticity +/// units (1/50000) and MaxCLL/MaxFALL (nits) match 1:1. +fn hdr_meta_to_dxgi(m: punktfunk_core::quic::HdrMeta) -> DXGI_HDR_METADATA_HDR10 { + let [g, b, r] = m.display_primaries; // ST.2086 order + DXGI_HDR_METADATA_HDR10 { + RedPrimary: r, + GreenPrimary: g, + BluePrimary: b, + WhitePoint: m.white_point, + MaxMasteringLuminance: m.max_display_mastering_luminance / 10_000, // 0.0001-nit → nit + MinMasteringLuminance: m.min_display_mastering_luminance, // already 0.0001-nit + MaxContentLightLevel: m.max_cll, + MaxFrameAverageLightLevel: m.max_fall, + } +} diff --git a/clients/windows/src/session.rs b/clients/windows/src/session.rs index b3007f4..609c2b1 100644 --- a/clients/windows/src/session.rs +++ b/clients/windows/src/session.rs @@ -253,6 +253,13 @@ fn pump( } } + // Drain the HDR static-metadata plane (0xCE): the source's real mastering display + content + // light level. Stash the latest for the UI-thread presenter to apply via SetHDRMetaData — + // this pump is the sole consumer of the plane. Rare (start + on change/keyframe). 
+ while let Ok(meta) = connector.next_hdr_meta(Duration::ZERO) { + *crate::present::LATEST_HDR_META.lock().unwrap() = Some(meta); + } + if window_start.elapsed() >= Duration::from_secs(1) { let secs = window_start.elapsed().as_secs_f32(); lat_us.sort_unstable(); diff --git a/crates/punktfunk-core/src/abi.rs b/crates/punktfunk-core/src/abi.rs index f2bbbf9..7137049 100644 --- a/crates/punktfunk-core/src/abi.rs +++ b/crates/punktfunk-core/src/abi.rs @@ -547,6 +547,56 @@ impl PunktfunkHidOutput { } } +/// Static HDR metadata for an HDR session ([`punktfunk_connection_next_hdr_meta`]): SMPTE ST.2086 +/// mastering display colour volume + CTA-861.3 content light level, in HDR10 SEI fixed-point units +/// (primaries/white in 1/50000, mastering luminance in 0.0001 cd/m², MaxCLL/MaxFALL in nits). +/// Apple `CAEDRMetadata` takes these as-is; DXGI / Android need R, G, B primaries + whole-nit max. +#[cfg(feature = "quic")] +#[repr(C)] +#[derive(Clone, Copy)] +pub struct PunktfunkHdrMeta { + /// Display-primaries x-chromaticities in 1/50000 units, ST.2086 order [green, blue, red]. + pub display_primaries_x: [u16; 3], + /// Display-primaries y-chromaticities in 1/50000 units, ST.2086 order [green, blue, red]. + pub display_primaries_y: [u16; 3], + /// White-point x-chromaticity, 1/50000 units. + pub white_point_x: u16, + /// White-point y-chromaticity, 1/50000 units. + pub white_point_y: u16, + /// Max display mastering luminance, 0.0001 cd/m² units. + pub max_display_mastering_luminance: u32, + /// Min display mastering luminance, 0.0001 cd/m² units. + pub min_display_mastering_luminance: u32, + /// Maximum content light level (MaxCLL), nits. 0 = unknown. + pub max_cll: u16, + /// Maximum frame-average light level (MaxFALL), nits. 0 = unknown. 
+ pub max_fall: u16, +} + +#[cfg(feature = "quic")] +impl PunktfunkHdrMeta { + fn from_meta(m: &crate::quic::HdrMeta) -> PunktfunkHdrMeta { + PunktfunkHdrMeta { + display_primaries_x: [ + m.display_primaries[0][0], + m.display_primaries[1][0], + m.display_primaries[2][0], + ], + display_primaries_y: [ + m.display_primaries[0][1], + m.display_primaries[1][1], + m.display_primaries[2][1], + ], + white_point_x: m.white_point[0], + white_point_y: m.white_point[1], + max_display_mastering_luminance: m.max_display_mastering_luminance, + min_display_mastering_luminance: m.min_display_mastering_luminance, + max_cll: m.max_cll, + max_fall: m.max_fall, + } + } +} + /// `PunktfunkRichInput::kind` — a touchpad contact (`finger`/`active`/`x`/`y` valid). pub const PUNKTFUNK_RICH_TOUCHPAD: u8 = 1; /// `PunktfunkRichInput::kind` — a motion sample (`gyro`/`accel` valid). @@ -642,6 +692,20 @@ pub const PUNKTFUNK_GAMEPAD_DUALSENSE: u32 = 2; /// Blocks up to `timeout_ms` for the handshake. Returns NULL on failure. Equivalent to /// [`punktfunk_connect_ex`] with `compositor = PUNKTFUNK_COMPOSITOR_AUTO`. /// +/// Video-capability bit for [`punktfunk_connect_ex5`] (`video_caps`): the client can decode a +/// 10-bit (Main10) HEVC stream. (Mirrors `quic::VIDEO_CAP_10BIT`.) +pub const PUNKTFUNK_VIDEO_CAP_10BIT: u8 = 0x01; +/// Video-capability bit for [`punktfunk_connect_ex5`] (`video_caps`): the client can present +/// BT.2020 PQ HDR10 (implies 10-bit). (Mirrors `quic::VIDEO_CAP_HDR`.) +pub const PUNKTFUNK_VIDEO_CAP_HDR: u8 = 0x02; + +// Keep the ABI cap bits in lockstep with the wire constants (compile-time guard against drift). +#[cfg(feature = "quic")] +const _: () = { + assert!(PUNKTFUNK_VIDEO_CAP_10BIT == crate::quic::VIDEO_CAP_10BIT); + assert!(PUNKTFUNK_VIDEO_CAP_HDR == crate::quic::VIDEO_CAP_HDR); +}; + /// Trust: `pin_sha256` (NULL or 32 bytes) is the expected SHA-256 fingerprint of the host's /// certificate — a mismatching host is rejected. 
NULL = trust on first use; persist the /// fingerprint written to `observed_sha256_out` (NULL or 32 bytes, filled on success) and @@ -843,6 +907,59 @@ pub unsafe extern "C" fn punktfunk_connect_ex4( client_cert_pem: *const std::os::raw::c_char, client_key_pem: *const std::os::raw::c_char, timeout_ms: u32, +) -> *mut PunktfunkConnection { + // Back-compat: ex4 advertises no video caps (8-bit BT.709 SDR). HDR-capable embedders call + // `punktfunk_connect_ex5` with the cap bits. + unsafe { + punktfunk_connect_ex5( + host, + port, + width, + height, + refresh_hz, + compositor, + gamepad, + bitrate_kbps, + 0, + launch_id, + pin_sha256, + observed_sha256_out, + client_cert_pem, + client_key_pem, + timeout_ms, + ) + } +} + +/// Like [`punktfunk_connect_ex4`], but additionally advertises the embedder's video decode/present +/// capabilities as `video_caps` — a bitfield of `PUNKTFUNK_VIDEO_CAP_10BIT` (can decode 10-bit +/// Main10) and `PUNKTFUNK_VIDEO_CAP_HDR` (can present BT.2020 PQ HDR10). The host upgrades to a +/// 10-bit / HDR encode ONLY when the matching bit is set (and the host opted in); `0` keeps the +/// 8-bit BT.709 SDR stream. After connecting, read the resolved colour via +/// [`punktfunk_connection_color_info`] and drain the mastering metadata via +/// [`punktfunk_connection_next_hdr_meta`]. +/// +/// # Safety +/// Same as [`punktfunk_connect`]; `launch_id`, when non-NULL, must be a NUL-terminated C string. 
+#[cfg(feature = "quic")] +#[no_mangle] +#[allow(clippy::too_many_arguments)] +pub unsafe extern "C" fn punktfunk_connect_ex5( + host: *const std::os::raw::c_char, + port: u16, + width: u32, + height: u32, + refresh_hz: u32, + compositor: u32, + gamepad: u32, + bitrate_kbps: u32, + video_caps: u8, + launch_id: *const std::os::raw::c_char, + pin_sha256: *const u8, + observed_sha256_out: *mut u8, + client_cert_pem: *const std::os::raw::c_char, + client_key_pem: *const std::os::raw::c_char, + timeout_ms: u32, ) -> *mut PunktfunkConnection { let r = std::panic::catch_unwind(AssertUnwindSafe(|| { if host.is_null() { @@ -891,9 +1008,7 @@ pub unsafe extern "C" fn punktfunk_connect_ex4( pref, gamepad, bitrate_kbps, - // 8-bit only over the C ABI for now — the ABI doesn't yet carry the embedder's video - // caps (Apple/Android decode 8-bit). The native Windows client advertises 10-bit/HDR. - 0, + video_caps, launch, pin, identity, @@ -1195,6 +1310,90 @@ pub unsafe extern "C" fn punktfunk_connection_next_hidout( }) } +/// Pull the next static HDR metadata update (ST.2086 mastering display + content light level) for +/// an HDR session, into `*out`. [`PunktfunkStatus::NoFrame`] on timeout, [`PunktfunkStatus::Closed`] +/// once the session ended. The host sends one near session start and re-sends it on mastering +/// changes / keyframes; apply the latest to the display (`SetHDRMetaData` / `CAEDRMetadata` / +/// `KEY_HDR_STATIC_INFO`). Only an HDR session (`punktfunk_connection_color_info` reports a PQ +/// transfer) ever emits these. Same threading rules as [`punktfunk_connection_next_rumble`] (one +/// puller, may run alongside the other planes). +/// +/// # Safety +/// `c` is a valid connection handle; `out` is writable for one `PunktfunkHdrMeta`. 
+#[cfg(feature = "quic")] +#[no_mangle] +pub unsafe extern "C" fn punktfunk_connection_next_hdr_meta( + c: *mut PunktfunkConnection, + out: *mut PunktfunkHdrMeta, + timeout_ms: u32, +) -> PunktfunkStatus { + guard(|| { + let c = match unsafe { c.as_ref() } { + Some(c) => c, + None => return PunktfunkStatus::NullPointer, + }; + if out.is_null() { + return PunktfunkStatus::NullPointer; + } + match c + .inner + .next_hdr_meta(std::time::Duration::from_millis(timeout_ms as u64)) + { + Ok(m) => { + unsafe { *out = PunktfunkHdrMeta::from_meta(&m) }; + PunktfunkStatus::Ok + } + Err(e) => e.status(), + } + }) +} + +/// Read the session's resolved colour signalling + encode bit depth (from the host's Welcome). +/// Each out pointer is filled when non-NULL: `primaries`/`transfer`/`matrix` are CICP code points +/// (BT.709 = 1; BT.2020 = 9; PQ transfer = 16, HLG = 18; BT.2020-NCL matrix = 9), `full_range` is +/// 0 (limited) or 1 (full), `bit_depth` is 8 or 10. A `transfer` of 16/18 means HDR — configure an +/// HDR present path and drain [`punktfunk_connection_next_hdr_meta`]. Available immediately after a +/// successful connect (these don't change without a reconfigure). +/// +/// # Safety +/// `c` is a valid connection handle; each out pointer is NULL or writable for its scalar. 
+#[cfg(feature = "quic")] +#[no_mangle] +pub unsafe extern "C" fn punktfunk_connection_color_info( + c: *mut PunktfunkConnection, + primaries: *mut u8, + transfer: *mut u8, + matrix: *mut u8, + full_range: *mut u8, + bit_depth: *mut u8, +) -> PunktfunkStatus { + guard(|| { + let c = match unsafe { c.as_ref() } { + Some(c) => c, + None => return PunktfunkStatus::NullPointer, + }; + let color = c.inner.color; + unsafe { + if !primaries.is_null() { + *primaries = color.primaries; + } + if !transfer.is_null() { + *transfer = color.transfer; + } + if !matrix.is_null() { + *matrix = color.matrix; + } + if !full_range.is_null() { + *full_range = color.full_range; + } + if !bit_depth.is_null() { + *bit_depth = c.inner.bit_depth; + } + } + PunktfunkStatus::Ok + }) +} + /// Send one input event to the host as a QUIC datagram (non-blocking enqueue). /// /// # Safety diff --git a/crates/punktfunk-core/src/client.rs b/crates/punktfunk-core/src/client.rs index 971b1bf..c944265 100644 --- a/crates/punktfunk-core/src/client.rs +++ b/crates/punktfunk-core/src/client.rs @@ -16,8 +16,8 @@ use crate::error::{PunktfunkError, Result}; use crate::input::InputEvent; use crate::packet::FLAG_PROBE; use crate::quic::{ - endpoint, io, window_loss_ppm, Hello, HidOutput, LossReport, ProbeRequest, ProbeResult, - Reconfigure, Reconfigured, RequestKeyframe, RichInput, Start, Welcome, + endpoint, io, window_loss_ppm, ColorInfo, HdrMeta, Hello, HidOutput, LossReport, ProbeRequest, + ProbeResult, Reconfigure, Reconfigured, RequestKeyframe, RichInput, Start, Welcome, }; use crate::session::{Frame, Session}; use crate::transport::UdpTransport; @@ -40,7 +40,18 @@ enum CtrlRequest { /// mode, the host-resolved compositor backend, the host-resolved gamepad backend, the host's /// certificate fingerprint, the resolved encoder bitrate (kbps), and the host↔client clock offset /// (ns, host minus client; 0 = no skew correction / an old host that didn't answer the handshake). 
-type Negotiated = (Mode, CompositorPref, GamepadPref, [u8; 32], u32, i64); +/// The trailing `u8` is the resolved encode bit depth (8/10) and [`ColorInfo`] the resolved colour +/// signalling, both from the [`Welcome`]. +type Negotiated = ( + Mode, + CompositorPref, + GamepadPref, + [u8; 32], + u32, + i64, + u8, + ColorInfo, +); /// Accumulated state of an in-flight / finished speed test. The data-plane pump mirrors the /// session's packet-level receive counters here; the control task finalizes the delivered figure @@ -121,6 +132,10 @@ const RUMBLE_QUEUE: usize = 16; /// Same overflow discipline as rumble; the host re-sends on the next feedback change. const HIDOUT_QUEUE: usize = 32; +/// Static HDR metadata (ST.2086 mastering + content light level) buffered for the embedder. Tiny +/// and low-rate (one on start, re-sent on mastering changes / keyframes); a small ring is ample. +const HDR_META_QUEUE: usize = 8; + /// One Opus packet from the host's audio datagram stream (48 kHz stereo, 5 ms frames). #[derive(Clone, Debug)] pub struct AudioPacket { @@ -140,6 +155,8 @@ pub struct NativeClient { rumble: Mutex>, /// Inbound DualSense feedback (lightbar / player LEDs / adaptive triggers) — 0xCD datagrams. hidout: Mutex>, + /// Inbound static HDR metadata (ST.2086 mastering + content light level) — 0xCE datagrams. + hdr_meta: Mutex>, input_tx: tokio::sync::mpsc::UnboundedSender, /// Outbound mic frames `(seq, pts_ns, opus)` → encoded as 0xCB datagrams by the worker. mic_tx: tokio::sync::mpsc::UnboundedSender<(u32, u64, Vec)>, @@ -178,6 +195,13 @@ pub struct NativeClient { /// glass-to-glass latency valid across machines. `0` = no correction (an old host that didn't /// answer, or genuinely synced clocks). pub clock_offset_ns: i64, + /// The encode bit depth the host resolved for this session ([`Welcome::bit_depth`]): `8`, or + /// `10` for a Main10 / HDR session. `8` for an older host that didn't report it. 
+ pub bit_depth: u8, + /// The colour signalling the host encodes with ([`Welcome::color`]): the client configures its + /// decoder/presenter from this. [`ColorInfo::SDR_BT709`] for an older host. The static HDR + /// mastering metadata (when [`ColorInfo::is_hdr`]) arrives via [`NativeClient::next_hdr_meta`]. + pub color: ColorInfo, } /// Pin the calling thread to the user-interactive QoS class on Apple targets. @@ -231,6 +255,7 @@ impl NativeClient { let (audio_tx, audio_rx) = std::sync::mpsc::sync_channel::(AUDIO_QUEUE); let (rumble_tx, rumble_rx) = std::sync::mpsc::sync_channel::<(u16, u16, u16)>(RUMBLE_QUEUE); let (hidout_tx, hidout_rx) = std::sync::mpsc::sync_channel::(HIDOUT_QUEUE); + let (hdr_meta_tx, hdr_meta_rx) = std::sync::mpsc::sync_channel::(HDR_META_QUEUE); let (input_tx, input_rx) = tokio::sync::mpsc::unbounded_channel::(); let (mic_tx, mic_rx) = tokio::sync::mpsc::unbounded_channel::<(u32, u64, Vec)>(); let (rich_input_tx, rich_input_rx) = tokio::sync::mpsc::unbounded_channel::(); @@ -280,6 +305,7 @@ impl NativeClient { audio_tx, rumble_tx, hidout_tx, + hdr_meta_tx, input_rx, mic_rx, rich_input_rx, @@ -301,6 +327,8 @@ impl NativeClient { fingerprint, resolved_bitrate_kbps, clock_offset_ns, + bit_depth, + color, ) = match ready_rx.recv_timeout(timeout) { Ok(Ok(t)) => t, Ok(Err(e)) => return Err(e), @@ -315,6 +343,7 @@ impl NativeClient { audio: Mutex::new(audio_rx), rumble: Mutex::new(rumble_rx), hidout: Mutex::new(hidout_rx), + hdr_meta: Mutex::new(hdr_meta_rx), input_tx, mic_tx, rich_input_tx, @@ -329,6 +358,8 @@ impl NativeClient { resolved_gamepad, resolved_bitrate_kbps, clock_offset_ns, + bit_depth, + color, }) } @@ -579,6 +610,20 @@ impl NativeClient { } } + /// Pull the next static HDR metadata update (ST.2086 mastering display + content light level) + /// the host sent for an HDR session; same timeout/closed semantics as + /// [`NativeClient::next_hidout`]. 
The host sends one near session start and re-sends it on + /// mastering changes / keyframes, so an HDR presenter should drain this on its own thread and + /// apply the latest value to the display (DXGI `SetHDRMetaData` / `CAEDRMetadata` / + /// `KEY_HDR_STATIC_INFO`). Only an HDR session (`color.is_hdr()`, PQ) ever emits these. + pub fn next_hdr_meta(&self, timeout: Duration) -> Result { + match self.hdr_meta.lock().unwrap().recv_timeout(timeout) { + Ok(m) => Ok(m), + Err(RecvTimeoutError::Timeout) => Err(PunktfunkError::NoFrame), + Err(RecvTimeoutError::Disconnected) => Err(PunktfunkError::Closed), + } + } + /// Queue one input event for delivery as a QUIC datagram. pub fn send_input(&self, ev: &InputEvent) -> Result<()> { self.input_tx.send(*ev).map_err(|_| PunktfunkError::Closed) @@ -628,6 +673,7 @@ struct WorkerArgs { audio_tx: SyncSender, rumble_tx: SyncSender<(u16, u16, u16)>, hidout_tx: SyncSender, + hdr_meta_tx: SyncSender, input_rx: tokio::sync::mpsc::UnboundedReceiver, mic_rx: tokio::sync::mpsc::UnboundedReceiver<(u32, u64, Vec)>, rich_input_rx: tokio::sync::mpsc::UnboundedReceiver, @@ -658,6 +704,7 @@ async fn worker_main(args: WorkerArgs) { audio_tx, rumble_tx, hidout_tx, + hdr_meta_tx, mut input_rx, mut mic_rx, mut rich_input_rx, @@ -785,6 +832,8 @@ async fn worker_main(args: WorkerArgs) { fingerprint, welcome.bitrate_kbps, clock_offset_ns, + welcome.bit_depth, + welcome.color, )) }; @@ -799,6 +848,8 @@ async fn worker_main(args: WorkerArgs) { fingerprint, resolved_bitrate_kbps, clock_offset_ns, + bit_depth, + color, ) = match setup.await { Ok(t) => t, Err(e) => { @@ -813,6 +864,8 @@ async fn worker_main(args: WorkerArgs) { fingerprint, resolved_bitrate_kbps, clock_offset_ns, + bit_depth, + color, ))); // Input task: embedder events → QUIC datagrams. 
@@ -927,6 +980,11 @@ async fn worker_main(args: WorkerArgs) { let _ = hidout_tx.try_send(h); } } + Some(&crate::quic::HDR_META_MAGIC) => { + if let Some(m) = crate::quic::decode_hdr_meta_datagram(&d) { + let _ = hdr_meta_tx.try_send(m); + } + } _ => {} // unknown tag — a newer host; ignore } } diff --git a/crates/punktfunk-core/src/quic.rs b/crates/punktfunk-core/src/quic.rs index 4da9015..fa9dc1b 100644 --- a/crates/punktfunk-core/src/quic.rs +++ b/crates/punktfunk-core/src/quic.rs @@ -85,6 +85,72 @@ pub const VIDEO_CAP_10BIT: u8 = 0x01; /// [`Hello::video_caps`] bit: the client can present BT.2020 PQ HDR10 (implies 10-bit). pub const VIDEO_CAP_HDR: u8 = 0x02; +/// Per-session colour signalling (CICP / ITU-T H.273 code points) the host resolved for the +/// encoded video, carried on [`Welcome`]. A client configures its decoder/presenter from these +/// instead of inferring them from the bitstream VUI. An older host omits the bytes on the wire → +/// [`ColorInfo::SDR_BT709`] (the 8-bit BT.709 limited stream every pre-HDR build produced). +/// +/// The *static* HDR mastering metadata (ST.2086 + content light level) is larger and can change +/// mid-stream, so it rides the [`HDR_META_MAGIC`] datagram rather than this fixed struct. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ColorInfo { + /// CICP colour primaries: 1 = BT.709, 9 = BT.2020. + pub primaries: u8, + /// CICP transfer characteristics: 1 = BT.709, 16 = PQ (SMPTE ST.2084), 18 = HLG. + pub transfer: u8, + /// CICP matrix coefficients: 1 = BT.709, 9 = BT.2020 non-constant-luminance. + pub matrix: u8, + /// `video_full_range_flag`: 0 = limited/studio range, 1 = full range. + pub full_range: u8, +} + +impl ColorInfo { + /// CICP colour-primaries code point: BT.709. + pub const CP_BT709: u8 = 1; + /// CICP colour-primaries code point: BT.2020. + pub const CP_BT2020: u8 = 9; + /// CICP transfer code point: BT.709. + pub const TRC_BT709: u8 = 1; + /// CICP transfer code point: PQ (SMPTE ST.2084). 
+ pub const TRC_PQ: u8 = 16; + /// CICP transfer code point: HLG (ARIB STD-B67 / BT.2100). + pub const TRC_HLG: u8 = 18; + /// CICP matrix code point: BT.709. + pub const MC_BT709: u8 = 1; + /// CICP matrix code point: BT.2020 non-constant-luminance. (Never emit 10 / constant-luminance — + /// no client decodes it.) + pub const MC_BT2020_NCL: u8 = 9; + + /// 8-bit BT.709 limited-range SDR — what every pre-HDR build produced, and the back-compat + /// default when a peer omits the colour bytes. + pub const SDR_BT709: ColorInfo = ColorInfo { + primaries: Self::CP_BT709, + transfer: Self::TRC_BT709, + matrix: Self::MC_BT709, + full_range: 0, + }; + + /// BT.2020 PQ (HDR10), limited range — what the Windows host's HEVC VUI emits. + pub const HDR10_BT2020_PQ: ColorInfo = ColorInfo { + primaries: Self::CP_BT2020, + transfer: Self::TRC_PQ, + matrix: Self::MC_BT2020_NCL, + full_range: 0, + }; + + /// True when the transfer is an HDR curve (PQ or HLG): the stream needs HDR present, and + /// (for PQ) a [`HdrMeta`] datagram carries the mastering metadata. + pub fn is_hdr(&self) -> bool { + self.transfer == Self::TRC_PQ || self.transfer == Self::TRC_HLG + } +} + +impl Default for ColorInfo { + fn default() -> Self { + Self::SDR_BT709 + } +} + /// Longest device name carried in a [`Hello`] (bytes of UTF-8; longer names are truncated on /// encode, rejected on decode — a one-byte length prefix caps it at 255 anyway). pub const HELLO_NAME_MAX: usize = 64; @@ -124,9 +190,14 @@ pub struct Welcome { /// The luma/chroma bit depth the host actually encodes at — `8` (default / older host) or /// `10` (Main10, enabled only when the client advertised [`VIDEO_CAP_10BIT`]). The client /// configures its decoder for 10-bit (P010) when this is `10`. Appended to the wire form as a - /// single trailing byte; `8` when an older host omitted it. (Color space stays BT.709 in - /// Phase 1; BT.2020 PQ HDR signaling is added alongside HDR support.) 
+ /// single trailing byte; `8` when an older host omitted it. pub bit_depth: u8, + /// The colour signalling (CICP primaries/transfer/matrix/range) the host encodes with — BT.709 + /// limited SDR by default, BT.2020 PQ when a 10-bit HDR session was negotiated. Appended after + /// `bit_depth` as 4 trailing bytes; an older host that omits them decodes to + /// [`ColorInfo::SDR_BT709`]. The client configures its decoder/presenter from this instead of + /// guessing from the bitstream; the mastering metadata arrives separately on [`HDR_META_MAGIC`]. + pub color: ColorInfo, } /// `client → host`: data plane is bound, begin streaming. @@ -671,6 +742,11 @@ impl Welcome { b.push(self.gamepad.to_u8()); // appended at offset 54 — same back-compat discipline b.extend_from_slice(&self.bitrate_kbps.to_le_bytes()); // appended at offset 55..59 b.push(self.bit_depth); // appended at offset 59 — older clients read [0..59] and skip it + // Colour signalling at offsets 60..64 — older clients stop before these → SDR BT.709. + b.push(self.color.primaries); + b.push(self.color.transfer); + b.push(self.color.matrix); + b.push(self.color.full_range); b } @@ -678,7 +754,8 @@ impl Welcome { // Layout (LE): magic[0..4] abi[4..8] port[8..10] w[10..14] h[14..18] hz[18..22] // scheme[22] pct[23] max_data[24..26] shard[26..28] encrypt[28] key[29..45] // salt[45..49] frames[49..53] compositor[53] gamepad[54] bitrate_kbps[55..59] - // bit_depth[59] (compositor/gamepad/bitrate/bit_depth are optional trailing bytes). + // bit_depth[59] color.primaries[60] color.transfer[61] color.matrix[62] color.range[63] + // (everything from compositor on is an optional trailing byte; an older host stops earlier). if b.len() < 53 || &b[0..4] != MAGIC { return Err(PunktfunkError::InvalidArg("bad Welcome")); } @@ -728,6 +805,13 @@ impl Welcome { // Optional trailing byte — absent on an older host → `8` (8-bit, the only depth they // encode). 
bit_depth: b.get(59).copied().unwrap_or(8), + // Optional trailing colour bytes — absent on an older host → SDR BT.709 limited. + color: ColorInfo { + primaries: b.get(60).copied().unwrap_or(ColorInfo::CP_BT709), + transfer: b.get(61).copied().unwrap_or(ColorInfo::TRC_BT709), + matrix: b.get(62).copied().unwrap_or(ColorInfo::MC_BT709), + full_range: b.get(63).copied().unwrap_or(0), + }, }) } @@ -988,7 +1072,8 @@ pub fn frame(payload: &[u8]) -> Vec { /// demultiplexed by the first byte: input = [`crate::input::INPUT_MAGIC`] (0xC8, client→host), /// audio = [`AUDIO_MAGIC`] (0xC9, host→client), rumble = [`RUMBLE_MAGIC`] (0xCA, host→client), /// mic = [`MIC_MAGIC`] (0xCB, client→host), rich-input = [`RICH_INPUT_MAGIC`] (0xCC, client→host), -/// HID-output = [`HIDOUT_MAGIC`] (0xCD, host→client). +/// HID-output = [`HIDOUT_MAGIC`] (0xCD, host→client), HDR metadata = [`HDR_META_MAGIC`] +/// (0xCE, host→client). pub const AUDIO_MAGIC: u8 = 0xC9; pub const RUMBLE_MAGIC: u8 = 0xCA; /// Microphone uplink: the client's mic, Opus-encoded, client → host (the inverse of @@ -1203,6 +1288,79 @@ impl HidOutput { } } +/// Static HDR metadata, host → client: SMPTE ST.2086 mastering display colour volume + CEA-861.3 +/// content light level. Tag [`HDR_META_MAGIC`]. Carried on a datagram (not [`Welcome`]) because it +/// is larger and can change mid-stream when the source's mastering intent changes; the host +/// re-sends it on keyframes so a client that dropped the best-effort datagram converges. Omitted +/// for HLG (scene-referred — no mastering metadata). +/// +/// All fields use the standard HDR10 SEI fixed-point units, so they pass straight to +/// `DXGI_HDR_METADATA_HDR10` / Android `KEY_HDR_STATIC_INFO` / Apple `CAEDRMetadata` — the +/// libavcodec `AVMasteringDisplayMetadata` side needs an `AVRational` conversion. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +pub struct HdrMeta { + /// Display primaries G, B, R as (x, y) chromaticity in 1/50000 units (the ST.2086 RGB order + /// is G, B, R). + pub display_primaries: [[u16; 2]; 3], + /// White point (x, y) in 1/50000 units. + pub white_point: [u16; 2], + /// Max display mastering luminance, 0.0001 cd/m² units. + pub max_display_mastering_luminance: u32, + /// Min display mastering luminance, 0.0001 cd/m² units. + pub min_display_mastering_luminance: u32, + /// Maximum content light level (MaxCLL), nits. `0` = unknown. + pub max_cll: u16, + /// Maximum frame-average light level (MaxFALL), nits. `0` = unknown. + pub max_fall: u16, +} + +/// HDR static-metadata datagram tag, host → client (the static analog of the per-frame VUI; +/// see [`HdrMeta`]). Next tag after [`HIDOUT_MAGIC`]. +pub const HDR_META_MAGIC: u8 = 0xCE; + +/// Wire length of an [`HDR_META_MAGIC`] datagram: tag + 6×u16 primaries + 2×u16 white + 2×u32 +/// luminance + 2×u16 CLL/FALL = 29 bytes. +const HDR_META_LEN: usize = 1 + 12 + 4 + 8 + 4; + +/// Encode an [`HdrMeta`] into a [`HDR_META_MAGIC`] datagram. +pub fn encode_hdr_meta_datagram(m: &HdrMeta) -> Vec { + let mut b = Vec::with_capacity(HDR_META_LEN); + b.push(HDR_META_MAGIC); + for p in m.display_primaries.iter() { + b.extend_from_slice(&p[0].to_le_bytes()); + b.extend_from_slice(&p[1].to_le_bytes()); + } + b.extend_from_slice(&m.white_point[0].to_le_bytes()); + b.extend_from_slice(&m.white_point[1].to_le_bytes()); + b.extend_from_slice(&m.max_display_mastering_luminance.to_le_bytes()); + b.extend_from_slice(&m.min_display_mastering_luminance.to_le_bytes()); + b.extend_from_slice(&m.max_cll.to_le_bytes()); + b.extend_from_slice(&m.max_fall.to_le_bytes()); + b +} + +/// Parse a [`HDR_META_MAGIC`] datagram → [`HdrMeta`]. `None` on bad tag or a short/truncated buffer +/// (every attacker-controlled field is bounds-checked by the fixed length before any read). 
+pub fn decode_hdr_meta_datagram(b: &[u8]) -> Option { + if b.len() < HDR_META_LEN || b[0] != HDR_META_MAGIC { + return None; + } + let u16at = |o: usize| u16::from_le_bytes([b[o], b[o + 1]]); + let u32at = |o: usize| u32::from_le_bytes([b[o], b[o + 1], b[o + 2], b[o + 3]]); + Some(HdrMeta { + display_primaries: [ + [u16at(1), u16at(3)], + [u16at(5), u16at(7)], + [u16at(9), u16at(11)], + ], + white_point: [u16at(13), u16at(15)], + max_display_mastering_luminance: u32at(17), + min_display_mastering_luminance: u32at(21), + max_cll: u16at(25), + max_fall: u16at(27), + }) +} + /// Async framed-message IO over a quinn stream (`u16 LE length || payload`). pub mod io { /// Read one framed message (bounded at 64 KiB — control messages are tiny). @@ -1636,10 +1794,34 @@ mod tests { gamepad: GamepadPref::DualSense, bitrate_kbps: 50_000, bit_depth: 10, + color: ColorInfo::HDR10_BT2020_PQ, }; assert_eq!(Welcome::decode(&w.encode()).unwrap(), w); } + #[test] + fn hdr_meta_datagram_roundtrip_and_truncation() { + let m = HdrMeta { + // BT.2020 display primaries in 1/50000 units (the DXGI/ST.2086 reference values). + display_primaries: [[8500, 39850], [6550, 2300], [35400, 14600]], + white_point: [15635, 16450], // D65 + max_display_mastering_luminance: 10_000_000, // 1000 nits in 0.0001 cd/m² + min_display_mastering_luminance: 1, // 0.0001 nits + max_cll: 1000, + max_fall: 400, + }; + let d = encode_hdr_meta_datagram(&m); + assert_eq!(d[0], HDR_META_MAGIC); + assert_eq!(decode_hdr_meta_datagram(&d), Some(m)); + // Truncated buffers and a wrong tag are rejected (never partially read). 
+ for n in 0..d.len() { + assert_eq!(decode_hdr_meta_datagram(&d[..n]), None); + } + let mut bad = d.clone(); + bad[0] = HIDOUT_MAGIC; + assert_eq!(decode_hdr_meta_datagram(&bad), None); + } + #[test] fn hello_start_roundtrip() { let h = Hello { @@ -1760,9 +1942,10 @@ mod tests { gamepad: GamepadPref::Xbox360, bitrate_kbps: 120_000, bit_depth: 10, + color: ColorInfo::HDR10_BT2020_PQ, }; let wenc = w.encode(); - assert_eq!(wenc.len(), 60); + assert_eq!(wenc.len(), 64); // 60 base + 4 colour bytes let legacy_w = Welcome::decode(&wenc[..53]).unwrap(); assert_eq!(legacy_w.compositor, CompositorPref::Auto); assert_eq!(legacy_w.gamepad, GamepadPref::Auto); @@ -1778,8 +1961,17 @@ mod tests { assert_eq!(pre_bitrate_w.bitrate_kbps, 0); assert_eq!(pre_bitrate_w.bit_depth, 8); // older host (no trailing byte) → 8-bit assumed assert_eq!(legacy_w.bit_depth, 8); + // A pre-colour (60-byte) Welcome → SDR BT.709 (the only colour those hosts produced). + let pre_color_w = Welcome::decode(&wenc[..60]).unwrap(); + assert_eq!(pre_color_w.bit_depth, 10); + assert_eq!(pre_color_w.color, ColorInfo::SDR_BT709); + assert_eq!(legacy_w.color, ColorInfo::SDR_BT709); assert_eq!(Welcome::decode(&wenc).unwrap().bitrate_kbps, 120_000); assert_eq!(Welcome::decode(&wenc).unwrap().bit_depth, 10); // full form carries it + assert_eq!( + Welcome::decode(&wenc).unwrap().color, + ColorInfo::HDR10_BT2020_PQ + ); } #[test] diff --git a/crates/punktfunk-host/src/capture.rs b/crates/punktfunk-host/src/capture.rs index 874cbb4..5b83fc2 100644 --- a/crates/punktfunk-host/src/capture.rs +++ b/crates/punktfunk-host/src/capture.rs @@ -133,6 +133,15 @@ pub trait Capturer: Send { /// the default is a no-op (synthetic sources are produced on demand). Set `true` for the /// duration of a stream, `false` when it ends. 
fn set_active(&self, _active: bool) {} + + /// The source's static HDR mastering metadata (SMPTE ST.2086 + content light level), when the + /// capturer can read it from the output (Windows `IDXGIOutput6::GetDesc1`). `None` = unknown / + /// SDR / a backend that doesn't expose it (the default — Linux capture has no HDR path yet). + /// The stream loop forwards this to the encoder (in-band SEI) and the client (`0xCE` datagram), + /// so the two stay a single source of truth. May change mid-session if the source is regraded. + fn hdr_meta(&self) -> Option { + None + } } /// A deterministic moving test pattern (BGRx). Lets the spike exercise the encode → file → diff --git a/crates/punktfunk-host/src/capture/dxgi.rs b/crates/punktfunk-host/src/capture/dxgi.rs index 5383841..2c44b74 100644 --- a/crates/punktfunk-host/src/capture/dxgi.rs +++ b/crates/punktfunk-host/src/capture/dxgi.rs @@ -41,7 +41,7 @@ use windows::Win32::Graphics::Dxgi::Common::{ }; use windows::Win32::Graphics::Dxgi::{ CreateDXGIFactory1, IDXGIAdapter1, IDXGIDevice, IDXGIDevice1, IDXGIFactory1, IDXGIOutput1, - IDXGIOutput5, IDXGIOutputDuplication, IDXGIResource, DXGI_ERROR_ACCESS_LOST, + IDXGIOutput5, IDXGIOutput6, IDXGIOutputDuplication, IDXGIResource, DXGI_ERROR_ACCESS_LOST, DXGI_ERROR_DEVICE_REMOVED, DXGI_ERROR_DEVICE_RESET, DXGI_ERROR_INVALID_CALL, DXGI_ERROR_MODE_CHANGE_IN_PROGRESS, DXGI_ERROR_WAIT_TIMEOUT, DXGI_OUTDUPL_DESC, DXGI_OUTDUPL_FRAME_INFO, DXGI_OUTDUPL_POINTER_SHAPE_INFO, @@ -129,6 +129,33 @@ pub(crate) unsafe fn find_output(gdi_name: &str) -> Result<(IDXGIAdapter1, IDXGI bail!("no DXGI output named {gdi_name} (gone after ACCESS_LOST?)") } +/// Read the source display's static HDR mastering metadata via `IDXGIOutput6::GetDesc1` (the +/// monitor IS the "mastering display" for a desktop capture, exactly as Sunshine/Apollo treat it). 
+/// GetDesc1 exposes the colour primaries, white point, and min/max mastering luminance but NOT a +/// content light level, so MaxCLL/MaxFALL are left `0` (unknown — the display tone-maps from the +/// mastering luminance). `None` if the output can't be cast to `IDXGIOutput6` or the call fails. +unsafe fn read_output_hdr_meta(output: &IDXGIOutput1) -> Option { + let out6: IDXGIOutput6 = output.cast().ok()?; + let d = out6.GetDesc1().ok()?; + let m = crate::hdr::hdr_meta_from_display( + (d.RedPrimary[0], d.RedPrimary[1]), + (d.GreenPrimary[0], d.GreenPrimary[1]), + (d.BluePrimary[0], d.BluePrimary[1]), + (d.WhitePoint[0], d.WhitePoint[1]), + d.MaxLuminance, + d.MinLuminance, + 0, // MaxCLL: GetDesc1 has no content light level (Apollo zeroes it) + 0, // MaxFALL + ); + tracing::info!( + max_nits = d.MaxLuminance, + min_nits = d.MinLuminance, + max_full_frame_nits = d.MaxFullFrameLuminance, + "read source display HDR mastering metadata (GetDesc1)" + ); + Some(m) +} + /// Create a fresh D3D11 device + context on a specific adapter (driver_type UNKNOWN with an explicit /// adapter). Used at open and on every ACCESS_LOST: a device created on one desktop cannot sustain a /// duplication on a *different* desktop (perpetual ACCESS_LOST), so the secure-desktop switch needs a @@ -1900,6 +1927,10 @@ pub struct DuplCapturer { /// produce a BT.2020 PQ 10-bit (`R10G10B10A2`) frame for NVENC. Toggling HDR fires ACCESS_LOST → /// `recreate_dupl` re-detects the format, so this tracks the *current* duplication. hdr_fp16: bool, + /// The source display's static HDR mastering metadata (ST.2086 + content light level), read from + /// `IDXGIOutput6::GetDesc1` whenever the duplication is HDR (`hdr_fp16`). The stream loop forwards + /// it to the encoder (in-band SEI) and the client (0xCE). `None` when SDR or the read failed. + hdr_meta: Option, /// FP16 copy of the duplication surface (RT|SRV): the cursor composites onto it and the converter /// samples it. 
Reallocated on device/size change. fp16_src: Option, @@ -2129,6 +2160,14 @@ impl DuplCapturer { let gpu_mode = std::env::var("PUNKTFUNK_ENCODER") .map(|v| matches!(v.to_ascii_lowercase().as_str(), "nvenc" | "hw" | "nvidia")) .unwrap_or(false); + // Read the source display's HDR mastering metadata while we still hold `output` (it is + // moved into the struct below). Only meaningful for an HDR (FP16) duplication. + let is_hdr_init = dd.ModeDesc.Format == DXGI_FORMAT_R16G16B16A16_FLOAT; + let hdr_meta_init = if is_hdr_init { + read_output_hdr_meta(&output) + } else { + None + }; tracing::info!( "DXGI duplication: {}x{}@{} on {} ({}) dxgi_format={} (87=BGRA8 24=R10G10B10A2 10=R16G16B16A16_FLOAT)", width, @@ -2165,7 +2204,8 @@ impl DuplCapturer { gpu_copy: None, last_present: None, want_hdr, - hdr_fp16: dd.ModeDesc.Format == DXGI_FORMAT_R16G16B16A16_FLOAT, + hdr_fp16: is_hdr_init, + hdr_meta: hdr_meta_init, fp16_src: None, fp16_srv: None, hdr10_out: None, @@ -2661,6 +2701,12 @@ impl DuplCapturer { // Re-detect HDR and drop the HDR textures/converter (old device). Toggling HDR on or // off is exactly this path: the duplication comes back as FP16 (HDR) or BGRA8. self.hdr_fp16 = dd.ModeDesc.Format == DXGI_FORMAT_R16G16B16A16_FLOAT; + // Re-read the source mastering metadata for the (possibly new) HDR output, or clear it on SDR. + self.hdr_meta = if self.hdr_fp16 { + read_output_hdr_meta(&self.output) + } else { + None + }; self.fp16_src = None; self.fp16_srv = None; self.hdr10_out = None; @@ -3084,6 +3130,15 @@ fn now_ns() -> u64 { } impl Capturer for DuplCapturer { + fn hdr_meta(&self) -> Option { + // Only when the duplication is actually HDR (FP16); cleared to None on an SDR rebuild. + if self.hdr_fp16 { + self.hdr_meta + } else { + None + } + } + fn next_frame(&mut self) -> Result { // Generous: a secure-desktop switch can take several seconds to settle (re-resolve + recreate // the duplication up to 12 s). 
Better a few seconds of frozen-last-frame than dropping the stream. diff --git a/crates/punktfunk-host/src/capture/wgc.rs b/crates/punktfunk-host/src/capture/wgc.rs index edfb3cf..213cdc1 100644 --- a/crates/punktfunk-host/src/capture/wgc.rs +++ b/crates/punktfunk-host/src/capture/wgc.rs @@ -127,6 +127,11 @@ pub struct WgcCapturer { first_frame: bool, hdr: bool, + /// The source display's static HDR mastering metadata (ST.2086 + content light level), read from + /// `IDXGIOutput6::GetDesc1` at open when the output is HDR. Forwarded to the encoder (in-band SEI) + /// and the client (0xCE) by the stream loop. `None` when SDR. (The helper relay path also encodes, + /// so this is what gives the secure/normal-desktop HDR stream its mastering SEI.) + hdr_meta: Option, hdr_conv: Option, fp16_src: Option, fp16_srv: Option, @@ -213,12 +218,31 @@ impl WgcCapturer { let hmonitor = od.Monitor; // HDR iff the output's colour space is BT.2020 PQ (G2084) — matches the DDA FP16 detection. - let hdr = output + // From the same desc, read the source display's mastering metadata (ST.2086) when HDR. + let desc1 = output .cast::() .ok() - .and_then(|o6| o6.GetDesc1().ok()) + .and_then(|o6| o6.GetDesc1().ok()); + let hdr = desc1 + .as_ref() .map(|d1| d1.ColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020) .unwrap_or(false); + let hdr_meta = if hdr { + desc1.as_ref().map(|d| { + crate::hdr::hdr_meta_from_display( + (d.RedPrimary[0], d.RedPrimary[1]), + (d.GreenPrimary[0], d.GreenPrimary[1]), + (d.BluePrimary[0], d.BluePrimary[1]), + (d.WhitePoint[0], d.WhitePoint[1]), + d.MaxLuminance, + d.MinLuminance, + 0, // MaxCLL: GetDesc1 has no content light level (Apollo zeroes it) + 0, // MaxFALL + ) + }) + } else { + None + }; // Wrap our D3D11 device as a WinRT IDirect3DDevice so the frame pool allocates on it (the // pool textures land on our device → CopyResource + NVENC are same-device, no readback). 
@@ -326,6 +350,7 @@ impl WgcCapturer { timeout_ms, first_frame: true, hdr, + hdr_meta, hdr_conv: None, fp16_src: None, fp16_srv: None, @@ -680,6 +705,10 @@ impl WgcCapturer { } impl Capturer for WgcCapturer { + fn hdr_meta(&self) -> Option { + self.hdr_meta + } + fn next_frame(&mut self) -> Result { let overall = Instant::now() + Duration::from_secs(20); loop { diff --git a/crates/punktfunk-host/src/encode.rs b/crates/punktfunk-host/src/encode.rs index 93a275f..85116aa 100644 --- a/crates/punktfunk-host/src/encode.rs +++ b/crates/punktfunk-host/src/encode.rs @@ -57,6 +57,12 @@ pub trait Encoder: Send { /// Force the next submitted frame to be an IDR keyframe (e.g. after a client /// reference-frame-invalidation request). Default: no-op. fn request_keyframe(&mut self) {} + /// Set the source's static HDR mastering metadata (from the capturer). An HDR encoder emits it + /// as in-band SEI (`mastering_display_colour_volume` + `content_light_level_info`) on each + /// keyframe so any decoder — including stock Moonlight — tone-maps from the source's real grade. + /// Default: no-op (SDR encoders / libavcodec paths that don't attach it yet). Cheap to call + /// every frame; only the direct-NVENC path consumes it. + fn set_hdr_meta(&mut self, _meta: Option) {} /// Invalidate a contiguous range of previously-encoded reference frames (client frame numbers, /// as reported in a loss-recovery request) so the encoder re-references an older still-valid /// frame instead of emitting a full IDR. Returns `true` if a real reference invalidation was diff --git a/crates/punktfunk-host/src/encode/nvenc.rs b/crates/punktfunk-host/src/encode/nvenc.rs index ac754be..8876a61 100644 --- a/crates/punktfunk-host/src/encode/nvenc.rs +++ b/crates/punktfunk-host/src/encode/nvenc.rs @@ -58,6 +58,11 @@ pub struct NvencD3d11Encoder { /// `ABGR10` input format + the BT.2020/PQ colour VUI. Derived per-frame from the capture format /// (HDR can toggle mid-session); a change re-inits the session. 
hdr: bool, + /// The source's static HDR mastering metadata (from the capturer's `GetDesc1`), emitted as + /// in-band SEI (`mastering_display_colour_volume` + `content_light_level_info`) on each keyframe + /// when `hdr`. `None` = unknown → no SEI (the VUI still signals BT.2020 PQ). Set per-frame via + /// [`Encoder::set_hdr_meta`], so a mid-session regrade is picked up on the next keyframe. + hdr_meta: Option, /// Registrations of the capturer's input textures, cached by texture raw pointer — NVENC encodes /// them in place (no per-frame copy). The cloned `ID3D11Texture2D` keeps each alive until we /// unregister it (the capturer may drop its copy on a device recreate before our teardown runs). @@ -107,6 +112,7 @@ impl NvencD3d11Encoder { buffer_fmt: nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB, bit_depth, hdr: false, + hdr_meta: None, regs: HashMap::new(), next: 0, bitstreams: Vec::new(), @@ -303,16 +309,48 @@ impl NvencD3d11Encoder { cfg.encodeCodecConfig.hevcConfig.set_pixelBitDepthMinus8(2); // 10 - 8 } - // HDR colour signaling: BT.2020 primaries + SMPTE ST 2084 (PQ) in the HEVC VUI. + // HDR colour signaling: BT.2020 primaries + SMPTE ST.2084 (PQ) transfer + BT.2020-NCL + // matrix, limited (studio) range — NVENC's RGB→YUV default. HEVC/H.264 carry it in the VUI; + // AV1 has NO VUI, so the SAME CICP code points go in the sequence-header colour config + // (`colorPrimaries`/`transferCharacteristics`/`matrixCoefficients`/`colorRange`). Without + // this a non-HEVC decoder assumes BT.709 SDR → washed-out / colour-shifted HDR. + // + // This is the per-stream colour *description* only. The static mastering-display (ST.2086) + // and content-light (MaxCLL/MaxFALL) metadata — HEVC SEI / AV1 METADATA OBUs — is a + // separate follow-up, as is wiring AV1/H.264 to a true 10-bit (Main10) encode (only HEVC + // sets Main10 above today). 
if self.hdr { - let vui = &mut cfg.encodeCodecConfig.hevcConfig.hevcVUIParameters; - vui.videoSignalTypePresentFlag = 1; - vui.videoFullRangeFlag = 0; // limited (studio) range — NVENC RGB→YUV default - vui.colourDescriptionPresentFlag = 1; - vui.colourPrimaries = nv::NV_ENC_VUI_COLOR_PRIMARIES::NV_ENC_VUI_COLOR_PRIMARIES_BT2020; - vui.transferCharacteristics = + let prim = nv::NV_ENC_VUI_COLOR_PRIMARIES::NV_ENC_VUI_COLOR_PRIMARIES_BT2020; + let trc = nv::NV_ENC_VUI_TRANSFER_CHARACTERISTIC::NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE2084; - vui.colourMatrix = nv::NV_ENC_VUI_MATRIX_COEFFS::NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL; + let mat = nv::NV_ENC_VUI_MATRIX_COEFFS::NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL; + match self.codec { + Codec::H265 => { + let vui = &mut cfg.encodeCodecConfig.hevcConfig.hevcVUIParameters; + vui.videoSignalTypePresentFlag = 1; + vui.videoFullRangeFlag = 0; + vui.colourDescriptionPresentFlag = 1; + vui.colourPrimaries = prim; + vui.transferCharacteristics = trc; + vui.colourMatrix = mat; + } + Codec::H264 => { + let vui = &mut cfg.encodeCodecConfig.h264Config.h264VUIParameters; + vui.videoSignalTypePresentFlag = 1; + vui.videoFullRangeFlag = 0; + vui.colourDescriptionPresentFlag = 1; + vui.colourPrimaries = prim; + vui.transferCharacteristics = trc; + vui.colourMatrix = mat; + } + Codec::Av1 => { + let av1 = &mut cfg.encodeCodecConfig.av1Config; + av1.colorPrimaries = prim; + av1.transferCharacteristics = trc; + av1.matrixCoefficients = mat; + av1.colorRange = 0; // studio/limited swing + } + } } // Reference-frame invalidation: keep a deeper DPB so an invalidated reference can fall back @@ -636,6 +674,51 @@ impl Encoder for NvencD3d11Encoder { encodePicFlags: flags as u32, ..Default::default() }; + + // In-band HDR10 SEI on every IDR (a forced keyframe, or the first frame NVENC opens with): + // `mastering_display_colour_volume` (ST.2086) + `content_light_level_info` (CEA-861.3), + // built from the source display's metadata. Any decoder — incl. 
stock Moonlight — then + // tone-maps from the real grade. HEVC/H.264 carry SEI; AV1 uses metadata OBUs (follow-up). + // The scratch buffers must outlive `encode_picture`, so they live in this scope. + let is_idr = flags != 0 || pts == 0; + let mastering_sei = self + .hdr_meta + .map(|m| crate::hdr::hevc_mastering_display_sei(&m)); + let cll_sei = self + .hdr_meta + .map(|m| crate::hdr::hevc_content_light_level_sei(&m)); + let mut sei: Vec = Vec::new(); + if is_idr && self.hdr { + if let Some(p) = mastering_sei.as_ref() { + sei.push(nv::NV_ENC_SEI_PAYLOAD { + payloadSize: p.len() as u32, + payloadType: crate::hdr::SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME, + payload: p.as_ptr() as *mut u8, + }); + } + if let Some(p) = cll_sei.as_ref() { + sei.push(nv::NV_ENC_SEI_PAYLOAD { + payloadSize: p.len() as u32, + payloadType: crate::hdr::SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO, + payload: p.as_ptr() as *mut u8, + }); + } + } + if !sei.is_empty() { + // Writing a union field is safe; the pointers/len are read during encode_picture. + match self.codec { + Codec::H265 => { + pic.codecPicParams.hevcPicParams.seiPayloadArray = sei.as_mut_ptr(); + pic.codecPicParams.hevcPicParams.seiPayloadArrayCnt = sei.len() as u32; + } + Codec::H264 => { + pic.codecPicParams.h264PicParams.seiPayloadArray = sei.as_mut_ptr(); + pic.codecPicParams.h264PicParams.seiPayloadArrayCnt = sei.len() as u32; + } + // AV1 mastering/CLL ride METADATA OBUs, not SEI — separate follow-up. + Codec::Av1 => {} + } + } (API.encode_picture)(self.encoder, &mut pic) .result_without_string() .map_err(|e| anyhow!("encode_picture: {e:?}"))?; @@ -649,6 +732,12 @@ impl Encoder for NvencD3d11Encoder { self.force_kf = true; } + fn set_hdr_meta(&mut self, meta: Option) { + // Stored and emitted as in-band SEI on the next keyframe (see `submit`). Cheap to call every + // frame; only changes when the source is regraded or HDR toggles. 
+ self.hdr_meta = meta; + } + fn invalidate_ref_frames(&mut self, first: i64, last: i64) -> bool { // No live session, the GPU can't invalidate, or a nonsense range → caller forces a full IDR. // (NVENC handles are single-threaded; this runs on the encode thread, like submit/poll.) diff --git a/crates/punktfunk-host/src/gamestream/cert.rs b/crates/punktfunk-host/src/gamestream/cert.rs index 94d5a3d..02094ea 100644 --- a/crates/punktfunk-host/src/gamestream/cert.rs +++ b/crates/punktfunk-host/src/gamestream/cert.rs @@ -33,12 +33,16 @@ impl ServerIdentity { (Ok(c), Ok(k)) if !c.trim().is_empty() && !k.trim().is_empty() => (c, k), _ => { let (c, k) = generate()?; - fs::create_dir_all(&dir).ok(); - fs::write(&cert_path, &c) - .with_context(|| format!("write {}", cert_path.display()))?; - fs::write(&key_path, &k) + // The private key is the trust root for EVERY surface (TLS server cert, pairing + // signing, the QUIC identity clients pin) — write it owner-only (0600 / SYSTEM-only + // DACL) so a local user can't read it and impersonate the host. The dir is 0700. + super::create_private_dir(&dir).ok(); + super::write_secret_file(&key_path, k.as_bytes()) .with_context(|| format!("write {}", key_path.display()))?; - tracing::info!(path = %cert_path.display(), "generated punktfunk host certificate (RSA-2048)"); + // The cert is public (handed to clients), but write it owner-only too for consistency. 
+ super::write_secret_file(&cert_path, c.as_bytes()) + .with_context(|| format!("write {}", cert_path.display()))?; + tracing::info!(path = %cert_path.display(), "generated punktfunk host certificate (RSA-2048, key 0600)"); (c, k) } }; diff --git a/crates/punktfunk-host/src/gamestream/mod.rs b/crates/punktfunk-host/src/gamestream/mod.rs index 1d6c6a5..d4d1983 100644 --- a/crates/punktfunk-host/src/gamestream/mod.rs +++ b/crates/punktfunk-host/src/gamestream/mod.rs @@ -232,6 +232,91 @@ pub(crate) fn config_dir() -> PathBuf { base.join("punktfunk") } +/// Create `dir` (and parents) owner-private — **0700** on Unix (so the host's secrets aren't readable +/// by other local users via a traversable config path). Best-effort on Windows: the dir inherits the +/// (Users-readable) `%ProgramData%` ACL, so secret *files* are individually locked down by +/// [`write_secret_file`]. Tightens an already-existing dir too. +pub(crate) fn create_private_dir(dir: &std::path::Path) -> std::io::Result<()> { + #[cfg(unix)] + { + use std::os::unix::fs::{DirBuilderExt, PermissionsExt}; + let r = std::fs::DirBuilder::new() + .recursive(true) + .mode(0o700) + .create(dir); + // `recursive` doesn't re-chmod an existing dir — tighten it so an old 0755 dir gets locked. + if dir.exists() { + let _ = std::fs::set_permissions(dir, std::fs::Permissions::from_mode(0o700)); + } + r + } + #[cfg(not(unix))] + { + std::fs::create_dir_all(dir) + } +} + +/// Write `contents` to `path` as an **owner-only secret**: created and re-chmod'd **0600** on Unix +/// (never even briefly group/world-readable), and DACL-restricted to SYSTEM/Administrators/owner on +/// Windows (the default `%ProgramData%` ACL is Users-readable). Mirrors the mgmt-token hardening; used +/// for the host private key and the persisted trust stores so a local unprivileged user can neither +/// read the key (impersonation) nor tamper with the paired allow-list (unauthorized pairing). 
+pub(crate) fn write_secret_file(path: &std::path::Path, contents: &[u8]) -> std::io::Result<()> { + use std::io::Write; + let mut opts = std::fs::OpenOptions::new(); + opts.write(true).create(true).truncate(true); + #[cfg(unix)] + { + use std::os::unix::fs::OpenOptionsExt; + opts.mode(0o600); + } + let mut f = opts.open(path)?; + f.write_all(contents)?; + f.flush()?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600)); + } + #[cfg(windows)] + restrict_to_system_admins(path); + Ok(()) +} + +/// Best-effort Windows DACL lockdown of a secret file: strip inherited ACEs and grant Full only to +/// SYSTEM, Administrators, and OWNER RIGHTS (the creating account — the SYSTEM service or a manually +/// running user keeps access). Without this the host key under the default Users-readable +/// `%ProgramData%` ACL is readable by ANY local user. Uses `icacls` with hard-coded SIDs +/// (locale-independent) via the absolute `%SystemRoot%` path (a privileged service must not trust +/// `PATH`). Never fatal — on failure the file is simply left at the inherited ACL (today's behaviour). 
+#[cfg(windows)] +fn restrict_to_system_admins(path: &std::path::Path) { + let icacls = std::env::var("SystemRoot") + .map(|r| format!("{r}\\System32\\icacls.exe")) + .unwrap_or_else(|_| "icacls".to_string()); + let status = std::process::Command::new(icacls) + .arg(path.as_os_str()) + .args([ + "/inheritance:r", + "/grant:r", + "*S-1-5-18:(F)", // NT AUTHORITY\SYSTEM + "/grant:r", + "*S-1-5-32-544:(F)", // BUILTIN\Administrators + "/grant:r", + "*S-1-3-4:(F)", // OWNER RIGHTS + ]) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status(); + match status { + Ok(s) if s.success() => {} + _ => tracing::warn!( + path = %path.display(), + "icacls hardening did not succeed — this secret may be readable by other local users" + ), + } +} + fn hostname_string() -> String { #[cfg(target_os = "windows")] if let Some(n) = std::env::var_os("COMPUTERNAME") { @@ -304,7 +389,7 @@ fn load_paired() -> Vec> { pub(crate) fn save_paired(paired: &[Vec]) { let Some(path) = paired_path() else { return }; if let Some(dir) = path.parent() { - let _ = std::fs::create_dir_all(dir); + let _ = create_private_dir(dir); } let bytes = match serde_json::to_vec(paired) { Ok(b) => b, @@ -313,10 +398,10 @@ pub(crate) fn save_paired(paired: &[Vec]) { return; } }; - // Write to a sibling temp file, then rename over the target (atomic replace on Unix and - // Windows). Never write `path` in place. + // Write to a sibling temp file (owner-only, so a local user can't tamper the allow-list), then + // rename over the target (atomic replace on Unix and Windows). Never write `path` in place. 
let tmp = path.with_extension("json.tmp"); - if let Err(e) = std::fs::write(&tmp, &bytes) { + if let Err(e) = write_secret_file(&tmp, &bytes) { tracing::warn!(error = %e, "persisting pairings failed (temp write)"); return; } @@ -325,3 +410,29 @@ pub(crate) fn save_paired(paired: &[Vec]) { let _ = std::fs::remove_file(&tmp); } } + +#[cfg(all(test, unix))] +mod tests { + use super::{create_private_dir, write_secret_file}; + use std::os::unix::fs::PermissionsExt; + + #[test] + fn secrets_are_written_owner_only() { + let dir = std::env::temp_dir().join(format!("pf-secret-test-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&dir); + create_private_dir(&dir).expect("create private dir"); + let dmode = std::fs::metadata(&dir).unwrap().permissions().mode() & 0o777; + assert_eq!(dmode, 0o700, "config dir must be owner-only (0700)"); + + let key = dir.join("key.pem"); + write_secret_file(&key, b"-----BEGIN PRIVATE KEY-----\n...").expect("write secret"); + let fmode = std::fs::metadata(&key).unwrap().permissions().mode() & 0o777; + assert_eq!(fmode, 0o600, "private key must be owner-only (0600)"); + + // Overwriting an existing secret keeps it 0600 (the truncate+reopen path). + write_secret_file(&key, b"new contents").expect("rewrite secret"); + let fmode = std::fs::metadata(&key).unwrap().permissions().mode() & 0o777; + assert_eq!(fmode, 0o600); + let _ = std::fs::remove_dir_all(&dir); + } +} diff --git a/crates/punktfunk-host/src/gamestream/rtsp.rs b/crates/punktfunk-host/src/gamestream/rtsp.rs index e81c0b4..3fb061a 100644 --- a/crates/punktfunk-host/src/gamestream/rtsp.rs +++ b/crates/punktfunk-host/src/gamestream/rtsp.rs @@ -272,7 +272,20 @@ fn stream_config(map: &HashMap) -> Option { let parse_u = |k: &str| map.get(k).and_then(|s| s.trim().parse::().ok()); let width = parse_u("x-nv-video[0].clientViewportWd")?; let height = parse_u("x-nv-video[0].clientViewportHt")?; + // packetSize is attacker-controlled and PRE-AUTH (the RTSP listener is unauthenticated). 
It sets + // the per-shard payload (`packet_size - 16`); a tiny value underflows / div-by-zeros the video + // thread, an absurd one amplifies per-shard allocation. Reject anything outside a sane range + // (real Moonlight uses ~1024) so a malformed ANNOUNCE fails here instead of panicking the stream. + const PACKET_SIZE_MIN: usize = 64; + const PACKET_SIZE_MAX: usize = 2048; let packet_size = parse_u("x-nv-video[0].packetSize")? as usize; + if !(PACKET_SIZE_MIN..=PACKET_SIZE_MAX).contains(&packet_size) { + tracing::warn!( + packet_size, + "RTSP ANNOUNCE: out-of-range packetSize — rejecting" + ); + return None; + } let fps = parse_u("x-nv-video[0].maxFPS") .filter(|&f| f > 0) .unwrap_or(60); @@ -424,6 +437,27 @@ mod tests { assert!(stream_config(&map).is_none()); } + /// packetSize is attacker-controlled AND pre-auth (the RTSP listener is unauthenticated), so an + /// out-of-range value must be rejected here rather than panic the video thread (≤16 → div-by-zero + /// / underflow; absurd → allocation amplification). Sane values (real Moonlight ~1024) pass. + #[test] + fn announce_rejects_out_of_range_packet_size() { + for bad in ["0", "16", "63", "4096", "999999"] { + let map = announce(&[("x-nv-video[0].packetSize", bad)]); + assert!( + stream_config(&map).is_none(), + "out-of-range packetSize {bad} must be rejected" + ); + } + for ok in ["64", "1024", "1392", "2048"] { + let map = announce(&[("x-nv-video[0].packetSize", ok)]); + assert!( + stream_config(&map).is_some(), + "in-range packetSize {ok} must be accepted" + ); + } + } + /// Audio negotiation: numChannels/AudioQuality/packetDuration, with Moonlight defaults. 
#[test] fn announce_audio_params() { diff --git a/crates/punktfunk-host/src/gamestream/video.rs b/crates/punktfunk-host/src/gamestream/video.rs index 70ce517..341aec1 100644 --- a/crates/punktfunk-host/src/gamestream/video.rs +++ b/crates/punktfunk-host/src/gamestream/video.rs @@ -55,7 +55,12 @@ impl VideoPacketizer { pub fn new(packet_size: usize, fec_percentage: u8, min_fec: u8) -> Self { VideoPacketizer { packet_size, - payload_per_shard: packet_size + 16 - SHARD_HEADER, + // Defense in depth: `pps` is a divisor in `packetize` (`% pps`, `div_ceil(pps)`), so it + // must never be 0. `blocksize = packet_size + 16`; a tiny attacker-supplied packet_size + // (≤ SHARD_HEADER-16 = 16) would otherwise underflow (panic) or yield pps==0 (div-by-zero). + // `stream_config` already rejects out-of-range packetSize; this saturating `.max(1)` makes + // a degenerate value structurally unable to panic, without affecting any valid size. + payload_per_shard: (packet_size + 16).saturating_sub(SHARD_HEADER).max(1), fec_percentage: fec_percentage as usize, min_fec: min_fec as usize, frame_index: 0, @@ -252,6 +257,18 @@ mod tests { } } + #[test] + fn degenerate_packet_size_does_not_panic() { + // A pre-auth attacker drives packetSize via the RTSP ANNOUNCE. `stream_config` rejects + // out-of-range values, but the packetizer must ALSO never panic (div-by-zero on `% pps` / + // `div_ceil(pps)`, or usize underflow) for ANY input — pps is clamped to >= 1. + for ps in [0usize, 15, 16, 17, 32] { + let mut pk = VideoPacketizer::new(ps, 20, 2); + assert!(pk.payload_per_shard >= 1, "pps must never be 0 (ps={ps})"); + let _ = pk.packetize(&[0xCDu8; 200], FrameType::Idr, 0); // must not panic + } + } + #[test] fn multi_block_split() { let mut pk = VideoPacketizer::new(1392, 0, 0); // data-only diff --git a/crates/punktfunk-host/src/hdr.rs b/crates/punktfunk-host/src/hdr.rs new file mode 100644 index 0000000..8a22a1b --- /dev/null +++ b/crates/punktfunk-host/src/hdr.rs @@ -0,0 +1,168 @@ +//! 
Pure HDR static-metadata helpers shared by the capture (source mastering metadata) and encode +//! (in-band SEI) paths — kept platform-independent and unit-tested here so the byte-level logic is +//! verified on every target, even though the only *callers* of the SEI builders are the Windows +//! NVENC path (`encode/nvenc.rs`) and of the display conversion the Windows DXGI/WGC capturers. +//! +//! Units follow the HDR10 standards so the values pass straight through: +//! - chromaticities in 1/50000 increments (SMPTE ST.2086 / DXGI `DXGI_HDR_METADATA_HDR10`), +//! - mastering luminance in 0.0001 cd/m², +//! - content light level (MaxCLL/MaxFALL) in cd/m² (nits). + +use punktfunk_core::quic::HdrMeta; + +/// HEVC/H.264 SEI payload type for `mastering_display_colour_volume` (SMPTE ST.2086). Same code +/// point in AVC and HEVC. +pub const SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME: u32 = 137; +/// HEVC/H.264 SEI payload type for `content_light_level_info` (CEA-861.3 MaxCLL/MaxFALL). +pub const SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO: u32 = 144; + +/// Quantize a CIE xy chromaticity coordinate (0.0..=1.0) to ST.2086 1/50000 units. +fn xy_to_2086(v: f32) -> u16 { + (v * 50000.0).round().clamp(0.0, 65535.0) as u16 +} + +/// Build an [`HdrMeta`] from a source display's measured colour volume — the chromaticities (CIE xy) +/// and luminances (cd/m²) reported by e.g. Windows `IDXGIOutput6::GetDesc1`. `max_cll`/`max_fall` +/// are content light levels in nits; pass `0` when unknown (GetDesc1 doesn't expose them — Apollo +/// zeroes them too, and a `0` lets the display fall back to the mastering luminance). +#[allow(clippy::too_many_arguments)] +pub fn hdr_meta_from_display( + red: (f32, f32), + green: (f32, f32), + blue: (f32, f32), + white: (f32, f32), + max_mastering_nits: f32, + min_mastering_nits: f32, + max_cll: u16, + max_fall: u16, +) -> HdrMeta { + HdrMeta { + // ST.2086 stores primaries in G, B, R order. 
+ display_primaries: [ + [xy_to_2086(green.0), xy_to_2086(green.1)], + [xy_to_2086(blue.0), xy_to_2086(blue.1)], + [xy_to_2086(red.0), xy_to_2086(red.1)], + ], + white_point: [xy_to_2086(white.0), xy_to_2086(white.1)], + max_display_mastering_luminance: (max_mastering_nits.max(0.0) * 10_000.0).round() as u32, + min_display_mastering_luminance: (min_mastering_nits.max(0.0) * 10_000.0).round() as u32, + max_cll, + max_fall, + } +} + +/// A generic HDR10 default (BT.2020 primaries, D65 white, 1000-nit mastering, MaxCLL 1000 / +/// MaxFALL 400) — the baseline a host sends until it reads the source display's real mastering +/// metadata, and the values clients used to hardcode. +pub fn generic_hdr10() -> HdrMeta { + HdrMeta { + display_primaries: [[8500, 39850], [6550, 2300], [35400, 14600]], // BT.2020 G, B, R + white_point: [15635, 16450], // D65 + max_display_mastering_luminance: 10_000_000, // 1000 nits + min_display_mastering_luminance: 1, // 0.0001 nits + max_cll: 1000, + max_fall: 400, + } +} + +/// The `mastering_display_colour_volume` SEI payload (HEVC/H.264 type +/// [`SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME`]) — 24 bytes, big-endian (SEI RBSP order), in G/B/R +/// primary order per ST.2086. Pass this raw payload to NVENC's `NV_ENC_SEI_PAYLOAD` (NVENC wraps it +/// in the SEI NAL). 
+pub fn hevc_mastering_display_sei(m: &HdrMeta) -> [u8; 24] { + let mut b = [0u8; 24]; + let mut o = 0; + let mut put16 = |v: u16| { + b[o..o + 2].copy_from_slice(&v.to_be_bytes()); + o += 2; + }; + for p in m.display_primaries.iter() { + put16(p[0]); + put16(p[1]); + } + put16(m.white_point[0]); + put16(m.white_point[1]); + let mut put32 = |v: u32| { + b[o..o + 4].copy_from_slice(&v.to_be_bytes()); + o += 4; + }; + put32(m.max_display_mastering_luminance); + put32(m.min_display_mastering_luminance); + debug_assert_eq!(o, 24); + b +} + +/// The `content_light_level_info` SEI payload (HEVC/H.264 type +/// [`SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO`]) — 4 bytes, big-endian: MaxCLL then MaxFALL. +pub fn hevc_content_light_level_sei(m: &HdrMeta) -> [u8; 4] { + let mut b = [0u8; 4]; + b[0..2].copy_from_slice(&m.max_cll.to_be_bytes()); + b[2..4].copy_from_slice(&m.max_fall.to_be_bytes()); + b +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn display_conversion_bt2020_1000nit() { + // BT.2020 primaries + D65 white, a 1000-nit / 0.0001-nit mastering display. + let m = hdr_meta_from_display( + (0.708, 0.292), // red + (0.170, 0.797), // green + (0.131, 0.046), // blue + (0.3127, 0.3290), // D65 + 1000.0, + 0.0001, + 0, + 0, + ); + // ST.2086 G, B, R order, 1/50000 units. + assert_eq!( + m.display_primaries, + [[8500, 39850], [6550, 2300], [35400, 14600]] + ); + assert_eq!(m.white_point, [15635, 16450]); + assert_eq!(m.max_display_mastering_luminance, 10_000_000); // 1000 * 10000 + assert_eq!(m.min_display_mastering_luminance, 1); // 0.0001 * 10000 + assert_eq!((m.max_cll, m.max_fall), (0, 0)); + } + + #[test] + fn mastering_sei_is_24_bytes_big_endian_gbr() { + let m = generic_hdr10(); + let p = hevc_mastering_display_sei(&m); + assert_eq!(p.len(), 24); + // First field = green.x = 8500 = 0x2134, big-endian. 
+ assert_eq!(&p[0..2], &8500u16.to_be_bytes()); + assert_eq!(&p[2..4], &39850u16.to_be_bytes()); // green.y + assert_eq!(&p[4..6], &6550u16.to_be_bytes()); // blue.x + assert_eq!(&p[12..14], &15635u16.to_be_bytes()); // white.x + assert_eq!(&p[16..20], &10_000_000u32.to_be_bytes()); // max lum + assert_eq!(&p[20..24], &1u32.to_be_bytes()); // min lum + } + + #[test] + fn cll_sei_is_4_bytes_big_endian() { + let m = generic_hdr10(); + let p = hevc_content_light_level_sei(&m); + assert_eq!(p, [0x03, 0xE8, 0x01, 0x90]); // 1000, 400 big-endian + } + + #[test] + fn clamps_out_of_range() { + let m = hdr_meta_from_display( + (2.0, 2.0), + (0.0, 0.0), + (0.0, 0.0), + (0.5, 0.5), + -5.0, + 0.0, + 0, + 0, + ); + assert_eq!(m.display_primaries[2], [65535, 65535]); // red clamped + assert_eq!(m.max_display_mastering_luminance, 0); // negative → 0 + } +} diff --git a/crates/punktfunk-host/src/main.rs b/crates/punktfunk-host/src/main.rs index 717d463..23a485d 100644 --- a/crates/punktfunk-host/src/main.rs +++ b/crates/punktfunk-host/src/main.rs @@ -23,6 +23,7 @@ mod dmabuf_fence; mod drm_sync; mod encode; mod gamestream; +mod hdr; mod inject; mod library; mod mgmt; diff --git a/crates/punktfunk-host/src/native_pairing.rs b/crates/punktfunk-host/src/native_pairing.rs index f913f8a..04ae152 100644 --- a/crates/punktfunk-host/src/native_pairing.rs +++ b/crates/punktfunk-host/src/native_pairing.rs @@ -115,12 +115,13 @@ fn load(path: &std::path::Path) -> PairedClients { fn save(state: &PairedState) -> Result<()> { if let Some(dir) = state.path.parent() { - std::fs::create_dir_all(dir)?; + crate::gamestream::create_private_dir(dir)?; } // Atomic replace: a crash/full-disk mid-write must not truncate the trust store (which would - // silently lock out every paired client on a --require-pairing host). Temp + rename. + // silently lock out every paired client on a --require-pairing host). Temp + rename. 
The temp is + // written owner-only so a local user can't inject a fingerprint to pair themselves. let tmp = state.path.with_extension("json.tmp"); - std::fs::write(&tmp, serde_json::to_vec_pretty(&state.clients)?)?; + crate::gamestream::write_secret_file(&tmp, &serde_json::to_vec_pretty(&state.clients)?)?; std::fs::rename(&tmp, &state.path)?; Ok(()) } diff --git a/crates/punktfunk-host/src/punktfunk1.rs b/crates/punktfunk-host/src/punktfunk1.rs index 246560f..f3f9998 100644 --- a/crates/punktfunk-host/src/punktfunk1.rs +++ b/crates/punktfunk-host/src/punktfunk1.rs @@ -27,9 +27,9 @@ use punktfunk_core::config::{CompositorPref, FecConfig, FecScheme, GamepadPref, use punktfunk_core::input::{InputEvent, InputKind}; use punktfunk_core::packet::{FLAG_PIC, FLAG_PROBE, FLAG_SOF}; use punktfunk_core::quic::{ - endpoint, io, ClockEcho, ClockProbe, Hello, LossReport, PairChallenge, PairProof, PairRequest, - PairResult, ProbeRequest, ProbeResult, Reconfigure, Reconfigured, RequestKeyframe, Start, - Welcome, + endpoint, io, ClockEcho, ClockProbe, ColorInfo, Hello, LossReport, PairChallenge, PairProof, + PairRequest, PairResult, ProbeRequest, ProbeResult, Reconfigure, Reconfigured, RequestKeyframe, + Start, Welcome, }; use punktfunk_core::transport::UdpTransport; use punktfunk_core::Session; @@ -418,6 +418,17 @@ async fn pair_ceremony( ) .await?; + // SINGLE-USE PIN: we've now sent the host key-confirmation, which lets the client TEST this one + // guess (a right PIN → its proof will match; a wrong PIN → the client detects the mismatch and + // aborts *without* sending its proof). So consume the PIN HERE — before reading the proof — + // regardless of the outcome: an attacker gets EXACTLY ONE online guess (the documented guarantee), + // not an unbounded brute-force of the 4-digit space against a static, never-rotating PIN. A + // malformed request that errored at `pake.finish` above never reached here, so it doesn't burn the + // window (no DoS from garbage). 
The operator re-arms (web console / restart) for the next device — + // including after a successful pair; the protocol gives no reliable host-observable "wrong PIN" + // signal to scope this to failures only (the client just disconnects). + np.disarm(); + let proof = tokio::time::timeout(PAIRING_TIMEOUT, io::read_msg(&mut recv)) .await .map_err(|_| anyhow!("pairing timed out waiting for the client's confirmation"))??; @@ -640,6 +651,16 @@ async fn serve_session( gamepad, bitrate_kbps, bit_depth, + // Colour signalling the client configures its decoder/presenter from. A negotiated + // 10-bit session is our HDR path (BT.2020 PQ — what the NVENC HEVC VUI emits from a + // 10-bit capture format); 8-bit stays BT.709 SDR. The mastering metadata (ST.2086 + + // CLL) rides the 0xCE datagram below. (A future step can refine this to the capturer's + // actual monitor HDR state and announce a mid-stream flip.) + color: if bit_depth >= 10 { + ColorInfo::HDR10_BT2020_PQ + } else { + ColorInfo::SDR_BT709 + }, }; io::write_msg(&mut send, &welcome.encode()).await?; @@ -842,6 +863,17 @@ async fn serve_session( None }; + // HDR static metadata (ST.2086 mastering + CEA-861.3 content light level), host → client, sent + // once at session start when an HDR session was negotiated, as a generic HDR10 baseline. The + // virtual-source stream loop then sends the source display's REAL mastering metadata (Windows + // GetDesc1) as soon as capture starts and re-sends it on keyframes; the client applies the + // latest it receives. This baseline covers the synthetic source and the pre-capture gap. 
+ if welcome.color.is_hdr() { + let meta = crate::hdr::generic_hdr10(); + let _ = conn.send_datagram(punktfunk_core::quic::encode_hdr_meta_datagram(&meta).into()); + tracing::info!("sent HDR10 static metadata (0xCE; generic baseline)"); + } + // Test hook (synthetic source only): a scripted feedback burst on the host→client // planes — rumble (0xCA) + DualSense HID-output (0xCD) — so loopback tests can assert // the client's feedback path without a real game writing output reports to a real pad. @@ -882,6 +914,7 @@ async fn serve_session( let bit_depth = welcome.bit_depth; // resolved encode bit depth (8, or 10 when negotiated) let stop_stream = stop.clone(); let fec_target_dp = fec_target.clone(); // data-plane handle to the adaptive-FEC target + let conn_stream = conn.clone(); // for sending the source's real HDR metadata (0xCE) mid-stream let result: Result<()> = async { tokio::task::spawn_blocking(move || -> Result<()> { // Wait briefly for the client to hole-punch our data port, then stream to its OBSERVED @@ -935,6 +968,7 @@ async fn serve_session( probe_rx, probe_result_tx, fec_target_dp, + conn_stream, ) } } @@ -2041,6 +2075,7 @@ fn virtual_stream( probe_rx: std::sync::mpsc::Receiver, probe_result_tx: tokio::sync::mpsc::UnboundedSender, fec_target: Arc, + conn: quinn::Connection, ) -> Result<()> { // This thread runs the capture+encode loop (single-process: Linux / synthetic / NO_WGC DDA) — or // tail-calls the relay below. Elevate it so a CPU-heavy game can't deschedule our GPU submission. @@ -2064,6 +2099,7 @@ fn virtual_stream( probe_rx, probe_result_tx, fec_target, + conn, ); } tracing::info!( @@ -2150,6 +2186,8 @@ fn virtual_stream( let mut cur_mode = mode; const MAX_CAPTURE_REBUILDS: u32 = 5; let mut capture_rebuilds: u32 = 0; + // Last HDR mastering metadata we forwarded — re-sent as 0xCE on change/keyframe (see below). 
+ let mut last_hdr_meta: Option = None; while !stop.load(Ordering::SeqCst) && std::time::Instant::now() < deadline { // Mid-stream session switch (the box flipped Gaming↔Desktop): rebuild the WHOLE backend in // place — a different compositor at the SAME client mode — keeping the Session + send thread @@ -2285,6 +2323,16 @@ fn virtual_stream( next = std::time::Instant::now(); } } + // The source's static HDR mastering metadata (Windows GetDesc1; None on Linux/SDR) is the + // single source of truth: hand it to the encoder (in-band SEI on keyframes) and, when it + // changes, to the client (0xCE). Re-sent on each keyframe below so a dropped best-effort + // datagram converges within a GOP. + let hdr_meta = capturer.hdr_meta(); + enc.set_hdr_meta(hdr_meta); + let mut resend_meta = hdr_meta != last_hdr_meta; + if resend_meta { + last_hdr_meta = hdr_meta; + } let capture_ns = now_ns(); enc.submit(&frame).context("encoder submit")?; // The deadline for this frame's packets (the next frame's due time); the send thread paces @@ -2297,6 +2345,15 @@ fn virtual_stream( } else { FLAG_PIC as u32 }; + // Re-send the HDR mastering metadata (0xCE) on each keyframe (a decoder-resync point) and + // whenever it changed, so a client that dropped the best-effort datagram re-converges. + if let Some(m) = last_hdr_meta { + if au.keyframe || resend_meta { + let _ = conn + .send_datagram(punktfunk_core::quic::encode_hdr_meta_datagram(&m).into()); + resend_meta = false; + } + } let encode_us = (now_ns().saturating_sub(capture_ns) / 1000) as u32; let msg = FrameMsg { data: au.data, @@ -2368,6 +2425,9 @@ fn virtual_stream_relay( probe_rx: std::sync::mpsc::Receiver, probe_result_tx: tokio::sync::mpsc::UnboundedSender, fec_target: Arc, + // The SYSTEM-host relay path doesn't yet send the source mastering metadata as 0xCE — the + // helper's in-band SEI carries it (Windows follow-up). Held for that future wiring. 
+ _conn: quinn::Connection, ) -> Result<()> { use crate::capture::dxgi::WinCaptureTarget; use crate::capture::wgc_relay::HelperRelay; @@ -3329,15 +3389,7 @@ mod tests { refresh_hz: 60, }; - // 1: wrong PIN → Crypto, nothing stored. - let err = NativeClient::pair("127.0.0.1", 19778, identity, "0000", "imposter", timeout) - .unwrap_err(); - assert!( - matches!(err, punktfunk_core::PunktfunkError::Crypto), - "{err:?}" - ); - - // 2: anonymous session on a pairing-required host → rejected (connect fails). + // 1: anonymous session on a pairing-required host → rejected (independent of the PIN window). assert!( NativeClient::connect( "127.0.0.1", @@ -3356,16 +3408,14 @@ mod tests { "anonymous session must be rejected" ); - // 3: correct PIN → paired, host fingerprint returned. Space past the pairing - // cooldown that the wrong-PIN attempt above just triggered (a real retry is slower). - std::thread::sleep(PAIRING_COOLDOWN + std::time::Duration::from_millis(200)); + // 2: correct PIN → paired, host fingerprint returned. The ONE online attempt CONSUMES the + // arming window (single-use), verified by step 4. let host_fp = NativeClient::pair("127.0.0.1", 19778, identity, "4321", "test-client", timeout) .expect("pairing with the right PIN"); assert!(test_paired_path().exists()); - let _ = std::fs::remove_file(test_paired_path()); // already loaded; tidy /tmp - // 4: the paired identity gets a session — pinned to the ceremony's fingerprint. + // 3: the paired identity gets a session — pinned to the ceremony's fingerprint. let client = NativeClient::connect( "127.0.0.1", 19778, @@ -3387,6 +3437,17 @@ mod tests { assert_ne!(client.resolved_gamepad, GamepadPref::Auto); drop(client); + // 4: SINGLE-USE PIN — the completed ceremony in step 2 consumed the arming window, so a + // second pairing attempt (even with the CORRECT PIN) is now rejected. This is the documented + // "one online guess" guarantee: an attacker can't brute-force the static 4-digit PIN. 
(The + // operator re-arms via the console / restart for the next device.) + std::thread::sleep(PAIRING_COOLDOWN + std::time::Duration::from_millis(200)); + assert!( + NativeClient::pair("127.0.0.1", 19778, identity, "4321", "too-late", timeout).is_err(), + "the PIN window must be single-use (one online guess)" + ); + let _ = std::fs::remove_file(test_paired_path()); // tidy /tmp + host.join().unwrap().unwrap(); } } diff --git a/docs/hdr-pipeline-plan.md b/docs/hdr-pipeline-plan.md new file mode 100644 index 0000000..4b40d44 --- /dev/null +++ b/docs/hdr-pipeline-plan.md @@ -0,0 +1,243 @@ +# HDR pipeline — investigation & implementation plan + +Goal: **true, correct HDR glass-to-glass** for punktfunk, across the host (Windows today; +Linux blocked upstream) and every client (Windows / Apple / Android / Linux). + +This is an audit of the current state, the gap list, and a phased plan. It was produced from +a full read of every HDR-touching subsystem cross-checked against the HDR10 standards +(CICP/H.273 VUI, SMPTE ST.2086 mastering, CEA-861.3 MaxCLL/MaxFALL) and the +Sunshine/Apollo/Moonlight reference implementation. + +> Status legend: **blocker** (HDR can't work) · **correctness** (HDR works but looks wrong) · +> **quality** (correct-ish, missing refinement) · **ok**. + +--- + +## TL;DR + +Our HDR is **correct in isolated islands but broken end-to-end.** The pixel math and the HEVC +VUI we *do* emit are right (self-test validated, matches Apollo). What's missing is the +**metadata chain**: nothing measures, signals, transports, or applies the *static HDR metadata* +(mastering display colour volume + content light level) that tells a display how to tone-map — +so every client hardcodes generic values or infers from the bitstream, and one line +(`abi.rs:896`, `video_caps = 0`) makes the entire (correct) Apple HDR pipeline dead code. 
+ +--- + +## What's already correct (the islands) + +| Stage | Where | +|---|---| +| Windows host HEVC **VUI** — primaries=9 (BT.2020) / transfer=16 (PQ) / matrix=9 (BT.2020-NCL) / limited range | `encode/nvenc.rs:307-316` | +| Windows host **scRGB→BT.2020 PQ** shader (×80 nits → BT.709→2020 → ST.2084 OETF, 10000-nit peak) | `capture/dxgi.rs` — self-test `<1` code error, matches Apollo | +| Windows client **P010 decode + YUV→RGB** (BT.2020-NCL, limited→full) + **R10G10B10A2 / G2084-P2020 swapchain** | `present.rs:66-77, 320-370` | +| Android client **Main10 decode + reactive DataSpace** (BT2020-PQ/HLG) | `decode.rs:210-227` | +| Apple client decode/present **code** (P010 VideoToolbox, BT.2020 PQ Metal, `itur_2100_PQ` + EDR) | correct — but never runs (blocker #2) | + +## Gap list + +### Blockers +1. **No color-metadata transport in the protocol** *(the keystone).* The wire carries only + `Hello.video_caps` (10BIT/HDR bits) and `Welcome.bit_depth` (8/10) — `quic.rs:127-128` + explicitly defers color. No primaries/transfer/matrix/range, **no ST.2086 mastering, no + MaxCLL/MaxFALL**. ST.2086/CLL host→client is impossible by construction today. +2. **C ABI hardcodes `video_caps = 0`** (`abi.rs:896`) → Apple's complete HDR pipeline is dead + code; no ABI embedder can request HDR. One-line root cause. +3. **H.264 and AV1 emit zero color signaling on Windows** — the `if self.hdr` VUI block in + `nvenc.rs` only writes `hevcConfig`. Any H.264+10-bit or AV1+HDR stream decodes as BT.709 SDR. + *(AV1 is **not** a "copy the HEVC VUI" fix — AV1 has no VUI/SEI; it carries + primaries/transfer/matrix in the sequence-header `color_config` and mastering/CLL in + **METADATA OBUs** `HDR_MDCV`/`HDR_CLL`. Verify whether NVENC's AV1 path accepts them.)* +4. 
**Linux host is 8-bit only end to end** — capture offers only 8-bit PipeWire formats + (`capture/linux.rs:443-453, 594-654`; gamescope #2126, portals don't wire PipeWire 1.6 + BT.2020/PQ); encode downgrades 10-bit (`encode/linux.rs:153-162` TODO, `vaapi.rs:719`) with + BT.709 hardcoded. The Windows-style 8-bit→Main10 upconvert shim is not implemented here. +5. **Linux client HDR is a complete non-feature** — `video_caps=0`, P010 decode path dead + (`video.rs:379`), CICP hardcoded BT.709 (`ui_stream.rs:239-243`), no Wayland + color-management (GTK4 0.11 too old). + +### Correctness +6. **No host ever emits the ST.2086 mastering or CEA-861.3 CLL SEI.** Windows never reads + `IDXGIOutput6::GetDesc1`; `nvenc.rs` never builds an `NV_ENC_SEI_PAYLOAD`; Linux attaches no + libavcodec `side_data`. Apollo reads `GetDesc1` and attaches it. +7. **Clients hardcode mastering metadata.** `present.rs:584-595` ships fixed + `1000-nit / MaxCLL 1000 / MaxFALL 400` (with the literal "the protocol doesn't carry the + stream's real mastering metadata yet" comment). Apple/Android set none. +8. **HDR→SDR tone-mapping is unaddressed — and it's the common case.** Most client displays are + SDR. No client queries display peak; silent `SetColorSpace1`/`SetHDRMetaData` failures present + PQ as SDR gamma (crushed/dark). We lean entirely on OS auto-fallback. +9. **Windows secure desktop drops HDR to SDR** on lock/UAC (`dxgi.rs:325-368`, + `sudovda.rs:234-277`). +10. **GameStream silently streams SDR** on a Moonlight HDR request (`mod.rs:48-56`, + `rtsp.rs:288-293`) — logged, but no negotiated error. Real Apollo parity needs the Moonlight + `SS_HDR_METADATA` blob on the **ENet control channel**, not just in-band. +11. **Linux client software path is color-wrong even for SDR** — BT.601 applied to BT.709 + (`video.rs:162-167`, no `color_state` on the texture). Standalone bug. + +### Quality +12. No per-content MaxCLL/MaxFALL (`GetDesc1` doesn't expose it). 
No encoder-CSC-range vs + signaled-range reconciliation (black-crush risk). No automated 10-bit test — `probe` never + even reads `Welcome.bit_depth` (`main.rs:396-406`). + +### Out of scope (call out, don't build) +- Dynamic metadata: HDR10+ (ST.2094-40) and Dolby Vision RPU. We handle *static* ST.2086 only, + with mid-stream changes carried by re-sending the static block (below). +- HLG: the colorimetry transfer enum carries `18` from day one (free), but the `0xCE` mastering + datagram is **omitted for HLG** (scene-referred, no mastering metadata). + +--- + +## Protocol design (the keystone — pure-additive, hardware-free, CI-testable) + +Two layers, both back-compat-safe via the established trailing-bytes / new-datagram-tag patterns. + +### (A) Per-session colorimetry — 4 trailing bytes on `Welcome` + +After the existing `bit_depth` (offset 59), append a fixed 4-byte CICP block at offsets 60..64. +(A future mirror on `Reconfigured` will announce a mid-stream SDR↔HDR / BT.709↔BT.2020 flip on the +control stream we already use for renegotiation — deferred to Step 1 with the mid-stream-flip work; +today a mode switch never changes the colour, and the `0xCE` re-send covers mastering changes.) + +``` +[60] colour_primaries (CICP: 1=BT.709, 9=BT.2020) +[61] transfer_characteristics (1=BT.709, 16=PQ/SMPTE2084, 18=HLG) +[62] matrix_coeffs (1=BT.709, 9=BT.2020-NCL) ← never emit 10 (CL): no client decodes it +[63] video_full_range_flag (0=limited, 1=full) +``` + +Decode with `b.get(60).unwrap_or(1)` etc. — an older host omits them → BT.709 limited SDR +(today's behavior). `Welcome` stays `Copy`. Modeled as a `ColorInfo` struct on the wire types +and exposed on `NativeClient` (with `bit_depth`) so clients *know* the colorimetry instead of +inferring it. 
+ +### (B) Per-change mastering + CLL — a new host→client datagram, tag `0xCE` + +ST.2086 is variable and changes mid-stream, so it rides a datagram (next tag after `0xCD` +HIDOUT), demuxed in `client.rs` like AUDIO/RUMBLE/HIDOUT. 28 bytes, standard SEI fixed-point: + +``` +[0] = 0xCE +G.x G.y B.x B.y R.x R.y 6 × u16 LE display primaries, 1/50000 units +wp.x wp.y 2 × u16 LE white point, 1/50000 units +max_display_mastering_luminance u32 LE 0.0001 cd/m² +min_display_mastering_luminance u32 LE 0.0001 cd/m² +max_cll u16 LE nits +max_fall u16 LE nits +``` + +- Sent on session start and whenever `GetDesc1`/source mastering changes; **re-sent on every + IDR/RFI keyframe** so a client that dropped the (best-effort) datagram converges within a GOP. + Until first receipt the client uses the Welcome transfer + a documented generic default. +- **Bounds-check length before reading** (reassembler-bounds security invariant) — truncation + test required. +- **Omitted entirely for HLG.** +- Units note: these map straight to DXGI `DXGI_HDR_METADATA_HDR10`, Android `KEY_HDR_STATIC_INFO`, + and Apple `CAEDRMetadata.hdr10`. On the **libavcodec/Linux** side they need conversion — + `AVMasteringDisplayMetadata` stores `AVRational`, not raw fixed-point. + +### (C) C ABI + +- `punktfunk_connect_ex5(... video_caps: u8)` (ex4 delegates with 0); **fix `abi.rs:896`.** +- `punktfunk_connection_next_hdr_meta(c, *mut PunktfunkHdrMeta, timeout_ms)` — new plane, + one-puller contract like `next_audio`. +- `punktfunk_connection_color_info(c, *mut prim, *mut trc, *mut matrix, *mut range, *mut bit_depth)`. +- Regenerate `include/punktfunk_core.h` (cbindgen); `struct_size`/repr(C) guards on new structs. + +--- + +## Phases + +### Step 0 — Protocol + ABI carry color metadata end to end *(this change)* +The dominant cross-cutting blocker; everything else is downstream. No rendering changes, no +hardware, CI-testable. 
+ +- **core:** `ColorInfo` + 4 Welcome bytes; `HdrMeta` + `0xCE` codec (bounds-checked); + `NativeClient` `color`/`bit_depth` fields + HdrMeta receiver + demux + `next_hdr_meta`. +- **C ABI:** `connect_ex5`, `next_hdr_meta`, `color_info`, fix caps=0; regen header. +- **host:** populate `Welcome.color` from the negotiated bit-depth/HDR decision; send a `0xCE` + (generic default for now) when HDR is negotiated. +- **clients:** Windows/Android inherit the demux via shared core; Apple flips to `ex5`. +- **validation:** `quic.rs` round-trip + truncation + **SDR back-compat** tests; `probe` logs + `bit_depth` + colorimetry; loopback asserts a 10-bit Welcome carries trc=16 and a `0xCE` lands. + +### Step 1 — Host emits correct in-band SEI + complete VUI on all codecs *(landed; RTX-validation pending)* +In-band SEI is read directly by decoders, so it fixes correctness even before clients consume +the protocol, and gives an Apollo/Moonlight on-glass parity gate. + +- **Single source of truth:** the capturer learns the source display's mastering metadata and + exposes it via `Capturer::hdr_meta() -> Option<HdrMeta>`. The stream loop forwards it to the + encoder (`Encoder::set_hdr_meta` → in-band SEI) **and** the client (real `0xCE`, re-sent on each + keyframe). Pure byte-level logic (float→fixed conversion + the HEVC/H.264 SEI payloads) lives in + the unit-tested, cross-platform `src/hdr.rs` (`hdr_meta_from_display`, `hevc_mastering_display_sei` + type **137**, `hevc_content_light_level_sei` type **144** — note: NOT "type 4", that was a + drafting error). +- **Windows (done, CI-compiled / RTX on-glass pending):** `dxgi.rs` + `wgc.rs` read + `IDXGIOutput6::GetDesc1` at capture init / output change → `HdrMeta` (MaxCLL/MaxFALL left 0 — + GetDesc1 has none, like Apollo). `nvenc.rs` attaches the mastering + CLL SEI on every IDR for + HEVC/H.264. (AV1 mastering rides METADATA OBUs, not SEI — follow-up; AV1 `color_config` already + lands in Step 0's quick win.) 
+- **Linux encode-ready — DEFERRED into Step 4:** Linux capture is 8-bit only, so signalling + BT.2020 PQ + attaching mastering side-data on a downconverted 8-bit stream would be *incorrect*. + The libavcodec `side_data` path (with the `AVRational` conversion) lands together with the + 8-bit→Main10 shim / true 10-bit capture in Step 4. +- **Windows secure-desktop relay** (`virtual_stream_relay`) still sends only the generic baseline + `0xCE`; the helper's in-band SEI carries the real grade. Wiring the relay's `0xCE` is a follow-up. +- **validation (RTX box):** `ffprobe -show_frames` shows mastering + CLL side-data with the + display's real luminance and VUI 9/16/9; stock Moonlight shows correct (not washed-out) HDR. + Add **encoder-CSC-range == signaled-range** check. + +### Step 2 — Clients apply the metadata (Windows + Apple + Android, parallelizable) +- **Windows:** feed `hdr10_metadata()` from the received `HdrMeta` (drop the hardcode); **log** + `SetColorSpace1`/`SetHDRMetaData` failures. +- **Apple:** attach `kCVImageBufferMasteringDisplayColorVolumeKey` + `ContentLightLevelInfoKey` + / `CAEDRMetadata` from `HdrMeta`; CV color attachments from Welcome. +- **Android:** set `MediaFormat KEY_HDR_STATIC_INFO` from `HdrMeta`. + +### Step 3 — Display-capability query + client tone-mapping + robust fallback +The common-case correctness step — most displays are SDR. + +- **HDR→SDR on every client** (defined BT.2390 EETF / Hable), not silent OS fallback. +- Content-peak > display-peak roll-off (`GetDesc1` / `NSScreen.maximumEDR…` / + `Display.getHdrCapabilities`); explicit SDR fallback when HDR present fails. +- Optional client→host "send me SDR" downgrade as a trailing field on `Reconfigure`. + +### Step 4 — Linux (last; capture blocked upstream) +- **8-bit→Main10 NVENC upconvert shim** (`encode/linux.rs`) — Main10 transport with correct + VUI/SEI without HDR capture (gate so we don't claim HDR transfer on SDR content). 
+- **Linux encode color + side-data (the deferred Step 1c):** set + `color_primaries/trc/colorspace/range` from the negotiated `ColorInfo` and attach + `AV_FRAME_DATA_MASTERING_DISPLAY_METADATA` / `CONTENT_LIGHT_LEVEL` side-data (with the + `AVRational` conversion) in `encode/linux.rs` + `vaapi.rs` — only once the encoder actually + produces 10-bit, so the signalling matches the bits. +- True 10-bit capture: offer `ABGR2101010`/`P010` PipeWire formats + read colorimetry; pilot on + Sway/wlroots; track gamescope #2126. **Don't block the rest of the plan on it.** +- Linux client: `ex5` caps, P010 decode, GdkDmabufTexture CICP from Welcome, + `wp_color_management` when GTK ≥ 4.14. + +## Quick wins (independent, land in parallel) +1. `connect_ex5` + fix `abi.rs:896` — resurrects Apple's pipeline *(Step 0)*. +2. H.264 VUI + AV1 `color_config` on `nvenc.rs` — closes two latent blockers *(Windows-only, + validated in CI / on the RTX box)*. +3. `probe` logs `bit_depth` + colorimetry — observability for every later round-trip assertion. +4. Linux client BT.601→BT.709 sws + texture `color_state` — standalone SDR correctness bug. +5. GameStream silent-downgrade already warns (`rtsp.rs:289`) — keep observable. + +## Open questions +- **MaxCLL source:** `GetDesc1` doesn't expose it (Apollo zeroes). Static default, or measure + per-frame peak in the PQ shader (only truly-correct, adds a readback)? +- **GameStream:** implement `SS_HDR_METADATA` for Moonlight parity, or keep it deliberately SDR + and steer HDR users to punktfunk/1? +- **HLG:** carry the enum from day one (free) — but do any sources actually produce HLG? +- **Linux:** is shipping the 8-bit→Main10 shim as "HDR-capable transport" acceptable, or does it + risk advertising HDR we can't truly deliver? + +## Ordering rationale +Step 0 first: it's the keystone (metadata transport is the dominant cross-cutter; the ABI line +is a one-line root cause) and needs no hardware. 
Step 1 next: in-band SEI is read directly by +decoders, so it fixes correctness even before our clients consume the protocol, and gives an +Apollo-parity on-glass gate. Steps 2–3 are mechanical per-client wiring once metadata flows. +Linux is last because capture is gated on upstream we don't control; the shim delivers Main10 +transport without that dependency. + +Hardware dependencies: Step 0 = none (CI); Step 1 = RTX Windows host; Steps 2–3 = a real HDR +display per platform; Step 4 = a Linux GPU box + HDR-capable Wayland compositor. diff --git a/docs/security-review.md b/docs/security-review.md new file mode 100644 index 0000000..740486b --- /dev/null +++ b/docs/security-review.md @@ -0,0 +1,170 @@ +# punktfunk — security audit (2026-06-21) + +Whole-project audit by a 10-surface multi-agent review; every finding adversarially verified (reachability, attacker-control, existing mitigation). **10 surfaces · 20 raw findings → 18 confirmed/partial, 2 refuted.** Threat model: a malicious network client (pre- and post-pairing) is the primary adversary; also an on-path MITM and a local unprivileged user (the host is privileged). + +## Executive summary + +Overall the punktfunk host is a security-conscious codebase with a strong cryptographic and wire-parsing core: the FEC/reassembler path bounds every attacker-controlled length field before allocation, AES-GCM is used correctly with per-direction nonce separation and seq-as-AAD on the native plane, and the native trust model (SPAKE2 PIN binding both cert fingerprints, fingerprint pinning that still verifies the real TLS handshake signature) is genuinely sound. 
The most serious real defects are (1) local secret-disclosure of the host's master private key (key.pem) — written with no restrictive mode/ACL while the far-less-sensitive mgmt token is carefully 0600 — which on Windows (%ProgramData% default Users-read ACL, LocalSystem service) is a near-certain cross-privilege host-impersonation primitive, and (2) the native SPAKE2 PIN ceremony permitting unlimited online guesses against a static, non-rotating 4-digit PIN (no disarm-on-failure, no lockout), which contradicts the documented "one online guess" guarantee and lets a pre-auth LAN attacker brute-force pairing of a fully-trusted rogue client in a few hours against the default standalone/CLI flow. Dominant themes: file-permission hygiene on secrets is inconsistent (the secure pattern exists but is applied selectively), pairing throttling relies on a single global rate-limit rather than attempt-bounding, and authorization is overbroad (any streaming-paired cert is also a full mgmt admin). The remaining findings are a contained pre-auth RTSP video-thread DoS (unbounded packetSize and Content-Length), a legacy GameStream control-stream GCM nonce-reuse that is muted by modern V2 negotiation and being key-gated, and several defense-in-depth nits (non-constant-time GameStream hash compare, no QUIC ALPN, cross-session env-var launch confusion, global NODE_TLS_REJECT_UNAUTHORIZED). No memory-unsafety or RCE was found on attacker wire bytes; panics are safe Rust and isolated by panic=unwind. Net: a solid foundation whose highest-leverage fixes are tightening secret file permissions and making the PIN single-use/lockout-bounded. 
+ +## Findings (ranked by severity × exploitability) + +### 🟠 #1 [HIGH] Host master private key (key.pem) written with no restrictive file mode / ACL — local secret disclosure enabling full host impersonation + +**Surface:** `secrets-availability` +**Refs:** `crates/punktfunk-host/src/gamestream/cert.rs:36-44`, `crates/punktfunk-host/src/gamestream/mod.rs:216-232`, `crates/punktfunk-host/src/mgmt_token.rs:58-70`, `crates/punktfunk-host/src/service.rs:605-627`, `crates/punktfunk-host/src/native_pairing.rs:116-126` + +**Why it ranks here / impact:** Ranked #1 because it is the highest verdict-adjusted severity (high, three corroborating findings merged) and the most reliably exploitable post-foothold: key.pem is the single trust root for ALL surfaces — GameStream TLS server cert, GameStream pairing signing key, the punktfunk/1 QUIC identity every client pins, and the mgmt HTTPS cert — so its disclosure yields full host impersonation/MITM that defeats client fingerprint pinning, plus the mgmt bearer token is likewise unprotected on Windows. ServerIdentity::load_or_create writes it with a bare fs::write (no mode) and create_dir_all (no DACL). On Windows the leak is near-certain and umask-independent: config_dir() is %ProgramData%\punktfunk, whose default ACL grants BUILTIN\Users read, and the host runs as LocalSystem — any local unprivileged user reads the SYSTEM service's key; the mgmt-token 0o600 hardening is #[cfg(unix)] so it is a no-op there. On Linux the file lands at umask (commonly 0664/0644, verified live as world-readable) and is reachable cross-user whenever the home/config chain is traversable. The project demonstrably knows the secure pattern (mgmt_token.rs uses OpenOptions::mode(0o600)+set_permissions) but applies it to the less-sensitive token and not the master key. Local-only (adversary #3), not pre-auth/network, which caps it below critical. 
+ +**Fix:** Write key.pem (and cert.pem) via OpenOptions::mode(0o600) + a follow-up set_permissions(0o600) on Unix, mirroring mgmt_token.rs; create config_dir() with DirBuilder::mode(0o700). On Windows set an explicit DACL granting only SYSTEM+Administrators on the punktfunk %ProgramData% subtree and per-file on key.pem / mgmt-token / *paired.json (or relocate the key under a SYSTEM-only path), since the default ProgramData ACL is Users-readable. Extend the same hardening to client-key.pem and the persisted trust stores. Add a regression test asserting 0600 on key.pem on Unix. + +### 🟠 #2 [HIGH] Native SPAKE2 PIN ceremony allows unlimited online guesses against a static 4-digit PIN — pre-auth brute-force to a fully-trusted rogue client + +**Surface:** `pairing-pin` +**Refs:** `crates/punktfunk-host/src/punktfunk1.rs:388-446`, `crates/punktfunk-host/src/punktfunk1.rs:475-491`, `crates/punktfunk-host/src/punktfunk1.rs:82`, `crates/punktfunk-host/src/native_pairing.rs:189-234`, `crates/punktfunk-host/src/native_pairing.rs:128-131`, `crates/punktfunk-host/src/mgmt.rs:841-842` + +**Why it ranks here / impact:** Ranked #2: high severity AND pre-auth + fully attacker-controlled, the strongest exploitability combination among the high-rated issues — gated only on pairing being armed and an hours-long active window. Merges the three pairing-pin brute-force findings (they share one root cause: no disarm/rotate-on-failure and no attempt budget). pair_ceremony logs a warning and returns Err on a wrong PIN but never calls np.disarm() or rotates the PIN; current_pin() returns the same value forever (cleared only by TTL or operator); the only throttle is one process-wide 2s PAIRING_COOLDOWN. The PIN space is 10,000. Critically the standalone punktfunk1-host default (--require-pairing forces allow_pairing) arms with expires_at:None at startup, so the indefinite static-PIN window is the DEFAULT for that binary, not an opt-in. 
At ~1 guess/2s the space exhausts in ~5.5h worst / ~2.8h avg, and on success the attacker's cert is permanently pinned, granting input injection, screen capture and app launch. This directly contradicts the documented 'one online guess, no offline dictionary' claim — the offline-dictionary resistance from SPAKE2 holds, but the online single-guess limit is simply not implemented. Mitigations partial: the web/mgmt arm path is TTL-bounded (15..600s), confining the worst case to the CLI/standalone mode. + +**Fix:** Make a failed confirmation consume the PIN: on ok==false in pair_ceremony, call np.disarm() (or rotate to a fresh random PIN) so a single wrong guess closes the window — this is what actually delivers the documented 'one online guess'. Add a per-window failed-attempt budget (auto-disarm after N>=1 failures), give the CLI no-expiry arm path a default expiry, and disarm after a SUCCESSFUL pair too. Keep the 2s cooldown as defence-in-depth and raise the web-armed PIN to 6 digits. + +### 🟡 #3 [MEDIUM] Pre-auth RTSP ANNOUNCE packetSize underflows/panics the GameStream video pipeline (div-by-zero / OOB slice / allocation amplification) + +**Surface:** `gamestream-parsing` +**Refs:** `crates/punktfunk-host/src/gamestream/rtsp.rs:275`, `crates/punktfunk-host/src/gamestream/video.rs:55-89`, `crates/punktfunk-host/src/gamestream/stream.rs:322` + +**Why it ranks here / impact:** Ranked #3: medium and fully pre-auth + attacker-controlled — the highest-exploitability of the medium-and-below tier. The RTSP listener on TCP 48010 performs no TLS/pairing/auth; an unauthenticated peer drives OPTIONS→ANNOUNCE→PLAY (+ a UDP ping to the video port) and the video thread starts on state.stream alone, no paired session required. 
x-nv-video[0].packetSize is read with no bound and flows into VideoPacketizer::new where payload_per_shard = packet_size - 16: packetSize==16 → pps==0 → div-by-zero panic; packetSize<16 → underflow → OOB slice panic; packetSize==17 → one byte/shard → per-frame datagram flood. Reliable remote pre-auth DoS of a privileged media service, made stickier because the panic unwinds before running.store(false) leaving the session wedged until restart. Calibrated medium (not higher) because it is a SAFE Rust panic (checked slice access, no memory corruption/UB) isolated to the punktfunk-video thread by panic=unwind — the host process and other listeners survive; not RCE. + +**Fix:** Validate packet_size in stream_config() before building StreamConfig: reject packetSize below a sane floor (e.g. < 64) and clamp to a sane max (e.g. <= 2048). Additionally harden VideoPacketizer::new to use checked/saturating arithmetic and refuse construction (or fall back to a default) when packet_size <= 16 (the shard-header size subtracted above) so the per-frame path never sees pps==0 or a wrapped payload_per_shard. Also store(false) on the unwind path so a panic doesn't wedge the session. Add a regression test over packetSize in {0,15,16,17}. + +### 🔵 #4 [LOW] Any paired punktfunk/1 streaming client gets full management-API authority via the mTLS-paired-cert auth path (no streaming-vs-admin separation) + +**Surface:** `authz-trust` +**Refs:** `crates/punktfunk-host/src/mgmt.rs:459-488`, `crates/punktfunk-host/src/mgmt.rs:466-470` + +**Why it ranks here / impact:** Ranked #4: low but a genuine post-auth privilege over-broadening with concrete admin impact. require_auth grants any verified peer cert whose fingerprint is in the native paired store full unscoped access to every /api/v1 route — the SAME paired set that admits a device to stream.
So a device paired purely to watch the screen can DELETE /clients/{fp} (unpair others), POST /native/pair/arm (open a pairing window and read the PIN), approve arbitrary knocking devices, DELETE /session, and CRUD the library; there is no role/scope check anywhere in the router. The native client presents its identity via TLS client auth on both ports, so the credential is genuinely usable against mgmt. Bounded to low because it requires being an already-paired (operator-trusted) device and the mgmt port binds loopback by default — remote reach needs an explicit routable --mgmt-bind (and the mTLS path then bypasses the token requirement). + +**Fix:** Separate streaming trust from management trust: keep a distinct admin allow-list (or an admin flag on a paired entry) for the mTLS mgmt path, or restrict mTLS-cert auth to read-only endpoints and require the bearer token for state-changing/admin routes. At minimum gate the pairing-administration endpoints (arm/approve/unpair) and session/library mutation behind the bearer token only. + +### 🔵 #5 [LOW] GameStream legacy control-stream AES-GCM nonce reuse across directions (host rumble vs client input share key+nonce) + +**Surface:** `crypto` +**Refs:** `crates/punktfunk-host/src/gamestream/control.rs:373-400`, `crates/punktfunk-host/src/gamestream/control.rs:257-266`, `crates/punktfunk-host/src/gamestream/control.rs:67,106-114` + +**Why it ranks here / impact:** Ranked #5: a real, correctly-identified catastrophic-class crypto defect (AES-GCM (key,nonce) reuse) but adjusted to low because reachability and impact are heavily muted. The legacy NonceKind branches apply no direction separation (other => other), so host rumble (rumble_seq from 0) and client control (seq from 0) under the shared rikey produce identical (key,nonce). 
BUT: (1) it only triggers on the legacy auto-detected scheme — modern moonlight-common-c negotiates the V2 scheme which flips marker[0] to 'H' and is direction-separated, so the default path is safe; the doc claim 'the legacy path — which we hit' is stale; (2) the rikey is delivered only over the mTLS /launch, so a pure MITM cannot derive the key — only a paired client can; (3) a paired client can already legitimately send any client→host control message (in-scope-by-design), so forgery is largely redundant and the only genuinely new gain is recovering low-value rumble keystream / forging rumble to its own client. Post-auth, conditional path. + +**Fix:** Separate the two directions' nonce spaces for the legacy schemes too — set a reserved high bit/byte of the legacy IV for host-originated packets (mirror the V2 'H' marker), or better, HKDF-derive an independent host→client key from the rikey with a direction label so host and client never share a GCM key. Never let host rumble and client input share (key,nonce). + +### 🔵 #6 [LOW] RTSP request Content-Length / header size unbounded with no read timeout or connection cap — pre-auth slow-loris / memory-growth DoS + +**Surface:** `gamestream-parsing` +**Refs:** `crates/punktfunk-host/src/gamestream/rtsp.rs:82-106`, `crates/punktfunk-host/src/gamestream/rtsp.rs:24-48` + +**Why it ranks here / impact:** Ranked #6: low, pre-auth and attacker-controlled but a rate-limited resource DoS, not unsafety or auth bypass. read_message parses content-length and computes total = end+4+content_len with no cap, looping buf.extend_from_slice until buf.len()>=total; the header scan is likewise unbounded and there is no body/header cap, no read/write timeout, and one unbounded native thread is spawned per connection with no global limit. 
Growth is bounded by attacker send rate (no pre-allocation), so it is slow exhaustion rather than instant OOM; the stronger lever is thread/FD exhaustion from many idle slow-loris connections at near-zero bandwidth. On a privileged LAN-facing plaintext listener with zero defensive caps. + +**Fix:** Cap Content-Length and total header size to small constants (e.g. reject content_len > 64 KiB, total header > 16 KiB) and close on violation. Add a read timeout so a slow-loris connection cannot pin a thread indefinitely, and bound concurrent RTSP connections. + +### 🔵 #7 [LOW] Per-session launch command carried via process-global PUNKTFUNK_GAMESCOPE_APP env var, stomped under concurrent native sessions (cross-session launch confusion) + +**Surface:** `privilege-process-launch` +**Refs:** `crates/punktfunk-host/src/punktfunk1.rs:560-571`, `crates/punktfunk-host/src/punktfunk1.rs:140`, `crates/punktfunk-host/src/vdisplay/gamescope.rs:629-647` + +**Why it ranks here / impact:** Ranked #7: low, post-auth cross-session isolation bug, explicitly NOT command injection. serve_session does std::env::set_var(PUNKTFUNK_GAMESCOPE_APP) per accepted connection with a stale comment claiming 'one session at a time', but DEFAULT_MAX_CONCURRENT=4 sessions run concurrently and the var is read in gamescope::spawn during VirtualDisplay::create — a genuine TOCTOU where client B's launch overwrites what client A's bare-spawn reads, and the never-cleared value leaks into a later no-launch client. Impact is capped because cmd always resolves through library::launch_command (digit-validated Steam appids / operator-only custom store), so the worst case is launching a DIFFERENT operator-approved title or a stale title — and it only affects the gamescope bare-spawn backend (kwin/mutter/wlroots/attach ignore the var). + +**Fix:** Stop carrying the per-session launch command in a process-global env var. Plumb the resolved command through the VirtualDisplay::create call / per-session context (e.g. 
a field on Mode or a per-session GamescopeDisplay), and on the bare-spawn path pass it explicitly to spawn(); clear/scope it so a stale value never leaks to the next client. + +### ⚪ #8 [INFO] GameStream pairing phase-4 hash compare is not constant-time + +**Surface:** `pairing-pin` +**Refs:** `crates/punktfunk-host/src/gamestream/pairing.rs:226-247` + +**Why it ranks here / impact:** Ranked #8: info / hardening only — a real variable-time `==` on attacker-influenced 32-byte SHA-256 digests, but not weaponizable. The compared `expected` mixes in host-random server_challenge that is never disclosed (so the attacker can neither compute nor aim at the target), the attacker cannot steer client_hash to a chosen value without the PIN key, and any mismatch removes the session (map.remove) forcing a fresh ceremony with new randomness — so there is no stable secret to recover prefix-by-prefix and no path from timing to PIN recovery or match forgery. Worth fixing for consistency since the codebase already has ct_eq for the native ceremony. + +**Fix:** Use a constant-time comparator (subtle::ConstantTimeEq or the project's existing ct_eq) for hash_ok, matching the constant-time discipline already used in the native SPAKE2 ceremony. + +### ⚪ #9 [INFO] GameStream pairing ceremony runs over plain HTTP — inherited GFE brute-forceable-PIN / MITM weakness + +**Surface:** `authz-trust` +**Refs:** `crates/punktfunk-host/src/gamestream/nvhttp.rs:33`, `crates/punktfunk-host/src/gamestream/nvhttp.rs:215-264`, `crates/punktfunk-host/src/gamestream/pairing.rs:102-247` + +**Why it ranks here / impact:** Ranked #9: info — real but intentional Moonlight-compat behavior, on record rather than a regression. The whole /pair flow (incl. 
phase-4 cert pinning) is on plain HTTP 47989 with no transport confidentiality and no rate-limiting; the AES key is pin_key(salt,pin) = SHA-256(salt||pin)[..16] feeding AES-128-ECB, so an on-path attacker observing a legitimate pairing can offline-brute-force the 4-digit PIN and forge a clientpairingsecret to get a cert pinned. This is the well-known GFE/Sunshine construction, fixed by interop, and is precisely why punktfunk/1's SPAKE2 path exists; it requires an active MITM during an operator-initiated pairing within the 300s window. A paired GameStream client is in-scope-by-design. + +**Fix:** Inherent to GameStream compatibility — document it and steer users to punktfunk/1 (SPAKE2) for untrusted networks. Optionally rate-limit pairing sessions per uniqueid/IP and tighten/expire the awaiting-PIN window aggressively. + +### ⚪ #10 [INFO] No ALPN configured on the native QUIC server/client (cross-protocol confusion hardening absent) + +**Surface:** `cert-tls-identity` +**Refs:** `crates/punktfunk-core/src/quic.rs:1335-1354`, `crates/punktfunk-core/src/quic.rs:1412-1448` + +**Why it ranks here / impact:** Ranked #10: info — factually correct (no alpn_protocols set on either endpoint; the cert.pem identity is shared with GameStream TLS) but no reachable confusion attack. ALPACA-style attacks need two TLS services sharing a cert on the SAME transport; here GameStream is TLS-over-TCP and punktfunk/1 is TLS-in-QUIC (UDP) — not cross-reachable — and there is exactly one QUIC server so ALPN would make no authorization decision. Trust is already enforced by fingerprint pinning + app-layer Hello/Welcome magic. Cheap future-proofing only. + +**Fix:** Set a fixed ALPN on both endpoints (e.g. rustls_cfg.alpn_protocols = vec![b"pkf1".to_vec()]) so a mismatched protocol is rejected during the TLS handshake — defense-in-depth against ever multiplexing protocols on the QUIC endpoint. 
+ +### ⚪ #11 [INFO] FEC reconstruct error on the receive path is stream-fatal — code-contract inconsistency (not an exploitable DoS) + +**Surface:** `core-wire-deser` +**Refs:** `crates/punktfunk-core/src/packet.rs:411`, `crates/punktfunk-core/src/session.rs:283-289`, `clients/probe/src/main.rs:959`, `crates/punktfunk-host/src/spike.rs:251` + +**Why it ranks here / impact:** Ranked #11: info — a correctly-identified contract inconsistency with NO demonstrable exploit. Reassembler::push propagates coder.reconstruct(...)? and both real receive-side callers treat any non-NoFrame error as fatal, inconsistent with the surrounding 'malformed = silent drop, never fatal' discipline. But every Err arm was traced unreachable from hostile input: header firewall + block-geometry pinning guarantee equal-length, correctly-counted shards; reconstruct is only called once received>=data_shards; Config::validate rejects odd/zero shard_payload before any decode; and MDS Reed-Solomon decodes any data_shards distinct shards. Reaching the reassembler also requires an AES-GCM-decryptable packet, so it is the connected host (not a port-sprayer), and it is client-side only — the privileged host never runs the reassembler on attacker bytes. Pure defense-in-depth hardening. + +**Fix:** Make a FEC reconstruction failure a counted drop rather than stream-fatal: in Reassembler::push match coder.reconstruct(...) and on Err bump packets_dropped (or a fec_failed counter), discard the block, and return Ok(None). Reserve poll_frame's Err for genuinely fatal conditions (role misuse, transport teardown), matching the discipline documented at packet.rs:298-300.
+ +### 🔵 #12 [LOW] Web console sets NODE_TLS_REJECT_UNAUTHORIZED=0 process-globally — latent footgun disabling all outbound TLS verification + +**Surface:** `deps-config-exposure` +**Refs:** `web/.env.example:22-24`, `web/web.env.example:11-14`, `web/server/util/auth.ts:17-22`, `web/vite.config.ts:23` + +**Why it ranks here / impact:** Ranked #12: low and not currently exploitable (attackerControlled false), included as a latent defense-in-depth defect. NODE_TLS_REJECT_UNAUTHORIZED=0 disables certificate validation for every outbound TLS connection the Node process makes, but the only current server-side outbound hop is the loopback proxy to https://127.0.0.1:47990 (CDN/art fetches are browser-side), and a loopback connection cannot be MITM'd — so impact is nil today. Real impact materializes silently if anyone later adds a server-side off-host HTTPS call (update check, webhook, metadata fetch) or points PUNKTFUNK_MGMT_URL off-loopback. + +**Fix:** Do not disable TLS verification globally. Pin the host's self-signed cert for the single loopback fetch: pass an https.Agent with the host cert as `ca` (or rejectUnauthorized:false on that one Agent only) to the proxyRequest fetch in server/routes/api/[...].ts, and drop NODE_TLS_REJECT_UNAUTHORIZED from the deployment env. + +## Cross-cutting themes + +- Inconsistent secret file-permission hygiene: the secure 0600/ACL pattern exists (mgmt-token) but is applied selectively, leaving the master private key and trust stores at umask/default-ACL — the highest-impact local-privilege gap, acute on Windows %ProgramData% + LocalSystem. +- Pairing throttling is rate-based, not attempt-bounded: a single global 2s cooldown and a static non-rotating 4-digit PIN with no disarm-on-failure/lockout means the documented 'one online guess' property is not actually implemented for the native ceremony. 
+- Overbroad authorization / collapsed trust tiers: 'paired to stream' equals 'paired to administer' (mgmt mTLS), and GameStream pairing inherits the plaintext-HTTP brute-forceable-PIN model — coarse trust boundaries where finer scopes are warranted. +- Pre-auth attack surface on the GameStream/RTSP listeners with missing input bounds and resource caps (unbounded packetSize, unbounded Content-Length, no timeouts/connection caps) — contained DoS, but on a privileged plaintext service. +- Stale concurrency assumptions and process-global mutable state (legacy GCM nonce direction, PUNKTFUNK_GAMESCOPE_APP env var) that were safe under a since-removed 'one session at a time' invariant and now cause cross-session confusion / crypto reuse. +- Strong, well-tested cryptographic and memory-safety core (bounded wire parsing, correct AEAD/SPAKE2/pinning, catch_unwind FFI, panic=unwind isolation) — the foundation is solid; the residual risk is in operational hardening and trust-tier granularity, not in unsafe/RCE. + +## Prioritized remediation (do in this order) + +1. Lock down secret files: write key.pem (and cert.pem) 0600 + create config_dir 0700 on Unix using the existing mgmt_token OpenOptions::mode pattern, and set an explicit SYSTEM+Administrators-only DACL on the punktfunk %ProgramData% subtree / key.pem / mgmt-token / *paired.json on Windows. Extend to client-key.pem; add a 0600 regression test. +2. Make the native PIN single-use and lockout-bounded: disarm or rotate the PIN on a failed SPAKE2 confirmation, add a per-window failed-attempt budget, give the CLI no-expiry arm path a default expiry, and disarm after a successful pair — this is what delivers the documented 'one online guess'. +3. Bound the RTSP video path: validate/clamp x-nv-video[0].packetSize (floor ~64, cap ~2048) in stream_config() and use checked/saturating arithmetic in VideoPacketizer::new so pps==0 / underflow can never occur; store(false) on the unwind path; add a {0,15,16,17} regression test. +4. 
Cap RTSP request parsing: enforce a Content-Length and total-header-size limit, add a read timeout, and bound concurrent connections so a pre-auth peer cannot slow-loris exhaust threads/memory. +5. Separate streaming trust from management trust: require the mgmt bearer token (not just a paired streaming cert) for state-changing and pairing-administration routes (arm/approve/unpair/session/library), or keep a distinct admin allow-list. +6. Fix the legacy GameStream GCM nonce reuse: HKDF-derive an independent host→client key from the rikey (direction label), or mirror the V2 'H' direction marker into the legacy IV so host rumble and client input never share (key,nonce). +7. Stop carrying the per-session gamescope launch command in a process-global env var: plumb it through the per-session VirtualDisplay::create/context and clear it when no launch is requested, eliminating cross-session stomping under concurrency. +8. Apply the cheap hardening nits: constant-time compare for the GameStream phase-4 hash (use ct_eq), set a fixed ALPN ('pkf1') on both QUIC endpoints, make FEC reconstruct failures a counted drop instead of stream-fatal, and replace the global NODE_TLS_REJECT_UNAUTHORIZED with a cert-pinned https.Agent scoped to the loopback mgmt fetch. + +## Security controls done right (positives) + +- Defense-in-depth wire parsing: every attacker-controllable FEC/reassembler header field is bounded against negotiated limits BEFORE any allocation keyed on it (packet.rs:328-343) — shard_bytes exact-match, data/total/block counts in range, indices in bounds, frame_bytes<=max — with no integer overflow in the size math and regression tests (rejects_oversized_shard_counts, rejects_inconsistent_block_geometry_without_panicking). +- Reassembler memory is bounded to a 16-frame reorder window with prune-on-push and completed-frame dedup (packet.rs:451-468), so a flood of distinct frame indices cannot grow memory unboundedly and late shards cannot resurrect emitted frames. 
+- AEAD gates the reassembler: on an encrypted session open_from_wire verifies the GCM tag (with seq as AAD) before any bytes reach push (session.rs:120-131), so an attacker cannot reach the reassembler without the session key; oversized-datagram truncation is always detectable (recv buffers MAX+1, len>MAX dropped). +- Native AES-GCM is correct and misuse-resistant: 96-bit nonce = 4-byte salt || 8-byte BE seq with seq also as AEAD AAD (tampering fails the tag, not shifts the nonce), per-direction salt-bit separation gives disjoint nonce spaces under the shared key, a fresh CSPRNG 128-bit key per session, and Config::validate rejects encrypt=true with an all-zero key (crypto.rs, session.rs). +- Host-side data-plane datagram decoders (mic / RichInput / HidOutput / InputEvent / gamepad) are all length-checked, Option-returning and non-fatal — the privileged host drops anything malformed and keeps draining, never reassembles attacker video, and never panics on truncated/hostile input. +- punktfunk/1 trust establishment is sound: PinVerify rejects a fingerprint mismatch AND still performs real TLS 1.2/1.3 CertificateVerify signature checks (not stubbed), so an active MITM cannot replay the host's public cert to satisfy a pin without the private key (quic.rs:1547-1608) — the single most important thing to get right, done correctly. +- SPAKE2 PIN pairing for the native plane is built correctly: a balanced PAKE binding BOTH cert fingerprints as identities and into the key-confirmation transcript, a wrong PIN yields a different key (one online guess, no offline dictionary, no error oracle), MITM with different certs per leg reaches no shared key, and confirmation MACs use a constant-time ct_eq — all exercised by tests. 
+- Authorization is cleanly split from authentication on both planes: AcceptAnyClientCert verifiers accept any self-signed cert at the handshake but still verify the handshake signature, so the post-handshake fingerprint proves key possession, and authorization is then enforced against the paired allow-list (--require-pairing default fail-closed; certless peers rejected). +- GameStream post-pair endpoints (applist/launch/resume/cancel) are gated by peer_is_paired() requiring a pinned mutual-TLS client cert, fail-closed for certless/unknown/None peers, with a dedicated regression test (nvhttp.rs:46-55, 303-328). +- No command injection on the launch surface: client-supplied launch ids resolve against the host's OWN catalog (client can only pick an existing title), Steam appids are digits-only validated (with a `570; rm -rf ~` rejection test), custom commands come only from the operator mgmt store, and gamescope::spawn uses argv (Command::new + args), never /bin/sh -c. +- Client-controlled display dimensions are validated (encode::validate_dimensions: zero/odd/over-max rejected) on both the initial Hello and mid-stream Reconfigure before reaching encoders or the privileged SudoVDA ADD ioctl, which marshals a fixed #[repr(C)] struct and only selects driver-advertised modes. +- mgmt API is authenticated on every route except /health even on loopback (fails closed on a blank token), with constant-time SHA-256-digest token comparison and a CSPRNG token persisted 0600 on Unix (O_CREAT mode + follow-up set_permissions, never briefly world-readable). +- Attacker-controlled device names are sanitized before logging/storage/UI (C0/C1 controls, Unicode bidi/format overrides, BOM stripped, length-capped, fingerprint fallback), blocking log/console-injection and trusted-device spoofing in the approval UI. 
+- The pending-knock / delegated-approval queue is bounded (PENDING_CAP=32, LRU eviction) and time-bounded (10-min TTL), in-memory only, so a LAN scanner cannot grow it unboundedly or poison the persistent trust store. +- Both trust stores are persisted atomically (temp + rename) with in-memory rollback on a failed persist, so a crash or full disk mid-write cannot truncate the allow-list and silently lock out or un-gate paired clients. +- C-ABI boundary is hardened: config_from_ptr enforces the struct_size skew guard before dereferencing, every narrowing field is range-checked before truncation, every data-processing entry point is wrapped in catch_unwind returning Panic (no unwind across FFI), and null/zero-length handling is consistent and safe. +- No secret material is logged anywhere — PINs, GCM/rikey keys, nonces/salts, the mgmt token, and private keys never reach tracing/println; pairing-failure logs include only the sanitized device name + fingerprint. +- Crypto stack is current and free of disclosed-vuln versions (rustls 0.23.40, quinn 0.11.9, ring 0.17.14, aes-gcm 0.10.3, spake2 0.4.0), ring-only with no aws-lc C dep; rsa 0.9.10 (RUSTSEC-2023-0071 Marvin) is used ONLY for sign/verify, never decryption, so the vulnerable path is not exercised. +- The Windows service launches the host with a correctly-scoped duplicated SYSTEM token (only the session id retargeted), a fixed winsta0\default desktop, a command line built from current_exe + an operator-controlled host.env subcommand (never network input), and a kill-on-job-close job object so a crash never orphans the SYSTEM host. + +## Refuted (investigated, NOT vulnerabilities) + +- **[unsafe-ffi-cabi] Free/close FFI entry points run Drop without catch_unwind — a panic in teardown is UB across the C boundary** — Verified all cited code. 
abi.rs:272-276 (punktfunk_session_free) and abi.rs:1627-1631 (punktfunk_connection_close) are verbatim as described — both call drop(Box::from_raw(..)) outside guard()/catch_unwind, while the guard helper (abi.rs:168-170) wraps catch_unwind. The doc at abi.rs:11 ("every entry point is wrapped in catch_unwind") is literally inaccurate for these two — that doc-vs-code discrepancy is REAL. + +But the finding's security claim — "the unwind would cross the extern \"C\" frame, which is undefined behavior" — is REFUTED by the build toolchain. This crate is edition 2021 built with rustc 1.96.0. Since Rust 1.81 (Sept 2024), an unwind that reaches a non-`-unwind` `extern "C"` boundary is a defined, safe process abort, not UB. None of these functions use `extern "C-unwind"`. So the worst possible outcome is a clean abort. A catch_unwind→PunktfunkStatus::Panic wrapper could avoid that abort only by returning a status — but free/close return void, so there is no status to return anyway. + +Moreover the precondition (a panicking Drop) does not exist in the code. NativeClient::drop discards the worker join result with `let _ = w.join()`, so a worker-thread panic/poison cannot re-enter Drop. Config::drop only zeroizes. Session has no custom Drop. The transport Drop closes a socket. There is no .unwrap(), no Mutex::lock, and no result-propagating thread join on any teardown path — the finding's speculated panic sources are not present. + +The finding is correctly self-rated as informational and explicitly non-attacker-controlled / non-pre-auth. The accurate residual is a documentation inconsistency (the module doc overstates the catch_unwind invariant), not a security weakness. The substantive recommendation (keep Drop impls panic-free) is already satisfied. Net: refuted as a vulnerability; severity info, and even as a code-quality nit the UB framing is incorrect for the current compiler. Worth at most a one-line doc fix to say free/close intentionally abort-on-panic.
+- **[unsafe-ffi-cabi] C-ABI pointer/length contracts (32-byte fingerprint buffers, caller buffers) are trusted, not validated — standard FFI, embedder-only** — All three cited spans are present and behave exactly as described. abi.rs:877 `from_raw_parts(pin_sha256, 32)` (after null-check at 873), abi.rs:905 `from_raw_parts_mut(observed_sha256_out, 32)` (after null-check at 903), abi.rs:1003 `from_raw_parts_mut(host_sha256_out, 32)` (after null-check at 990). The submit_frame/send_mic claim is also accurate: host_submit_frame (283-302, with an extra null+len guard) and send_mic (1233-1253) build slices from caller (data,len). These are C-ABI entry points whose pointer/length arguments come from the trusted embedding app (PunktfunkKit/GTK/WinUI), not from wire bytes. None of the threat-model adversaries can influence them: the malicious network client (pre- or post-auth) controls protocol bytes that flow *through* the embedder, not the embedder's own FFI call arguments; a MITM is irrelevant; a local unprivileged user cannot call into another process's loaded library. Each function carries a documented Safety contract and null-checks its pointers, and the hard-coded 32 matches the fingerprint type, so even a buggy-but-honest caller passing a correctly-sized buffer is safe. This is idiomatic, sound FFI — not a vulnerability. The finding's own posture (info, attackerControlled=false, preAuth=false, no fix required, listed for the record) is correct and well-calibrated. In vuln terms this is refuted (not a vuln / not reachable by any in-scope adversary), consistent with the info classification. diff --git a/include/punktfunk_core.h b/include/punktfunk_core.h index ecbbe54..3bb8c05 100644 --- a/include/punktfunk_core.h +++ b/include/punktfunk_core.h @@ -70,6 +70,18 @@ // only where available (Linux hosts); otherwise the host falls back to X-Box 360. 
#define PUNKTFUNK_GAMEPAD_DUALSENSE 2 +// Connect to a `punktfunk/1` host and start a session at `width`x`height`@`refresh_hz`. +// Blocks up to `timeout_ms` for the handshake. Returns NULL on failure. Equivalent to +// [`punktfunk_connect_ex`] with `compositor = PUNKTFUNK_COMPOSITOR_AUTO`. +// +// Video-capability bit for [`punktfunk_connect_ex5`] (`video_caps`): the client can decode a +// 10-bit (Main10) HEVC stream. (Mirrors `quic::VIDEO_CAP_10BIT`.) +#define PUNKTFUNK_VIDEO_CAP_10BIT 1 + +// Video-capability bit for [`punktfunk_connect_ex5`] (`video_caps`): the client can present +// BT.2020 PQ HDR10 (implies 10-bit). (Mirrors `quic::VIDEO_CAP_HDR`.) +#define PUNKTFUNK_VIDEO_CAP_HDR 2 + // 16-byte AEAD authentication tag appended by GCM. #define TAG_LEN 16 @@ -233,7 +245,8 @@ // demultiplexed by the first byte: input = [`crate::input::INPUT_MAGIC`] (0xC8, client→host), // audio = [`AUDIO_MAGIC`] (0xC9, host→client), rumble = [`RUMBLE_MAGIC`] (0xCA, host→client), // mic = [`MIC_MAGIC`] (0xCB, client→host), rich-input = [`RICH_INPUT_MAGIC`] (0xCC, client→host), -// HID-output = [`HIDOUT_MAGIC`] (0xCD, host→client). +// HID-output = [`HIDOUT_MAGIC`] (0xCD, host→client), HDR metadata = [`HDR_META_MAGIC`] +// (0xCE, host→client). #define PUNKTFUNK_AUDIO_MAGIC 201 #endif @@ -261,6 +274,48 @@ #define HIDOUT_MAGIC 205 #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// HDR static-metadata datagram tag, host → client (the static analog of the per-frame VUI; +// see [`HdrMeta`]). Next tag after [`HIDOUT_MAGIC`]. +#define HDR_META_MAGIC 206 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// CICP colour-primaries code point: BT.709. +#define ColorInfo_CP_BT709 1 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// CICP colour-primaries code point: BT.2020. +#define ColorInfo_CP_BT2020 9 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// CICP transfer code point: BT.709. 
+#define ColorInfo_TRC_BT709 1 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// CICP transfer code point: PQ (SMPTE ST.2084). +#define ColorInfo_TRC_PQ 16 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// CICP transfer code point: HLG (ARIB STD-B67 / BT.2100). +#define ColorInfo_TRC_HLG 18 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// CICP matrix code point: BT.709. +#define ColorInfo_MC_BT709 1 +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// CICP matrix code point: BT.2020 non-constant-luminance. (Never emit 10 / constant-luminance — +// no client decodes it.) +#define ColorInfo_MC_BT2020_NCL 9 +#endif + // Stable C ABI status codes. `Ok` is 0; all errors are negative so callers can // test `rc < 0`. Do not renumber existing variants — only append. enum PunktfunkStatus @@ -331,6 +386,17 @@ typedef uint8_t PunktfunkInputKind; #endif // __STDC_VERSION__ >= 202311L #endif // __cplusplus +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Per-session colour signalling (CICP / ITU-T H.273 code points) the host resolved for the +// encoded video, carried on [`Welcome`]. A client configures its decoder/presenter from these +// instead of inferring them from the bitstream VUI. An older host omits the bytes on the wire → +// [`ColorInfo::SDR_BT709`] (the 8-bit BT.709 limited stream every pre-HDR build produced). +// +// The *static* HDR mastering metadata (ST.2086 + content light level) is larger and can change +// mid-stream, so it rides the [`HDR_META_MAGIC`] datagram rather than this fixed struct. +typedef struct ColorInfo ColorInfo; +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // Opaque handle to a live `punktfunk/1` connection (QUIC control plane + UDP data plane, all // pumped on internal threads). @@ -447,6 +513,31 @@ typedef struct { } PunktfunkHidOutput; #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Static HDR metadata for an HDR session ([`punktfunk_connection_next_hdr_meta`]): SMPTE ST.2086 +// mastering display colour volume + CEA-861.3 content light level. 
All fields are in the standard +// HDR10 SEI fixed-point units (primaries/white in 1/50000, mastering luminance in 0.0001 cd/m², MaxCLL/MaxFALL in nits), the inputs for +// DXGI `DXGI_HDR_METADATA_HDR10` / Apple `CAEDRMetadata` / Android `KEY_HDR_STATIC_INFO` (a target may still need its own reordering/unit conversion, e.g. Android takes R, G, B primaries and max luminance in whole nits). +typedef struct { + // Display-primaries x-chromaticities in 1/50000 units, ST.2086 order [green, blue, red]. + uint16_t display_primaries_x[3]; + // Display-primaries y-chromaticities in 1/50000 units, ST.2086 order [green, blue, red]. + uint16_t display_primaries_y[3]; + // White-point x-chromaticity, 1/50000 units. + uint16_t white_point_x; + // White-point y-chromaticity, 1/50000 units. + uint16_t white_point_y; + // Max display mastering luminance, 0.0001 cd/m² units. + uint32_t max_display_mastering_luminance; + // Min display mastering luminance, 0.0001 cd/m² units. + uint32_t min_display_mastering_luminance; + // Maximum content light level (MaxCLL), nits. 0 = unknown. + uint16_t max_cll; + // Maximum frame-average light level (MaxFALL), nits. 0 = unknown. + uint16_t max_fall; +} PunktfunkHdrMeta; +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // One rich client→host input for the host's virtual DualSense // ([`punktfunk_connection_send_rich_input`]): a touchpad contact or a motion sample. Set `kind` @@ -498,6 +589,10 @@ typedef struct { uint32_t send_dropped; } PunktfunkProbeResult; + + + + #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -576,10 +671,6 @@ int32_t punktfunk_host_poll_input(PunktfunkSession *s); PunktfunkStatus punktfunk_get_stats(PunktfunkSession *s, PunktfunkStats *out); #if defined(PUNKTFUNK_FEATURE_QUIC) -// Connect to a `punktfunk/1` host and start a session at `width`x`height`@`refresh_hz`. -// Blocks up to `timeout_ms` for the handshake. Returns NULL on failure. Equivalent to -// [`punktfunk_connect_ex`] with `compositor = PUNKTFUNK_COMPOSITOR_AUTO`. -// // Trust: `pin_sha256` (NULL or 32 bytes) is the expected SHA-256 fingerprint of the host's // certificate — a mismatching host is rejected.
NULL = trust on first use; persist the // fingerprint written to `observed_sha256_out` (NULL or 32 bytes, filled on success) and @@ -701,6 +792,34 @@ PunktfunkConnection *punktfunk_connect_ex4(const char *host, uint32_t timeout_ms); #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Like [`punktfunk_connect_ex4`], but additionally advertises the embedder's video decode/present +// capabilities as `video_caps` — a bitfield of `PUNKTFUNK_VIDEO_CAP_10BIT` (can decode 10-bit +// Main10) and `PUNKTFUNK_VIDEO_CAP_HDR` (can present BT.2020 PQ HDR10). The host upgrades to a +// 10-bit / HDR encode ONLY when the matching bit is set (and the host opted in); `0` keeps the +// 8-bit BT.709 SDR stream. After connecting, read the resolved colour via +// [`punktfunk_connection_color_info`] and drain the mastering metadata via +// [`punktfunk_connection_next_hdr_meta`]. +// +// # Safety +// Same as [`punktfunk_connect`]; `launch_id`, when non-NULL, must be a NUL-terminated C string. +PunktfunkConnection *punktfunk_connect_ex5(const char *host, + uint16_t port, + uint32_t width, + uint32_t height, + uint32_t refresh_hz, + uint32_t compositor, + uint32_t gamepad, + uint32_t bitrate_kbps, + uint8_t video_caps, + const char *launch_id, + const uint8_t *pin_sha256, + uint8_t *observed_sha256_out, + const char *client_cert_pem, + const char *client_key_pem, + uint32_t timeout_ms); +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // Generate a persistent client identity: a self-signed certificate + private key, both // PEM, NUL-terminated, written into the caller's buffers. Generate ONCE, store both @@ -795,6 +914,40 @@ PunktfunkStatus punktfunk_connection_next_hidout(PunktfunkConnection *c, uint32_t timeout_ms); #endif +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Pull the next static HDR metadata update (ST.2086 mastering display + content light level) for +// an HDR session, into `*out`. [`PunktfunkStatus::NoFrame`] on timeout, [`PunktfunkStatus::Closed`] +// once the session ended. 
The host sends one near session start and re-sends it on mastering +// changes / keyframes; apply the latest to the display (`SetHDRMetaData` / `CAEDRMetadata` / +// `KEY_HDR_STATIC_INFO`). Only an HDR session (`punktfunk_connection_color_info` reports a PQ +// transfer) ever emits these. Same threading rules as [`punktfunk_connection_next_rumble`] (one +// puller, may run alongside the other planes). +// +// # Safety +// `c` is a valid connection handle; `out` is writable for one `PunktfunkHdrMeta`. +PunktfunkStatus punktfunk_connection_next_hdr_meta(PunktfunkConnection *c, + PunktfunkHdrMeta *out, + uint32_t timeout_ms); +#endif + +#if defined(PUNKTFUNK_FEATURE_QUIC) +// Read the session's resolved colour signalling + encode bit depth (from the host's Welcome). +// Each out pointer is filled when non-NULL: `primaries`/`transfer`/`matrix` are CICP code points +// (BT.709 = 1; BT.2020 = 9; PQ transfer = 16, HLG = 18; BT.2020-NCL matrix = 9), `full_range` is +// 0 (limited) or 1 (full), `bit_depth` is 8 or 10. A `transfer` of 16/18 means HDR — configure an +// HDR present path and drain [`punktfunk_connection_next_hdr_meta`]. Available immediately after a +// successful connect (these don't change without a reconfigure). +// +// # Safety +// `c` is a valid connection handle; each out pointer is NULL or writable for its scalar. +PunktfunkStatus punktfunk_connection_color_info(PunktfunkConnection *c, + uint8_t *primaries, + uint8_t *transfer, + uint8_t *matrix, + uint8_t *full_range, + uint8_t *bit_depth); +#endif + #if defined(PUNKTFUNK_FEATURE_QUIC) // Send one input event to the host as a QUIC datagram (non-blocking enqueue). //