fix(client-linux): VAAPI green screen on AMD — flatten NV12 planes across DRM layers
First AMD test (Steam Deck, Mesa radeonsi) showed a mostly-green image with red whites — the classic fingerprint of NV12 chroma read as 0. Root cause (confirmed against FFmpeg/GTK/mpv source): FFmpeg's VAAPI export uses VA_EXPORT_SURFACE_SEPARATE_LAYERS unconditionally, so an NV12 surface comes back as TWO single-plane layers — layers[0]=R8 (luma), layers[1]=GR88 (chroma) — sharing one object/fd, the UV plane reached via offset. map_dmabuf took layers[0] only and used its format (R8) as the GTK fourcc, so GdkDmabufTexture got a luma-only texture with the chroma plane dropped → chroma defaults to 0 → green field, red highlights. Fix (matches mpv's dmabuf_interop_gl flatten pattern): - Derive the combined fourcc from the decoder's sw_format (AVHWFramesContext.sw_format → NV12 → DRM_FORMAT_NV12), NOT from the per-plane component formats. The frame format is absent from the separate-layer descriptor and must be deduced from sw_format. - Flatten every plane across every layer in declared order (Y then UV), each with its own fd (objects[plane.object_index].fd), offset, pitch. - One-time descriptor dump (objects/layers/formats/modifier) so a new driver's real layout is visible in the logs. - Unit test locks the DRM FourCC magic numbers (NV12=0x3231564e). Software decode (swscale, reads colorspace from the VUI) was always correct, which isolated the bug to this path. PUNKTFUNK_DECODER=software is the immediate workaround on an un-rebuilt binary. Awaiting Steam Deck reconfirm (no AMD VAAPI on the NVIDIA dev box to live-verify). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -108,8 +108,14 @@ Low-latency desktop/game streaming stack, Linux-first, with a shared Rust protoc
|
|||||||
default, saved-hosts list, .deb + RPM-subpackage CI (deb.yml/rpm.yml). **VAAPI decode
|
default, saved-hosts list, .deb + RPM-subpackage CI (deb.yml/rpm.yml). **VAAPI decode
|
||||||
→ DRM-PRIME dmabuf → `GdkDmabufTexture`** (BT.709 color state; Tier-1 zero-copy on
|
→ DRM-PRIME dmabuf → `GdkDmabufTexture`** (BT.709 color state; Tier-1 zero-copy on
|
||||||
Intel/AMD, `PUNKTFUNK_DECODER=software|vaapi` override) with a proven fallback ladder —
|
Intel/AMD, `PUNKTFUNK_DECODER=software|vaapi` override) with a proven fallback ladder —
|
||||||
no VAAPI device (NVIDIA) or mid-session VAAPI error → software decode; needs an
|
no VAAPI device (NVIDIA) or mid-session VAAPI error → software decode. **First AMD test
|
||||||
Intel/AMD client box to live-verify the hw path. Next: the stage-2 raw-Wayland
|
(Steam Deck) hit a green-screen bug, fixed:** FFmpeg's VAAPI export uses
|
||||||
|
`SEPARATE_LAYERS`, so NV12 arrives as two single-plane layers (R8 luma + GR88 chroma,
|
||||||
|
one shared fd); the mapper took `layers[0]` only → GTK got a luma-only R8 texture, chroma
|
||||||
|
read as 0 → green field / red whites. Fix derives the combined fourcc from the decoder
|
||||||
|
`sw_format` (→ `DRM_FORMAT_NV12`) and flattens all planes across all layers (mpv's
|
||||||
|
pattern); a first-frame descriptor dump logs the real layout. Awaiting Steam Deck
|
||||||
|
reconfirm. Next: the stage-2 raw-Wayland
|
||||||
presenter (wp_presentation feedback, tearing-control, Vulkan Video on NVIDIA) —
|
presenter (wp_presentation feedback, tearing-control, Vulkan Video on NVIDIA) —
|
||||||
**wgpu/winit rejected** (no dmabuf import / presentation feedback / shortcuts-inhibit).
|
**wgpu/winit rejected** (no dmabuf import / presentation feedback / shortcuts-inhibit).
|
||||||
**Windows stage 1 done 2026-06-15** (`crates/punktfunk-client-windows`, binary
|
**Windows stage 1 done 2026-06-15** (`crates/punktfunk-client-windows`, binary
|
||||||
|
|||||||
@@ -43,7 +43,8 @@ pub struct CpuFrame {
|
|||||||
pub struct DmabufFrame {
|
pub struct DmabufFrame {
|
||||||
pub width: u32,
|
pub width: u32,
|
||||||
pub height: u32,
|
pub height: u32,
|
||||||
/// DRM fourcc of the layer (NV12 for 8-bit VAAPI output).
|
/// Combined DRM fourcc of the whole surface (NV12 for 8-bit VAAPI output), derived
|
||||||
|
/// from the decoder's software format — NOT the per-plane component formats.
|
||||||
pub fourcc: u32,
|
pub fourcc: u32,
|
||||||
pub modifier: u64,
|
pub modifier: u64,
|
||||||
pub planes: Vec<DmabufPlane>,
|
pub planes: Vec<DmabufPlane>,
|
||||||
@@ -292,12 +293,31 @@ impl VaapiDecoder {
|
|||||||
|
|
||||||
/// Map the VAAPI surface to DRM PRIME (zero copy) and lift the descriptor into a
|
/// Map the VAAPI surface to DRM PRIME (zero copy) and lift the descriptor into a
|
||||||
/// `DmabufFrame`. The mapped frame keeps the surface alive via its buffer refs.
|
/// `DmabufFrame`. The mapped frame keeps the surface alive via its buffer refs.
|
||||||
|
///
|
||||||
|
/// FFmpeg's VAAPI export uses `VA_EXPORT_SURFACE_SEPARATE_LAYERS`, so an NV12 surface
|
||||||
|
/// comes back as TWO layers (`R8` luma + `GR88` chroma), each one plane — NOT a single
|
||||||
|
/// `NV12` layer. The previous code took `layers[0]` only: GTK then saw an `R8`
|
||||||
|
/// single-plane texture with the chroma dropped, painting the screen green. The fix:
|
||||||
|
/// derive the COMBINED fourcc from the decoder's software pixel format (NV12 →
|
||||||
|
/// `DRM_FORMAT_NV12`) and flatten every plane across every layer in order (Y then UV).
|
||||||
unsafe fn map_dmabuf(&mut self) -> Result<DmabufFrame> {
|
unsafe fn map_dmabuf(&mut self) -> Result<DmabufFrame> {
|
||||||
use ffmpeg::ffi;
|
use ffmpeg::ffi;
|
||||||
unsafe {
|
unsafe {
|
||||||
if (*self.frame).format != ffi::AVPixelFormat::AV_PIX_FMT_VAAPI as i32 {
|
if (*self.frame).format != ffi::AVPixelFormat::AV_PIX_FMT_VAAPI as i32 {
|
||||||
bail!("decoder returned a software frame (no VAAPI surface)");
|
bail!("decoder returned a software frame (no VAAPI surface)");
|
||||||
}
|
}
|
||||||
|
// The real pixel layout lives on the hardware frames context, not the
|
||||||
|
// DRM-PRIME layer formats (those are the per-plane R8/GR88 component formats).
|
||||||
|
let sw_format = {
|
||||||
|
let hwfc = (*self.frame).hw_frames_ctx;
|
||||||
|
if hwfc.is_null() {
|
||||||
|
bail!("VAAPI frame without a hardware frames context");
|
||||||
|
}
|
||||||
|
(*((*hwfc).data as *const ffi::AVHWFramesContext)).sw_format
|
||||||
|
};
|
||||||
|
let fourcc = drm_fourcc_for(sw_format)
|
||||||
|
.ok_or_else(|| anyhow!("unsupported VAAPI output format {sw_format:?}"))?;
|
||||||
|
|
||||||
let drm = ffi::av_frame_alloc();
|
let drm = ffi::av_frame_alloc();
|
||||||
(*drm).format = ffi::AVPixelFormat::AV_PIX_FMT_DRM_PRIME as i32;
|
(*drm).format = ffi::AVPixelFormat::AV_PIX_FMT_DRM_PRIME as i32;
|
||||||
let r = ffi::av_hwframe_map(drm, self.frame, ffi::AV_HWFRAME_MAP_READ as i32);
|
let r = ffi::av_hwframe_map(drm, self.frame, ffi::AV_HWFRAME_MAP_READ as i32);
|
||||||
@@ -309,24 +329,35 @@ impl VaapiDecoder {
|
|||||||
let desc = (*drm).data[0] as *const ffi::AVDRMFrameDescriptor;
|
let desc = (*drm).data[0] as *const ffi::AVDRMFrameDescriptor;
|
||||||
let guard = DrmFrameGuard(drm);
|
let guard = DrmFrameGuard(drm);
|
||||||
let d = &*desc;
|
let d = &*desc;
|
||||||
if d.nb_layers < 1 {
|
if d.nb_layers < 1 || d.nb_objects < 1 {
|
||||||
bail!("DRM descriptor without layers");
|
bail!("DRM descriptor without layers/objects");
|
||||||
}
|
}
|
||||||
let layer = &d.layers[0];
|
|
||||||
let mut planes = Vec::with_capacity(layer.nb_planes as usize);
|
// Flatten planes across ALL layers, in declared order — the combined fourcc's
|
||||||
for p in &layer.planes[..layer.nb_planes as usize] {
|
// plane order (Y, then UV for NV12) matches the layer order FFmpeg emits.
|
||||||
let obj = &d.objects[p.object_index as usize];
|
let mut planes = Vec::new();
|
||||||
planes.push(DmabufPlane {
|
for layer in &d.layers[..d.nb_layers as usize] {
|
||||||
fd: obj.fd,
|
for p in &layer.planes[..layer.nb_planes as usize] {
|
||||||
offset: p.offset as u32,
|
let obj = &d.objects[p.object_index as usize];
|
||||||
stride: p.pitch as u32,
|
planes.push(DmabufPlane {
|
||||||
});
|
fd: obj.fd,
|
||||||
|
offset: p.offset as u32,
|
||||||
|
stride: p.pitch as u32,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The whole surface shares one tiling modifier (one BO on radeonsi); GTK takes
|
||||||
|
// a single modifier for the texture.
|
||||||
|
let modifier = d.objects[0].format_modifier;
|
||||||
|
|
||||||
|
log_descriptor_once(d, sw_format, fourcc, modifier);
|
||||||
|
|
||||||
Ok(DmabufFrame {
|
Ok(DmabufFrame {
|
||||||
width: (*self.frame).width as u32,
|
width: (*self.frame).width as u32,
|
||||||
height: (*self.frame).height as u32,
|
height: (*self.frame).height as u32,
|
||||||
fourcc: layer.format,
|
fourcc,
|
||||||
modifier: d.objects[0].format_modifier,
|
modifier,
|
||||||
planes,
|
planes,
|
||||||
guard,
|
guard,
|
||||||
})
|
})
|
||||||
@@ -334,6 +365,50 @@ impl VaapiDecoder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `fourcc(a,b,c,d)` — the DRM FourCC packing (little-endian, `a | b<<8 | c<<16 | d<<24`).
|
||||||
|
const fn fourcc(a: u8, b: u8, c: u8, d: u8) -> u32 {
|
||||||
|
(a as u32) | ((b as u32) << 8) | ((c as u32) << 16) | ((d as u32) << 24)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The combined DRM FourCC for a decoder software pixel format. The host streams 8-bit
|
||||||
|
/// 4:2:0 (NV12); P010 is here for the eventual 10-bit/HDR path.
|
||||||
|
fn drm_fourcc_for(sw: ffmpeg_next::ffi::AVPixelFormat) -> Option<u32> {
|
||||||
|
use ffmpeg_next::ffi::AVPixelFormat::*;
|
||||||
|
Some(match sw {
|
||||||
|
AV_PIX_FMT_NV12 => fourcc(b'N', b'V', b'1', b'2'),
|
||||||
|
AV_PIX_FMT_P010LE => fourcc(b'P', b'0', b'1', b'0'),
|
||||||
|
_ => return None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One-time dump of the DRM descriptor layout (objects, layers, planes, modifier) — so a
|
||||||
|
/// new client/driver combination's real layout is visible in the logs without a debugger.
|
||||||
|
fn log_descriptor_once(
|
||||||
|
d: &ffmpeg_next::ffi::AVDRMFrameDescriptor,
|
||||||
|
sw: ffmpeg_next::ffi::AVPixelFormat,
|
||||||
|
fourcc: u32,
|
||||||
|
modifier: u64,
|
||||||
|
) {
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
static ONCE: AtomicBool = AtomicBool::new(true);
|
||||||
|
if !ONCE.swap(false, Ordering::Relaxed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let layers: Vec<(u32, i32)> = d.layers[..d.nb_layers.max(0) as usize]
|
||||||
|
.iter()
|
||||||
|
.map(|l| (l.format, l.nb_planes))
|
||||||
|
.collect();
|
||||||
|
tracing::info!(
|
||||||
|
sw_format = ?sw,
|
||||||
|
chosen_fourcc = format_args!("{:#010x}", fourcc),
|
||||||
|
nb_objects = d.nb_objects,
|
||||||
|
nb_layers = d.nb_layers,
|
||||||
|
?layers,
|
||||||
|
modifier = format_args!("{:#018x}", modifier),
|
||||||
|
"VAAPI dmabuf descriptor layout (first frame)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
impl Drop for VaapiDecoder {
|
impl Drop for VaapiDecoder {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
use ffmpeg::ffi;
|
use ffmpeg::ffi;
|
||||||
@@ -345,3 +420,24 @@ impl Drop for VaapiDecoder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Lock the DRM FourCC magic numbers against typos — these are the exact values
|
||||||
|
/// `<drm_fourcc.h>` defines, and a wrong one is what painted the Steam Deck green.
|
||||||
|
#[test]
|
||||||
|
fn drm_fourcc_constants() {
|
||||||
|
assert_eq!(fourcc(b'N', b'V', b'1', b'2'), 0x3231_564e);
|
||||||
|
assert_eq!(fourcc(b'P', b'0', b'1', b'0'), 0x3031_3050);
|
||||||
|
assert_eq!(
|
||||||
|
drm_fourcc_for(ffmpeg::ffi::AVPixelFormat::AV_PIX_FMT_NV12),
|
||||||
|
Some(0x3231_564e)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
drm_fourcc_for(ffmpeg::ffi::AVPixelFormat::AV_PIX_FMT_RGBA),
|
||||||
|
None
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user