Files
punktfunk/clients/linux/src/video_gl.rs
T
enricobuehler b488bd1d99 feat(client-linux): in-process GL presenter — hardware decode ships on the Steam Deck
VAAPI decode stays; what changes is who touches the YUV. The direct path hands
the NV12 dmabuf (tiled AMD modifier since Mesa 25.1) to GdkDmabufTexture, and
GTK's tiled-NV12 import renders corrupt/gray/washed-out on the Deck. Moonlight
and mpv are clean on the same box because they import the dmabuf into their own
EGL context and convert with their own shader — video_gl.rs is that
architecture for the GTK client: per-plane EGLImages (R8 + GR88, modifier
passed through) → our YUV→RGB shader (matrix/range from the stream's CICP
signaling, unit-tested) → RGBA texture in a GdkGLContext-shared context →
fence-synced GdkGLTexture. GTK composites plain RGBA; no YUV negotiation, no
compositor CSC.

The Deck's decoder default flips back to hardware (the software stopgap is
gone); desktops keep the direct dmabuf path (offload/scan-out eligible).
PUNKTFUNK_PRESENT=direct|gl overrides either way. New failure ladder: GL
converter init failure or a convert-error streak raises a shared flag and the
session pump demotes the decoder to software with a keyframe re-request — the
same mechanism also closes the old silent-black-screen gap where a rejected
dmabuf import had no recovery at all.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-04 12:00:18 +00:00

663 lines
26 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! VAAPI dmabuf → RGBA GL texture converter — the Steam Deck's hardware-decode presenter.
//!
//! The direct path hands the decoder's NV12 dmabuf (fds + AMD tiled modifier) to
//! `GdkDmabufTexture` and lets GTK import + color-convert it. On the Deck that renders
//! corrupt/gray/washed-out: since Mesa 25.1 radeonsi exports VCN decode surfaces TILED, and
//! GTK's tiled-NV12 import mishandles the layout (the Flatpak runtime's Mesa drives both
//! sides). Moonlight-qt and mpv are clean on the same box because they never let a toolkit
//! near the YUV: they import the dmabuf into their own EGL context and convert with their
//! own shader. This module is that architecture for the GTK client:
//!
//! VAAPI frame → per-plane `EGLImage`s (R8 luma + GR88 chroma, modifier passed through)
//! → our YUV→RGB shader (matrix + range from the stream's real CICP signaling)
//! → an RGBA texture in a `GdkGLContext`-shared context → `GdkGLTexture` (fence-synced).
//!
//! GTK then composites a plain RGBA texture — no YUV format negotiation, no modifier
//! handling, no compositor CSC. Same-Mesa export/import is the exact proven-working path.
//! Everything runs on the GTK main thread (the converter is driven by the frame consumer);
//! one 800p4K NV12→RGB pass is sub-millisecond GPU work.
//!
//! Failure at any step (GLX-backed GDK context, missing EGL extensions, import rejection)
//! is surfaced as an error — the caller falls back to software decode, never to the broken
//! direct path.
use crate::video::{ColorDesc, DmabufFrame};
use anyhow::{anyhow, bail, Context as _, Result};
use gtk::{gdk, prelude::*};
use khronos_egl as egl;
use std::ffi::c_void;
use std::sync::{Arc, Mutex};
// --- EGL_EXT_image_dma_buf_import(+_modifiers) constants (khronos-egl exposes none) ------
const EGL_LINUX_DMA_BUF_EXT: egl::Enum = 0x3270;
const EGL_LINUX_DRM_FOURCC_EXT: usize = 0x3271;
const EGL_DMA_BUF_PLANE0_FD_EXT: usize = 0x3272;
const EGL_DMA_BUF_PLANE0_OFFSET_EXT: usize = 0x3273;
const EGL_DMA_BUF_PLANE0_PITCH_EXT: usize = 0x3274;
const EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT: usize = 0x3443;
const EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT: usize = 0x3444;
const EGL_WIDTH: usize = 0x3057;
const EGL_HEIGHT: usize = 0x3056;
const EGL_NONE: usize = 0x3038;
const DRM_FORMAT_MOD_INVALID: u64 = 0x00ff_ffff_ffff_ffff;
/// `fourcc('N','V','1','2')` — the only decoder output today (8-bit 4:2:0). P010 joins when
/// the Linux host grows 10-bit.
const DRM_FORMAT_NV12: u32 = 0x3231_564e;
const DRM_FORMAT_R8: u32 = 0x2020_3852;
const DRM_FORMAT_GR88: u32 = 0x3838_5247;
// --- The slice of GL we use (loaded via eglGetProcAddress — Mesa/NVIDIA both implement
// --- EGL_KHR_get_all_proc_addresses, so core functions resolve too) ----------------------
const GL_TEXTURE_2D: u32 = 0x0DE1;
const GL_TEXTURE0: u32 = 0x84C0;
const GL_TEXTURE_MIN_FILTER: u32 = 0x2801;
const GL_TEXTURE_MAG_FILTER: u32 = 0x2800;
const GL_TEXTURE_WRAP_S: u32 = 0x2802;
const GL_TEXTURE_WRAP_T: u32 = 0x2803;
const GL_LINEAR: i32 = 0x2601;
const GL_CLAMP_TO_EDGE: i32 = 0x812F;
const GL_FRAMEBUFFER: u32 = 0x8D40;
const GL_COLOR_ATTACHMENT0: u32 = 0x8CE0;
const GL_FRAMEBUFFER_COMPLETE: u32 = 0x8CD5;
const GL_RGBA8: u32 = 0x8058;
const GL_RGBA: u32 = 0x1908;
const GL_UNSIGNED_BYTE: u32 = 0x1401;
const GL_TRIANGLES: u32 = 0x0004;
const GL_VERTEX_SHADER: u32 = 0x8B31;
const GL_FRAGMENT_SHADER: u32 = 0x8B30;
const GL_COMPILE_STATUS: u32 = 0x8B81;
const GL_LINK_STATUS: u32 = 0x8B82;
const GL_SYNC_GPU_COMMANDS_COMPLETE: u32 = 0x9117;
macro_rules! gl_fns {
($($name:ident : fn($($arg:ty),*) $(-> $ret:ty)?;)*) => {
#[allow(non_snake_case)]
struct GlFns { $($name: unsafe extern "C" fn($($arg),*) $(-> $ret)?,)* }
impl GlFns {
#[allow(non_snake_case)]
fn load(egl: &Egl) -> Result<GlFns> {
$(
// eglGetProcAddress returns a plain fn pointer; the signature is fixed
// by the GL spec for each name.
let $name = egl
.get_proc_address(concat!("gl", stringify!($name)))
.ok_or_else(|| anyhow!(concat!("gl", stringify!($name), " unresolvable")))?;
)*
// SAFETY: each pointer came from eglGetProcAddress for exactly that GL entry
// point; the transmute only fixes the signature the spec defines for it.
unsafe {
Ok(GlFns { $($name: std::mem::transmute::<extern "system" fn(), unsafe extern "C" fn($($arg),*) $(-> $ret)?>($name),)* })
}
}
}
};
}
gl_fns! {
GenTextures: fn(i32, *mut u32);
DeleteTextures: fn(i32, *const u32);
BindTexture: fn(u32, u32);
TexParameteri: fn(u32, u32, i32);
TexImage2D: fn(u32, i32, i32, i32, i32, i32, u32, u32, *const c_void);
ActiveTexture: fn(u32);
EGLImageTargetTexture2DOES: fn(u32, *const c_void);
GenFramebuffers: fn(i32, *mut u32);
DeleteFramebuffers: fn(i32, *const u32);
BindFramebuffer: fn(u32, u32);
FramebufferTexture2D: fn(u32, u32, u32, u32, i32);
CheckFramebufferStatus: fn(u32) -> u32;
Viewport: fn(i32, i32, i32, i32);
CreateShader: fn(u32) -> u32;
ShaderSource: fn(u32, i32, *const *const u8, *const i32);
CompileShader: fn(u32);
GetShaderiv: fn(u32, u32, *mut i32);
GetShaderInfoLog: fn(u32, i32, *mut i32, *mut u8);
DeleteShader: fn(u32);
CreateProgram: fn() -> u32;
AttachShader: fn(u32, u32);
LinkProgram: fn(u32);
GetProgramiv: fn(u32, u32, *mut i32);
UseProgram: fn(u32);
GetUniformLocation: fn(u32, *const u8) -> i32;
Uniform1i: fn(i32, i32);
Uniform3fv: fn(i32, i32, *const f32);
UniformMatrix3fv: fn(i32, i32, u8, *const f32);
GenVertexArrays: fn(i32, *mut u32);
DeleteVertexArrays: fn(i32, *const u32);
DeleteProgram: fn(u32);
BindVertexArray: fn(u32);
DrawArrays: fn(u32, i32, i32);
FenceSync: fn(u32, u32) -> *const c_void;
DeleteSync: fn(*const c_void);
Flush: fn();
GetError: fn() -> u32;
}
type Egl = egl::DynamicInstance<egl::EGL1_4>;
type EglCreateImageKhr = unsafe extern "C" fn(
*mut c_void, // EGLDisplay
*mut c_void, // EGLContext (EGL_NO_CONTEXT for dmabuf)
egl::Enum,
*mut c_void, // EGLClientBuffer (null for dmabuf)
*const usize,
) -> *const c_void;
type EglDestroyImageKhr = unsafe extern "C" fn(*mut c_void, *const c_void) -> egl::Boolean;
/// The YUV→RGB conversion for a stream's CICP signaling: `rgb = mat * (yuv + off)`, with the
/// limited/full-range expansion folded in. `mat` is column-major (GL convention). Pure —
/// unit-tested against the reference white/black points.
pub fn yuv_to_rgb(desc: ColorDesc) -> ([f32; 9], [f32; 3]) {
// BT.601 (5/6), BT.2020 (9/10); everything else — incl. unspecified — is the host's
// BT.709 SDR default (mirrors the software path's swscale coefficient choice).
let (kr, kb) = match desc.matrix {
5 | 6 => (0.299, 0.114),
9 | 10 => (0.2627, 0.0593),
_ => (0.2126, 0.0722),
};
let kg = 1.0 - kr - kb;
let (sy, oy, sc) = if desc.full_range {
(1.0f32, 0.0f32, 1.0f32)
} else {
(255.0 / 219.0, -16.0 / 255.0, 255.0 / 224.0)
};
let (kr, kb, kg) = (kr as f32, kb as f32, kg as f32);
// Column-major: columns are the Y, U, V contributions to (R, G, B).
let mat = [
sy,
sy,
sy, // Y column
0.0,
-2.0 * (1.0 - kb) * kb / kg * sc,
2.0 * (1.0 - kb) * sc, // U column
2.0 * (1.0 - kr) * sc,
-2.0 * (1.0 - kr) * kr / kg * sc,
0.0, // V column
];
(mat, [oy, -0.5, -0.5])
}
/// An output texture GTK has released, waiting to be recycled (or its fence deleted). GL
/// objects can only be touched with our context current, so releases park here and
/// [`GlConverter::convert`] drains them.
struct Retired {
tex: u32,
sync: usize, // GLsync as usize — the release closure must be Send
size: (u32, u32),
}
pub struct GlConverter {
ctx: gdk::GLContext,
egl: Egl,
egl_display: *mut c_void,
create_image: EglCreateImageKhr,
destroy_image: EglDestroyImageKhr,
gl: GlFns,
program: u32,
vao: u32,
fbo: u32,
u_mat: i32,
u_off: i32,
/// Uniforms match this signaling; a change (mid-stream SDR↔HDR) re-uploads them.
uniforms_for: Option<ColorDesc>,
/// Free output textures + fences returned by GTK's release funcs (shared with the
/// `Send` release closures; drained/recycled at each convert).
retired: Arc<Mutex<Vec<Retired>>>,
}
impl GlConverter {
/// Build against the widget's display. Must run on the GTK main thread; fails cleanly
/// on a GLX-backed GDK context or missing EGL dmabuf-import extensions (the caller
/// falls back to software decode).
pub fn new(widget: &impl IsA<gtk::Widget>) -> Result<GlConverter> {
let display = widget.display();
let ctx = display.create_gl_context().context("create GdkGLContext")?;
ctx.realize().context("realize GdkGLContext")?;
ctx.make_current();
// SAFETY (whole block): the GdkGLContext is current on this thread, so EGL/GL
// queries and object creation target it; pointers are only used while it lives.
unsafe {
let egl = Egl::load_required().context("dlopen libEGL")?;
let egl_display = egl
.get_current_display()
.ok_or_else(|| anyhow!("GDK context is not EGL-backed (GLX?)"))?;
let exts = egl
.query_string(Some(egl_display), egl::EXTENSIONS)
.context("EGL_EXTENSIONS")?
.to_string_lossy()
.into_owned();
for need in ["EGL_EXT_image_dma_buf_import", "EGL_KHR_image_base"] {
if !exts.contains(need) {
bail!("EGL lacks {need}");
}
}
// Tiled surfaces carry an explicit modifier — without the _modifiers extension
// the import would silently assume implied/linear and sample garbage.
if !exts.contains("EGL_EXT_image_dma_buf_import_modifiers") {
bail!("EGL lacks EGL_EXT_image_dma_buf_import_modifiers");
}
let create_image: EglCreateImageKhr =
std::mem::transmute::<extern "system" fn(), EglCreateImageKhr>(
egl.get_proc_address("eglCreateImageKHR")
.ok_or_else(|| anyhow!("no eglCreateImageKHR"))?,
);
let destroy_image: EglDestroyImageKhr =
std::mem::transmute::<extern "system" fn(), EglDestroyImageKhr>(
egl.get_proc_address("eglDestroyImageKHR")
.ok_or_else(|| anyhow!("no eglDestroyImageKHR"))?,
);
let gl = GlFns::load(&egl)?;
let es = ctx.api().contains(gdk::GLAPI::GLES);
let program = build_program(&gl, es)?;
(gl.UseProgram)(program);
let u_mat = (gl.GetUniformLocation)(program, c"u_mat".as_ptr() as *const u8);
let u_off = (gl.GetUniformLocation)(program, c"u_off".as_ptr() as *const u8);
let u_y = (gl.GetUniformLocation)(program, c"u_y".as_ptr() as *const u8);
let u_c = (gl.GetUniformLocation)(program, c"u_c".as_ptr() as *const u8);
(gl.Uniform1i)(u_y, 0);
(gl.Uniform1i)(u_c, 1);
let mut vao = 0u32;
(gl.GenVertexArrays)(1, &mut vao);
let mut fbo = 0u32;
(gl.GenFramebuffers)(1, &mut fbo);
tracing::info!(
gles = es,
"GL presenter ready — VAAPI dmabufs convert in-process (own EGL import + shader)"
);
Ok(GlConverter {
ctx,
egl,
egl_display: egl_display.as_ptr(),
create_image,
destroy_image,
gl,
program,
vao,
fbo,
u_mat,
u_off,
uniforms_for: None,
retired: Arc::new(Mutex::new(Vec::new())),
})
}
}
/// Convert one decoded frame into an RGBA `GdkTexture`. The source surface (guard) is
/// held until GTK releases the output texture — the GPU read is long finished by then.
/// `color_state` tags the output (full-range RGB, transfer left baked — same semantics
/// as the software path's tagged `GdkMemoryTexture`); `None` = untagged sRGB.
pub fn convert(
&mut self,
frame: DmabufFrame,
color_state: Option<&gdk::ColorState>,
) -> Result<gdk::Texture> {
if frame.fourcc != DRM_FORMAT_NV12 {
bail!("GL presenter handles NV12 only (got {:#x})", frame.fourcc);
}
if frame.planes.len() < 2 {
bail!("NV12 needs 2 planes (got {})", frame.planes.len());
}
self.ctx.make_current();
let gl = &self.gl;
// SAFETY (whole body): our context is current; every GL/EGL object created here is
// either destroyed before return or owned by the pool/release machinery.
unsafe {
// Recycle what GTK released since last frame (GL objects need the context, so
// the release closures only park entries — this is where they die/revive).
let size = (frame.width, frame.height);
let mut out_tex = 0u32;
{
let mut retired = self.retired.lock().unwrap();
retired.retain_mut(|r| {
if r.sync != 0 {
(gl.DeleteSync)(r.sync as *const c_void);
r.sync = 0;
}
if out_tex == 0 && r.size == size {
out_tex = r.tex;
false
} else if r.size != size {
(gl.DeleteTextures)(1, &r.tex); // stale size (mode change)
false
} else {
true // spare same-size texture for a later frame
}
});
}
if out_tex == 0 {
(gl.GenTextures)(1, &mut out_tex);
(gl.BindTexture)(GL_TEXTURE_2D, out_tex);
(gl.TexParameteri)(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
(gl.TexParameteri)(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
(gl.TexImage2D)(
GL_TEXTURE_2D,
0,
GL_RGBA8 as i32,
frame.width as i32,
frame.height as i32,
0,
GL_RGBA,
GL_UNSIGNED_BYTE,
std::ptr::null(),
);
}
// Import both planes with the surface's modifier — exactly the layer-wise
// import Moonlight/mpv drive on this hardware.
let y = &frame.planes[0];
let c = &frame.planes[1];
let img_y =
self.plane_image(frame.width, frame.height, DRM_FORMAT_R8, y, frame.modifier)?;
let img_c = match self.plane_image(
frame.width.div_ceil(2),
frame.height.div_ceil(2),
DRM_FORMAT_GR88,
c,
frame.modifier,
) {
Ok(img) => img,
Err(e) => {
(self.destroy_image)(self.egl_display, img_y);
return Err(e);
}
};
let mut planes = [0u32; 2];
(gl.GenTextures)(2, planes.as_mut_ptr());
for (tex, img) in planes.iter().zip([img_y, img_c]) {
(gl.BindTexture)(GL_TEXTURE_2D, *tex);
(gl.TexParameteri)(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
(gl.TexParameteri)(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
(gl.TexParameteri)(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
(gl.TexParameteri)(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
(gl.EGLImageTargetTexture2DOES)(GL_TEXTURE_2D, img);
}
(gl.UseProgram)(self.program);
if self.uniforms_for != Some(frame.color) {
let (mat, off) = yuv_to_rgb(frame.color);
(gl.UniformMatrix3fv)(self.u_mat, 1, 0, mat.as_ptr());
(gl.Uniform3fv)(self.u_off, 1, off.as_ptr());
self.uniforms_for = Some(frame.color);
}
(gl.BindFramebuffer)(GL_FRAMEBUFFER, self.fbo);
(gl.FramebufferTexture2D)(
GL_FRAMEBUFFER,
GL_COLOR_ATTACHMENT0,
GL_TEXTURE_2D,
out_tex,
0,
);
let status = (gl.CheckFramebufferStatus)(GL_FRAMEBUFFER);
if status != GL_FRAMEBUFFER_COMPLETE {
(gl.BindFramebuffer)(GL_FRAMEBUFFER, 0);
(gl.DeleteTextures)(2, planes.as_ptr());
(self.destroy_image)(self.egl_display, img_y);
(self.destroy_image)(self.egl_display, img_c);
(gl.DeleteTextures)(1, &out_tex);
bail!("FBO incomplete ({status:#x})");
}
(gl.Viewport)(0, 0, frame.width as i32, frame.height as i32);
(gl.BindVertexArray)(self.vao);
(gl.ActiveTexture)(GL_TEXTURE0);
(gl.BindTexture)(GL_TEXTURE_2D, planes[0]);
(gl.ActiveTexture)(GL_TEXTURE0 + 1);
(gl.BindTexture)(GL_TEXTURE_2D, planes[1]);
(gl.DrawArrays)(GL_TRIANGLES, 0, 3);
(gl.BindFramebuffer)(GL_FRAMEBUFFER, 0);
let sync = (gl.FenceSync)(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
(gl.Flush)();
// The draw is queued: plane textures + images can go now (the driver keeps the
// underlying buffers alive until the queued commands execute).
(gl.DeleteTextures)(2, planes.as_ptr());
(self.destroy_image)(self.egl_display, img_y);
(self.destroy_image)(self.egl_display, img_c);
let err = (gl.GetError)();
if err != 0 {
(gl.DeleteTextures)(1, &out_tex);
bail!("GL error {err:#x} during convert");
}
let mut b = gdk::GLTextureBuilder::new()
.set_context(Some(&self.ctx))
.set_id(out_tex)
.set_width(frame.width as i32)
.set_height(frame.height as i32)
.set_format(gdk::MemoryFormat::R8g8b8a8)
.set_sync(Some(sync));
if let Some(state) = color_state {
b = b.set_color_state(state);
}
let retired = self.retired.clone();
let guard = frame.guard;
let sync_bits = sync as usize; // GLsync as usize — the closure must be Send
let texture = b.build_with_release_func(move || {
drop(guard); // the decoder surface outlived every GPU read of it
retired.lock().unwrap().push(Retired {
tex: out_tex,
sync: sync_bits,
size,
});
});
Ok(texture)
}
}
/// One single-plane `EGLImage` over a dmabuf plane (R8 luma / GR88 chroma), modifier
/// passed explicitly.
///
/// # Safety
/// `self.ctx` must be current; the fd stays owned by the caller (EGL dups internally).
unsafe fn plane_image(
&self,
width: u32,
height: u32,
fourcc: u32,
plane: &crate::video::DmabufPlane,
modifier: u64,
) -> Result<*const c_void> {
let mut attribs = vec![
EGL_WIDTH,
width as usize,
EGL_HEIGHT,
height as usize,
EGL_LINUX_DRM_FOURCC_EXT,
fourcc as usize,
EGL_DMA_BUF_PLANE0_FD_EXT,
plane.fd as usize,
EGL_DMA_BUF_PLANE0_OFFSET_EXT,
plane.offset as usize,
EGL_DMA_BUF_PLANE0_PITCH_EXT,
plane.stride as usize,
];
if modifier != DRM_FORMAT_MOD_INVALID && modifier != 0 {
attribs.extend_from_slice(&[
EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
(modifier & 0xffff_ffff) as usize,
EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
(modifier >> 32) as usize,
]);
}
attribs.push(EGL_NONE);
// SAFETY: attribs is a valid EGL_NONE-terminated list; display/context are live.
let img = unsafe {
(self.create_image)(
self.egl_display,
std::ptr::null_mut(), // EGL_NO_CONTEXT — dmabuf import
EGL_LINUX_DMA_BUF_EXT,
std::ptr::null_mut(),
attribs.as_ptr(),
)
};
if img.is_null() {
bail!(
"eglCreateImageKHR rejected plane ({}x{} {:#x} mod {:#018x}): {:#x}",
width,
height,
fourcc,
modifier,
self.egl.get_error().map(|e| e as u32).unwrap_or(0)
);
}
Ok(img)
}
}
impl Drop for GlConverter {
/// Delete our objects from the shared context group (the context lives in GDK's share
/// group — per-session leftovers would pile up across sessions). Textures GTK still
/// holds at this moment release into `retired` afterwards, where nobody drains them:
/// those names leak, but it's ≤ the pool depth once per session, not per frame.
fn drop(&mut self) {
self.ctx.make_current();
let gl = &self.gl;
// SAFETY: context current; only objects this converter created are deleted.
unsafe {
for r in self.retired.lock().unwrap().drain(..) {
if r.sync != 0 {
(gl.DeleteSync)(r.sync as *const c_void);
}
(gl.DeleteTextures)(1, &r.tex);
}
(gl.DeleteFramebuffers)(1, &self.fbo);
(gl.DeleteVertexArrays)(1, &self.vao);
(gl.DeleteProgram)(self.program);
}
}
}
/// Compile the fullscreen-triangle NV12→RGB program (GLSL 300 es / 330 core per the GDK
/// context's API). `gl_VertexID` drives the geometry — no buffers at all.
///
/// # Safety
/// A GL context must be current; `gl` must belong to it.
unsafe fn build_program(gl: &GlFns, es: bool) -> Result<u32> {
let header = if es {
"#version 300 es\nprecision highp float;\n"
} else {
"#version 330 core\n"
};
let vs_src = format!(
"{header}
out vec2 v_uv;
void main() {{
vec2 p = vec2(float((gl_VertexID & 1) << 2) - 1.0, float((gl_VertexID & 2) << 1) - 1.0);
v_uv = p * 0.5 + 0.5;
gl_Position = vec4(p, 0.0, 1.0);
}}"
);
let fs_src = format!(
"{header}
in vec2 v_uv;
out vec4 frag;
uniform sampler2D u_y;
uniform sampler2D u_c;
uniform mat3 u_mat;
uniform vec3 u_off;
void main() {{
vec3 yuv = vec3(texture(u_y, v_uv).r, texture(u_c, v_uv).rg);
frag = vec4(clamp(u_mat * (yuv + u_off), 0.0, 1.0), 1.0);
}}"
);
// SAFETY: caller holds a current context; sources are valid UTF-8 with explicit lengths.
unsafe {
let compile = |kind: u32, src: &str| -> Result<u32> {
let sh = (gl.CreateShader)(kind);
let ptr = src.as_ptr();
let len = src.len() as i32;
(gl.ShaderSource)(sh, 1, &ptr, &len);
(gl.CompileShader)(sh);
let mut ok = 0i32;
(gl.GetShaderiv)(sh, GL_COMPILE_STATUS, &mut ok);
if ok == 0 {
let mut log = vec![0u8; 1024];
let mut n = 0i32;
(gl.GetShaderInfoLog)(sh, 1024, &mut n, log.as_mut_ptr());
(gl.DeleteShader)(sh);
bail!(
"shader compile: {}",
String::from_utf8_lossy(&log[..n.max(0) as usize])
);
}
Ok(sh)
};
let vs = compile(GL_VERTEX_SHADER, &vs_src)?;
let fs = match compile(GL_FRAGMENT_SHADER, &fs_src) {
Ok(fs) => fs,
Err(e) => {
(gl.DeleteShader)(vs);
return Err(e);
}
};
let prog = (gl.CreateProgram)();
(gl.AttachShader)(prog, vs);
(gl.AttachShader)(prog, fs);
(gl.LinkProgram)(prog);
(gl.DeleteShader)(vs);
(gl.DeleteShader)(fs);
let mut ok = 0i32;
(gl.GetProgramiv)(prog, GL_LINK_STATUS, &mut ok);
if ok == 0 {
bail!("program link failed");
}
Ok(prog)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn desc(matrix: u8, full_range: bool) -> ColorDesc {
ColorDesc {
primaries: 1,
transfer: 1,
matrix,
full_range,
}
}
fn apply(mat: &[f32; 9], off: &[f32; 3], yuv: [f32; 3]) -> [f32; 3] {
let v = [yuv[0] + off[0], yuv[1] + off[1], yuv[2] + off[2]];
// Column-major: out[r] = Σ mat[col*3 + r] * v[col]
core::array::from_fn(|r| (0..3).map(|c| mat[c * 3 + r] * v[c]).sum())
}
/// Reference white (Y=235, U=V=128 limited) → RGB 1.0; reference black (Y=16) → 0.0.
#[test]
fn bt709_limited_white_black() {
let (mat, off) = yuv_to_rgb(desc(1, false));
let white = apply(&mat, &off, [235.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0]);
let black = apply(&mat, &off, [16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0]);
for (w, b) in white.iter().zip(black) {
assert!((w - 1.0).abs() < 0.005, "white {white:?}");
assert!(b.abs() < 0.005, "black {black:?}");
}
}
/// Full-range identity points: Y=1 → white, Y=0 → black, and a 601-vs-709 red spot
/// check (pure V excursion produces R = 2(1Kr)·0.5).
#[test]
fn full_range_and_red_excursion() {
let (mat, off) = yuv_to_rgb(desc(5, true));
let white = apply(&mat, &off, [1.0, 0.5, 0.5]);
assert!(white.iter().all(|v| (v - 1.0).abs() < 1e-5), "{white:?}");
let red = apply(&mat, &off, [0.0, 0.5, 1.0]);
assert!((red[0] - 2.0 * (1.0 - 0.299) * 0.5).abs() < 1e-4, "{red:?}");
// 709 differs from 601 in the same spot — guards the matrix-code dispatch.
let (mat709, off709) = yuv_to_rgb(desc(1, true));
let red709 = apply(&mat709, &off709, [0.0, 0.5, 1.0]);
assert!(
(red709[0] - 2.0 * (1.0 - 0.2126) * 0.5).abs() < 1e-4,
"{red709:?}"
);
assert!((red[0] - red709[0]).abs() > 0.05);
}
}