feat: M2 zero-copy — PipeWire dmabuf negotiation + EGL device-platform import (WIP)

Wire the capture side of zero-copy (LUMEN_ZEROCOPY=1):

- EGL importer now opens the headless EGLDisplay on the NVIDIA EGL device (EGL_PLATFORM_DEVICE_EXT) and queries its importable DRM modifiers (eglQueryDmaBufModifiersEXT).
- The PipeWire stream advertises a BGRx dmabuf format with those modifiers as a mandatory enum Choice + a dmabuf-only Buffers param; the compositor fixates an importable tiled modifier. param_changed reads the negotiated modifier; the process callback imports the dmabuf (eglCreateImage with explicit LO/HI modifier) and would copy it into a CUDA buffer for the encoder.

Validated against headless KWin (Plasma 6.4): negotiation succeeds (13 NVIDIA modifiers advertised, KWin fixates one, stream reaches Streaming with a real tiled dmabuf) and `eglCreateImage` succeeds.

The remaining blocker is `cuGraphicsEGLRegisterImage` returning CUDA_ERROR_INVALID_VALUE on the dmabuf-imported EGLImage — the likely fix is to bind the EGLImage to a GL texture (glEGLImageTargetTexture2DOES) and register that via cuGraphicsGLRegisterImage (OBS/Sunshine's path), which needs a GL context.

The CPU-copy path stays the default and is unaffected (regression-checked: real KWin capture → HEVC). LUMEN_ZEROCOPY is opt-in/experimental until the CUDA registration lands.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 15:41:31 +00:00
parent 16a00563a8
commit e3876c0d8a
4 changed files with 325 additions and 70 deletions
@@ -65,10 +65,13 @@ impl PortalCapturer {
        let (frame_tx, frame_rx) = sync_channel::<CapturedFrame>(8);
        let active = Arc::new(AtomicBool::new(false));
        let active_cb = active.clone();
+        let zerocopy = crate::zerocopy::enabled();
        thread::Builder::new()
            .name("lumen-pipewire".into())
            .spawn(move || {
-                if let Err(e) = pipewire::pipewire_thread(fd, node_id, frame_tx, active_cb) {
+                if let Err(e) =
+                    pipewire::pipewire_thread(fd, node_id, frame_tx, active_cb, zerocopy)
+                {
                    tracing::error!(error = %format!("{e:#}"), "pipewire capture thread failed");
                }
            })
@@ -328,9 +331,91 @@ mod pipewire {
        info: VideoInfoRaw,
        /// Negotiated layout (`None` until param_changed, or if unsupported).
        format: Option<PixelFormat>,
+        /// Negotiated DRM format modifier (for dmabuf import); 0 = LINEAR.
+        modifier: u64,
        tx: SyncSender<CapturedFrame>,
        /// When false (no active stream), skip the de-pad copy — the buffer is just released.
        active: Arc<AtomicBool>,
+        /// Present when zero-copy is enabled: imports a dmabuf → CUDA device buffer.
+        importer: Option<crate::zerocopy::EglImporter>,
+    }
+
+    /// Serialize a SPA pod object into raw bytes using an in-memory cursor. A serializer
+    /// failure is returned with the context "serialize pod".
+    fn serialize_pod(obj: pw::spa::pod::Object) -> Result<Vec<u8>> {
+        let value = pw::spa::pod::Value::Object(obj);
+        let sink = std::io::Cursor::new(Vec::new());
+        // The serializer returns a tuple; only its first element (the cursor) is needed.
+        let (cursor, _rest) = pw::spa::pod::serialize::PodSerializer::serialize(sink, &value)
+            .context("serialize pod")?;
+        Ok(cursor.into_inner())
+    }
+
+    /// Build a BGRx dmabuf `EnumFormat` pod advertising the EGL-importable `modifiers` as a
+    /// mandatory enum Choice; the compositor fixates to one of them that it can allocate, which
+    /// we read back in `param_changed`.
+    ///
+    /// The first entry of `modifiers` doubles as the Choice default; every entry (including the
+    /// first) is listed as an alternative.
+    ///
+    /// # Errors
+    /// Fails if `modifiers` is empty (there is nothing to advertise) or if the pod cannot be
+    /// serialized.
+    fn build_dmabuf_format(modifiers: &[u64]) -> Result<Vec<u8>> {
+        use pw::spa::param::format::{FormatProperties, MediaSubtype, MediaType};
+        // Indexing `modifiers[0]` would panic on an empty slice; surface it as an error instead.
+        let Some(&default_modifier) = modifiers.first() else {
+            anyhow::bail!("cannot build a dmabuf format pod from an empty modifier list");
+        };
+        let mut obj = pw::spa::pod::object!(
+            pw::spa::utils::SpaTypes::ObjectParamFormat,
+            pw::spa::param::ParamType::EnumFormat,
+            pw::spa::pod::property!(FormatProperties::MediaType, Id, MediaType::Video),
+            pw::spa::pod::property!(FormatProperties::MediaSubtype, Id, MediaSubtype::Raw),
+            pw::spa::pod::property!(FormatProperties::VideoFormat, Id, VideoFormat::BGRx),
+            // Size range: default 1920x1080, from 1x1 up to 8192x8192.
+            pw::spa::pod::property!(
+                FormatProperties::VideoSize,
+                Choice,
+                Range,
+                Rectangle,
+                pw::spa::utils::Rectangle {
+                    width: 1920,
+                    height: 1080
+                },
+                pw::spa::utils::Rectangle {
+                    width: 1,
+                    height: 1
+                },
+                pw::spa::utils::Rectangle {
+                    width: 8192,
+                    height: 8192
+                }
+            ),
+            // Framerate range: default 60/1, from 0/1 up to 240/1.
+            pw::spa::pod::property!(
+                FormatProperties::VideoFramerate,
+                Choice,
+                Range,
+                Fraction,
+                pw::spa::utils::Fraction { num: 60, denom: 1 },
+                pw::spa::utils::Fraction { num: 0, denom: 1 },
+                pw::spa::utils::Fraction { num: 240, denom: 1 }
+            ),
+        );
+        // The modifier property is pushed by hand so it can carry the MANDATORY flag. DRM
+        // modifiers are 64-bit patterns; `as i64` reinterprets the bits for the SPA Long choice.
+        obj.properties.push(pw::spa::pod::Property {
+            key: pw::spa::sys::SPA_FORMAT_VIDEO_modifier,
+            flags: pw::spa::pod::PropertyFlags::MANDATORY,
+            value: pw::spa::pod::Value::Choice(pw::spa::pod::ChoiceValue::Long(
+                pw::spa::utils::Choice(
+                    pw::spa::utils::ChoiceFlags::empty(),
+                    pw::spa::utils::ChoiceEnum::Enum {
+                        default: default_modifier as i64,
+                        alternatives: modifiers.iter().map(|&m| m as i64).collect(),
+                    },
+                ),
+            )),
+        });
+        serialize_pod(obj)
+    }
+
+    /// Serialize a `Buffers` param object whose single property, `dataType`, is a bitmask with
+    /// only the `SPA_DATA_DmaBuf` bit set — i.e. a request for dmabuf-only buffers.
+    fn build_dmabuf_buffers() -> Result<Vec<u8>> {
+        let dmabuf_only = 1i32 << pw::spa::sys::SPA_DATA_DmaBuf;
+        let data_type = pw::spa::pod::Property {
+            key: pw::spa::sys::SPA_PARAM_BUFFERS_dataType,
+            flags: pw::spa::pod::PropertyFlags::empty(),
+            value: pw::spa::pod::Value::Int(dmabuf_only),
+        };
+        let buffers = pw::spa::pod::Object {
+            type_: pw::spa::utils::SpaTypes::ObjectParamBuffers.as_raw(),
+            id: pw::spa::param::ParamType::Buffers.as_raw(),
+            properties: vec![data_type],
+        };
+        serialize_pod(buffers)
    }

    pub fn pipewire_thread(
@@ -338,6 +423,7 @@ mod pipewire {
        node_id: u32,
        tx: SyncSender<CapturedFrame>,
        active: Arc<AtomicBool>,
+        zerocopy: bool,
    ) -> Result<()> {
        crate::pwinit::ensure_init();

@@ -347,11 +433,43 @@ mod pipewire {
            .connect_fd_rc(fd, None)
            .context("pw connect_fd (portal remote)")?;

+        // Build the EGL→CUDA importer up front; if it fails, log and fall back to the CPU path
+        // (we simply won't request dmabuf below).
+        let importer = if zerocopy {
+            match crate::zerocopy::EglImporter::new() {
+                Ok(i) => Some(i),
+                Err(e) => {
+                    tracing::warn!(error = %format!("{e:#}"), "zero-copy import unavailable — using CPU path");
+                    None
+                }
+            }
+        } else {
+            None
+        };
+        // Modifiers our EGL stack can import for BGRx (the layout KWin gives); if none, we can't
+        // negotiate dmabuf and fall back to the shm path.
+        let modifiers = importer
+            .as_ref()
+            .map(|i| i.supported_modifiers(crate::zerocopy::drm_fourcc(PixelFormat::Bgrx).unwrap()))
+            .unwrap_or_default();
+        let want_dmabuf = importer.is_some() && !modifiers.is_empty();
+        if zerocopy && !want_dmabuf {
+            tracing::warn!("zero-copy: no EGL-importable dmabuf modifiers — using CPU path");
+        } else if want_dmabuf {
+            tracing::info!(
+                count = modifiers.len(),
+                sample = ?&modifiers[..modifiers.len().min(6)],
+                "zero-copy: advertising EGL-importable dmabuf modifiers"
+            );
+        }
+
        let data = UserData {
            info: VideoInfoRaw::default(),
            format: None,
+            modifier: 0,
            tx,
            active,
+            importer,
        };

        let stream = pw::stream::StreamBox::new(
@@ -388,11 +506,13 @@ mod pipewire {
                if ud.info.parse(param).is_ok() {
                    let sz = ud.info.size();
                    ud.format = map_format(ud.info.format());
+                    ud.modifier = ud.info.modifier();
                    tracing::info!(
                        width = sz.width,
                        height = sz.height,
                        spa_format = ?ud.info.format(),
                        mapped = ?ud.format,
+                        modifier = ud.modifier,
                        "pipewire format negotiated"
                    );
                    if ud.format.is_none() {
@@ -423,6 +543,55 @@ mod pipewire {
                if w == 0 || h == 0 {
                    return; // format not negotiated yet
                }
+
+                // Zero-copy path: if the buffer is a dmabuf and we have an importer, import it
+                // into a CUDA device buffer (no CPU touch) and deliver that. Otherwise fall
+                // through to the shm de-pad copy below.
+                if let (Some(importer), Some(fmt)) = (ud.importer.as_ref(), ud.format) {
+                    if datas[0].type_() == pw::spa::buffer::DataType::DmaBuf {
+                        let plane = crate::zerocopy::DmabufPlane {
+                            fd: datas[0].fd(),
+                            offset: datas[0].chunk().offset(),
+                            stride: datas[0].chunk().stride().max(0) as u32,
+                        };
+                        // 0 (unset/LINEAR) → import with the implicit modifier; a real tiled
+                        // modifier (if the producer reported one) → import it explicitly.
+                        let modifier = (ud.modifier != 0).then_some(ud.modifier);
+                        if let Some(fourcc) = crate::zerocopy::drm_fourcc(fmt) {
+                            match importer.import(&plane, w as u32, h as u32, fourcc, modifier) {
+                                Ok(devbuf) => {
+                                    static ONCE: std::sync::atomic::AtomicBool =
+                                        std::sync::atomic::AtomicBool::new(true);
+                                    if ONCE.swap(false, Ordering::Relaxed) {
+                                        tracing::info!(w, h, modifier = ud.modifier,
+                                            "zero-copy: dmabuf imported to CUDA (no CPU copy)");
+                                    }
+                                    let pts_ns = SystemTime::now()
+                                        .duration_since(UNIX_EPOCH)
+                                        .map(|d| d.as_nanos() as u64)
+                                        .unwrap_or(0);
+                                    let _ = ud.tx.try_send(CapturedFrame {
+                                        width: w as u32,
+                                        height: h as u32,
+                                        pts_ns,
+                                        format: fmt,
+                                        payload: FramePayload::Cuda(devbuf),
+                                    });
+                                }
+                                Err(e) => {
+                                    static ONCE: std::sync::atomic::AtomicBool =
+                                        std::sync::atomic::AtomicBool::new(true);
+                                    if ONCE.swap(false, Ordering::Relaxed) {
+                                        tracing::warn!(error = %format!("{e:#}"),
+                                            "dmabuf import failed — frames dropped (consider unsetting LUMEN_ZEROCOPY)");
+                                    }
+                                }
+                            }
+                        }
+                        return;
+                    }
+                }
+
                let d = &mut datas[0];
                let (size, offset, stride) = {
                    let c = d.chunk();
@@ -534,14 +703,33 @@ mod pipewire {
            ),
        );

-        let values: Vec<u8> = pw::spa::pod::serialize::PodSerializer::serialize(
-            std::io::Cursor::new(Vec::new()),
-            &pw::spa::pod::Value::Object(obj),
-        )
-        .context("serialize format pod")?
-        .0
-        .into_inner();
-        let mut params = [Pod::from_bytes(&values).context("pod from bytes")?];
+        // When zero-copy is on, offer ONLY a BGRx dmabuf format with our EGL-importable modifiers
+        // (offering shm too makes the compositor pick shm). The modifier list is advertised as a
+        // MANDATORY enum Choice (without DONT_FIXATE), so the compositor fixates one modifier and
+        // we read it back in `param_changed`. Otherwise offer the multi-format shm pod and let
+        // MAP_BUFFERS map it.
+        let shm_values = serialize_pod(obj)?;
+        let (dmabuf_values, buffers_values) = if want_dmabuf {
+            (
+                Some(build_dmabuf_format(&modifiers)?),
+                Some(build_dmabuf_buffers()?),
+            )
+        } else {
+            (None, None)
+        };
+
+        let mut byte_slices: Vec<&[u8]> = Vec::new();
+        match &dmabuf_values {
+            Some(d) => byte_slices.push(d),
+            None => byte_slices.push(&shm_values),
+        }
+        if let Some(b) = &buffers_values {
+            byte_slices.push(b);
+        }
+        let mut params: Vec<&Pod> = byte_slices
+            .iter()
+            .map(|&b| Pod::from_bytes(b).context("pod from bytes"))
+            .collect::<Result<_>>()?;

        stream
            .connect(
@@ -164,7 +164,13 @@ impl DeviceBuffer {
        let mut pitch: usize = 0;
        unsafe {
            ck(
-                cuMemAllocPitch_v2(&mut ptr, &mut pitch, width as usize * 4, height as usize, 16),
+                cuMemAllocPitch_v2(
+                    &mut ptr,
+                    &mut pitch,
+                    width as usize * 4,
+                    height as usize,
+                    16,
+                ),
                "cuMemAllocPitch_v2",
            )?;
        }
@@ -205,9 +211,10 @@ impl MappedImage {
    /// # Safety
    /// `image` must be a valid `EGLImage`; the shared context must be current on this thread.
    pub unsafe fn register(image: *mut c_void) -> Result<MappedImage> {
+        // CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY (0x01): we only read the surface (encode from it).
        let mut resource: CUgraphicsResource = std::ptr::null_mut();
        ck(
-            cuGraphicsEGLRegisterImage(&mut resource, image, 0),
+            cuGraphicsEGLRegisterImage(&mut resource, image, 0x01),
            "cuGraphicsEGLRegisterImage",
        )?;
        let mut frame = CUeglFrame::default();
@@ -1,20 +1,26 @@
-//! EGL side of the zero-copy path: open a headless EGLDisplay on the NVIDIA GPU (via the GBM
-//! platform on the render node) and import a PipeWire dmabuf as an `EGLImage` with
-//! `EGL_LINUX_DMA_BUF_EXT`. The DRM format **modifier** is mandatory on NVIDIA (its buffers are
-//! tiled; importing without the modifier yields a corrupt image or `EGL_BAD_MATCH`). The image
-//! is then handed to CUDA (`cuGraphicsEGLRegisterImage`) and copied device-to-device into an
-//! owned buffer so the dmabuf can be returned to the compositor immediately.
+//! EGL side of the zero-copy path: open a headless EGLDisplay on the NVIDIA EGL device and
+//! import a PipeWire dmabuf as an `EGLImage` with `EGL_LINUX_DMA_BUF_EXT`. The DRM format
+//! **modifier** is mandatory on NVIDIA (its buffers are tiled; importing without the modifier
+//! yields a corrupt image or `EGL_BAD_MATCH`). The image is handed to CUDA
+//! (`cuGraphicsEGLRegisterImage`) and copied device-to-device into an owned buffer so the
+//! dmabuf can be returned to the compositor immediately.
+//!
+//! NOTE (WIP): the negotiation + EGL import are verified end-to-end against KWin (a tiled
+//! dmabuf reaches `eglCreateImage` successfully), but `cuGraphicsEGLRegisterImage` currently
+//! returns `CUDA_ERROR_INVALID_VALUE` on the dmabuf-imported `EGLImage`. The likely fix is to
+//! bind the `EGLImage` to a GL texture (`glEGLImageTargetTexture2DOES`) and register *that* via
+//! `cuGraphicsGLRegisterImage` (OBS/Sunshine's path), which needs a GL context.

 #![allow(non_upper_case_globals)]

 use super::cuda::{self, DeviceBuffer, MappedImage};
 use anyhow::{ensure, Context as _, Result};
 use khronos_egl as egl;
-use std::os::raw::{c_int, c_void};
+use std::os::raw::c_void;

 // EGL_EXT_image_dma_buf_import / _modifiers + platform enums (not defined by khronos-egl).
 const EGL_LINUX_DMA_BUF_EXT: egl::Enum = 0x3270;
-const EGL_PLATFORM_GBM_KHR: egl::Enum = 0x31D7;
+const EGL_PLATFORM_DEVICE_EXT: egl::Enum = 0x313F;
 const EGL_LINUX_DRM_FOURCC_EXT: egl::Attrib = 0x3271;
 const EGL_DMA_BUF_PLANE0_FD_EXT: egl::Attrib = 0x3272;
 const EGL_DMA_BUF_PLANE0_OFFSET_EXT: egl::Attrib = 0x3273;
@@ -22,12 +28,6 @@ const EGL_DMA_BUF_PLANE0_PITCH_EXT: egl::Attrib = 0x3274;
 const EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT: egl::Attrib = 0x3443;
 const EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT: egl::Attrib = 0x3444;

-#[link(name = "gbm")]
-extern "C" {
-    fn gbm_create_device(fd: c_int) -> *mut c_void;
-    fn gbm_device_destroy(device: *mut c_void);
-}
-
 /// One dmabuf plane as delivered by PipeWire (single-plane for BGRx).
 #[derive(Clone, Copy, Debug)]
 pub struct DmabufPlane {
@@ -38,41 +38,58 @@ pub struct DmabufPlane {

 type Egl = egl::DynamicInstance<egl::EGL1_5>;

-/// Headless EGL display + GBM device used to import dmabufs. Lives on the capture thread.
+/// Headless EGLDisplay (NVIDIA device platform) used to import dmabufs. Lives on the capture
+/// thread. The device platform — not GBM — is what NVIDIA's CUDA-EGL interop registers against.
 pub struct EglImporter {
    egl: Egl,
    display: egl::Display,
    no_ctx: egl::Context,
-    gbm: *mut c_void,
-    render_fd: c_int,
 }

-// The EGL/GBM handles are confined to the capture thread; the struct is moved there once.
+// The EGL handles are confined to the capture thread; the struct is moved there once.
 unsafe impl Send for EglImporter {}

 impl EglImporter {
-    /// Open the render node, create a GBM device, and a headless EGLDisplay on it. Also forces
-    /// the shared CUDA context to exist (so a later `import` only touches the hot path).
+    /// Open a headless EGLDisplay on the NVIDIA EGL device. Also forces the shared CUDA context
+    /// to exist (so a later `import` only touches the hot path).
    pub fn new() -> Result<EglImporter> {
-        let path = std::ffi::CString::new("/dev/dri/renderD128").unwrap();
-        let render_fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) };
-        ensure!(render_fd >= 0, "open /dev/dri/renderD128 for GBM");
-        let gbm = unsafe { gbm_create_device(render_fd) };
-        if gbm.is_null() {
-            unsafe { libc::close(render_fd) };
-            anyhow::bail!("gbm_create_device failed");
-        }
-
        let egl: Egl =
            unsafe { Egl::load_required() }.context("load libEGL (EGL 1.5 dynamic instance)")?;
+
+        // Enumerate EGL devices and use the first (the NVIDIA GPU on a single-GPU box).
+        type QueryDevicesFn = unsafe extern "system" fn(
+            max_devices: i32,
+            devices: *mut *mut c_void,
+            num_devices: *mut i32,
+        ) -> u32;
+        let query_devices: QueryDevicesFn = unsafe {
+            std::mem::transmute(
+                egl.get_proc_address("eglQueryDevicesEXT")
+                    .context("eglQueryDevicesEXT unavailable")?,
+            )
+        };
+        let device = unsafe {
+            let mut count: i32 = 0;
+            ensure!(
+                query_devices(0, std::ptr::null_mut(), &mut count) != 0 && count > 0,
+                "no EGL devices found"
+            );
+            let mut devices = vec![std::ptr::null_mut::<c_void>(); count as usize];
+            ensure!(
+                query_devices(count, devices.as_mut_ptr(), &mut count) != 0,
+                "eglQueryDevicesEXT enumeration failed"
+            );
+            devices[0]
+        };
+
        let display = unsafe {
            egl.get_platform_display(
-                EGL_PLATFORM_GBM_KHR,
-                gbm as egl::NativeDisplayType,
+                EGL_PLATFORM_DEVICE_EXT,
+                device as egl::NativeDisplayType,
                &[egl::ATTRIB_NONE],
            )
        }
-        .context("eglGetPlatformDisplay(GBM) on the NVIDIA render node")?;
+        .context("eglGetPlatformDisplay(DEVICE) on the NVIDIA EGL device")?;
        egl.initialize(display).context("eglInitialize")?;

        let exts = egl
@@ -93,27 +110,79 @@ impl EglImporter {
        cuda::context().context("create CUDA context")?;

        let no_ctx = unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) };
-        tracing::info!("zero-copy EGL importer ready (GBM platform, dma_buf_import + modifiers)");
+        tracing::info!(
+            "zero-copy EGL importer ready (EGL device platform, dma_buf_import + modifiers)"
+        );
        Ok(EglImporter {
            egl,
            display,
            no_ctx,
-            gbm,
-            render_fd,
        })
    }

-    /// Import one dmabuf and copy it device-to-device into a fresh owned CUDA buffer.
-    /// `fourcc` is the DRM FourCC, `modifier` the 64-bit DRM format modifier from PipeWire.
+    /// Ask the EGL implementation, through `eglQueryDmaBufModifiersEXT`, which DRM format
+    /// modifiers it can import for `fourcc`. These are what we advertise to PipeWire so the
+    /// compositor allocates a dmabuf in a layout we can import. Any failure (entry point
+    /// missing, query error, no modifiers) yields an empty vector and the caller falls back.
+    pub fn supported_modifiers(&self, fourcc: u32) -> Vec<u64> {
+        type QueryFn = unsafe extern "system" fn(
+            dpy: *mut c_void,
+            format: i32,
+            max_modifiers: i32,
+            modifiers: *mut u64,
+            external_only: *mut u32,
+            num_modifiers: *mut i32,
+        ) -> u32;
+        let query: QueryFn = match self.egl.get_proc_address("eglQueryDmaBufModifiersEXT") {
+            // SAFETY: relies on the resolved symbol having the signature declared in `QueryFn`.
+            Some(sym) => unsafe { std::mem::transmute(sym) },
+            None => return Vec::new(),
+        };
+        let display = self.display.as_ptr();
+        let format = fourcc as i32;
+
+        // Pass 1: zero capacity and null output arrays — only the modifier count is requested.
+        let mut available: i32 = 0;
+        let counted = unsafe {
+            query(
+                display,
+                format,
+                0,
+                std::ptr::null_mut(),
+                std::ptr::null_mut(),
+                &mut available,
+            )
+        };
+        if counted == 0 || available <= 0 {
+            return Vec::new();
+        }
+
+        // Pass 2: fetch up to `available` modifiers; the external-only flags are received but
+        // not used.
+        let capacity = available as usize;
+        let mut mods = vec![0u64; capacity];
+        let mut external_only = vec![0u32; capacity];
+        let mut written: i32 = 0;
+        let fetched = unsafe {
+            query(
+                display,
+                format,
+                available,
+                mods.as_mut_ptr(),
+                external_only.as_mut_ptr(),
+                &mut written,
+            )
+        };
+        if fetched == 0 {
+            return Vec::new();
+        }
+        // Keep only the entries actually reported; a negative count is treated as zero.
+        mods.truncate(written.max(0) as usize);
+        mods
+    }
+
+    /// Import one dmabuf and copy it device-to-device into a fresh owned CUDA buffer. `fourcc`
+    /// is the DRM FourCC; `modifier` is the explicit 64-bit DRM format modifier when one was
+    /// negotiated, or `None` to import with the buffer's implicit modifier (base
+    /// `EGL_EXT_image_dma_buf_import`, which the NVIDIA driver resolves for its own buffers).
    pub fn import(
        &self,
        plane: &DmabufPlane,
        width: u32,
        height: u32,
        fourcc: u32,
-        modifier: u64,
+        modifier: Option<u64>,
    ) -> Result<DeviceBuffer> {
-        let attrs: [egl::Attrib; 19] = [
+        let mut attrs: Vec<egl::Attrib> = vec![
            egl::WIDTH as egl::Attrib,
            width as egl::Attrib,
            egl::HEIGHT as egl::Attrib,
@@ -126,14 +195,16 @@ impl EglImporter {
            plane.offset as egl::Attrib,
            EGL_DMA_BUF_PLANE0_PITCH_EXT,
            plane.stride as egl::Attrib,
-            EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
-            (modifier & 0xFFFF_FFFF) as egl::Attrib,
-            EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
-            (modifier >> 32) as egl::Attrib,
-            egl::ATTRIB_NONE,
-            0,
-            0,
        ];
+        if let Some(m) = modifier {
+            attrs.extend_from_slice(&[
+                EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
+                (m & 0xFFFF_FFFF) as egl::Attrib,
+                EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
+                (m >> 32) as egl::Attrib,
+            ]);
+        }
+        attrs.push(egl::ATTRIB_NONE);
        let client = unsafe { egl::ClientBuffer::from_ptr(std::ptr::null_mut()) };
        let image = self
            .egl
@@ -142,7 +213,7 @@ impl EglImporter {
                self.no_ctx,
                EGL_LINUX_DMA_BUF_EXT,
                client,
-                &attrs[..17], // up to and including ATTRIB_NONE
+                &attrs,
            )
            .context("eglCreateImage(EGL_LINUX_DMA_BUF_EXT) — modifier mismatch?")?;

@@ -160,14 +231,3 @@ impl EglImporter {
        result
    }
 }
-
-impl Drop for EglImporter {
-    fn drop(&mut self) {
-        if !self.gbm.is_null() {
-            unsafe { gbm_device_destroy(self.gbm) };
-        }
-        if self.render_fd >= 0 {
-            unsafe { libc::close(self.render_fd) };
-        }
-    }
-}
@@ -11,7 +11,7 @@ pub mod cuda;
 pub mod egl;

 pub use cuda::DeviceBuffer;
-pub use egl::EglImporter;
+pub use egl::{DmabufPlane, EglImporter};

 /// Whether the zero-copy path is opted in (`LUMEN_ZEROCOPY` truthy).
 pub fn enabled() -> bool {