diff --git a/Cargo.lock b/Cargo.lock index 7b57c8c..f6f6055 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -806,6 +806,15 @@ dependencies = [ "cipher", ] +[[package]] +name = "cudarc" +version = "0.16.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17200eb07e7d85a243aa1bf4569a7aa998385ba98d14833973a817a63cc86e92" +dependencies = [ + "libloading", +] + [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -2222,6 +2231,16 @@ dependencies = [ "syn", ] +[[package]] +name = "nvidia-video-codec-sdk" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b150dfc88653e761947906dfc0ea14af16ae366cfc55122caab94381761605a" +dependencies = [ + "cudarc", + "lazy_static", +] + [[package]] name = "oid-registry" version = "0.7.1" @@ -2615,6 +2634,7 @@ dependencies = [ "khronos-egl", "libc", "mdns-sd", + "nvidia-video-codec-sdk", "openh264", "opus", "pipewire", diff --git a/crates/punktfunk-host/Cargo.toml b/crates/punktfunk-host/Cargo.toml index a2d1410..02fd85a 100644 --- a/crates/punktfunk-host/Cargo.toml +++ b/crates/punktfunk-host/Cargo.toml @@ -127,3 +127,16 @@ windows = { version = "0.62", features = [ openh264 = "0.9" # WASAPI loopback audio capture (default render endpoint -> 48 kHz stereo f32 for the Opus path). wasapi = "0.23" +# NVENC hardware encoder (NVENC SDK, D3D11 input). The SDK pins `cudarc` with +# `cuda-version-from-build-system` (a build-time CUDA-toolkit probe); its `ci-check` feature switches +# cudarc to `dynamic-loading` (loads nvcuda.dll at runtime — nothing needed at build), which is how +# the crate builds on docs.rs/CI. We enable it so the GPU-less VM/CI compiles; the DirectX NVENC path +# never calls CUDA at runtime, so the pinned CUDA bindings version is irrelevant. +nvidia-video-codec-sdk = { version = "0.4", features = ["ci-check"], optional = true } + +[features] +# NVENC hardware encode (Windows). 
OFF by default: it pulls the NVENC SDK, and the host then needs +# the NVENC entry points (NvEncodeAPICreateInstance / NvEncodeAPIGetMaxSupportedVersion) at link +# time — i.e. `nvencodeapi.lib` from the NVIDIA Video Codec SDK (or an import lib generated from +# nvEncodeAPI64.dll) on the linker path. Build the GPU host with `--features nvenc`. +nvenc = ["dep:nvidia-video-codec-sdk"] diff --git a/crates/punktfunk-host/build.rs b/crates/punktfunk-host/build.rs new file mode 100644 index 0000000..4c312d1 --- /dev/null +++ b/crates/punktfunk-host/build.rs @@ -0,0 +1,16 @@ +//! Build script. The only thing it does: with the `nvenc` feature (Windows GPU host), tell the +//! linker to pull the NVENC import library. The NVENC entry points +//! (`NvEncodeAPICreateInstance` / `NvEncodeAPIGetMaxSupportedVersion`) live in `nvEncodeAPI64.dll` +//! (shipped with the NVIDIA driver), so the host links against `nvencodeapi.lib`. Point +//! `PUNKTFUNK_NVENC_LIB_DIR` at a directory containing `nvencodeapi.lib` — from the NVIDIA Video +//! Codec SDK, or an import lib generated from the driver's `nvEncodeAPI64.dll` +//! (`lib /def:nvenc.def /machine:x64 /out:nvencodeapi.lib` with the two exports above). 
+fn main() { + if std::env::var_os("CARGO_FEATURE_NVENC").is_some() { + if let Some(dir) = std::env::var_os("PUNKTFUNK_NVENC_LIB_DIR") { + println!("cargo:rustc-link-search=native={}", dir.to_string_lossy()); + } + println!("cargo:rustc-link-lib=dylib=nvencodeapi"); + println!("cargo:rerun-if-env-changed=PUNKTFUNK_NVENC_LIB_DIR"); + } +} diff --git a/crates/punktfunk-host/src/capture/dxgi.rs b/crates/punktfunk-host/src/capture/dxgi.rs index 4f028f5..604773e 100644 --- a/crates/punktfunk-host/src/capture/dxgi.rs +++ b/crates/punktfunk-host/src/capture/dxgi.rs @@ -16,8 +16,9 @@ use windows::Win32::Foundation::{HMODULE, LUID}; use windows::Win32::Graphics::Direct3D::{D3D_DRIVER_TYPE_UNKNOWN, D3D_FEATURE_LEVEL_11_0}; use windows::Win32::Graphics::Direct3D11::{ D3D11CreateDevice, ID3D11Device, ID3D11DeviceContext, ID3D11Texture2D, D3D11_BIND_FLAG, - D3D11_CPU_ACCESS_READ, D3D11_CREATE_DEVICE_BGRA_SUPPORT, D3D11_MAPPED_SUBRESOURCE, D3D11_MAP_READ, - D3D11_SDK_VERSION, D3D11_TEXTURE2D_DESC, D3D11_USAGE_STAGING, + D3D11_BIND_RENDER_TARGET, D3D11_CPU_ACCESS_READ, D3D11_CREATE_DEVICE_BGRA_SUPPORT, + D3D11_MAPPED_SUBRESOURCE, D3D11_MAP_READ, D3D11_SDK_VERSION, D3D11_TEXTURE2D_DESC, + D3D11_USAGE_DEFAULT, D3D11_USAGE_STAGING, }; use windows::Win32::Graphics::Dxgi::Common::{DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_SAMPLE_DESC}; use windows::Win32::Graphics::Dxgi::{ @@ -78,6 +79,13 @@ pub struct DuplCapturer { active: AtomicBool, timeout_ms: u32, last: Option>, + /// GPU-output mode (zero-copy → NVENC): produce `FramePayload::D3d11` instead of CPU BGRA. + /// Selected by `PUNKTFUNK_ENCODER=nvenc` so the capturer's output matches the encoder's input. + gpu_mode: bool, + /// Reused owned texture the duplication frame is copied into for the D3D11 path (the duplication + /// surface is transient and released each frame). + gpu_copy: Option, + have_gpu_frame: bool, _keepalive: Box, } // COM objects used only from the one thread that owns the capturer (the encode thread). 
@@ -154,12 +162,16 @@ impl DuplCapturer { .ok() .and_then(|s| s.parse().ok()) .unwrap_or((2000 / refresh_hz.max(1)).max(100)); + let gpu_mode = std::env::var("PUNKTFUNK_ENCODER") + .map(|v| matches!(v.to_ascii_lowercase().as_str(), "nvenc" | "hw" | "nvidia")) + .unwrap_or(false); tracing::info!( - "DXGI duplication: {}x{}@{} on {}", + "DXGI duplication: {}x{}@{} on {} ({})", width, height, refresh_hz, - target.gdi_name + target.gdi_name, + if gpu_mode { "D3D11 zero-copy" } else { "CPU staging" } ); Ok(Self { device, @@ -174,6 +186,9 @@ impl DuplCapturer { active: AtomicBool::new(false), timeout_ms, last: None, + gpu_mode, + gpu_copy: None, + have_gpu_frame: false, _keepalive: keepalive, }) } @@ -206,6 +221,44 @@ impl DuplCapturer { Ok(()) }
+    /// Lazily creates the owned texture that duplication frames are copied into in GPU-output mode.
+    ///
+    /// Returns immediately once `gpu_copy` is populated; otherwise asks `self.device` for a
+    /// `width` x `height`, single-mip, single-sample BGRA texture and stores it in `gpu_copy`.
+    ///
+    /// # Errors
+    /// Propagates a `CreateTexture2D` failure with the context `CreateTexture2D(gpu copy)`.
+    ///
+    /// # Safety
+    /// NOTE(review): presumably callable only on the thread that owns the capturer's COM objects,
+    /// like the other `unsafe fn` helpers of `DuplCapturer` — confirm.
+    unsafe fn ensure_gpu_copy(&mut self) -> Result<()> {
+        if self.gpu_copy.is_some() {
+            return Ok(());
+        }
+        let desc = D3D11_TEXTURE2D_DESC {
+            Width: self.width,
+            Height: self.height,
+            MipLevels: 1,
+            ArraySize: 1,
+            Format: DXGI_FORMAT_B8G8R8A8_UNORM,
+            SampleDesc: DXGI_SAMPLE_DESC {
+                Count: 1,
+                Quality: 0,
+            },
+            Usage: D3D11_USAGE_DEFAULT,
+            BindFlags: D3D11_BIND_RENDER_TARGET.0 as u32,
+            CPUAccessFlags: 0,
+            MiscFlags: 0,
+        };
+        let mut t: Option<ID3D11Texture2D> = None;
+        self.device
+            .CreateTexture2D(&desc, None, Some(&mut t))
+            .context("CreateTexture2D(gpu copy)")?;
+        self.gpu_copy = t;
+        Ok(())
+    }
+
 unsafe fn recreate_dupl(&mut self) -> Result<()> { if self.holding_frame { let _ = self.dupl.ReleaseFrame(); @@ -238,6 +291,26 @@ impl DuplCapturer { self.holding_frame = true; let res = res.context("AcquireNextFrame: null resource")?; let tex: ID3D11Texture2D = res.cast().context("resource -> Texture2D")?; + if self.gpu_mode { + // Zero-copy path: keep the frame on the GPU for NVENC. Copy the transient duplication + // surface into a reused owned texture, release the duplication frame, hand off the texture.
+ self.ensure_gpu_copy()?; + let gpu = self.gpu_copy.clone().context("gpu copy texture")?; + self.context.CopyResource(&gpu, &tex); + let _ = self.dupl.ReleaseFrame(); + self.holding_frame = false; + self.have_gpu_frame = true; + return Ok(Some(CapturedFrame { + width: self.width, + height: self.height, + pts_ns: now_ns(), + format: PixelFormat::Bgra, + payload: FramePayload::D3d11(D3d11Frame { + texture: gpu, + device: self.device.clone(), + }), + })); + } self.ensure_staging()?; let staging = self.staging.clone().context("staging texture")?; self.context.CopyResource(&staging, &tex); @@ -277,6 +339,20 @@ impl Capturer for DuplCapturer { if let Some(f) = unsafe { self.acquire() }? { return Ok(f); } + if self.gpu_mode && self.have_gpu_frame { + if let Some(gpu) = &self.gpu_copy { + return Ok(CapturedFrame { + width: self.width, + height: self.height, + pts_ns: now_ns(), + format: PixelFormat::Bgra, + payload: FramePayload::D3d11(D3d11Frame { + texture: gpu.clone(), + device: self.device.clone(), + }), + }); + } + } if let Some(b) = &self.last { return Ok(CapturedFrame { width: self.width, diff --git a/crates/punktfunk-host/src/encode.rs b/crates/punktfunk-host/src/encode.rs index 03c7add..c71b393 100644 --- a/crates/punktfunk-host/src/encode.rs +++ b/crates/punktfunk-host/src/encode.rs @@ -162,15 +162,26 @@ pub fn open_video( .unwrap_or_default() .to_ascii_lowercase(); if matches!(pref.as_str(), "nvenc" | "hw" | "nvidia") { - anyhow::bail!( - "NVENC hardware encode is not yet implemented on Windows — omit PUNKTFUNK_ENCODER \ - or set it to 'software' to use the openh264 encoder" - ); + // Hardware path: NVENC over D3D11. The DXGI capturer switches to its zero-copy + // FramePayload::D3d11 output under the same env var so capture + encode share textures. 
+ #[cfg(feature = "nvenc")] + { + let enc = + nvenc::NvencD3d11Encoder::open(codec, format, width, height, fps, bitrate_bps)?; + return Ok(Box::new(enc) as Box); + } + #[cfg(not(feature = "nvenc"))] + { + anyhow::bail!( + "NVENC requested but this host was built without it — rebuild with \ + `--features nvenc` (needs the NVENC SDK's nvencodeapi.lib at link time)" + ); + } } anyhow::ensure!( codec == Codec::H264, "the Windows software encoder supports H.264 only; client negotiated {codec:?} \ - (request H264, or use a GPU host once NVENC lands)" + (set PUNKTFUNK_ENCODER=nvenc for a GPU host, or request H264)" ); // Software H.264 realistically caps far below the negotiated hardware rates. const SW_BITRATE_CEIL: u64 = 100_000_000; @@ -189,6 +200,8 @@ pub fn open_video( mod linux; #[cfg(target_os = "windows")] mod sw; +#[cfg(all(target_os = "windows", feature = "nvenc"))] +mod nvenc; #[cfg(test)] mod tests {
diff --git a/crates/punktfunk-host/src/encode/nvenc.rs b/crates/punktfunk-host/src/encode/nvenc.rs
new file mode 100644
index 0000000..fdfb277
--- /dev/null
+++ b/crates/punktfunk-host/src/encode/nvenc.rs
@@ -0,0 +1,435 @@
+//! NVENC hardware encoder (Windows, D3D11 input) — zero-copy capture→encode on the GPU.
+//!
+//! Drives the raw NVENC API via `nvidia_video_codec_sdk::{sys, ENCODE_API}` (the safe `Encoder`
+//! wrapper is CUDA-only). Opens an encode session bound to the **same** `ID3D11Device` as the DXGI
+//! capturer (the device is carried on `FramePayload::D3d11`), registers a small pool of encoder-owned
+//! BGRA textures once, and per frame `CopyResource`s the captured texture into a pooled one and
+//! `encode_picture`s it. Mirrors the Linux NVENC config: CBR + ultra-low-latency, infinite GOP,
+//! P-frames only, forced-IDR for RFI, in-band SPS/PPS each keyframe.
+//!
+//! Needs a real NVIDIA GPU at runtime (session creation fails otherwise) — compiles GPU-less, but
+//! `submit` only succeeds on a GPU box (`open` just records parameters; the session is created on
+//! the first frame). The software encoder (`super::sw`) is the fallback.
+
+use super::{Codec, EncodedFrame, Encoder};
+use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
+use anyhow::{anyhow, bail, Context, Result};
+use std::collections::VecDeque;
+use std::ffi::c_void;
+use std::ptr;
+use windows::core::Interface;
+use windows::Win32::Graphics::Direct3D11::{
+    ID3D11Device, ID3D11DeviceContext, ID3D11Texture2D, D3D11_BIND_RENDER_TARGET, D3D11_TEXTURE2D_DESC,
+    D3D11_USAGE_DEFAULT,
+};
+use windows::Win32::Graphics::Dxgi::Common::{DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_SAMPLE_DESC};
+
+use nvidia_video_codec_sdk::sys::nvEncodeAPI as nv;
+use nvidia_video_codec_sdk::ENCODE_API as API;
+
+/// Number of pooled input textures and bitstream buffers, i.e. the maximum frames in flight.
+const POOL: usize = 4;
+
+/// Maps the negotiated codec onto the matching NVENC codec GUID.
+fn codec_guid(codec: Codec) -> nv::GUID {
+    match codec {
+        Codec::H264 => nv::NV_ENC_CODEC_H264_GUID,
+        Codec::H265 => nv::NV_ENC_CODEC_HEVC_GUID,
+        Codec::Av1 => nv::NV_ENC_CODEC_AV1_GUID,
+    }
+}
+
+/// One pool slot: an encoder-owned texture plus its NVENC registration.
+struct PooledTex {
+    /// BGRA texture the captured frame is copied into.
+    tex: ID3D11Texture2D,
+    /// Handle returned by `register_resource` for `tex`.
+    reg: nv::NV_ENC_REGISTERED_PTR,
+    /// Mapping handle while the slot holds an in-flight frame; null otherwise.
+    map: nv::NV_ENC_INPUT_PTR,
+}
+
+/// NVENC encoder fed with D3D11 textures; the session is created on the first submitted frame.
+pub struct NvencD3d11Encoder {
+    /// Immediate context of the shared device, used for the per-frame `CopyResource`.
+    ctx: Option<ID3D11DeviceContext>,
+    /// Raw NVENC session handle; null while no session is open.
+    encoder: *mut c_void,
+    codec_guid: nv::GUID,
+    width: u32,
+    height: u32,
+    fps: u32,
+    bitrate_bps: u64,
+    buffer_fmt: nv::NV_ENC_BUFFER_FORMAT,
+    pool: Vec<PooledTex>,
+    /// Count of successfully submitted frames; `next % POOL` picks the slot.
+    next: usize,
+    /// One output bitstream buffer per pool slot.
+    bitstreams: Vec<nv::NV_ENC_OUTPUT_PTR>,
+    /// Frames submitted but not yet polled: (bitstream, pool slot, the frame's `pts_ns`).
+    pending: VecDeque<(nv::NV_ENC_OUTPUT_PTR, usize, u64)>,
+    frame_idx: i64,
+    /// Set by `request_keyframe`; cleared once a frame has actually been handed to the encoder.
+    force_kf: bool,
+    inited: bool,
+}
+
+// Raw NVENC handle + COM ptrs; confined to the single encode thread (like the Linux encoder).
+unsafe impl Send for NvencD3d11Encoder {}
+
+impl NvencD3d11Encoder {
+    /// Records the stream parameters; no NVENC or D3D11 call happens until the first `submit`.
+    ///
+    /// `_format` is ignored: the input pool is always registered as `NV_ENC_BUFFER_FORMAT_ARGB`.
+    pub fn open(
+        codec: Codec,
+        _format: PixelFormat,
+        width: u32,
+        height: u32,
+        fps: u32,
+        bitrate_bps: u64,
+    ) -> Result<Self> {
+        Ok(Self {
+            ctx: None,
+            encoder: ptr::null_mut(),
+            codec_guid: codec_guid(codec),
+            width,
+            height,
+            fps,
+            bitrate_bps,
+            buffer_fmt: nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB,
+            pool: Vec::new(),
+            next: 0,
+            bitstreams: Vec::new(),
+            pending: VecDeque::new(),
+            frame_idx: 0,
+            force_kf: false,
+            inited: false,
+        })
+    }
+
+    /// Lazily create the session on the first frame's D3D11 device (so capture + encode share it).
+    ///
+    /// If any step fails, whatever was created so far is released again, so a retry on a later
+    /// frame starts from scratch instead of leaking the half-built session.
+    fn init_session(&mut self, device: &ID3D11Device) -> Result<()> {
+        let created = self.create_session(device);
+        if created.is_err() {
+            self.teardown();
+        }
+        created
+    }
+
+    /// Opens and configures the NVENC session and fills the texture/bitstream pool (steps 1–5).
+    fn create_session(&mut self, device: &ID3D11Device) -> Result<()> {
+        // SAFETY: NOTE(review) — relies on `device` being a live `ID3D11Device` and on the parameter
+        // structs matching the SDK headers' layout; both are assumed, not checked here.
+        unsafe {
+            self.ctx = Some(device.GetImmediateContext().context("D3D11 immediate context")?);
+
+            // 1. open the session bound to the D3D11 device.
+            let mut params = nv::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS {
+                version: nv::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER,
+                deviceType: nv::NV_ENC_DEVICE_TYPE::NV_ENC_DEVICE_TYPE_DIRECTX,
+                device: device.as_raw(),
+                apiVersion: nv::NVENCAPI_VERSION,
+                ..Default::default()
+            };
+            let mut enc: *mut c_void = ptr::null_mut();
+            (API.open_encode_session_ex)(&mut params, &mut enc)
+                .result_without_string()
+                .map_err(|e| anyhow!("NVENC open_encode_session_ex: {e:?} (no NVIDIA GPU?)"))?;
+            self.encoder = enc;
+
+            // 2. seed the P1 + ultra-low-latency preset config.
+            let mut preset = nv::NV_ENC_PRESET_CONFIG {
+                version: nv::NV_ENC_PRESET_CONFIG_VER,
+                presetCfg: nv::NV_ENC_CONFIG {
+                    version: nv::NV_ENC_CONFIG_VER,
+                    ..Default::default()
+                },
+                ..Default::default()
+            };
+            (API.get_encode_preset_config_ex)(
+                enc,
+                self.codec_guid,
+                nv::NV_ENC_PRESET_P1_GUID,
+                nv::NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
+                &mut preset,
+            )
+            .result_without_string()
+            .map_err(|e| anyhow!("get_encode_preset_config_ex: {e:?}"))?;
+            let mut cfg = preset.presetCfg;
+
+            // 3. mirror the Linux RC config: CBR, infinite GOP, P-only, ~1-frame VBV.
+            cfg.gopLength = nv::NVENC_INFINITE_GOPLENGTH;
+            cfg.frameIntervalP = 1;
+            cfg.rcParams.rateControlMode = nv::NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_CBR;
+            let bps = self.bitrate_bps.min(u32::MAX as u64) as u32;
+            cfg.rcParams.averageBitRate = bps;
+            cfg.rcParams.maxBitRate = bps;
+            let vbv = (self.bitrate_bps as f64 / self.fps.max(1) as f64) as u32;
+            cfg.rcParams.vbvBufferSize = vbv;
+            cfg.rcParams.vbvInitialDelay = vbv;
+
+            // 4. initialize the encoder.
+            let mut init = nv::NV_ENC_INITIALIZE_PARAMS {
+                version: nv::NV_ENC_INITIALIZE_PARAMS_VER,
+                encodeGUID: self.codec_guid,
+                presetGUID: nv::NV_ENC_PRESET_P1_GUID,
+                tuningInfo: nv::NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
+                encodeWidth: self.width,
+                encodeHeight: self.height,
+                darWidth: self.width,
+                darHeight: self.height,
+                frameRateNum: self.fps,
+                frameRateDen: 1,
+                enablePTD: 1,
+                encodeConfig: &mut cfg,
+                ..Default::default()
+            };
+            (API.initialize_encoder)(enc, &mut init)
+                .result_without_string()
+                .map_err(|e| anyhow!("initialize_encoder: {e:?}"))?;
+
+            // 5. encoder-owned BGRA texture pool, registered once, + one bitstream per slot.
+            let desc = D3D11_TEXTURE2D_DESC {
+                Width: self.width,
+                Height: self.height,
+                MipLevels: 1,
+                ArraySize: 1,
+                Format: DXGI_FORMAT_B8G8R8A8_UNORM,
+                SampleDesc: DXGI_SAMPLE_DESC {
+                    Count: 1,
+                    Quality: 0,
+                },
+                Usage: D3D11_USAGE_DEFAULT,
+                BindFlags: D3D11_BIND_RENDER_TARGET.0 as u32,
+                CPUAccessFlags: 0,
+                MiscFlags: 0,
+            };
+            for _ in 0..POOL {
+                let mut tex: Option<ID3D11Texture2D> = None;
+                device
+                    .CreateTexture2D(&desc, None, Some(&mut tex))
+                    .context("CreateTexture2D(nvenc pool)")?;
+                let tex = tex.context("null pool texture")?;
+                let mut rr = nv::NV_ENC_REGISTER_RESOURCE {
+                    version: nv::NV_ENC_REGISTER_RESOURCE_VER,
+                    resourceType:
+                        nv::NV_ENC_INPUT_RESOURCE_TYPE::NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX,
+                    width: self.width,
+                    height: self.height,
+                    pitch: 0,
+                    resourceToRegister: tex.as_raw(),
+                    bufferFormat: self.buffer_fmt,
+                    bufferUsage: nv::NV_ENC_BUFFER_USAGE::NV_ENC_INPUT_IMAGE,
+                    ..Default::default()
+                };
+                (API.register_resource)(enc, &mut rr)
+                    .result_without_string()
+                    .map_err(|e| anyhow!("register_resource: {e:?}"))?;
+                self.pool.push(PooledTex {
+                    tex,
+                    reg: rr.registeredResource,
+                    map: ptr::null_mut(),
+                });
+                let mut cb = nv::NV_ENC_CREATE_BITSTREAM_BUFFER {
+                    version: nv::NV_ENC_CREATE_BITSTREAM_BUFFER_VER,
+                    ..Default::default()
+                };
+                (API.create_bitstream_buffer)(enc, &mut cb)
+                    .result_without_string()
+                    .map_err(|e| anyhow!("create_bitstream_buffer: {e:?}"))?;
+                self.bitstreams.push(cb.bitstreamBuffer);
+            }
+            self.inited = true;
+            tracing::info!(
+                "NVENC D3D11 session: {}x{}@{} {} Mbps {:?}",
+                self.width,
+                self.height,
+                self.fps,
+                bps / 1_000_000,
+                self.codec_guid
+            );
+            Ok(())
+        }
+    }
+
+    /// Unmaps the input resource held by `slot`, if any, so the slot can be mapped again.
+    fn unmap_slot(&mut self, slot: usize) {
+        let mapped = std::mem::replace(&mut self.pool[slot].map, ptr::null_mut());
+        if !mapped.is_null() {
+            // SAFETY: `mapped` was returned by `map_input_resource` on this session and has not
+            // been unmapped since (the field is nulled whenever it is released).
+            let _ = unsafe { (API.unmap_input_resource)(self.encoder, mapped) };
+        }
+    }
+
+    /// Locks `bs`, copies the encoded access unit out and unlocks it again.
+    ///
+    /// Returns the bytes and whether NVENC reported the picture as IDR/I.
+    fn read_bitstream(&self, bs: nv::NV_ENC_OUTPUT_PTR) -> Result<(Vec<u8>, bool)> {
+        // SAFETY: NOTE(review) — assumes that after a successful `lock_bitstream` the reported
+        // pointer/size describe readable memory until `unlock_bitstream`; confirm against the SDK.
+        unsafe {
+            let mut lock = nv::NV_ENC_LOCK_BITSTREAM {
+                version: nv::NV_ENC_LOCK_BITSTREAM_VER,
+                outputBitstream: bs,
+                ..Default::default()
+            };
+            (API.lock_bitstream)(self.encoder, &mut lock)
+                .result_without_string()
+                .map_err(|e| anyhow!("lock_bitstream: {e:?}"))?;
+            let src = lock.bitstreamBufferPtr as *const u8;
+            let len = lock.bitstreamSizeInBytes as usize;
+            // `slice::from_raw_parts` requires a non-null pointer even for an empty slice.
+            let data = if src.is_null() || len == 0 {
+                Vec::new()
+            } else {
+                std::slice::from_raw_parts(src, len).to_vec()
+            };
+            let keyframe = matches!(
+                lock.pictureType,
+                nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_IDR | nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_I
+            );
+            (API.unlock_bitstream)(self.encoder, bs)
+                .result_without_string()
+                .map_err(|e| anyhow!("unlock_bitstream: {e:?}"))?;
+            Ok((data, keyframe))
+        }
+    }
+
+    /// Releases every NVENC object the session owns and returns to the unopened state.
+    fn teardown(&mut self) {
+        self.pending.clear();
+        self.ctx = None;
+        self.inited = false;
+        if self.encoder.is_null() {
+            return;
+        }
+        // SAFETY: every handle below was obtained from this session and is released exactly once
+        // because the vectors are drained and `encoder` is nulled afterwards.
+        unsafe {
+            for slot in self.pool.drain(..) {
+                if !slot.map.is_null() {
+                    let _ = (API.unmap_input_resource)(self.encoder, slot.map);
+                }
+                let _ = (API.unregister_resource)(self.encoder, slot.reg);
+            }
+            for bs in self.bitstreams.drain(..) {
+                let _ = (API.destroy_bitstream_buffer)(self.encoder, bs);
+            }
+            let _ = (API.destroy_encoder)(self.encoder);
+        }
+        self.encoder = ptr::null_mut();
+    }
+}
+
+impl Encoder for NvencD3d11Encoder {
+    /// Copies the captured D3D11 texture into the next pool slot and queues it for encoding.
+    ///
+    /// Creates the session on the first call. Fails for CPU frames, when the slot picked for this
+    /// frame has not been polled yet, or when an NVENC call reports an error.
+    fn submit(&mut self, captured: &CapturedFrame) -> Result<()> {
+        let frame = match &captured.payload {
+            FramePayload::D3d11(f) => f,
+            FramePayload::Cpu(_) => {
+                bail!("NVENC D3D11 encoder needs a GPU texture frame (use the software encoder for CPU frames)")
+            }
+        };
+        if !self.inited {
+            let device = frame.device.clone();
+            self.init_session(&device)?;
+        }
+        let slot = self.next % POOL;
+        // A slot owns one mapping and one bitstream buffer; reusing it before `poll` has drained
+        // it would clobber an in-flight frame.
+        anyhow::ensure!(
+            self.pool[slot].map.is_null(),
+            "NVENC input pool exhausted: poll() must drain a frame before more can be submitted"
+        );
+        // Read, but do not clear yet: a failed submit must not swallow the keyframe request.
+        let force_idr = self.force_kf;
+        // SAFETY: NOTE(review) — assumes `frame.texture` lives on the session's device and has the
+        // pool textures' size/format, as `CopyResource` requires; nothing here verifies it.
+        unsafe {
+            let ctx = self.ctx.as_ref().context("no D3D11 context")?;
+            ctx.CopyResource(&self.pool[slot].tex, &frame.texture);
+
+            let mut mp = nv::NV_ENC_MAP_INPUT_RESOURCE {
+                version: nv::NV_ENC_MAP_INPUT_RESOURCE_VER,
+                registeredResource: self.pool[slot].reg,
+                ..Default::default()
+            };
+            (API.map_input_resource)(self.encoder, &mut mp)
+                .result_without_string()
+                .map_err(|e| anyhow!("map_input_resource: {e:?}"))?;
+            self.pool[slot].map = mp.mappedResource;
+
+            let flags = if force_idr {
+                nv::NV_ENC_PIC_FLAGS::NV_ENC_PIC_FLAG_FORCEIDR as u32
+                    | nv::NV_ENC_PIC_FLAGS::NV_ENC_PIC_FLAG_OUTPUT_SPSPPS as u32
+            } else {
+                0
+            };
+            let mut pic = nv::NV_ENC_PIC_PARAMS {
+                version: nv::NV_ENC_PIC_PARAMS_VER,
+                inputWidth: self.width,
+                inputHeight: self.height,
+                inputPitch: 0,
+                inputBuffer: mp.mappedResource,
+                bufferFmt: mp.mappedBufferFmt,
+                outputBitstream: self.bitstreams[slot],
+                pictureStruct: nv::NV_ENC_PIC_STRUCT::NV_ENC_PIC_STRUCT_FRAME,
+                inputTimeStamp: self.frame_idx as u64,
+                encodePicFlags: flags,
+                ..Default::default()
+            };
+            let encoded = (API.encode_picture)(self.encoder, &mut pic).result_without_string();
+            if let Err(e) = encoded {
+                // Nothing will ever poll this slot, so give its mapping back right away.
+                self.unmap_slot(slot);
+                bail!("encode_picture: {e:?}");
+            }
+        }
+        self.force_kf = false; // the request (if any) went out with this frame
+        self.frame_idx += 1;
+        self.next += 1;
+        self.pending
+            .push_back((self.bitstreams[slot], slot, captured.pts_ns));
+        Ok(())
+    }
+
+    /// Asks for the next successfully submitted frame to be a forced IDR with SPS/PPS attached.
+    fn request_keyframe(&mut self) {
+        self.force_kf = true;
+    }
+
+    /// Returns the oldest submitted frame's encoded access unit, or `None` if nothing is pending.
+    fn poll(&mut self) -> Result<Option<EncodedFrame>> {
+        let Some((bs, slot, pts_ns)) = self.pending.pop_front() else {
+            return Ok(None);
+        };
+        let read = self.read_bitstream(bs);
+        // Release the input mapping even when the read failed, so the slot stays usable.
+        self.unmap_slot(slot);
+        let (data, keyframe) = read?;
+        Ok(Some(EncodedFrame {
+            data,
+            pts_ns,
+            keyframe,
+        }))
+    }
+
+    fn flush(&mut self) -> Result<()> {
+        Ok(()) // P1/ULL + frameIntervalP=1: each submit yields its AU; no internal queue to drain.
+    }
+}
+
+impl Drop for NvencD3d11Encoder {
+    /// Releases the NVENC session and everything registered with it.
+    fn drop(&mut self) {
+        self.teardown();
+    }
+}