From 01fcb010190597f25d101fdfd995d778da029db1 Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Fri, 3 Jul 2026 12:09:18 +0000 Subject: [PATCH] =?UTF-8?q?fix(encode/windows):=20resolve=20NVENC=20at=20r?= =?UTF-8?q?untime=20=E2=80=94=20AMD/Intel=20hosts=20no=20longer=20crash=20?= =?UTF-8?q?at=20start?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nvenc build linked nvEncodeAPI64.dll's entry points at load time, so a --features nvenc binary hard-crashed on any box without the NVIDIA driver (AMD/Intel). Entry points now come from a runtime LoadLibrary table (encode/windows/nvenc.rs load_api); a missing DLL just falls through the encoder auto-detect to AMF/QSV/software. The generated import lib and all its plumbing (gen-nvenc-importlib.ps1, nvenc.def, PUNKTFUNK_NVENC_LIB_DIR, setup-build-env wiring) are gone. Live-validated on the RTX 4090 box (NVENC session, 7000+ frames). Co-Authored-By: Claude Fable 5 --- crates/punktfunk-host/Cargo.toml | 9 +- crates/punktfunk-host/build.rs | 21 +- crates/punktfunk-host/src/encode.rs | 2 +- .../src/encode/windows/nvenc.rs | 296 ++++++++++++++---- .../windows/nvenc/gen-nvenc-importlib.ps1 | 57 ---- packaging/windows/nvenc/nvenc.def | 14 - scripts/windows/README.md | 4 +- scripts/windows/deploy-host.ps1 | 2 +- scripts/windows/setup-build-env.ps1 | 4 +- 9 files changed, 259 insertions(+), 150 deletions(-) delete mode 100644 packaging/windows/nvenc/gen-nvenc-importlib.ps1 delete mode 100644 packaging/windows/nvenc/nvenc.def diff --git a/crates/punktfunk-host/Cargo.toml b/crates/punktfunk-host/Cargo.toml index 3465090..80da2fb 100644 --- a/crates/punktfunk-host/Cargo.toml +++ b/crates/punktfunk-host/Cargo.toml @@ -232,10 +232,11 @@ pf-driver-proto = { path = "../pf-driver-proto" } bytemuck = { version = "1.19", features = ["derive"] } [features] -# NVENC hardware encode (Windows). 
OFF by default: it pulls the NVENC SDK, and the host then needs -# the NVENC entry points (NvEncodeAPICreateInstance / NvEncodeAPIGetMaxSupportedVersion) at link -# time — i.e. `nvencodeapi.lib` from the NVIDIA Video Codec SDK (or an import lib generated from -# nvEncodeAPI64.dll) on the linker path. Build the GPU host with `--features nvenc`. +# NVENC hardware encode (Windows). OFF by default (it pulls the NVENC SDK crate); nothing is +# needed at link time — the entry points are resolved at RUNTIME from the driver's +# nvEncodeAPI64.dll (encode/windows/nvenc.rs `load_api`), so the same binary starts fine on +# AMD/Intel-only boxes and falls through to AMF/QSV/software. Build the GPU host with +# `--features nvenc`. nvenc = ["dep:nvidia-video-codec-sdk"] # AMD/Intel hardware encode on Windows (AMF/QSV via ffmpeg-next). OFF by default: it needs a # `FFMPEG_DIR` (BtbN lgpl-shared — includes `*_amf`/`*_qsv`; the GPL-only x264/x265 are never used, diff --git a/crates/punktfunk-host/build.rs b/crates/punktfunk-host/build.rs index 33d1d90..d6013ad 100644 --- a/crates/punktfunk-host/build.rs +++ b/crates/punktfunk-host/build.rs @@ -1,10 +1,9 @@ -//! Build script. The only thing it does: with the `nvenc` feature (Windows GPU host), tell the -//! linker to pull the NVENC import library. The NVENC entry points -//! (`NvEncodeAPICreateInstance` / `NvEncodeAPIGetMaxSupportedVersion`) live in `nvEncodeAPI64.dll` -//! (shipped with the NVIDIA driver), so the host links against `nvencodeapi.lib`. Point -//! `PUNKTFUNK_NVENC_LIB_DIR` at a directory containing `nvencodeapi.lib` — from the NVIDIA Video -//! Codec SDK, or an import lib generated from the driver's `nvEncodeAPI64.dll` -//! (`lib /def:nvenc.def /machine:x64 /out:nvencodeapi.lib` with the two exports above). +//! Build script: stamps the build version. NVENC deliberately needs NOTHING here — the entry +//! points (`NvEncodeAPICreateInstance` / `NvEncodeAPIGetMaxSupportedVersion`) live in +//! 
`nvEncodeAPI64.dll`, which only exists where the NVIDIA driver is installed, so +//! `encode/windows/nvenc.rs` resolves them at RUNTIME (`LoadLibraryExW`). The former link-time +//! import (`cargo:rustc-link-lib=nvencodeapi`) made the Windows loader kill the all-vendor host +//! binary on every AMD/Intel-only box before `main` ("nvencodeapi64.dll was not found"). fn main() { // Build provenance: stamp the exact package/build version into the binary so a running host // can report what it is (mgmt /health, the startup log, `--version`) and a stale/shadowed @@ -18,12 +17,4 @@ fn main() { .unwrap_or_else(|| std::env::var("CARGO_PKG_VERSION").unwrap_or_else(|_| "unknown".into())); println!("cargo:rustc-env=PUNKTFUNK_VERSION={version}"); println!("cargo:rerun-if-env-changed=PUNKTFUNK_BUILD_VERSION"); - - if std::env::var_os("CARGO_FEATURE_NVENC").is_some() { - if let Some(dir) = std::env::var_os("PUNKTFUNK_NVENC_LIB_DIR") { - println!("cargo:rustc-link-search=native={}", dir.to_string_lossy()); - } - println!("cargo:rustc-link-lib=dylib=nvencodeapi"); - println!("cargo:rerun-if-env-changed=PUNKTFUNK_NVENC_LIB_DIR"); - } } diff --git a/crates/punktfunk-host/src/encode.rs b/crates/punktfunk-host/src/encode.rs index 2067cfa..6fb07cc 100644 --- a/crates/punktfunk-host/src/encode.rs +++ b/crates/punktfunk-host/src/encode.rs @@ -530,7 +530,7 @@ fn open_video_backend( { anyhow::bail!( "NVENC requested/detected but this host was built without it — rebuild \ - with `--features nvenc` (needs the NVENC SDK's nvencodeapi.lib at link time)" + with `--features nvenc`" ) } } diff --git a/crates/punktfunk-host/src/encode/windows/nvenc.rs b/crates/punktfunk-host/src/encode/windows/nvenc.rs index 6e12782..c42e69f 100644 --- a/crates/punktfunk-host/src/encode/windows/nvenc.rs +++ b/crates/punktfunk-host/src/encode/windows/nvenc.rs @@ -1,7 +1,10 @@ //! NVENC hardware encoder (Windows, D3D11 input) — zero-copy capture→encode on the GPU. //! -//! 
Drives the raw NVENC API via `nvidia_video_codec_sdk::{sys, ENCODE_API}` (the safe `Encoder` -//! wrapper is CUDA-only). Opens an encode session bound to the **same** `ID3D11Device` as the DXGI +//! Drives the raw NVENC API via the `nvidia_video_codec_sdk` `sys` types and a **runtime-loaded** +//! entry table ([`EncodeApi`] — the crate's `ENCODE_API`/safe `Encoder` are deliberately unused: +//! the safe wrapper is CUDA-only, and its statically-declared entry points would put a load-time +//! `nvEncodeAPI64.dll` import on the all-vendor binary, killing it on every AMD/Intel-only box). +//! Opens an encode session bound to the **same** `ID3D11Device` as the DXGI //! capturer (the device is carried on `FramePayload::D3d11`), and **encodes the capturer's texture in //! place** — it registers each input texture with NVENC once (cached by pointer) and `encode_picture`s //! it directly, with NO per-frame `CopyResource`. (That's safe because the host encode loop is @@ -10,8 +13,10 @@ //! pipelined, the capturer must hand a ring of textures.) Mirrors the Linux NVENC config: CBR + //! ultra-low-latency, infinite GOP, P-frames only, forced-IDR for RFI, in-band SPS/PPS each keyframe. //! -//! Needs a real NVIDIA GPU at runtime (session creation fails otherwise) — compiles GPU-less, but -//! `open`/`submit` only succeed on a GPU box. The software encoder (`super::sw`) is the fallback. +//! Needs a real NVIDIA GPU at runtime (session creation fails otherwise) — compiles GPU-less and +//! **starts driver-less** (the DLL resolves at runtime; on an AMD/Intel box [`try_api`] fails +//! cleanly and the AMF/QSV/software backends carry the session). The software encoder +//! (`super::sw`) is the fallback. //! //! **Two-thread async retrieve** (`PUNKTFUNK_NVENC_ASYNC=1`, opt-in until on-glass validated — //! 
gpu-contention plan §5.B): the NVENC guide mandates that the main thread only *submit* @@ -44,7 +49,182 @@ use windows::Win32::Graphics::Direct3D11::{ID3D11Device, ID3D11Texture2D}; use windows::Win32::System::Threading::{CreateEventW, WaitForSingleObject}; use nvidia_video_codec_sdk::sys::nvEncodeAPI as nv; -use nvidia_video_codec_sdk::ENCODE_API as API; + +// --------------------------------------------------------------------------------------------- +// Runtime-loaded NVENC entry table. +// +// The NVENC entry points live in `nvEncodeAPI64.dll`, which exists ONLY where the NVIDIA driver +// is installed. They must be resolved at runtime (`LoadLibraryExW` + `GetProcAddress`), never as +// a link-time import: the shipped host binary compiles the `nvenc` feature in unconditionally, +// and a load-time DLL import makes the Windows loader refuse to start the process on every +// AMD/Intel-only box ("nvencodeapi64.dll was not found", before `main`) — `encode.rs` never gets +// the chance to dispatch to AMF/QSV. This is the Windows analogue of the Linux host's dlopen'd +// libcuda. Only the two real DLL exports are resolved by name; the rest of the table comes back +// through `NvEncodeAPICreateInstance`. +// --------------------------------------------------------------------------------------------- + +/// The `NV_ENCODE_API_FUNCTION_LIST` entries this encoder uses, unwrapped once at load so call +/// sites stay `(api().encode_picture)(…)`. Field names mirror the sdk crate's `EncodeAPI`, whose +/// lazy static must NOT be referenced — it calls the statically-declared externs, which is what +/// demanded the import lib at link time. 
+struct EncodeApi { + open_encode_session_ex: unsafe extern "C" fn( + *mut nv::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS, + *mut *mut c_void, + ) -> nv::NVENCSTATUS, + initialize_encoder: + unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_INITIALIZE_PARAMS) -> nv::NVENCSTATUS, + destroy_encoder: unsafe extern "C" fn(*mut c_void) -> nv::NVENCSTATUS, + get_encode_caps: unsafe extern "C" fn( + *mut c_void, + nv::GUID, + *mut nv::NV_ENC_CAPS_PARAM, + *mut core::ffi::c_int, + ) -> nv::NVENCSTATUS, + get_encode_preset_config_ex: unsafe extern "C" fn( + *mut c_void, + nv::GUID, + nv::GUID, + nv::NV_ENC_TUNING_INFO, + *mut nv::NV_ENC_PRESET_CONFIG, + ) -> nv::NVENCSTATUS, + create_bitstream_buffer: unsafe extern "C" fn( + *mut c_void, + *mut nv::NV_ENC_CREATE_BITSTREAM_BUFFER, + ) -> nv::NVENCSTATUS, + destroy_bitstream_buffer: + unsafe extern "C" fn(*mut c_void, nv::NV_ENC_OUTPUT_PTR) -> nv::NVENCSTATUS, + lock_bitstream: + unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_LOCK_BITSTREAM) -> nv::NVENCSTATUS, + unlock_bitstream: unsafe extern "C" fn(*mut c_void, nv::NV_ENC_OUTPUT_PTR) -> nv::NVENCSTATUS, + register_resource: + unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_REGISTER_RESOURCE) -> nv::NVENCSTATUS, + unregister_resource: + unsafe extern "C" fn(*mut c_void, nv::NV_ENC_REGISTERED_PTR) -> nv::NVENCSTATUS, + map_input_resource: + unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_MAP_INPUT_RESOURCE) -> nv::NVENCSTATUS, + unmap_input_resource: + unsafe extern "C" fn(*mut c_void, nv::NV_ENC_INPUT_PTR) -> nv::NVENCSTATUS, + encode_picture: + unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_PIC_PARAMS) -> nv::NVENCSTATUS, + register_async_event: + unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_EVENT_PARAMS) -> nv::NVENCSTATUS, + unregister_async_event: + unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_EVENT_PARAMS) -> nv::NVENCSTATUS, + invalidate_ref_frames: unsafe extern "C" fn(*mut c_void, u64) -> nv::NVENCSTATUS, +} + +/// Local `NVENCSTATUS` → `Result` 
(replaces the sdk's `result_without_string`, which lives in the +/// crate's `safe` module — code this file must not pull in, see [`EncodeApi`]). The raw status's +/// Debug repr (`NV_ENC_ERR_INVALID_PARAM`, …) is the error payload. +trait NvStatusExt { + fn nv_ok(self) -> std::result::Result<(), nv::NVENCSTATUS>; +} +impl NvStatusExt for nv::NVENCSTATUS { + fn nv_ok(self) -> std::result::Result<(), nv::NVENCSTATUS> { + match self { + nv::NVENCSTATUS::NV_ENC_SUCCESS => Ok(()), + err => Err(err), + } + } +} + +/// Resolve the table once per process. `Err` = NVENC genuinely unavailable on this machine (no +/// NVIDIA driver/DLL, or a driver older than our headers) — the entry points +/// ([`NvencD3d11Encoder::open`], [`probe_can_encode_444`]) gate on it and the AMF/QSV/software +/// backends carry on. +fn try_api() -> std::result::Result<&'static EncodeApi, &'static str> { + static TABLE: std::sync::OnceLock<std::result::Result<EncodeApi, String>> = + std::sync::OnceLock::new(); + TABLE + .get_or_init(|| { + let table = load_api(); + if let Err(e) = &table { + // Once per process. Only reachable when something resolved to NVENC on this box + // (backend misdetect or a forced PUNKTFUNK_ENCODER=nvenc) — say why it will fail. + tracing::warn!("NVENC API unavailable: {e}"); + } + table + }) + .as_ref() + .map_err(|e| e.as_str()) +} + +/// The loaded table, for call sites past a [`try_api`] gate — a live session (or the probe's own +/// gate) implies the load succeeded, and the table lives for the process lifetime. +fn api() -> &'static EncodeApi { + try_api().expect("NVENC call before a successful try_api() gate") +} + +fn load_api() -> std::result::Result<EncodeApi, String> { + use windows::core::{s, w}; + use windows::Win32::System::LibraryLoader::{ + GetProcAddress, LoadLibraryExW, LOAD_LIBRARY_SEARCH_SYSTEM32, + }; + // SAFETY: `LoadLibraryExW`/`GetProcAddress` take static NUL-terminated names; the + // System32-only search path keeps a planted DLL out of the SYSTEM-service process.
The two + // transmutes cast the resolved exports to their documented prototypes (nvEncodeAPI.h), the + // same contract the C SDK's own loader applies. `NvEncodeAPIGetMaxSupportedVersion` writes + // one u32 through a live pointer; `NvEncodeAPICreateInstance` fills `list`, a stack-local + // `#[repr(C)]` function list with `version` set, only during the call. The module is never + // freed, so every extracted function pointer stays valid for the process lifetime. + unsafe { + let module = LoadLibraryExW(w!("nvEncodeAPI64.dll"), None, LOAD_LIBRARY_SEARCH_SYSTEM32) + .map_err(|e| format!("nvEncodeAPI64.dll not loadable (no NVIDIA driver?): {e}"))?; + let get_version = GetProcAddress(module, s!("NvEncodeAPIGetMaxSupportedVersion")) + .ok_or("nvEncodeAPI64.dll exports no NvEncodeAPIGetMaxSupportedVersion")?; + let create_instance = GetProcAddress(module, s!("NvEncodeAPICreateInstance")) + .ok_or("nvEncodeAPI64.dll exports no NvEncodeAPICreateInstance")?; + let get_version: unsafe extern "C" fn(*mut u32) -> nv::NVENCSTATUS = + std::mem::transmute(get_version); + let create_instance: unsafe extern "C" fn( + *mut nv::NV_ENCODE_API_FUNCTION_LIST, + ) -> nv::NVENCSTATUS = std::mem::transmute(create_instance); + + let mut version = 0u32; + get_version(&mut version) + .nv_ok() + .map_err(|e| format!("NvEncodeAPIGetMaxSupportedVersion: {e:?}"))?; + // The sdk's assert_versions_match, minus the panic: an older driver is a clean Err. 
+ let (major, minor) = (version >> 4, version & 0xf); + if (major, minor) < (nv::NVENCAPI_MAJOR_VERSION, nv::NVENCAPI_MINOR_VERSION) { + return Err(format!( + "driver NVENC API {major}.{minor} is older than the host's headers {}.{} — \ + update the NVIDIA driver", + nv::NVENCAPI_MAJOR_VERSION, + nv::NVENCAPI_MINOR_VERSION + )); + } + + let mut list = nv::NV_ENCODE_API_FUNCTION_LIST { + version: nv::NV_ENCODE_API_FUNCTION_LIST_VER, + ..Default::default() + }; + create_instance(&mut list) + .nv_ok() + .map_err(|e| format!("NvEncodeAPICreateInstance: {e:?}"))?; + const MISSING: &str = "NvEncodeAPICreateInstance left an entry point unfilled"; + Ok(EncodeApi { + open_encode_session_ex: list.nvEncOpenEncodeSessionEx.ok_or(MISSING)?, + initialize_encoder: list.nvEncInitializeEncoder.ok_or(MISSING)?, + destroy_encoder: list.nvEncDestroyEncoder.ok_or(MISSING)?, + get_encode_caps: list.nvEncGetEncodeCaps.ok_or(MISSING)?, + get_encode_preset_config_ex: list.nvEncGetEncodePresetConfigEx.ok_or(MISSING)?, + create_bitstream_buffer: list.nvEncCreateBitstreamBuffer.ok_or(MISSING)?, + destroy_bitstream_buffer: list.nvEncDestroyBitstreamBuffer.ok_or(MISSING)?, + lock_bitstream: list.nvEncLockBitstream.ok_or(MISSING)?, + unlock_bitstream: list.nvEncUnlockBitstream.ok_or(MISSING)?, + register_resource: list.nvEncRegisterResource.ok_or(MISSING)?, + unregister_resource: list.nvEncUnregisterResource.ok_or(MISSING)?, + map_input_resource: list.nvEncMapInputResource.ok_or(MISSING)?, + unmap_input_resource: list.nvEncUnmapInputResource.ok_or(MISSING)?, + encode_picture: list.nvEncEncodePicture.ok_or(MISSING)?, + register_async_event: list.nvEncRegisterAsyncEvent.ok_or(MISSING)?, + unregister_async_event: list.nvEncUnregisterAsyncEvent.ok_or(MISSING)?, + invalidate_ref_frames: list.nvEncInvalidateRefFrames.ok_or(MISSING)?, + }) + } +} // Output bitstream buffers = max in-flight encodes. 
The helper deep-pipelines (submits several frames // before locking the oldest) so per-frame GPU-scheduling waits OVERLAP instead of serializing under a @@ -143,7 +323,7 @@ fn retrieve_loop( outputBitstream: job.bs as *mut c_void, ..Default::default() }; - match (API.lock_bitstream)(enc as *mut c_void, &mut lock).result_without_string() { + match (api().lock_bitstream)(enc as *mut c_void, &mut lock).nv_ok() { Ok(()) => { let data = std::slice::from_raw_parts( lock.bitstreamBufferPtr as *const u8, @@ -155,7 +335,7 @@ fn retrieve_loop( nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_IDR | nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_I ); - let _ = (API.unlock_bitstream)(enc as *mut c_void, job.bs as *mut c_void); + let _ = (api().unlock_bitstream)(enc as *mut c_void, job.bs as *mut c_void); Ok((data, keyframe)) } Err(e) => Err(format!("lock_bitstream (async): {e:?}")), @@ -255,6 +435,11 @@ impl NvencD3d11Encoder { bit_depth: u8, chroma: ChromaFormat, ) -> Result<Self> { + // The runtime DLL load is the real "is NVENC possible here" gate: fail the open with a + // clear reason (backend misdetect / forced PUNKTFUNK_ENCODER=nvenc on a non-NVIDIA box) + // instead of an opaque session error on the first frame. Every later NVENC call in this + // file sits behind this gate (or the probe's), so the infallible `api()` is sound. + try_api().map_err(|e| anyhow!("NVENC unavailable: {e}"))?; Ok(Self { encoder: ptr::null_mut(), codec, @@ -309,11 +494,11 @@ impl NvencD3d11Encoder { // Unmap any in-flight inputs, then unregister every cached texture and destroy the bitstreams. for (_, map, _) in &self.pending { if !map.is_null() { - let _ = (API.unmap_input_resource)(self.encoder, *map); + let _ = (api().unmap_input_resource)(self.encoder, *map); } } for (reg, _tex) in self.regs.values() { - let _ = (API.unregister_resource)(self.encoder, *reg); + let _ = (api().unregister_resource)(self.encoder, *reg); } // Async events: unregister from the session, then close the Win32 handles.
for &ev in &self.events { @@ -322,14 +507,14 @@ impl NvencD3d11Encoder { completionEvent: ev as *mut c_void, ..Default::default() }; - let _ = (API.unregister_async_event)(self.encoder, &mut ep); + let _ = (api().unregister_async_event)(self.encoder, &mut ep); let _ = CloseHandle(HANDLE(ev as *mut c_void)); } self.events.clear(); for &bs in &self.bitstreams { - let _ = (API.destroy_bitstream_buffer)(self.encoder, bs); + let _ = (api().destroy_bitstream_buffer)(self.encoder, bs); } - let _ = (API.destroy_encoder)(self.encoder); + let _ = (api().destroy_encoder)(self.encoder); self.regs.clear(); // drops the texture clones, releasing our refs self.bitstreams.clear(); self.pending.clear(); @@ -350,9 +535,7 @@ impl NvencD3d11Encoder { reserved: [0; 62], }; let mut val: i32 = 0; - match (API.get_encode_caps)(enc, self.codec_guid, &mut param, &mut val) - .result_without_string() - { + match (api().get_encode_caps)(enc, self.codec_guid, &mut param, &mut val).nv_ok() { Ok(()) => val, Err(_) => 0, } @@ -374,8 +557,8 @@ impl NvencD3d11Encoder { ..Default::default() }; let mut enc: *mut c_void = ptr::null_mut(); - (API.open_encode_session_ex)(&mut params, &mut enc) - .result_without_string() + (api().open_encode_session_ex)(&mut params, &mut enc) + .nv_ok() .map_err(|e| { anyhow!("NVENC open_encode_session_ex (caps probe): {e:?} (no NVIDIA GPU?)") })?; @@ -392,7 +575,7 @@ impl NvencD3d11Encoder { nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE, ); let async_enc = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT); - let _ = (API.destroy_encoder)(enc); + let _ = (api().destroy_encoder)(enc); // Reject an over-range mode with a clear message instead of an opaque InvalidParam. 
if wmax > 0 && hmax > 0 && (self.width as i32 > wmax || self.height as i32 > hmax) { @@ -449,8 +632,8 @@ impl NvencD3d11Encoder { ..Default::default() }; let mut enc: *mut c_void = ptr::null_mut(); - (API.open_encode_session_ex)(&mut params, &mut enc) - .result_without_string() + (api().open_encode_session_ex)(&mut params, &mut enc) + .nv_ok() .map_err(|e| anyhow!("NVENC open_encode_session_ex: {e:?} (no NVIDIA GPU?)"))?; // Seed the P1 + ultra-low-latency preset config. @@ -462,16 +645,16 @@ impl NvencD3d11Encoder { }, ..Default::default() }; - if let Err(e) = (API.get_encode_preset_config_ex)( + if let Err(e) = (api().get_encode_preset_config_ex)( enc, self.codec_guid, nv::NV_ENC_PRESET_P1_GUID, nv::NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY, &mut preset, ) - .result_without_string() + .nv_ok() { - let _ = (API.destroy_encoder)(enc); + let _ = (api().destroy_encoder)(enc); return Err(anyhow!("get_encode_preset_config_ex: {e:?}")); } let mut cfg = preset.presetCfg; @@ -613,10 +796,10 @@ impl NvencD3d11Encoder { // splitEncodeMode is a C bitfield — set via the generated accessor, not a struct field. init.set_splitEncodeMode(split_mode); - match (API.initialize_encoder)(enc, &mut init).result_without_string() { + match (api().initialize_encoder)(enc, &mut init).nv_ok() { Ok(()) => Ok(enc), Err(e) => { - let _ = (API.destroy_encoder)(enc); + let _ = (api().destroy_encoder)(enc); Err(anyhow!("initialize_encoder: {e:?}")) } } @@ -624,8 +807,8 @@ impl NvencD3d11Encoder { /// Lazily create the session on the first frame's D3D11 device (so capture + encode share it). 
fn init_session(&mut self, device: &ID3D11Device) -> Result<()> { - // SAFETY: every call below goes through a function pointer resolved once from the loaded - // `nvidia_video_codec_sdk::ENCODE_API` (`nvEncodeAPI`) table, or through this type's own + // SAFETY: every call below goes through a function pointer resolved once from the + // runtime-loaded [`EncodeApi`] table (`api()`, gated in `open`), or through this type's own // `unsafe fn`s whose contract is met here. `query_caps`/`try_open_session` receive `device`, // the live `ID3D11Device` the caller pulled off the first frame; each returns either a valid // open NVENC session handle or an `Err`. `destroy_encoder` is only ever called on a handle a @@ -729,7 +912,7 @@ impl NvencD3d11Encoder { match self.try_open_session(device, mid, split_mode, use_async) { Ok(e) => { if !best.is_null() { - let _ = (API.destroy_encoder)(best); + let _ = (api().destroy_encoder)(best); } best = e; best_bps = mid; @@ -778,8 +961,8 @@ impl NvencD3d11Encoder { version: nv::NV_ENC_CREATE_BITSTREAM_BUFFER_VER, ..Default::default() }; - (API.create_bitstream_buffer)(enc, &mut cb) - .result_without_string() + (api().create_bitstream_buffer)(enc, &mut cb) + .nv_ok() .map_err(|e| anyhow!("create_bitstream_buffer: {e:?}"))?; self.bitstreams.push(cb.bitstreamBuffer); } @@ -795,8 +978,8 @@ impl NvencD3d11Encoder { completionEvent: ev.0, ..Default::default() }; - (API.register_async_event)(enc, &mut ep) - .result_without_string() + (api().register_async_event)(enc, &mut ep) + .nv_ok() .map_err(|e| anyhow!("register_async_event: {e:?}"))?; self.events.push(ev.0 as usize); } @@ -852,7 +1035,7 @@ impl NvencD3d11Encoder { // path's poll-side unmap, exactly once per mapping. 
unsafe { if !map.is_null() { - let _ = (API.unmap_input_resource)(self.encoder, map); + let _ = (api().unmap_input_resource)(self.encoder, map); } } let (data, keyframe) = done.result.map_err(|e| anyhow!("{e}"))?; @@ -953,7 +1136,7 @@ impl Encoder for NvencD3d11Encoder { } let slot = self.next % POOL; self.next += 1; - // SAFETY: every NVENC call goes through a function pointer from the loaded `ENCODE_API` table + // SAFETY: every NVENC call goes through a function pointer from the runtime-loaded `EncodeApi` table // and takes `self.encoder`, the live session `init_session` just established (non-null on the // path that reaches here). `NV_ENC_REGISTER_RESOURCE rr` has `version = // NV_ENC_REGISTER_RESOURCE_VER` and registers `frame.texture` — a D3D11 texture from @@ -986,8 +1169,8 @@ impl Encoder for NvencD3d11Encoder { bufferUsage: nv::NV_ENC_BUFFER_USAGE::NV_ENC_INPUT_IMAGE, ..Default::default() }; - (API.register_resource)(self.encoder, &mut rr) - .result_without_string() + (api().register_resource)(self.encoder, &mut rr) + .nv_ok() .map_err(|e| anyhow!("register_resource: {e:?}"))?; self.regs .insert(key, (rr.registeredResource, frame.texture.clone())); @@ -999,8 +1182,8 @@ impl Encoder for NvencD3d11Encoder { registeredResource: reg, ..Default::default() }; - (API.map_input_resource)(self.encoder, &mut mp) - .result_without_string() + (api().map_input_resource)(self.encoder, &mut mp) + .nv_ok() .map_err(|e| anyhow!("map_input_resource: {e:?}"))?; let pts = self.frame_idx as u64; @@ -1076,8 +1259,8 @@ impl Encoder for NvencD3d11Encoder { Codec::Av1 => {} } } - (API.encode_picture)(self.encoder, &mut pic) - .result_without_string() + (api().encode_picture)(self.encoder, &mut pic) + .nv_ok() .map_err(|e| anyhow!("encode_picture: {e:?}"))?; self.pending .push_back((self.bitstreams[slot], mp.mappedResource, captured.pts_ns)); @@ -1149,7 +1332,7 @@ impl Encoder for NvencD3d11Encoder { // We tag each input with `inputTimeStamp = frame_idx` (0,1,2,…), which is also 
the client's // frame number (the packetizer numbers frames in submit order), so the client's lost-frame // range maps 1:1 onto the timestamps NVENC invalidates here. - // SAFETY: `invalidate_ref_frames` is a function pointer from the loaded `ENCODE_API` table. + // SAFETY: `invalidate_ref_frames` is a function pointer from the runtime-loaded `EncodeApi` table. // `self.encoder` was checked non-null at the top of this fn and is the live session; this runs // on the encode thread (like submit/poll), so there is no concurrent NVENC use. Each `ts` was // clamped to `[oldest_in_dpb, frame_idx - 1]` above, so it names a frame still in the session's @@ -1157,8 +1340,8 @@ impl Encoder for NvencD3d11Encoder { // lifetime concern. unsafe { for ts in first..=last { - if (API.invalidate_ref_frames)(self.encoder, ts as u64) - .result_without_string() + if (api().invalidate_ref_frames)(self.encoder, ts as u64) + .nv_ok() .is_err() { return false; // any failure → fall back to IDR @@ -1195,7 +1378,7 @@ impl Encoder for NvencD3d11Encoder { }; // SAFETY: a non-empty `pending` implies `submit` ran, so `self.encoder` is the live session // (`teardown` clears `pending` whenever it nulls the handle); all calls below use function - // pointers from the loaded `ENCODE_API` table on the encode thread. `NV_ENC_LOCK_BITSTREAM lock` + // pointers from the runtime-loaded `EncodeApi` table on the encode thread. 
`NV_ENC_LOCK_BITSTREAM lock` // (version = `NV_ENC_LOCK_BITSTREAM_VER`) locks `bs`, a pool bitstream a prior `encode_picture` // targeted; `lock_bitstream` blocks until that encode finishes, so on success // `lock.bitstreamBufferPtr` is non-null and points at `lock.bitstreamSizeInBytes` bytes of @@ -1209,8 +1392,8 @@ impl Encoder for NvencD3d11Encoder { outputBitstream: bs, ..Default::default() }; - (API.lock_bitstream)(self.encoder, &mut lock) - .result_without_string() + (api().lock_bitstream)(self.encoder, &mut lock) + .nv_ok() .map_err(|e| anyhow!("lock_bitstream: {e:?}"))?; let data = std::slice::from_raw_parts( lock.bitstreamBufferPtr as *const u8, @@ -1221,11 +1404,11 @@ impl Encoder for NvencD3d11Encoder { lock.pictureType, nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_IDR | nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_I ); - (API.unlock_bitstream)(self.encoder, bs) - .result_without_string() + (api().unlock_bitstream)(self.encoder, bs) + .nv_ok() .map_err(|e| anyhow!("unlock_bitstream: {e:?}"))?; if !map.is_null() { - let _ = (API.unmap_input_resource)(self.encoder, map); + let _ = (api().unmap_input_resource)(self.encoder, map); } Ok(Some(EncodedFrame { data, @@ -1267,6 +1450,11 @@ pub fn probe_can_encode_444(codec: Codec) -> bool { if codec != Codec::H265 { return false; } + // No loadable NVENC on this box (non-NVIDIA / no driver) → the honest 4:4:4 answer is "no". + // This is also the `api()` gate for every NVENC call below. + if try_api().is_err() { + return false; + } // SAFETY: a self-contained probe owning every handle it creates. `CreateDXGIFactory1`/ // `EnumAdapterByLuid` return owned COM objects or err (→ default-adapter fallback). 
// `D3D11CreateDevice` (explicit adapter + UNKNOWN driver type, or NULL adapter + HARDWARE) @@ -1321,8 +1509,8 @@ pub fn probe_can_encode_444(codec: Codec) -> bool { ..Default::default() }; let mut enc: *mut c_void = ptr::null_mut(); - if (API.open_encode_session_ex)(&mut params, &mut enc) - .result_without_string() + if (api().open_encode_session_ex)(&mut params, &mut enc) + .nv_ok() .is_err() { return false; @@ -1333,11 +1521,11 @@ pub fn probe_can_encode_444(codec: Codec) -> bool { reserved: [0; 62], }; let mut val: i32 = 0; - let ok = (API.get_encode_caps)(enc, nv::NV_ENC_CODEC_HEVC_GUID, &mut param, &mut val) - .result_without_string() + let ok = (api().get_encode_caps)(enc, nv::NV_ENC_CODEC_HEVC_GUID, &mut param, &mut val) + .nv_ok() .is_ok() && val != 0; - let _ = (API.destroy_encoder)(enc); + let _ = (api().destroy_encoder)(enc); ok } } diff --git a/packaging/windows/nvenc/gen-nvenc-importlib.ps1 b/packaging/windows/nvenc/gen-nvenc-importlib.ps1 deleted file mode 100644 index 54e21a5..0000000 --- a/packaging/windows/nvenc/gen-nvenc-importlib.ps1 +++ /dev/null @@ -1,57 +0,0 @@ -<# -.SYNOPSIS - Generate the NVENC import library (nvencodeapi.lib) into -OutDir, so the host links with - `--features nvenc` on a box that has no NVIDIA Video Codec SDK and no GPU. - -.DESCRIPTION - The host links against nvencodeapi.lib (crates/punktfunk-host/build.rs). That import lib is just - a link-time stub for two exports of nvEncodeAPI64.dll (the real DLL ships with the NVIDIA driver - and resolves at runtime). We synthesise it from nvenc.def: - - 1. llvm-dlltool — preferred; LLVM is on the CI runner PATH (C:\Program Files\LLVM\bin) and this - works without a Visual Studio developer shell. - 2. MSVC lib.exe — fallback; located via vswhere (no vcvars needed). - - Point PUNKTFUNK_NVENC_LIB_DIR at -OutDir before `cargo build --features nvenc`. 
- -.EXAMPLE - pwsh -File gen-nvenc-importlib.ps1 -OutDir C:\t\nvenc -#> -[CmdletBinding()] -param( - [Parameter(Mandatory = $true)][string]$OutDir, - [string]$DefPath = (Join-Path $PSScriptRoot 'nvenc.def') -) -$ErrorActionPreference = 'Stop' -$ProgressPreference = 'SilentlyContinue' -$PSNativeCommandUseErrorActionPreference = $false # check $LASTEXITCODE ourselves (pwsh 7.4 safe) - -if (-not (Test-Path $DefPath)) { throw "module-definition file not found: $DefPath" } -New-Item -ItemType Directory -Force -Path $OutDir | Out-Null -$out = Join-Path $OutDir 'nvencodeapi.lib' - -# 1) llvm-dlltool (preferred) ------------------------------------------------------------------ -$dlltool = Get-Command llvm-dlltool -ErrorAction SilentlyContinue -if ($dlltool) { - Write-Host "==> llvm-dlltool -> $out" - & $dlltool.Source -m i386:x86-64 -d $DefPath -D nvEncodeAPI64.dll -l $out - if ($LASTEXITCODE -ne 0) { throw "llvm-dlltool failed ($LASTEXITCODE)" } - Write-Host " ok ($((Get-Item $out).Length) bytes)" - return -} - -# 2) MSVC lib.exe via vswhere (fallback) ------------------------------------------------------- -$vswhere = Join-Path ${env:ProgramFiles(x86)} 'Microsoft Visual Studio\Installer\vswhere.exe' -if (Test-Path $vswhere) { - $lib = & $vswhere -latest -prerelease -products * -find 'VC\Tools\MSVC\**\bin\Hostx64\x64\lib.exe' | - Select-Object -First 1 - if ($lib -and (Test-Path $lib)) { - Write-Host "==> lib.exe -> $out" - & $lib "/def:$DefPath" /machine:x64 "/out:$out" - if ($LASTEXITCODE -ne 0) { throw "lib.exe failed ($LASTEXITCODE)" } - Write-Host " ok ($((Get-Item $out).Length) bytes)" - return - } -} - -throw "neither llvm-dlltool (LLVM bin on PATH) nor MSVC lib.exe (via vswhere) was found to build $out" diff --git a/packaging/windows/nvenc/nvenc.def b/packaging/windows/nvenc/nvenc.def deleted file mode 100644 index 9f30005..0000000 --- a/packaging/windows/nvenc/nvenc.def +++ /dev/null @@ -1,14 +0,0 @@ -; Module-definition file for the NVENC import library the 
host links against with `--features nvenc`. -; -; The real entry points live in nvEncodeAPI64.dll, which ships with the NVIDIA driver. At LINK time -; the host only needs an import library exporting these two symbols (see crates/punktfunk-host/build.rs: -; it emits `cargo:rustc-link-lib=dylib=nvencodeapi` and searches PUNKTFUNK_NVENC_LIB_DIR). No GPU, -; driver, or NVIDIA Video Codec SDK is required to BUILD — only to run, where the DLL resolves from -; the installed driver. Generate nvencodeapi.lib from this file with gen-nvenc-importlib.ps1. -; -; The LIBRARY line names the DLL the import records point at — required for MSVC `lib.exe /def` -; (without it the import name would default to "nvenc.dll"). llvm-dlltool takes the name from `-D`. -LIBRARY nvEncodeAPI64.dll -EXPORTS - NvEncodeAPICreateInstance - NvEncodeAPIGetMaxSupportedVersion diff --git a/scripts/windows/README.md b/scripts/windows/README.md index 48d07e0..c847860 100644 --- a/scripts/windows/README.md +++ b/scripts/windows/README.md @@ -9,11 +9,11 @@ Helper scripts for the Windows host box (the RTX `.173` lab box, repo at powershell -ExecutionPolicy Bypass -File scripts\windows\setup-build-env.ps1 ``` -Persists (Machine scope) the three vars the NVENC build needs: +Persists (Machine scope) the vars the host build needs (NVENC itself needs none — its entry +points are runtime-loaded from the driver's `nvEncodeAPI64.dll`): | var | value | why | | --- | --- | --- | -| `PUNKTFUNK_NVENC_LIB_DIR` | `C:\Users\Public\nvenc` | NVENC import lib (`nvencodeapi.lib`) | | `LIBCLANG_PATH` | `C:\Program Files\LLVM\bin` | bindgen (`libclang.dll`) | | `CMAKE_POLICY_VERSION_MINIMUM` | `3.5` | `audiopus_sys` / cmake crates | diff --git a/scripts/windows/deploy-host.ps1 b/scripts/windows/deploy-host.ps1 index cf8c0e9..2a716d8 100644 --- a/scripts/windows/deploy-host.ps1 +++ b/scripts/windows/deploy-host.ps1 @@ -35,7 +35,7 @@ Set-Location $repo # Load the persisted build env (Machine scope) into THIS process, so the build 
sees it even # if this shell was started before setup-build-env.ps1 ran (env is inherited at spawn time). -foreach ($k in 'PUNKTFUNK_NVENC_LIB_DIR','LIBCLANG_PATH','CMAKE_POLICY_VERSION_MINIMUM') { +foreach ($k in 'LIBCLANG_PATH','CMAKE_POLICY_VERSION_MINIMUM') { $v = [Environment]::GetEnvironmentVariable($k, 'Machine') if ($v) { [Environment]::SetEnvironmentVariable($k, $v, 'Process'); Write-Host "env : $k=$v" } else { Write-Warning "env $k not set (run setup-build-env.ps1)" } diff --git a/scripts/windows/setup-build-env.ps1 b/scripts/windows/setup-build-env.ps1 index f755f22..f5d8c1e 100644 --- a/scripts/windows/setup-build-env.ps1 +++ b/scripts/windows/setup-build-env.ps1 @@ -12,9 +12,9 @@ $admin = ([Security.Principal.WindowsPrincipal][Security.Principal.WindowsIdenti ).IsInRole([Security.Principal.WindowsBuiltinRole]::Administrator) if (-not $admin) { throw "Run elevated (Machine-scope env requires Administrator)." } -# NVENC import lib (nvencodeapi.lib); libclang for bindgen; cmake policy floor for audiopus_sys. +# libclang for bindgen; cmake policy floor for audiopus_sys. (NVENC needs no build-time env: +# its entry points are runtime-loaded from the driver's nvEncodeAPI64.dll.) $vars = [ordered]@{ - 'PUNKTFUNK_NVENC_LIB_DIR' = 'C:\Users\Public\nvenc' 'LIBCLANG_PATH' = 'C:\Program Files\LLVM\bin' 'CMAKE_POLICY_VERSION_MINIMUM' = '3.5' # FFMPEG_DIR is only needed for the `amf-qsv` feature (libavcodec). The RTX box builds