fix(encode/windows): resolve NVENC at runtime — AMD/Intel hosts no longer crash at start
The nvenc build linked nvEncodeAPI64.dll's entry points at load time, so a --features nvenc binary hard-crashed on any box without the NVIDIA driver (AMD/Intel). Entry points now come from a runtime LoadLibrary table (encode/windows/nvenc.rs load_api); a missing DLL just falls through the encoder auto-detect to AMF/QSV/software. The generated import lib and all its plumbing (gen-nvenc-importlib.ps1, nvenc.def, PUNKTFUNK_NVENC_LIB_DIR, setup-build-env wiring) are gone. Live-validated on the RTX 4090 box (NVENC session, 7000+ frames). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -232,10 +232,11 @@ pf-driver-proto = { path = "../pf-driver-proto" }
|
|||||||
bytemuck = { version = "1.19", features = ["derive"] }
|
bytemuck = { version = "1.19", features = ["derive"] }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
# NVENC hardware encode (Windows). OFF by default: it pulls the NVENC SDK, and the host then needs
|
# NVENC hardware encode (Windows). OFF by default (it pulls the NVENC SDK crate); nothing is
|
||||||
# the NVENC entry points (NvEncodeAPICreateInstance / NvEncodeAPIGetMaxSupportedVersion) at link
|
# needed at link time — the entry points are resolved at RUNTIME from the driver's
|
||||||
# time — i.e. `nvencodeapi.lib` from the NVIDIA Video Codec SDK (or an import lib generated from
|
# nvEncodeAPI64.dll (encode/windows/nvenc.rs `load_api`), so the same binary starts fine on
|
||||||
# nvEncodeAPI64.dll) on the linker path. Build the GPU host with `--features nvenc`.
|
# AMD/Intel-only boxes and falls through to AMF/QSV/software. Build the GPU host with
|
||||||
|
# `--features nvenc`.
|
||||||
nvenc = ["dep:nvidia-video-codec-sdk"]
|
nvenc = ["dep:nvidia-video-codec-sdk"]
|
||||||
# AMD/Intel hardware encode on Windows (AMF/QSV via ffmpeg-next). OFF by default: it needs a
|
# AMD/Intel hardware encode on Windows (AMF/QSV via ffmpeg-next). OFF by default: it needs a
|
||||||
# `FFMPEG_DIR` (BtbN lgpl-shared — includes `*_amf`/`*_qsv`; the GPL-only x264/x265 are never used,
|
# `FFMPEG_DIR` (BtbN lgpl-shared — includes `*_amf`/`*_qsv`; the GPL-only x264/x265 are never used,
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
//! Build script. The only thing it does: with the `nvenc` feature (Windows GPU host), tell the
|
//! Build script: stamps the build version. NVENC deliberately needs NOTHING here — the entry
|
||||||
//! linker to pull the NVENC import library. The NVENC entry points
|
//! points (`NvEncodeAPICreateInstance` / `NvEncodeAPIGetMaxSupportedVersion`) live in
|
||||||
//! (`NvEncodeAPICreateInstance` / `NvEncodeAPIGetMaxSupportedVersion`) live in `nvEncodeAPI64.dll`
|
//! `nvEncodeAPI64.dll`, which only exists where the NVIDIA driver is installed, so
|
||||||
//! (shipped with the NVIDIA driver), so the host links against `nvencodeapi.lib`. Point
|
//! `encode/windows/nvenc.rs` resolves them at RUNTIME (`LoadLibraryExW`). The former link-time
|
||||||
//! `PUNKTFUNK_NVENC_LIB_DIR` at a directory containing `nvencodeapi.lib` — from the NVIDIA Video
|
//! import (`cargo:rustc-link-lib=nvencodeapi`) made the Windows loader kill the all-vendor host
|
||||||
//! Codec SDK, or an import lib generated from the driver's `nvEncodeAPI64.dll`
|
//! binary on every AMD/Intel-only box before `main` ("nvencodeapi64.dll was not found").
|
||||||
//! (`lib /def:nvenc.def /machine:x64 /out:nvencodeapi.lib` with the two exports above).
|
|
||||||
fn main() {
|
fn main() {
|
||||||
// Build provenance: stamp the exact package/build version into the binary so a running host
|
// Build provenance: stamp the exact package/build version into the binary so a running host
|
||||||
// can report what it is (mgmt /health, the startup log, `--version`) and a stale/shadowed
|
// can report what it is (mgmt /health, the startup log, `--version`) and a stale/shadowed
|
||||||
@@ -18,12 +17,4 @@ fn main() {
|
|||||||
.unwrap_or_else(|| std::env::var("CARGO_PKG_VERSION").unwrap_or_else(|_| "unknown".into()));
|
.unwrap_or_else(|| std::env::var("CARGO_PKG_VERSION").unwrap_or_else(|_| "unknown".into()));
|
||||||
println!("cargo:rustc-env=PUNKTFUNK_VERSION={version}");
|
println!("cargo:rustc-env=PUNKTFUNK_VERSION={version}");
|
||||||
println!("cargo:rerun-if-env-changed=PUNKTFUNK_BUILD_VERSION");
|
println!("cargo:rerun-if-env-changed=PUNKTFUNK_BUILD_VERSION");
|
||||||
|
|
||||||
if std::env::var_os("CARGO_FEATURE_NVENC").is_some() {
|
|
||||||
if let Some(dir) = std::env::var_os("PUNKTFUNK_NVENC_LIB_DIR") {
|
|
||||||
println!("cargo:rustc-link-search=native={}", dir.to_string_lossy());
|
|
||||||
}
|
|
||||||
println!("cargo:rustc-link-lib=dylib=nvencodeapi");
|
|
||||||
println!("cargo:rerun-if-env-changed=PUNKTFUNK_NVENC_LIB_DIR");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -530,7 +530,7 @@ fn open_video_backend(
|
|||||||
{
|
{
|
||||||
anyhow::bail!(
|
anyhow::bail!(
|
||||||
"NVENC requested/detected but this host was built without it — rebuild \
|
"NVENC requested/detected but this host was built without it — rebuild \
|
||||||
with `--features nvenc` (needs the NVENC SDK's nvencodeapi.lib at link time)"
|
with `--features nvenc`"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
//! NVENC hardware encoder (Windows, D3D11 input) — zero-copy capture→encode on the GPU.
|
//! NVENC hardware encoder (Windows, D3D11 input) — zero-copy capture→encode on the GPU.
|
||||||
//!
|
//!
|
||||||
//! Drives the raw NVENC API via `nvidia_video_codec_sdk::{sys, ENCODE_API}` (the safe `Encoder`
|
//! Drives the raw NVENC API via the `nvidia_video_codec_sdk` `sys` types and a **runtime-loaded**
|
||||||
//! wrapper is CUDA-only). Opens an encode session bound to the **same** `ID3D11Device` as the DXGI
|
//! entry table ([`EncodeApi`] — the crate's `ENCODE_API`/safe `Encoder` are deliberately unused:
|
||||||
|
//! the safe wrapper is CUDA-only, and its statically-declared entry points would put a load-time
|
||||||
|
//! `nvEncodeAPI64.dll` import on the all-vendor binary, killing it on every AMD/Intel-only box).
|
||||||
|
//! Opens an encode session bound to the **same** `ID3D11Device` as the DXGI
|
||||||
//! capturer (the device is carried on `FramePayload::D3d11`), and **encodes the capturer's texture in
|
//! capturer (the device is carried on `FramePayload::D3d11`), and **encodes the capturer's texture in
|
||||||
//! place** — it registers each input texture with NVENC once (cached by pointer) and `encode_picture`s
|
//! place** — it registers each input texture with NVENC once (cached by pointer) and `encode_picture`s
|
||||||
//! it directly, with NO per-frame `CopyResource`. (That's safe because the host encode loop is
|
//! it directly, with NO per-frame `CopyResource`. (That's safe because the host encode loop is
|
||||||
@@ -10,8 +13,10 @@
|
|||||||
//! pipelined, the capturer must hand a ring of textures.) Mirrors the Linux NVENC config: CBR +
|
//! pipelined, the capturer must hand a ring of textures.) Mirrors the Linux NVENC config: CBR +
|
||||||
//! ultra-low-latency, infinite GOP, P-frames only, forced-IDR for RFI, in-band SPS/PPS each keyframe.
|
//! ultra-low-latency, infinite GOP, P-frames only, forced-IDR for RFI, in-band SPS/PPS each keyframe.
|
||||||
//!
|
//!
|
||||||
//! Needs a real NVIDIA GPU at runtime (session creation fails otherwise) — compiles GPU-less, but
|
//! Needs a real NVIDIA GPU at runtime (session creation fails otherwise) — compiles GPU-less and
|
||||||
//! `open`/`submit` only succeed on a GPU box. The software encoder (`super::sw`) is the fallback.
|
//! **starts driver-less** (the DLL resolves at runtime; on an AMD/Intel box [`try_api`] fails
|
||||||
|
//! cleanly and the AMF/QSV/software backends carry the session). The software encoder
|
||||||
|
//! (`super::sw`) is the fallback.
|
||||||
//!
|
//!
|
||||||
//! **Two-thread async retrieve** (`PUNKTFUNK_NVENC_ASYNC=1`, opt-in until on-glass validated —
|
//! **Two-thread async retrieve** (`PUNKTFUNK_NVENC_ASYNC=1`, opt-in until on-glass validated —
|
||||||
//! gpu-contention plan §5.B): the NVENC guide mandates that the main thread only *submit*
|
//! gpu-contention plan §5.B): the NVENC guide mandates that the main thread only *submit*
|
||||||
@@ -44,7 +49,182 @@ use windows::Win32::Graphics::Direct3D11::{ID3D11Device, ID3D11Texture2D};
|
|||||||
use windows::Win32::System::Threading::{CreateEventW, WaitForSingleObject};
|
use windows::Win32::System::Threading::{CreateEventW, WaitForSingleObject};
|
||||||
|
|
||||||
use nvidia_video_codec_sdk::sys::nvEncodeAPI as nv;
|
use nvidia_video_codec_sdk::sys::nvEncodeAPI as nv;
|
||||||
use nvidia_video_codec_sdk::ENCODE_API as API;
|
|
||||||
|
// ---------------------------------------------------------------------------------------------
|
||||||
|
// Runtime-loaded NVENC entry table.
|
||||||
|
//
|
||||||
|
// The NVENC entry points live in `nvEncodeAPI64.dll`, which exists ONLY where the NVIDIA driver
|
||||||
|
// is installed. They must be resolved at runtime (`LoadLibraryExW` + `GetProcAddress`), never as
|
||||||
|
// a link-time import: the shipped host binary compiles the `nvenc` feature in unconditionally,
|
||||||
|
// and a load-time DLL import makes the Windows loader refuse to start the process on every
|
||||||
|
// AMD/Intel-only box ("nvencodeapi64.dll was not found", before `main`) — `encode.rs` never gets
|
||||||
|
// the chance to dispatch to AMF/QSV. This is the Windows analogue of the Linux host's dlopen'd
|
||||||
|
// libcuda. Only the two real DLL exports are resolved by name; the rest of the table comes back
|
||||||
|
// through `NvEncodeAPICreateInstance`.
|
||||||
|
// ---------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// The `NV_ENCODE_API_FUNCTION_LIST` entries this encoder uses, unwrapped once at load so call
|
||||||
|
/// sites stay `(api().encode_picture)(…)`. Field names mirror the sdk crate's `EncodeAPI`, whose
|
||||||
|
/// lazy static must NOT be referenced — it calls the statically-declared externs, which is what
|
||||||
|
/// demanded the import lib at link time.
|
||||||
|
struct EncodeApi {
|
||||||
|
open_encode_session_ex: unsafe extern "C" fn(
|
||||||
|
*mut nv::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS,
|
||||||
|
*mut *mut c_void,
|
||||||
|
) -> nv::NVENCSTATUS,
|
||||||
|
initialize_encoder:
|
||||||
|
unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_INITIALIZE_PARAMS) -> nv::NVENCSTATUS,
|
||||||
|
destroy_encoder: unsafe extern "C" fn(*mut c_void) -> nv::NVENCSTATUS,
|
||||||
|
get_encode_caps: unsafe extern "C" fn(
|
||||||
|
*mut c_void,
|
||||||
|
nv::GUID,
|
||||||
|
*mut nv::NV_ENC_CAPS_PARAM,
|
||||||
|
*mut core::ffi::c_int,
|
||||||
|
) -> nv::NVENCSTATUS,
|
||||||
|
get_encode_preset_config_ex: unsafe extern "C" fn(
|
||||||
|
*mut c_void,
|
||||||
|
nv::GUID,
|
||||||
|
nv::GUID,
|
||||||
|
nv::NV_ENC_TUNING_INFO,
|
||||||
|
*mut nv::NV_ENC_PRESET_CONFIG,
|
||||||
|
) -> nv::NVENCSTATUS,
|
||||||
|
create_bitstream_buffer: unsafe extern "C" fn(
|
||||||
|
*mut c_void,
|
||||||
|
*mut nv::NV_ENC_CREATE_BITSTREAM_BUFFER,
|
||||||
|
) -> nv::NVENCSTATUS,
|
||||||
|
destroy_bitstream_buffer:
|
||||||
|
unsafe extern "C" fn(*mut c_void, nv::NV_ENC_OUTPUT_PTR) -> nv::NVENCSTATUS,
|
||||||
|
lock_bitstream:
|
||||||
|
unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_LOCK_BITSTREAM) -> nv::NVENCSTATUS,
|
||||||
|
unlock_bitstream: unsafe extern "C" fn(*mut c_void, nv::NV_ENC_OUTPUT_PTR) -> nv::NVENCSTATUS,
|
||||||
|
register_resource:
|
||||||
|
unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_REGISTER_RESOURCE) -> nv::NVENCSTATUS,
|
||||||
|
unregister_resource:
|
||||||
|
unsafe extern "C" fn(*mut c_void, nv::NV_ENC_REGISTERED_PTR) -> nv::NVENCSTATUS,
|
||||||
|
map_input_resource:
|
||||||
|
unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_MAP_INPUT_RESOURCE) -> nv::NVENCSTATUS,
|
||||||
|
unmap_input_resource:
|
||||||
|
unsafe extern "C" fn(*mut c_void, nv::NV_ENC_INPUT_PTR) -> nv::NVENCSTATUS,
|
||||||
|
encode_picture:
|
||||||
|
unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_PIC_PARAMS) -> nv::NVENCSTATUS,
|
||||||
|
register_async_event:
|
||||||
|
unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_EVENT_PARAMS) -> nv::NVENCSTATUS,
|
||||||
|
unregister_async_event:
|
||||||
|
unsafe extern "C" fn(*mut c_void, *mut nv::NV_ENC_EVENT_PARAMS) -> nv::NVENCSTATUS,
|
||||||
|
invalidate_ref_frames: unsafe extern "C" fn(*mut c_void, u64) -> nv::NVENCSTATUS,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Local `NVENCSTATUS` → `Result` (replaces the sdk's `result_without_string`, which lives in the
|
||||||
|
/// crate's `safe` module — code this file must not pull in, see [`EncodeApi`]). The raw status's
|
||||||
|
/// Debug repr (`NV_ENC_ERR_INVALID_PARAM`, …) is the error payload.
|
||||||
|
trait NvStatusExt {
|
||||||
|
fn nv_ok(self) -> std::result::Result<(), nv::NVENCSTATUS>;
|
||||||
|
}
|
||||||
|
impl NvStatusExt for nv::NVENCSTATUS {
|
||||||
|
fn nv_ok(self) -> std::result::Result<(), nv::NVENCSTATUS> {
|
||||||
|
match self {
|
||||||
|
nv::NVENCSTATUS::NV_ENC_SUCCESS => Ok(()),
|
||||||
|
err => Err(err),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve the table once per process. `Err` = NVENC genuinely unavailable on this machine (no
|
||||||
|
/// NVIDIA driver/DLL, or a driver older than our headers) — the entry points
|
||||||
|
/// ([`NvencD3d11Encoder::open`], [`probe_can_encode_444`]) gate on it and the AMF/QSV/software
|
||||||
|
/// backends carry on.
|
||||||
|
fn try_api() -> std::result::Result<&'static EncodeApi, &'static str> {
|
||||||
|
static TABLE: std::sync::OnceLock<std::result::Result<EncodeApi, String>> =
|
||||||
|
std::sync::OnceLock::new();
|
||||||
|
TABLE
|
||||||
|
.get_or_init(|| {
|
||||||
|
let table = load_api();
|
||||||
|
if let Err(e) = &table {
|
||||||
|
// Once per process. Only reachable when something resolved to NVENC on this box
|
||||||
|
// (backend misdetect or a forced PUNKTFUNK_ENCODER=nvenc) — say why it will fail.
|
||||||
|
tracing::warn!("NVENC API unavailable: {e}");
|
||||||
|
}
|
||||||
|
table
|
||||||
|
})
|
||||||
|
.as_ref()
|
||||||
|
.map_err(|e| e.as_str())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The loaded table, for call sites past a [`try_api`] gate — a live session (or the probe's own
|
||||||
|
/// gate) implies the load succeeded, and the table lives for the process lifetime.
|
||||||
|
fn api() -> &'static EncodeApi {
|
||||||
|
try_api().expect("NVENC call before a successful try_api() gate")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_api() -> std::result::Result<EncodeApi, String> {
|
||||||
|
use windows::core::{s, w};
|
||||||
|
use windows::Win32::System::LibraryLoader::{
|
||||||
|
GetProcAddress, LoadLibraryExW, LOAD_LIBRARY_SEARCH_SYSTEM32,
|
||||||
|
};
|
||||||
|
// SAFETY: `LoadLibraryExW`/`GetProcAddress` take static NUL-terminated names; the
|
||||||
|
// System32-only search path keeps a planted DLL out of the SYSTEM-service process. The two
|
||||||
|
// transmutes cast the resolved exports to their documented prototypes (nvEncodeAPI.h), the
|
||||||
|
// same contract the C SDK's own loader applies. `NvEncodeAPIGetMaxSupportedVersion` writes
|
||||||
|
// one u32 through a live pointer; `NvEncodeAPICreateInstance` fills `list`, a stack-local
|
||||||
|
// `#[repr(C)]` function list with `version` set, only during the call. The module is never
|
||||||
|
// freed, so every extracted function pointer stays valid for the process lifetime.
|
||||||
|
unsafe {
|
||||||
|
let module = LoadLibraryExW(w!("nvEncodeAPI64.dll"), None, LOAD_LIBRARY_SEARCH_SYSTEM32)
|
||||||
|
.map_err(|e| format!("nvEncodeAPI64.dll not loadable (no NVIDIA driver?): {e}"))?;
|
||||||
|
let get_version = GetProcAddress(module, s!("NvEncodeAPIGetMaxSupportedVersion"))
|
||||||
|
.ok_or("nvEncodeAPI64.dll exports no NvEncodeAPIGetMaxSupportedVersion")?;
|
||||||
|
let create_instance = GetProcAddress(module, s!("NvEncodeAPICreateInstance"))
|
||||||
|
.ok_or("nvEncodeAPI64.dll exports no NvEncodeAPICreateInstance")?;
|
||||||
|
let get_version: unsafe extern "C" fn(*mut u32) -> nv::NVENCSTATUS =
|
||||||
|
std::mem::transmute(get_version);
|
||||||
|
let create_instance: unsafe extern "C" fn(
|
||||||
|
*mut nv::NV_ENCODE_API_FUNCTION_LIST,
|
||||||
|
) -> nv::NVENCSTATUS = std::mem::transmute(create_instance);
|
||||||
|
|
||||||
|
let mut version = 0u32;
|
||||||
|
get_version(&mut version)
|
||||||
|
.nv_ok()
|
||||||
|
.map_err(|e| format!("NvEncodeAPIGetMaxSupportedVersion: {e:?}"))?;
|
||||||
|
// The sdk's assert_versions_match, minus the panic: an older driver is a clean Err.
|
||||||
|
let (major, minor) = (version >> 4, version & 0xf);
|
||||||
|
if (major, minor) < (nv::NVENCAPI_MAJOR_VERSION, nv::NVENCAPI_MINOR_VERSION) {
|
||||||
|
return Err(format!(
|
||||||
|
"driver NVENC API {major}.{minor} is older than the host's headers {}.{} — \
|
||||||
|
update the NVIDIA driver",
|
||||||
|
nv::NVENCAPI_MAJOR_VERSION,
|
||||||
|
nv::NVENCAPI_MINOR_VERSION
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut list = nv::NV_ENCODE_API_FUNCTION_LIST {
|
||||||
|
version: nv::NV_ENCODE_API_FUNCTION_LIST_VER,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
create_instance(&mut list)
|
||||||
|
.nv_ok()
|
||||||
|
.map_err(|e| format!("NvEncodeAPICreateInstance: {e:?}"))?;
|
||||||
|
const MISSING: &str = "NvEncodeAPICreateInstance left an entry point unfilled";
|
||||||
|
Ok(EncodeApi {
|
||||||
|
open_encode_session_ex: list.nvEncOpenEncodeSessionEx.ok_or(MISSING)?,
|
||||||
|
initialize_encoder: list.nvEncInitializeEncoder.ok_or(MISSING)?,
|
||||||
|
destroy_encoder: list.nvEncDestroyEncoder.ok_or(MISSING)?,
|
||||||
|
get_encode_caps: list.nvEncGetEncodeCaps.ok_or(MISSING)?,
|
||||||
|
get_encode_preset_config_ex: list.nvEncGetEncodePresetConfigEx.ok_or(MISSING)?,
|
||||||
|
create_bitstream_buffer: list.nvEncCreateBitstreamBuffer.ok_or(MISSING)?,
|
||||||
|
destroy_bitstream_buffer: list.nvEncDestroyBitstreamBuffer.ok_or(MISSING)?,
|
||||||
|
lock_bitstream: list.nvEncLockBitstream.ok_or(MISSING)?,
|
||||||
|
unlock_bitstream: list.nvEncUnlockBitstream.ok_or(MISSING)?,
|
||||||
|
register_resource: list.nvEncRegisterResource.ok_or(MISSING)?,
|
||||||
|
unregister_resource: list.nvEncUnregisterResource.ok_or(MISSING)?,
|
||||||
|
map_input_resource: list.nvEncMapInputResource.ok_or(MISSING)?,
|
||||||
|
unmap_input_resource: list.nvEncUnmapInputResource.ok_or(MISSING)?,
|
||||||
|
encode_picture: list.nvEncEncodePicture.ok_or(MISSING)?,
|
||||||
|
register_async_event: list.nvEncRegisterAsyncEvent.ok_or(MISSING)?,
|
||||||
|
unregister_async_event: list.nvEncUnregisterAsyncEvent.ok_or(MISSING)?,
|
||||||
|
invalidate_ref_frames: list.nvEncInvalidateRefFrames.ok_or(MISSING)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Output bitstream buffers = max in-flight encodes. The helper deep-pipelines (submits several frames
|
// Output bitstream buffers = max in-flight encodes. The helper deep-pipelines (submits several frames
|
||||||
// before locking the oldest) so per-frame GPU-scheduling waits OVERLAP instead of serializing under a
|
// before locking the oldest) so per-frame GPU-scheduling waits OVERLAP instead of serializing under a
|
||||||
@@ -143,7 +323,7 @@ fn retrieve_loop(
|
|||||||
outputBitstream: job.bs as *mut c_void,
|
outputBitstream: job.bs as *mut c_void,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
match (API.lock_bitstream)(enc as *mut c_void, &mut lock).result_without_string() {
|
match (api().lock_bitstream)(enc as *mut c_void, &mut lock).nv_ok() {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
let data = std::slice::from_raw_parts(
|
let data = std::slice::from_raw_parts(
|
||||||
lock.bitstreamBufferPtr as *const u8,
|
lock.bitstreamBufferPtr as *const u8,
|
||||||
@@ -155,7 +335,7 @@ fn retrieve_loop(
|
|||||||
nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_IDR
|
nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_IDR
|
||||||
| nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_I
|
| nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_I
|
||||||
);
|
);
|
||||||
let _ = (API.unlock_bitstream)(enc as *mut c_void, job.bs as *mut c_void);
|
let _ = (api().unlock_bitstream)(enc as *mut c_void, job.bs as *mut c_void);
|
||||||
Ok((data, keyframe))
|
Ok((data, keyframe))
|
||||||
}
|
}
|
||||||
Err(e) => Err(format!("lock_bitstream (async): {e:?}")),
|
Err(e) => Err(format!("lock_bitstream (async): {e:?}")),
|
||||||
@@ -255,6 +435,11 @@ impl NvencD3d11Encoder {
|
|||||||
bit_depth: u8,
|
bit_depth: u8,
|
||||||
chroma: ChromaFormat,
|
chroma: ChromaFormat,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
|
// The runtime DLL load is the real "is NVENC possible here" gate: fail the open with a
|
||||||
|
// clear reason (backend misdetect / forced PUNKTFUNK_ENCODER=nvenc on a non-NVIDIA box)
|
||||||
|
// instead of an opaque session error on the first frame. Every later NVENC call in this
|
||||||
|
// file sits behind this gate (or the probe's), so the infallible `api()` is sound.
|
||||||
|
try_api().map_err(|e| anyhow!("NVENC unavailable: {e}"))?;
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
encoder: ptr::null_mut(),
|
encoder: ptr::null_mut(),
|
||||||
codec,
|
codec,
|
||||||
@@ -309,11 +494,11 @@ impl NvencD3d11Encoder {
|
|||||||
// Unmap any in-flight inputs, then unregister every cached texture and destroy the bitstreams.
|
// Unmap any in-flight inputs, then unregister every cached texture and destroy the bitstreams.
|
||||||
for (_, map, _) in &self.pending {
|
for (_, map, _) in &self.pending {
|
||||||
if !map.is_null() {
|
if !map.is_null() {
|
||||||
let _ = (API.unmap_input_resource)(self.encoder, *map);
|
let _ = (api().unmap_input_resource)(self.encoder, *map);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (reg, _tex) in self.regs.values() {
|
for (reg, _tex) in self.regs.values() {
|
||||||
let _ = (API.unregister_resource)(self.encoder, *reg);
|
let _ = (api().unregister_resource)(self.encoder, *reg);
|
||||||
}
|
}
|
||||||
// Async events: unregister from the session, then close the Win32 handles.
|
// Async events: unregister from the session, then close the Win32 handles.
|
||||||
for &ev in &self.events {
|
for &ev in &self.events {
|
||||||
@@ -322,14 +507,14 @@ impl NvencD3d11Encoder {
|
|||||||
completionEvent: ev as *mut c_void,
|
completionEvent: ev as *mut c_void,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let _ = (API.unregister_async_event)(self.encoder, &mut ep);
|
let _ = (api().unregister_async_event)(self.encoder, &mut ep);
|
||||||
let _ = CloseHandle(HANDLE(ev as *mut c_void));
|
let _ = CloseHandle(HANDLE(ev as *mut c_void));
|
||||||
}
|
}
|
||||||
self.events.clear();
|
self.events.clear();
|
||||||
for &bs in &self.bitstreams {
|
for &bs in &self.bitstreams {
|
||||||
let _ = (API.destroy_bitstream_buffer)(self.encoder, bs);
|
let _ = (api().destroy_bitstream_buffer)(self.encoder, bs);
|
||||||
}
|
}
|
||||||
let _ = (API.destroy_encoder)(self.encoder);
|
let _ = (api().destroy_encoder)(self.encoder);
|
||||||
self.regs.clear(); // drops the texture clones, releasing our refs
|
self.regs.clear(); // drops the texture clones, releasing our refs
|
||||||
self.bitstreams.clear();
|
self.bitstreams.clear();
|
||||||
self.pending.clear();
|
self.pending.clear();
|
||||||
@@ -350,9 +535,7 @@ impl NvencD3d11Encoder {
|
|||||||
reserved: [0; 62],
|
reserved: [0; 62],
|
||||||
};
|
};
|
||||||
let mut val: i32 = 0;
|
let mut val: i32 = 0;
|
||||||
match (API.get_encode_caps)(enc, self.codec_guid, &mut param, &mut val)
|
match (api().get_encode_caps)(enc, self.codec_guid, &mut param, &mut val).nv_ok() {
|
||||||
.result_without_string()
|
|
||||||
{
|
|
||||||
Ok(()) => val,
|
Ok(()) => val,
|
||||||
Err(_) => 0,
|
Err(_) => 0,
|
||||||
}
|
}
|
||||||
@@ -374,8 +557,8 @@ impl NvencD3d11Encoder {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let mut enc: *mut c_void = ptr::null_mut();
|
let mut enc: *mut c_void = ptr::null_mut();
|
||||||
(API.open_encode_session_ex)(&mut params, &mut enc)
|
(api().open_encode_session_ex)(&mut params, &mut enc)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
anyhow!("NVENC open_encode_session_ex (caps probe): {e:?} (no NVIDIA GPU?)")
|
anyhow!("NVENC open_encode_session_ex (caps probe): {e:?} (no NVIDIA GPU?)")
|
||||||
})?;
|
})?;
|
||||||
@@ -392,7 +575,7 @@ impl NvencD3d11Encoder {
|
|||||||
nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE,
|
nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE,
|
||||||
);
|
);
|
||||||
let async_enc = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT);
|
let async_enc = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT);
|
||||||
let _ = (API.destroy_encoder)(enc);
|
let _ = (api().destroy_encoder)(enc);
|
||||||
|
|
||||||
// Reject an over-range mode with a clear message instead of an opaque InvalidParam.
|
// Reject an over-range mode with a clear message instead of an opaque InvalidParam.
|
||||||
if wmax > 0 && hmax > 0 && (self.width as i32 > wmax || self.height as i32 > hmax) {
|
if wmax > 0 && hmax > 0 && (self.width as i32 > wmax || self.height as i32 > hmax) {
|
||||||
@@ -449,8 +632,8 @@ impl NvencD3d11Encoder {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let mut enc: *mut c_void = ptr::null_mut();
|
let mut enc: *mut c_void = ptr::null_mut();
|
||||||
(API.open_encode_session_ex)(&mut params, &mut enc)
|
(api().open_encode_session_ex)(&mut params, &mut enc)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| anyhow!("NVENC open_encode_session_ex: {e:?} (no NVIDIA GPU?)"))?;
|
.map_err(|e| anyhow!("NVENC open_encode_session_ex: {e:?} (no NVIDIA GPU?)"))?;
|
||||||
|
|
||||||
// Seed the P1 + ultra-low-latency preset config.
|
// Seed the P1 + ultra-low-latency preset config.
|
||||||
@@ -462,16 +645,16 @@ impl NvencD3d11Encoder {
|
|||||||
},
|
},
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
if let Err(e) = (API.get_encode_preset_config_ex)(
|
if let Err(e) = (api().get_encode_preset_config_ex)(
|
||||||
enc,
|
enc,
|
||||||
self.codec_guid,
|
self.codec_guid,
|
||||||
nv::NV_ENC_PRESET_P1_GUID,
|
nv::NV_ENC_PRESET_P1_GUID,
|
||||||
nv::NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
|
nv::NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
|
||||||
&mut preset,
|
&mut preset,
|
||||||
)
|
)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
{
|
{
|
||||||
let _ = (API.destroy_encoder)(enc);
|
let _ = (api().destroy_encoder)(enc);
|
||||||
return Err(anyhow!("get_encode_preset_config_ex: {e:?}"));
|
return Err(anyhow!("get_encode_preset_config_ex: {e:?}"));
|
||||||
}
|
}
|
||||||
let mut cfg = preset.presetCfg;
|
let mut cfg = preset.presetCfg;
|
||||||
@@ -613,10 +796,10 @@ impl NvencD3d11Encoder {
|
|||||||
// splitEncodeMode is a C bitfield — set via the generated accessor, not a struct field.
|
// splitEncodeMode is a C bitfield — set via the generated accessor, not a struct field.
|
||||||
init.set_splitEncodeMode(split_mode);
|
init.set_splitEncodeMode(split_mode);
|
||||||
|
|
||||||
match (API.initialize_encoder)(enc, &mut init).result_without_string() {
|
match (api().initialize_encoder)(enc, &mut init).nv_ok() {
|
||||||
Ok(()) => Ok(enc),
|
Ok(()) => Ok(enc),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
let _ = (API.destroy_encoder)(enc);
|
let _ = (api().destroy_encoder)(enc);
|
||||||
Err(anyhow!("initialize_encoder: {e:?}"))
|
Err(anyhow!("initialize_encoder: {e:?}"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -624,8 +807,8 @@ impl NvencD3d11Encoder {
|
|||||||
|
|
||||||
/// Lazily create the session on the first frame's D3D11 device (so capture + encode share it).
|
/// Lazily create the session on the first frame's D3D11 device (so capture + encode share it).
|
||||||
fn init_session(&mut self, device: &ID3D11Device) -> Result<()> {
|
fn init_session(&mut self, device: &ID3D11Device) -> Result<()> {
|
||||||
// SAFETY: every call below goes through a function pointer resolved once from the loaded
|
// SAFETY: every call below goes through a function pointer resolved once from the
|
||||||
// `nvidia_video_codec_sdk::ENCODE_API` (`nvEncodeAPI`) table, or through this type's own
|
// runtime-loaded [`EncodeApi`] table (`api()`, gated in `open`), or through this type's own
|
||||||
// `unsafe fn`s whose contract is met here. `query_caps`/`try_open_session` receive `device`,
|
// `unsafe fn`s whose contract is met here. `query_caps`/`try_open_session` receive `device`,
|
||||||
// the live `ID3D11Device` the caller pulled off the first frame; each returns either a valid
|
// the live `ID3D11Device` the caller pulled off the first frame; each returns either a valid
|
||||||
// open NVENC session handle or an `Err`. `destroy_encoder` is only ever called on a handle a
|
// open NVENC session handle or an `Err`. `destroy_encoder` is only ever called on a handle a
|
||||||
@@ -729,7 +912,7 @@ impl NvencD3d11Encoder {
|
|||||||
match self.try_open_session(device, mid, split_mode, use_async) {
|
match self.try_open_session(device, mid, split_mode, use_async) {
|
||||||
Ok(e) => {
|
Ok(e) => {
|
||||||
if !best.is_null() {
|
if !best.is_null() {
|
||||||
let _ = (API.destroy_encoder)(best);
|
let _ = (api().destroy_encoder)(best);
|
||||||
}
|
}
|
||||||
best = e;
|
best = e;
|
||||||
best_bps = mid;
|
best_bps = mid;
|
||||||
@@ -778,8 +961,8 @@ impl NvencD3d11Encoder {
|
|||||||
version: nv::NV_ENC_CREATE_BITSTREAM_BUFFER_VER,
|
version: nv::NV_ENC_CREATE_BITSTREAM_BUFFER_VER,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
(API.create_bitstream_buffer)(enc, &mut cb)
|
(api().create_bitstream_buffer)(enc, &mut cb)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| anyhow!("create_bitstream_buffer: {e:?}"))?;
|
.map_err(|e| anyhow!("create_bitstream_buffer: {e:?}"))?;
|
||||||
self.bitstreams.push(cb.bitstreamBuffer);
|
self.bitstreams.push(cb.bitstreamBuffer);
|
||||||
}
|
}
|
||||||
@@ -795,8 +978,8 @@ impl NvencD3d11Encoder {
|
|||||||
completionEvent: ev.0,
|
completionEvent: ev.0,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
(API.register_async_event)(enc, &mut ep)
|
(api().register_async_event)(enc, &mut ep)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| anyhow!("register_async_event: {e:?}"))?;
|
.map_err(|e| anyhow!("register_async_event: {e:?}"))?;
|
||||||
self.events.push(ev.0 as usize);
|
self.events.push(ev.0 as usize);
|
||||||
}
|
}
|
||||||
@@ -852,7 +1035,7 @@ impl NvencD3d11Encoder {
|
|||||||
// path's poll-side unmap, exactly once per mapping.
|
// path's poll-side unmap, exactly once per mapping.
|
||||||
unsafe {
|
unsafe {
|
||||||
if !map.is_null() {
|
if !map.is_null() {
|
||||||
let _ = (API.unmap_input_resource)(self.encoder, map);
|
let _ = (api().unmap_input_resource)(self.encoder, map);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let (data, keyframe) = done.result.map_err(|e| anyhow!("{e}"))?;
|
let (data, keyframe) = done.result.map_err(|e| anyhow!("{e}"))?;
|
||||||
@@ -953,7 +1136,7 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
}
|
}
|
||||||
let slot = self.next % POOL;
|
let slot = self.next % POOL;
|
||||||
self.next += 1;
|
self.next += 1;
|
||||||
// SAFETY: every NVENC call goes through a function pointer from the loaded `ENCODE_API` table
|
// SAFETY: every NVENC call goes through a function pointer from the runtime-loaded `EncodeApi` table
|
||||||
// and takes `self.encoder`, the live session `init_session` just established (non-null on the
|
// and takes `self.encoder`, the live session `init_session` just established (non-null on the
|
||||||
// path that reaches here). `NV_ENC_REGISTER_RESOURCE rr` has `version =
|
// path that reaches here). `NV_ENC_REGISTER_RESOURCE rr` has `version =
|
||||||
// NV_ENC_REGISTER_RESOURCE_VER` and registers `frame.texture` — a D3D11 texture from
|
// NV_ENC_REGISTER_RESOURCE_VER` and registers `frame.texture` — a D3D11 texture from
|
||||||
@@ -986,8 +1169,8 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
bufferUsage: nv::NV_ENC_BUFFER_USAGE::NV_ENC_INPUT_IMAGE,
|
bufferUsage: nv::NV_ENC_BUFFER_USAGE::NV_ENC_INPUT_IMAGE,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
(API.register_resource)(self.encoder, &mut rr)
|
(api().register_resource)(self.encoder, &mut rr)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| anyhow!("register_resource: {e:?}"))?;
|
.map_err(|e| anyhow!("register_resource: {e:?}"))?;
|
||||||
self.regs
|
self.regs
|
||||||
.insert(key, (rr.registeredResource, frame.texture.clone()));
|
.insert(key, (rr.registeredResource, frame.texture.clone()));
|
||||||
@@ -999,8 +1182,8 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
registeredResource: reg,
|
registeredResource: reg,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
(API.map_input_resource)(self.encoder, &mut mp)
|
(api().map_input_resource)(self.encoder, &mut mp)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| anyhow!("map_input_resource: {e:?}"))?;
|
.map_err(|e| anyhow!("map_input_resource: {e:?}"))?;
|
||||||
|
|
||||||
let pts = self.frame_idx as u64;
|
let pts = self.frame_idx as u64;
|
||||||
@@ -1076,8 +1259,8 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
Codec::Av1 => {}
|
Codec::Av1 => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(API.encode_picture)(self.encoder, &mut pic)
|
(api().encode_picture)(self.encoder, &mut pic)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| anyhow!("encode_picture: {e:?}"))?;
|
.map_err(|e| anyhow!("encode_picture: {e:?}"))?;
|
||||||
self.pending
|
self.pending
|
||||||
.push_back((self.bitstreams[slot], mp.mappedResource, captured.pts_ns));
|
.push_back((self.bitstreams[slot], mp.mappedResource, captured.pts_ns));
|
||||||
@@ -1149,7 +1332,7 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
// We tag each input with `inputTimeStamp = frame_idx` (0,1,2,…), which is also the client's
|
// We tag each input with `inputTimeStamp = frame_idx` (0,1,2,…), which is also the client's
|
||||||
// frame number (the packetizer numbers frames in submit order), so the client's lost-frame
|
// frame number (the packetizer numbers frames in submit order), so the client's lost-frame
|
||||||
// range maps 1:1 onto the timestamps NVENC invalidates here.
|
// range maps 1:1 onto the timestamps NVENC invalidates here.
|
||||||
// SAFETY: `invalidate_ref_frames` is a function pointer from the loaded `ENCODE_API` table.
|
// SAFETY: `invalidate_ref_frames` is a function pointer from the runtime-loaded `EncodeApi` table.
|
||||||
// `self.encoder` was checked non-null at the top of this fn and is the live session; this runs
|
// `self.encoder` was checked non-null at the top of this fn and is the live session; this runs
|
||||||
// on the encode thread (like submit/poll), so there is no concurrent NVENC use. Each `ts` was
|
// on the encode thread (like submit/poll), so there is no concurrent NVENC use. Each `ts` was
|
||||||
// clamped to `[oldest_in_dpb, frame_idx - 1]` above, so it names a frame still in the session's
|
// clamped to `[oldest_in_dpb, frame_idx - 1]` above, so it names a frame still in the session's
|
||||||
@@ -1157,8 +1340,8 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
// lifetime concern.
|
// lifetime concern.
|
||||||
unsafe {
|
unsafe {
|
||||||
for ts in first..=last {
|
for ts in first..=last {
|
||||||
if (API.invalidate_ref_frames)(self.encoder, ts as u64)
|
if (api().invalidate_ref_frames)(self.encoder, ts as u64)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.is_err()
|
.is_err()
|
||||||
{
|
{
|
||||||
return false; // any failure → fall back to IDR
|
return false; // any failure → fall back to IDR
|
||||||
@@ -1195,7 +1378,7 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
};
|
};
|
||||||
// SAFETY: a non-empty `pending` implies `submit` ran, so `self.encoder` is the live session
|
// SAFETY: a non-empty `pending` implies `submit` ran, so `self.encoder` is the live session
|
||||||
// (`teardown` clears `pending` whenever it nulls the handle); all calls below use function
|
// (`teardown` clears `pending` whenever it nulls the handle); all calls below use function
|
||||||
// pointers from the loaded `ENCODE_API` table on the encode thread. `NV_ENC_LOCK_BITSTREAM lock`
|
// pointers from the runtime-loaded `EncodeApi` table on the encode thread. `NV_ENC_LOCK_BITSTREAM lock`
|
||||||
// (version = `NV_ENC_LOCK_BITSTREAM_VER`) locks `bs`, a pool bitstream a prior `encode_picture`
|
// (version = `NV_ENC_LOCK_BITSTREAM_VER`) locks `bs`, a pool bitstream a prior `encode_picture`
|
||||||
// targeted; `lock_bitstream` blocks until that encode finishes, so on success
|
// targeted; `lock_bitstream` blocks until that encode finishes, so on success
|
||||||
// `lock.bitstreamBufferPtr` is non-null and points at `lock.bitstreamSizeInBytes` bytes of
|
// `lock.bitstreamBufferPtr` is non-null and points at `lock.bitstreamSizeInBytes` bytes of
|
||||||
@@ -1209,8 +1392,8 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
outputBitstream: bs,
|
outputBitstream: bs,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
(API.lock_bitstream)(self.encoder, &mut lock)
|
(api().lock_bitstream)(self.encoder, &mut lock)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| anyhow!("lock_bitstream: {e:?}"))?;
|
.map_err(|e| anyhow!("lock_bitstream: {e:?}"))?;
|
||||||
let data = std::slice::from_raw_parts(
|
let data = std::slice::from_raw_parts(
|
||||||
lock.bitstreamBufferPtr as *const u8,
|
lock.bitstreamBufferPtr as *const u8,
|
||||||
@@ -1221,11 +1404,11 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
lock.pictureType,
|
lock.pictureType,
|
||||||
nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_IDR | nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_I
|
nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_IDR | nv::NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_I
|
||||||
);
|
);
|
||||||
(API.unlock_bitstream)(self.encoder, bs)
|
(api().unlock_bitstream)(self.encoder, bs)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.map_err(|e| anyhow!("unlock_bitstream: {e:?}"))?;
|
.map_err(|e| anyhow!("unlock_bitstream: {e:?}"))?;
|
||||||
if !map.is_null() {
|
if !map.is_null() {
|
||||||
let _ = (API.unmap_input_resource)(self.encoder, map);
|
let _ = (api().unmap_input_resource)(self.encoder, map);
|
||||||
}
|
}
|
||||||
Ok(Some(EncodedFrame {
|
Ok(Some(EncodedFrame {
|
||||||
data,
|
data,
|
||||||
@@ -1267,6 +1450,11 @@ pub fn probe_can_encode_444(codec: Codec) -> bool {
|
|||||||
if codec != Codec::H265 {
|
if codec != Codec::H265 {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// No loadable NVENC on this box (non-NVIDIA / no driver) → the honest 4:4:4 answer is "no".
|
||||||
|
// This is also the `api()` gate for every NVENC call below.
|
||||||
|
if try_api().is_err() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// SAFETY: a self-contained probe owning every handle it creates. `CreateDXGIFactory1`/
|
// SAFETY: a self-contained probe owning every handle it creates. `CreateDXGIFactory1`/
|
||||||
// `EnumAdapterByLuid` return owned COM objects or err (→ default-adapter fallback).
|
// `EnumAdapterByLuid` return owned COM objects or err (→ default-adapter fallback).
|
||||||
// `D3D11CreateDevice` (explicit adapter + UNKNOWN driver type, or NULL adapter + HARDWARE)
|
// `D3D11CreateDevice` (explicit adapter + UNKNOWN driver type, or NULL adapter + HARDWARE)
|
||||||
@@ -1321,8 +1509,8 @@ pub fn probe_can_encode_444(codec: Codec) -> bool {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let mut enc: *mut c_void = ptr::null_mut();
|
let mut enc: *mut c_void = ptr::null_mut();
|
||||||
if (API.open_encode_session_ex)(&mut params, &mut enc)
|
if (api().open_encode_session_ex)(&mut params, &mut enc)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.is_err()
|
.is_err()
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
@@ -1333,11 +1521,11 @@ pub fn probe_can_encode_444(codec: Codec) -> bool {
|
|||||||
reserved: [0; 62],
|
reserved: [0; 62],
|
||||||
};
|
};
|
||||||
let mut val: i32 = 0;
|
let mut val: i32 = 0;
|
||||||
let ok = (API.get_encode_caps)(enc, nv::NV_ENC_CODEC_HEVC_GUID, &mut param, &mut val)
|
let ok = (api().get_encode_caps)(enc, nv::NV_ENC_CODEC_HEVC_GUID, &mut param, &mut val)
|
||||||
.result_without_string()
|
.nv_ok()
|
||||||
.is_ok()
|
.is_ok()
|
||||||
&& val != 0;
|
&& val != 0;
|
||||||
let _ = (API.destroy_encoder)(enc);
|
let _ = (api().destroy_encoder)(enc);
|
||||||
ok
|
ok
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,57 +0,0 @@
|
|||||||
<#
|
|
||||||
.SYNOPSIS
|
|
||||||
Generate the NVENC import library (nvencodeapi.lib) into -OutDir, so the host links with
|
|
||||||
`--features nvenc` on a box that has no NVIDIA Video Codec SDK and no GPU.
|
|
||||||
|
|
||||||
.DESCRIPTION
|
|
||||||
The host links against nvencodeapi.lib (crates/punktfunk-host/build.rs). That import lib is just
|
|
||||||
a link-time stub for two exports of nvEncodeAPI64.dll (the real DLL ships with the NVIDIA driver
|
|
||||||
and resolves at runtime). We synthesise it from nvenc.def:
|
|
||||||
|
|
||||||
1. llvm-dlltool — preferred; LLVM is on the CI runner PATH (C:\Program Files\LLVM\bin) and this
|
|
||||||
works without a Visual Studio developer shell.
|
|
||||||
2. MSVC lib.exe — fallback; located via vswhere (no vcvars needed).
|
|
||||||
|
|
||||||
Point PUNKTFUNK_NVENC_LIB_DIR at -OutDir before `cargo build --features nvenc`.
|
|
||||||
|
|
||||||
.EXAMPLE
|
|
||||||
pwsh -File gen-nvenc-importlib.ps1 -OutDir C:\t\nvenc
|
|
||||||
#>
|
|
||||||
[CmdletBinding()]
|
|
||||||
param(
|
|
||||||
[Parameter(Mandatory = $true)][string]$OutDir,
|
|
||||||
[string]$DefPath = (Join-Path $PSScriptRoot 'nvenc.def')
|
|
||||||
)
|
|
||||||
$ErrorActionPreference = 'Stop'
|
|
||||||
$ProgressPreference = 'SilentlyContinue'
|
|
||||||
$PSNativeCommandUseErrorActionPreference = $false # check $LASTEXITCODE ourselves (pwsh 7.4 safe)
|
|
||||||
|
|
||||||
if (-not (Test-Path $DefPath)) { throw "module-definition file not found: $DefPath" }
|
|
||||||
New-Item -ItemType Directory -Force -Path $OutDir | Out-Null
|
|
||||||
$out = Join-Path $OutDir 'nvencodeapi.lib'
|
|
||||||
|
|
||||||
# 1) llvm-dlltool (preferred) ------------------------------------------------------------------
|
|
||||||
$dlltool = Get-Command llvm-dlltool -ErrorAction SilentlyContinue
|
|
||||||
if ($dlltool) {
|
|
||||||
Write-Host "==> llvm-dlltool -> $out"
|
|
||||||
& $dlltool.Source -m i386:x86-64 -d $DefPath -D nvEncodeAPI64.dll -l $out
|
|
||||||
if ($LASTEXITCODE -ne 0) { throw "llvm-dlltool failed ($LASTEXITCODE)" }
|
|
||||||
Write-Host " ok ($((Get-Item $out).Length) bytes)"
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
# 2) MSVC lib.exe via vswhere (fallback) -------------------------------------------------------
|
|
||||||
$vswhere = Join-Path ${env:ProgramFiles(x86)} 'Microsoft Visual Studio\Installer\vswhere.exe'
|
|
||||||
if (Test-Path $vswhere) {
|
|
||||||
$lib = & $vswhere -latest -prerelease -products * -find 'VC\Tools\MSVC\**\bin\Hostx64\x64\lib.exe' |
|
|
||||||
Select-Object -First 1
|
|
||||||
if ($lib -and (Test-Path $lib)) {
|
|
||||||
Write-Host "==> lib.exe -> $out"
|
|
||||||
& $lib "/def:$DefPath" /machine:x64 "/out:$out"
|
|
||||||
if ($LASTEXITCODE -ne 0) { throw "lib.exe failed ($LASTEXITCODE)" }
|
|
||||||
Write-Host " ok ($((Get-Item $out).Length) bytes)"
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
throw "neither llvm-dlltool (LLVM bin on PATH) nor MSVC lib.exe (via vswhere) was found to build $out"
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
; Module-definition file for the NVENC import library the host links against with `--features nvenc`.
|
|
||||||
;
|
|
||||||
; The real entry points live in nvEncodeAPI64.dll, which ships with the NVIDIA driver. At LINK time
|
|
||||||
; the host only needs an import library exporting these two symbols (see crates/punktfunk-host/build.rs:
|
|
||||||
; it emits `cargo:rustc-link-lib=dylib=nvencodeapi` and searches PUNKTFUNK_NVENC_LIB_DIR). No GPU,
|
|
||||||
; driver, or NVIDIA Video Codec SDK is required to BUILD — only to run, where the DLL resolves from
|
|
||||||
; the installed driver. Generate nvencodeapi.lib from this file with gen-nvenc-importlib.ps1.
|
|
||||||
;
|
|
||||||
; The LIBRARY line names the DLL the import records point at — required for MSVC `lib.exe /def`
|
|
||||||
; (without it the import name would default to "nvenc.dll"). llvm-dlltool takes the name from `-D`.
|
|
||||||
LIBRARY nvEncodeAPI64.dll
|
|
||||||
EXPORTS
|
|
||||||
NvEncodeAPICreateInstance
|
|
||||||
NvEncodeAPIGetMaxSupportedVersion
|
|
||||||
@@ -9,11 +9,11 @@ Helper scripts for the Windows host box (the RTX `.173` lab box, repo at
|
|||||||
powershell -ExecutionPolicy Bypass -File scripts\windows\setup-build-env.ps1
|
powershell -ExecutionPolicy Bypass -File scripts\windows\setup-build-env.ps1
|
||||||
```
|
```
|
||||||
|
|
||||||
Persists (Machine scope) the three vars the NVENC build needs:
|
Persists (Machine scope) the vars the host build needs (NVENC itself needs none — its entry
|
||||||
|
points are runtime-loaded from the driver's `nvEncodeAPI64.dll`):
|
||||||
|
|
||||||
| var | value | why |
|
| var | value | why |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| `PUNKTFUNK_NVENC_LIB_DIR` | `C:\Users\Public\nvenc` | NVENC import lib (`nvencodeapi.lib`) |
|
|
||||||
| `LIBCLANG_PATH` | `C:\Program Files\LLVM\bin` | bindgen (`libclang.dll`) |
|
| `LIBCLANG_PATH` | `C:\Program Files\LLVM\bin` | bindgen (`libclang.dll`) |
|
||||||
| `CMAKE_POLICY_VERSION_MINIMUM` | `3.5` | `audiopus_sys` / cmake crates |
|
| `CMAKE_POLICY_VERSION_MINIMUM` | `3.5` | `audiopus_sys` / cmake crates |
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ Set-Location $repo
|
|||||||
|
|
||||||
# Load the persisted build env (Machine scope) into THIS process, so the build sees it even
|
# Load the persisted build env (Machine scope) into THIS process, so the build sees it even
|
||||||
# if this shell was started before setup-build-env.ps1 ran (env is inherited at spawn time).
|
# if this shell was started before setup-build-env.ps1 ran (env is inherited at spawn time).
|
||||||
foreach ($k in 'PUNKTFUNK_NVENC_LIB_DIR','LIBCLANG_PATH','CMAKE_POLICY_VERSION_MINIMUM') {
|
foreach ($k in 'LIBCLANG_PATH','CMAKE_POLICY_VERSION_MINIMUM') {
|
||||||
$v = [Environment]::GetEnvironmentVariable($k, 'Machine')
|
$v = [Environment]::GetEnvironmentVariable($k, 'Machine')
|
||||||
if ($v) { [Environment]::SetEnvironmentVariable($k, $v, 'Process'); Write-Host "env : $k=$v" }
|
if ($v) { [Environment]::SetEnvironmentVariable($k, $v, 'Process'); Write-Host "env : $k=$v" }
|
||||||
else { Write-Warning "env $k not set (run setup-build-env.ps1)" }
|
else { Write-Warning "env $k not set (run setup-build-env.ps1)" }
|
||||||
|
|||||||
@@ -12,9 +12,9 @@ $admin = ([Security.Principal.WindowsPrincipal][Security.Principal.WindowsIdenti
|
|||||||
).IsInRole([Security.Principal.WindowsBuiltinRole]::Administrator)
|
).IsInRole([Security.Principal.WindowsBuiltinRole]::Administrator)
|
||||||
if (-not $admin) { throw "Run elevated (Machine-scope env requires Administrator)." }
|
if (-not $admin) { throw "Run elevated (Machine-scope env requires Administrator)." }
|
||||||
|
|
||||||
# NVENC import lib (nvencodeapi.lib); libclang for bindgen; cmake policy floor for audiopus_sys.
|
# libclang for bindgen; cmake policy floor for audiopus_sys. (NVENC needs no build-time env:
|
||||||
|
# its entry points are runtime-loaded from the driver's nvEncodeAPI64.dll.)
|
||||||
$vars = [ordered]@{
|
$vars = [ordered]@{
|
||||||
'PUNKTFUNK_NVENC_LIB_DIR' = 'C:\Users\Public\nvenc'
|
|
||||||
'LIBCLANG_PATH' = 'C:\Program Files\LLVM\bin'
|
'LIBCLANG_PATH' = 'C:\Program Files\LLVM\bin'
|
||||||
'CMAKE_POLICY_VERSION_MINIMUM' = '3.5'
|
'CMAKE_POLICY_VERSION_MINIMUM' = '3.5'
|
||||||
# FFMPEG_DIR is only needed for the `amf-qsv` feature (libavcodec). The RTX box builds
|
# FFMPEG_DIR is only needed for the `amf-qsv` feature (libavcodec). The RTX box builds
|
||||||
|
|||||||
Reference in New Issue
Block a user