feat(host/windows): NVENC D3D11 hardware encoder (--features nvenc)
android / android (push) Failing after 36s
ci / rust (push) Failing after 45s
apple / swift (push) Successful in 55s
ci / web (push) Successful in 27s
ci / docs-site (push) Successful in 29s
ci / bench (push) Successful in 1m35s
decky / build-publish (push) Successful in 12s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 5s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 3s
flatpak / build-publish (push) Failing after 2s
deb / build-publish (push) Successful in 3m13s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 1m17s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Failing after 1m32s
docker / deploy-docs (push) Successful in 17s
android / android (push) Failing after 36s
ci / rust (push) Failing after 45s
apple / swift (push) Successful in 55s
ci / web (push) Successful in 27s
ci / docs-site (push) Successful in 29s
ci / bench (push) Successful in 1m35s
decky / build-publish (push) Successful in 12s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 5s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 3s
flatpak / build-publish (push) Failing after 2s
deb / build-publish (push) Successful in 3m13s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 1m17s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Failing after 1m32s
docker / deploy-docs (push) Successful in 17s
Zero-copy capture->encode on the GPU via the raw NVENC API (nvidia_video_codec_sdk sys + ENCODE_API; the safe wrapper is CUDA-only). Opens an NV_ENC_DEVICE_TYPE_DIRECTX session on the SAME ID3D11Device as the DXGI capturer (carried on the new FramePayload::D3d11), registers a pool of BGRA textures once, then copies each captured texture into the pool with CopyResource and calls encode_picture; CBR/ULL, infinite GOP, P-only, forced IDR for RFI. The DXGI capturer gains a D3D11 zero-copy output (selected, like the encoder, by PUNKTFUNK_ENCODER=nvenc) so capture and encode share textures. OFF by default (the nvenc feature pulls in the NVENC SDK + cudarc): the default Windows host links without it (openh264 path). cudarc builds without a CUDA toolkit via the SDK's ci-check feature (dynamic loading). At link time, --features nvenc needs nvencodeapi.lib (from the NVENC SDK, or an import lib generated from the driver's nvEncodeAPI64.dll) in PUNKTFUNK_NVENC_LIB_DIR. Both the default and the --features nvenc builds were validated to compile and link without a GPU on the VM (import lib generated from the driver DLL). Runtime needs a real NVIDIA GPU. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -16,8 +16,9 @@ use windows::Win32::Foundation::{HMODULE, LUID};
|
||||
use windows::Win32::Graphics::Direct3D::{D3D_DRIVER_TYPE_UNKNOWN, D3D_FEATURE_LEVEL_11_0};
|
||||
use windows::Win32::Graphics::Direct3D11::{
|
||||
D3D11CreateDevice, ID3D11Device, ID3D11DeviceContext, ID3D11Texture2D, D3D11_BIND_FLAG,
|
||||
D3D11_CPU_ACCESS_READ, D3D11_CREATE_DEVICE_BGRA_SUPPORT, D3D11_MAPPED_SUBRESOURCE, D3D11_MAP_READ,
|
||||
D3D11_SDK_VERSION, D3D11_TEXTURE2D_DESC, D3D11_USAGE_STAGING,
|
||||
D3D11_BIND_RENDER_TARGET, D3D11_CPU_ACCESS_READ, D3D11_CREATE_DEVICE_BGRA_SUPPORT,
|
||||
D3D11_MAPPED_SUBRESOURCE, D3D11_MAP_READ, D3D11_SDK_VERSION, D3D11_TEXTURE2D_DESC,
|
||||
D3D11_USAGE_DEFAULT, D3D11_USAGE_STAGING,
|
||||
};
|
||||
use windows::Win32::Graphics::Dxgi::Common::{DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_SAMPLE_DESC};
|
||||
use windows::Win32::Graphics::Dxgi::{
|
||||
@@ -78,6 +79,13 @@ pub struct DuplCapturer {
|
||||
active: AtomicBool,
|
||||
timeout_ms: u32,
|
||||
last: Option<Vec<u8>>,
|
||||
/// GPU-output mode (zero-copy → NVENC): produce `FramePayload::D3d11` instead of CPU BGRA.
|
||||
/// Selected by `PUNKTFUNK_ENCODER=nvenc` so the capturer's output matches the encoder's input.
|
||||
gpu_mode: bool,
|
||||
/// Reused owned texture the duplication frame is copied into for the D3D11 path (the duplication
|
||||
/// surface is transient and released each frame).
|
||||
gpu_copy: Option<ID3D11Texture2D>,
|
||||
have_gpu_frame: bool,
|
||||
_keepalive: Box<dyn Send>,
|
||||
}
|
||||
// COM objects used only from the one thread that owns the capturer (the encode thread).
|
||||
@@ -154,12 +162,16 @@ impl DuplCapturer {
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or((2000 / refresh_hz.max(1)).max(100));
|
||||
let gpu_mode = std::env::var("PUNKTFUNK_ENCODER")
|
||||
.map(|v| matches!(v.to_ascii_lowercase().as_str(), "nvenc" | "hw" | "nvidia"))
|
||||
.unwrap_or(false);
|
||||
tracing::info!(
|
||||
"DXGI duplication: {}x{}@{} on {}",
|
||||
"DXGI duplication: {}x{}@{} on {} ({})",
|
||||
width,
|
||||
height,
|
||||
refresh_hz,
|
||||
target.gdi_name
|
||||
target.gdi_name,
|
||||
if gpu_mode { "D3D11 zero-copy" } else { "CPU staging" }
|
||||
);
|
||||
Ok(Self {
|
||||
device,
|
||||
@@ -174,6 +186,9 @@ impl DuplCapturer {
|
||||
active: AtomicBool::new(false),
|
||||
timeout_ms,
|
||||
last: None,
|
||||
gpu_mode,
|
||||
gpu_copy: None,
|
||||
have_gpu_frame: false,
|
||||
_keepalive: keepalive,
|
||||
})
|
||||
}
|
||||
@@ -206,6 +221,33 @@ impl DuplCapturer {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Lazily creates the owned GPU texture that duplication frames are copied into on the
/// D3D11 (GPU-output) path, and caches it in `self.gpu_copy`.
///
/// If `self.gpu_copy` is already populated this is a no-op. Otherwise a single-mip,
/// single-slice, non-multisampled `DXGI_FORMAT_B8G8R8A8_UNORM` texture of
/// `self.width` x `self.height` is created on `self.device` with default usage and no CPU
/// access. The texture is sized once; this function never recreates it after it exists.
///
/// # Errors
///
/// Returns the `CreateTexture2D` failure wrapped with the context
/// `"CreateTexture2D(gpu copy)"`. If the call succeeds but yields no texture,
/// `self.gpu_copy` stays `None` and `Ok(())` is still returned.
///
/// # Safety
///
/// Calls a raw D3D11 COM method on `self.device`.
/// NOTE(review): presumably must run on the thread that owns the capturer — confirm.
unsafe fn ensure_gpu_copy(&mut self) -> Result<()> {
|
||||
// Already created on an earlier call: reuse it.
if self.gpu_copy.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
let desc = D3D11_TEXTURE2D_DESC {
|
||||
Width: self.width,
|
||||
Height: self.height,
|
||||
MipLevels: 1,
|
||||
ArraySize: 1,
|
||||
Format: DXGI_FORMAT_B8G8R8A8_UNORM,
|
||||
SampleDesc: DXGI_SAMPLE_DESC {
|
||||
Count: 1,
|
||||
|
||||
Quality: 0,
|
||||
},
|
||||
Usage: D3D11_USAGE_DEFAULT,
|
||||
// NOTE(review): render-target binding is presumably needed so the encoder can
// register this texture as an input resource — confirm against the NVENC docs.
BindFlags: D3D11_BIND_RENDER_TARGET.0 as u32,
|
||||
// No CPU access is requested for this texture.
CPUAccessFlags: 0,
|
||||
MiscFlags: 0,
|
||||
};
|
||||
let mut t: Option<ID3D11Texture2D> = None;
|
||||
self.device
|
||||
.CreateTexture2D(&desc, None, Some(&mut t))
|
||||
.context("CreateTexture2D(gpu copy)")?;
|
||||
// Cache whatever the call produced so later calls return early.
self.gpu_copy = t;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
unsafe fn recreate_dupl(&mut self) -> Result<()> {
|
||||
if self.holding_frame {
|
||||
let _ = self.dupl.ReleaseFrame();
|
||||
@@ -238,6 +280,26 @@ impl DuplCapturer {
|
||||
self.holding_frame = true;
|
||||
let res = res.context("AcquireNextFrame: null resource")?;
|
||||
let tex: ID3D11Texture2D = res.cast().context("resource -> Texture2D")?;
|
||||
if self.gpu_mode {
|
||||
// Zero-copy path: keep the frame on the GPU for NVENC. Copy the transient duplication
|
||||
// surface into a reused owned texture, release the duplication frame, hand off the texture.
|
||||
self.ensure_gpu_copy()?;
|
||||
let gpu = self.gpu_copy.clone().context("gpu copy texture")?;
|
||||
self.context.CopyResource(&gpu, &tex);
|
||||
let _ = self.dupl.ReleaseFrame();
|
||||
self.holding_frame = false;
|
||||
self.have_gpu_frame = true;
|
||||
return Ok(Some(CapturedFrame {
|
||||
width: self.width,
|
||||
height: self.height,
|
||||
pts_ns: now_ns(),
|
||||
format: PixelFormat::Bgra,
|
||||
payload: FramePayload::D3d11(D3d11Frame {
|
||||
texture: gpu,
|
||||
device: self.device.clone(),
|
||||
}),
|
||||
}));
|
||||
}
|
||||
self.ensure_staging()?;
|
||||
let staging = self.staging.clone().context("staging texture")?;
|
||||
self.context.CopyResource(&staging, &tex);
|
||||
@@ -277,6 +339,20 @@ impl Capturer for DuplCapturer {
|
||||
if let Some(f) = unsafe { self.acquire() }? {
|
||||
return Ok(f);
|
||||
}
|
||||
if self.gpu_mode && self.have_gpu_frame {
|
||||
if let Some(gpu) = &self.gpu_copy {
|
||||
return Ok(CapturedFrame {
|
||||
width: self.width,
|
||||
height: self.height,
|
||||
pts_ns: now_ns(),
|
||||
format: PixelFormat::Bgra,
|
||||
payload: FramePayload::D3d11(D3d11Frame {
|
||||
texture: gpu.clone(),
|
||||
device: self.device.clone(),
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
if let Some(b) = &self.last {
|
||||
return Ok(CapturedFrame {
|
||||
width: self.width,
|
||||
|
||||
Reference in New Issue
Block a user