From 019f2677a748a6c0d59f826847c6eaa3b9f03f7a Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Thu, 2 Jul 2026 13:57:18 +0200 Subject: [PATCH] =?UTF-8?q?feat(host,web):=20multi-GPU=20selection=20?= =?UTF-8?q?=E2=80=94=20GPU=20inventory=20+=20preference=20API,=20web-conso?= =?UTF-8?q?le=20GPU=20card?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - new crate::gpu (compiled on all platforms so the OpenAPI doc stays platform-independent): DXGI / sysfs GPU inventory with reboot-stable ids (PCI vendor:device + occurrence — LUIDs are per-boot), persisted auto/manual preference (/gpu-settings.json, atomic temp+rename with in-memory rollback), one selection with precedence console preference > PUNKTFUNK_RENDER_ADAPTER > max VRAM and graceful fallback when the preferred GPU is absent, plus a live "in use" record (RAII session guard wrapped around every encoder open_video returns) - fix: windows_gpu_vendor derived the encoder backend from DXGI adapter 0 instead of the selected render adapter — on a hybrid box (e.g. Intel iGPU at index 0 + NVIDIA dGPU) the backend could disagree with the GPU the capture ring / IddCx render pin sit on. 
The NVENC 4:4:4 probe now also runs on the selected adapter (was: OS default), the codec/4:4:4 probe caches are keyed per selected GPU (were process-lifetime OnceLocks), and an explicit PUNKTFUNK_ENCODER conflicting with the selected GPU's vendor warns up front - mgmt API: GET /api/v1/gpus (inventory + mode + preferred + next-session selection with reason + in-use GPU/backend/session-count) and PUT /api/v1/gpus/preference (validates mode/gpu_id before writing); openapi.json regenerated; the vdisplay render pin now also engages for a console preference (not just the env pin) - web console: GPU card on the Host page — list with vendor + VRAM, Automatic / Prefer controls, Preferred / Next session / "In use · backend" badges, missing-preferred-GPU warning and env-pin note; en + de messages - Linux: a matched manual preference picks the VAAPI render node and the NVENC-vs-VAAPI auto choice; auto mode is exactly the previous behavior Validated live on the hybrid laptop (RTX 3500 Ada + Intel Arc Pro, which enumerates twice — the occurrence ids disambiguate): enumerate, prefer, bad-id 400, restart persistence, auto-restore keeping the stored pick. 
Co-Authored-By: Claude Fable 5 --- CLAUDE.md | 20 +- api/openapi.json | 281 +++++++ .../src/capture/windows/idd_push.rs | 6 +- crates/punktfunk-host/src/encode.rs | 386 ++++++--- .../punktfunk-host/src/encode/linux/vaapi.rs | 9 +- .../src/encode/windows/ffmpeg_win.rs | 5 + .../src/encode/windows/nvenc.rs | 65 +- crates/punktfunk-host/src/gpu.rs | 757 ++++++++++++++++++ crates/punktfunk-host/src/main.rs | 1 + crates/punktfunk-host/src/mgmt.rs | 288 +++++++ .../src/vdisplay/windows/manager.rs | 13 +- .../punktfunk-host/src/windows/win_adapter.rs | 71 +- web/messages/de.json | 10 + web/messages/en.json | 10 + web/src/sections/Host/GpuCard.tsx | 146 ++++ web/src/sections/Host/index.tsx | 5 +- web/src/sections/Host/view.tsx | 8 +- 17 files changed, 1881 insertions(+), 200 deletions(-) create mode 100644 crates/punktfunk-host/src/gpu.rs create mode 100644 web/src/sections/Host/GpuCard.tsx diff --git a/CLAUDE.md b/CLAUDE.md index a782de2..6ebc747 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -107,12 +107,22 @@ Low-latency desktop/game streaming stack, Linux-first, with a shared Rust protoc session for secure-desktop (UAC/lock-screen) capture (`windows/service.rs`), bundles the pf-vdisplay driver + the FFmpeg DLLs (+ VB-CABLE for the virtual mic), and is published by `windows-host.yml`. 
**Encoder is GPU-aware** (`encode.rs` `open_video` + `windows_resolved_backend`): - `PUNKTFUNK_ENCODER=auto` (the host.env default) detects the DXGI adapter vendor → **NVENC** (NVIDIA, - direct SDK, `encode/windows/nvenc.rs`), **AMF** (AMD) / **QSV** (Intel) via libavcodec + `PUNKTFUNK_ENCODER=auto` (the host.env default) reads the **selected render adapter's** vendor → + **NVENC** (NVIDIA, direct SDK, `encode/windows/nvenc.rs`), **AMF** (AMD) / **QSV** (Intel) via libavcodec (`encode/windows/ffmpeg_win.rs`, the Windows analogue of the Linux VAAPI backend — `WinVendor{Amf,Qsv}`, system-memory NV12/P010 readback default + opt-in zero-copy D3D11 behind `PUNKTFUNK_ZEROCOPY` with a system fallback), or software H.264 (`encode/sw.rs`, GPU-less). GameStream codec advertisement is - probed per-GPU on AMF/QSV (`windows_codec_support` → `serverinfo`, AV1 gated). **HDR (10-bit)**: WGC + probed per-GPU on AMF/QSV (`windows_codec_support` → `serverinfo`, AV1 gated; cached per selected + GPU). **Multi-GPU is first-class** (`gpu.rs`): GPU inventory + a persisted auto/manual preference + (`/gpu-settings.json`, stored by stable PCI identity — LUIDs are per-boot) exposed over + `GET /api/v1/gpus` + `PUT /api/v1/gpus/preference` and a web-console GPU card (Host page: list, + Automatic/Prefer, "In use · backend" badge). One selection — precedence **console preference > + `PUNKTFUNK_RENDER_ADAPTER` > max VRAM**, graceful fallback when the preferred GPU is absent — + feeds `win_adapter::resolve_render_adapter_luid` (capture ring + IddCx render pin), the encoder + vendor auto-detect (previously DXGI adapter 0 — wrong on hybrid boxes like NVIDIA dGPU + Intel + Arc iGPU), and the NVENC 4:4:4 probe; a preference change applies to the next session. On Linux a + matched manual preference picks the VAAPI render node / NVENC-vs-VAAPI auto choice (auto mode + unchanged). 
*Implemented + unit-tested; not yet on-glass validated on the hybrid box.* **HDR (10-bit)**: WGC captures the HDR desktop as FP16/Rgb10a2 (DDA FP16 for the secure desktop), the encoder forces HEVC Main10 + BT.2020 PQ (NVENC ABGR10/P010; AMF/QSV P010 + a swscale Rgb10a2→P010 fallback), the client auto-detects PQ from the HEVC VUI — gated by `PUNKTFUNK_10BIT` + client `VIDEO_CAP_10BIT`; **Windows @@ -356,7 +366,7 @@ crates/punktfunk-host/ encode/linux/{mod,vaapi}.rs · encode/windows/{nvenc,ffmpeg_win}.rs · encode/sw.rs per-GPU encoders (NVENC/CUDA · VAAPI · AMF/QSV) + GPU-less openh264 capture/{linux/,windows/{dxgi,idd_push}}.rs · audio/{linux/,windows/wasapi_*}.rs windows/{service,install,interactive}.rs SCM service + in-binary driver/web install - capture.rs · encode.rs · audio.rs · spike.rs · punktfunk1.rs · mgmt.rs · native_pairing.rs · stats_recorder.rs · library.rs + capture.rs · encode.rs · audio.rs · gpu.rs · spike.rs · punktfunk1.rs · mgmt.rs · native_pairing.rs · stats_recorder.rs · library.rs clients/probe/ punktfunk/1 reference/probe client (headless test/measurement tool) clients/linux/ native Linux client (GTK4/libadwaita · FFmpeg · PipeWire · SDL3) clients/windows/ native Windows client (WinUI 3 via windows-reactor · D3D11 · WASAPI · SDL3) @@ -364,7 +374,7 @@ clients/apple/ native macOS/iOS/tvOS client (Swift · VideoToolbox · GameCon clients/android/ native Android client (Kotlin app + native/ Rust JNI core over punktfunk-core) clients/decky/ Steam Deck Decky plugin packaging/windows/drivers/{pf-vdisplay,pf-dualsense,pf-xusb}/ in-house UMDF drivers (built from source in CI) -web/ TanStack web console over the mgmt API (status · devices · pairing · performance graphs) +web/ TanStack web console over the mgmt API (status · devices · pairing · GPU selection · performance graphs) packaging/ apt(deb) · RPM/COPR · Arch/sysext · Flatpak · Bazzite bootc · Windows host installer (per-dir READMEs) tools/{loss-harness,latency-probe}/ measurement (plan §10) 
scripts/ 60-punktfunk.rules · punktfunk-host.service · host.env.example · headless/ diff --git a/api/openapi.json b/api/openapi.json index 530ff90..43ce110 100644 --- a/api/openapi.json +++ b/api/openapi.json @@ -138,6 +138,100 @@ } } }, + "/api/v1/gpus": { + "get": { + "tags": [ + "gpu" + ], + "summary": "GPU inventory and selection", + "description": "Lists the host's hardware GPUs, the persisted auto/manual preference, the GPU the next session\nwill use (and why), and the GPU live sessions encode on right now.", + "operationId": "listGpus", + "responses": { + "200": { + "description": "GPU inventory + selection state", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GpuState" + } + } + } + }, + "401": { + "description": "Missing or invalid bearer token", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + } + } + }, + "/api/v1/gpus/preference": { + "put": { + "tags": [ + "gpu" + ], + "summary": "Set the GPU preference", + "description": "`auto` restores automatic selection (`PUNKTFUNK_RENDER_ADAPTER` pin, else max dedicated VRAM);\n`manual` pins capture + encode to the given GPU. Persisted across restarts; applies to the\n**next** session (a running session keeps its GPU). 
If the preferred GPU is absent at session\nstart the host falls back to automatic selection rather than failing.", + "operationId": "setGpuPreference", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SetGpuPreference" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Preference stored; the new selection state", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GpuState" + } + } + } + }, + "400": { + "description": "Unknown mode, or `gpu_id` missing / not a listed GPU", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "401": { + "description": "Missing or invalid bearer token", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "500": { + "description": "Preference could not be persisted", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + } + } + }, "/api/v1/health": { "get": { "tags": [ @@ -1373,6 +1467,40 @@ }, "components": { "schemas": { + "ApiActiveGpu": { + "type": "object", + "description": "The GPU live sessions are encoding on right now.", + "required": [ + "id", + "name", + "vendor", + "backend", + "sessions" + ], + "properties": { + "backend": { + "type": "string", + "description": "The encode backend in use (`nvenc` | `amf` | `qsv` | `vaapi` | `software`)." + }, + "id": { + "type": "string", + "description": "Stable id matching an entry of `gpus` (empty for the CPU/software encoder)." + }, + "name": { + "type": "string" + }, + "sessions": { + "type": "integer", + "format": "int32", + "description": "Number of live encode sessions on it.", + "minimum": 0 + }, + "vendor": { + "type": "string", + "description": "`nvidia` | `amd` | `intel` | `other`." 
+ } + } + }, "ApiCodec": { "type": "string", "description": "Video codec identifier.", @@ -1394,6 +1522,64 @@ } } }, + "ApiGpu": { + "type": "object", + "description": "One hardware GPU on the host (software/WARP adapters are never listed).", + "required": [ + "id", + "name", + "vendor", + "vram_mb" + ], + "properties": { + "id": { + "type": "string", + "description": "Stable identifier (`vendorid-deviceid-occurrence`, hex PCI ids) — pass to `setGpuPreference`.\nStable across reboots and driver updates, unlike an adapter index or LUID.", + "example": "10de-2c05-0" + }, + "name": { + "type": "string", + "description": "Adapter/marketing name.", + "example": "NVIDIA GeForce RTX 5070 Ti" + }, + "vendor": { + "type": "string", + "description": "`nvidia` | `amd` | `intel` | `other`." + }, + "vram_mb": { + "type": "integer", + "format": "int64", + "description": "Dedicated VRAM in MiB (0 where the platform doesn't expose it).", + "minimum": 0 + } + } + }, + "ApiSelectedGpu": { + "type": "object", + "description": "The GPU the **next** session's pipeline will be created on, and why. (A preference change\napplies to the next session; a running session keeps the GPU it opened on.)", + "required": [ + "id", + "name", + "vendor", + "source" + ], + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "source": { + "type": "string", + "description": "Why this GPU was selected: `preference` (the manual choice), `env`\n(`PUNKTFUNK_RENDER_ADAPTER`), `auto` (max dedicated VRAM / platform default), or\n`preference_missing` (a manual choice is set but that GPU is absent — auto-selected\ninstead so the host keeps streaming)." + }, + "vendor": { + "type": "string", + "description": "`nvidia` | `amd` | `intel` | `other`." + } + } + }, "ApprovePending": { "type": "object", "description": "Approve-pending-device request body. 
Send `{}` to keep the device's own name.", @@ -1671,6 +1857,75 @@ } } }, + "GpuState": { + "type": "object", + "description": "Full GPU-selection state for the console: inventory, the persisted preference, what the next\nsession will use, and what is in use right now.", + "required": [ + "gpus", + "mode", + "preferred_available" + ], + "properties": { + "active": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/ApiActiveGpu", + "description": "The GPU live sessions use right now (absent while nothing is streaming)." + } + ] + }, + "env_override": { + "type": [ + "string", + "null" + ], + "description": "`PUNKTFUNK_RENDER_ADAPTER` (the host.env pin), when set — it applies while `mode` is\n`auto`; a manual preference overrides it." + }, + "gpus": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ApiGpu" + }, + "description": "The host's hardware GPUs." + }, + "mode": { + "type": "string", + "description": "`auto` or `manual`." + }, + "preferred_available": { + "type": "boolean", + "description": "Whether the preferred GPU is currently present." + }, + "preferred_id": { + "type": [ + "string", + "null" + ], + "description": "The manually preferred GPU's stable id, when one is stored (kept while `mode` is `auto` so\na console can offer returning to it). May reference a GPU that is currently absent." + }, + "preferred_name": { + "type": [ + "string", + "null" + ], + "description": "The stored name of the preferred GPU (a usable label even when it is absent)." + }, + "selected": { + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "#/components/schemas/ApiSelectedGpu", + "description": "The GPU the next session will use." 
+ } + ] + } + } + }, "Health": { "type": "object", "description": "Liveness + version probe.", @@ -2047,6 +2302,28 @@ } } }, + "SetGpuPreference": { + "type": "object", + "description": "Request body for `setGpuPreference`.", + "required": [ + "mode" + ], + "properties": { + "gpu_id": { + "type": [ + "string", + "null" + ], + "description": "Required when `mode` is `manual`: the stable `id` of a currently listed GPU\n(see `listGpus`).", + "example": "10de-2c05-0" + }, + "mode": { + "type": "string", + "description": "`auto` (env pin, else max dedicated VRAM — the default) or `manual`.", + "example": "manual" + } + } + }, "StageTiming": { "type": "object", "description": "One pipeline stage's latency in an aggregation window (microseconds).", @@ -2267,6 +2544,10 @@ "name": "host", "description": "Host identity, capabilities, and liveness" }, + { + "name": "gpu", + "description": "GPU inventory and selection: list the host's GPUs, choose automatic or a preferred GPU, see the one in use" + }, { "name": "clients", "description": "Paired Moonlight client management" diff --git a/crates/punktfunk-host/src/capture/windows/idd_push.rs b/crates/punktfunk-host/src/capture/windows/idd_push.rs index e2c1712..cd42951 100644 --- a/crates/punktfunk-host/src/capture/windows/idd_push.rs +++ b/crates/punktfunk-host/src/capture/windows/idd_push.rs @@ -1020,11 +1020,9 @@ pub fn spawn_observer(target: WinCaptureTarget, preferred: Option<(u32, u32, u32 }); } -/// The discrete render GPU LUID (where NVENC runs), falling back to the monitor's `OsAdapterLuid`. +/// The selected render GPU LUID (where the encoder runs), falling back to the monitor's `OsAdapterLuid`. fn resolve_render_adapter_luid_or(fallback_packed: i64) -> LUID { - // SAFETY: `resolve_render_adapter_luid` is an `unsafe fn` (it enumerates DXGI adapters) that takes no - // arguments and returns an owned `Option`, borrowing nothing. 
- if let Some(l) = unsafe { crate::win_adapter::resolve_render_adapter_luid() } { + if let Some(l) = crate::win_adapter::resolve_render_adapter_luid() { return l; } LUID { diff --git a/crates/punktfunk-host/src/encode.rs b/crates/punktfunk-host/src/encode.rs index a011a61..4b967c6 100644 --- a/crates/punktfunk-host/src/encode.rs +++ b/crates/punktfunk-host/src/encode.rs @@ -259,6 +259,127 @@ pub fn open_video( cuda: bool, bit_depth: u8, chroma: ChromaFormat, +) -> Result> { + let inner = open_video_backend( + codec, + format, + width, + height, + fps, + bitrate_bps, + cuda, + bit_depth, + chroma, + )?; + // Record what this session encodes on (the mgmt API's "currently used GPU"): the backend label + // mirrors the dispatch `open_video_backend` just took, the GPU identity is the same selection + // the capturer was created on ([`crate::gpu::selected_gpu`]). Dropping the returned encoder + // ends the record, so the live count is correct by construction. + let backend = resolved_backend_label(cuda); + let gpu = if backend == "software" { + crate::gpu::ActiveGpu { + id: String::new(), + name: "CPU (openh264)".into(), + vendor_id: 0, + backend, + } + } else { + match crate::gpu::selected_gpu() { + Some(sel) => crate::gpu::ActiveGpu { + id: sel.info.id, + name: sel.info.name, + vendor_id: sel.info.vendor_id, + backend, + }, + None => crate::gpu::ActiveGpu { + id: String::new(), + name: "GPU".into(), + vendor_id: 0, + backend, + }, + } + }; + Ok(Box::new(TrackedEncoder { + inner, + _session: crate::gpu::session_begin(gpu), + })) +} + +/// The display label of the backend [`open_video_backend`] resolves — kept in lockstep with its +/// dispatch (`windows_resolved_backend` on Windows; the `PUNKTFUNK_ENCODER`/auto match on Linux). 
+#[cfg(target_os = "windows")] +fn resolved_backend_label(_cuda: bool) -> &'static str { + match windows_resolved_backend() { + WindowsBackend::Nvenc => "nvenc", + WindowsBackend::Amf => "amf", + WindowsBackend::Qsv => "qsv", + WindowsBackend::Software => "software", + } +} + +#[cfg(target_os = "linux")] +fn resolved_backend_label(cuda: bool) -> &'static str { + match crate::config::config().encoder_pref.as_str() { + "nvenc" | "nvidia" | "cuda" => "nvenc", + "vaapi" | "amd" | "intel" => "vaapi", + "software" | "sw" | "openh264" => "software", + _ => { + if cuda || !linux_auto_is_vaapi() { + "nvenc" + } else { + "vaapi" + } + } + } +} + +#[cfg(not(any(target_os = "linux", target_os = "windows")))] +fn resolved_backend_label(_cuda: bool) -> &'static str { + "none" +} + +/// Ties the [`crate::gpu`] live-session record to the encoder's lifetime; pure delegation +/// otherwise. +struct TrackedEncoder { + inner: Box, + _session: crate::gpu::ActiveSession, +} + +impl Encoder for TrackedEncoder { + fn submit(&mut self, frame: &CapturedFrame) -> Result<()> { + self.inner.submit(frame) + } + fn caps(&self) -> EncoderCaps { + self.inner.caps() + } + fn request_keyframe(&mut self) { + self.inner.request_keyframe() + } + fn set_hdr_meta(&mut self, meta: Option) { + self.inner.set_hdr_meta(meta) + } + fn invalidate_ref_frames(&mut self, first_frame: i64, last_frame: i64) -> bool { + self.inner.invalidate_ref_frames(first_frame, last_frame) + } + fn poll(&mut self) -> Result> { + self.inner.poll() + } + fn flush(&mut self) -> Result<()> { + self.inner.flush() + } +} + +#[allow(clippy::too_many_arguments)] +fn open_video_backend( + codec: Codec, + format: PixelFormat, + width: u32, + height: u32, + fps: u32, + bitrate_bps: u64, + cuda: bool, + bit_depth: u8, + chroma: ChromaFormat, ) -> Result> { validate_dimensions(codec, width, height)?; // Refresh/fps must be positive and sane: fps feeds the encoder time_base (`Rational(1, fps)`) @@ -330,9 +451,10 @@ pub fn open_video( .map(|e| 
Box::new(e) as Box) } "auto" | "" => { - // A CUDA frame can ONLY be consumed by NVENC, and a box with the NVIDIA device - // nodes always prefers it. Everything else (AMD/Intel) takes the VAAPI path. - if cuda || nvidia_present() { + // A CUDA frame can ONLY be consumed by NVENC. Otherwise the shared auto decision + // (manual web-console GPU preference, else the NVIDIA-presence probe) picks the + // backend — see `linux_auto_is_vaapi`. + if cuda || !linux_auto_is_vaapi() { open_nvenc_probed( codec, format, @@ -357,8 +479,30 @@ pub fn open_video( { let _ = cuda; // always false on Windows (no Cuda payload) // NVIDIA → NVENC (direct SDK), AMD → AMF, Intel → QSV (both libavcodec), else → software - // H.264. `auto` (the default) resolves from the DXGI adapter vendor. - match windows_resolved_backend() { + // H.264. `auto` (the default) resolves from the selected render adapter's vendor. + let backend = windows_resolved_backend(); + // With `auto` the backend is derived from the selected GPU, so this can only fire when an + // explicit PUNKTFUNK_ENCODER contradicts the GPU the pipeline sits on (e.g. `nvenc` forced + // while the web-console preference pins the Intel iGPU) — the open below will then fail on + // a wrong-vendor device; say why up front instead of leaving an opaque encoder error. + if let Some(sel) = crate::gpu::selected_gpu() { + let mismatched = match backend { + WindowsBackend::Nvenc => sel.info.vendor_id != crate::gpu::VENDOR_NVIDIA, + WindowsBackend::Amf => sel.info.vendor_id != crate::gpu::VENDOR_AMD, + WindowsBackend::Qsv => sel.info.vendor_id != crate::gpu::VENDOR_INTEL, + WindowsBackend::Software => false, + }; + if mismatched { + tracing::warn!( + adapter = sel.info.name, + ?backend, + "encoder backend does not match the selected GPU's vendor (explicit \ + PUNKTFUNK_ENCODER conflicting with the GPU preference?) 
— the encoder \ + open will likely fail on this device" + ); + } + } + match backend { WindowsBackend::Nvenc => { // Hardware path: NVENC over D3D11. The DXGI capturer switches to its zero-copy // FramePayload::D3d11 output under the same env var so capture + encode share textures. @@ -422,8 +566,8 @@ pub fn open_video( "the Windows software encoder supports H.264 only; client negotiated {codec:?} \ (build a GPU backend: --features nvenc or amf-qsv, or request H264)" ); - let _ = bit_depth; // the software H.264 path is 8-bit only - // Software H.264 realistically caps far below the negotiated hardware rates. + let _ = (bit_depth, chroma); // the software H.264 path is 8-bit 4:2:0 only + // Software H.264 realistically caps far below the negotiated hardware rates. const SW_BITRATE_CEIL: u64 = 100_000_000; sw::OpenH264Encoder::open( format, @@ -518,6 +662,22 @@ fn nvidia_present() -> bool { std::path::Path::new("/dev/nvidiactl").exists() || std::path::Path::new("/dev/nvidia0").exists() } +/// The `auto` Linux backend decision, shared by [`open_video`] and [`linux_zero_copy_is_vaapi`]: +/// a manual web-console GPU preference (when that GPU is present — [`crate::gpu::manual_selection`]) +/// picks its vendor's backend — AMD/Intel → VAAPI on that GPU's render node, NVIDIA → NVENC (still +/// requiring the proprietary driver's device nodes; a nouveau NVIDIA GPU can't NVENC) — otherwise +/// today's NVIDIA-presence probe, unchanged. +#[cfg(target_os = "linux")] +fn linux_auto_is_vaapi() -> bool { + if let Some(g) = crate::gpu::manual_selection() { + if g.vendor_id == crate::gpu::VENDOR_NVIDIA { + return !nvidia_present(); + } + return true; + } + !nvidia_present() +} + /// True if the Linux GPU encode backend resolves to VAAPI (AMD/Intel) rather than NVENC — mirrors /// [`open_video`]'s dispatch so the capturer can choose the matching zero-copy path (raw dmabuf /// passthrough for VAAPI vs the EGL→CUDA import for NVENC). 
@@ -526,7 +686,7 @@ pub fn linux_zero_copy_is_vaapi() -> bool { match crate::config::config().encoder_pref.as_str() { "nvenc" | "nvidia" | "cuda" => false, "vaapi" | "amd" | "intel" => true, - _ => !nvidia_present(), + _ => linux_auto_is_vaapi(), } } @@ -571,56 +731,63 @@ pub fn vaapi_codec_support() -> CodecSupport { /// so it must be probed, never assumed). Non-HEVC codecs are always `false`. #[cfg(any(target_os = "linux", target_os = "windows"))] pub fn can_encode_444(codec: Codec) -> bool { - use std::sync::OnceLock; + use std::collections::HashMap; + use std::sync::{Mutex, OnceLock}; if codec != Codec::H265 { return false; } - static CACHE: OnceLock = OnceLock::new(); - *CACHE.get_or_init(|| { - let supported = { - #[cfg(target_os = "linux")] - { - // Mirror open_video's backend dispatch: VAAPI (AMD/Intel) vs NVENC (NVIDIA). - if linux_zero_copy_is_vaapi() { - vaapi::probe_can_encode_444(codec) - } else { - linux::probe_can_encode_444(codec) - } + // Cached per selected GPU (was a process-lifetime OnceLock): a web-console preference change + // re-probes on the newly selected adapter before the next Welcome. + static CACHE: OnceLock>> = OnceLock::new(); + let key = crate::gpu::selection_key(); + let cache = CACHE.get_or_init(|| Mutex::new(HashMap::new())); + if let Some(v) = cache.lock().unwrap().get(&key) { + return *v; + } + let supported = { + #[cfg(target_os = "linux")] + { + // Mirror open_video's backend dispatch: VAAPI (AMD/Intel) vs NVENC (NVIDIA). 
+ if linux_zero_copy_is_vaapi() { + vaapi::probe_can_encode_444(codec) + } else { + linux::probe_can_encode_444(codec) } - #[cfg(target_os = "windows")] - { - match windows_resolved_backend() { - WindowsBackend::Nvenc => { - #[cfg(feature = "nvenc")] - { - nvenc::probe_can_encode_444(codec) - } - #[cfg(not(feature = "nvenc"))] - { - false - } + } + #[cfg(target_os = "windows")] + { + match windows_resolved_backend() { + WindowsBackend::Nvenc => { + #[cfg(feature = "nvenc")] + { + nvenc::probe_can_encode_444(codec) } - WindowsBackend::Amf | WindowsBackend::Qsv => { - #[cfg(feature = "amf-qsv")] - { - let vendor = match windows_resolved_backend() { - WindowsBackend::Qsv => ffmpeg_win::WinVendor::Qsv, - _ => ffmpeg_win::WinVendor::Amf, - }; - ffmpeg_win::probe_can_encode_444(vendor, codec) - } - #[cfg(not(feature = "amf-qsv"))] - { - false - } + #[cfg(not(feature = "nvenc"))] + { + false } - WindowsBackend::Software => false, } + WindowsBackend::Amf | WindowsBackend::Qsv => { + #[cfg(feature = "amf-qsv")] + { + let vendor = match windows_resolved_backend() { + WindowsBackend::Qsv => ffmpeg_win::WinVendor::Qsv, + _ => ffmpeg_win::WinVendor::Amf, + }; + ffmpeg_win::probe_can_encode_444(vendor, codec) + } + #[cfg(not(feature = "amf-qsv"))] + { + false + } + } + WindowsBackend::Software => false, } - }; - tracing::info!(supported, "HEVC 4:4:4 encode capability probed"); - supported - }) + } + }; + tracing::info!(supported, "HEVC 4:4:4 encode capability probed"); + cache.lock().unwrap().insert(key, supported); + supported } /// Non-Linux/Windows (the macOS dev/test build of the host — synthetic-source loopback only): @@ -632,7 +799,9 @@ pub fn can_encode_444(_codec: Codec) -> bool { // --------------------------------------------------------------------------------------------- // Windows backend selection (the analogue of the Linux nvidia_present / linux_zero_copy_is_vaapi -// logic). NVIDIA → NVENC, AMD → AMF, Intel → QSV; `auto` (default) reads the DXGI adapter vendor. 
+// logic). NVIDIA → NVENC, AMD → AMF, Intel → QSV; `auto` (default) reads the vendor of the +// SELECTED render adapter (crate::gpu — web-console preference / env pin / max VRAM), so the +// backend always matches the GPU the capture ring and virtual display sit on. // --------------------------------------------------------------------------------------------- #[cfg(target_os = "windows")] @@ -652,8 +821,9 @@ enum GpuVendor { Intel, } -/// Resolve the active Windows encode backend from `PUNKTFUNK_ENCODER` (`auto` → the DXGI adapter -/// vendor). Shared by [`open_video`] and the GameStream codec advertisement so both agree. +/// Resolve the active Windows encode backend from `PUNKTFUNK_ENCODER` (`auto` → the selected +/// render adapter's vendor). Shared by [`open_video`] and the GameStream codec advertisement so +/// both agree. #[cfg(target_os = "windows")] pub(crate) fn windows_resolved_backend() -> WindowsBackend { // Resolved ONCE in HostConfig (Goal-1) — was re-read from PUNKTFUNK_ENCODER on every call. @@ -683,72 +853,66 @@ pub fn windows_backend_is_ffmpeg() -> bool { ) } -/// Detect the host GPU vendor from the first hardware DXGI adapter (Windows has no `/dev/nvidia*` -/// probe). Cached. NVIDIA=0x10DE, AMD=0x1002, Intel=0x8086; the software/WARP adapter is skipped. +/// Detect the encode-GPU vendor from the **selected render adapter** ([`crate::gpu::selected_gpu`]: +/// web-console preference > `PUNKTFUNK_RENDER_ADAPTER` > max VRAM) — the same adapter the capture +/// ring and the IddCx render pin sit on, so the encoder backend can never disagree with where the +/// captured frames live. The old first-DXGI-adapter scan did exactly that on hybrid boxes: adapter +/// 0 is often the iGPU (e.g. Intel Arc) while capture/encode pin the dGPU — resolving QSV for a +/// pipeline whose textures sit on the NVIDIA card. Uncached: selection is preference-dependent and +/// only consulted at session setup / serverinfo time, never per-frame. 
Falls back to the first +/// known-vendor adapter when the selected one is an unknown vendor. #[cfg(target_os = "windows")] fn windows_gpu_vendor() -> Option { - use std::sync::OnceLock; - use windows::Win32::Graphics::Dxgi::{ - CreateDXGIFactory1, IDXGIFactory1, DXGI_ADAPTER_FLAG_SOFTWARE, - }; - static CACHE: OnceLock> = OnceLock::new(); - // SAFETY: `CreateDXGIFactory1` returns a fresh owned `IDXGIFactory1` COM object (refcounted by the - // windows-rs wrapper, Released when the local drops); `.ok()?` bails on failure so `factory` is a - // valid interface before any use. `EnumAdapters1(i)` hands back the i-th adapter as an owned - // `IDXGIAdapter1` (or an error past the last adapter, which ends the loop). `GetDesc1()` returns the - // `DXGI_ADAPTER_DESC1` by value (no out-pointer), so reading `desc.Flags`/`desc.VendorId` is plain - // field access. Every call only touches COM objects this closure owns; the `OnceLock` runs the - // closure once (no data race) and all interfaces are Released as the locals drop. No raw pointer is - // dereferenced and nothing is aliased. - *CACHE.get_or_init(|| unsafe { - let factory: IDXGIFactory1 = CreateDXGIFactory1().ok()?; - let mut i = 0u32; - while let Ok(adapter) = factory.EnumAdapters1(i) { - i += 1; - // windows-rs 0.62: GetDesc1 returns the desc by value (no out-param). 
- let Ok(desc) = adapter.GetDesc1() else { - continue; - }; - if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE.0 as u32) != 0 { - continue; // skip the Microsoft Basic Render / WARP adapter - } - match desc.VendorId { - 0x10DE => return Some(GpuVendor::Nvidia), - 0x1002 => return Some(GpuVendor::Amd), - 0x8086 => return Some(GpuVendor::Intel), - _ => continue, - } + fn by_id(vendor_id: u32) -> Option { + match vendor_id { + crate::gpu::VENDOR_NVIDIA => Some(GpuVendor::Nvidia), + crate::gpu::VENDOR_AMD => Some(GpuVendor::Amd), + crate::gpu::VENDOR_INTEL => Some(GpuVendor::Intel), + _ => None, } - None + } + let sel = crate::gpu::selected_gpu()?; + by_id(sel.info.vendor_id).or_else(|| { + crate::gpu::enumerate() + .iter() + .find_map(|g| by_id(g.vendor_id)) }) } -/// Probe the active Windows AMF/QSV backend for its encodable codecs (cached; opens a tiny encoder -/// per codec, once). Mirrors [`vaapi_codec_support`]; called only when [`windows_backend_is_ffmpeg`] -/// is true. AV1 is narrow (AMD RDNA3+, Intel Arc/Xe2+), so it must be probed, not assumed. +/// Probe the active Windows AMF/QSV backend for its encodable codecs (opens a tiny encoder per +/// codec; cached **per (backend, selected GPU)** — a web-console preference change re-probes on the +/// newly selected adapter instead of serving the old GPU's answer for the process lifetime). +/// Mirrors [`vaapi_codec_support`]; called only when [`windows_backend_is_ffmpeg`] is true. AV1 is +/// narrow (AMD RDNA3+, Intel Arc/Xe2+), so it must be probed, not assumed. 
#[cfg(all(target_os = "windows", feature = "amf-qsv"))] pub fn windows_codec_support() -> CodecSupport { - use std::sync::OnceLock; - static CACHE: OnceLock = OnceLock::new(); - *CACHE.get_or_init(|| { - let vendor = match windows_resolved_backend() { - WindowsBackend::Qsv => ffmpeg_win::WinVendor::Qsv, - _ => ffmpeg_win::WinVendor::Amf, - }; - let caps = CodecSupport { - h264: ffmpeg_win::probe_can_encode(vendor, Codec::H264), - h265: ffmpeg_win::probe_can_encode(vendor, Codec::H265), - av1: ffmpeg_win::probe_can_encode(vendor, Codec::Av1), - }; - tracing::info!( - backend = ?vendor, - h264 = caps.h264, - h265 = caps.h265, - av1 = caps.av1, - "Windows AMF/QSV encode capabilities probed" - ); - caps - }) + use std::collections::HashMap; + use std::sync::{Mutex, OnceLock}; + static CACHE: OnceLock>> = OnceLock::new(); + let vendor = match windows_resolved_backend() { + WindowsBackend::Qsv => ffmpeg_win::WinVendor::Qsv, + _ => ffmpeg_win::WinVendor::Amf, + }; + let key = format!("{vendor:?}:{}", crate::gpu::selection_key()); + let cache = CACHE.get_or_init(|| Mutex::new(HashMap::new())); + if let Some(c) = cache.lock().unwrap().get(&key) { + return *c; + } + let caps = CodecSupport { + h264: ffmpeg_win::probe_can_encode(vendor, Codec::H264), + h265: ffmpeg_win::probe_can_encode(vendor, Codec::H265), + av1: ffmpeg_win::probe_can_encode(vendor, Codec::Av1), + }; + tracing::info!( + backend = ?vendor, + h264 = caps.h264, + h265 = caps.h265, + av1 = caps.av1, + "Windows AMF/QSV encode capabilities probed" + ); + // A concurrent first call may double-probe; both arrive at the same answer, last insert wins. 
+ cache.lock().unwrap().insert(key, caps); + caps } // Goal-1 stage 6: GPU/CPU encoders confined to `encode/windows/` (NVENC, AMF/QSV ffmpeg, software) and diff --git a/crates/punktfunk-host/src/encode/linux/vaapi.rs b/crates/punktfunk-host/src/encode/linux/vaapi.rs index ce96b29..b063a14 100644 --- a/crates/punktfunk-host/src/encode/linux/vaapi.rs +++ b/crates/punktfunk-host/src/encode/linux/vaapi.rs @@ -50,10 +50,13 @@ const fn fourcc(a: u8, b: u8, c: u8, d: u8) -> u32 { (a as u32) | ((b as u32) << 8) | ((c as u32) << 16) | ((d as u32) << 24) } -/// The render node a VAAPI/DRM device should open. `PUNKTFUNK_RENDER_NODE` pins it on a multi-GPU -/// box; the default is correct on a single-GPU host. +/// The render node a VAAPI/DRM device should open, from [`crate::gpu::linux_render_node`]: a +/// matched web-console GPU preference pins it, else `PUNKTFUNK_RENDER_NODE`, else the single-GPU +/// default. fn render_node() -> CString { - let p = std::env::var("PUNKTFUNK_RENDER_NODE").unwrap_or_else(|_| "/dev/dri/renderD128".into()); + let p = crate::gpu::linux_render_node() + .to_string_lossy() + .into_owned(); CString::new(p).unwrap_or_else(|_| CString::new("/dev/dri/renderD128").unwrap()) } diff --git a/crates/punktfunk-host/src/encode/windows/ffmpeg_win.rs b/crates/punktfunk-host/src/encode/windows/ffmpeg_win.rs index 8962ff8..440ea70 100644 --- a/crates/punktfunk-host/src/encode/windows/ffmpeg_win.rs +++ b/crates/punktfunk-host/src/encode/windows/ffmpeg_win.rs @@ -254,6 +254,11 @@ pub fn probe_can_encode_444(_vendor: WinVendor, _codec: Codec) -> bool { } pub fn probe_can_encode(vendor: WinVendor, codec: Codec) -> bool { + // Deliberately NOT pinned to the selected render adapter (unlike `nvenc::probe_can_encode_444`): + // the system-input probe passes no hwdevice, and the AMF/QSV runtimes only ever bind their own + // vendor's silicon — on a mixed-vendor box the probe lands on the right GPU by construction. 
+ // Only a two-same-vendor-GPU box could probe the wrong card (accepted; results are cached per + // selected GPU in `windows_codec_support`, so a fix here slots in without churn). if ffmpeg::init().is_err() { return false; } diff --git a/crates/punktfunk-host/src/encode/windows/nvenc.rs b/crates/punktfunk-host/src/encode/windows/nvenc.rs index 018d116..b172aee 100644 --- a/crates/punktfunk-host/src/encode/windows/nvenc.rs +++ b/crates/punktfunk-host/src/encode/windows/nvenc.rs @@ -958,34 +958,59 @@ impl Drop for NvencD3d11Encoder { /// so the host advertises the chroma it can really encode (honest downgrade to 4:2:0 on a card without it). pub fn probe_can_encode_444(codec: Codec) -> bool { use windows::Win32::Foundation::HMODULE; - use windows::Win32::Graphics::Direct3D::{D3D_DRIVER_TYPE_HARDWARE, D3D_FEATURE_LEVEL_11_0}; + use windows::Win32::Graphics::Direct3D::{ + D3D_DRIVER_TYPE_HARDWARE, D3D_DRIVER_TYPE_UNKNOWN, D3D_FEATURE_LEVEL_11_0, + }; use windows::Win32::Graphics::Direct3D11::{ D3D11CreateDevice, D3D11_CREATE_DEVICE_BGRA_SUPPORT, D3D11_SDK_VERSION, }; + use windows::Win32::Graphics::Dxgi::{CreateDXGIFactory1, IDXGIAdapter1, IDXGIFactory4}; if codec != Codec::H265 { return false; } - // SAFETY: a self-contained probe owning every handle it creates. `D3D11CreateDevice` (HARDWARE - // driver, NULL adapter) fills `device` or returns Err (→ false). `open_encode_session_ex` opens an - // NVENC session against that device's raw pointer (valid while `device` is held) or errors (→ false, - // tearing nothing down). `get_encode_caps` reads one scalar cap into `val` via the loaded API table. - // `destroy_encoder` frees the session exactly once; `device`/its context drop with the COM wrappers. - // No handle escapes this call and nothing runs concurrently. + // SAFETY: a self-contained probe owning every handle it creates. `CreateDXGIFactory1`/ + // `EnumAdapterByLuid` return owned COM objects or err (→ default-adapter fallback). 
+ // `D3D11CreateDevice` (explicit adapter + UNKNOWN driver type, or NULL adapter + HARDWARE) + // fills `device` or returns Err (→ false). `open_encode_session_ex` opens an NVENC session + // against that device's raw pointer (valid while `device` is held) or errors (→ false, tearing + // nothing down). `get_encode_caps` reads one scalar cap into `val` via the loaded API table. + // `destroy_encoder` frees the session exactly once; `device`/its context drop with the COM + // wrappers. No handle escapes this call and nothing runs concurrently. unsafe { + // Probe on the SELECTED render adapter — the GPU the session will actually encode on + // (web-console preference / PUNKTFUNK_RENDER_ADAPTER / max VRAM). The OS default adapter + // (NULL) can be the *other* GPU on a hybrid box, answering for hardware we won't use. + let adapter: Option = crate::win_adapter::resolve_render_adapter_luid() + .and_then(|luid| { + let factory: IDXGIFactory4 = CreateDXGIFactory1().ok()?; + factory.EnumAdapterByLuid(luid).ok() + }); let mut device: Option = None; - if D3D11CreateDevice( - None, - D3D_DRIVER_TYPE_HARDWARE, - HMODULE::default(), - D3D11_CREATE_DEVICE_BGRA_SUPPORT, - Some(&[D3D_FEATURE_LEVEL_11_0]), - D3D11_SDK_VERSION, - Some(&mut device), - None, - None, - ) - .is_err() - { + let created = match &adapter { + Some(a) => D3D11CreateDevice( + a, + D3D_DRIVER_TYPE_UNKNOWN, + HMODULE::default(), + D3D11_CREATE_DEVICE_BGRA_SUPPORT, + Some(&[D3D_FEATURE_LEVEL_11_0]), + D3D11_SDK_VERSION, + Some(&mut device), + None, + None, + ), + None => D3D11CreateDevice( + None, + D3D_DRIVER_TYPE_HARDWARE, + HMODULE::default(), + D3D11_CREATE_DEVICE_BGRA_SUPPORT, + Some(&[D3D_FEATURE_LEVEL_11_0]), + D3D11_SDK_VERSION, + Some(&mut device), + None, + None, + ), + }; + if created.is_err() { return false; } let Some(device) = device else { return false }; diff --git a/crates/punktfunk-host/src/gpu.rs b/crates/punktfunk-host/src/gpu.rs new file mode 100644 index 0000000..8f357fe --- /dev/null +++ 
b/crates/punktfunk-host/src/gpu.rs @@ -0,0 +1,757 @@ +//! GPU inventory + operator GPU preference for multi-GPU hosts (web-console GPU selection). +//! +//! Three concerns, one module: +//! - **Enumeration** ([`enumerate`]): the machine's hardware GPUs — DXGI adapters on Windows +//! (WARP/Basic-Render filtered out), `/dev/dri/renderD*` + sysfs PCI ids on Linux, empty +//! elsewhere. Compiled on every platform so the management endpoints (and the checked-in +//! OpenAPI document) are identical everywhere. +//! - **Preference** ([`prefs`]): the operator's persisted auto/manual choice +//! (`gpu-settings.json` in the config dir, written by the mgmt API). A manual preference is stored by +//! *stable identity* — PCI vendor:device + occurrence + name — NOT by LUID (Windows LUIDs are +//! reassigned every boot) or adapter index (enumeration order can change across driver updates). +//! - **Selection** ([`selected_gpu`] / [`pick`]): the one place that turns (inventory, preference, +//! `PUNKTFUNK_RENDER_ADAPTER`) into the render/encode GPU. Precedence: **manual preference > +//! env substring > auto (max dedicated VRAM)**, with graceful fall-through — a preferred GPU +//! that vanished (unplugged eGPU, disabled iGPU) logs a warning and auto-selects so the host +//! keeps streaming, and the mgmt API surfaces the fallback instead of hiding it. +//! +//! A preference change applies to the **next session**: selection is read at capture/encode setup +//! (`win_adapter::resolve_render_adapter_luid`, the encoder-backend dispatch, the codec probes); a +//! running session keeps the device it opened on. [`session_begin`]/[`active`] record which GPU a +//! live session actually encodes on, for the console's "in use" display. + +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use std::sync::{Mutex, OnceLock}; + +/// PCI vendor ids of the GPU vendors the encode backends know (NVENC / AMF / QSV, VAAPI on Linux).
+pub(crate) const VENDOR_NVIDIA: u32 = 0x10DE; +pub(crate) const VENDOR_AMD: u32 = 0x1002; +pub(crate) const VENDOR_INTEL: u32 = 0x8086; + +/// Platform handle of an enumerated GPU — how the pipeline actually addresses it. Not part of the +/// stable identity (Windows LUIDs are per-boot; a render node can renumber across kernel updates). +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub(crate) struct GpuHandle { + /// DXGI `AdapterLuid` of this adapter (this boot only). + #[cfg(target_os = "windows")] + pub luid_low: u32, + #[cfg(target_os = "windows")] + pub luid_high: i32, + /// DRM render node (`/dev/dri/renderD*`) of this GPU. + #[cfg(target_os = "linux")] + pub render_node: Option, +} + +/// One hardware GPU as enumerated on this host. +#[derive(Clone, Debug)] +pub(crate) struct GpuInfo { + /// Stable identifier for the API/UI: `"{vendor:04x}-{device:04x}-{occurrence}"`. Occurrence + /// disambiguates identical cards (two of the same model) by enumeration order among their + /// twins — the best available tiebreaker (PCI order), imperfect but honest. + pub id: String, + /// Adapter description (Windows) / synthesized vendor label + node (Linux). + pub name: String, + pub vendor_id: u32, + pub device_id: u32, + /// Index among enumerated GPUs with the same (vendor_id, device_id). + pub occurrence: u32, + /// Dedicated VRAM in bytes (0 where the platform doesn't expose it — non-amdgpu Linux sysfs). + pub vram_bytes: u64, + pub handle: GpuHandle, +} + +/// Lowercase vendor tag for the API (`nvidia` / `amd` / `intel` / `other`). +pub(crate) fn vendor_tag(vendor_id: u32) -> &'static str { + match vendor_id { + VENDOR_NVIDIA => "nvidia", + VENDOR_AMD => "amd", + VENDOR_INTEL => "intel", + _ => "other", + } +} + +impl GpuInfo { + /// Lowercase vendor tag for the API (`nvidia` / `amd` / `intel` / `other`). + pub fn vendor_tag(&self) -> &'static str { + vendor_tag(self.vendor_id) + } + + /// The DXGI LUID this adapter had at enumeration time. 
+ #[cfg(target_os = "windows")] + pub fn luid(&self) -> windows::Win32::Foundation::LUID { + windows::Win32::Foundation::LUID { + LowPart: self.handle.luid_low, + HighPart: self.handle.luid_high, + } + } +} + +/// Assign the stable `id` + `occurrence` fields after enumeration (occurrence = index among +/// same-(vendor,device) twins, in enumeration order). +fn assign_ids(gpus: &mut [GpuInfo]) { + for i in 0..gpus.len() { + let occ = gpus[..i] + .iter() + .filter(|g| g.vendor_id == gpus[i].vendor_id && g.device_id == gpus[i].device_id) + .count() as u32; + gpus[i].occurrence = occ; + gpus[i].id = format!( + "{:04x}-{:04x}-{}", + gpus[i].vendor_id, gpus[i].device_id, occ + ); + } +} + +// --------------------------------------------------------------------------------------------- +// Enumeration +// --------------------------------------------------------------------------------------------- + +/// Enumerate this host's hardware GPUs. Windows: DXGI adapters minus WARP/Basic-Render (the same +/// filter `win_adapter` always applied). Linux: `/dev/dri/renderD*` with PCI ids from sysfs. +/// Other platforms (the macOS dev/test host build): empty — the endpoints still exist, they just +/// report no GPUs. +#[cfg(target_os = "windows")] +pub(crate) fn enumerate() -> Vec { + use windows::Win32::Graphics::Dxgi::{ + CreateDXGIFactory1, IDXGIFactory1, DXGI_ADAPTER_FLAG_SOFTWARE, + }; + let mut out = Vec::new(); + // SAFETY: `CreateDXGIFactory1` returns an owned COM factory (Released when the local drops) or + // errs (→ empty inventory). `EnumAdapters1(i)` yields owned `IDXGIAdapter1`s until it errors + // past the last adapter; `GetDesc1()` returns the descriptor by value. Only locals are touched, + // nothing escapes, no raw pointer is dereferenced. 
+ unsafe { + let Ok(factory) = CreateDXGIFactory1::() else { + return out; + }; + let mut i = 0u32; + while let Ok(adapter) = factory.EnumAdapters1(i) { + i += 1; + let Ok(d) = adapter.GetDesc1() else { continue }; + if (d.Flags & DXGI_ADAPTER_FLAG_SOFTWARE.0 as u32) != 0 { + continue; // Microsoft Basic Render / WARP + } + let name = String::from_utf16_lossy(&d.Description) + .trim_end_matches('\u{0}') + .to_string(); + let lname = name.to_ascii_lowercase(); + if lname.contains("basic render") || lname.contains("warp") { + continue; + } + out.push(GpuInfo { + id: String::new(), + name, + vendor_id: d.VendorId, + device_id: d.DeviceId, + occurrence: 0, + vram_bytes: d.DedicatedVideoMemory as u64, + handle: GpuHandle { + luid_low: d.AdapterLuid.LowPart, + luid_high: d.AdapterLuid.HighPart, + }, + }); + } + } + assign_ids(&mut out); + out +} + +#[cfg(target_os = "linux")] +pub(crate) fn enumerate() -> Vec { + let mut nodes: Vec = std::fs::read_dir("/dev/dri") + .map(|rd| { + rd.filter_map(|e| e.ok()) + .map(|e| e.file_name().to_string_lossy().into_owned()) + .filter(|n| n.starts_with("renderD")) + .collect() + }) + .unwrap_or_default(); + nodes.sort(); + let mut out = Vec::new(); + for node in nodes { + let sys = format!("/sys/class/drm/{node}/device"); + let read_hex = |f: &str| -> u32 { + std::fs::read_to_string(format!("{sys}/{f}")) + .ok() + .and_then(|s| u32::from_str_radix(s.trim().trim_start_matches("0x"), 16).ok()) + .unwrap_or(0) + }; + let vendor_id = read_hex("vendor"); + let device_id = read_hex("device"); + // Only amdgpu exposes a VRAM total in sysfs; 0 elsewhere (display-only — Linux auto + // selection is NVIDIA-presence + render node, not VRAM). 
+ let vram_bytes = std::fs::read_to_string(format!("{sys}/mem_info_vram_total")) + .ok() + .and_then(|s| s.trim().parse::().ok()) + .unwrap_or(0); + let vendor_label = match vendor_id { + VENDOR_NVIDIA => "NVIDIA".to_string(), + VENDOR_AMD => "AMD".to_string(), + VENDOR_INTEL => "Intel".to_string(), + other => format!("GPU 0x{other:04x}"), + }; + out.push(GpuInfo { + id: String::new(), + name: format!("{vendor_label} GPU ({node})"), + vendor_id, + device_id, + occurrence: 0, + vram_bytes, + handle: GpuHandle { + render_node: Some(PathBuf::from(format!("/dev/dri/{node}"))), + }, + }); + } + assign_ids(&mut out); + out +} + +#[cfg(not(any(target_os = "windows", target_os = "linux")))] +pub(crate) fn enumerate() -> Vec { + Vec::new() +} + +// --------------------------------------------------------------------------------------------- +// Persisted preference +// --------------------------------------------------------------------------------------------- + +/// Operator GPU-selection mode: `Auto` (env substring, else max VRAM — today's behavior) or +/// `Manual` (an explicit GPU chosen in the web console). +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub(crate) enum GpuMode { + #[default] + Auto, + Manual, +} + +/// Stable identity of the manually preferred GPU (see [`GpuInfo::id`] for why not LUID/index). +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub(crate) struct PreferredGpu { + pub vendor_id: u32, + pub device_id: u32, + #[serde(default)] + pub occurrence: u32, + /// Adapter name at the time of selection — the last-resort matcher and the label the API + /// shows when the preferred GPU is currently absent. + #[serde(default)] + pub name: String, +} + +/// The persisted GPU preference (`/gpu-settings.json`). 
+#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub(crate) struct GpuPreference { + #[serde(default)] + pub mode: GpuMode, + /// `Some` when `mode == Manual` (kept when switching back to Auto so the console can offer + /// "return to your previous manual pick"). + #[serde(default)] + pub gpu: Option, +} + +/// The preference store: in-memory current value + its JSON file. Mirrors `native_pairing`'s +/// persistence discipline (private dir, secret-file temp write + atomic rename; the in-memory +/// value is replaced only after the disk write succeeds). +pub(crate) struct GpuPrefStore { + path: PathBuf, + cur: Mutex, +} + +impl GpuPrefStore { + /// Load the store from `path`. Any read error (e.g. a missing file) ⇒ default Auto, silently; + /// unparsable JSON ⇒ default Auto with a warning — never fail host startup over a settings file. + pub fn load_from(path: PathBuf) -> Self { + let cur = match std::fs::read(&path) { + Ok(bytes) => match serde_json::from_slice::(&bytes) { + Ok(p) => p, + Err(e) => { + tracing::warn!(path = %path.display(), "gpu-settings.json unreadable — using Auto: {e}"); + GpuPreference::default() + } + }, + Err(_) => GpuPreference::default(), + }; + GpuPrefStore { + path, + cur: Mutex::new(cur), + } + } + + pub fn get(&self) -> GpuPreference { + self.cur.lock().unwrap().clone() + } + + /// Persist + apply a new preference. The in-memory value only changes if the disk write + /// succeeds, so a full disk can't leave memory and file disagreeing.
+ pub fn set(&self, pref: GpuPreference) -> Result<()> { + if let Some(dir) = self.path.parent() { + crate::gamestream::create_private_dir(dir)?; + } + let tmp = self.path.with_extension("json.tmp"); + crate::gamestream::write_secret_file(&tmp, &serde_json::to_vec_pretty(&pref)?)?; + std::fs::rename(&tmp, &self.path)?; + *self.cur.lock().unwrap() = pref; + Ok(()) + } +} + +/// The process-wide preference store (config-dir file), loaded once on first access — the same +/// global-accessor shape as [`crate::config::config`], because selection happens deep inside +/// capture/encode setup where no app state is threaded. +pub(crate) fn prefs() -> &'static GpuPrefStore { + static STORE: OnceLock = OnceLock::new(); + STORE.get_or_init(|| { + GpuPrefStore::load_from(crate::gamestream::config_dir().join("gpu-settings.json")) + }) +} + +// --------------------------------------------------------------------------------------------- +// Selection +// --------------------------------------------------------------------------------------------- + +/// Why a GPU was selected — surfaced by the mgmt API so the console can explain the decision. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum PickSource { + /// The operator's manual preference matched a present GPU. + Preference, + /// `PUNKTFUNK_RENDER_ADAPTER` substring matched. + Env, + /// Auto: max dedicated VRAM (Windows) / platform default (Linux display). + Auto, + /// A manual preference is set but that GPU is absent — fell back to auto so the host keeps + /// streaming (logged; the console shows the fallback). + PreferenceMissing, +} + +impl PickSource { + pub fn tag(self) -> &'static str { + match self { + PickSource::Preference => "preference", + PickSource::Env => "env", + PickSource::Auto => "auto", + PickSource::PreferenceMissing => "preference_missing", + } + } +} + +/// A resolved selection: the GPU the next session's pipeline will be created on, and why. 
+#[derive(Clone, Debug)] +pub(crate) struct SelectedGpu { + pub info: GpuInfo, + pub source: PickSource, +} + +/// Find the manually preferred GPU in the inventory. Match order: exact stable identity +/// (vendor, device, occurrence) → same model (vendor, device; a twin renumbered) → exact name +/// (ids changed across a driver/firmware quirk but the marketing name survived). +pub(crate) fn find_preferred(gpus: &[GpuInfo], want: &PreferredGpu) -> Option { + gpus.iter() + .position(|g| { + g.vendor_id == want.vendor_id + && g.device_id == want.device_id + && g.occurrence == want.occurrence + }) + .or_else(|| { + gpus.iter() + .position(|g| g.vendor_id == want.vendor_id && g.device_id == want.device_id) + }) + .or_else(|| { + if want.name.is_empty() { + return None; + } + gpus.iter().position(|g| g.name == want.name) + }) +} + +/// Pure selection over an inventory: **manual preference > env substring > max VRAM**. Returns +/// the index into `gpus` plus the reason. `None` only when `gpus` is empty. A set-but-unmatched +/// env substring falls through to max-VRAM (same outcome as env unset — deliberately more robust +/// than the old `resolve_render_adapter_luid`, which returned *no* adapter on a stale substring). 
+pub(crate) fn pick( + gpus: &[GpuInfo], + pref: &GpuPreference, + env_substr: Option<&str>, +) -> Option<(usize, PickSource)> { + let mut preference_missing = false; + if pref.mode == GpuMode::Manual { + if let Some(want) = &pref.gpu { + match find_preferred(gpus, want) { + Some(i) => return Some((i, PickSource::Preference)), + None => preference_missing = true, + } + } + } + if let Some(sub) = env_substr.filter(|s| !s.is_empty()) { + let sub = sub.to_ascii_lowercase(); + if let Some(i) = gpus + .iter() + .position(|g| g.name.to_ascii_lowercase().contains(&sub)) + { + return Some((i, PickSource::Env)); + } + } + let i = gpus + .iter() + .enumerate() + .max_by_key(|(_, g)| g.vram_bytes) + .map(|(i, _)| i)?; + Some(( + i, + if preference_missing { + PickSource::PreferenceMissing + } else { + PickSource::Auto + }, + )) +} + +/// The GPU the next session will run on. Windows: the full precedence over the DXGI inventory — +/// this is what `win_adapter::resolve_render_adapter_luid` (capture ring + IddCx render pin) and +/// the encoder-vendor dispatch both consume, so capture, encode, and the advertisement agree by +/// construction. Pure query — callers log (this runs per serverinfo poll). +#[cfg(target_os = "windows")] +pub(crate) fn selected_gpu() -> Option { + let gpus = enumerate(); + let pref = prefs().get(); + let env = crate::config::config() + .render_adapter + .clone() + .filter(|s| !s.is_empty()); + let (i, source) = pick(&gpus, &pref, env.as_deref())?; + Some(SelectedGpu { + info: gpus.into_iter().nth(i)?, + source, + }) +} + +/// The GPU the next session will run on (Linux). Mirrors the encode dispatch for display: a +/// matched manual preference wins; otherwise NVIDIA-presence → the NVIDIA GPU, else the GPU that +/// owns the VAAPI render node. (The *authoritative* Linux switches stay in `encode::open_video` / +/// [`linux_render_node`] — this is the console's view of them.) 
+#[cfg(target_os = "linux")] +pub(crate) fn selected_gpu() -> Option { + let gpus = enumerate(); + let pref = prefs().get(); + let mut preference_missing = false; + if pref.mode == GpuMode::Manual { + if let Some(want) = &pref.gpu { + match find_preferred(&gpus, want) { + Some(i) => { + return Some(SelectedGpu { + info: gpus.into_iter().nth(i)?, + source: PickSource::Preference, + }) + } + None => preference_missing = true, + } + } + } + let source = if preference_missing { + PickSource::PreferenceMissing + } else { + PickSource::Auto + }; + if linux_nvidia_present() { + if let Some(i) = gpus.iter().position(|g| g.vendor_id == VENDOR_NVIDIA) { + return Some(SelectedGpu { + info: gpus.into_iter().nth(i)?, + source, + }); + } + } + let node = linux_render_node(); + let i = gpus + .iter() + .position(|g| g.handle.render_node.as_deref() == Some(node.as_path())) + .unwrap_or(0); + Some(SelectedGpu { + info: gpus.into_iter().nth(i)?, + source, + }) +} + +#[cfg(not(any(target_os = "windows", target_os = "linux")))] +pub(crate) fn selected_gpu() -> Option { + None +} + +/// The manually preferred GPU, only when `mode == Manual` **and** it is currently present. +/// The Linux encode dispatch consults this (auto mode keeps today's NVIDIA-presence behavior +/// exactly). +pub(crate) fn manual_selection() -> Option { + let pref = prefs().get(); + if pref.mode != GpuMode::Manual { + return None; + } + let want = pref.gpu?; + let gpus = enumerate(); + let i = find_preferred(&gpus, &want)?; + gpus.into_iter().nth(i) +} + +/// The VAAPI/DRM render node for this host: matched manual preference > `PUNKTFUNK_RENDER_NODE` +/// (a deliberate live env read — see `config.rs` module docs) > `/dev/dri/renderD128`. 
+#[cfg(target_os = "linux")] +pub(crate) fn linux_render_node() -> PathBuf { + if let Some(g) = manual_selection() { + if let Some(node) = g.handle.render_node { + return node; + } + } + std::env::var("PUNKTFUNK_RENDER_NODE") + .ok() + .filter(|s| !s.is_empty()) + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("/dev/dri/renderD128")) +} + +/// NVIDIA-presence probe (same device-node check as `encode::nvidia_present` — duplicated two +/// lines rather than widening that private fn's visibility). +#[cfg(target_os = "linux")] +fn linux_nvidia_present() -> bool { + std::path::Path::new("/dev/nvidiactl").exists() || std::path::Path::new("/dev/nvidia0").exists() +} + +/// A cache key that changes whenever the *selection* changes (preference edits included), for the +/// per-GPU probe caches (`can_encode_444`, `windows_codec_support`) that were process-lifetime +/// `OnceLock`s back when selection was env-only. +pub(crate) fn selection_key() -> String { + match selected_gpu() { + Some(sel) => { + #[cfg(target_os = "windows")] + { + format!( + "{}:{:08x}{:08x}", + sel.info.id, sel.info.handle.luid_high as u32, sel.info.handle.luid_low + ) + } + #[cfg(not(target_os = "windows"))] + { + sel.info.id + } + } + None => String::new(), + } +} + +// --------------------------------------------------------------------------------------------- +// Live "in use" record +// --------------------------------------------------------------------------------------------- + +/// What a live session encodes on — the console's "currently used GPU". +#[derive(Clone, Debug)] +pub(crate) struct ActiveGpu { + /// Stable id of the GPU ([`GpuInfo::id`]; empty for the CPU/software path) so a UI can match + /// it against the inventory. + pub id: String, + pub name: String, + pub vendor_id: u32, + /// The encode backend the session opened (`nvenc` / `amf` / `qsv` / `vaapi` / `software`). 
+ pub backend: &'static str, +} + +struct ActiveState { + gpu: ActiveGpu, + sessions: u32, +} + +static ACTIVE: Mutex> = Mutex::new(None); + +/// RAII marker for one live encode session; dropping it decrements the session count. Held by the +/// encoder wrapper `open_video` returns, so the count is correct by construction (every successful +/// open is paired with a drop). +pub(crate) struct ActiveSession(()); + +impl Drop for ActiveSession { + fn drop(&mut self) { + let mut st = ACTIVE.lock().unwrap_or_else(|e| e.into_inner()); + if let Some(st) = st.as_mut() { + st.sessions = st.sessions.saturating_sub(1); + } + } +} + +/// Record a session opening on `gpu`. Concurrent sessions share one GPU (the Windows pipeline is +/// single-GPU by construction; Linux sessions share the selection), so the latest record wins and +/// a counter tracks liveness. +pub(crate) fn session_begin(gpu: ActiveGpu) -> ActiveSession { + let mut st = ACTIVE.lock().unwrap_or_else(|e| e.into_inner()); + let sessions = st.as_ref().map(|s| s.sessions).unwrap_or(0) + 1; + *st = Some(ActiveState { gpu, sessions }); + ActiveSession(()) +} + +/// The GPU live sessions encode on + how many sessions hold it. `Some` with `sessions == 0` means +/// "last used, idle now" — the mgmt API distinguishes the two. +pub(crate) fn active() -> Option<(ActiveGpu, u32)> { + ACTIVE + .lock() + .unwrap_or_else(|e| e.into_inner()) + .as_ref() + .map(|s| (s.gpu.clone(), s.sessions)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn gpu(vendor: u32, device: u32, name: &str, vram_gb: u64) -> GpuInfo { + GpuInfo { + id: String::new(), + name: name.into(), + vendor_id: vendor, + device_id: device, + occurrence: 0, + vram_bytes: vram_gb * 1024 * 1024 * 1024, + handle: GpuHandle::default(), + } + } + + /// The dev-box shape: NVIDIA dGPU + Intel Arc iGPU. 
+ fn hybrid() -> Vec { + let mut v = vec![ + gpu(VENDOR_INTEL, 0x7d55, "Intel(R) Arc(TM) Graphics", 0), + gpu(VENDOR_NVIDIA, 0x2c05, "NVIDIA GeForce RTX 5070 Ti", 16), + ]; + assign_ids(&mut v); + v + } + + fn manual(vendor: u32, device: u32, occurrence: u32, name: &str) -> GpuPreference { + GpuPreference { + mode: GpuMode::Manual, + gpu: Some(PreferredGpu { + vendor_id: vendor, + device_id: device, + occurrence, + name: name.into(), + }), + } + } + + #[test] + fn auto_picks_max_vram() { + let (i, src) = pick(&hybrid(), &GpuPreference::default(), None).unwrap(); + assert_eq!(i, 1); + assert_eq!(src, PickSource::Auto); + } + + #[test] + fn manual_preference_beats_env_and_vram() { + let pref = manual(VENDOR_INTEL, 0x7d55, 0, "Intel(R) Arc(TM) Graphics"); + let (i, src) = pick(&hybrid(), &pref, Some("nvidia")).unwrap(); + assert_eq!(i, 0); + assert_eq!(src, PickSource::Preference); + } + + #[test] + fn env_substring_beats_vram_and_is_case_insensitive() { + let mut gpus = vec![ + gpu(VENDOR_NVIDIA, 0x2c05, "NVIDIA GeForce RTX 5070 Ti", 16), + gpu(VENDOR_INTEL, 0x7d55, "Intel(R) Arc(TM) Graphics", 0), + ]; + assign_ids(&mut gpus); + let (i, src) = pick(&gpus, &GpuPreference::default(), Some("ARC")).unwrap(); + assert_eq!(i, 1); + assert_eq!(src, PickSource::Env); + } + + #[test] + fn unmatched_env_falls_back_to_max_vram() { + let (i, src) = pick(&hybrid(), &GpuPreference::default(), Some("radeon")).unwrap(); + assert_eq!(i, 1); + assert_eq!(src, PickSource::Auto); + } + + #[test] + fn missing_preferred_gpu_falls_back_and_says_so() { + let pref = manual(VENDOR_AMD, 0x744c, 0, "AMD Radeon RX 7900 XTX"); + let (i, src) = pick(&hybrid(), &pref, None).unwrap(); + assert_eq!(i, 1); // max VRAM + assert_eq!(src, PickSource::PreferenceMissing); + } + + #[test] + fn preferred_matches_same_model_when_occurrence_gone() { + // Stored occurrence 1 (was the second of two twins); only one twin remains. 
+ let mut gpus = vec![ + gpu(VENDOR_INTEL, 0x7d55, "Intel(R) Arc(TM) Graphics", 0), + gpu(VENDOR_NVIDIA, 0x2c05, "NVIDIA GeForce RTX 5070 Ti", 16), + ]; + assign_ids(&mut gpus); + let pref = manual(VENDOR_NVIDIA, 0x2c05, 1, "NVIDIA GeForce RTX 5070 Ti"); + let (i, src) = pick(&gpus, &pref, None).unwrap(); + assert_eq!(i, 1); + assert_eq!(src, PickSource::Preference); + } + + #[test] + fn preferred_matches_by_name_when_ids_changed() { + let pref = manual(VENDOR_NVIDIA, 0xffff, 0, "Intel(R) Arc(TM) Graphics"); + let (i, src) = pick(&hybrid(), &pref, None).unwrap(); + assert_eq!(i, 0); + assert_eq!(src, PickSource::Preference); + } + + #[test] + fn empty_inventory_selects_nothing() { + assert!(pick(&[], &GpuPreference::default(), Some("nvidia")).is_none()); + } + + #[test] + fn ids_disambiguate_twins() { + let mut gpus = vec![ + gpu(VENDOR_NVIDIA, 0x2c05, "NVIDIA GeForce RTX 5070 Ti", 16), + gpu(VENDOR_NVIDIA, 0x2c05, "NVIDIA GeForce RTX 5070 Ti", 16), + ]; + assign_ids(&mut gpus); + assert_eq!(gpus[0].id, "10de-2c05-0"); + assert_eq!(gpus[1].id, "10de-2c05-1"); + } + + #[test] + fn store_round_trips_and_survives_corruption() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("gpu-settings.json"); + let store = GpuPrefStore::load_from(path.clone()); + assert_eq!(store.get(), GpuPreference::default()); + let pref = manual(VENDOR_INTEL, 0x7d55, 0, "Intel(R) Arc(TM) Graphics"); + store.set(pref.clone()).unwrap(); + assert_eq!(store.get(), pref); + // A fresh load sees the persisted value… + assert_eq!(GpuPrefStore::load_from(path.clone()).get(), pref); + // …and a corrupt file degrades to Auto instead of failing startup. + std::fs::write(&path, b"{ not json").unwrap(); + assert_eq!( + GpuPrefStore::load_from(path).get(), + GpuPreference::default() + ); + } + + #[test] + fn session_counter_tracks_begin_and_drop() { + // Serialize against other tests via the ACTIVE mutex being process-global: this is the + // only test touching it. 
+ let a = session_begin(ActiveGpu { + id: "10de-2c05-0".into(), + name: "GPU A".into(), + vendor_id: VENDOR_NVIDIA, + backend: "nvenc", + }); + let (gpu0, n0) = active().unwrap(); + assert_eq!((gpu0.name.as_str(), n0), ("GPU A", 1)); + let b = session_begin(ActiveGpu { + id: "10de-2c05-0".into(), + name: "GPU A".into(), + vendor_id: VENDOR_NVIDIA, + backend: "nvenc", + }); + assert_eq!(active().unwrap().1, 2); + drop(a); + assert_eq!(active().unwrap().1, 1); + drop(b); + assert_eq!(active().unwrap().1, 0); // idle, last-used retained + } +} diff --git a/crates/punktfunk-host/src/main.rs b/crates/punktfunk-host/src/main.rs index 1c756fa..a036665 100644 --- a/crates/punktfunk-host/src/main.rs +++ b/crates/punktfunk-host/src/main.rs @@ -32,6 +32,7 @@ mod dmabuf_fence; mod drm_sync; mod encode; mod gamestream; +mod gpu; mod hdr; mod inject; #[cfg(target_os = "windows")] diff --git a/crates/punktfunk-host/src/mgmt.rs b/crates/punktfunk-host/src/mgmt.rs index 1258273..837373c 100644 --- a/crates/punktfunk-host/src/mgmt.rs +++ b/crates/punktfunk-host/src/mgmt.rs @@ -153,6 +153,8 @@ fn api_router_parts() -> (Router>, utoipa::openapi::OpenApi) { .routes(routes!(get_health)) .routes(routes!(get_host_info)) .routes(routes!(list_compositors)) + .routes(routes!(list_gpus)) + .routes(routes!(set_gpu_preference)) .routes(routes!(get_status)) .routes(routes!(list_paired_clients)) .routes(routes!(unpair_client)) @@ -204,6 +206,7 @@ pub fn openapi_json() -> String { modifiers(&SecurityAddon), tags( (name = "host", description = "Host identity, capabilities, and liveness"), + (name = "gpu", description = "GPU inventory and selection: list the host's GPUs, choose automatic or a preferred GPU, see the one in use"), (name = "clients", description = "Paired Moonlight client management"), (name = "pairing", description = "Pairing PIN delivery (the out-of-band half of the GameStream pairing handshake)"), (name = "native", description = "Native punktfunk/1 pairing: arm a window, display the 
host PIN, manage paired devices"), @@ -670,6 +673,238 @@ async fn list_compositors() -> Json> { ) } +/// One hardware GPU on the host (software/WARP adapters are never listed). +#[derive(Serialize, ToSchema)] +struct ApiGpu { + /// Stable identifier (`vendorid-deviceid-occurrence`, hex PCI ids) — pass to `setGpuPreference`. + /// Stable across reboots and driver updates, unlike an adapter index or LUID. + #[schema(example = "10de-2c05-0")] + id: String, + /// Adapter/marketing name. + #[schema(example = "NVIDIA GeForce RTX 5070 Ti")] + name: String, + /// `nvidia` | `amd` | `intel` | `other`. + vendor: String, + /// Dedicated VRAM in MiB (0 where the platform doesn't expose it). + vram_mb: u64, +} + +/// The GPU the **next** session's pipeline will be created on, and why. (A preference change +/// applies to the next session; a running session keeps the GPU it opened on.) +#[derive(Serialize, ToSchema)] +struct ApiSelectedGpu { + id: String, + name: String, + /// `nvidia` | `amd` | `intel` | `other`. + vendor: String, + /// Why this GPU was selected: `preference` (the manual choice), `env` + /// (`PUNKTFUNK_RENDER_ADAPTER`), `auto` (max dedicated VRAM / platform default), or + /// `preference_missing` (a manual choice is set but that GPU is absent — auto-selected + /// instead so the host keeps streaming). + source: String, +} + +/// The GPU live sessions are encoding on right now. +#[derive(Serialize, ToSchema)] +struct ApiActiveGpu { + /// Stable id matching an entry of `gpus` (empty for the CPU/software encoder). + id: String, + name: String, + /// `nvidia` | `amd` | `intel` | `other`. + vendor: String, + /// The encode backend in use (`nvenc` | `amf` | `qsv` | `vaapi` | `software`). + backend: String, + /// Number of live encode sessions on it. + sessions: u32, +} + +/// Full GPU-selection state for the console: inventory, the persisted preference, what the next +/// session will use, and what is in use right now. 
+#[derive(Serialize, ToSchema)] +struct GpuState { + /// The host's hardware GPUs. + gpus: Vec, + /// `auto` or `manual`. + mode: String, + /// The manually preferred GPU's stable id, when one is stored (kept while `mode` is `auto` so + /// a console can offer returning to it). May reference a GPU that is currently absent. + preferred_id: Option, + /// The stored name of the preferred GPU (a usable label even when it is absent). + preferred_name: Option, + /// Whether the preferred GPU is currently present. + preferred_available: bool, + /// `PUNKTFUNK_RENDER_ADAPTER` (the host.env pin), when set — it applies while `mode` is + /// `auto`; a manual preference overrides it. + env_override: Option, + /// The GPU the next session will use. + selected: Option, + /// The GPU live sessions use right now (absent while nothing is streaming). + active: Option, +} + +/// Request body for `setGpuPreference`. +#[derive(Deserialize, ToSchema)] +struct SetGpuPreference { + /// `auto` (env pin, else max dedicated VRAM — the default) or `manual`. + #[schema(example = "manual")] + mode: String, + /// Required when `mode` is `manual`: the stable `id` of a currently listed GPU + /// (see `listGpus`). + #[schema(example = "10de-2c05-0")] + gpu_id: Option, +} + +/// Build the [`GpuState`] snapshot (shared by the GET and the PUT's response). +fn gpu_state() -> GpuState { + let gpus = crate::gpu::enumerate(); + let pref = crate::gpu::prefs().get(); + let (preferred_id, preferred_name, preferred_available) = match &pref.gpu { + Some(want) => { + let found = crate::gpu::find_preferred(&gpus, want); + let id = match found { + // Canonical: the present GPU's id (identity may have matched loosely). 
+ Some(i) => gpus[i].id.clone(), + None => format!( + "{:04x}-{:04x}-{}", + want.vendor_id, want.device_id, want.occurrence + ), + }; + let name = match found { + Some(i) => gpus[i].name.clone(), + None => want.name.clone(), + }; + (Some(id), Some(name), found.is_some()) + } + None => (None, None, false), + }; + let selected = crate::gpu::selected_gpu().map(|sel| ApiSelectedGpu { + vendor: sel.info.vendor_tag().into(), + id: sel.info.id, + name: sel.info.name, + source: sel.source.tag().into(), + }); + let active = crate::gpu::active().and_then(|(g, sessions)| { + (sessions > 0).then(|| ApiActiveGpu { + vendor: crate::gpu::vendor_tag(g.vendor_id).into(), + id: g.id, + name: g.name, + backend: g.backend.into(), + sessions, + }) + }); + GpuState { + gpus: gpus + .into_iter() + .map(|g| ApiGpu { + vendor: g.vendor_tag().into(), + vram_mb: g.vram_bytes / (1024 * 1024), + id: g.id, + name: g.name, + }) + .collect(), + mode: match pref.mode { + crate::gpu::GpuMode::Auto => "auto".into(), + crate::gpu::GpuMode::Manual => "manual".into(), + }, + preferred_id, + preferred_name, + preferred_available, + env_override: crate::config::config() + .render_adapter + .clone() + .filter(|s| !s.is_empty()), + selected, + active, + } +} + +/// GPU inventory and selection +/// +/// Lists the host's hardware GPUs, the persisted auto/manual preference, the GPU the next session +/// will use (and why), and the GPU live sessions encode on right now. +#[utoipa::path( + get, + path = "/gpus", + tag = "gpu", + operation_id = "listGpus", + responses( + (status = OK, description = "GPU inventory + selection state", body = GpuState), + (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError), + ) +)] +async fn list_gpus() -> Json { + Json(gpu_state()) +} + +/// Set the GPU preference +/// +/// `auto` restores automatic selection (`PUNKTFUNK_RENDER_ADAPTER` pin, else max dedicated VRAM); +/// `manual` pins capture + encode to the given GPU. 
Persisted across restarts; applies to the +/// **next** session (a running session keeps its GPU). If the preferred GPU is absent at session +/// start the host falls back to automatic selection rather than failing. +#[utoipa::path( + put, + path = "/gpus/preference", + tag = "gpu", + operation_id = "setGpuPreference", + request_body = SetGpuPreference, + responses( + (status = OK, description = "Preference stored; the new selection state", body = GpuState), + (status = BAD_REQUEST, description = "Unknown mode, or `gpu_id` missing / not a listed GPU", body = ApiError), + (status = INTERNAL_SERVER_ERROR, description = "Preference could not be persisted", body = ApiError), + (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError), + ) +)] +async fn set_gpu_preference(ApiJson(req): ApiJson) -> Response { + let pref = match req.mode.to_ascii_lowercase().as_str() { + "auto" => { + // Keep the stored manual pick so the console can offer switching back to it. + let mut p = crate::gpu::prefs().get(); + p.mode = crate::gpu::GpuMode::Auto; + p + } + "manual" => { + let Some(id) = req + .gpu_id + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + else { + return api_error(StatusCode::BAD_REQUEST, "mode `manual` requires `gpu_id`"); + }; + let Some(g) = crate::gpu::enumerate().into_iter().find(|g| g.id == id) else { + return api_error( + StatusCode::BAD_REQUEST, + "gpu_id does not match a present GPU (see GET /gpus)", + ); + }; + crate::gpu::GpuPreference { + mode: crate::gpu::GpuMode::Manual, + gpu: Some(crate::gpu::PreferredGpu { + vendor_id: g.vendor_id, + device_id: g.device_id, + occurrence: g.occurrence, + name: g.name, + }), + } + } + other => { + return api_error( + StatusCode::BAD_REQUEST, + &format!("unknown mode {other:?} — use `auto` or `manual`"), + ) + } + }; + if let Err(e) = crate::gpu::prefs().set(pref) { + return api_error( + StatusCode::INTERNAL_SERVER_ERROR, + &format!("persist GPU preference: {e:#}"), + ); + } + 
tracing::info!(mode = %req.mode, gpu_id = ?req.gpu_id, "management API: GPU preference updated"); + Json(gpu_state()).into_response() +} + /// Live host status #[utoipa::path( get, @@ -2221,4 +2456,57 @@ mod tests { .await; assert_eq!(s, StatusCode::SERVICE_UNAVAILABLE); } + + fn put_json(path: &str, body: serde_json::Value) -> axum::http::Request { + axum::http::Request::put(path) + .header(axum::http::header::CONTENT_TYPE, "application/json") + .body(Body::from(body.to_string())) + .unwrap() + } + + /// The GPU endpoints: the inventory GET always answers (an empty list on a GPU-less box — + /// the schema is platform-independent), and the preference PUT validates mode + gpu_id + /// BEFORE touching the persisted store, so a bad request can never write. + #[tokio::test] + async fn gpu_endpoints_list_and_validate() { + let app = test_app(test_state(), None); + + let (s, b) = send(&app, get_req("/api/v1/gpus")).await; + assert_eq!(s, StatusCode::OK); + assert!(b["gpus"].is_array()); + assert!(b["mode"].is_string()); + + // Unknown mode → 400. + let (s, _) = send( + &app, + put_json( + "/api/v1/gpus/preference", + serde_json::json!({"mode": "fastest"}), + ), + ) + .await; + assert_eq!(s, StatusCode::BAD_REQUEST); + + // `manual` without a gpu_id → 400. + let (s, _) = send( + &app, + put_json( + "/api/v1/gpus/preference", + serde_json::json!({"mode": "manual"}), + ), + ) + .await; + assert_eq!(s, StatusCode::BAD_REQUEST); + + // `manual` with an id that is not a present GPU → 400 (the console only offers listed ids). 
+ let (s, _) = send( + &app, + put_json( + "/api/v1/gpus/preference", + serde_json::json!({"mode": "manual", "gpu_id": "ffff-ffff-9"}), + ), + ) + .await; + assert_eq!(s, StatusCode::BAD_REQUEST); + } } diff --git a/crates/punktfunk-host/src/vdisplay/windows/manager.rs b/crates/punktfunk-host/src/vdisplay/windows/manager.rs index 018d55d..2581c5b 100644 --- a/crates/punktfunk-host/src/vdisplay/windows/manager.rs +++ b/crates/punktfunk-host/src/vdisplay/windows/manager.rs @@ -672,15 +672,14 @@ fn idd_push_mode() -> bool { /// on a hybrid box). `None` = let the IDD use its natural adapter (Apollo parity — avoids the cross-GPU /// ACCESS_LOST storm SudoVDA hit when pinned). fn resolve_render_pin() -> Option { - if crate::config::config().render_adapter.is_some() { - // SAFETY: `resolve_render_adapter_luid` is `unsafe` only for its DXGI factory FFI; it takes no - // arguments and returns an `Option` by value, so there is no input/borrow to keep valid. - unsafe { crate::win_adapter::resolve_render_adapter_luid() } + // A web-console manual GPU preference pins exactly like PUNKTFUNK_RENDER_ADAPTER: the whole + // pipeline (driver render device, capture ring, encoder) must sit on the chosen adapter. + let manual_pref = crate::gpu::prefs().get().mode == crate::gpu::GpuMode::Manual; + if crate::config::config().render_adapter.is_some() || manual_pref { + crate::win_adapter::resolve_render_adapter_luid() } else if crate::config::config().idd_push { tracing::info!("IDD push: pinning the discrete render GPU (SET_RENDER_ADAPTER)"); - // SAFETY: as above — `resolve_render_adapter_luid` takes no arguments and returns an - // `Option` by value; the `unsafe` covers only its DXGI factory enumeration FFI. 
- unsafe { crate::win_adapter::resolve_render_adapter_luid() } + crate::win_adapter::resolve_render_adapter_luid() } else { tracing::info!( "SET_RENDER_ADAPTER skipped (Apollo-parity: no render pin; set PUNKTFUNK_RENDER_ADAPTER= to force one)" diff --git a/crates/punktfunk-host/src/windows/win_adapter.rs b/crates/punktfunk-host/src/windows/win_adapter.rs index 912d1d3..72b3c75 100644 --- a/crates/punktfunk-host/src/windows/win_adapter.rs +++ b/crates/punktfunk-host/src/windows/win_adapter.rs @@ -5,58 +5,35 @@ //! (IDD-push) and the pf-vdisplay backend depend on it as a *peer* instead of reaching into the SudoVDA //! module — breaking that circular reach-in, which let the SudoVDA backend be dropped without losing this //! helper (audit §9 / Goal 2 — done). This is the plan's `windows/adapter.rs`. +//! +//! The selection logic itself now lives in [`crate::gpu`] (shared with the mgmt API's GPU +//! endpoints): **operator preference (web console) > `PUNKTFUNK_RENDER_ADAPTER` substring > max +//! `DedicatedVideoMemory`**, WARP/Basic-Render always excluded. This wrapper is the LUID-shaped +//! view of it, plus the per-decision logging (call sites are per-session, never per-frame). use windows::Win32::Foundation::LUID; -/// Pick the discrete render GPU LUID: the adapter with the most `DedicatedVideoMemory`, skipping -/// WARP / Basic-Render and the SudoVDA software adapter (≈0 VRAM). `PUNKTFUNK_RENDER_ADAPTER=` -/// forces a match by Description (Apollo's `adapter_name`). Used by the IDD direct-push capturer (to -/// create its shared textures on the same discrete GPU it pins, where NVENC runs) and SET_RENDER_ADAPTER. -/// -/// # Safety -/// Creates + enumerates a DXGI factory; the COM calls run in the caller's apartment (the existing callers -/// already satisfy this). 
-pub(crate) unsafe fn resolve_render_adapter_luid() -> Option { - use windows::Win32::Graphics::Dxgi::{CreateDXGIFactory1, IDXGIFactory1}; - let want = crate::config::config() - .render_adapter - .clone() - .filter(|s| !s.is_empty()); - let factory: IDXGIFactory1 = CreateDXGIFactory1().ok()?; - let mut best: Option<(LUID, u64, String)> = None; - let mut i = 0u32; - while let Ok(a) = factory.EnumAdapters1(i) { - i += 1; - let Ok(d) = a.GetDesc1() else { continue }; - let name = String::from_utf16_lossy(&d.Description); - let name = name.trim_end_matches('\u{0}').to_string(); - let lname = name.to_ascii_lowercase(); - if lname.contains("basic render") || lname.contains("warp") { - continue; // never pin to the software rasterizer - } - if let Some(w) = &want { - if lname.contains(&w.to_ascii_lowercase()) { - tracing::info!( - adapter = name, - "render adapter chosen by PUNKTFUNK_RENDER_ADAPTER" - ); - return Some(d.AdapterLuid); - } - continue; - } - let vram = d.DedicatedVideoMemory as u64; // SudoVDA software adapter ≈ 0 → loses to the dGPU - if best.as_ref().is_none_or(|(_, v, _)| vram > *v) { - best = Some((d.AdapterLuid, vram, name)); - } - } - match best { - Some((luid, vram, name)) => { +/// Pick the render GPU LUID the pipeline is created on: the IDD-push capturer's shared-texture +/// ring, the IddCx SET_RENDER_ADAPTER pin, and (via the captured frame's device) NVENC/AMF/QSV all +/// follow this one decision — see [`crate::gpu::selected_gpu`] for the precedence. A configured +/// preference that doesn't match a present GPU falls back to auto selection (with a warning) +/// rather than returning `None`, so a stale preference never stops the host from streaming. 
+pub(crate) fn resolve_render_adapter_luid() -> Option { + match crate::gpu::selected_gpu() { + Some(sel) => { tracing::info!( - adapter = name, - vram_mb = vram / (1024 * 1024), - "render adapter chosen (max VRAM)" + adapter = sel.info.name, + vram_mb = sel.info.vram_bytes / (1024 * 1024), + source = sel.source.tag(), + "render adapter selected" ); - Some(luid) + if sel.source == crate::gpu::PickSource::PreferenceMissing { + tracing::warn!( + "the preferred GPU is not present — auto-selected the adapter above \ + (fix or clear the preference in the web console)" + ); + } + Some(sel.info.luid()) } None => { tracing::warn!("no suitable render adapter found for SET_RENDER_ADAPTER"); diff --git a/web/messages/de.json b/web/messages/de.json index 71e771b..20448bd 100644 --- a/web/messages/de.json +++ b/web/messages/de.json @@ -37,6 +37,16 @@ "compositor_available": "Verfügbar", "compositor_unavailable": "Nicht verfügbar", "compositor_default": "Standard", + "host_gpus": "GPUs", + "host_gpus_help": "Die GPU, auf der der Host aufnimmt und encodiert. Automatisch wählt die beste GPU; eine bevorzugte GPU bindet Aufnahme + Encoding an sie. 
Eine Änderung gilt ab der nächsten Sitzung.", + "gpu_automatic": "Automatisch", + "gpu_prefer": "Bevorzugen", + "gpu_preferred": "Bevorzugt", + "gpu_in_use": "In Benutzung · {backend}", + "gpu_next_session": "Nächste Sitzung", + "gpu_none": "Keine GPUs erkannt.", + "gpu_missing_warning": "Die bevorzugte GPU „{name}“ ist nicht vorhanden — stattdessen wird automatisch gewählt.", + "gpu_env_note": "PUNKTFUNK_RENDER_ADAPTER={value} bindet die GPU im Automatikmodus.", "clients_title": "Gekoppelte Geräte", "clients_empty": "Noch keine gekoppelten Geräte.", "clients_name": "Name", diff --git a/web/messages/en.json b/web/messages/en.json index 04cfecd..398aedb 100644 --- a/web/messages/en.json +++ b/web/messages/en.json @@ -37,6 +37,16 @@ "compositor_available": "Available", "compositor_unavailable": "Unavailable", "compositor_default": "Default", + "host_gpus": "GPUs", + "host_gpus_help": "The GPU the host captures and encodes on. Automatic picks the best GPU; preferring one pins capture + encode to it. 
A change applies to the next session.", + "gpu_automatic": "Automatic", + "gpu_prefer": "Prefer", + "gpu_preferred": "Preferred", + "gpu_in_use": "In use · {backend}", + "gpu_next_session": "Next session", + "gpu_none": "No GPUs detected.", + "gpu_missing_warning": "The preferred GPU “{name}” is not present — automatic selection is used instead.", + "gpu_env_note": "PUNKTFUNK_RENDER_ADAPTER={value} pins the GPU while in automatic mode.", "clients_title": "Paired clients", "clients_empty": "No paired clients yet.", "clients_name": "Name", diff --git a/web/src/sections/Host/GpuCard.tsx b/web/src/sections/Host/GpuCard.tsx new file mode 100644 index 0000000..0bec8d7 --- /dev/null +++ b/web/src/sections/Host/GpuCard.tsx @@ -0,0 +1,146 @@ +import { useQueryClient } from "@tanstack/react-query"; +import { Button } from "@unom/ui/button"; +import type { FC } from "react"; +import { + getListGpusQueryKey, + useListGpus, + useSetGpuPreference, +} from "@/api/gen/gpu/gpu"; +import type { GpuState } from "@/api/gen/model"; +import { QueryState } from "@/components/query-state"; +import { Badge } from "@/components/ui/badge"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import type { Loadable } from "@/lib/query"; +import { m } from "@/paraglide/messages"; + +/** + * Container: the host's GPU inventory + selection. Polls (a stream starting/stopping moves the + * "In use" badge; an eGPU can appear) and applies auto/preferred choices via the mgmt API. A + * preference applies to the NEXT session — the help text says so. + */ +export const GpuSection: FC = () => { + const qc = useQueryClient(); + const gpus = useListGpus({ query: { refetchInterval: 5_000 } }); + const setPref = useSetGpuPreference(); + + const apply = (mode: "auto" | "manual", gpuId?: string) => + setPref.mutate( + { data: { mode, gpu_id: gpuId ?? 
null } }, + { + onSuccess: () => + qc.invalidateQueries({ queryKey: getListGpusQueryKey() }), + }, + ); + + return ; +}; + +const fmtVram = (mb: number) => + mb >= 1024 ? `${Math.round(mb / 1024)} GiB` : `${mb} MiB`; + +/** + * GPU list in the compositors-card style: per-GPU badges for the manual pick ("Preferred"), what + * the next session will use ("Next session"), and what live sessions encode on right now + * ("In use · NVENC"), plus an Automatic/Prefer control pair. + */ +export const GpuCard: FC<{ + state: Loadable; + onApply: (mode: "auto" | "manual", gpuId?: string) => void; + busy: boolean; +}> = ({ state, onApply, busy }) => { + const s = state.data; + return ( + + + + {m.host_gpus()} + {s && s.gpus.length > 0 && ( + + )} + + + +

{m.host_gpus_help()}

+ + {s && + (s.gpus.length === 0 ? ( +

{m.gpu_none()}

+ ) : ( +
    + {s.gpus.map((g) => { + const isActive = s.active?.id === g.id; + const isSelected = s.selected?.id === g.id; + const isPreferred = + s.mode === "manual" && s.preferred_id === g.id; + return ( +
  • +
    +
    + {g.name} + {isPreferred && ( + + {m.gpu_preferred()} + + )} + {isActive && s.active ? ( + + {m.gpu_in_use({ + backend: s.active.backend.toUpperCase(), + })} + + ) : ( + isSelected && ( + + {m.gpu_next_session()} + + ) + )} +
    + + {g.vendor} + {g.vram_mb > 0 ? ` · ${fmtVram(g.vram_mb)}` : ""} + {` · ${g.id}`} + +
    + +
  • + ); + })} +
+ ))} + {s?.selected?.source === "preference_missing" && ( +

+ {m.gpu_missing_warning({ name: s.preferred_name ?? "?" })} +

+ )} + {s?.env_override && s.mode === "auto" && ( +

+ {m.gpu_env_note({ value: s.env_override })} +

+ )} +
+
+
+ ); +}; diff --git a/web/src/sections/Host/index.tsx b/web/src/sections/Host/index.tsx index 6d50b03..81c0d27 100644 --- a/web/src/sections/Host/index.tsx +++ b/web/src/sections/Host/index.tsx @@ -1,6 +1,7 @@ import type { FC } from "react"; import { useGetHostInfo, useListCompositors } from "@/api/gen/host/host"; import { useLocale } from "@/lib/i18n"; +import { GpuSection } from "./GpuCard"; import { HostView } from "./view"; export const SectionHost: FC = () => { @@ -8,5 +9,7 @@ export const SectionHost: FC = () => { const host = useGetHostInfo(); const compositors = useListCompositors(); - return ; + return ( + } /> + ); }; diff --git a/web/src/sections/Host/view.tsx b/web/src/sections/Host/view.tsx index 403546a..487ce41 100644 --- a/web/src/sections/Host/view.tsx +++ b/web/src/sections/Host/view.tsx @@ -1,5 +1,5 @@ import Section from "@unom/ui/section"; -import type { FC } from "react"; +import type { FC, ReactNode } from "react"; import type { AvailableCompositor } from "@/api/gen/model/availableCompositor"; import type { HostInfo } from "@/api/gen/model/hostInfo"; import { QueryState } from "@/components/query-state"; @@ -11,7 +11,9 @@ import { m } from "@/paraglide/messages"; export const HostView: FC<{ host: Loadable; compositors: Loadable; -}> = ({ host, compositors }) => { + /** The GPU inventory/selection card (a self-contained container — see `GpuCard.tsx`). */ + gpu?: ReactNode; +}> = ({ host, compositors, gpu }) => { const h = host.data; return (
@@ -77,6 +79,8 @@ export const HostView: FC<{ )} + {gpu} + {m.host_compositors()}