feat(host,web): multi-GPU selection — GPU inventory + preference API, web-console GPU card

- new crate::gpu (compiled on all platforms so the OpenAPI doc stays platform-independent): DXGI / sysfs GPU inventory with reboot-stable ids (PCI vendor:device + occurrence — LUIDs are per-boot), persisted auto/manual preference (<config>/gpu-settings.json, atomic temp+rename with in-memory rollback), one selection with precedence console preference > PUNKTFUNK_RENDER_ADAPTER > max VRAM and graceful fallback when the preferred GPU is absent, plus a live "in use" record (RAII session guard wrapped around every encoder open_video returns)
- fix: windows_gpu_vendor derived the encoder backend from DXGI adapter 0 instead of the selected render adapter — on a hybrid box (e.g. Intel iGPU at index 0 + NVIDIA dGPU) the backend could disagree with the GPU the capture ring / IddCx render pin sit on. The NVENC 4:4:4 probe now also runs on the selected adapter (was: OS default), the codec/4:4:4 probe caches are keyed per selected GPU (were process-lifetime OnceLocks), and an explicit PUNKTFUNK_ENCODER conflicting with the selected GPU's vendor warns up front
- mgmt API: GET /api/v1/gpus (inventory + mode + preferred + next-session selection with reason + in-use GPU/backend/session-count) and PUT /api/v1/gpus/preference (validates mode/gpu_id before writing); openapi.json regenerated; the vdisplay render pin now also engages for a console preference (not just the env pin)
- web console: GPU card on the Host page — list with vendor + VRAM, Automatic / Prefer controls, Preferred / Next session / "In use · backend" badges, missing-preferred-GPU warning and env-pin note; en + de messages
- Linux: a matched manual preference picks the VAAPI render node and the NVENC-vs-VAAPI auto choice; auto mode is exactly the previous behavior

Validated live on the hybrid laptop (RTX 3500 Ada + Intel Arc Pro, which enumerates twice — the occurrence ids disambiguate): enumerate, prefer, bad-id 400, restart persistence, auto-restore keeping the stored pick.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 13:57:18 +02:00
parent 40fefd73ca
commit 019f2677a7
17 changed files with 1881 additions and 200 deletions
@@ -153,6 +153,8 @@ fn api_router_parts() -> (Router<Arc<MgmtState>>, utoipa::openapi::OpenApi) {
                .routes(routes!(get_health))
                .routes(routes!(get_host_info))
                .routes(routes!(list_compositors))
+                .routes(routes!(list_gpus))
+                .routes(routes!(set_gpu_preference))
                .routes(routes!(get_status))
                .routes(routes!(list_paired_clients))
                .routes(routes!(unpair_client))
@@ -204,6 +206,7 @@ pub fn openapi_json() -> String {
    modifiers(&SecurityAddon),
    tags(
        (name = "host", description = "Host identity, capabilities, and liveness"),
+        (name = "gpu", description = "GPU inventory and selection: list the host's GPUs, choose automatic or a preferred GPU, see the one in use"),
        (name = "clients", description = "Paired Moonlight client management"),
        (name = "pairing", description = "Pairing PIN delivery (the out-of-band half of the GameStream pairing handshake)"),
        (name = "native", description = "Native punktfunk/1 pairing: arm a window, display the host PIN, manage paired devices"),
@@ -670,6 +673,238 @@ async fn list_compositors() -> Json<Vec<AvailableCompositor>> {
    )
 }

+/// One hardware GPU on the host (software/WARP adapters are never listed).
+#[derive(Serialize, ToSchema)] // serialize-only: built by `gpu_state()`, never parsed from a request
+struct ApiGpu {
+    /// Stable identifier (`vendorid-deviceid-occurrence`, hex PCI ids) — pass to `setGpuPreference`.
+    /// Stable across reboots and driver updates, unlike an adapter index or LUID.
+    #[schema(example = "10de-2c05-0")]
+    id: String,
+    /// Adapter/marketing name.
+    #[schema(example = "NVIDIA GeForce RTX 5070 Ti")]
+    name: String,
+    /// `nvidia` | `amd` | `intel` | `other`.
+    vendor: String, // filled from the inventory entry's `vendor_tag()`
+    /// Dedicated VRAM in MiB (0 where the platform doesn't expose it).
+    vram_mb: u64, // `vram_bytes / (1024 * 1024)`: integer division, a partial MiB is dropped
+}
+
+/// The GPU the **next** session's pipeline will be created on, and why. (A preference change
+/// applies to the next session; a running session keeps the GPU it opened on.)
+#[derive(Serialize, ToSchema)] // serialize-only: mapped from `crate::gpu::selected_gpu()` in `gpu_state()`
+struct ApiSelectedGpu {
+    id: String,
+    name: String,
+    /// `nvidia` | `amd` | `intel` | `other`.
+    vendor: String, // from the selection's `info.vendor_tag()`
+    /// Why this GPU was selected: `preference` (the manual choice), `env`
+    /// (`PUNKTFUNK_RENDER_ADAPTER`), `auto` (max dedicated VRAM / platform default), or
+    /// `preference_missing` (a manual choice is set but that GPU is absent — auto-selected
+    /// instead so the host keeps streaming).
+    source: String, // from the selection's `source.tag()`
+}
+
+/// The GPU live sessions are encoding on right now.
+#[derive(Serialize, ToSchema)] // `gpu_state()` only builds this while the session count is non-zero
+struct ApiActiveGpu {
+    /// Stable id matching an entry of `gpus` (empty for the CPU/software encoder).
+    id: String,
+    name: String,
+    /// `nvidia` | `amd` | `intel` | `other`.
+    vendor: String, // derived from the active record's `vendor_id` via `crate::gpu::vendor_tag`
+    /// The encode backend in use (`nvenc` | `amf` | `qsv` | `vaapi` | `software`).
+    backend: String, // converted from the active record's `backend` field
+    /// Number of live encode sessions on it.
+    sessions: u32,
+}
+
+/// Full GPU-selection state for the console: inventory, the persisted preference, what the next
+/// session will use, and what is in use right now.
+#[derive(Serialize, ToSchema)] // built by `gpu_state()`; returned by both `listGpus` and `setGpuPreference`
+struct GpuState {
+    /// The host's hardware GPUs.
+    gpus: Vec<ApiGpu>,
+    /// `auto` or `manual`.
+    mode: String,
+    /// The manually preferred GPU's stable id, when one is stored (kept while `mode` is `auto` so
+    /// a console can offer returning to it). May reference a GPU that is currently absent.
+    preferred_id: Option<String>, // present GPU's own id when found, else rebuilt from the stored vendor/device/occurrence
+    /// The stored name of the preferred GPU (a usable label even when it is absent).
+    preferred_name: Option<String>, // the present GPU's current name when found, else the stored one
+    /// Whether the preferred GPU is currently present.
+    preferred_available: bool, // always false when no preference is stored
+    /// `PUNKTFUNK_RENDER_ADAPTER` (the host.env pin), when set — it applies while `mode` is
+    /// `auto`; a manual preference overrides it.
+    env_override: Option<String>, // an empty configured value is reported as absent
+    /// The GPU the next session will use.
+    selected: Option<ApiSelectedGpu>,
+    /// The GPU live sessions use right now (absent while nothing is streaming).
+    active: Option<ApiActiveGpu>,
+}
+
+/// Request body for `setGpuPreference`.
+#[derive(Deserialize, ToSchema)] // deserialize-only: the handler answers with `GpuState`
+struct SetGpuPreference {
+    /// `auto` (env pin, else max dedicated VRAM — the default) or `manual`.
+    #[schema(example = "manual")]
+    mode: String, // the handler ASCII-lowercases this before matching
+    /// Required when `mode` is `manual`: the stable `id` of a currently listed GPU
+    /// (see `listGpus`).
+    #[schema(example = "10de-2c05-0")]
+    gpu_id: Option<String>, // trimmed by the handler; blank counts as missing; not read for `auto`
+}
+
+/// Assemble the [`GpuState`] snapshot that both GPU endpoints answer with.
+///
+/// Combines the current inventory, the stored preference (resolved against that inventory),
+/// the next-session selection, the live "in use" record and the configured render-adapter pin.
+fn gpu_state() -> GpuState {
+    const BYTES_PER_MIB: u64 = 1024 * 1024;
+
+    let inventory = crate::gpu::enumerate();
+    let stored = crate::gpu::prefs().get();
+
+    // Resolve the stored pick against what is present right now.
+    let (preferred_id, preferred_name, preferred_available) = match stored.gpu.as_ref() {
+        None => (None, None, false),
+        Some(want) => match crate::gpu::find_preferred(&inventory, want) {
+            // Present: report the live entry's canonical id and name (the match may be loose).
+            Some(idx) => {
+                let hit = &inventory[idx];
+                (Some(hit.id.clone()), Some(hit.name.clone()), true)
+            }
+            // Absent: rebuild the id from the stored PCI identity and keep the stored name.
+            None => {
+                let rebuilt = format!(
+                    "{:04x}-{:04x}-{}",
+                    want.vendor_id, want.device_id, want.occurrence
+                );
+                (Some(rebuilt), Some(want.name.clone()), false)
+            }
+        },
+    };
+
+    let selected = crate::gpu::selected_gpu().map(|sel| {
+        // Read the vendor tag before `id` / `name` are moved out of `sel.info`.
+        let vendor: String = sel.info.vendor_tag().into();
+        ApiSelectedGpu {
+            id: sel.info.id,
+            name: sel.info.name,
+            vendor,
+            source: sel.source.tag().into(),
+        }
+    });
+
+    // A record with zero sessions is reported as "nothing in use".
+    let active = match crate::gpu::active() {
+        Some((gpu, sessions)) if sessions > 0 => {
+            let vendor: String = crate::gpu::vendor_tag(gpu.vendor_id).into();
+            Some(ApiActiveGpu {
+                id: gpu.id,
+                name: gpu.name,
+                vendor,
+                backend: gpu.backend.into(),
+                sessions,
+            })
+        }
+        _ => None,
+    };
+
+    let gpus: Vec<ApiGpu> = inventory
+        .into_iter()
+        .map(|gpu| {
+            let vendor: String = gpu.vendor_tag().into();
+            let vram_mb = gpu.vram_bytes / BYTES_PER_MIB;
+            ApiGpu {
+                id: gpu.id,
+                name: gpu.name,
+                vendor,
+                vram_mb,
+            }
+        })
+        .collect();
+
+    let mode = String::from(match stored.mode {
+        crate::gpu::GpuMode::Auto => "auto",
+        crate::gpu::GpuMode::Manual => "manual",
+    });
+
+    // An empty pin is treated the same as no pin.
+    let env_override = crate::config::config()
+        .render_adapter
+        .clone()
+        .filter(|pin| !pin.is_empty());
+
+    GpuState {
+        gpus,
+        mode,
+        preferred_id,
+        preferred_name,
+        preferred_available,
+        env_override,
+        selected,
+        active,
+    }
+}
+
+/// GPU inventory and selection
+///
+/// Lists the host's hardware GPUs, the persisted auto/manual preference, the GPU the next session
+/// will use (and why), and the GPU live sessions encode on right now.
+#[utoipa::path(
+    get,
+    path = "/gpus",
+    tag = "gpu",
+    operation_id = "listGpus",
+    responses(
+        (status = OK, description = "GPU inventory + selection state", body = GpuState),
+        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
+    )
+)]
+async fn list_gpus() -> Json<GpuState> {
+    // Same snapshot builder the preference PUT answers with.
+    let snapshot = gpu_state();
+    Json(snapshot)
+}
+
+/// Set the GPU preference
+///
+/// `auto` restores automatic selection (`PUNKTFUNK_RENDER_ADAPTER` pin, else max dedicated VRAM);
+/// `manual` pins capture + encode to the given GPU. Persisted across restarts; applies to the
+/// **next** session (a running session keeps its GPU). If the preferred GPU is absent at session
+/// start the host falls back to automatic selection rather than failing.
+#[utoipa::path(
+    put,
+    path = "/gpus/preference",
+    tag = "gpu",
+    operation_id = "setGpuPreference",
+    request_body = SetGpuPreference,
+    responses(
+        (status = OK, description = "Preference stored; the new selection state", body = GpuState),
+        (status = BAD_REQUEST, description = "Unknown mode, or `gpu_id` missing / not a listed GPU", body = ApiError),
+        (status = INTERNAL_SERVER_ERROR, description = "Preference could not be persisted", body = ApiError),
+        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
+    )
+)]
+async fn set_gpu_preference(ApiJson(req): ApiJson<SetGpuPreference>) -> Response {
+    // Every validation failure returns before the store is touched.
+    let mode = req.mode.to_ascii_lowercase();
+    let pref = if mode == "auto" {
+        // Only the mode flips; the stored manual pick stays so the console can offer it again.
+        let mut stored = crate::gpu::prefs().get();
+        stored.mode = crate::gpu::GpuMode::Auto;
+        stored
+    } else if mode == "manual" {
+        // A blank or whitespace-only id counts as missing.
+        let wanted = match req.gpu_id.as_deref().map(str::trim) {
+            Some(id) if !id.is_empty() => id,
+            _ => return api_error(StatusCode::BAD_REQUEST, "mode `manual` requires `gpu_id`"),
+        };
+        // Only a GPU that is present right now can be pinned.
+        let present = crate::gpu::enumerate();
+        let Some(gpu) = present.into_iter().find(|g| g.id == wanted) else {
+            return api_error(
+                StatusCode::BAD_REQUEST,
+                "gpu_id does not match a present GPU (see GET /gpus)",
+            );
+        };
+        let pick = crate::gpu::PreferredGpu {
+            vendor_id: gpu.vendor_id,
+            device_id: gpu.device_id,
+            occurrence: gpu.occurrence,
+            name: gpu.name,
+        };
+        crate::gpu::GpuPreference {
+            mode: crate::gpu::GpuMode::Manual,
+            gpu: Some(pick),
+        }
+    } else {
+        return api_error(
+            StatusCode::BAD_REQUEST,
+            &format!("unknown mode {mode:?} — use `auto` or `manual`"),
+        );
+    };
+
+    if let Err(err) = crate::gpu::prefs().set(pref) {
+        let detail = format!("persist GPU preference: {err:#}");
+        return api_error(StatusCode::INTERNAL_SERVER_ERROR, &detail);
+    }
+    tracing::info!(mode = %req.mode, gpu_id = ?req.gpu_id, "management API: GPU preference updated");
+
+    // Answer with a fresh snapshot so the caller sees the resulting selection.
+    let snapshot = gpu_state();
+    Json(snapshot).into_response()
+}
+
 /// Live host status
 #[utoipa::path(
    get,
@@ -2221,4 +2456,57 @@ mod tests {
        .await;
        assert_eq!(s, StatusCode::SERVICE_UNAVAILABLE);
    }
+
+    /// Build a `PUT` request to `path` whose body is `body` serialized as JSON, with a matching
+    /// `Content-Type` header. Panics if the request cannot be built.
+    fn put_json(path: &str, body: serde_json::Value) -> axum::http::Request<Body> {
+        let payload = Body::from(body.to_string());
+        axum::http::Request::builder()
+            .method(axum::http::Method::PUT)
+            .uri(path)
+            .header(axum::http::header::CONTENT_TYPE, "application/json")
+            .body(payload)
+            .unwrap()
+    }
+
+    /// GPU endpoints smoke test. The inventory GET must always answer with the documented shape
+    /// (an empty list is fine on a GPU-less box), and the preference PUT must reject an unknown
+    /// mode, a `manual` request without `gpu_id`, and a `gpu_id` that is not a present GPU —
+    /// each with 400, i.e. before anything reaches the persisted store.
+    #[tokio::test]
+    async fn gpu_endpoints_list_and_validate() {
+        const PREFERENCE: &str = "/api/v1/gpus/preference";
+        let app = test_app(test_state(), None);
+
+        let (status, body) = send(&app, get_req("/api/v1/gpus")).await;
+        assert_eq!(status, StatusCode::OK);
+        assert!(body["gpus"].is_array());
+        assert!(body["mode"].is_string());
+
+        // Every body below must be turned away with 400.
+        let rejected = [
+            ("unknown mode", serde_json::json!({"mode": "fastest"})),
+            ("manual without gpu_id", serde_json::json!({"mode": "manual"})),
+            // The console only ever offers listed ids.
+            (
+                "manual with an absent gpu_id",
+                serde_json::json!({"mode": "manual", "gpu_id": "ffff-ffff-9"}),
+            ),
+        ];
+        for (case, payload) in rejected {
+            let (status, _) = send(&app, put_json(PREFERENCE, payload)).await;
+            assert_eq!(status, StatusCode::BAD_REQUEST, "{case}");
+        }
+    }
 }