Compare commits
52 Commits
c87ca577a3
...
v0.2.1
| Author | SHA1 | Date | |
|---|---|---|---|
| 3947d5b07a | |||
| 238501597e | |||
| 04dd3e3a19 | |||
| 61aa1053e7 | |||
| 50e17b3508 | |||
| 94c556f0e3 | |||
| 32c1929948 | |||
| 3915a82780 | |||
| a4833e4780 | |||
| 4e79e6cdad | |||
| f74bc4a3f1 | |||
| 8e18d01af5 | |||
| 3477cbe7ce | |||
| 5a2e07e865 | |||
| 6e949b6748 | |||
| 8ae161fe61 | |||
| 3a89ee8cd7 | |||
| dac0fee4e3 | |||
| 125a51d81d | |||
| 7b99b41ede | |||
| 9ea2c17419 | |||
| a9cca82fb8 | |||
| 7ab0661ddc | |||
| 92e68024f1 | |||
| 64abce6daa | |||
| bdfab8e0d5 | |||
| 8e87e617df | |||
| 5bf787eb2b | |||
| 0a6c9d8852 | |||
| 0eedfb3c1f | |||
| f6490f4c28 | |||
| d01a8fd17a | |||
| 3e7c9bd059 | |||
| 7aa787a789 | |||
| 3514702d8c | |||
| 327a5fa828 | |||
| 9777ed7fb3 | |||
| ba68a98873 | |||
| 22359f5dc8 | |||
| 7e9023faad | |||
| 5acc12d9e9 | |||
| aed0bf0c2a | |||
| b65745284e | |||
| 8ca695eb4c | |||
| 61c02e695e | |||
| 203ad8069d | |||
| 5f8c6b6147 | |||
| cd3368fc71 | |||
| bd05bc8c30 | |||
| 658564353c | |||
| 6b3cbce120 | |||
| 739fa74e68 |
+2
-2
@@ -1,9 +1,9 @@
|
|||||||
# Root build context is used only by web/Dockerfile, which needs web/ and
|
# Root build context is used only by web/Dockerfile, which needs web/ and
|
||||||
# docs/api/openapi.json. Allowlist those; keep everything else (target/, .git, crates)
|
# api/openapi.json. Allowlist those; keep everything else (target/, .git, crates)
|
||||||
# out of the context upload.
|
# out of the context upload.
|
||||||
*
|
*
|
||||||
!web
|
!web
|
||||||
!docs/api/openapi.json
|
!api/openapi.json
|
||||||
web/node_modules
|
web/node_modules
|
||||||
web/.output
|
web/.output
|
||||||
web/dist
|
web/dist
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches: [main]
|
branches: [main]
|
||||||
# The flatpak is the CLIENT — only rebuild when the client/core/manifest change, not on every
|
# The flatpak is the CLIENT — only rebuild when the client/core/manifest change, not on every
|
||||||
# docs/host push (this is a heavy flatpak-builder run). Tags (v*, the client release) build too.
|
# design/host push (this is a heavy flatpak-builder run). Tags (v*, the client release) build too.
|
||||||
paths:
|
paths:
|
||||||
- 'clients/linux/**'
|
- 'clients/linux/**'
|
||||||
- 'crates/punktfunk-core/**'
|
- 'crates/punktfunk-core/**'
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# One-shot provisioning of the WDK + cargo-wdk onto the persistent self-hosted windows-amd64 runner, so
|
# One-shot provisioning of the WDK + cargo-wdk onto the persistent self-hosted windows-amd64 runner, so
|
||||||
# the all-Rust UMDF drivers can build there (docs/windows-host-rewrite.md, M0). The runner has the base
|
# the all-Rust UMDF drivers can build there (design/windows-host-rewrite.md, M0). The runner has the base
|
||||||
# Windows SDK + MSVC + LLVM + Rust but NOT the WDK (no km/wdf/iddcx headers) or cargo-wdk.
|
# Windows SDK + MSVC + LLVM + Rust but NOT the WDK (no km/wdf/iddcx headers) or cargo-wdk.
|
||||||
#
|
#
|
||||||
# Dispatch manually (workflow_dispatch). Idempotent: re-running is a near no-op once provisioned. The
|
# Dispatch manually (workflow_dispatch). Idempotent: re-running is a near no-op once provisioned. The
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# Windows driver workspace CI — runs on the self-hosted Windows runner (home-windows-1, host mode;
|
# Windows driver workspace CI — runs on the self-hosted Windows runner (home-windows-1, host mode;
|
||||||
# label windows-amd64). Part of the Windows-host rewrite (docs/windows-host-rewrite.md, M0).
|
# label windows-amd64). Part of the Windows-host rewrite (design/windows-host-rewrite.md, M0).
|
||||||
#
|
#
|
||||||
# Stage 1 (this file): PROBE the runner's driver toolchain (WDK / EWDK / cargo-make / LLVM / the
|
# Stage 1 (this file): PROBE the runner's driver toolchain (WDK / EWDK / cargo-make / LLVM / the
|
||||||
# inf2cat/stampinf/devgen/signtool tools) so we know what's provisioned BEFORE writing driver code,
|
# inf2cat/stampinf/devgen/signtool tools) so we know what's provisioned BEFORE writing driver code,
|
||||||
@@ -76,7 +76,7 @@ jobs:
|
|||||||
head "EWDK"
|
head "EWDK"
|
||||||
Write-Host ("EWDKROOT = " + ($env:EWDKROOT ?? '<unset>'))
|
Write-Host ("EWDKROOT = " + ($env:EWDKROOT ?? '<unset>'))
|
||||||
|
|
||||||
head "LLVM / clang (README pins 21.1.2 for wdk-sys bindgen)"
|
head "LLVM / clang (bindgen 0.72 builds on the runner default clang)"
|
||||||
Write-Host ("LIBCLANG_PATH = " + ($env:LIBCLANG_PATH ?? '<unset>'))
|
Write-Host ("LIBCLANG_PATH = " + ($env:LIBCLANG_PATH ?? '<unset>'))
|
||||||
$clang = Get-Command clang -ErrorAction SilentlyContinue
|
$clang = Get-Command clang -ErrorAction SilentlyContinue
|
||||||
if ($clang) { & clang --version } else { Write-Host "clang: NOT on PATH" }
|
if ($clang) { & clang --version } else { Write-Host "clang: NOT on PATH" }
|
||||||
@@ -119,12 +119,12 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
# wdk-build otherwise picks 10.0.28000.0 (no km/crt) and bindgen fails — pin the WDK SDK version.
|
# wdk-build otherwise picks 10.0.28000.0 (no km/crt) and bindgen fails — pin the WDK SDK version.
|
||||||
Version_Number: '10.0.26100.0'
|
Version_Number: '10.0.26100.0'
|
||||||
# wdk-sys bindgen layout tests overflow (E0080) on the runner's default LLVM (ToT/22-dev); point at
|
# No LIBCLANG_PATH pin: the vendored bindgen 0.72 builds clean on the runner's default clang 22
|
||||||
# the pinned LLVM 21.1.2 that windows-drivers-rs builds clean against (provisioned to C:\llvm-21).
|
# (the shipping pack proves it). A 0.71-era layout-test overflow once needed LLVM 21; the 0.72 bump
|
||||||
LIBCLANG_PATH: 'C:\llvm-21\bin'
|
# retired that — see design/windows-build-and-packaging.md.
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- name: Ensure WDK + cargo-wdk + LLVM 21.1.2 (idempotent self-provision)
|
- name: Ensure WDK + cargo-wdk (idempotent self-provision)
|
||||||
# Run the provisioning script here too so driver-build is self-sufficient and never races a
|
# Run the provisioning script here too so driver-build is self-sufficient and never races a
|
||||||
# separate provision run on the single runner. Path is relative to the job working-directory
|
# separate provision run on the single runner. Path is relative to the job working-directory
|
||||||
# (packaging/windows/drivers). Near-noop once the toolchain is present.
|
# (packaging/windows/drivers). Near-noop once the toolchain is present.
|
||||||
|
|||||||
@@ -56,6 +56,22 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Locale-safety gate (installer-run scripts must be ASCII)
|
||||||
|
shell: pwsh
|
||||||
|
# The installer runs these via powershell.exe (Windows PowerShell 5.1) and cmd.exe on the END
|
||||||
|
# USER's box. PS 5.1 reads a BOM-less script in the active ANSI codepage, so on a non-UTF-8 locale
|
||||||
|
# (e.g. German Windows-1252) a stray em-dash mis-decodes into a curly quote and the script aborts
|
||||||
|
# with "unterminated string" - exactly how the pf-vdisplay driver install silently failed in the
|
||||||
|
# field. Keep every installer-run script pure ASCII (matches install-gamepad-drivers.ps1).
|
||||||
|
run: |
|
||||||
|
$bad = Get-ChildItem packaging/windows/*.ps1, scripts/windows/*.ps1, scripts/windows/*.cmd -ErrorAction SilentlyContinue |
|
||||||
|
Where-Object { [IO.File]::ReadAllText($_.FullName) -match '[^\x00-\x7F]' }
|
||||||
|
if ($bad) {
|
||||||
|
$bad.FullName | ForEach-Object { Write-Output "::error::non-ASCII in installer-run script: $_" }
|
||||||
|
throw "installer-run scripts must be pure ASCII (PS 5.1 mis-parses them on non-UTF-8 locales)"
|
||||||
|
}
|
||||||
|
Write-Output "installer-run scripts are ASCII-clean"
|
||||||
|
|
||||||
- name: Configure + version
|
- name: Configure + version
|
||||||
shell: pwsh
|
shell: pwsh
|
||||||
run: |
|
run: |
|
||||||
@@ -96,6 +112,18 @@ jobs:
|
|||||||
# First-ever Windows lint coverage for the host (Linux CI never lints the windows-cfg code).
|
# First-ever Windows lint coverage for the host (Linux CI never lints the windows-cfg code).
|
||||||
run: cargo clippy -p punktfunk-host --features nvenc,amf-qsv -- -D warnings
|
run: cargo clippy -p punktfunk-host --features nvenc,amf-qsv -- -D warnings
|
||||||
|
|
||||||
|
- name: Build + lint the HDR Vulkan layer (pf-vkhdr-layer)
|
||||||
|
shell: pwsh
|
||||||
|
# Standalone cdylib (own [workspace]) the installer bundles + registers (it lets Vulkan games
|
||||||
|
# like Doom use HDR on the virtual display). Lint here so a regression fails CI instead of
|
||||||
|
# silently shipping the host without the layer (pack-host-installer.ps1 builds it non-fatally).
|
||||||
|
# Windows-only FFI (user32 + the vk_layer loader glue) → can't be linted on the Linux CI.
|
||||||
|
run: |
|
||||||
|
Push-Location packaging/windows/pf-vkhdr-layer
|
||||||
|
cargo fmt --check; if ($LASTEXITCODE) { throw "pf-vkhdr-layer rustfmt" }
|
||||||
|
cargo clippy --release -- -D warnings; if ($LASTEXITCODE) { throw "pf-vkhdr-layer clippy" }
|
||||||
|
Pop-Location
|
||||||
|
|
||||||
- name: Ensure Inno Setup
|
- name: Ensure Inno Setup
|
||||||
shell: pwsh
|
shell: pwsh
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Low-latency desktop/game streaming stack, Linux-first, with a shared Rust protocol core
|
Low-latency desktop/game streaming stack, Linux-first, with a shared Rust protocol core
|
||||||
(`punktfunk-core`) exposed over a C ABI and native clients per platform. Full design:
|
(`punktfunk-core`) exposed over a C ABI and native clients per platform. Full design:
|
||||||
[`docs/implementation-plan.md`](docs/implementation-plan.md). Status table: `README.md`.
|
[`design/implementation-plan.md`](design/implementation-plan.md). Status table: `README.md`.
|
||||||
|
|
||||||
## Where the work stands
|
## Where the work stands
|
||||||
|
|
||||||
@@ -27,7 +27,15 @@ Low-latency desktop/game streaming stack, Linux-first, with a shared Rust protoc
|
|||||||
Input: mouse/keyboard (libei via RemoteDesktop portal on KWin/GNOME, gamescope's own EIS
|
Input: mouse/keyboard (libei via RemoteDesktop portal on KWin/GNOME, gamescope's own EIS
|
||||||
socket, wlr protocols on Sway) and **gamepads** (uinput X-Box-360 pads + rumble
|
socket, wlr protocols on Sway) and **gamepads** (uinput X-Box-360 pads + rumble
|
||||||
back-channel; validated live — pad created/destroyed with the session). Management REST API +
|
back-channel; validated live — pad created/destroyed with the session). Management REST API +
|
||||||
checked-in OpenAPI doc (`mgmt.rs`).
|
checked-in OpenAPI doc (`mgmt.rs`). **Web-console performance capture** (`stats_recorder.rs`,
|
||||||
|
design: [`design/stats-capture-plan.md`](design/stats-capture-plan.md)): the operator arms stats
|
||||||
|
recording from the web console, plays, stops, and reviews the run as graphs (per-stage latency
|
||||||
|
breakdown · fps new/repeat · goodput · loss/FEC). A shared `Arc<StatsRecorder>` ring (the hot-path
|
||||||
|
gate is a runtime `AtomicBool`, replacing the startup-only `PUNKTFUNK_PERF`) is fed by **both** the
|
||||||
|
native `virtual_stream` and the GameStream encode loop at their existing ~2 s/~1 s aggregation
|
||||||
|
boundary, and finished captures are saved as on-disk recordings
|
||||||
|
(`~/.config/punktfunk/captures/*.json`) browsable/exportable from the console's **Performance** page
|
||||||
|
(recharts). Endpoints `/api/v1/stats/*` (bearer-only). *Implemented; not yet on-glass validated.*
|
||||||
- **Native protocol (`punktfunk/1`): full session planes, validated live.** QUIC
|
- **Native protocol (`punktfunk/1`): full session planes, validated live.** QUIC
|
||||||
control plane (`punktfunk-core` `quic` feature: Hello{mode}/Welcome{full Config}/Start), data
|
control plane (`punktfunk-core` `quic` feature: Hello{mode}/Welcome{full Config}/Start), data
|
||||||
plane = the hardened core `Session` over raw UDP with **GF(2¹⁶) Leopard FEC + AES-GCM**
|
plane = the hardened core `Session` over raw UDP with **GF(2¹⁶) Leopard FEC + AES-GCM**
|
||||||
@@ -104,9 +112,16 @@ Low-latency desktop/game streaming stack, Linux-first, with a shared Rust protoc
|
|||||||
captures the HDR desktop as FP16/Rgb10a2 (DDA FP16 for the secure desktop), the encoder forces HEVC
|
captures the HDR desktop as FP16/Rgb10a2 (DDA FP16 for the secure desktop), the encoder forces HEVC
|
||||||
Main10 + BT.2020 PQ (NVENC ABGR10/P010; AMF/QSV P010 + a swscale Rgb10a2→P010 fallback), the client
|
Main10 + BT.2020 PQ (NVENC ABGR10/P010; AMF/QSV P010 + a swscale Rgb10a2→P010 fallback), the client
|
||||||
auto-detects PQ from the HEVC VUI — gated by `PUNKTFUNK_10BIT` + client `VIDEO_CAP_10BIT`; **Windows
|
auto-detects PQ from the HEVC VUI — gated by `PUNKTFUNK_10BIT` + client `VIDEO_CAP_10BIT`; **Windows
|
||||||
host only** (the Linux host stays 8-bit, blocked upstream). **AMF/QSV is CI-green but not yet
|
host only** (the Linux host stays 8-bit, blocked upstream). **Vulkan-game HDR over the virtual
|
||||||
on-glass validated** (no AMD/Intel Windows box in the lab); NVENC is live-validated. Newer/less
|
display**: NVIDIA/AMD Vulkan ICDs refuse to *advertise* an HDR color space for a surface on an IddCx
|
||||||
battle-tested than the Linux host. Packaging: `packaging/windows/`.
|
indirect display (so Vulkan games — Doom: The Dark Ages, id Tech, etc. — say "device does not support
|
||||||
|
HDR"), even though the ICD happily *accepts + presents* a forced HDR swapchain there. A tiny always-on
|
||||||
|
Vulkan **implicit layer** (`packaging/windows/pf-vkhdr-layer/`, `VK_LAYER_PUNKTFUNK_hdr_inject`)
|
||||||
|
injects the `HDR10_ST2084`/scRGB surface formats into `vkGetPhysicalDeviceSurfaceFormats[2]KHR`,
|
||||||
|
self-gated on the display's actual advanced-color state (no-op on SDR / real monitors); bundled +
|
||||||
|
HKLM-registered by the installer. **Live-validated: Doom: The Dark Ages enables HDR over the virtual
|
||||||
|
display.** **AMF/QSV is CI-green but not yet on-glass validated** (no AMD/Intel Windows box in the
|
||||||
|
lab); NVENC is live-validated. Newer/less battle-tested than the Linux host. Packaging: `packaging/windows/`.
|
||||||
|
|
||||||
## What's left
|
## What's left
|
||||||
|
|
||||||
@@ -245,8 +260,8 @@ bash crates/punktfunk-core/tests/c/run.sh # standalone C-ABI link + round-trip
|
|||||||
```
|
```
|
||||||
|
|
||||||
Generated artifacts are **checked in** and CI fails on drift: `include/punktfunk_core.h`
|
Generated artifacts are **checked in** and CI fails on drift: `include/punktfunk_core.h`
|
||||||
(cbindgen from `punktfunk-core/src/abi.rs`) and `docs/api/openapi.json` (regenerate with
|
(cbindgen from `punktfunk-core/src/abi.rs`) and `api/openapi.json` (regenerate with
|
||||||
`cargo run -p punktfunk-host -- openapi > docs/api/openapi.json`; spec lives in `mgmt.rs`).
|
`cargo run -p punktfunk-host -- openapi > api/openapi.json`; spec lives in `mgmt.rs`).
|
||||||
|
|
||||||
CI is Gitea Actions (`.gitea/workflows/`, guide: docs-site `ci.md`): `ci.yml` runs the
|
CI is Gitea Actions (`.gitea/workflows/`, guide: docs-site `ci.md`): `ci.yml` runs the
|
||||||
workspace checks inside the `git.unom.io/unom/punktfunk-rust-ci` image plus web/docs-site
|
workspace checks inside the `git.unom.io/unom/punktfunk-rust-ci` image plus web/docs-site
|
||||||
@@ -268,7 +283,7 @@ crates/punktfunk-host/
|
|||||||
zerocopy/{egl,cuda,vulkan}.rs dmabuf → CUDA → NVENC (tiled via EGL/GL, LINEAR via Vulkan)
|
zerocopy/{egl,cuda,vulkan}.rs dmabuf → CUDA → NVENC (tiled via EGL/GL, LINEAR via Vulkan)
|
||||||
inject/{libei,wlr,gamepad,dualsense}.rs input backends (uinput xpad + UHID DualSense)
|
inject/{libei,wlr,gamepad,dualsense}.rs input backends (uinput xpad + UHID DualSense)
|
||||||
encode/{nvenc,linux,vaapi,ffmpeg_win,sw}.rs per-GPU encoders (NVENC · Linux NVENC/CUDA · VAAPI · AMF/QSV · openh264)
|
encode/{nvenc,linux,vaapi,ffmpeg_win,sw}.rs per-GPU encoders (NVENC · Linux NVENC/CUDA · VAAPI · AMF/QSV · openh264)
|
||||||
capture.rs · encode.rs · audio.rs · spike.rs · punktfunk1.rs · mgmt.rs · native_pairing.rs
|
capture.rs · encode.rs · audio.rs · spike.rs · punktfunk1.rs · mgmt.rs · native_pairing.rs · stats_recorder.rs
|
||||||
clients/probe/ punktfunk/1 reference/probe client (headless test/measurement tool)
|
clients/probe/ punktfunk/1 reference/probe client (headless test/measurement tool)
|
||||||
clients/linux/ native Linux client (GTK4/libadwaita · FFmpeg · PipeWire · SDL3)
|
clients/linux/ native Linux client (GTK4/libadwaita · FFmpeg · PipeWire · SDL3)
|
||||||
clients/windows/ native Windows client (WinUI 3 via windows-reactor · D3D11 · WASAPI · SDL3)
|
clients/windows/ native Windows client (WinUI 3 via windows-reactor · D3D11 · WASAPI · SDL3)
|
||||||
@@ -276,7 +291,7 @@ clients/apple/ native macOS/iOS/tvOS client (Swift · VideoToolbox · GameCon
|
|||||||
clients/android/ native Android client (Kotlin app + native/ Rust JNI core over punktfunk-core)
|
clients/android/ native Android client (Kotlin app + native/ Rust JNI core over punktfunk-core)
|
||||||
clients/decky/ Steam Deck Decky plugin
|
clients/decky/ Steam Deck Decky plugin
|
||||||
crates/punktfunk-host/src/{capture/dxgi,vdisplay/sudovda,encode/ffmpeg_win,inject/gamepad_windows,audio/wasapi_*,service}.rs Windows host backends
|
crates/punktfunk-host/src/{capture/dxgi,vdisplay/sudovda,encode/ffmpeg_win,inject/gamepad_windows,audio/wasapi_*,service}.rs Windows host backends
|
||||||
web/ TanStack web console over the mgmt API (status · devices · pairing)
|
web/ TanStack web console over the mgmt API (status · devices · pairing · performance graphs)
|
||||||
packaging/ apt(deb) · RPM/COPR · Arch/sysext · Flatpak · Bazzite bootc · Windows host installer (per-dir READMEs)
|
packaging/ apt(deb) · RPM/COPR · Arch/sysext · Flatpak · Bazzite bootc · Windows host installer (per-dir READMEs)
|
||||||
tools/{loss-harness,latency-probe}/ measurement (plan §10)
|
tools/{loss-harness,latency-probe}/ measurement (plan §10)
|
||||||
scripts/ 60-punktfunk.rules · punktfunk-host.service · host.env.example · headless/
|
scripts/ 60-punktfunk.rules · punktfunk-host.service · host.env.example · headless/
|
||||||
|
|||||||
Generated
+422
-1
@@ -2,6 +2,12 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "adler2"
|
||||||
|
version = "2.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aead"
|
name = "aead"
|
||||||
version = "0.5.2"
|
version = "0.5.2"
|
||||||
@@ -735,6 +741,15 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crc32fast"
|
||||||
|
version = "1.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "criterion"
|
name = "criterion"
|
||||||
version = "0.5.1"
|
version = "0.5.1"
|
||||||
@@ -1010,6 +1025,18 @@ dependencies = [
|
|||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fallible-iterator"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fallible-streaming-iterator"
|
||||||
|
version = "0.1.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fastbloom"
|
name = "fastbloom"
|
||||||
version = "0.14.1"
|
version = "0.14.1"
|
||||||
@@ -1088,6 +1115,16 @@ version = "0.5.7"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
|
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "flate2"
|
||||||
|
version = "1.1.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
|
||||||
|
dependencies = [
|
||||||
|
"crc32fast",
|
||||||
|
"miniz_oxide",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flume"
|
name = "flume"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
@@ -1111,6 +1148,12 @@ version = "0.1.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "foldhash"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "form_urlencoded"
|
name = "form_urlencoded"
|
||||||
version = "1.2.2"
|
version = "1.2.2"
|
||||||
@@ -1586,7 +1629,16 @@ version = "0.15.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"foldhash",
|
"foldhash 0.1.5",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.16.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
|
||||||
|
dependencies = [
|
||||||
|
"foldhash 0.2.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1594,6 +1646,18 @@ name = "hashbrown"
|
|||||||
version = "0.17.1"
|
version = "0.17.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
|
checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
|
||||||
|
dependencies = [
|
||||||
|
"foldhash 0.2.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashlink"
|
||||||
|
version = "0.12.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a5081f264ed7adee96ea4b4778b6bb9da0a7228b084587aa3bd3ff05da7c5a3b"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown 0.17.1",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
@@ -1712,12 +1776,115 @@ dependencies = [
|
|||||||
"tower-service",
|
"tower-service",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_collections"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"potential_utf",
|
||||||
|
"utf8_iter",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_locale_core"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"litemap",
|
||||||
|
"tinystr",
|
||||||
|
"writeable",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_normalizer"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4"
|
||||||
|
dependencies = [
|
||||||
|
"icu_collections",
|
||||||
|
"icu_normalizer_data",
|
||||||
|
"icu_properties",
|
||||||
|
"icu_provider",
|
||||||
|
"smallvec",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_normalizer_data"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_properties"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de"
|
||||||
|
dependencies = [
|
||||||
|
"icu_collections",
|
||||||
|
"icu_locale_core",
|
||||||
|
"icu_properties_data",
|
||||||
|
"icu_provider",
|
||||||
|
"zerotrie",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_properties_data"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_provider"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"icu_locale_core",
|
||||||
|
"writeable",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerotrie",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "id-arena"
|
name = "id-arena"
|
||||||
version = "2.3.0"
|
version = "2.3.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
|
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "idna"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
|
||||||
|
dependencies = [
|
||||||
|
"idna_adapter",
|
||||||
|
"smallvec",
|
||||||
|
"utf8_iter",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "idna_adapter"
|
||||||
|
version = "1.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
|
||||||
|
dependencies = [
|
||||||
|
"icu_normalizer",
|
||||||
|
"icu_properties",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "if-addrs"
|
name = "if-addrs"
|
||||||
version = "0.15.0"
|
version = "0.15.0"
|
||||||
@@ -1966,12 +2133,29 @@ dependencies = [
|
|||||||
"system-deps",
|
"system-deps",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libsqlite3-sys"
|
||||||
|
version = "0.38.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f6c19a05435c21ac299d71b6a9c13db3e3f47c520517d58990a462a1397a61db"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"pkg-config",
|
||||||
|
"vcpkg",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "linux-raw-sys"
|
name = "linux-raw-sys"
|
||||||
version = "0.12.1"
|
version = "0.12.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
|
checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "litemap"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
version = "0.4.14"
|
version = "0.4.14"
|
||||||
@@ -2066,6 +2250,16 @@ version = "0.2.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "miniz_oxide"
|
||||||
|
version = "0.8.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
|
||||||
|
dependencies = [
|
||||||
|
"adler2",
|
||||||
|
"simd-adler32",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mio"
|
name = "mio"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
@@ -2498,6 +2692,15 @@ dependencies = [
|
|||||||
"universal-hash",
|
"universal-hash",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "potential_utf"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564"
|
||||||
|
dependencies = [
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "powerfmt"
|
name = "powerfmt"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
@@ -2655,6 +2858,7 @@ dependencies = [
|
|||||||
"audiopus_sys",
|
"audiopus_sys",
|
||||||
"axum",
|
"axum",
|
||||||
"axum-server",
|
"axum-server",
|
||||||
|
"base64",
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"cbc",
|
"cbc",
|
||||||
"ffmpeg-next",
|
"ffmpeg-next",
|
||||||
@@ -2677,7 +2881,9 @@ dependencies = [
|
|||||||
"rand 0.8.6",
|
"rand 0.8.6",
|
||||||
"rcgen",
|
"rcgen",
|
||||||
"reis",
|
"reis",
|
||||||
|
"roxmltree",
|
||||||
"rsa",
|
"rsa",
|
||||||
|
"rusqlite",
|
||||||
"rustls",
|
"rustls",
|
||||||
"rustls-pemfile",
|
"rustls-pemfile",
|
||||||
"rusty_enet",
|
"rusty_enet",
|
||||||
@@ -2689,6 +2895,7 @@ dependencies = [
|
|||||||
"tower",
|
"tower",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
|
"ureq",
|
||||||
"utoipa",
|
"utoipa",
|
||||||
"utoipa-axum",
|
"utoipa-axum",
|
||||||
"utoipa-scalar",
|
"utoipa-scalar",
|
||||||
@@ -2700,6 +2907,7 @@ dependencies = [
|
|||||||
"wayland-scanner",
|
"wayland-scanner",
|
||||||
"windows 0.62.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"windows 0.62.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"windows-service",
|
"windows-service",
|
||||||
|
"winreg",
|
||||||
"x509-parser",
|
"x509-parser",
|
||||||
"xkbcommon",
|
"xkbcommon",
|
||||||
]
|
]
|
||||||
@@ -3002,6 +3210,15 @@ dependencies = [
|
|||||||
"windows-sys 0.52.0",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "roxmltree"
|
||||||
|
version = "0.21.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f1964b10c76125c36f8afe190065a4bf9a87bf324842c05701330bba9f1cacbb"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rpkg-config"
|
name = "rpkg-config"
|
||||||
version = "0.1.2"
|
version = "0.1.2"
|
||||||
@@ -3028,6 +3245,31 @@ dependencies = [
|
|||||||
"zeroize",
|
"zeroize",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rsqlite-vfs"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c51c9ae4df8a7fba42103df5c621fa3c37eccf3a3c650879e90fc48b11cc192c"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown 0.16.1",
|
||||||
|
"thiserror 2.0.18",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rusqlite"
|
||||||
|
version = "0.40.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "11438310b19e3109b6446c33d1ed5e889428cf2e278407bc7896bc4aaea43323"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"fallible-iterator",
|
||||||
|
"fallible-streaming-iterator",
|
||||||
|
"hashlink",
|
||||||
|
"libsqlite3-sys",
|
||||||
|
"smallvec",
|
||||||
|
"sqlite-wasm-rs",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc-hash"
|
name = "rustc-hash"
|
||||||
version = "2.1.2"
|
version = "2.1.2"
|
||||||
@@ -3478,6 +3720,12 @@ dependencies = [
|
|||||||
"rand_core 0.6.4",
|
"rand_core 0.6.4",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "simd-adler32"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "siphasher"
|
name = "siphasher"
|
||||||
version = "1.0.3"
|
version = "1.0.3"
|
||||||
@@ -3548,6 +3796,24 @@ dependencies = [
|
|||||||
"der",
|
"der",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sqlite-wasm-rs"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dc3efc0da82635d7e1ced0053bbbfa8c7ab9645d0bf36ceb4f7127bb85315d75"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"js-sys",
|
||||||
|
"rsqlite-vfs",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "stable_deref_trait"
|
||||||
|
version = "1.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
@@ -3700,6 +3966,16 @@ dependencies = [
|
|||||||
"time-core",
|
"time-core",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinystr"
|
||||||
|
version = "0.8.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tinytemplate"
|
name = "tinytemplate"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
@@ -4005,6 +4281,40 @@ version = "0.9.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ureq"
|
||||||
|
version = "2.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d"
|
||||||
|
dependencies = [
|
||||||
|
"base64",
|
||||||
|
"flate2",
|
||||||
|
"log",
|
||||||
|
"once_cell",
|
||||||
|
"rustls",
|
||||||
|
"rustls-pki-types",
|
||||||
|
"url",
|
||||||
|
"webpki-roots 0.26.11",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "url"
|
||||||
|
version = "2.5.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
|
||||||
|
dependencies = [
|
||||||
|
"form_urlencoded",
|
||||||
|
"idna",
|
||||||
|
"percent-encoding",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8_iter"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf8parse"
|
name = "utf8parse"
|
||||||
version = "0.2.2"
|
version = "0.2.2"
|
||||||
@@ -4332,6 +4642,24 @@ dependencies = [
|
|||||||
"rustls-pki-types",
|
"rustls-pki-types",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "webpki-roots"
|
||||||
|
version = "0.26.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
|
||||||
|
dependencies = [
|
||||||
|
"webpki-roots 1.0.8",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "webpki-roots"
|
||||||
|
version = "1.0.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bf85cb06032201fa7c6f829d7db5a7e5aa45bcc0655327713065f6f0576731bf"
|
||||||
|
dependencies = [
|
||||||
|
"rustls-pki-types",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wide"
|
name = "wide"
|
||||||
version = "0.7.33"
|
version = "0.7.33"
|
||||||
@@ -4857,6 +5185,16 @@ dependencies = [
|
|||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winreg"
|
||||||
|
version = "0.56.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7d6f32a0ff4a9f6f01231eb2059cc85479330739333e0e58cadf03b6af2cca10"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"windows-sys 0.61.2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wit-bindgen"
|
name = "wit-bindgen"
|
||||||
version = "0.51.0"
|
version = "0.51.0"
|
||||||
@@ -4951,6 +5289,12 @@ dependencies = [
|
|||||||
"wasmparser",
|
"wasmparser",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "writeable"
|
||||||
|
version = "0.6.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "x509-parser"
|
name = "x509-parser"
|
||||||
version = "0.16.0"
|
version = "0.16.0"
|
||||||
@@ -4994,6 +5338,29 @@ dependencies = [
|
|||||||
"time",
|
"time",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yoke"
|
||||||
|
version = "0.8.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5"
|
||||||
|
dependencies = [
|
||||||
|
"stable_deref_trait",
|
||||||
|
"yoke-derive",
|
||||||
|
"zerofrom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yoke-derive"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"synstructure",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zbus"
|
name = "zbus"
|
||||||
version = "5.16.0"
|
version = "5.16.0"
|
||||||
@@ -5070,12 +5437,66 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerofrom"
|
||||||
|
version = "0.1.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
|
||||||
|
dependencies = [
|
||||||
|
"zerofrom-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerofrom-derive"
|
||||||
|
version = "0.1.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"synstructure",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zeroize"
|
name = "zeroize"
|
||||||
version = "1.8.2"
|
version = "1.8.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
|
checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerotrie"
|
||||||
|
version = "0.2.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerovec"
|
||||||
|
version = "0.11.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
|
||||||
|
dependencies = [
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerovec-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerovec-derive"
|
||||||
|
version = "0.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zmij"
|
name = "zmij"
|
||||||
version = "1.0.21"
|
version = "1.0.21"
|
||||||
|
|||||||
@@ -1,13 +1,20 @@
|
|||||||
# punktfunk
|
<p align="center">
|
||||||
|
<img src="assets/punktfunk-logo.svg" alt="punktfunk" width="320" />
|
||||||
|
</p>
|
||||||
|
|
||||||
**Low-latency desktop and game streaming, Linux-first.** Run the host on a Linux machine — or a
|
<p align="center"><b>Low-latency desktop and game streaming with first-class Linux and Windows hosts.</b></p>
|
||||||
Windows PC — with an NVIDIA GPU, connect from a Mac, PC, phone, tablet, or TV, and stream your desktop
|
|
||||||
or games — each device at its **own native resolution and refresh rate**, over your local network.
|
Run the host on a Linux machine or a Windows PC, connect from a Mac, PC, phone, tablet, or TV, and
|
||||||
|
stream your desktop or games — each device at its **own native resolution and refresh rate**, over
|
||||||
|
your local network.
|
||||||
|
|
||||||
📖 **Documentation: [docs.punktfunk.unom.io](https://docs.punktfunk.unom.io)** — start with
|
📖 **Documentation: [docs.punktfunk.unom.io](https://docs.punktfunk.unom.io)** — start with
|
||||||
[How It Works](https://docs.punktfunk.unom.io/docs/how-it-works) or the
|
[How It Works](https://docs.punktfunk.unom.io/docs/how-it-works) or the
|
||||||
[Quick Start](https://docs.punktfunk.unom.io/docs/quickstart).
|
[Quick Start](https://docs.punktfunk.unom.io/docs/quickstart).
|
||||||
|
|
||||||
|
💬 **Community: [Discord](https://discord.gg/kaPNvzMuGU)** — chat, support, and **Android beta
|
||||||
|
access** · **[r/Punktfunk](https://www.reddit.com/r/Punktfunk/)**.
|
||||||
|
|
||||||
punktfunk pairs a **virtual-display streaming host** with native clients on every platform. It speaks
|
punktfunk pairs a **virtual-display streaming host** with native clients on every platform. It speaks
|
||||||
the existing **GameStream** protocol, so any [Moonlight](https://moonlight-stream.org/) client works
|
the existing **GameStream** protocol, so any [Moonlight](https://moonlight-stream.org/) client works
|
||||||
day one — and adds its own faster **`punktfunk/1`** protocol that breaks the ~1 Gbps FEC wall with a
|
day one — and adds its own faster **`punktfunk/1`** protocol that breaks the ~1 Gbps FEC wall with a
|
||||||
@@ -19,6 +26,11 @@ protocol, FEC, and crypto, linked into the host and every client over a stable C
|
|||||||
- **Your device's exact mode.** For each client that connects, the host spins up a virtual display
|
- **Your device's exact mode.** For each client that connects, the host spins up a virtual display
|
||||||
sized to that device — 1080p60 to a laptop, 1440p120 to a desktop, 4K to a TV, all at once. No
|
sized to that device — 1080p60 to a laptop, 1440p120 to a desktop, 4K to a TV, all at once. No
|
||||||
letterboxing, no scaling, no rearranging your real monitors.
|
letterboxing, no scaling, no rearranging your real monitors.
|
||||||
|
- **A real virtual display on Windows, too.** On Linux the host uses per-compositor virtual outputs;
|
||||||
|
on Windows you get the same on-the-fly virtual display — at the client's exact mode, no physical
|
||||||
|
monitor or dummy HDMI plug, even on the secure desktop (UAC / lock screen). It also has **its own
|
||||||
|
indirect display driver (IDD)** the host pushes finished frames straight into, rather than scraping
|
||||||
|
a screen — tight, push-based integration that's unusual for a Windows streaming host.
|
||||||
- **Low latency, GPU end to end.** Frames go straight from the compositor to the NVENC encoder with
|
- **Low latency, GPU end to end.** Frames go straight from the compositor to the NVENC encoder with
|
||||||
zero CPU copies (dmabuf → CUDA/Vulkan → NVENC), over a transport tuned for responsiveness rather
|
zero CPU copies (dmabuf → CUDA/Vulkan → NVENC), over a transport tuned for responsiveness rather
|
||||||
than throughput. Stable 240 fps at 5120×1440; sub-millisecond capture-to-reassembly on a LAN.
|
than throughput. Stable 240 fps at 5120×1440; sub-millisecond capture-to-reassembly on a LAN.
|
||||||
@@ -35,7 +47,7 @@ protocol, FEC, and crypto, linked into the host and every client over a stable C
|
|||||||
| **Core** — `punktfunk-core` + C ABI (protocol · FEC · crypto · QUIC) | ✅ Complete & hardened |
|
| **Core** — `punktfunk-core` + C ABI (protocol · FEC · crypto · QUIC) | ✅ Complete & hardened |
|
||||||
| **GameStream host** → stock Moonlight | ✅ Live end-to-end: pairing, RTSP, audio, per-client virtual output at native resolution, GPU zero-copy NVENC, gamepads |
|
| **GameStream host** → stock Moonlight | ✅ Live end-to-end: pairing, RTSP, audio, per-client virtual output at native resolution, GPU zero-copy NVENC, gamepads |
|
||||||
| **Native protocol** — `punktfunk/1` | ✅ Validated live: QUIC control + GF(2¹⁶) FEC/AES-GCM data plane, PIN pairing, mDNS discovery, mid-stream mode renegotiation |
|
| **Native protocol** — `punktfunk/1` | ✅ Validated live: QUIC control + GF(2¹⁶) FEC/AES-GCM data plane, PIN pairing, mDNS discovery, mid-stream mode renegotiation |
|
||||||
| **Windows host** (NVIDIA, x64) | 🟡 Implemented & shipping as a signed installer (DXGI capture · SudoVDA virtual display · NVENC · WASAPI · ViGEm); NVIDIA-only, newer than the Linux host |
|
| **Windows host** (x64) | 🟡 Implemented & shipping as a signed installer: DXGI/WGC capture · its own all-Rust IddCx **virtual display** (secure-desktop capable) · GPU encode (NVENC on NVIDIA, AMF/QSV on AMD/Intel) · WASAPI audio · bundled virtual-gamepad drivers (no ViGEmBus) · HDR incl. Vulkan-game HDR. NVIDIA live-validated; AMD/Intel CI-green |
|
||||||
| **macOS / iOS / tvOS client** (`clients/apple`) | ✅ Streaming live: VideoToolbox decode, controllers incl. DualSense, discovery, pairing, speed test |
|
| **macOS / iOS / tvOS client** (`clients/apple`) | ✅ Streaming live: VideoToolbox decode, controllers incl. DualSense, discovery, pairing, speed test |
|
||||||
| **Linux client** (`clients/linux`, GTK4) | ✅ Streaming live: FFmpeg + VAAPI zero-copy decode, PipeWire audio, SDL3 controllers; ships as Flatpak/apt/rpm/Arch |
|
| **Linux client** (`clients/linux`, GTK4) | ✅ Streaming live: FFmpeg + VAAPI zero-copy decode, PipeWire audio, SDL3 controllers; ships as Flatpak/apt/rpm/Arch |
|
||||||
| **Android client** (`clients/android`, phone + TV) | ✅ Streaming live: AMediaCodec decode + HDR10, Oboe audio, controllers, discovery, pairing |
|
| **Android client** (`clients/android`, phone + TV) | ✅ Streaming live: AMediaCodec decode + HDR10, Oboe audio, controllers, discovery, pairing |
|
||||||
@@ -61,14 +73,14 @@ roadmap: **[/docs/roadmap](https://docs.punktfunk.unom.io/docs/roadmap)**.
|
|||||||
|
|
||||||
Pick your platform and install from its package registry — the per-platform guide covers adding the
|
Pick your platform and install from its package registry — the per-platform guide covers adding the
|
||||||
repo, first run, and the web console. The Linux host is the primary, most battle-tested path; a
|
repo, first run, and the web console. The Linux host is the primary, most battle-tested path; a
|
||||||
Windows host (NVIDIA-only) also ships as a signed installer.
|
Windows host also ships as a signed installer (all-vendor: NVIDIA, AMD, Intel).
|
||||||
|
|
||||||
| Platform | Install | Guide |
|
| Platform | Install | Guide |
|
||||||
|--------|---------|-------|
|
|--------|---------|-------|
|
||||||
| **Ubuntu / Debian** (apt) | `sudo apt install punktfunk-host` *(after adding the repo)* | [Ubuntu — GNOME](https://docs.punktfunk.unom.io/docs/ubuntu-gnome) · [KDE](https://docs.punktfunk.unom.io/docs/ubuntu-kde) |
|
| **Ubuntu / Debian** (apt) | `sudo apt install punktfunk-host` *(after adding the repo)* | [Ubuntu — GNOME](https://docs.punktfunk.unom.io/docs/ubuntu-gnome) · [KDE](https://docs.punktfunk.unom.io/docs/ubuntu-kde) |
|
||||||
| **Fedora / Bazzite** (rpm-ostree) | `rpm-ostree install punktfunk punktfunk-web` *(or the bootc image)* | [Fedora — KDE](https://docs.punktfunk.unom.io/docs/fedora-kde) · [Bazzite](https://docs.punktfunk.unom.io/docs/bazzite) |
|
| **Fedora / Bazzite** (rpm-ostree) | `rpm-ostree install punktfunk punktfunk-web` *(or the bootc image)* | [Fedora — KDE](https://docs.punktfunk.unom.io/docs/fedora-kde) · [Bazzite](https://docs.punktfunk.unom.io/docs/bazzite) |
|
||||||
| **Arch / Steam Deck** (PKGBUILD / sysext) | `makepkg -si` *(Arch)* · sysext `.raw` *(SteamOS)* | [packaging/arch](packaging/arch/README.md) |
|
| **Arch / Steam Deck** (PKGBUILD / sysext) | `makepkg -si` *(Arch)* · sysext `.raw` *(SteamOS)* | [packaging/arch](packaging/arch/README.md) |
|
||||||
| **Windows** (NVIDIA, x64) | signed `setup.exe` from the package registry | [Windows Host](https://docs.punktfunk.unom.io/docs/windows-host) |
|
| **Windows** (x64) | signed `setup.exe` from the package registry | [Windows Host](https://docs.punktfunk.unom.io/docs/windows-host) |
|
||||||
|
|
||||||
`punktfunk-host` is the streaming host; `punktfunk-web` is the browser console (pairing + status).
|
`punktfunk-host` is the streaming host; `punktfunk-web` is the browser console (pairing + status).
|
||||||
After install, run `punktfunk-host serve` inside your desktop session (the secure native default;
|
After install, run `punktfunk-host serve` inside your desktop session (the secure native default;
|
||||||
@@ -113,7 +125,7 @@ and the [docs site](https://docs.punktfunk.unom.io).
|
|||||||
```
|
```
|
||||||
crates/
|
crates/
|
||||||
punktfunk-core/ protocol · FEC · pacing · crypto · QUIC control plane — the C ABI (lib + cdylib + staticlib)
|
punktfunk-core/ protocol · FEC · pacing · crypto · QUIC control plane — the C ABI (lib + cdylib + staticlib)
|
||||||
punktfunk-host/ Linux host: virtual displays · capture · encode · input · GameStream · punktfunk/1 · mgmt
|
punktfunk-host/ the host (Linux + Windows): virtual displays · capture · encode · input · GameStream · punktfunk/1 · mgmt
|
||||||
clients/
|
clients/
|
||||||
apple/ macOS / iOS / tvOS app (Swift · VideoToolbox · Metal · GameController)
|
apple/ macOS / iOS / tvOS app (Swift · VideoToolbox · Metal · GameController)
|
||||||
linux/ Linux desktop app (Rust · GTK4/libadwaita · FFmpeg/VAAPI · PipeWire · SDL3)
|
linux/ Linux desktop app (Rust · GTK4/libadwaita · FFmpeg/VAAPI · PipeWire · SDL3)
|
||||||
@@ -124,7 +136,7 @@ clients/
|
|||||||
web/ web console (TanStack) over the management API — status · devices · pairing
|
web/ web console (TanStack) over the management API — status · devices · pairing
|
||||||
packaging/ apt · rpm / COPR · Arch · Flatpak · Bazzite bootc image
|
packaging/ apt · rpm / COPR · Arch · Flatpak · Bazzite bootc image
|
||||||
docs-site/ public documentation site (Fumadocs) — https://docs.punktfunk.unom.io
|
docs-site/ public documentation site (Fumadocs) — https://docs.punktfunk.unom.io
|
||||||
docs/ design notes & deep-dive plans
|
design/ design notes & deep-dive plans (index: design/README.md)
|
||||||
include/punktfunk_core.h cbindgen-generated C header (checked in)
|
include/punktfunk_core.h cbindgen-generated C header (checked in)
|
||||||
tools/ latency-probe · loss-harness (measurement)
|
tools/ latency-probe · loss-harness (measurement)
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -978,6 +978,309 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/api/v1/stats/capture/live": {
|
||||||
|
"get": {
|
||||||
|
"tags": [
|
||||||
|
"stats"
|
||||||
|
],
|
||||||
|
"summary": "Live in-progress capture",
|
||||||
|
"description": "The full sample time-series of the capture currently recording, for live graphing. `404` when\nnothing is armed.",
|
||||||
|
"operationId": "statsCaptureLive",
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "The in-progress capture (meta + samples so far)",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Capture"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Missing or invalid bearer token",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"404": {
|
||||||
|
"description": "No capture is currently recording",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/api/v1/stats/capture/start": {
|
||||||
|
"post": {
|
||||||
|
"tags": [
|
||||||
|
"stats"
|
||||||
|
],
|
||||||
|
"summary": "Start a stats capture",
|
||||||
|
"description": "Arms a new performance-stats capture. Idempotent: if a capture is already running this returns\nthe current status unchanged. While armed, the streaming loops emit aggregated samples (~ every\n1–2 s) into the in-progress capture, readable live via `GET /stats/capture/live`.",
|
||||||
|
"operationId": "statsCaptureStart",
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Capture armed (or already running)",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/StatsStatus"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Missing or invalid bearer token",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/api/v1/stats/capture/status": {
|
||||||
|
"get": {
|
||||||
|
"tags": [
|
||||||
|
"stats"
|
||||||
|
],
|
||||||
|
"summary": "Stats capture status",
|
||||||
|
"description": "Whether a capture is armed, its sample count, and start time. Poll this (e.g. every 2 s) to\ndrive the capture-control UI.",
|
||||||
|
"operationId": "statsCaptureStatus",
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "In-progress capture status (idle when not armed)",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/StatsStatus"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Missing or invalid bearer token",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/api/v1/stats/capture/stop": {
|
||||||
|
"post": {
|
||||||
|
"tags": [
|
||||||
|
"stats"
|
||||||
|
],
|
||||||
|
"summary": "Stop the stats capture",
|
||||||
|
"description": "Disarms the in-progress capture and writes it to disk atomically, returning its summary. If\nnothing was recording, returns `204 No Content`.",
|
||||||
|
"operationId": "statsCaptureStop",
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Capture stopped and saved",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/CaptureMeta"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"204": {
|
||||||
|
"description": "Nothing was recording"
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Missing or invalid bearer token",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "Could not write the recording to disk",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/api/v1/stats/recordings": {
|
||||||
|
"get": {
|
||||||
|
"tags": [
|
||||||
|
"stats"
|
||||||
|
],
|
||||||
|
"summary": "List saved recordings",
|
||||||
|
"description": "Every saved capture's summary (the `meta` head only — not the sample body), newest first.",
|
||||||
|
"operationId": "statsRecordingsList",
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Saved capture summaries, newest first",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/CaptureMeta"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Missing or invalid bearer token",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/api/v1/stats/recordings/{id}": {
|
||||||
|
"get": {
|
||||||
|
"tags": [
|
||||||
|
"stats"
|
||||||
|
],
|
||||||
|
"summary": "Get a saved recording",
|
||||||
|
"description": "The full capture (meta + samples) for `id`, for graphing or download.",
|
||||||
|
"operationId": "statsRecordingGet",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The recording id (its filename stem)",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "The full capture",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Capture"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Missing or invalid bearer token",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"404": {
|
||||||
|
"description": "No recording with that id",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "The recording file is unreadable",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"delete": {
|
||||||
|
"tags": [
|
||||||
|
"stats"
|
||||||
|
],
|
||||||
|
"summary": "Delete a saved recording",
|
||||||
|
"description": "Removes the recording `id` from disk. `404` if there is no such recording.",
|
||||||
|
"operationId": "statsRecordingDelete",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The recording id (its filename stem)",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"204": {
|
||||||
|
"description": "Recording deleted"
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Missing or invalid bearer token",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"404": {
|
||||||
|
"description": "No recording with that id",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "Could not delete the recording",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ApiError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/api/v1/status": {
|
"/api/v1/status": {
|
||||||
"get": {
|
"get": {
|
||||||
"tags": [
|
"tags": [
|
||||||
@@ -1125,6 +1428,89 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"Capture": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "A full capture: summary + the sample time-series. The wire + on-disk shape.",
|
||||||
|
"required": [
|
||||||
|
"meta",
|
||||||
|
"samples"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"meta": {
|
||||||
|
"$ref": "#/components/schemas/CaptureMeta"
|
||||||
|
},
|
||||||
|
"samples": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/StatsSample"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"CaptureMeta": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Capture summary — the filename stem plus the negotiated mode/codec/client. Stored at the head\nof each on-disk recording and listed standalone (without the sample body) by\n[`StatsRecorder::list`].",
|
||||||
|
"required": [
|
||||||
|
"id",
|
||||||
|
"started_unix_ms",
|
||||||
|
"duration_ms",
|
||||||
|
"kind",
|
||||||
|
"width",
|
||||||
|
"height",
|
||||||
|
"fps",
|
||||||
|
"codec",
|
||||||
|
"client",
|
||||||
|
"sample_count"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"client": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Short label / fingerprint prefix, or `\"\"` if unknown."
|
||||||
|
},
|
||||||
|
"codec": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "`\"h264\" | \"hevc\" | \"av1\"`."
|
||||||
|
},
|
||||||
|
"duration_ms": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int64",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"fps": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"height": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "e.g. `\"2026-06-26T20-14-03Z_5120x1440\"` — also the filename stem."
|
||||||
|
},
|
||||||
|
"kind": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "`\"native\" | \"gamestream\"`."
|
||||||
|
},
|
||||||
|
"sample_count": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"started_unix_ms": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int64",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"width": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"minimum": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"CustomEntry": {
|
"CustomEntry": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"description": "A user-added title, persisted in `~/.config/punktfunk/library.json`. Same shape the API\nreturns and the web console edits.",
|
"description": "A user-added title, persisted in `~/.config/punktfunk/library.json`. Same shape the API\nreturns and the web console edits.",
|
||||||
@@ -1595,6 +1981,144 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"StageTiming": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "One pipeline stage's latency in an aggregation window (microseconds).",
|
||||||
|
"required": [
|
||||||
|
"name",
|
||||||
|
"p50_us",
|
||||||
|
"p99_us"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "`\"capture\" | \"submit\" | \"encode\" | \"packetize\" | \"send\"` (path-dependent)."
|
||||||
|
},
|
||||||
|
"p50_us": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float"
|
||||||
|
},
|
||||||
|
"p99_us": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"StatsSample": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "One aggregated sample (~ every 2 s native, ~ every 1 s GameStream).",
|
||||||
|
"required": [
|
||||||
|
"t_ms",
|
||||||
|
"session_id",
|
||||||
|
"stages",
|
||||||
|
"fps",
|
||||||
|
"repeat_fps",
|
||||||
|
"mbps",
|
||||||
|
"bitrate_kbps",
|
||||||
|
"frames_dropped",
|
||||||
|
"packets_dropped",
|
||||||
|
"send_dropped",
|
||||||
|
"fec_recovered"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"bitrate_kbps": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"description": "Configured target bitrate.",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"fec_recovered": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"description": "FEC shards recovered this window (delta).",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"fps": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float",
|
||||||
|
"description": "Genuine NEW frames/s from the source."
|
||||||
|
},
|
||||||
|
"frames_dropped": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"description": "Frames dropped this window (delta).",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"mbps": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float",
|
||||||
|
"description": "Transmit goodput (Mb/s)."
|
||||||
|
},
|
||||||
|
"packets_dropped": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"description": "Packets dropped this window (receiver-side / reassembler, where known).",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"repeat_fps": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float",
|
||||||
|
"description": "Re-encoded holds/s (source-starvation indicator)."
|
||||||
|
},
|
||||||
|
"send_dropped": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"description": "Host send-buffer overflow / EAGAIN this window (delta).",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"session_id": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"description": "Disambiguates concurrent sessions (usually constant).",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"stages": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/StageTiming"
|
||||||
|
},
|
||||||
|
"description": "Ordered pipeline stages for this path."
|
||||||
|
},
|
||||||
|
"t_ms": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int64",
|
||||||
|
"description": "Milliseconds since capture start (monotonic; stamped by [`StatsRecorder::push_sample`]).",
|
||||||
|
"minimum": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"StatsStatus": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Snapshot of the in-progress capture for the management API.",
|
||||||
|
"required": [
|
||||||
|
"armed",
|
||||||
|
"sample_count",
|
||||||
|
"started_unix_ms",
|
||||||
|
"kind"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"armed": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Capture currently running."
|
||||||
|
},
|
||||||
|
"kind": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Path of the in-progress capture (`\"\"` if idle)."
|
||||||
|
},
|
||||||
|
"sample_count": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int32",
|
||||||
|
"description": "Samples in the in-progress capture.",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"started_unix_ms": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int64",
|
||||||
|
"description": "Unix start time of the in-progress capture (`0` if idle).",
|
||||||
|
"minimum": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"StreamInfo": {
|
"StreamInfo": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"description": "RTSP-negotiated stream parameters.",
|
"description": "RTSP-negotiated stream parameters.",
|
||||||
@@ -1696,6 +2220,10 @@
|
|||||||
{
|
{
|
||||||
"name": "library",
|
"name": "library",
|
||||||
"description": "Game library: installed-store titles (Steam) plus user-curated custom entries"
|
"description": "Game library: installed-store titles (Steam) plus user-curated custom entries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stats",
|
||||||
|
"description": "Streaming performance-stats capture: arm/stop a recording, read the live + saved time-series for graphing"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg width="100%" height="100%" viewBox="0 0 579 298" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
|
||||||
|
<style>
|
||||||
|
/* Theme-adaptive so the logo stays readable on both light and dark README
|
||||||
|
backgrounds: deep violet (the brand-mark palette) on light, the original
|
||||||
|
light violet on dark. Evaluated by the viewer's color scheme. */
|
||||||
|
.pf-wm { fill: #6c5bf3; }
|
||||||
|
.pf-back { fill: #a79ff8; }
|
||||||
|
.pf-deep { fill: #6c5bf3; }
|
||||||
|
@media (prefers-color-scheme: dark) {
|
||||||
|
.pf-wm { fill: #cec9fb; }
|
||||||
|
.pf-back { fill: #f2f1fe; }
|
||||||
|
.pf-deep { fill: #8c7ef5; }
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<g>
|
||||||
|
<g>
|
||||||
|
<path class="pf-wm" style="fill-rule:nonzero;" d="M21.144,176.635l0,102.687l31.253,0l0,-35.563l73.436,0l0,-23.555l-73.436,0l0,-19.398l77.285,0l0,-24.171l-108.537,0Z"/>
|
||||||
|
<path class="pf-wm" style="fill-rule:nonzero;" d="M136.148,176.635l0,47.264c0.154,16.627 0.154,16.627 0.308,20.014c0.77,15.087 2.463,21.4 7.544,26.634c7.698,8.16 20.014,10.315 59.272,10.315c23.863,0 34.178,-0.616 43.415,-2.463c11.7,-2.463 19.552,-10.623 21.246,-22.323c0.924,-7.236 1.078,-8.929 1.54,-32.176l0,-47.264l-31.253,0l0,47.264c0,2.155 -0.154,7.082 -0.308,10.623c-0.462,9.699 -1.232,12.47 -3.695,15.087c-3.387,3.695 -9.853,4.619 -31.407,4.619c-26.634,0 -32.638,-1.693 -34.332,-9.853c-0.77,-4.157 -0.77,-4.311 -1.078,-20.476l0,-47.264l-31.253,0Z"/>
|
||||||
|
<path class="pf-wm" style="fill-rule:nonzero;" d="M275.938,176.527l0,102.687l31.868,0l-0.77,-76.669l3.387,0l54.038,76.669l54.346,0l0,-102.687l-31.868,0l0.77,76.515l-3.233,0l-53.73,-76.515l-54.808,0Z"/>
|
||||||
|
<path class="pf-wm" style="fill-rule:nonzero;" d="M425.273,176.527l0,102.687l31.253,0l0,-39.258l17.089,0l46.032,39.258l47.418,0l-64.353,-52.344l59.426,-50.959l-47.88,0l-40.644,37.873l-17.089,0l0,-37.257l-31.253,0Z"/>
|
||||||
|
</g>
|
||||||
|
<path class="pf-back" style="fill-rule:nonzero;" d="M65.442,150.143c24.514,0 44.298,-19.784 44.298,-44.298c0,-24.514 -19.784,-44.298 -44.298,-44.298c-24.514,0 -44.298,19.784 -44.298,44.298c0,24.514 19.784,44.298 44.298,44.298Z"/>
|
||||||
|
<path class="pf-deep" style="fill-rule:nonzero;" d="M141.063,92.871c17.334,-17.334 17.334,-45.312 0,-62.647c-17.334,-17.334 -45.312,-17.334 -62.647,-0c-17.334,17.334 -17.334,45.312 0,62.647c17.334,17.334 45.312,17.334 62.647,-0Z"/>
|
||||||
|
<path style="fill:url(#_Linear1);" d="M121.228,104.359c-14.777,3.965 -31.187,0.136 -42.811,-11.488c-11.624,-11.624 -15.453,-28.034 -11.488,-42.811c14.777,-3.965 31.187,-0.136 42.811,11.488c11.624,11.624 15.453,28.034 11.488,42.811Z"/>
|
||||||
|
</g>
|
||||||
|
<defs>
|
||||||
|
<linearGradient id="_Linear1" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(31.323323,-31.323323,31.323323,31.323323,78.416832,92.870811)">
|
||||||
|
<stop offset="0" style="stop-color:#cec9fb;stop-opacity:0"/>
|
||||||
|
<stop offset="1" style="stop-color:#fcfcff;stop-opacity:1"/>
|
||||||
|
</linearGradient>
|
||||||
|
</defs>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 3.0 KiB |
@@ -19,6 +19,12 @@ data class Settings(
|
|||||||
val micEnabled: Boolean = false,
|
val micEnabled: Boolean = false,
|
||||||
/** Show the live stats overlay (FPS / throughput / latency) during a stream. */
|
/** Show the live stats overlay (FPS / throughput / latency) during a stream. */
|
||||||
val statsHudEnabled: Boolean = true,
|
val statsHudEnabled: Boolean = true,
|
||||||
|
/**
|
||||||
|
* Touch input model. `true` (default) = trackpad: the cursor stays put on touch-down and moves
|
||||||
|
* by the finger's relative delta (swipe to nudge, lift and re-swipe to walk it across), tap to
|
||||||
|
* click where it is. `false` = direct pointing: the cursor jumps to the finger (the old behaviour).
|
||||||
|
*/
|
||||||
|
val trackpadMode: Boolean = true,
|
||||||
)
|
)
|
||||||
|
|
||||||
/** Loads/saves [Settings] in the app-private `punktfunk_settings` prefs. */
|
/** Loads/saves [Settings] in the app-private `punktfunk_settings` prefs. */
|
||||||
@@ -35,6 +41,7 @@ class SettingsStore(context: Context) {
|
|||||||
gamepad = prefs.getInt(K_GAMEPAD, 0),
|
gamepad = prefs.getInt(K_GAMEPAD, 0),
|
||||||
micEnabled = prefs.getBoolean(K_MIC, false),
|
micEnabled = prefs.getBoolean(K_MIC, false),
|
||||||
statsHudEnabled = prefs.getBoolean(K_HUD, true),
|
statsHudEnabled = prefs.getBoolean(K_HUD, true),
|
||||||
|
trackpadMode = prefs.getBoolean(K_TRACKPAD, true),
|
||||||
)
|
)
|
||||||
|
|
||||||
fun save(s: Settings) {
|
fun save(s: Settings) {
|
||||||
@@ -47,6 +54,7 @@ class SettingsStore(context: Context) {
|
|||||||
.putInt(K_GAMEPAD, s.gamepad)
|
.putInt(K_GAMEPAD, s.gamepad)
|
||||||
.putBoolean(K_MIC, s.micEnabled)
|
.putBoolean(K_MIC, s.micEnabled)
|
||||||
.putBoolean(K_HUD, s.statsHudEnabled)
|
.putBoolean(K_HUD, s.statsHudEnabled)
|
||||||
|
.putBoolean(K_TRACKPAD, s.trackpadMode)
|
||||||
.apply()
|
.apply()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,6 +67,7 @@ class SettingsStore(context: Context) {
|
|||||||
const val K_GAMEPAD = "gamepad"
|
const val K_GAMEPAD = "gamepad"
|
||||||
const val K_MIC = "mic_enabled"
|
const val K_MIC = "mic_enabled"
|
||||||
const val K_HUD = "stats_hud_enabled"
|
const val K_HUD = "stats_hud_enabled"
|
||||||
|
const val K_TRACKPAD = "trackpad_mode"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -119,6 +119,16 @@ fun SettingsScreen(initial: Settings, onChange: (Settings) -> Unit, onBack: () -
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SettingsGroup("Pointer") {
|
||||||
|
ToggleRow(
|
||||||
|
title = "Trackpad mode",
|
||||||
|
subtitle = "Relative cursor like a laptop touchpad — swipe to nudge, tap to click. " +
|
||||||
|
"Off = the cursor jumps to your finger.",
|
||||||
|
checked = s.trackpadMode,
|
||||||
|
onCheckedChange = { on -> update(s.copy(trackpadMode = on)) },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
SettingsGroup("Overlay") {
|
SettingsGroup("Overlay") {
|
||||||
ToggleRow(
|
ToggleRow(
|
||||||
title = "Stats overlay",
|
title = "Stats overlay",
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ import io.unom.punktfunk.kit.NativeBridge
|
|||||||
import java.util.concurrent.atomic.AtomicBoolean
|
import java.util.concurrent.atomic.AtomicBoolean
|
||||||
import kotlinx.coroutines.delay
|
import kotlinx.coroutines.delay
|
||||||
import kotlin.math.abs
|
import kotlin.math.abs
|
||||||
|
import kotlin.math.hypot
|
||||||
import kotlin.math.roundToInt
|
import kotlin.math.roundToInt
|
||||||
|
|
||||||
// Touch-gesture tuning (px / ms). TAP_SLOP: movement under this still counts as a tap, not a drag.
|
// Touch-gesture tuning (px / ms). TAP_SLOP: movement under this still counts as a tap, not a drag.
|
||||||
@@ -50,6 +51,15 @@ private const val TAP_SLOP = 12f
|
|||||||
private const val TAP_DRAG_MS = 250L
|
private const val TAP_DRAG_MS = 250L
|
||||||
private const val SCROLL_DIV = 4f
|
private const val SCROLL_DIV = 4f
|
||||||
|
|
||||||
|
// Trackpad-mode pointer ballistics (relative one-finger motion). POINTER_SENS: base finger-px →
|
||||||
|
// host-px gain (~1:1, never twitchy). The rest is mild acceleration so a flick crosses the screen
|
||||||
|
// while a slow drag stays precise: above ACCEL_SPEED_FLOOR px/ms the gain ramps by ACCEL_GAIN per
|
||||||
|
// px/ms, capped at ACCEL_MAX (so a fast swipe can't fling the cursor uncontrollably).
|
||||||
|
private const val POINTER_SENS = 1.3f
|
||||||
|
private const val ACCEL_GAIN = 0.6f
|
||||||
|
private const val ACCEL_SPEED_FLOOR = 0.3f
|
||||||
|
private const val ACCEL_MAX = 3.0f
|
||||||
|
|
||||||
@Composable
|
@Composable
|
||||||
fun StreamScreen(handle: Long, micEnabled: Boolean, onDisconnect: () -> Unit) {
|
fun StreamScreen(handle: Long, micEnabled: Boolean, onDisconnect: () -> Unit) {
|
||||||
val context = LocalContext.current
|
val context = LocalContext.current
|
||||||
@@ -68,8 +78,11 @@ fun StreamScreen(handle: Long, micEnabled: Boolean, onDisconnect: () -> Unit) {
|
|||||||
// Live decode stats for the HUD. Poll once a second for the whole stream (cheap, and each call
|
// Live decode stats for the HUD. Poll once a second for the whole stream (cheap, and each call
|
||||||
// drains+resets the native window so it never grows unbounded even while the overlay is hidden);
|
// drains+resets the native window so it never grows unbounded even while the overlay is hidden);
|
||||||
// `showStats` only gates rendering. A 3-finger tap toggles it live; the default comes from Settings.
|
// `showStats` only gates rendering. A 3-finger tap toggles it live; the default comes from Settings.
|
||||||
|
val initialSettings = remember { SettingsStore(context).load() }
|
||||||
var stats by remember { mutableStateOf<DoubleArray?>(null) }
|
var stats by remember { mutableStateOf<DoubleArray?>(null) }
|
||||||
var showStats by remember { mutableStateOf(SettingsStore(context).load().statsHudEnabled) }
|
var showStats by remember { mutableStateOf(initialSettings.statsHudEnabled) }
|
||||||
|
// Touch model is fixed per session (re-keys the gesture handler below if it ever changes).
|
||||||
|
val trackpad = initialSettings.trackpadMode
|
||||||
LaunchedEffect(handle) {
|
LaunchedEffect(handle) {
|
||||||
while (true) {
|
while (true) {
|
||||||
delay(1000)
|
delay(1000)
|
||||||
@@ -145,13 +158,18 @@ fun StreamScreen(handle: Long, micEnabled: Boolean, onDisconnect: () -> Unit) {
|
|||||||
if (showStats) {
|
if (showStats) {
|
||||||
stats?.let { StatsOverlay(it, Modifier.align(Alignment.TopStart).padding(12.dp)) }
|
stats?.let { StatsOverlay(it, Modifier.align(Alignment.TopStart).padding(12.dp)) }
|
||||||
}
|
}
|
||||||
// Touch → mouse, absolute "direct pointing" like the Apple client: the host cursor follows
|
// Touch → mouse. Two models, chosen by the Trackpad-mode setting:
|
||||||
// your finger (MouseMoveAbs, host-normalized against the overlay size — which fills the video,
|
// • trackpad (default): the cursor STAYS where it is on touch-down and moves by the finger's
|
||||||
// so finger position maps straight onto the remote screen). Gestures: tap = left click;
|
// relative delta (MouseMove) with mild pointer acceleration — swipe to nudge, lift and
|
||||||
// two-finger tap = right click; two-finger drag = scroll; tap-then-press-and-drag = left-drag
|
// re-swipe to walk it across, tap to click where it is. This is what makes the cursor
|
||||||
// (text selection / moving windows); three-finger tap = toggle the stats HUD.
|
// reachable on a small screen.
|
||||||
|
// • direct (opt-out): the cursor jumps to the finger and follows it (MouseMoveAbs,
|
||||||
|
// host-normalized against the overlay size), the old "direct pointing" behaviour.
|
||||||
|
// Both share the same gesture vocabulary: tap = left click; two-finger tap = right click;
|
||||||
|
// two-finger drag = scroll; tap-then-press-and-drag = left-drag (text selection / moving
|
||||||
|
// windows); three-finger tap = toggle the stats HUD.
|
||||||
Box(
|
Box(
|
||||||
Modifier.fillMaxSize().pointerInput(handle) {
|
Modifier.fillMaxSize().pointerInput(handle, trackpad) {
|
||||||
var lastTapUp = 0L
|
var lastTapUp = 0L
|
||||||
var lastTapX = 0f
|
var lastTapX = 0f
|
||||||
var lastTapY = 0f
|
var lastTapY = 0f
|
||||||
@@ -176,7 +194,9 @@ fun StreamScreen(handle: Long, micEnabled: Boolean, onDisconnect: () -> Unit) {
|
|||||||
val isDrag = down.uptimeMillis - lastTapUp < TAP_DRAG_MS &&
|
val isDrag = down.uptimeMillis - lastTapUp < TAP_DRAG_MS &&
|
||||||
abs(startX - lastTapX) < TAP_SLOP && abs(startY - lastTapY) < TAP_SLOP
|
abs(startX - lastTapX) < TAP_SLOP && abs(startY - lastTapY) < TAP_SLOP
|
||||||
lastTapUp = 0L // consume the arming either way
|
lastTapUp = 0L // consume the arming either way
|
||||||
moveAbs(startX, startY) // cursor jumps to the finger immediately
|
// Direct mode jumps the cursor to the finger; trackpad mode leaves it put (the
|
||||||
|
// whole point — you nudge it with swipes instead).
|
||||||
|
if (!trackpad) moveAbs(startX, startY)
|
||||||
if (isDrag) NativeBridge.nativeSendPointerButton(handle, 1, true)
|
if (isDrag) NativeBridge.nativeSendPointerButton(handle, 1, true)
|
||||||
|
|
||||||
var moved = false
|
var moved = false
|
||||||
@@ -185,6 +205,14 @@ fun StreamScreen(handle: Long, micEnabled: Boolean, onDisconnect: () -> Unit) {
|
|||||||
var prevCx = startX
|
var prevCx = startX
|
||||||
var prevCy = startY
|
var prevCy = startY
|
||||||
var upTime = down.uptimeMillis
|
var upTime = down.uptimeMillis
|
||||||
|
// Trackpad relative-motion state: the tracked finger, its last position/time, and
|
||||||
|
// the sub-pixel remainder so a slow drag isn't lost to Int truncation.
|
||||||
|
var trackId = down.id
|
||||||
|
var prevX = startX
|
||||||
|
var prevY = startY
|
||||||
|
var prevT = down.uptimeMillis
|
||||||
|
var accX = 0f
|
||||||
|
var accY = 0f
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
val ev = awaitPointerEvent()
|
val ev = awaitPointerEvent()
|
||||||
@@ -217,15 +245,46 @@ fun StreamScreen(handle: Long, micEnabled: Boolean, onDisconnect: () -> Unit) {
|
|||||||
moved = true
|
moved = true
|
||||||
}
|
}
|
||||||
} else if (!scrolling) {
|
} else if (!scrolling) {
|
||||||
// One finger → the cursor follows it (skipped once a gesture turned into
|
// One finger (skipped once a gesture turned into a scroll, so dropping
|
||||||
// a scroll, so dropping back to one finger doesn't jerk the cursor).
|
// back to one finger doesn't jerk the cursor).
|
||||||
val p = pressed.firstOrNull { it.id == down.id } ?: pressed.first()
|
val p = pressed.firstOrNull { it.id == down.id } ?: pressed.first()
|
||||||
if (abs(p.position.x - startX) > TAP_SLOP ||
|
if (abs(p.position.x - startX) > TAP_SLOP ||
|
||||||
abs(p.position.y - startY) > TAP_SLOP
|
abs(p.position.y - startY) > TAP_SLOP
|
||||||
) {
|
) {
|
||||||
moved = true
|
moved = true
|
||||||
}
|
}
|
||||||
moveAbs(p.position.x, p.position.y)
|
if (trackpad) {
|
||||||
|
// Relative: move by the finger delta × (sensitivity × acceleration),
|
||||||
|
// carrying the sub-pixel remainder. Re-anchor (zero delta this frame)
|
||||||
|
// if the tracked finger changed, so lifting one of several fingers
|
||||||
|
// never jumps the cursor.
|
||||||
|
if (p.id != trackId) {
|
||||||
|
trackId = p.id
|
||||||
|
prevX = p.position.x
|
||||||
|
prevY = p.position.y
|
||||||
|
prevT = p.uptimeMillis
|
||||||
|
}
|
||||||
|
val dx = p.position.x - prevX
|
||||||
|
val dy = p.position.y - prevY
|
||||||
|
val dt = (p.uptimeMillis - prevT).coerceAtLeast(1L)
|
||||||
|
prevX = p.position.x
|
||||||
|
prevY = p.position.y
|
||||||
|
prevT = p.uptimeMillis
|
||||||
|
val speed = hypot(dx, dy) / dt // finger px per ms
|
||||||
|
val accel = (1f + ACCEL_GAIN * (speed - ACCEL_SPEED_FLOOR).coerceAtLeast(0f))
|
||||||
|
.coerceAtMost(ACCEL_MAX)
|
||||||
|
accX += dx * POINTER_SENS * accel
|
||||||
|
accY += dy * POINTER_SENS * accel
|
||||||
|
val outX = accX.toInt() // truncates toward zero → remainder kept w/ sign
|
||||||
|
val outY = accY.toInt()
|
||||||
|
if (outX != 0 || outY != 0) {
|
||||||
|
NativeBridge.nativeSendPointerMove(handle, outX, outY)
|
||||||
|
accX -= outX
|
||||||
|
accY -= outY
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
moveAbs(p.position.x, p.position.y) // direct: cursor follows the finger
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ev.changes.forEach { it.consume() }
|
ev.changes.forEach { it.consume() }
|
||||||
}
|
}
|
||||||
@@ -239,7 +298,7 @@ fun StreamScreen(handle: Long, micEnabled: Boolean, onDisconnect: () -> Unit) {
|
|||||||
NativeBridge.nativeSendPointerButton(handle, 3, true)
|
NativeBridge.nativeSendPointerButton(handle, 3, true)
|
||||||
NativeBridge.nativeSendPointerButton(handle, 3, false)
|
NativeBridge.nativeSendPointerButton(handle, 3, false)
|
||||||
}
|
}
|
||||||
else -> { // tap → left click, and arm tap-and-drag
|
else -> { // tap → left click (at the cursor's current spot), arm tap-drag
|
||||||
NativeBridge.nativeSendPointerButton(handle, 1, true)
|
NativeBridge.nativeSendPointerButton(handle, 1, true)
|
||||||
NativeBridge.nativeSendPointerButton(handle, 1, false)
|
NativeBridge.nativeSendPointerButton(handle, 1, false)
|
||||||
lastTapUp = upTime
|
lastTapUp = upTime
|
||||||
|
|||||||
@@ -1,8 +1,17 @@
|
|||||||
//! Android audio playback (android-only): pull Opus packets from the connector, decode to
|
//! Android audio playback (android-only): pull Opus packets from the connector, decode to
|
||||||
//! interleaved f32 stereo, and feed AAudio (LowLatency) via its realtime data callback through a
|
//! interleaved f32 stereo, and feed AAudio (LowLatency) via its realtime data callback through a
|
||||||
//! jitter ring. Mirrors [`crate::decode`]: one thread we own (the Opus decode producer) plus a
|
//! jitter ring. Mirrors [`crate::decode`]: one thread we own (the Opus decode producer) plus a
|
||||||
//! shutdown flag; the realtime callback thread is owned by AAudio. Ring logic ported from
|
//! shutdown flag; the realtime callback thread is owned by AAudio.
|
||||||
//! `punktfunk-client-linux/src/audio.rs` (prime ~3 quanta, drop-oldest cap, re-prime on drain).
|
//!
|
||||||
|
//! The ring started as a port of `punktfunk-client-linux/src/audio.rs`, but AAudio — unlike
|
||||||
|
//! PipeWire, which adaptively rate-matches the stream and absorbs a shallow buffer — hands us a raw
|
||||||
|
//! realtime callback and makes us own the buffer. So this client diverges deliberately to stop the
|
||||||
|
//! Android-only crackle: (1) the callback is allocation/free-free — decoded buffers are recycled to
|
||||||
|
//! the producer via a free-list instead of being freed on the audio thread (Android's Scudo `free`
|
||||||
|
//! has unbounded tail latency); (2) the jitter ring is deeper (~40 ms prime / ~150 ms hard cap) and
|
||||||
|
//! decoupled from the tiny LowLatency burst size, with de-prime hysteresis so a transient drain
|
||||||
|
//! doesn't manufacture a silence; (3) the AAudio HW buffer is primed above its 2-burst default and
|
||||||
|
//! grown on XRuns (Google's anti-glitch technique).
|
||||||
|
|
||||||
use ndk::audio::{
|
use ndk::audio::{
|
||||||
AudioCallbackResult, AudioDirection, AudioFormat, AudioPerformanceMode, AudioSharingMode,
|
AudioCallbackResult, AudioDirection, AudioFormat, AudioPerformanceMode, AudioSharingMode,
|
||||||
@@ -13,7 +22,7 @@ use punktfunk_core::error::PunktfunkError;
|
|||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
use std::ffi::c_void;
|
use std::ffi::c_void;
|
||||||
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||||
use std::sync::mpsc::{sync_channel, SyncSender, TrySendError};
|
use std::sync::mpsc::{sync_channel, Receiver, SyncSender, TrySendError};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
@@ -24,6 +33,29 @@ const RING_CHUNKS: usize = 64;
|
|||||||
/// Opus decode scratch: worst-case 120 ms stereo frame (5760 samples/ch × 2 ch).
|
/// Opus decode scratch: worst-case 120 ms stereo frame (5760 samples/ch × 2 ch).
|
||||||
const PCM_SCRATCH: usize = 5760 * CHANNELS;
|
const PCM_SCRATCH: usize = 5760 * CHANNELS;
|
||||||
|
|
||||||
|
// --- Jitter-ring depths, in interleaved-f32 samples (all expressed in ms via `MS`). -----------
|
||||||
|
// Unlike the Linux client (PipeWire adaptively rate-matches the stream to the graph clock, masking
|
||||||
|
// host↔DAC drift + a shallow ring), AAudio hands us a raw callback and we own the buffer: drift and
|
||||||
|
// WiFi power-save bunching land as underruns/overflows = crackle. So Android runs a deliberately
|
||||||
|
// deeper, smoothly-managed ring than Linux — keep the two clients' depths intentionally divergent.
|
||||||
|
/// Interleaved f32 samples per millisecond (48 kHz × 2 ch).
|
||||||
|
const MS: usize = (SAMPLE_RATE as usize / 1000) * CHANNELS; // 96
|
||||||
|
/// Prime/target floor: fill to ~40 ms before playing (and after a sustained drain). Deep enough to
|
||||||
|
/// ride out WiFi arrival jitter + clock drift; the dominant Android-only anti-crackle lever.
|
||||||
|
const PRIME_FLOOR: usize = 40 * MS;
|
||||||
|
/// Ceiling for the burst-scaled target (so a large quantum can't push the prime depth too high).
|
||||||
|
const PRIME_CEIL: usize = 80 * MS;
|
||||||
|
/// Drop-oldest headroom above the target before trimming — a ~80 ms band swallows an arrival burst
|
||||||
|
/// without overflowing.
|
||||||
|
const JITTER_HEADROOM: usize = 80 * MS;
|
||||||
|
/// Hard latency bound: never let the ring exceed ~150 ms (the only thing that caps added latency).
|
||||||
|
const HARD_CAP: usize = 150 * MS;
|
||||||
|
/// Re-prime (go silent to refill) only after this many CONSECUTIVE empty callbacks, so one transient
|
||||||
|
/// drain doesn't manufacture a fresh 40 ms silence (the old `if ring.is_empty()` re-primed instantly).
|
||||||
|
const DEPRIME_AFTER_CALLBACKS: u32 = 5;
|
||||||
|
/// Throttle the AAudio XRun-driven HW-buffer grow check (cheap, but no need to poll every quantum).
|
||||||
|
const XRUN_CHECK_EVERY: u32 = 128;
|
||||||
|
|
||||||
/// Diagnostics — written by the decode thread + the realtime callback, logged periodically. The
|
/// Diagnostics — written by the decode thread + the realtime callback, logged periodically. The
|
||||||
/// audio analogue of the video `fed`/`rendered` counters (we can't "screenshot" sound).
|
/// audio analogue of the video `fed`/`rendered` counters (we can't "screenshot" sound).
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
@@ -47,22 +79,41 @@ impl AudioPlayback {
|
|||||||
pub fn start(client: Arc<NativeClient>) -> Option<AudioPlayback> {
|
pub fn start(client: Arc<NativeClient>) -> Option<AudioPlayback> {
|
||||||
let counters = Arc::new(Counters::default());
|
let counters = Arc::new(Counters::default());
|
||||||
let (tx, rx) = sync_channel::<Vec<f32>>(RING_CHUNKS);
|
let (tx, rx) = sync_channel::<Vec<f32>>(RING_CHUNKS);
|
||||||
|
// Recycle free-list: drained PCM buffers go BACK to the decode thread to be refilled, so the
|
||||||
|
// realtime callback never frees heap (Android's Scudo allocator has unbounded free() tail
|
||||||
|
// latency — a free on the audio thread is an XRun = a click) and the decode thread rarely
|
||||||
|
// allocates. Same depth as the data channel.
|
||||||
|
let (free_tx, free_rx) = sync_channel::<Vec<f32>>(RING_CHUNKS);
|
||||||
|
|
||||||
// Realtime consumer state, owned by the callback (FnMut) — no lock: AAudio calls it from a
|
// Realtime consumer state, owned by the callback (FnMut) — no lock: AAudio calls it from a
|
||||||
// single high-priority thread, and the decode thread only touches `tx`.
|
// single high-priority thread, and the decode thread only touches `tx`/`free_rx`.
|
||||||
let cb_counters = counters.clone();
|
let cb_counters = counters.clone();
|
||||||
let mut ring: VecDeque<f32> = VecDeque::with_capacity(PCM_SCRATCH);
|
// Pre-reserve the ring so `extend` never reallocates on the realtime thread. Worst transient
|
||||||
|
// before the trim below = the hard cap plus one full channel of 5 ms (480-f32) frames — the
|
||||||
|
// punktfunk protocol always sends 5 ms Opus frames (host `audio_thread`); a larger frame
|
||||||
|
// would force a one-time realloc, asserted (not silently corrupted) in `decode_loop`.
|
||||||
|
let mut ring: VecDeque<f32> = VecDeque::with_capacity(HARD_CAP + RING_CHUNKS * 5 * MS);
|
||||||
let mut primed = false;
|
let mut primed = false;
|
||||||
let callback = move |_s: &AudioStream, data: *mut c_void, num_frames: i32| {
|
let mut empties: u32 = 0; // consecutive empty callbacks (de-prime hysteresis)
|
||||||
|
let mut cb_count: u32 = 0; // callbacks since open (throttles the XRun grow check)
|
||||||
|
let mut last_xrun: i32 = 0; // last AAudio XRun count we grew the buffer for
|
||||||
|
let callback = move |s: &AudioStream, data: *mut c_void, num_frames: i32| {
|
||||||
let want = num_frames as usize * CHANNELS;
|
let want = num_frames as usize * CHANNELS;
|
||||||
// SAFETY: AAudio provides `num_frames * channel_count` F32 slots at `data`.
|
// SAFETY: AAudio provides `num_frames * channel_count` F32 slots at `data`.
|
||||||
let out = unsafe { std::slice::from_raw_parts_mut(data as *mut f32, want) };
|
let out = unsafe { std::slice::from_raw_parts_mut(data as *mut f32, want) };
|
||||||
while let Ok(chunk) = rx.try_recv() {
|
// Drain decoded chunks into the ring WITHOUT freeing on the RT thread: `drain(..)` empties
|
||||||
ring.extend(chunk);
|
// each Vec but keeps its capacity, then the empty buffer is handed back for reuse. The
|
||||||
|
// only RT-thread free is the rare case where the recycle channel is momentarily full.
|
||||||
|
while let Ok(mut chunk) = rx.try_recv() {
|
||||||
|
ring.extend(chunk.drain(..));
|
||||||
|
let _ = free_tx.try_send(chunk);
|
||||||
}
|
}
|
||||||
// Prime to ~3 quanta (15 ms; floor 15 ms / ceiling 200 ms); drop OLDEST above the cap.
|
// Jitter buffer: prime to ~40 ms (PRIME_FLOOR) before playing and after a sustained drain;
|
||||||
let target = (3 * want).clamp(720 * CHANNELS, 9600 * CHANNELS);
|
// drop-oldest only above a wide ~120 ms band. The PRIME_FLOOR clamp decouples the prime target from the AAudio burst `want` (tiny
|
||||||
while ring.len() > target.max(want) + want {
|
// on the LowLatency MMAP path) so the depth doesn't collapse to a single quantum.
|
||||||
|
let target = (3 * want).clamp(PRIME_FLOOR, PRIME_CEIL);
|
||||||
|
let hard_cap = (target + JITTER_HEADROOM).min(HARD_CAP);
|
||||||
|
while ring.len() > hard_cap {
|
||||||
ring.pop_front();
|
ring.pop_front();
|
||||||
}
|
}
|
||||||
if !primed && ring.len() >= target {
|
if !primed && ring.len() >= target {
|
||||||
@@ -79,12 +130,34 @@ impl AudioPlayback {
|
|||||||
out.fill(0.0);
|
out.fill(0.0);
|
||||||
cb_counters.underruns.fetch_add(1, Ordering::Relaxed);
|
cb_counters.underruns.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
// Re-prime only after a RUN of empty callbacks, not a single transient one — otherwise
|
||||||
|
// every momentary drain costs a fresh 40 ms silence (the old behaviour, self-inflicted
|
||||||
|
// crackle on any jitter spike).
|
||||||
if ring.is_empty() {
|
if ring.is_empty() {
|
||||||
primed = false; // re-prime after a genuine drain (avoids sustained crackle on loss)
|
empties += 1;
|
||||||
|
if empties >= DEPRIME_AFTER_CALLBACKS {
|
||||||
|
primed = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
empties = 0;
|
||||||
}
|
}
|
||||||
cb_counters
|
cb_counters
|
||||||
.ring_depth
|
.ring_depth
|
||||||
.store(ring.len() as u64, Ordering::Relaxed);
|
.store(ring.len() as u64, Ordering::Relaxed);
|
||||||
|
// Google's AAudio anti-glitch technique: when the device reports new XRuns, grow the HW
|
||||||
|
// buffer by one burst (up to capacity). getXRunCount + setBufferSizeInFrames are both
|
||||||
|
// callback-safe / non-blocking, and set clamps to capacity so it self-limits. Throttled.
|
||||||
|
cb_count = cb_count.wrapping_add(1);
|
||||||
|
if cb_count % XRUN_CHECK_EVERY == 0 {
|
||||||
|
let xr = s.x_run_count();
|
||||||
|
if xr > last_xrun {
|
||||||
|
last_xrun = xr;
|
||||||
|
let burst = s.frames_per_burst().max(1);
|
||||||
|
let grown =
|
||||||
|
(s.buffer_size_in_frames() + burst).min(s.buffer_capacity_in_frames());
|
||||||
|
let _ = s.set_buffer_size_in_frames(grown);
|
||||||
|
}
|
||||||
|
}
|
||||||
AudioCallbackResult::Continue
|
AudioCallbackResult::Continue
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -109,19 +182,31 @@ impl AudioPlayback {
|
|||||||
log::error!("audio: request_start: {e}");
|
log::error!("audio: request_start: {e}");
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
// Lift the AAudio HW buffer off its brittle ~2-burst LowLatency default so a single late
|
||||||
|
// callback doesn't immediately underrun; the in-callback XRun loop grows it further if the
|
||||||
|
// device still glitches. set_buffer_size_in_frames clamps to capacity.
|
||||||
|
let burst = stream.frames_per_burst().max(1);
|
||||||
|
let _ =
|
||||||
|
stream.set_buffer_size_in_frames((burst * 3).min(stream.buffer_capacity_in_frames()));
|
||||||
|
// perf != LowLatency or rate != 48000 means AAudio silently fell to a resampled legacy path
|
||||||
|
// (different burst behaviour) — surface it so the field can tell that apart from plain jitter.
|
||||||
log::info!(
|
log::info!(
|
||||||
"audio: AAudio started rate={} ch={} fmt={:?} burst={}",
|
"audio: AAudio started rate={} ch={} fmt={:?} perf={:?} share={:?} burst={} buf={}/{}",
|
||||||
stream.sample_rate(),
|
stream.sample_rate(),
|
||||||
stream.channel_count(),
|
stream.channel_count(),
|
||||||
stream.format(),
|
stream.format(),
|
||||||
|
stream.performance_mode(),
|
||||||
|
stream.sharing_mode(),
|
||||||
stream.frames_per_burst(),
|
stream.frames_per_burst(),
|
||||||
|
stream.buffer_size_in_frames(),
|
||||||
|
stream.buffer_capacity_in_frames(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let shutdown = Arc::new(AtomicBool::new(false));
|
let shutdown = Arc::new(AtomicBool::new(false));
|
||||||
let sd = shutdown.clone();
|
let sd = shutdown.clone();
|
||||||
let join = std::thread::Builder::new()
|
let join = std::thread::Builder::new()
|
||||||
.name("pf-audio".into())
|
.name("pf-audio".into())
|
||||||
.spawn(move || decode_loop(client, tx, sd, counters))
|
.spawn(move || decode_loop(client, tx, free_rx, sd, counters))
|
||||||
.ok();
|
.ok();
|
||||||
|
|
||||||
Some(AudioPlayback {
|
Some(AudioPlayback {
|
||||||
@@ -143,9 +228,12 @@ impl Drop for AudioPlayback {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Producer: `next_audio` → Opus `decode_float` → push interleaved f32 into the ring channel.
|
/// Producer: `next_audio` → Opus `decode_float` → push interleaved f32 into the ring channel.
|
||||||
|
/// Buffers come from (and return to) the realtime callback's recycle free-list so the steady state
|
||||||
|
/// is allocation-free on both threads.
|
||||||
fn decode_loop(
|
fn decode_loop(
|
||||||
client: Arc<NativeClient>,
|
client: Arc<NativeClient>,
|
||||||
tx: SyncSender<Vec<f32>>,
|
tx: SyncSender<Vec<f32>>,
|
||||||
|
free_rx: Receiver<Vec<f32>>,
|
||||||
shutdown: Arc<AtomicBool>,
|
shutdown: Arc<AtomicBool>,
|
||||||
counters: Arc<Counters>,
|
counters: Arc<Counters>,
|
||||||
) {
|
) {
|
||||||
@@ -166,8 +254,22 @@ fn decode_loop(
|
|||||||
for &s in &pcm[..n] {
|
for &s in &pcm[..n] {
|
||||||
window_peak = window_peak.max(s.abs());
|
window_peak = window_peak.max(s.abs());
|
||||||
}
|
}
|
||||||
|
// The ring's pre-reservation in `start` assumes the protocol's 5 ms (≤480-f32)
|
||||||
|
// frames; a larger frame would force a one-time realloc on the RT thread. Catch a
|
||||||
|
// future host frame-size change here in debug, not as a silent audio glitch.
|
||||||
|
debug_assert!(
|
||||||
|
n <= 5 * MS,
|
||||||
|
"audio frame {n} f32 exceeds the 5 ms ring reserve"
|
||||||
|
);
|
||||||
let count = counters.opus_decoded.fetch_add(1, Ordering::Relaxed) + 1;
|
let count = counters.opus_decoded.fetch_add(1, Ordering::Relaxed) + 1;
|
||||||
match tx.try_send(pcm[..n].to_vec()) {
|
// Reuse a recycled buffer if the callback handed one back; only allocate when the
|
||||||
|
// free-list is momentarily empty (startup / after a backpressure drop).
|
||||||
|
let mut buf = free_rx
|
||||||
|
.try_recv()
|
||||||
|
.unwrap_or_else(|_| Vec::with_capacity(PCM_SCRATCH));
|
||||||
|
buf.clear();
|
||||||
|
buf.extend_from_slice(&pcm[..n]);
|
||||||
|
match tx.try_send(buf) {
|
||||||
Ok(()) | Err(TrySendError::Full(_)) => {} // drop-newest under backpressure
|
Ok(()) | Err(TrySendError::Full(_)) => {} // drop-newest under backpressure
|
||||||
Err(TrySendError::Disconnected(_)) => break,
|
Err(TrySendError::Disconnected(_)) => break,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -361,4 +361,4 @@ ever switched to a logged-in GUI session, re-adding macOS to the job's capture s
|
|||||||
- Mid-stream renegotiation (resolution change without reconnect) is designed-for but not
|
- Mid-stream renegotiation (resolution change without reconnect) is designed-for but not
|
||||||
implemented (the Welcome is one-shot today).
|
implemented (the Welcome is one-shot today).
|
||||||
- Host-side gamepad injection needs `/dev/uinput` access on the box (udev rule from
|
- Host-side gamepad injection needs `/dev/uinput` access on the box (udev rule from
|
||||||
`docs/linux-setup.md`).
|
`design/linux-setup.md`).
|
||||||
|
|||||||
@@ -276,7 +276,7 @@ pub mod frame {
|
|||||||
/// These were hand-duplicated as `OFF_*`/`SHM_*` constants in `inject/{gamepad,dualsense}_windows.rs`
|
/// These were hand-duplicated as `OFF_*`/`SHM_*` constants in `inject/{gamepad,dualsense}_windows.rs`
|
||||||
/// and (as bare literals — `*view.add(140)`) in the standalone `xusb-driver`/`dualsense-driver`
|
/// and (as bare literals — `*view.add(140)`) in the standalone `xusb-driver`/`dualsense-driver`
|
||||||
/// workspaces, guarded only by "must match" comments — the top ABI-drift hazard the audit flagged
|
/// workspaces, guarded only by "must match" comments — the top ABI-drift hazard the audit flagged
|
||||||
/// (`docs/windows-host-rewrite.md` §2.7). Owning them here with `Pod` derives + `offset_of!`
|
/// (`design/windows-host-rewrite.md` §2.7). Owning them here with `Pod` derives + `offset_of!`
|
||||||
/// asserts makes a one-sided edit a compile error.
|
/// asserts makes a one-sided edit a compile error.
|
||||||
///
|
///
|
||||||
/// The host creates the section (privileged, permissive DACL so the restricted WUDFHost token can
|
/// The host creates the section (privileged, permissive DACL so the restricted WUDFHost token can
|
||||||
|
|||||||
@@ -25,6 +25,14 @@ aes-gcm = "0.10"
|
|||||||
cbc = { version = "0.1", features = ["alloc"] }
|
cbc = { version = "0.1", features = ["alloc"] }
|
||||||
rand = "0.8"
|
rand = "0.8"
|
||||||
hex = "0.4"
|
hex = "0.4"
|
||||||
|
# Cover-art delivery in the game library: encode Lutris's local JPEGs into `data:` URLs and decode
|
||||||
|
# the Epic launcher's base64 `catcache.bin`. Cross-platform (Linux Lutris art + Windows Epic art).
|
||||||
|
base64 = "0.22"
|
||||||
|
# Blocking HTTP for the library cover-art warmer (no-auth GOG api.gog.com + Xbox displaycatalog),
|
||||||
|
# run on a background thread off the hot path. `ureq` is small + sync (no tokio here) and bundles
|
||||||
|
# webpki roots (no system cert dependency). Cross-platform so the fetch/parse code is compiled +
|
||||||
|
# checked everywhere even though only the Windows GOG/Xbox providers need it today.
|
||||||
|
ureq = "2"
|
||||||
rcgen = { version = "0.13", default-features = false, features = ["aws_lc_rs", "pem"] }
|
rcgen = { version = "0.13", default-features = false, features = ["aws_lc_rs", "pem"] }
|
||||||
x509-parser = "0.16"
|
x509-parser = "0.16"
|
||||||
axum-server = { version = "0.7", features = ["tls-rustls"] }
|
axum-server = { version = "0.7", features = ["tls-rustls"] }
|
||||||
@@ -85,6 +93,10 @@ wayland-scanner = "0.31"
|
|||||||
wayland-backend = "0.3"
|
wayland-backend = "0.3"
|
||||||
# Parse `pw-dump` JSON to find gamescope's PipeWire node (gamescope backend).
|
# Parse `pw-dump` JSON to find gamescope's PipeWire node (gamescope backend).
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
|
# Read the Lutris library DB (`pga.db`) for the Lutris store provider. `bundled` vendors + compiles
|
||||||
|
# SQLite (cc, already needed for ffmpeg/opus) so there's no system libsqlite3 runtime dependency —
|
||||||
|
# clean for the deb/rpm/flatpak packaging. Opened read-only/immutable (Lutris may hold it open).
|
||||||
|
rusqlite = { version = "0.40", features = ["bundled"] }
|
||||||
# Builds/validates the xkb keymap uploaded to the virtual keyboard + tracks modifier state.
|
# Builds/validates the xkb keymap uploaded to the virtual keyboard + tracks modifier state.
|
||||||
xkbcommon = "0.8"
|
xkbcommon = "0.8"
|
||||||
# The safe `opus` crate is stereo-only; surround (5.1/7.1) needs the libopus *multistream*
|
# The safe `opus` crate is stereo-only; surround (5.1/7.1) needs the libopus *multistream*
|
||||||
@@ -169,6 +181,12 @@ windows = { version = "0.62", features = [
|
|||||||
# handler / ServiceManager install). Wraps the Win32 service API; the supervision loop itself uses
|
# handler / ServiceManager install). Wraps the Win32 service API; the supervision loop itself uses
|
||||||
# the `windows` crate above.
|
# the `windows` crate above.
|
||||||
windows-service = "0.7"
|
windows-service = "0.7"
|
||||||
|
# Read the GOG.com install registry (HKLM\SOFTWARE\WOW6432Node\GOG.com\Games) for the GOG store
|
||||||
|
# provider — ergonomic + correct-by-construction vs. hand-rolled Reg* FFI for subkey enumeration.
|
||||||
|
winreg = "0.56"
|
||||||
|
# Parse each Xbox/Game-Pass game's MicrosoftGame.config (GDK manifest XML) for the Xbox store
|
||||||
|
# provider — a small read-only DOM is all we need (Identity/Executable/ShellVisuals/StoreId).
|
||||||
|
roxmltree = "0.21"
|
||||||
# Software H.264 encoder (GPU-less path + NVENC fallback). The default `source` feature statically
|
# Software H.264 encoder (GPU-less path + NVENC fallback). The default `source` feature statically
|
||||||
# compiles OpenH264 (BSD-2) — no system lib, builds on MSVC; nasm on PATH adds the SIMD fast path.
|
# compiles OpenH264 (BSD-2) — no system lib, builds on MSVC; nasm on PATH adds the SIMD fast path.
|
||||||
openh264 = "0.9"
|
openh264 = "0.9"
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<protocol name="fake_input">
|
||||||
|
<copyright>
|
||||||
|
SPDX-FileCopyrightText: 2015 Martin Gräßlin
|
||||||
|
SPDX-License-Identifier: LGPL-2.1-or-later
|
||||||
|
</copyright>
|
||||||
|
<interface name="org_kde_kwin_fake_input" version="4">
|
||||||
|
<description summary="Fake input manager">
|
||||||
|
This interface allows other processes to provide fake input events.
|
||||||
|
Its purpose is, on the one hand, to provide testing facilities like XTest
|
||||||
|
on X11, but also to support use cases like remote control (a remote
|
||||||
|
desktop server). The compositor gates the interface: it is only exposed
|
||||||
|
to clients authorized through their .desktop X-KDE-Wayland-Interfaces, so
|
||||||
|
binding it is the authorization — no per-event confirmation dialog.
|
||||||
|
</description>
|
||||||
|
<request name="authenticate">
|
||||||
|
<description summary="Information about the application requesting fake input">
|
||||||
|
A FakeInput is required to authenticate itself by providing the
|
||||||
|
application name and the reason for fake input. The compositor may use
|
||||||
|
this information to decide whether to allow or deny the request.
|
||||||
|
</description>
|
||||||
|
<arg name="application" type="string" summary="user visible name of the application requesting fake input"/>
|
||||||
|
<arg name="reason" type="string" summary="reason of why fake input is requested"/>
|
||||||
|
</request>
|
||||||
|
<request name="pointer_motion">
|
||||||
|
<description summary="pointer motion event"/>
|
||||||
|
<arg name="delta_x" type="fixed" summary="X delta of the relative pointer motion"/>
|
||||||
|
<arg name="delta_y" type="fixed" summary="Y delta of the relative pointer motion"/>
|
||||||
|
</request>
|
||||||
|
<request name="button">
|
||||||
|
<description summary="pointer button event"/>
|
||||||
|
<arg name="button" type="uint" summary="evdev button code"/>
|
||||||
|
<arg name="state" type="uint" summary="button state, 0 released, 1 pressed"/>
|
||||||
|
</request>
|
||||||
|
<request name="axis">
|
||||||
|
<description summary="pointer axis (scroll) event"/>
|
||||||
|
<arg name="axis" type="uint" summary="wl_pointer.axis (0 vertical, 1 horizontal)"/>
|
||||||
|
<arg name="value" type="fixed" summary="axis value"/>
|
||||||
|
</request>
|
||||||
|
<request name="touch_down" since="2">
|
||||||
|
<description summary="touch down event"/>
|
||||||
|
<arg name="id" type="uint" summary="unique id of this touch point; must not be reused until up"/>
|
||||||
|
<arg name="x" type="fixed" summary="x coordinate in global compositor space"/>
|
||||||
|
<arg name="y" type="fixed" summary="y coordinate in global compositor space"/>
|
||||||
|
</request>
|
||||||
|
<request name="touch_motion" since="2">
|
||||||
|
<description summary="touch motion event"/>
|
||||||
|
<arg name="id" type="uint" summary="unique id of an existing touch point"/>
|
||||||
|
<arg name="x" type="fixed" summary="x coordinate in global compositor space"/>
|
||||||
|
<arg name="y" type="fixed" summary="y coordinate in global compositor space"/>
|
||||||
|
</request>
|
||||||
|
<request name="touch_up" since="2">
|
||||||
|
<description summary="touch up event"/>
|
||||||
|
<arg name="id" type="uint" summary="unique id of an existing touch point"/>
|
||||||
|
</request>
|
||||||
|
<request name="touch_cancel" since="2">
|
||||||
|
<description summary="cancel all current touch points"/>
|
||||||
|
</request>
|
||||||
|
<request name="touch_frame" since="2">
|
||||||
|
<description summary="end a set of touch events (atomic frame)"/>
|
||||||
|
</request>
|
||||||
|
<request name="pointer_motion_absolute" since="3">
|
||||||
|
<description summary="absolute pointer motion event"/>
|
||||||
|
<arg name="x" type="fixed" summary="x coordinate in global compositor space"/>
|
||||||
|
<arg name="y" type="fixed" summary="y coordinate in global compositor space"/>
|
||||||
|
</request>
|
||||||
|
<request name="keyboard_key" since="4">
|
||||||
|
<description summary="keyboard key event"/>
|
||||||
|
<arg name="button" type="uint" summary="evdev key code"/>
|
||||||
|
<arg name="state" type="uint" summary="key state, 0 released, 1 pressed"/>
|
||||||
|
</request>
|
||||||
|
</interface>
|
||||||
|
</protocol>
|
||||||
@@ -320,11 +320,18 @@ fn mic_pw_thread(
|
|||||||
.into_inner();
|
.into_inner();
|
||||||
let mut params = [Pod::from_bytes(&values).context("mic pod from bytes")?];
|
let mut params = [Pod::from_bytes(&values).context("mic pod from bytes")?];
|
||||||
|
|
||||||
|
// RT_PROCESS: run the producer callback on PipeWire's realtime data loop, so the source is a
|
||||||
|
// *synchronous* graph node that joins its consumer's driver group and is actually driven. Without
|
||||||
|
// it the node is async/main-loop and, in the host's busy multi-stream graph (desktop-audio +
|
||||||
|
// video capture + the session), never acquires a driver — it stays suspended and its process()
|
||||||
|
// never fires, so every recorder hears pure silence (the long-standing "Linux host mic broken").
|
||||||
stream
|
stream
|
||||||
.connect(
|
.connect(
|
||||||
spa::utils::Direction::Output, // we PRODUCE samples (a source)
|
spa::utils::Direction::Output, // we PRODUCE samples (a source)
|
||||||
None,
|
None,
|
||||||
pw::stream::StreamFlags::AUTOCONNECT | pw::stream::StreamFlags::MAP_BUFFERS,
|
pw::stream::StreamFlags::AUTOCONNECT
|
||||||
|
| pw::stream::StreamFlags::MAP_BUFFERS
|
||||||
|
| pw::stream::StreamFlags::RT_PROCESS,
|
||||||
&mut params,
|
&mut params,
|
||||||
)
|
)
|
||||||
.context("pw mic stream connect")?;
|
.context("pw mic stream connect")?;
|
||||||
|
|||||||
@@ -106,7 +106,10 @@ fn capture_thread(
|
|||||||
}
|
}
|
||||||
let res = (|| -> Result<()> {
|
let res = (|| -> Result<()> {
|
||||||
// Loopback = capture the RENDER endpoint: get the default render device, but open a CAPTURE
|
// Loopback = capture the RENDER endpoint: get the default render device, but open a CAPTURE
|
||||||
// client with loopback=true over it.
|
// client with loopback=true over it. NOTE: the virtual mic (`super::wasapi_mic`) is guarded
|
||||||
|
// to NEVER target this same endpoint — otherwise the client's injected mic would be captured
|
||||||
|
// here and streamed back to the client (infinite echo). Keep that guard in sync if this
|
||||||
|
// device selection ever changes.
|
||||||
let device = DeviceEnumerator::new()
|
let device = DeviceEnumerator::new()
|
||||||
.context("DeviceEnumerator")?
|
.context("DeviceEnumerator")?
|
||||||
.get_default_device(&Direction::Render)
|
.get_default_device(&Direction::Render)
|
||||||
|
|||||||
@@ -5,14 +5,27 @@
|
|||||||
//!
|
//!
|
||||||
//! Target device, by friendly-name substring (first match wins; override with `PUNKTFUNK_MIC_DEVICE`):
|
//! Target device, by friendly-name substring (first match wins; override with `PUNKTFUNK_MIC_DEVICE`):
|
||||||
//! "Steam Streaming Microphone" (ships with Steam Remote Play — exactly this purpose), VB-Audio
|
//! "Steam Streaming Microphone" (ships with Steam Remote Play — exactly this purpose), VB-Audio
|
||||||
//! "CABLE Input", VoiceMeeter, or anything with "virtual" in the name. If none is present we return an
|
//! "CABLE Input", VoiceMeeter, or anything with "virtual" in the name. If none is present we
|
||||||
//! error with install guidance and the host runs without mic passthrough.
|
//! auto-install the Steam Streaming audio pair (see [`install_steam_audio_pair`]); failing that we
|
||||||
|
//! return an error with install guidance and the host runs without mic passthrough.
|
||||||
|
//!
|
||||||
|
//! **Anti-echo guard (the whole point of this being non-trivial).** The desktop-audio plane
|
||||||
|
//! ([`super::wasapi_cap`]) loopback-captures the **default render endpoint**. WASAPI loopback
|
||||||
|
//! captures the *mixed* output of an endpoint — i.e. everything any app renders to it, including
|
||||||
|
//! what THIS module writes. So if the virtual-mic target is the same device the loopback captures,
|
||||||
|
//! the client's uplinked mic is captured straight back into the host→client audio stream: an
|
||||||
|
//! infinite echo. [`find_device`] therefore **excludes the default render endpoint** from the
|
||||||
|
//! candidates — the mic is guaranteed to land on a different device. (Linux gets this for free: its
|
||||||
|
//! mic is a dedicated `Audio/Source` node, structurally separate from the monitored sink.)
|
||||||
//!
|
//!
|
||||||
//! `push` enqueues decoded interleaved-f32 PCM into a bounded ring (drop-oldest beyond ~80 ms so mic
|
//! `push` enqueues decoded interleaved-f32 PCM into a bounded ring (drop-oldest beyond ~80 ms so mic
|
||||||
//! latency stays bounded); a dedicated COM-apartment thread renders it event-driven, filling silence
|
//! latency stays bounded); a dedicated COM-apartment thread renders it event-driven, filling silence
|
||||||
//! when the client isn't talking. WASAPI objects are `!Send`, so they live entirely on that thread
|
//! when the client isn't talking. WASAPI objects are `!Send`, so they live entirely on that thread
|
||||||
//! (mirrors `WasapiLoopbackCapturer`).
|
//! (mirrors `WasapiLoopbackCapturer`).
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it.
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{VirtualMic, SAMPLE_RATE};
|
use super::{VirtualMic, SAMPLE_RATE};
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
@@ -110,8 +123,23 @@ impl VirtualMic for WasapiVirtualMic {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolve the virtual-mic target among render endpoints by friendly-name. Logs all candidates so a
|
/// The endpoint ID of the device the desktop-audio loopback records (the **default render
|
||||||
/// missing device is diagnosable.
|
/// endpoint**, see [`super::wasapi_cap`]). The virtual mic must never target this device — injecting
|
||||||
|
/// there echoes the client's mic back into the host→client audio stream. `None` if it can't be
|
||||||
|
/// resolved (then [`find_device`] can't prove a candidate is safe and falls back to name-only
|
||||||
|
/// matching — no worse than before the guard existed).
|
||||||
|
fn default_render_id() -> Option<String> {
|
||||||
|
wasapi::DeviceEnumerator::new()
|
||||||
|
.ok()?
|
||||||
|
.get_default_device(&Direction::Render)
|
||||||
|
.ok()?
|
||||||
|
.get_id()
|
||||||
|
.ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve the virtual-mic target among render endpoints by friendly-name, **excluding the endpoint
|
||||||
|
/// the loopback captures** (the [`default_render_id`] anti-echo guard). Logs all candidates so a
|
||||||
|
/// missing/skipped device is diagnosable.
|
||||||
fn find_device() -> Result<wasapi::Device> {
|
fn find_device() -> Result<wasapi::Device> {
|
||||||
let enumerator = wasapi::DeviceEnumerator::new().context("DeviceEnumerator")?;
|
let enumerator = wasapi::DeviceEnumerator::new().context("DeviceEnumerator")?;
|
||||||
let collection = enumerator
|
let collection = enumerator
|
||||||
@@ -121,8 +149,11 @@ fn find_device() -> Result<wasapi::Device> {
|
|||||||
let want = std::env::var("PUNKTFUNK_MIC_DEVICE")
|
let want = std::env::var("PUNKTFUNK_MIC_DEVICE")
|
||||||
.ok()
|
.ok()
|
||||||
.map(|s| s.to_lowercase());
|
.map(|s| s.to_lowercase());
|
||||||
|
// The device the loopback captures — a name match on it is rejected below (would echo).
|
||||||
|
let loopback_id = default_render_id();
|
||||||
let mut names = Vec::new();
|
let mut names = Vec::new();
|
||||||
let mut found = None;
|
let mut found = None;
|
||||||
|
let mut skipped_loopback = false;
|
||||||
for i in 0..n {
|
for i in 0..n {
|
||||||
let Ok(dev) = collection.get_device_at_index(i) else {
|
let Ok(dev) = collection.get_device_at_index(i) else {
|
||||||
continue;
|
continue;
|
||||||
@@ -134,16 +165,37 @@ fn find_device() -> Result<wasapi::Device> {
|
|||||||
None => CANDIDATES.iter().any(|c| lname.contains(c)),
|
None => CANDIDATES.iter().any(|c| lname.contains(c)),
|
||||||
};
|
};
|
||||||
if hit && found.is_none() {
|
if hit && found.is_none() {
|
||||||
found = Some(dev);
|
// Anti-echo guard: never inject into the endpoint the loopback captures.
|
||||||
|
let is_loopback = match (dev.get_id().ok(), loopback_id.as_deref()) {
|
||||||
|
(Some(id), Some(lb)) => id == lb,
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
if is_loopback {
|
||||||
|
skipped_loopback = true;
|
||||||
|
tracing::warn!(device = %name,
|
||||||
|
"virtual-mic candidate is the loopback (default render) endpoint — skipping; \
|
||||||
|
injecting there would echo the client's mic into the desktop-audio stream");
|
||||||
|
} else {
|
||||||
|
found = Some(dev);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
names.push(name);
|
names.push(name);
|
||||||
}
|
}
|
||||||
found.ok_or_else(|| {
|
found.ok_or_else(|| {
|
||||||
anyhow!(
|
if skipped_loopback {
|
||||||
"no virtual-mic device among render endpoints {names:?}. Install VB-Audio Virtual Cable \
|
anyhow!(
|
||||||
or enable Steam Remote Play's microphone (Steam Streaming Microphone), or set \
|
"the only virtual-mic candidate among render endpoints {names:?} is the default \
|
||||||
PUNKTFUNK_MIC_DEVICE=<friendly-name substring>."
|
playback device the host loopback-captures — injecting there would echo the mic \
|
||||||
)
|
back to the client. Add a SEPARATE virtual audio device for the mic (e.g. the Steam \
|
||||||
|
Streaming Microphone) or set a different default playback device, then reconnect."
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
anyhow!(
|
||||||
|
"no virtual-mic device among render endpoints {names:?}. Install VB-Audio Virtual \
|
||||||
|
Cable or enable Steam Remote Play's microphone (Steam Streaming Microphone), or set \
|
||||||
|
PUNKTFUNK_MIC_DEVICE=<friendly-name substring>."
|
||||||
|
)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -153,8 +205,15 @@ fn find_or_install_device() -> Result<wasapi::Device> {
|
|||||||
match find_device() {
|
match find_device() {
|
||||||
Ok(d) => Ok(d),
|
Ok(d) => Ok(d),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
tracing::info!("no virtual mic device present — attempting auto-install");
|
tracing::info!("no usable virtual mic device present — attempting auto-install");
|
||||||
if unsafe { try_install_virtual_mic() } {
|
// SAFETY: `install_steam_audio_pair` is `unsafe` only because it `LoadLibraryExW`s
|
||||||
|
// `newdev.dll` and calls `DiInstallDriverW` through a `transmute`d function pointer;
|
||||||
|
// calling it imposes no extra precondition here (it takes no args and aliases nothing).
|
||||||
|
// Its internal contract holds: the `DiInstall` type matches the documented
|
||||||
|
// `BOOL DiInstallDriverW(HWND, PCWSTR, DWORD, PBOOL)` ABI, and it passes a
|
||||||
|
// NUL-terminated UTF-16 INF path with null/zero optional args. Invoked once on the
|
||||||
|
// dedicated mic thread.
|
||||||
|
if unsafe { install_steam_audio_pair() } {
|
||||||
find_device()
|
find_device()
|
||||||
} else {
|
} else {
|
||||||
Err(e)
|
Err(e)
|
||||||
@@ -163,13 +222,26 @@ fn find_or_install_device() -> Result<wasapi::Device> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Best-effort: install a virtual mic device so one exists without the user installing anything.
|
/// Best-effort: install BOTH Steam Streaming audio devices (the "Steam pair") so mic passthrough
|
||||||
/// Mirrors Apollo's Steam Streaming Speakers install — Steam Remote Play ships
|
/// works out of the box and the host has a desktop-audio sink distinct from the mic. Steam Remote
|
||||||
/// `SteamStreamingMicrophone.inf` next to the speakers INF, so install it via `DiInstallDriverW`
|
/// Play ships `SteamStreamingMicrophone.inf` + `SteamStreamingSpeakers.inf`: the microphone gives the
|
||||||
/// (loaded from `newdev.dll`, like Apollo, to avoid an extra windows-crate feature). Needs admin (the
|
/// virtual mic a target whose **capture** endpoint apps record from, and the speakers give a
|
||||||
/// host runs as SYSTEM). Returns true on success; false (no-op) if Steam isn't installed (INF absent),
|
/// **render** endpoint a headless box can loopback-capture that is NOT the mic — so the loopback and
|
||||||
/// the install is denied, or `PUNKTFUNK_NO_MIC_INSTALL` is set.
|
/// the mic land on different devices and never echo (see [`find_device`]). Returns true if either
|
||||||
unsafe fn try_install_virtual_mic() -> bool {
|
/// INF was installed. No-op when Steam isn't installed (INFs absent), the install is denied (needs admin —
|
||||||
|
/// the host runs as SYSTEM), or `PUNKTFUNK_NO_MIC_INSTALL` is set.
|
||||||
|
unsafe fn install_steam_audio_pair() -> bool {
|
||||||
|
// Microphone first (the mic's actual target); speakers second (the distinct desktop-audio sink).
|
||||||
|
let mic = try_install_steam_audio("SteamStreamingMicrophone.inf");
|
||||||
|
let spk = try_install_steam_audio("SteamStreamingSpeakers.inf");
|
||||||
|
mic || spk
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Install one Steam Streaming driver INF by filename via `DiInstallDriverW` (loaded from
|
||||||
|
/// `newdev.dll`, like Apollo, to avoid an extra windows-crate feature). See
|
||||||
|
/// [`install_steam_audio_pair`] for the contract; `inf_name` is a bare filename under Steam's
|
||||||
|
/// per-arch `drivers\Windows10\{arch}\` directory.
|
||||||
|
unsafe fn try_install_steam_audio(inf_name: &str) -> bool {
|
||||||
use windows::core::{s, w, PCWSTR};
|
use windows::core::{s, w, PCWSTR};
|
||||||
use windows::Win32::Foundation::HWND;
|
use windows::Win32::Foundation::HWND;
|
||||||
use windows::Win32::System::Environment::ExpandEnvironmentStringsW;
|
use windows::Win32::System::Environment::ExpandEnvironmentStringsW;
|
||||||
@@ -187,12 +259,11 @@ unsafe fn try_install_virtual_mic() -> bool {
|
|||||||
let subdir = "arm64";
|
let subdir = "arm64";
|
||||||
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
|
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
|
||||||
let subdir = "x86";
|
let subdir = "x86";
|
||||||
let template: Vec<u16> = format!(
|
let template: Vec<u16> =
|
||||||
"%CommonProgramFiles(x86)%\\Steam\\drivers\\Windows10\\{subdir}\\SteamStreamingMicrophone.inf"
|
format!("%CommonProgramFiles(x86)%\\Steam\\drivers\\Windows10\\{subdir}\\{inf_name}")
|
||||||
)
|
.encode_utf16()
|
||||||
.encode_utf16()
|
.chain(std::iter::once(0))
|
||||||
.chain(std::iter::once(0))
|
.collect();
|
||||||
.collect();
|
|
||||||
let mut path = vec![0u16; 1024];
|
let mut path = vec![0u16; 1024];
|
||||||
let n = ExpandEnvironmentStringsW(PCWSTR(template.as_ptr()), Some(path.as_mut_slice()));
|
let n = ExpandEnvironmentStringsW(PCWSTR(template.as_ptr()), Some(path.as_mut_slice()));
|
||||||
if n == 0 || n as usize > path.len() {
|
if n == 0 || n as usize > path.len() {
|
||||||
@@ -200,7 +271,7 @@ unsafe fn try_install_virtual_mic() -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Ok(newdev) = LoadLibraryExW(w!("newdev.dll"), None, LOAD_LIBRARY_SEARCH_SYSTEM32) else {
|
let Ok(newdev) = LoadLibraryExW(w!("newdev.dll"), None, LOAD_LIBRARY_SEARCH_SYSTEM32) else {
|
||||||
tracing::warn!("could not load newdev.dll — virtual-mic auto-install unavailable");
|
tracing::warn!("could not load newdev.dll — Steam-audio auto-install unavailable");
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
let Some(addr) = GetProcAddress(newdev, s!("DiInstallDriverW")) else {
|
let Some(addr) = GetProcAddress(newdev, s!("DiInstallDriverW")) else {
|
||||||
@@ -216,13 +287,17 @@ unsafe fn try_install_virtual_mic() -> bool {
|
|||||||
std::ptr::null_mut(),
|
std::ptr::null_mut(),
|
||||||
) != 0;
|
) != 0;
|
||||||
if ok {
|
if ok {
|
||||||
tracing::info!("installed the Steam Streaming Microphone virtual device");
|
tracing::info!(
|
||||||
|
inf = inf_name,
|
||||||
|
"installed a Steam Streaming virtual audio device"
|
||||||
|
);
|
||||||
std::thread::sleep(Duration::from_secs(5)); // let the audio subsystem register the endpoint
|
std::thread::sleep(Duration::from_secs(5)); // let the audio subsystem register the endpoint
|
||||||
} else {
|
} else {
|
||||||
let err = windows::Win32::Foundation::GetLastError();
|
let err = windows::Win32::Foundation::GetLastError();
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
|
inf = inf_name,
|
||||||
?err,
|
?err,
|
||||||
"no virtual mic auto-installed (Steam absent / not admin) — see manual-install guidance"
|
"Steam-audio device not auto-installed (Steam absent / not admin) — see install guidance"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
ok
|
ok
|
||||||
|
|||||||
@@ -2,6 +2,10 @@
|
|||||||
//! CPU-copy fallback (the portal delivers a CPU buffer; the encoder uploads it to the GPU
|
//! CPU-copy fallback (the portal delivers a CPU buffer; the encoder uploads it to the GPU
|
||||||
//! internally). Zero-copy dmabuf→NVENC import is deferred (plan §9 risk).
|
//! internally). Zero-copy dmabuf→NVENC import is deferred (plan §9 risk).
|
||||||
|
|
||||||
|
// Every unsafe block in this module tree carries a `// SAFETY:` proof; enforce it (unsafe-proof
|
||||||
|
// program). As a parent module this also covers the child modules (capture::windows/linux::*).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
/// Packed pixel layout of a [`CapturedFrame`]. The ScreenCast portal negotiates the
|
/// Packed pixel layout of a [`CapturedFrame`]. The ScreenCast portal negotiates the
|
||||||
@@ -433,6 +437,11 @@ pub fn capture_virtual_output(
|
|||||||
// DDA is the safety net (+ the secure-desktop path). The encode thread is set MTA so the WGC
|
// DDA is the safety net (+ the secure-desktop path). The encode thread is set MTA so the WGC
|
||||||
// objects built on the watchdog thread (also MTA) are usable here; the keepalive is handed to WGC
|
// objects built on the watchdog thread (also MTA) are usable here; the keepalive is handed to WGC
|
||||||
// only on success, else to DDA. A hung watchdog thread is abandoned (holds no keepalive).
|
// only on success, else to DDA. A hung watchdog thread is abandoned (holds no keepalive).
|
||||||
|
// SAFETY: `RoInitialize` is a combase FFI call that initializes the WinRT apartment for the calling
|
||||||
|
// thread. It takes the `RO_INIT_MULTITHREADED` enum by value and borrows no memory, so there is no
|
||||||
|
// pointer/lifetime/aliasing obligation; it is safe on any thread and idempotent — a second call on a
|
||||||
|
// thread already in a compatible apartment returns S_FALSE / RPC_E_CHANGED_MODE, which we discard.
|
||||||
|
// Runs on the encode thread that goes on to use the WGC (WinRT) objects built by the watchdog thread.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = windows::Win32::System::WinRT::RoInitialize(
|
let _ = windows::Win32::System::WinRT::RoInitialize(
|
||||||
windows::Win32::System::WinRT::RO_INIT_MULTITHREADED,
|
windows::Win32::System::WinRT::RO_INIT_MULTITHREADED,
|
||||||
|
|||||||
@@ -17,6 +17,9 @@
|
|||||||
//! instead of leaking it to process exit. The portal thread (when used) still parks on its zbus
|
//! instead of leaking it to process exit. The portal thread (when used) still parks on its zbus
|
||||||
//! connection until process exit.
|
//! connection until process exit.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{CapturedFrame, Capturer, DmabufFrame, FramePayload, PixelFormat};
|
use super::{CapturedFrame, Capturer, DmabufFrame, FramePayload, PixelFormat};
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use std::os::fd::OwnedFd;
|
use std::os::fd::OwnedFd;
|
||||||
@@ -37,6 +40,13 @@ pub struct PortalCapturer {
|
|||||||
/// branch to tell "format never negotiated" (modifier/format mismatch) apart from "negotiated
|
/// branch to tell "format never negotiated" (modifier/format mismatch) apart from "negotiated
|
||||||
/// but no buffers arrived" (compositor idle/unmapped) — the two black-screen root causes.
|
/// but no buffers arrived" (compositor idle/unmapped) — the two black-screen root causes.
|
||||||
negotiated: Arc<AtomicBool>,
|
negotiated: Arc<AtomicBool>,
|
||||||
|
/// True only while the PipeWire stream is `Streaming`. [`try_latest`](Self::try_latest) reads it
|
||||||
|
/// to distinguish a static desktop (alive, no new buffers) from a dead source (left `Streaming`).
|
||||||
|
streaming: Arc<AtomicBool>,
|
||||||
|
/// When the stream first dropped out of `Streaming` with no new frame; used to grace a transient
|
||||||
|
/// renegotiation before declaring the source lost. Cleared whenever a frame arrives or the stream
|
||||||
|
/// is `Streaming`.
|
||||||
|
stall_since: Option<std::time::Instant>,
|
||||||
/// The PipeWire node this capturer consumes — surfaced in error messages for diagnosis.
|
/// The PipeWire node this capturer consumes — surfaced in error messages for diagnosis.
|
||||||
node_id: u32,
|
node_id: u32,
|
||||||
/// Stops the PipeWire loop on teardown (sent in `Drop`). Without it a dropped or failed
|
/// Stops the PipeWire loop on teardown (sent in `Drop`). Without it a dropped or failed
|
||||||
@@ -106,6 +116,7 @@ struct PwHandles {
|
|||||||
frames: Receiver<CapturedFrame>,
|
frames: Receiver<CapturedFrame>,
|
||||||
active: Arc<AtomicBool>,
|
active: Arc<AtomicBool>,
|
||||||
negotiated: Arc<AtomicBool>,
|
negotiated: Arc<AtomicBool>,
|
||||||
|
streaming: Arc<AtomicBool>,
|
||||||
quit: ::pipewire::channel::Sender<()>,
|
quit: ::pipewire::channel::Sender<()>,
|
||||||
join: thread::JoinHandle<()>,
|
join: thread::JoinHandle<()>,
|
||||||
}
|
}
|
||||||
@@ -118,6 +129,8 @@ impl PwHandles {
|
|||||||
frames: self.frames,
|
frames: self.frames,
|
||||||
active: self.active,
|
active: self.active,
|
||||||
negotiated: self.negotiated,
|
negotiated: self.negotiated,
|
||||||
|
streaming: self.streaming,
|
||||||
|
stall_since: None,
|
||||||
node_id,
|
node_id,
|
||||||
quit: Some(self.quit),
|
quit: Some(self.quit),
|
||||||
join: Some(self.join),
|
join: Some(self.join),
|
||||||
@@ -140,6 +153,8 @@ fn spawn_pipewire(
|
|||||||
let active_cb = active.clone();
|
let active_cb = active.clone();
|
||||||
let negotiated = Arc::new(AtomicBool::new(false));
|
let negotiated = Arc::new(AtomicBool::new(false));
|
||||||
let negotiated_cb = negotiated.clone();
|
let negotiated_cb = negotiated.clone();
|
||||||
|
let streaming = Arc::new(AtomicBool::new(false));
|
||||||
|
let streaming_cb = streaming.clone();
|
||||||
// pipewire's own cross-thread channel: the receiver attaches to the loop and quits it; the
|
// pipewire's own cross-thread channel: the receiver attaches to the loop and quits it; the
|
||||||
// sender lives on the capturer and fires in its `Drop`. Absolute `::pipewire` path — the
|
// sender lives on the capturer and fires in its `Drop`. Absolute `::pipewire` path — the
|
||||||
// inner `mod pipewire` shadows the crate name at this scope.
|
// inner `mod pipewire` shadows the crate name at this scope.
|
||||||
@@ -154,6 +169,7 @@ fn spawn_pipewire(
|
|||||||
frame_tx,
|
frame_tx,
|
||||||
active_cb,
|
active_cb,
|
||||||
negotiated_cb,
|
negotiated_cb,
|
||||||
|
streaming_cb,
|
||||||
zerocopy,
|
zerocopy,
|
||||||
preferred,
|
preferred,
|
||||||
quit_rx,
|
quit_rx,
|
||||||
@@ -166,6 +182,7 @@ fn spawn_pipewire(
|
|||||||
frames: frame_rx,
|
frames: frame_rx,
|
||||||
active,
|
active,
|
||||||
negotiated,
|
negotiated,
|
||||||
|
streaming,
|
||||||
quit: quit_tx,
|
quit: quit_tx,
|
||||||
join,
|
join,
|
||||||
})
|
})
|
||||||
@@ -216,6 +233,28 @@ impl Capturer for PortalCapturer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if latest.is_some() || self.streaming.load(Ordering::Relaxed) {
|
||||||
|
// A frame arrived, or the source is alive but idle (static desktop) — normal. Clear any
|
||||||
|
// stall and repeat the last frame on `None`, exactly as before.
|
||||||
|
self.stall_since = None;
|
||||||
|
return Ok(latest);
|
||||||
|
}
|
||||||
|
// No new frame AND the stream has left `Streaming` (Paused/Unconnected/Error). The source
|
||||||
|
// went away — a compositor torn down on a Gaming↔Desktop switch, a removed virtual output.
|
||||||
|
// Grace a brief window (a transient mid-stream renegotiation can blip out of Streaming and
|
||||||
|
// back) before declaring it lost so the encode loop rebuilds in place rather than freezing
|
||||||
|
// on the last frame forever.
|
||||||
|
const STALL_GRACE: Duration = Duration::from_millis(1500);
|
||||||
|
let since = *self.stall_since.get_or_insert_with(std::time::Instant::now);
|
||||||
|
if since.elapsed() >= STALL_GRACE {
|
||||||
|
self.stall_since = None;
|
||||||
|
return Err(anyhow!(
|
||||||
|
"PipeWire source stalled (node {}): stream left Streaming for >{}ms with no frames \
|
||||||
|
— the compositor/virtual output went away (session switch?)",
|
||||||
|
self.node_id,
|
||||||
|
STALL_GRACE.as_millis()
|
||||||
|
));
|
||||||
|
}
|
||||||
Ok(latest)
|
Ok(latest)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -464,6 +503,10 @@ mod pipewire {
|
|||||||
/// Set once a video format is agreed (`param_changed`), so a first-frame timeout can tell
|
/// Set once a video format is agreed (`param_changed`), so a first-frame timeout can tell
|
||||||
/// "format never negotiated" apart from "negotiated but no buffers arrived".
|
/// "format never negotiated" apart from "negotiated but no buffers arrived".
|
||||||
negotiated: Arc<AtomicBool>,
|
negotiated: Arc<AtomicBool>,
|
||||||
|
/// True only while the PipeWire stream is in `Streaming` (the source is alive). Goes false on
|
||||||
|
/// `Paused`/`Unconnected`/`Error` — the source vanished (compositor torn down on a session
|
||||||
|
/// switch). Read by [`PortalCapturer::try_latest`] to surface a sustained drop as a loss.
|
||||||
|
streaming: Arc<AtomicBool>,
|
||||||
/// Present when zero-copy is enabled on NVIDIA: imports a dmabuf → CUDA device buffer.
|
/// Present when zero-copy is enabled on NVIDIA: imports a dmabuf → CUDA device buffer.
|
||||||
importer: Option<crate::zerocopy::EglImporter>,
|
importer: Option<crate::zerocopy::EglImporter>,
|
||||||
/// VAAPI zero-copy: hand the raw dmabuf to the encoder (which imports + GPU-CSCs it) instead
|
/// VAAPI zero-copy: hand the raw dmabuf to the encoder (which imports + GPU-CSCs it) instead
|
||||||
@@ -498,6 +541,12 @@ mod pipewire {
|
|||||||
|
|
||||||
impl DmabufMap {
|
impl DmabufMap {
|
||||||
fn new(fd: i32, len: usize) -> Option<DmabufMap> {
|
fn new(fd: i32, len: usize) -> Option<DmabufMap> {
|
||||||
|
// SAFETY: a null `addr` lets the kernel choose the mapping address; `fd` is a caller-owned
|
||||||
|
// dmabuf/MemFd fd, valid for the duration of this call, and `len` is the requested map length.
|
||||||
|
// `mmap` reads no Rust memory — it installs a fresh PROT_READ/MAP_SHARED page mapping and
|
||||||
|
// returns its base (or MAP_FAILED, checked below before `DmabufMap` adopts it). The returned
|
||||||
|
// region is a brand-new VMA, so it aliases no live Rust object, and it keeps the underlying
|
||||||
|
// object mapped independently of `fd` (which may be closed after this returns).
|
||||||
let ptr = unsafe {
|
let ptr = unsafe {
|
||||||
libc::mmap(
|
libc::mmap(
|
||||||
std::ptr::null_mut(),
|
std::ptr::null_mut(),
|
||||||
@@ -514,6 +563,11 @@ mod pipewire {
|
|||||||
|
|
||||||
impl Drop for DmabufMap {
|
impl Drop for DmabufMap {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `self.ptr`/`self.len` are exactly the base+length of a successful `mmap` in
|
||||||
|
// `DmabufMap::new` (constructed only when `ptr != MAP_FAILED`). This `DmabufMap` uniquely owns
|
||||||
|
// that mapping and `drop` runs once, so `munmap` releases a live mapping exactly once — no
|
||||||
|
// double-unmap. Every `&[u8]` derived from the mapping is bounded by this `DmabufMap`'s
|
||||||
|
// lifetime, so no borrow outlives the unmap.
|
||||||
unsafe {
|
unsafe {
|
||||||
libc::munmap(self.ptr, self.len);
|
libc::munmap(self.ptr, self.len);
|
||||||
}
|
}
|
||||||
@@ -719,6 +773,14 @@ mod pipewire {
|
|||||||
if !ud.active.load(Ordering::Relaxed) {
|
if !ud.active.load(Ordering::Relaxed) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// SAFETY: `spa_buf` is the `*mut spa_buffer` of the PipeWire buffer we dequeued and still hold for
|
||||||
|
// this `.process` callback (not requeued until after `consume_frame` returns), so it is live. The
|
||||||
|
// block null-checks `spa_buf`, requires `n_datas != 0`, and null-checks the `datas` array pointer
|
||||||
|
// before forming any slice. `(*spa_buf).datas` points to `n_datas` libspa `spa_data` structs, and
|
||||||
|
// `pw::spa::buffer::Data` is `#[repr(transparent)]` over `spa_data` (the same cast
|
||||||
|
// `Buffer::datas_mut` performs — see the function doc), so the pointer cast + length describe
|
||||||
|
// exactly that array, in bounds. The PipeWire loop is single-threaded and owns the buffer here, so
|
||||||
|
// this `&mut` slice is the only reference to it (no aliasing/data race).
|
||||||
let datas: &mut [pw::spa::buffer::Data] = unsafe {
|
let datas: &mut [pw::spa::buffer::Data] = unsafe {
|
||||||
if spa_buf.is_null() || (*spa_buf).n_datas == 0 || (*spa_buf).datas.is_null() {
|
if spa_buf.is_null() || (*spa_buf).n_datas == 0 || (*spa_buf).datas.is_null() {
|
||||||
&mut []
|
&mut []
|
||||||
@@ -783,6 +845,10 @@ mod pipewire {
|
|||||||
// dup the fd so it survives the SPA buffer recycle — the encode thread
|
// dup the fd so it survives the SPA buffer recycle — the encode thread
|
||||||
// imports it. (Content stability across the brief map+CSC window relies on
|
// imports it. (Content stability across the brief map+CSC window relies on
|
||||||
// the compositor's buffer-pool depth, like any zero-copy capture.)
|
// the compositor's buffer-pool depth, like any zero-copy capture.)
|
||||||
|
// SAFETY: `datas[0].fd()` is the dmabuf fd owned by the live PipeWire buffer (valid
|
||||||
|
// for this callback). `fcntl(fd, F_DUPFD_CLOEXEC, 0)` reads only the integer fd,
|
||||||
|
// touches no Rust memory, and returns a fresh independent CLOEXEC duplicate (or -1).
|
||||||
|
// The original stays owned by PipeWire; the dup is a new fd we own (checked >= 0).
|
||||||
let dup =
|
let dup =
|
||||||
unsafe { libc::fcntl(datas[0].fd() as i32, libc::F_DUPFD_CLOEXEC, 0) };
|
unsafe { libc::fcntl(datas[0].fd() as i32, libc::F_DUPFD_CLOEXEC, 0) };
|
||||||
if dup >= 0 {
|
if dup >= 0 {
|
||||||
@@ -796,6 +862,10 @@ mod pipewire {
|
|||||||
pts_ns,
|
pts_ns,
|
||||||
format: fmt,
|
format: fmt,
|
||||||
payload: FramePayload::Dmabuf(DmabufFrame {
|
payload: FramePayload::Dmabuf(DmabufFrame {
|
||||||
|
// SAFETY: `dup` is the fresh fd `fcntl(F_DUPFD_CLOEXEC)` just returned
|
||||||
|
// (checked `dup >= 0`); nothing else owns it, so `OwnedFd` takes sole
|
||||||
|
// ownership and closes it exactly once on drop — no alias, no
|
||||||
|
// double-close.
|
||||||
fd: unsafe { OwnedFd::from_raw_fd(dup) },
|
fd: unsafe { OwnedFd::from_raw_fd(dup) },
|
||||||
fourcc,
|
fourcc,
|
||||||
modifier: ud.modifier,
|
modifier: ud.modifier,
|
||||||
@@ -930,6 +1000,11 @@ mod pipewire {
|
|||||||
// cleanly if the real buffer is genuinely too small. MemPtr buffers (no fd) are same-process —
|
// cleanly if the real buffer is genuinely too small. MemPtr buffers (no fd) are same-process —
|
||||||
// trust `d.data()`.
|
// trust `d.data()`.
|
||||||
let fd_len = if raw_fd > 0 {
|
let fd_len = if raw_fd > 0 {
|
||||||
|
// SAFETY: `libc::stat` is a C plain-old-data struct for which all-zero is a valid value, so
|
||||||
|
// `mem::zeroed()` is a sound initializer. `raw_fd` is the buffer's fd (`> 0` checked here) and
|
||||||
|
// valid for this callback; `fstat` writes metadata into `&mut st`, a live, aligned,
|
||||||
|
// correctly-sized stack `stat` that outlives the synchronous call. `st.st_size` is read only
|
||||||
|
// after the return value is confirmed `== 0`. `st` is a fresh local, so nothing aliases it.
|
||||||
unsafe {
|
unsafe {
|
||||||
let mut st: libc::stat = std::mem::zeroed();
|
let mut st: libc::stat = std::mem::zeroed();
|
||||||
(libc::fstat(raw_fd as i32, &mut st) == 0 && st.st_size > 0)
|
(libc::fstat(raw_fd as i32, &mut st) == 0 && st.st_size > 0)
|
||||||
@@ -946,6 +1021,14 @@ mod pipewire {
|
|||||||
match DmabufMap::new(raw_fd as i32, map_len) {
|
match DmabufMap::new(raw_fd as i32, map_len) {
|
||||||
Some(m) => {
|
Some(m) => {
|
||||||
_mapping = m;
|
_mapping = m;
|
||||||
|
// SAFETY: `_mapping` is the `DmabufMap` just stored; its `ptr`/`len` come from a
|
||||||
|
// successful `mmap` of `map_len` PROT_READ bytes, so `ptr` is non-null, page-aligned,
|
||||||
|
// and the VMA is one allocated object of `len` bytes valid for reads. In the common
|
||||||
|
// path `map_len == fd_len` (the fd's real size from `fstat`), so the mapping spans the
|
||||||
|
// whole object; the de-pad copy below is further bounded by the `offset <= buf.len()`
|
||||||
|
// and `needed > avail` guards. The `&[u8]` borrows `_mapping`, which lives to the end
|
||||||
|
// of `consume_frame`, so the slice never outlives the mapping, and the memory is only
|
||||||
|
// read here, so there is no aliasing/mutation.
|
||||||
Some(unsafe {
|
Some(unsafe {
|
||||||
std::slice::from_raw_parts(_mapping.ptr as *const u8, _mapping.len)
|
std::slice::from_raw_parts(_mapping.ptr as *const u8, _mapping.len)
|
||||||
})
|
})
|
||||||
@@ -1013,6 +1096,7 @@ mod pipewire {
|
|||||||
tx: SyncSender<CapturedFrame>,
|
tx: SyncSender<CapturedFrame>,
|
||||||
active: Arc<AtomicBool>,
|
active: Arc<AtomicBool>,
|
||||||
negotiated: Arc<AtomicBool>,
|
negotiated: Arc<AtomicBool>,
|
||||||
|
streaming: Arc<AtomicBool>,
|
||||||
zerocopy: bool,
|
zerocopy: bool,
|
||||||
preferred: Option<(u32, u32, u32)>,
|
preferred: Option<(u32, u32, u32)>,
|
||||||
quit_rx: pw::channel::Receiver<()>,
|
quit_rx: pw::channel::Receiver<()>,
|
||||||
@@ -1107,6 +1191,7 @@ mod pipewire {
|
|||||||
tx,
|
tx,
|
||||||
active,
|
active,
|
||||||
negotiated,
|
negotiated,
|
||||||
|
streaming,
|
||||||
importer,
|
importer,
|
||||||
vaapi_passthrough,
|
vaapi_passthrough,
|
||||||
nv12: crate::zerocopy::nv12_enabled(),
|
nv12: crate::zerocopy::nv12_enabled(),
|
||||||
@@ -1131,8 +1216,17 @@ mod pipewire {
|
|||||||
|
|
||||||
let _listener = stream
|
let _listener = stream
|
||||||
.add_local_listener_with_user_data(data)
|
.add_local_listener_with_user_data(data)
|
||||||
.state_changed(|_stream, _ud, old, new| {
|
.state_changed(|_stream, ud, old, new| {
|
||||||
tracing::info!(?old, ?new, "pipewire stream state");
|
tracing::info!(?old, ?new, "pipewire stream state");
|
||||||
|
// Track whether the node is actively producing. A live source sits in `Streaming`
|
||||||
|
// (a static desktop just sends no buffers); anything else — `Paused`/`Unconnected`/
|
||||||
|
// `Error` — means the source went away (compositor died, virtual output removed on a
|
||||||
|
// Gaming↔Desktop switch). `try_latest` turns a sustained non-Streaming state into a
|
||||||
|
// capture-loss so the encode loop rebuilds instead of freezing on the last frame.
|
||||||
|
ud.streaming.store(
|
||||||
|
matches!(new, pw::stream::StreamState::Streaming),
|
||||||
|
Ordering::Relaxed,
|
||||||
|
);
|
||||||
})
|
})
|
||||||
.param_changed(|_stream, ud, id, param| {
|
.param_changed(|_stream, ud, id, param| {
|
||||||
let Some(param) = param else { return };
|
let Some(param) = param else { return };
|
||||||
@@ -1177,24 +1271,43 @@ mod pipewire {
|
|||||||
// Latest-frame-only (OBS pattern): Mutter delivers buffers in bursts and
|
// Latest-frame-only (OBS pattern): Mutter delivers buffers in bursts and
|
||||||
// recycles its pool; an older queued buffer carries a STALE frame. Drain all
|
// recycles its pool; an older queued buffer carries a STALE frame. Drain all
|
||||||
// queued buffers, requeue the older ones, keep only the newest.
|
// queued buffers, requeue the older ones, keep only the newest.
|
||||||
|
// SAFETY: `stream` is the live stream PipeWire passes into this `.process` callback on
|
||||||
|
// the loop thread, where `pw_stream_dequeue_buffer` is the documented call. It returns
|
||||||
|
// a `*mut pw_buffer` owned by the stream (or null when the queue is drained),
|
||||||
|
// null-checked before any use. The loop is single-threaded, so no concurrent access.
|
||||||
let mut newest = unsafe { stream.dequeue_raw_buffer() };
|
let mut newest = unsafe { stream.dequeue_raw_buffer() };
|
||||||
if newest.is_null() {
|
if newest.is_null() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let mut drained = 1u32;
|
let mut drained = 1u32;
|
||||||
loop {
|
loop {
|
||||||
|
// SAFETY: same stream/loop-thread contract as the dequeue above; each call returns
|
||||||
|
// the next stream-owned `*mut pw_buffer` or null (null-checked before use).
|
||||||
let next = unsafe { stream.dequeue_raw_buffer() };
|
let next = unsafe { stream.dequeue_raw_buffer() };
|
||||||
if next.is_null() {
|
if next.is_null() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// SAFETY: `newest` is a non-null `*mut pw_buffer` previously dequeued from this same
|
||||||
|
// stream and not yet requeued; `pw_stream_queue_buffer` hands ownership back to the
|
||||||
|
// stream. We immediately overwrite `newest = next`, so the requeued pointer is never
|
||||||
|
// touched again (no use-after-requeue). Loop thread, single-threaded.
|
||||||
unsafe { stream.queue_raw_buffer(newest) };
|
unsafe { stream.queue_raw_buffer(newest) };
|
||||||
newest = next;
|
newest = next;
|
||||||
drained += 1;
|
drained += 1;
|
||||||
}
|
}
|
||||||
|
// SAFETY: `newest` is the non-null buffer we still own (dequeued, not requeued);
|
||||||
|
// `.buffer` is a `*mut spa_buffer` field libpipewire populated. This is a single field
|
||||||
|
// load through a valid pointer — no mutation or aliasing.
|
||||||
let spa_buf = unsafe { (*newest).buffer };
|
let spa_buf = unsafe { (*newest).buffer };
|
||||||
|
|
||||||
// Inspect the newest buffer's header + first chunk for the diagnostic and the
|
// Inspect the newest buffer's header + first chunk for the diagnostic and the
|
||||||
// CORRUPTED skip. SPA_META_Header is optional — `hdr` may be null.
|
// CORRUPTED skip. SPA_META_Header is optional — `hdr` may be null.
|
||||||
|
// SAFETY: `spa_buf` is the `*mut spa_buffer` of the buffer we still hold.
|
||||||
|
// `spa_buffer_find_meta_data` scans that buffer's metadata array for a `SPA_META_Header`
|
||||||
|
// of at least `size_of::<spa_meta_header>()` bytes and returns a pointer into the held
|
||||||
|
// buffer's metadata (or null). The size argument matches the struct the result is cast
|
||||||
|
// to, and the pointer stays valid as long as the buffer is held (until requeue). Null is
|
||||||
|
// handled below.
|
||||||
let hdr = unsafe {
|
let hdr = unsafe {
|
||||||
spa::sys::spa_buffer_find_meta_data(
|
spa::sys::spa_buffer_find_meta_data(
|
||||||
spa_buf,
|
spa_buf,
|
||||||
@@ -1205,11 +1318,20 @@ mod pipewire {
|
|||||||
let hdr_flags = if hdr.is_null() {
|
let hdr_flags = if hdr.is_null() {
|
||||||
0u32
|
0u32
|
||||||
} else {
|
} else {
|
||||||
|
// SAFETY: reached only when `hdr` is non-null; it points to a `spa_meta_header`
|
||||||
|
// inside the live buffer's metadata (returned for a size >=
|
||||||
|
// `size_of::<spa_meta_header>()`, so `.flags` is in bounds). A single field read
|
||||||
|
// while the buffer is still held.
|
||||||
unsafe { (*hdr).flags }
|
unsafe { (*hdr).flags }
|
||||||
};
|
};
|
||||||
// First data chunk's size + flags (used for the diagnostic + CORRUPTED check)
|
// First data chunk's size + flags (used for the diagnostic + CORRUPTED check)
|
||||||
// and its data type (a dmabuf legitimately reports chunk size 0, so the size-0
|
// and its data type (a dmabuf legitimately reports chunk size 0, so the size-0
|
||||||
// stale skip only applies to mappable SHM buffers).
|
// stale skip only applies to mappable SHM buffers).
|
||||||
|
// SAFETY: every dereference is guarded in order before any field read — `spa_buf`
|
||||||
|
// non-null, `n_datas > 0`, the `datas` (`*mut spa_data`) array non-null, and the first
|
||||||
|
// element's `chunk` (`*mut spa_chunk`) non-null. `d0` is that first `spa_data` and `c`
|
||||||
|
// its chunk; reading `(*d0).type_`, `(*c).size`, `(*c).flags` are in-bounds field loads
|
||||||
|
// of libspa structs inside the buffer we still hold. Single-threaded loop, no mutation.
|
||||||
let (chunk_size, chunk_flags, is_dmabuf) = unsafe {
|
let (chunk_size, chunk_flags, is_dmabuf) = unsafe {
|
||||||
if !spa_buf.is_null()
|
if !spa_buf.is_null()
|
||||||
&& (*spa_buf).n_datas > 0
|
&& (*spa_buf).n_datas > 0
|
||||||
@@ -1246,11 +1368,17 @@ mod pipewire {
|
|||||||
"capture: skipped a stale CORRUPTED/cursor buffer (GNOME)"
|
"capture: skipped a stale CORRUPTED/cursor buffer (GNOME)"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
// SAFETY: `newest` is the non-null buffer we own (dequeued, never requeued on this
|
||||||
|
// skip path); hand it back to the stream exactly once and return without touching it
|
||||||
|
// again. Loop thread inside `.process`.
|
||||||
unsafe { stream.queue_raw_buffer(newest) };
|
unsafe { stream.queue_raw_buffer(newest) };
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
consume_frame(ud, spa_buf);
|
consume_frame(ud, spa_buf);
|
||||||
|
// SAFETY: `consume_frame` has finished reading `spa_buf` (and the `datas` borrows derived
|
||||||
|
// from `newest`), so requeuing the owned `newest` exactly once here is sound — no
|
||||||
|
// use-after-requeue. Loop thread inside `.process`.
|
||||||
unsafe { stream.queue_raw_buffer(newest) };
|
unsafe { stream.queue_raw_buffer(newest) };
|
||||||
}));
|
}));
|
||||||
if outcome.is_err() {
|
if outcome.is_err() {
|
||||||
|
|||||||
@@ -15,6 +15,9 @@
|
|||||||
//! composed while a session is live). Effectiveness can be build/driver-dependent; gated by
|
//! composed while a session is live). Effectiveness can be build/driver-dependent; gated by
|
||||||
//! `PUNKTFUNK_FORCE_COMPOSED` (default ON; set =0 to disable).
|
//! `PUNKTFUNK_FORCE_COMPOSED` (default ON; set =0 to disable).
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use windows::core::w;
|
use windows::core::w;
|
||||||
@@ -48,6 +51,10 @@ impl ForceComposedFlip {
|
|||||||
let st = stop.clone();
|
let st = stop.clone();
|
||||||
std::thread::Builder::new()
|
std::thread::Builder::new()
|
||||||
.name("composed-flip".into())
|
.name("composed-flip".into())
|
||||||
|
// SAFETY: `run` is this module's `unsafe fn` (it owns a desktop+window lifecycle via Win32
|
||||||
|
// FFI); it takes ownership of `st` (the stop `Arc<AtomicBool>`) and has no caller-side memory
|
||||||
|
// precondition. It is designed to own its thread for its whole duration — exactly the
|
||||||
|
// dedicated `composed-flip` thread spawned here.
|
||||||
.spawn(move || unsafe { run(st) })
|
.spawn(move || unsafe { run(st) })
|
||||||
.ok()?;
|
.ok()?;
|
||||||
tracing::info!("force-composed-flip overlay started (Winlogon-aware)");
|
tracing::info!("force-composed-flip overlay started (Winlogon-aware)");
|
||||||
@@ -62,6 +69,9 @@ impl Drop for ForceComposedFlip {
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "system" fn wndproc(hwnd: HWND, msg: u32, wp: WPARAM, lp: LPARAM) -> LRESULT {
|
extern "system" fn wndproc(hwnd: HWND, msg: u32, wp: WPARAM, lp: LPARAM) -> LRESULT {
|
||||||
|
// SAFETY: this is the window procedure the OS invokes with the window's own `hwnd` and a real
|
||||||
|
// message `(msg, wp, lp)`. `DefWindowProcW` performs default processing for exactly those
|
||||||
|
// parameters (all passed straight through by value); it borrows no Rust memory and is synchronous.
|
||||||
unsafe { DefWindowProcW(hwnd, msg, wp, lp) }
|
unsafe { DefWindowProcW(hwnd, msg, wp, lp) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//! Input-desktop watcher (Windows) — the authoritative "normal vs secure desktop" signal for the
|
//! Input-desktop watcher (Windows) — the authoritative "normal vs secure desktop" signal for the
|
||||||
//! two-process secure-desktop design (docs/windows-secure-desktop.md).
|
//! two-process secure-desktop design (design/archive/windows-secure-desktop.md).
|
||||||
//!
|
//!
|
||||||
//! Windows switches the *input desktop* to "Winlogon" (the secure desktop) for UAC elevation, the
|
//! Windows switches the *input desktop* to "Winlogon" (the secure desktop) for UAC elevation, the
|
||||||
//! lock screen and the login screen, and back to "Default" for the normal session. WGC captures only
|
//! lock screen and the login screen, and back to "Default" for the normal session. WGC captures only
|
||||||
@@ -7,6 +7,9 @@
|
|||||||
//! desktop's NAME (WTS session notifications miss UAC entirely, so the name is the reliable signal)
|
//! desktop's NAME (WTS session notifications miss UAC entirely, so the name is the reliable signal)
|
||||||
//! and publishes it as an atomic the capture mux + input path read.
|
//! and publishes it as an atomic the capture mux + input path read.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
@@ -33,6 +36,10 @@ impl DesktopWatcher {
|
|||||||
// mux) sees the real state immediately. Otherwise a session that begins already on the secure
|
// mux) sees the real state immediately. Otherwise a session that begins already on the secure
|
||||||
// desktop (e.g. a reconnect to a locked box) would read DESKTOP_NORMAL for the first poll
|
// desktop (e.g. a reconnect to a locked box) would read DESKTOP_NORMAL for the first poll
|
||||||
// interval and relay one stale normal-desktop frame — the "flash of the login screen" bug.
|
// interval and relay one stale normal-desktop frame — the "flash of the login screen" bug.
|
||||||
|
// SAFETY: `is_secure_desktop` is this module's `unsafe fn` — unsafe only because it calls Win32
|
||||||
|
// desktop FFI (`OpenInputDesktop`/`GetUserObjectInformationW`/`CloseDesktop`), with no caller
|
||||||
|
// precondition; it opens, names, and closes the input-desktop handle internally and is safe to
|
||||||
|
// call from any thread (here, on the thread running `DesktopWatcher::start`).
|
||||||
let initial = if unsafe { is_secure_desktop() } {
|
let initial = if unsafe { is_secure_desktop() } {
|
||||||
DESKTOP_SECURE
|
DESKTOP_SECURE
|
||||||
} else {
|
} else {
|
||||||
@@ -53,6 +60,9 @@ impl DesktopWatcher {
|
|||||||
let mut candidate = initial;
|
let mut candidate = initial;
|
||||||
let mut stable = 0u32;
|
let mut stable = 0u32;
|
||||||
while !st.load(Ordering::Relaxed) {
|
while !st.load(Ordering::Relaxed) {
|
||||||
|
// SAFETY: same as in `start` — `is_secure_desktop` is self-contained Win32 desktop
|
||||||
|
// FFI with no caller precondition, called here on the dedicated `desktop-watch`
|
||||||
|
// polling thread.
|
||||||
let v = if unsafe { is_secure_desktop() } {
|
let v = if unsafe { is_secure_desktop() } {
|
||||||
DESKTOP_SECURE
|
DESKTOP_SECURE
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -7,6 +7,9 @@
|
|||||||
//! Validates only with a real GPU + an *activated* SudoVDA monitor (`DuplicateOutput` needs a live
|
//! Validates only with a real GPU + an *activated* SudoVDA monitor (`DuplicateOutput` needs a live
|
||||||
//! WDDM output). Compiles on the GPU-less VM; the pure helpers are unit-tested there.
|
//! WDDM output). Compiles on the GPU-less VM; the pure helpers are unit-tested there.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
|
use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
|
||||||
use anyhow::{anyhow, bail, Context, Result};
|
use anyhow::{anyhow, bail, Context, Result};
|
||||||
use std::ffi::c_void;
|
use std::ffi::c_void;
|
||||||
@@ -69,7 +72,12 @@ pub struct D3d11Frame {
|
|||||||
pub texture: ID3D11Texture2D,
|
pub texture: ID3D11Texture2D,
|
||||||
pub device: ID3D11Device,
|
pub device: ID3D11Device,
|
||||||
}
|
}
|
||||||
// COM pointers, used only from the single owning thread.
|
// SAFETY: `D3d11Frame` owns an `ID3D11Texture2D` + `ID3D11Device`, which are COM interface pointers.
|
||||||
|
// D3D11 devices/resources use thread-safe (interlocked) COM reference counting, and the device is
|
||||||
|
// created free-threaded (`make_device` passes no `D3D11_CREATE_DEVICE_SINGLETHREADED`), so handing
|
||||||
|
// ownership of the frame to another thread — the capture→encode handoff — and releasing it there is
|
||||||
|
// sound. The value is moved, never aliased (no `Sync`), so there is no concurrent use of the
|
||||||
|
// single-threaded immediate context.
|
||||||
unsafe impl Send for D3d11Frame {}
|
unsafe impl Send for D3d11Frame {}
|
||||||
|
|
||||||
pub fn pack_luid(luid: LUID) -> i64 {
|
pub fn pack_luid(luid: LUID) -> i64 {
|
||||||
@@ -295,6 +303,12 @@ unsafe fn d3dkmt_set_scheduling_priority_class(
|
|||||||
fn elevate_process_gpu_priority() {
|
fn elevate_process_gpu_priority() {
|
||||||
use std::sync::Once;
|
use std::sync::Once;
|
||||||
static ONCE: Once = Once::new();
|
static ONCE: Once = Once::new();
|
||||||
|
// SAFETY: the closure calls two of this module's `unsafe fn`s — `enable_inc_base_priority`
|
||||||
|
// (adjusts the current-process token; it has no caller precondition and builds all its FFI args
|
||||||
|
// locally) and `d3dkmt_set_scheduling_priority_class` (loads gdi32 by name and calls the export).
|
||||||
|
// The latter requires `process` to be a valid process handle; `GetCurrentProcess()` returns the
|
||||||
|
// current-process pseudo-handle, which is always valid and needs no close. Runs once via
|
||||||
|
// `Once::call_once`; no raw pointers are dereferenced here.
|
||||||
ONCE.call_once(|| unsafe {
|
ONCE.call_once(|| unsafe {
|
||||||
use windows::Win32::System::Threading::GetCurrentProcess;
|
use windows::Win32::System::Threading::GetCurrentProcess;
|
||||||
let Some(prio) = configured_gpu_priority_class() else {
|
let Some(prio) = configured_gpu_priority_class() else {
|
||||||
@@ -538,6 +552,17 @@ unsafe extern "system" fn hybrid_query_hook(gpu_preference: *mut u32) -> i32 {
|
|||||||
pub(crate) fn install_gpu_pref_hook() {
|
pub(crate) fn install_gpu_pref_hook() {
|
||||||
use std::sync::Once;
|
use std::sync::Once;
|
||||||
static HOOK: Once = Once::new();
|
static HOOK: Once = Once::new();
|
||||||
|
// SAFETY: this one-time hook install only touches a region it has just validated.
|
||||||
|
// `LoadLibraryA("win32u.dll")` + `GetProcAddress("NtGdiDdDDIGetCachedHybridQueryValue")` yield the
|
||||||
|
// live base of the real exported function, so `target` is a valid executable code pointer to at
|
||||||
|
// least the 12 bytes the patch overwrites (an x64 prologue, per Apollo's verified hook). The two
|
||||||
|
// `ptr::copy_nonoverlapping`s each move exactly 12 bytes between the 12-byte stack arrays
|
||||||
|
// (`patch`/`readback`) and `target`, which `VirtualProtect(target, 12, PAGE_EXECUTE_READWRITE, …)`
|
||||||
|
// has just made writable (and is restored to `old` after) — source and dest never overlap (stack
|
||||||
|
// vs. loaded module image), so every access stays in mapped, in-bounds memory.
|
||||||
|
// `FlushInstructionCache` gets the current-process pseudo-handle + that same range. The DPI calls
|
||||||
|
// take by-value context handles / fill the live local `&mut old`/`&mut restore` for the duration of
|
||||||
|
// each synchronous call. Runs once via `Once::call_once`, before any DXGI use.
|
||||||
HOOK.call_once(|| unsafe {
|
HOOK.call_once(|| unsafe {
|
||||||
use windows::Win32::System::LibraryLoader::{GetProcAddress, LoadLibraryA};
|
use windows::Win32::System::LibraryLoader::{GetProcAddress, LoadLibraryA};
|
||||||
use windows::Win32::System::Memory::{
|
use windows::Win32::System::Memory::{
|
||||||
@@ -1389,6 +1414,14 @@ pub fn hdr_p010_selftest() -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SAFETY: this self-test creates its own D3D11 device + immediate context (`D3D11CreateDevice`,
|
||||||
|
// both checked non-null) and uses ONLY that device for the rest of the block: every
|
||||||
|
// `CreateTexture2D`/`CreateShaderResourceView`/`HdrP010Converter::{new,convert}`/`CopyResource`/
|
||||||
|
// `Map` is invoked on that device or its context, so all resources share one device and run on this
|
||||||
|
// single thread. The source texture's `D3D11_SUBRESOURCE_DATA` points at `fp16`, a live
|
||||||
|
// `Vec<u16>` of `W*H*4` samples with `SysMemPitch = W*8`, matching the W×H R16G16B16A16 texture;
|
||||||
|
// `fp16` outlives the synchronous `CreateTexture2D` that reads it. The mapped-pointer reads are
|
||||||
|
// proven individually at the `read_u16` closure below.
|
||||||
unsafe {
|
unsafe {
|
||||||
// Hardware D3D11 device (no adapter pin — the default GPU is fine for the self-test).
|
// Hardware D3D11 device (no adapter pin — the default GPU is fine for the self-test).
|
||||||
let mut device: Option<ID3D11Device> = None;
|
let mut device: Option<ID3D11Device> = None;
|
||||||
@@ -2038,7 +2071,11 @@ pub struct DuplCapturer {
|
|||||||
dbg_cursor: u64,
|
dbg_cursor: u64,
|
||||||
_keepalive: Box<dyn Send>,
|
_keepalive: Box<dyn Send>,
|
||||||
}
|
}
|
||||||
// COM objects used only from the one thread that owns the capturer (the encode thread).
|
// SAFETY: `DuplCapturer` holds D3D11 device/context/duplication COM pointers plus plain data. The
|
||||||
|
// device is created free-threaded (`make_device` sets no `D3D11_CREATE_DEVICE_SINGLETHREADED`) and
|
||||||
|
// COM reference counting is interlocked, so moving ownership of the whole capturer to another thread
|
||||||
|
// is sound. It is used by exactly one thread (the encode thread) at a time — moved to it once, never
|
||||||
|
// shared (no `Sync`) — so the single-threaded immediate context is never touched concurrently.
|
||||||
unsafe impl Send for DuplCapturer {}
|
unsafe impl Send for DuplCapturer {}
|
||||||
|
|
||||||
impl DuplCapturer {
|
impl DuplCapturer {
|
||||||
@@ -2051,6 +2088,13 @@ impl DuplCapturer {
|
|||||||
gpu: bool,
|
gpu: bool,
|
||||||
want_hdr: bool,
|
want_hdr: bool,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
|
// SAFETY: runs on the capture thread that will own this `DuplCapturer`. `install_gpu_pref_hook()`
|
||||||
|
// and the DPI-context calls take by-value handles / no args and touch only thread/process state;
|
||||||
|
// `SetThreadExecutionState` takes a flags bitmask by value. `CreateDXGIFactory1` yields a live
|
||||||
|
// `IDXGIFactory1`, and every subsequent COM method (`EnumAdapters1`/`EnumOutputs`/`GetDesc1`/
|
||||||
|
// `GetDesc`/`cast`) is called on that factory or on an adapter/output it returned — each obtained
|
||||||
|
// through a checked `while let Ok(..)`/`?` — all from this one thread. No raw pointers are
|
||||||
|
// dereferenced; the borrowed strings/locals outlive each synchronous call.
|
||||||
unsafe {
|
unsafe {
|
||||||
// Stop DXGI hybrid-GPU output reparenting BEFORE we create the factory / enumerate outputs
|
// Stop DXGI hybrid-GPU output reparenting BEFORE we create the factory / enumerate outputs
|
||||||
// (the cause of the 0x887A0026 ACCESS_LOST churn on this hybrid box: RTX 4090 + AMD iGPU).
|
// (the cause of the 0x887A0026 ACCESS_LOST churn on this hybrid box: RTX 4090 + AMD iGPU).
|
||||||
@@ -3207,6 +3251,11 @@ impl Capturer for DuplCapturer {
|
|||||||
// the duplication up to 12 s). Better a few seconds of frozen-last-frame than dropping the stream.
|
// the duplication up to 12 s). Better a few seconds of frozen-last-frame than dropping the stream.
|
||||||
let mut deadline = Instant::now() + Duration::from_secs(20);
|
let mut deadline = Instant::now() + Duration::from_secs(20);
|
||||||
loop {
|
loop {
|
||||||
|
// SAFETY: `acquire` is an `unsafe fn` because it drives the D3D11 immediate context + the
|
||||||
|
// output duplication, which must be touched only from the capturer's owning thread.
|
||||||
|
// `next_frame` runs on that one thread — `DuplCapturer` is `Send` but not `Sync`, so it is
|
||||||
|
// owned by a single (encode) thread for its whole life — and `&mut self` gives exclusive
|
||||||
|
// access for the call, satisfying that contract.
|
||||||
if let Some(f) = unsafe { self.acquire() }? {
|
if let Some(f) = unsafe { self.acquire() }? {
|
||||||
self.ever_got_frame = true;
|
self.ever_got_frame = true;
|
||||||
return Ok(f);
|
return Ok(f);
|
||||||
@@ -3253,6 +3302,8 @@ impl Capturer for DuplCapturer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn try_latest(&mut self) -> Result<Option<CapturedFrame>> {
|
fn try_latest(&mut self) -> Result<Option<CapturedFrame>> {
|
||||||
|
// SAFETY: as in `next_frame` — `acquire` must run on the capturer's single owning thread, and
|
||||||
|
// `try_latest` is called on it (`DuplCapturer` is `Send`, not `Sync`); `&mut self` is exclusive.
|
||||||
unsafe { self.acquire() }
|
unsafe { self.acquire() }
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3264,11 +3315,19 @@ impl Capturer for DuplCapturer {
|
|||||||
impl Drop for DuplCapturer {
|
impl Drop for DuplCapturer {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if self.holding_frame {
|
if self.holding_frame {
|
||||||
|
// SAFETY: `self.dupl` is the live `IDXGIOutputDuplication` this capturer created and owns;
|
||||||
|
// `ReleaseFrame` is a valid COM method on it, called only when `holding_frame` records that a
|
||||||
|
// frame was acquired and not yet released (so it is not an unbalanced release). Drop runs on
|
||||||
|
// whichever thread owns the capturer — its sole owner, since it is `!Sync` — and the `&`
|
||||||
|
// borrow of the duplication outlives this synchronous call.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
|
let _ = self.dupl.as_ref().map(|d| d.ReleaseFrame());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Release the display/system-required execution state we took at open().
|
// Release the display/system-required execution state we took at open().
|
||||||
|
// SAFETY: `SetThreadExecutionState` is a Win32 FFI call taking an execution-state flag bitmask
|
||||||
|
// by value (`ES_CONTINUOUS` clears the display/system-required state taken at open); it borrows
|
||||||
|
// no Rust memory and is safe to call from any thread.
|
||||||
unsafe {
|
unsafe {
|
||||||
SetThreadExecutionState(ES_CONTINUOUS);
|
SetThreadExecutionState(ES_CONTINUOUS);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,24 +10,25 @@
|
|||||||
//! [`pf_driver_proto::frame`] (which OWNS the contract, with `const` size asserts) — both sides
|
//! [`pf_driver_proto::frame`] (which OWNS the contract, with `const` size asserts) — both sides
|
||||||
//! `use` it, so drift is a compile error rather than a "must match" comment.
|
//! `use` it, so drift is a compile error rather than a "must match" comment.
|
||||||
|
|
||||||
use super::dxgi::{make_device, D3d11Frame, HdrConverter, WinCaptureTarget};
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
|
use super::dxgi::{make_device, D3d11Frame, HdrP010Converter, VideoConverter, WinCaptureTarget};
|
||||||
use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
|
use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
|
||||||
use anyhow::{bail, Context, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
use pf_driver_proto::frame;
|
use pf_driver_proto::frame;
|
||||||
use std::ffi::c_void;
|
|
||||||
use std::os::windows::io::{AsRawHandle, FromRawHandle, OwnedHandle};
|
use std::os::windows::io::{AsRawHandle, FromRawHandle, OwnedHandle};
|
||||||
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
|
||||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||||
use windows::core::{w, Interface, HSTRING};
|
use windows::core::{w, Interface, HSTRING};
|
||||||
use windows::Win32::Foundation::{HANDLE, INVALID_HANDLE_VALUE, LUID};
|
use windows::Win32::Foundation::{HANDLE, INVALID_HANDLE_VALUE, LUID};
|
||||||
use windows::Win32::Graphics::Direct3D11::{
|
use windows::Win32::Graphics::Direct3D11::{
|
||||||
ID3D11Device, ID3D11DeviceContext, ID3D11RenderTargetView, ID3D11ShaderResourceView,
|
ID3D11Device, ID3D11DeviceContext, ID3D11ShaderResourceView, ID3D11Texture2D,
|
||||||
ID3D11Texture2D, D3D11_BIND_RENDER_TARGET, D3D11_BIND_SHADER_RESOURCE,
|
D3D11_BIND_RENDER_TARGET, D3D11_BIND_SHADER_RESOURCE, D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX,
|
||||||
D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX, D3D11_RESOURCE_MISC_SHARED_NTHANDLE,
|
D3D11_RESOURCE_MISC_SHARED_NTHANDLE, D3D11_TEXTURE2D_DESC, D3D11_USAGE_DEFAULT,
|
||||||
D3D11_TEXTURE2D_DESC, D3D11_USAGE_DEFAULT,
|
|
||||||
};
|
};
|
||||||
use windows::Win32::Graphics::Dxgi::Common::{
|
use windows::Win32::Graphics::Dxgi::Common::{
|
||||||
DXGI_FORMAT, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM,
|
DXGI_FORMAT, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_NV12, DXGI_FORMAT_P010,
|
||||||
DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_SAMPLE_DESC,
|
DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_SAMPLE_DESC,
|
||||||
};
|
};
|
||||||
use windows::Win32::Graphics::Dxgi::{
|
use windows::Win32::Graphics::Dxgi::{
|
||||||
@@ -132,6 +133,41 @@ struct HostSlot {
|
|||||||
srv: ID3D11ShaderResourceView,
|
srv: ID3D11ShaderResourceView,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// RAII guard over an [`IDXGIKeyedMutex`]: [`acquire`](Self::acquire) does `AcquireSync(key, timeout)`,
|
||||||
|
/// `Drop` does `ReleaseSync(key)`. So the lock is released even if the work between acquire and the end
|
||||||
|
/// of the guard's scope `?`-returns or panics — the "leak the keyed-mutex lock → stall the driver on
|
||||||
|
/// that slot" footgun the consume loop guards against by hand. Keeps the hot loop free of a raw
|
||||||
|
/// `ReleaseSync` that a future early-return could skip.
|
||||||
|
struct KeyedMutexGuard<'a> {
|
||||||
|
mutex: &'a IDXGIKeyedMutex,
|
||||||
|
key: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> KeyedMutexGuard<'a> {
|
||||||
|
/// Acquire `mutex` at `key`, waiting up to `timeout_ms`. `None` if the acquire times out / errors
|
||||||
|
/// (the caller skips the frame), so the guard is only ever held when the lock is genuinely held.
|
||||||
|
fn acquire(
|
||||||
|
mutex: &'a IDXGIKeyedMutex,
|
||||||
|
key: u64,
|
||||||
|
timeout_ms: u32,
|
||||||
|
) -> Option<KeyedMutexGuard<'a>> {
|
||||||
|
// SAFETY: `mutex` is a live `IDXGIKeyedMutex` on this thread's immediate-context device.
|
||||||
|
if unsafe { mutex.AcquireSync(key, timeout_ms) }.is_err() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(KeyedMutexGuard { mutex, key })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for KeyedMutexGuard<'_> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: we hold `mutex` at `key` (acquired in `acquire`, never released elsewhere); release it.
|
||||||
|
unsafe {
|
||||||
|
let _ = self.mutex.ReleaseSync(self.key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates + owns the shared ring; yields the driver's frames as [`FramePayload::D3d11`].
|
/// Creates + owns the shared ring; yields the driver's frames as [`FramePayload::D3d11`].
|
||||||
pub struct IddPushCapturer {
|
pub struct IddPushCapturer {
|
||||||
device: ID3D11Device,
|
device: ID3D11Device,
|
||||||
@@ -171,21 +207,33 @@ pub struct IddPushCapturer {
|
|||||||
/// cleared when a fresh frame resumes. If it stays set past the recovery window, `try_consume` drops
|
/// cleared when a fresh frame resumes. If it stays set past the recovery window, `try_consume` drops
|
||||||
/// the session (recover-or-drop, no DDA).
|
/// the session (recover-or-drop, no DDA).
|
||||||
recovering_since: Option<Instant>,
|
recovering_since: Option<Instant>,
|
||||||
/// Host-owned ROTATING output ring NVENC encodes (texture + RTV per slot). Rotating it per frame is
|
/// Host-owned ROTATING output ring NVENC encodes (one YUV texture per slot). Rotating it per frame
|
||||||
/// the precondition for pipelining the encode loop: while NVENC encodes frame N's texture on the
|
/// is the precondition for pipelining the encode loop: while NVENC encodes frame N's texture on the
|
||||||
/// ASIC, frame N+1's convert/copy writes a DIFFERENT texture on the 3D engine — the two overlap. The
|
/// ASIC, frame N+1's convert writes a DIFFERENT texture — the two overlap. Format = `out_format()`:
|
||||||
/// HDR convert and the SDR copy both write into the current slot. Format = `out_format()` (Rgb10a2 in
|
/// NV12 (SDR, BT.709 limited) or P010 (HDR, BT.2020 PQ limited), so NVENC takes native YUV and skips
|
||||||
/// HDR, Bgra in SDR); rebuilt on a display-mode flip. Built lazily.
|
/// its internal RGB→YUV CSC on the SM/3D engine the game saturates (plan §5.A). Rebuilt on a
|
||||||
out_ring: Vec<(ID3D11Texture2D, ID3D11RenderTargetView)>,
|
/// display-mode flip. Built lazily.
|
||||||
|
out_ring: Vec<ID3D11Texture2D>,
|
||||||
out_idx: usize,
|
out_idx: usize,
|
||||||
/// FP16 scRGB → `Rgb10a2` BT.2020 PQ converter, used while the display is HDR. Built lazily.
|
/// BGRA slot → NV12 (BT.709 limited) on the dedicated D3D11 VIDEO engine, used while the display is
|
||||||
hdr_conv: Option<HdrConverter>,
|
/// SDR — keeps the colour-convert OFF the contended 3D/compute engine. Built lazily; rebuilt on a
|
||||||
|
/// size/HDR flip.
|
||||||
|
video_conv: Option<VideoConverter>,
|
||||||
|
/// FP16 scRGB slot → P010 (BT.2020 PQ limited) via two shader passes, used while the display is HDR
|
||||||
|
/// (NVIDIA's VideoProcessor can't do RGB→P010). The passes run on the 3D engine, but it still skips
|
||||||
|
/// NVENC's internal SM-side CSC. Built lazily.
|
||||||
|
hdr_p010_conv: Option<HdrP010Converter>,
|
||||||
last_seq: u64,
|
last_seq: u64,
|
||||||
last_present: Option<(ID3D11Texture2D, PixelFormat)>,
|
last_present: Option<(ID3D11Texture2D, PixelFormat)>,
|
||||||
status_logged: bool,
|
status_logged: bool,
|
||||||
_keepalive: Box<dyn Send>,
|
_keepalive: Box<dyn Send>,
|
||||||
}
|
}
|
||||||
// COM objects used only from the owning (encode) thread.
|
// SAFETY: `IddPushCapturer` is `!Send` only because of its `*mut SharedHeader`/`*mut DebugBlock` raw
|
||||||
|
// pointers (and the COM interfaces). It is created, used, and dropped by a SINGLE thread — the owning
|
||||||
|
// capture/encode thread — never shared: the `ID3D11DeviceContext` is the device's IMMEDIATE context
|
||||||
|
// (single-threaded by D3D11 contract) and is only ever touched from that thread, and the header/
|
||||||
|
// dbg_block pointers (into mappings this struct owns) are only dereferenced there. `Send` transfers
|
||||||
|
// ownership to one thread at a time with NO concurrent access; we do not (and must not) claim `Sync`.
|
||||||
unsafe impl Send for IddPushCapturer {}
|
unsafe impl Send for IddPushCapturer {}
|
||||||
|
|
||||||
/// Build a permissive (Everyone:GenericAll) `SECURITY_ATTRIBUTES` so the restricted WUDFHost driver
|
/// Build a permissive (Everyone:GenericAll) `SECURITY_ATTRIBUTES` so the restricted WUDFHost driver
|
||||||
@@ -303,6 +351,9 @@ impl IddPushCapturer {
|
|||||||
// a fullscreen game can hold the virtual display at a different mode (esp. across a reconnect), so
|
// a fullscreen game can hold the virtual display at a different mode (esp. across a reconnect), so
|
||||||
// matching the actual mode lets the first frame flow instead of being dropped (game-capture bug
|
// matching the actual mode lets the first frame flow instead of being dropped (game-capture bug
|
||||||
// GB1). Falls back to the negotiated mode when the CCD read is unavailable.
|
// GB1). Falls back to the negotiated mode when the CCD read is unavailable.
|
||||||
|
// SAFETY: `active_resolution` is an `unsafe fn` (Win32 CCD `QueryDisplayConfig`) that takes only a
|
||||||
|
// copy of the plain `u32` CCD target id and returns owned `(w, h)` values; it forms no borrows from
|
||||||
|
// us and validates the id internally, returning `None` on any failure (handled by `unwrap_or`).
|
||||||
let (w, h) =
|
let (w, h) =
|
||||||
unsafe { crate::win_display::active_resolution(target.target_id) }.unwrap_or((pw, ph));
|
unsafe { crate::win_display::active_resolution(target.target_id) }.unwrap_or((pw, ph));
|
||||||
if (w, h) != (pw, ph) {
|
if (w, h) != (pw, ph) {
|
||||||
@@ -321,6 +372,27 @@ impl IddPushCapturer {
|
|||||||
// PROACTIVELY enable advanced color so HDR streams without the user toggling anything; an
|
// PROACTIVELY enable advanced color so HDR streams without the user toggling anything; an
|
||||||
// SDR-only client leaves the display alone (and still gets a tone-mapped picture, never a freeze,
|
// SDR-only client leaves the display alone (and still gets a tone-mapped picture, never a freeze,
|
||||||
// if the user does enable HDR).
|
// if the user does enable HDR).
|
||||||
|
// SAFETY: one block over the whole ring setup; every operation in it is sound:
|
||||||
|
// - `set_advanced_color`/`advanced_color_enabled` are `unsafe fn`s taking only a copy of the plain
|
||||||
|
// `u32` target id; they read/flip CCD display config and return owned values, borrowing nothing.
|
||||||
|
// - `CreateDXGIFactory1`, `EnumAdapterByLuid`, `make_device`, `permissive_sa`, `CreateFileMappingW`,
|
||||||
|
// `MapViewOfFile`, `CreateEventW`, and `create_ring_slots` are all `?`-checked, so every returned
|
||||||
|
// interface/handle/view is non-error before use; `&sa`/`&adapter`/`&device`/the `&HSTRING` names
|
||||||
|
// are live borrows that outlive each synchronous call, and `sa.lpSecurityDescriptor` stays valid
|
||||||
|
// because its backing `_psd` is held in scope for the whole block.
|
||||||
|
// - The header mapping is created AND viewed at `bytes == size_of::<SharedHeader>().max(64)`; the
|
||||||
|
// view's null is checked (`bail!` on failure, after which the owned `map` closes the mapping). The
|
||||||
|
// OS view base is page-aligned, so `section.ptr::<SharedHeader>()` is suitably aligned for a
|
||||||
|
// `SharedHeader`, and `write_bytes(.., 0, bytes)` plus the `(*header).field = ..` writes all stay
|
||||||
|
// within those `bytes` and write THROUGH the raw pointer without forming any `&mut`. The debug
|
||||||
|
// section is the same pattern at `dbg_bytes == size_of::<DebugBlock>()`, only entered when its
|
||||||
|
// own view is non-null.
|
||||||
|
// - The `magic` publish stores through `addr_of!((*header).magic) as *const AtomicU32`: `addr_of!`
|
||||||
|
// takes the field address without a reference; the field is a 4-aligned `u32` (valid for
|
||||||
|
// `AtomicU32`), and the `Release` store after the `Release` fence is the cross-process handshake
|
||||||
|
// that orders all preceding writes before the driver may observe `MAGIC`.
|
||||||
|
// - `header`/`dbg_block` point into the OS mappings, NOT into the `MappedSection` structs, so moving
|
||||||
|
// `section`/`dbg_section` into `me` leaves them valid (see the `MappedSection` doc comment).
|
||||||
unsafe {
|
unsafe {
|
||||||
// If we ENABLE advanced color for a 10-bit client, trust it (the driver will compose FP16) and
|
// If we ENABLE advanced color for a 10-bit client, trust it (the driver will compose FP16) and
|
||||||
// size the ring FP16 directly — don't race the advanced_color_enabled poll, which may not have
|
// size the ring FP16 directly — don't race the advanced_color_enabled poll, which may not have
|
||||||
@@ -365,7 +437,7 @@ impl IddPushCapturer {
|
|||||||
// Own the mapping handle so it (and its view) free via `MappedSection` RAII even on bail.
|
// Own the mapping handle so it (and its view) free via `MappedSection` RAII even on bail.
|
||||||
let map = OwnedHandle::from_raw_handle(map.0 as _);
|
let map = OwnedHandle::from_raw_handle(map.0 as _);
|
||||||
let view = MapViewOfFile(
|
let view = MapViewOfFile(
|
||||||
HANDLE(map.as_raw_handle() as *mut c_void),
|
HANDLE(map.as_raw_handle()),
|
||||||
FILE_MAP_ALL_ACCESS,
|
FILE_MAP_ALL_ACCESS,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
@@ -415,7 +487,7 @@ impl IddPushCapturer {
|
|||||||
// Own the mapping handle so it (and its view) free via `MappedSection` RAII.
|
// Own the mapping handle so it (and its view) free via `MappedSection` RAII.
|
||||||
let dm = OwnedHandle::from_raw_handle(dm.0 as _);
|
let dm = OwnedHandle::from_raw_handle(dm.0 as _);
|
||||||
let dv = MapViewOfFile(
|
let dv = MapViewOfFile(
|
||||||
HANDLE(dm.as_raw_handle() as *mut c_void),
|
HANDLE(dm.as_raw_handle()),
|
||||||
FILE_MAP_ALL_ACCESS,
|
FILE_MAP_ALL_ACCESS,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
@@ -470,7 +542,8 @@ impl IddPushCapturer {
|
|||||||
recovering_since: None,
|
recovering_since: None,
|
||||||
out_ring: Vec::new(),
|
out_ring: Vec::new(),
|
||||||
out_idx: 0,
|
out_idx: 0,
|
||||||
hdr_conv: None,
|
video_conv: None,
|
||||||
|
hdr_p010_conv: None,
|
||||||
last_seq: 0,
|
last_seq: 0,
|
||||||
last_present: None,
|
last_present: None,
|
||||||
status_logged: false,
|
status_logged: false,
|
||||||
@@ -489,7 +562,7 @@ impl IddPushCapturer {
|
|||||||
|
|
||||||
/// Block (bounded) until the driver has ATTACHED to the host ring (`DRV_STATUS_OPENED`) **and published
|
/// Block (bounded) until the driver has ATTACHED to the host ring (`DRV_STATUS_OPENED`) **and published
|
||||||
/// a first frame**, else fail so the caller can fall back to DDA (audit §5.1 +
|
/// a first frame**, else fail so the caller can fall back to DDA (audit §5.1 +
|
||||||
/// `docs/windows-host-rewrite.md` §2.5 — the GB1 game-capture fix).
|
/// `design/windows-host-rewrite.md` §2.5 — the GB1 game-capture fix).
|
||||||
///
|
///
|
||||||
/// Requiring the first frame — not just the attach — catches the *reconnect-into-a-broken-state* case:
|
/// Requiring the first frame — not just the attach — catches the *reconnect-into-a-broken-state* case:
|
||||||
/// a fullscreen game can leave the virtual display in a format/size that the driver's `publish()` guard
|
/// a fullscreen game can leave the virtual display in a format/size that the driver's `publish()` guard
|
||||||
@@ -500,10 +573,16 @@ impl IddPushCapturer {
|
|||||||
fn wait_for_attach(&self) -> Result<()> {
|
fn wait_for_attach(&self) -> Result<()> {
|
||||||
let deadline = Instant::now() + Duration::from_secs(4);
|
let deadline = Instant::now() + Duration::from_secs(4);
|
||||||
loop {
|
loop {
|
||||||
// Plain read: the driver writes this u32; an aligned u32 read can't tear (same access as
|
// SAFETY: `self.header` points into the live shared-header mapping this capturer owns (sized
|
||||||
|
// `>= size_of::<SharedHeader>()`, page-aligned), so the field read is in-bounds + aligned, and
|
||||||
|
// no reference into the shared region is formed. Plain read: the driver writes this `u32`
|
||||||
|
// cross-process, but an aligned `u32` read can't tear and `driver_status` is best-effort
|
||||||
|
// diagnostics — the real handshake is the atomic `magic`/`latest` (same access as
|
||||||
// log_driver_status_once).
|
// log_driver_status_once).
|
||||||
let st = unsafe { (*self.header).driver_status };
|
let st = unsafe { (*self.header).driver_status };
|
||||||
if matches!(st, DRV_STATUS_TEX_FAIL | DRV_STATUS_NO_DEVICE1) {
|
if matches!(st, DRV_STATUS_TEX_FAIL | DRV_STATUS_NO_DEVICE1) {
|
||||||
|
// SAFETY: as above — an in-bounds, aligned `u32` read of a best-effort diagnostic field
|
||||||
|
// through the owned, live header mapping; no reference into the shared region is formed.
|
||||||
let detail = unsafe { (*self.header).driver_status_detail };
|
let detail = unsafe { (*self.header).driver_status_detail };
|
||||||
bail!(
|
bail!(
|
||||||
"IDD-push driver failed to attach (driver_status={st} detail=0x{detail:08x} — \
|
"IDD-push driver failed to attach (driver_status={st} detail=0x{detail:08x} — \
|
||||||
@@ -526,6 +605,10 @@ impl IddPushCapturer {
|
|||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn latest(&self) -> u64 {
|
fn latest(&self) -> u64 {
|
||||||
|
// SAFETY: `self.header` is the live, owned shared-header mapping (page-aligned, sized for a
|
||||||
|
// `SharedHeader`). `addr_of!((*self.header).latest)` forms the address of the `latest` field
|
||||||
|
// WITHOUT a reference; it is an 8-aligned `u64` (so valid for `AtomicU64`), and the `Acquire` load
|
||||||
|
// is the consumer half of the cross-process publish handshake (pairs with the driver's `Release`).
|
||||||
unsafe {
|
unsafe {
|
||||||
(*(std::ptr::addr_of!((*self.header).latest) as *const AtomicU64))
|
(*(std::ptr::addr_of!((*self.header).latest) as *const AtomicU64))
|
||||||
.load(Ordering::Acquire)
|
.load(Ordering::Acquire)
|
||||||
@@ -537,6 +620,10 @@ impl IddPushCapturer {
|
|||||||
if self.status_logged {
|
if self.status_logged {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// SAFETY: four in-bounds, aligned reads of the live, owned shared-header mapping. The driver writes
|
||||||
|
// these `u32`/`i32` diagnostic fields cross-process, but aligned word reads can't tear and these are
|
||||||
|
// best-effort status (the real handshake is the atomic `magic`/`latest`); no `&`/`&mut` reference
|
||||||
|
// into the shared region is formed.
|
||||||
let (status, detail, lo, hi) = unsafe {
|
let (status, detail, lo, hi) = unsafe {
|
||||||
(
|
(
|
||||||
(*self.header).driver_status,
|
(*self.header).driver_status,
|
||||||
@@ -576,6 +663,11 @@ impl IddPushCapturer {
|
|||||||
tracing::warn!("IDD push DEBUG: no debug block");
|
tracing::warn!("IDD push DEBUG: no debug block");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// SAFETY: `self.dbg_block` was just checked non-null (the early return above); it points into the
|
||||||
|
// owned `dbg_section` mapping sized exactly `size_of::<DebugBlock>()` and page-aligned, so it is
|
||||||
|
// valid + aligned for `DebugBlock`. `d` is a short-lived SHARED reference used only to read the
|
||||||
|
// fields below; we never form `&mut` into this region, and the driver's cross-process writes are
|
||||||
|
// aligned `u32`s that don't tear (best-effort bring-up diagnostics).
|
||||||
let d = unsafe { &*self.dbg_block };
|
let d = unsafe { &*self.dbg_block };
|
||||||
tracing::error!(
|
tracing::error!(
|
||||||
run_core_entries = d.run_core_entries,
|
run_core_entries = d.run_core_entries,
|
||||||
@@ -591,16 +683,17 @@ impl IddPushCapturer {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The output texture format + the [`PixelFormat`] it presents as, driven SOLELY by the DISPLAY's
|
/// The output texture format + the [`PixelFormat`] NVENC encodes, driven SOLELY by the DISPLAY's HDR
|
||||||
/// HDR state (like the WGC path): HDR → `Rgb10a2` BT.2020 PQ → NVENC Main10, and the client
|
/// state (like the WGC path): HDR → `P010` (BT.2020 PQ 10-bit limited) → NVENC Main10, and the client
|
||||||
/// auto-detects PQ from the HEVC VUI; SDR → 8-bit `Bgra`. We do NOT gate HDR on the client's
|
/// auto-detects PQ from the HEVC VUI; SDR → `Nv12` (BT.709 8-bit limited). Both are native YUV so
|
||||||
/// advertised `VIDEO_CAP_10BIT` — clients under-report it (e.g. the Mac advertises 10-bit only when
|
/// NVENC skips its internal RGB→YUV CSC on the contended SM (plan §5.A). We do NOT gate HDR on the
|
||||||
/// its OWN display is HDR), yet all decode Main10 + auto-switch, exactly as on the WGC path.
|
/// client's advertised `VIDEO_CAP_10BIT` — clients under-report it (e.g. the Mac advertises 10-bit
|
||||||
|
/// only when its OWN display is HDR), yet all decode Main10 + auto-switch, exactly as on the WGC path.
|
||||||
fn out_format(&self) -> (DXGI_FORMAT, PixelFormat) {
|
fn out_format(&self) -> (DXGI_FORMAT, PixelFormat) {
|
||||||
if self.display_hdr {
|
if self.display_hdr {
|
||||||
(DXGI_FORMAT_R10G10B10A2_UNORM, PixelFormat::Rgb10a2)
|
(DXGI_FORMAT_P010, PixelFormat::P010)
|
||||||
} else {
|
} else {
|
||||||
(DXGI_FORMAT_B8G8R8A8_UNORM, PixelFormat::Bgra)
|
(DXGI_FORMAT_NV12, PixelFormat::Nv12)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -624,6 +717,10 @@ impl IddPushCapturer {
|
|||||||
self.height = new_h;
|
self.height = new_h;
|
||||||
let fmt = self.ring_format();
|
let fmt = self.ring_format();
|
||||||
let new_gen = IDD_GENERATION.fetch_add(1, Ordering::Relaxed);
|
let new_gen = IDD_GENERATION.fetch_add(1, Ordering::Relaxed);
|
||||||
|
// SAFETY: `create_ring_slots` is an `unsafe fn` (it makes D3D11/DXGI COM calls); we pass a live
|
||||||
|
// borrow of `self.device` (the capturer's own device, on which the slots are created) plus plain
|
||||||
|
// `u32`/`DXGI_FORMAT` values, and `?` propagates any failure before the slots are used. Every
|
||||||
|
// returned slot's texture + keyed mutex belongs to that same `self.device`.
|
||||||
let new_slots = unsafe {
|
let new_slots = unsafe {
|
||||||
Self::create_ring_slots(
|
Self::create_ring_slots(
|
||||||
&self.device,
|
&self.device,
|
||||||
@@ -634,6 +731,12 @@ impl IddPushCapturer {
|
|||||||
fmt,
|
fmt,
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
// SAFETY: `self.header` is the live, owned shared-header mapping (page-aligned, sized for a
|
||||||
|
// `SharedHeader`). The `latest`/`generation` stores go through `addr_of!`-formed field pointers (no
|
||||||
|
// references) of correctly-aligned `u64`/`u32` fields, valid for `AtomicU64`/`AtomicU32`; the
|
||||||
|
// `dxgi_format`/`width`/`height` writes are in-bounds raw writes through the pointer (no `&mut`).
|
||||||
|
// The `Release` fence + the `Release` `generation` store publish all preceding writes so the driver
|
||||||
|
// only re-attaches (`Acquire`) once the new textures + format are in place.
|
||||||
unsafe {
|
unsafe {
|
||||||
// Clear `latest` to the 0 sentinel (generation 0, which try_consume rejects). The real guard
|
// Clear `latest` to the 0 sentinel (generation 0, which try_consume rejects). The real guard
|
||||||
// against consuming an unwritten new-ring slot is the generation tag in `latest`: a stale
|
// against consuming an unwritten new-ring slot is the generation tag in `latest`: a stale
|
||||||
@@ -654,6 +757,8 @@ impl IddPushCapturer {
|
|||||||
self.generation = new_gen;
|
self.generation = new_gen;
|
||||||
self.last_seq = 0;
|
self.last_seq = 0;
|
||||||
self.out_ring.clear(); // the output format changed → rebuild lazily at the new format
|
self.out_ring.clear(); // the output format changed → rebuild lazily at the new format
|
||||||
|
self.video_conv = None; // converters are sized + HDR-specific → rebuild at the new mode
|
||||||
|
self.hdr_p010_conv = None;
|
||||||
self.out_idx = 0;
|
self.out_idx = 0;
|
||||||
self.last_present = None;
|
self.last_present = None;
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -667,9 +772,13 @@ impl IddPushCapturer {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
self.last_acm_poll = Instant::now();
|
self.last_acm_poll = Instant::now();
|
||||||
|
// SAFETY: `advanced_color_enabled` is an `unsafe fn` taking only a copy of the plain `u32` target
|
||||||
|
// id; it performs a read-only CCD query and returns an owned `bool`, borrowing nothing from us.
|
||||||
let now_hdr = unsafe { crate::win_display::advanced_color_enabled(self.target_id) };
|
let now_hdr = unsafe { crate::win_display::advanced_color_enabled(self.target_id) };
|
||||||
// Follow the display's ACTUAL resolution too — a fullscreen game can mode-set the virtual display
|
// Follow the display's ACTUAL resolution too — a fullscreen game can mode-set the virtual display
|
||||||
// out from under the negotiated size (game-capture bug GB1). Unknown read → keep our current size.
|
// out from under the negotiated size (game-capture bug GB1). Unknown read → keep our current size.
|
||||||
|
// SAFETY: `active_resolution` is an `unsafe fn` taking only a copy of the plain `u32` target id; it
|
||||||
|
// performs a read-only CCD query and returns owned `(w, h)` values, borrowing nothing from us.
|
||||||
let (now_w, now_h) = unsafe { crate::win_display::active_resolution(self.target_id) }
|
let (now_w, now_h) = unsafe { crate::win_display::active_resolution(self.target_id) }
|
||||||
.unwrap_or((self.width, self.height));
|
.unwrap_or((self.width, self.height));
|
||||||
if now_hdr == self.display_hdr && now_w == self.width && now_h == self.height {
|
if now_hdr == self.display_hdr && now_w == self.width && now_h == self.height {
|
||||||
@@ -708,31 +817,46 @@ impl IddPushCapturer {
|
|||||||
Quality: 0,
|
Quality: 0,
|
||||||
},
|
},
|
||||||
Usage: D3D11_USAGE_DEFAULT,
|
Usage: D3D11_USAGE_DEFAULT,
|
||||||
BindFlags: (D3D11_BIND_RENDER_TARGET.0 | D3D11_BIND_SHADER_RESOURCE.0) as u32,
|
// RENDER_TARGET: the VIDEO processor (NV12) and the P010 shader passes both write here, and
|
||||||
|
// NVENC registers it as encode input — matching the WGC YUV ring.
|
||||||
|
BindFlags: D3D11_BIND_RENDER_TARGET.0 as u32,
|
||||||
CPUAccessFlags: 0,
|
CPUAccessFlags: 0,
|
||||||
MiscFlags: 0,
|
MiscFlags: 0,
|
||||||
};
|
};
|
||||||
for _ in 0..OUT_RING {
|
for _ in 0..OUT_RING {
|
||||||
let mut t: Option<ID3D11Texture2D> = None;
|
let mut t: Option<ID3D11Texture2D> = None;
|
||||||
let mut rtv: Option<ID3D11RenderTargetView> = None;
|
// SAFETY: `CreateTexture2D` is called on `self.device` (the capturer's live D3D11 device);
|
||||||
|
// `&desc` is a fully-initialized stack `D3D11_TEXTURE2D_DESC`, the data arg is `None` (no
|
||||||
|
// initial data), and `Some(&mut t)` is a live out-parameter the call fills. `?` rejects a failed
|
||||||
|
// HRESULT before `t` is unwrapped, and the created texture belongs to `self.device`.
|
||||||
unsafe {
|
unsafe {
|
||||||
self.device
|
self.device
|
||||||
.CreateTexture2D(&desc, None, Some(&mut t))
|
.CreateTexture2D(&desc, None, Some(&mut t))
|
||||||
.context("CreateTexture2D(IDD out ring)")?;
|
.context("CreateTexture2D(IDD out ring)")?;
|
||||||
let t = t.context("null out-ring texture")?;
|
self.out_ring.push(t.context("null out-ring texture")?);
|
||||||
self.device
|
|
||||||
.CreateRenderTargetView(&t, None, Some(&mut rtv))
|
|
||||||
.context("CreateRenderTargetView(IDD out ring)")?;
|
|
||||||
self.out_ring.push((t, rtv.context("null out-ring rtv")?));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build the HDR converter if not already built (HDR-display path only — an SDR display is a copy).
|
/// Build the per-mode YUV converter if not already built: a VIDEO-engine BGRA→NV12 processor on an
|
||||||
|
/// SDR display, or the FP16→P010 shader on an HDR display. Both keep NVENC's RGB→YUV CSC off the SM.
|
||||||
fn ensure_converter(&mut self) -> Result<()> {
|
fn ensure_converter(&mut self) -> Result<()> {
|
||||||
if self.hdr_conv.is_none() {
|
if self.display_hdr {
|
||||||
self.hdr_conv = Some(unsafe { HdrConverter::new(&self.device)? });
|
if self.hdr_p010_conv.is_none() {
|
||||||
|
// SAFETY: `HdrP010Converter::new` is `unsafe` (it compiles D3D11 shaders + creates
|
||||||
|
// resources); we pass a live borrow of `self.device`, the device the converter's resources
|
||||||
|
// belong to, and `?` propagates any failure before the converter is stored.
|
||||||
|
self.hdr_p010_conv = Some(unsafe { HdrP010Converter::new(&self.device)? });
|
||||||
|
}
|
||||||
|
} else if self.video_conv.is_none() {
|
||||||
|
// SAFETY: `VideoConverter::new` is `unsafe` (it sets up the D3D11 VIDEO processor); we pass live
|
||||||
|
// borrows of `self.device` + its immediate `self.context` (single-threaded, this thread) plus
|
||||||
|
// plain `u32` dimensions, and `?` propagates any failure before it is stored. The converter's
|
||||||
|
// resources belong to that same device/context.
|
||||||
|
self.video_conv = Some(unsafe {
|
||||||
|
VideoConverter::new(&self.device, &self.context, self.width, self.height, false)?
|
||||||
|
});
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -767,36 +891,48 @@ impl IddPushCapturer {
|
|||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
self.ensure_out_ring()?;
|
self.ensure_out_ring()?;
|
||||||
// Build the HDR converter BEFORE acquiring the slot so nothing between Acquire and Release can
|
// Build the converter BEFORE acquiring the slot so nothing between Acquire and Release can
|
||||||
// `?`-return and leak the keyed-mutex lock (which would stall the driver on that slot).
|
// `?`-return and leak the keyed-mutex lock (which would stall the driver on that slot).
|
||||||
if self.display_hdr {
|
self.ensure_converter()?;
|
||||||
self.ensure_converter()?;
|
|
||||||
}
|
|
||||||
let i = self.out_idx;
|
let i = self.out_idx;
|
||||||
let (out, out_rtv) = {
|
let out = self.out_ring[i].clone();
|
||||||
let (t, rtv) = &self.out_ring[i];
|
|
||||||
(t.clone(), rtv.clone())
|
|
||||||
};
|
|
||||||
let (_, pf) = self.out_format();
|
let (_, pf) = self.out_format();
|
||||||
|
|
||||||
// Hold the slot's keyed mutex only across the convert/copy into the host out-ring (NOT across the
|
// Hold the slot's keyed mutex only across the convert/copy into the host out-ring (NOT across the
|
||||||
// ~3 ms encode — NVENC reads the host out-ring slot, not the keyed-mutex slot), so the driver gets
|
// ~3 ms encode — NVENC reads the host out-ring slot, not the keyed-mutex slot), so the driver gets
|
||||||
// the slot back immediately and the encode of the PREVIOUS frame overlaps this convert.
|
// the slot back immediately and the encode of the PREVIOUS frame overlaps this convert.
|
||||||
let s = &self.slots[slot];
|
let s = &self.slots[slot];
|
||||||
if unsafe { s.mutex.AcquireSync(0, 8) }.is_err() {
|
// Acquire the slot's keyed mutex via a RAII guard, scoped to JUST the convert/copy below so it
|
||||||
return Ok(None);
|
// releases at the same point as the old hand-written `ReleaseSync` (the driver gets the slot back
|
||||||
}
|
// immediately, NOT held across the rest of `try_consume`) — but now leak-proof on any early return.
|
||||||
unsafe {
|
{
|
||||||
if self.display_hdr {
|
let Some(_lock) = KeyedMutexGuard::acquire(&s.mutex, 0, 8) else {
|
||||||
// Sample the FP16 slot's SRV directly (no scratch copy) → BT.2020 PQ Rgb10a2.
|
return Ok(None);
|
||||||
if let Some(conv) = self.hdr_conv.as_ref() {
|
};
|
||||||
conv.convert(&self.context, &s.srv, &out_rtv, self.width, self.height);
|
// SAFETY: convert on the owning (encode) thread's immediate context, holding the slot lock.
|
||||||
|
// A `?` here is leak-safe: `_lock` (the KeyedMutexGuard) drops on the early return, releasing
|
||||||
|
// the slot back to the driver.
|
||||||
|
unsafe {
|
||||||
|
if self.display_hdr {
|
||||||
|
// HDR: FP16 slot SRV → P010 (BT.2020 PQ) via the shader; NVENC takes native P010.
|
||||||
|
if let Some(conv) = self.hdr_p010_conv.as_ref() {
|
||||||
|
conv.convert(
|
||||||
|
&self.device,
|
||||||
|
&self.context,
|
||||||
|
&s.srv,
|
||||||
|
&out,
|
||||||
|
self.width,
|
||||||
|
self.height,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// SDR: BGRA slot → NV12 on the VIDEO engine; NVENC takes native NV12, no SM-side CSC.
|
||||||
|
if let Some(conv) = self.video_conv.as_ref() {
|
||||||
|
conv.convert(&s.tex, &out)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// SDR: the slot is already 8-bit BGRA — one copy into the out-ring (hidden by pipelining).
|
|
||||||
self.context.CopyResource(&out, &s.tex);
|
|
||||||
}
|
}
|
||||||
let _ = s.mutex.ReleaseSync(0);
|
// `_lock` drops here → `ReleaseSync(0)`.
|
||||||
}
|
}
|
||||||
self.out_idx = (i + 1) % self.out_ring.len();
|
self.out_idx = (i + 1) % self.out_ring.len();
|
||||||
self.last_seq = seq;
|
self.last_seq = seq;
|
||||||
@@ -821,7 +957,7 @@ impl IddPushCapturer {
|
|||||||
// OUT_RING(3) > the max pipeline_depth(2) guarantees the rotated slot is not in flight.
|
// OUT_RING(3) > the max pipeline_depth(2) guarantees the rotated slot is not in flight.
|
||||||
let (src, pf) = self.last_present.clone()?;
|
let (src, pf) = self.last_present.clone()?;
|
||||||
let i = self.out_idx;
|
let i = self.out_idx;
|
||||||
let dst = self.out_ring.get(i)?.0.clone();
|
let dst = self.out_ring.get(i)?.clone();
|
||||||
// SAFETY: GPU copy on the owning thread's immediate context; src/dst are our out-ring textures of
|
// SAFETY: GPU copy on the owning thread's immediate context; src/dst are our out-ring textures of
|
||||||
// identical format/size (src is a previous out-ring slot; dst the next).
|
// identical format/size (src is a previous out-ring slot; dst the next).
|
||||||
unsafe {
|
unsafe {
|
||||||
@@ -882,6 +1018,8 @@ pub fn spawn_observer(target: WinCaptureTarget, preferred: Option<(u32, u32, u32
|
|||||||
|
|
||||||
/// The discrete render GPU LUID (where NVENC runs), falling back to the monitor's `OsAdapterLuid`.
|
/// The discrete render GPU LUID (where NVENC runs), falling back to the monitor's `OsAdapterLuid`.
|
||||||
fn resolve_render_adapter_luid_or(fallback_packed: i64) -> LUID {
|
fn resolve_render_adapter_luid_or(fallback_packed: i64) -> LUID {
|
||||||
|
// SAFETY: `resolve_render_adapter_luid` is an `unsafe fn` (it enumerates DXGI adapters) that takes no
|
||||||
|
// arguments and returns an owned `Option<LUID>`, borrowing nothing.
|
||||||
if let Some(l) = unsafe { crate::win_adapter::resolve_render_adapter_luid() } {
|
if let Some(l) = unsafe { crate::win_adapter::resolve_render_adapter_luid() } {
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
@@ -895,9 +1033,10 @@ impl Capturer for IddPushCapturer {
|
|||||||
fn next_frame(&mut self) -> Result<CapturedFrame> {
|
fn next_frame(&mut self) -> Result<CapturedFrame> {
|
||||||
let deadline = Instant::now() + Duration::from_secs(20);
|
let deadline = Instant::now() + Duration::from_secs(20);
|
||||||
loop {
|
loop {
|
||||||
let _ = unsafe {
|
// SAFETY: `self.event` is the live frame-ready `OwnedHandle` this capturer owns; its raw value
|
||||||
WaitForSingleObject(HANDLE(self.event.as_raw_handle() as *mut c_void), 16)
|
// (borrowed for the call, so it outlives this synchronous wait) is a valid auto-reset event
|
||||||
};
|
// handle. `WaitForSingleObject` only reads the handle; the 16 ms timeout bounds the wait.
|
||||||
|
let _ = unsafe { WaitForSingleObject(HANDLE(self.event.as_raw_handle()), 16) };
|
||||||
if let Some(f) = self.try_consume()? {
|
if let Some(f) = self.try_consume()? {
|
||||||
return Ok(f);
|
return Ok(f);
|
||||||
}
|
}
|
||||||
@@ -906,6 +1045,9 @@ impl Capturer for IddPushCapturer {
|
|||||||
}
|
}
|
||||||
if Instant::now() > deadline {
|
if Instant::now() > deadline {
|
||||||
self.log_debug_block();
|
self.log_debug_block();
|
||||||
|
// SAFETY: four in-bounds, aligned reads of the live, owned shared-header mapping — the same
|
||||||
|
// best-effort diagnostic fields as `log_driver_status_once` (aligned word reads can't tear;
|
||||||
|
// no reference into the shared region is formed).
|
||||||
let (st, detail, lo, hi) = unsafe {
|
let (st, detail, lo, hi) = unsafe {
|
||||||
(
|
(
|
||||||
(*self.header).driver_status,
|
(*self.header).driver_status,
|
||||||
|
|||||||
@@ -16,6 +16,9 @@
|
|||||||
//! Limitation: WGC cannot capture the secure desktop (lock / UAC / login) — the caller falls back to
|
//! Limitation: WGC cannot capture the secure desktop (lock / UAC / login) — the caller falls back to
|
||||||
//! the DDA backend ([`super::dxgi::DuplCapturer`]) for those (see capture.rs).
|
//! the DDA backend ([`super::dxgi::DuplCapturer`]) for those (see capture.rs).
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::dxgi::{
|
use super::dxgi::{
|
||||||
find_output, hdr_shader_p010_enabled, make_device, nudge_cursor_onto, D3d11Frame, HdrConverter,
|
find_output, hdr_shader_p010_enabled, make_device, nudge_cursor_onto, D3d11Frame, HdrConverter,
|
||||||
HdrP010Converter, VideoConverter, WinCaptureTarget,
|
HdrP010Converter, VideoConverter, WinCaptureTarget,
|
||||||
@@ -92,6 +95,10 @@ struct Deimpersonate(Option<HANDLE>);
|
|||||||
impl Drop for Deimpersonate {
|
impl Drop for Deimpersonate {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if let Some(tok) = self.0.take() {
|
if let Some(tok) = self.0.take() {
|
||||||
|
// SAFETY: `RevertToSelf` takes no arguments and undoes the thread impersonation set during
|
||||||
|
// WGC activation; `tok` is the impersonation token `HANDLE` from `impersonate_active_user`,
|
||||||
|
// owned by this `Deimpersonate` and closed exactly once here (taken out of the `Option`, so
|
||||||
|
// no double-close). Both are FFI calls borrowing no Rust memory.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = RevertToSelf();
|
let _ = RevertToSelf();
|
||||||
let _ = CloseHandle(tok);
|
let _ = CloseHandle(tok);
|
||||||
@@ -174,7 +181,12 @@ pub struct WgcCapturer {
|
|||||||
_keepalive: Option<Box<dyn Send>>,
|
_keepalive: Option<Box<dyn Send>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// COM + WinRT pointers; confined to the single owning (encode) thread, like DuplCapturer.
|
// SAFETY: like `DuplCapturer`. `WgcCapturer` holds D3D11 (free-threaded device/context) plus WGC WinRT
|
||||||
|
// objects (`Direct3D11CaptureFramePool` etc., created free-threaded via `CreateFreeThreaded`). COM/WinRT
|
||||||
|
// reference counting is interlocked, and the capturer is owned + used by exactly one encode thread,
|
||||||
|
// moved to it once and never shared (no `Sync`), so transferring ownership across threads is sound. The
|
||||||
|
// free-threaded `FrameArrived` callback touches only the `Arc<WgcSignal>` (itself `Send + Sync`), not
|
||||||
|
// the capturer's COM fields.
|
||||||
unsafe impl Send for WgcCapturer {}
|
unsafe impl Send for WgcCapturer {}
|
||||||
|
|
||||||
impl WgcCapturer {
|
impl WgcCapturer {
|
||||||
@@ -182,6 +194,15 @@ impl WgcCapturer {
|
|||||||
/// [`attach_keepalive`](Self::attach_keepalive) only after open succeeds, so a failure leaves the
|
/// [`attach_keepalive`](Self::attach_keepalive) only after open succeeds, so a failure leaves the
|
||||||
/// keepalive with the caller to hand to the DDA fallback.
|
/// keepalive with the caller to hand to the DDA fallback.
|
||||||
pub fn open(target: WinCaptureTarget, preferred: Option<(u32, u32, u32)>) -> Result<Self> {
|
pub fn open(target: WinCaptureTarget, preferred: Option<(u32, u32, u32)>) -> Result<Self> {
|
||||||
|
// SAFETY: runs on the thread opening the WGC session. `RoInitialize` inits this thread's WinRT
|
||||||
|
// apartment (idempotent; result ignored). `impersonate_active_user()` and `find_output()` are
|
||||||
|
// this module's `unsafe fn`s whose contracts (call on the activating thread; pass a GDI name)
|
||||||
|
// are met, and the impersonation is reverted by `_deimp`'s Drop on every return path. Every
|
||||||
|
// COM/WinRT call thereafter operates on an object obtained + `?`-checked earlier in this same
|
||||||
|
// block on this single thread — the `IDXGIOutput1` from `find_output`, the device/context from
|
||||||
|
// `make_device`, the factory/interop/item/pool/session — and the `TypedEventHandler` closure
|
||||||
|
// captures an `Arc<WgcSignal>` (Send+Sync) by move. No raw pointers are dereferenced; borrowed
|
||||||
|
// locals outlive their synchronous calls.
|
||||||
unsafe {
|
unsafe {
|
||||||
// WGC is WinRT — the calling thread needs a COM/WinRT apartment for the GraphicsCaptureItem
|
// WGC is WinRT — the calling thread needs a COM/WinRT apartment for the GraphicsCaptureItem
|
||||||
// activation factory (RoGetActivationFactory). Initialize MTA; ignore "already initialized"
|
// activation factory (RoGetActivationFactory). Initialize MTA; ignore "already initialized"
|
||||||
@@ -585,6 +606,15 @@ impl WgcCapturer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn process_frame(&mut self, frame: Direct3D11CaptureFrame) -> Result<CapturedFrame> {
|
fn process_frame(&mut self, frame: Direct3D11CaptureFrame) -> Result<CapturedFrame> {
|
||||||
|
// SAFETY: runs on the capturer's single owning thread. `frame` is a live
|
||||||
|
// `Direct3D11CaptureFrame` from `self.pool`; `frame.Surface().cast::<IDirect3DDxgiInterfaceAccess
|
||||||
|
// >().GetInterface()` yields the frame's backing `ID3D11Texture2D`, which belongs to
|
||||||
|
// `self.device` (the pool was created on it via `CreateDirect3D11DeviceFromDXGIDevice`). Every
|
||||||
|
// helper called here — `hdr_to_p010`, `convert_to_yuv`, `ensure_fp16_src`, `ensure_out_ring`,
|
||||||
|
// `HdrConverter::convert`, `CopyResource`, `CreateRenderTargetView` — operates on
|
||||||
|
// `self.device`/`self.context` and that same-device texture, so all resources share one device.
|
||||||
|
// The frame is held in `self.held` until its async GPU read completes for the zero-copy paths.
|
||||||
|
// Single-threaded immediate-context use; borrowed textures/SRVs/RTVs outlive each synchronous call.
|
||||||
unsafe {
|
unsafe {
|
||||||
let surface = frame.Surface().context("frame Surface")?;
|
let surface = frame.Surface().context("frame Surface")?;
|
||||||
let access: IDirect3DDxgiInterfaceAccess = surface
|
let access: IDirect3DDxgiInterfaceAccess = surface
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//! Host-side WGC helper relay (Windows two-process secure-desktop design,
|
//! Host-side WGC helper relay (Windows two-process secure-desktop design,
|
||||||
//! docs/windows-secure-desktop.md — step 4).
|
//! design/archive/windows-secure-desktop.md — step 4).
|
||||||
//!
|
//!
|
||||||
//! WGC won't activate under the SYSTEM account, so the SYSTEM host can't capture the normal desktop
|
//! WGC won't activate under the SYSTEM account, so the SYSTEM host can't capture the normal desktop
|
||||||
//! itself. Instead it spawns `punktfunk-host wgc-helper` in the **interactive user session** (so WGC works)
|
//! itself. Instead it spawns `punktfunk-host wgc-helper` in the **interactive user session** (so WGC works)
|
||||||
@@ -13,6 +13,9 @@
|
|||||||
//! Wire framing (must match `wgc_helper::write_au`): per AU
|
//! Wire framing (must match `wgc_helper::write_au`): per AU
|
||||||
//! `[u32 magic "PFAU" LE][u32 len LE][u64 pts_ns LE][u8 keyframe][len bytes data]`.
|
//! `[u32 magic "PFAU" LE][u32 len LE][u64 pts_ns LE][u8 keyframe][len bytes data]`.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use crate::capture::dxgi::WinCaptureTarget;
|
use crate::capture::dxgi::WinCaptureTarget;
|
||||||
use anyhow::{bail, Context, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
use std::io::{BufRead, BufReader, Read};
|
use std::io::{BufRead, BufReader, Read};
|
||||||
@@ -56,9 +59,15 @@ pub struct HelperRelay {
|
|||||||
rx: Receiver<RelayAu>,
|
rx: Receiver<RelayAu>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// HANDLEs are just kernel handle values; we own them for the relay's lifetime and close them on Drop.
|
// SAFETY: every field is itself `Send`: the `proc`/`thread` `HANDLE`s are process-global kernel
|
||||||
|
// handle values (plain integers valid from any thread, owned for the relay's lifetime and closed once
|
||||||
|
// on Drop), `stdin_w` is a `Mutex<HANDLE>`, and `rx` is an mpsc `Receiver<RelayAu>` (which is `Send`).
|
||||||
|
// The relay is moved to one thread and owned there, so transferring it across threads is sound.
|
||||||
unsafe impl Send for HelperRelay {}
|
unsafe impl Send for HelperRelay {}
|
||||||
unsafe impl Sync for HelperRelay {}
|
// NOTE: `HelperRelay` is deliberately NOT `Sync`. Its `rx: Receiver<RelayAu>` is `!Sync` (std mpsc
|
||||||
|
// is single-consumer), and the relay is only ever a single-owner local in the punktfunk1 two-process
|
||||||
|
// mux loop — never shared by `&` across threads — so `Sync` is neither sound nor needed. (A prior
|
||||||
|
// `unsafe impl Sync` here asserted more than the fields support; removed.)
|
||||||
|
|
||||||
/// Control byte on the helper's stdin: force the next encoded frame to be an IDR (client decode
|
/// Control byte on the helper's stdin: force the next encoded frame to be an IDR (client decode
|
||||||
/// recovery). Mirrors `enc.request_keyframe()` in the single-process path.
|
/// recovery). Mirrors `enc.request_keyframe()` in the single-process path.
|
||||||
@@ -84,6 +93,10 @@ impl HelperRelay {
|
|||||||
);
|
);
|
||||||
tracing::info!(cmd = %cmdline, "spawning WGC helper in user session");
|
tracing::info!(cmd = %cmdline, "spawning WGC helper in user session");
|
||||||
|
|
||||||
|
// SAFETY: `spawn_inner` is an `unsafe fn` only because it drives raw Win32 token/pipe/process
|
||||||
|
// FFI; it imposes no caller-side memory precondition beyond valid arguments. `cmdline` is a live
|
||||||
|
// `&str` borrowed for the synchronous call and `(w, h, hz)` are plain `u32`s. It validates its
|
||||||
|
// own runtime requirements (active console session, SYSTEM token) and returns `Err` otherwise.
|
||||||
unsafe { spawn_inner(&cmdline, w, h, hz) }
|
unsafe { spawn_inner(&cmdline, w, h, hz) }
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -108,6 +121,11 @@ impl HelperRelay {
|
|||||||
pub fn request_keyframe(&self) {
|
pub fn request_keyframe(&self) {
|
||||||
let h = self.stdin_w.lock().unwrap();
|
let h = self.stdin_w.lock().unwrap();
|
||||||
let mut written = 0u32;
|
let mut written = 0u32;
|
||||||
|
// SAFETY: `*h` is the host's write end of the helper's stdin pipe — a live `HANDLE` owned by
|
||||||
|
// this `HelperRelay` (held under the `stdin_w` Mutex, locked here), closed only in Drop.
|
||||||
|
// `WriteFile` reads the 1-byte `&[CTL_KEYFRAME]` buffer and writes the byte count into
|
||||||
|
// `written`; both are live locals that outlive the synchronous call. A failure (helper gone) is
|
||||||
|
// discarded as documented.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = windows::Win32::Storage::FileSystem::WriteFile(
|
let _ = windows::Win32::Storage::FileSystem::WriteFile(
|
||||||
*h,
|
*h,
|
||||||
@@ -121,6 +139,10 @@ impl HelperRelay {
|
|||||||
|
|
||||||
impl Drop for HelperRelay {
|
impl Drop for HelperRelay {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `self.proc`/`self.thread` are the child process/thread `HANDLE`s from
|
||||||
|
// `CreateProcessAsUserW`, and `stdin_w` is the host's pipe write end — all owned by this
|
||||||
|
// `HelperRelay` and closed exactly once here in Drop (no double-close). `TerminateProcess` and
|
||||||
|
// the three `CloseHandle`s are FFI calls taking those handles by value, borrowing no Rust memory.
|
||||||
unsafe {
|
unsafe {
|
||||||
// Terminate the child first so its WGC capture + NVENC session tear down, then close our
|
// Terminate the child first so its WGC capture + NVENC session tear down, then close our
|
||||||
// handles (the reader threads end on the resulting broken pipe).
|
// handles (the reader threads end on the resulting broken pipe).
|
||||||
@@ -364,10 +386,17 @@ fn au_reader(mut r: HandleReader, tx: SyncSender<RelayAu>) {
|
|||||||
|
|
||||||
/// Minimal `Read` over a Win32 pipe HANDLE (the windows crate doesn't impl `Read` on HANDLE).
|
/// Minimal `Read` over a Win32 pipe HANDLE (the windows crate doesn't impl `Read` on HANDLE).
|
||||||
struct HandleReader(HANDLE);
|
struct HandleReader(HANDLE);
|
||||||
|
// SAFETY: `HandleReader` owns a single pipe `HANDLE` (a process-global kernel handle value, valid from
|
||||||
|
// any thread). It is moved into the dedicated reader thread and used only there (and closed once on
|
||||||
|
// Drop), never shared — so transferring ownership across threads is sound.
|
||||||
unsafe impl Send for HandleReader {}
|
unsafe impl Send for HandleReader {}
|
||||||
impl Read for HandleReader {
|
impl Read for HandleReader {
|
||||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
let mut read = 0u32;
|
let mut read = 0u32;
|
||||||
|
// SAFETY: `self.0` is the live read end of an anonymous pipe owned by this `HandleReader`
|
||||||
|
// (closed only in Drop). `ReadFile` fills the caller-provided `buf` (writing at most `buf.len()`
|
||||||
|
// bytes) and stores the count in `read`; both outlive the synchronous call. A broken pipe
|
||||||
|
// surfaces as `Err` and is mapped to EOF below.
|
||||||
let ok = unsafe {
|
let ok = unsafe {
|
||||||
windows::Win32::Storage::FileSystem::ReadFile(self.0, Some(buf), Some(&mut read), None)
|
windows::Win32::Storage::FileSystem::ReadFile(self.0, Some(buf), Some(&mut read), None)
|
||||||
};
|
};
|
||||||
@@ -380,6 +409,8 @@ impl Read for HandleReader {
|
|||||||
}
|
}
|
||||||
impl Drop for HandleReader {
|
impl Drop for HandleReader {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `self.0` is the pipe `HANDLE` this `HandleReader` owns; `CloseHandle` (an FFI call
|
||||||
|
// taking the handle by value) is invoked exactly once here in Drop, so there is no double-close.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = CloseHandle(self.0);
|
let _ = CloseHandle(self.0);
|
||||||
}
|
}
|
||||||
@@ -391,6 +422,13 @@ impl Drop for HandleReader {
|
|||||||
pub fn running_as_system() -> bool {
|
pub fn running_as_system() -> bool {
|
||||||
use windows::Win32::Security::{GetTokenInformation, TokenUser, TOKEN_QUERY, TOKEN_USER};
|
use windows::Win32::Security::{GetTokenInformation, TokenUser, TOKEN_QUERY, TOKEN_USER};
|
||||||
use windows::Win32::System::Threading::{GetCurrentProcess, OpenProcessToken};
|
use windows::Win32::System::Threading::{GetCurrentProcess, OpenProcessToken};
|
||||||
|
// SAFETY: `OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &mut token)` opens the current-process
|
||||||
|
// token (the pseudo-handle is always valid) into `token`, which is closed once before each return.
|
||||||
|
// The first `GetTokenInformation` (null buffer) queries the required `len`; `buf` is then a
|
||||||
|
// `Vec<u8>` of exactly `len` bytes and the second call fills it, so `&*(buf.as_ptr() as *const
|
||||||
|
// TOKEN_USER)` reads a `TOKEN_USER` the kernel just wrote into a sufficiently-sized buffer (the
|
||||||
|
// variable-length SID it points at also lies within `buf`, which outlives the borrow).
|
||||||
|
// `is_local_system_sid` is this module's `unsafe fn`, given that in-buffer `PSID`. Safe on any thread.
|
||||||
unsafe {
|
unsafe {
|
||||||
let mut token = HANDLE::default();
|
let mut token = HANDLE::default();
|
||||||
if OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &mut token).is_err() {
|
if OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &mut token).is_err() {
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
//! environment before the host starts, and **for the knobs captured here the environment is constant for the
|
//! environment before the host starts, and **for the knobs captured here the environment is constant for the
|
||||||
//! process lifetime**, so a lazily-parsed global is equivalent to "parsed once at startup".
|
//! process lifetime**, so a lazily-parsed global is equivalent to "parsed once at startup".
|
||||||
//!
|
//!
|
||||||
//! **Goal-1 stages 1–2** (`docs/windows-host-rewrite.md` §2.2): stage 1 stood this up; stage 2 migrated the
|
//! **Goal-1 stages 1–2** (`design/windows-host-rewrite.md` §2.2): stage 1 stood this up; stage 2 migrated the
|
||||||
//! genuinely-constant operator/dispatch knobs onto it (the dispatch-disagreement bug class: `idd_push`,
|
//! genuinely-constant operator/dispatch knobs onto it (the dispatch-disagreement bug class: `idd_push`,
|
||||||
//! `capture_backend`, `encoder_pref`, `render_adapter`, `no_wgc`, the vdisplay backend select — plus the
|
//! `capture_backend`, `encoder_pref`, `render_adapter`, `no_wgc`, the vdisplay backend select — plus the
|
||||||
//! plan-named `secure_dda`/`idd_depth`/`zerocopy`/`ten_bit` and the multi-site `perf`/`compositor`/
|
//! plan-named `secure_dda`/`idd_depth`/`zerocopy`/`ten_bit` and the multi-site `perf`/`compositor`/
|
||||||
|
|||||||
@@ -3,6 +3,9 @@
|
|||||||
//! RGB→YUV on the GPU, so no host-side CSC) and VAAPI on AMD/Intel (`*_vaapi`; the CPU-input
|
//! RGB→YUV on the GPU, so no host-side CSC) and VAAPI on AMD/Intel (`*_vaapi`; the CPU-input
|
||||||
//! fallback swscales RGB→NV12, the zero-copy path imports the capture dmabuf straight into a
|
//! fallback swscales RGB→NV12, the zero-copy path imports the capture dmabuf straight into a
|
||||||
//! VA surface). One [`Encoder`] trait, selected in [`open_video`].
|
//! VA surface). One [`Encoder`] trait, selected in [`open_video`].
|
||||||
|
// Every unsafe block in this module tree carries a `// SAFETY:` proof; enforce it (unsafe-proof
|
||||||
|
// program). As a parent module this also covers the child modules (encode::windows/linux::*).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use crate::capture::{CapturedFrame, PixelFormat};
|
use crate::capture::{CapturedFrame, PixelFormat};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
@@ -505,6 +508,14 @@ fn windows_gpu_vendor() -> Option<GpuVendor> {
|
|||||||
CreateDXGIFactory1, IDXGIFactory1, DXGI_ADAPTER_FLAG_SOFTWARE,
|
CreateDXGIFactory1, IDXGIFactory1, DXGI_ADAPTER_FLAG_SOFTWARE,
|
||||||
};
|
};
|
||||||
static CACHE: OnceLock<Option<GpuVendor>> = OnceLock::new();
|
static CACHE: OnceLock<Option<GpuVendor>> = OnceLock::new();
|
||||||
|
// SAFETY: `CreateDXGIFactory1` returns a fresh owned `IDXGIFactory1` COM object (refcounted by the
|
||||||
|
// windows-rs wrapper, Released when the local drops); `.ok()?` bails on failure so `factory` is a
|
||||||
|
// valid interface before any use. `EnumAdapters1(i)` hands back the i-th adapter as an owned
|
||||||
|
// `IDXGIAdapter1` (or an error past the last adapter, which ends the loop). `GetDesc1()` returns the
|
||||||
|
// `DXGI_ADAPTER_DESC1` by value (no out-pointer), so reading `desc.Flags`/`desc.VendorId` is plain
|
||||||
|
// field access. Every call only touches COM objects this closure owns; the `OnceLock` runs the
|
||||||
|
// closure once (no data race) and all interfaces are Released as the locals drop. No raw pointer is
|
||||||
|
// dereferenced and nothing is aliased.
|
||||||
*CACHE.get_or_init(|| unsafe {
|
*CACHE.get_or_init(|| unsafe {
|
||||||
let factory: IDXGIFactory1 = CreateDXGIFactory1().ok()?;
|
let factory: IDXGIFactory1 = CreateDXGIFactory1().ok()?;
|
||||||
let mut i = 0u32;
|
let mut i = 0u32;
|
||||||
|
|||||||
@@ -8,6 +8,8 @@
|
|||||||
//! does *not* accept — we expand it to `rgb0` (one padding byte/pixel, no colour math).
|
//! does *not* accept — we expand it to `rgb0` (one padding byte/pixel, no colour math).
|
||||||
//! The encoder is opened *without* a global header so VPS/SPS/PPS are emitted in-band on
|
//! The encoder is opened *without* a global header so VPS/SPS/PPS are emitted in-band on
|
||||||
//! every IDR — the output is both a playable raw Annex-B stream and self-contained AUs.
|
//! every IDR — the output is both a playable raw Annex-B stream and self-contained AUs.
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{Codec, EncodedFrame, Encoder};
|
use super::{Codec, EncodedFrame, Encoder};
|
||||||
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
|
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
|
||||||
@@ -79,6 +81,12 @@ impl CudaHw {
|
|||||||
|
|
||||||
impl Drop for CudaHw {
|
impl Drop for CudaHw {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `frames_ref`/`device_ref` are the two non-null `AVBufferRef`s `CudaHw::new` created
|
||||||
|
// (it bails before returning `Self` if either alloc fails, so a live `CudaHw` always holds
|
||||||
|
// both). `av_buffer_unref` drops one reference and nulls the pointer through the `&mut`. This
|
||||||
|
// `Drop` runs exactly once and `CudaHw` owns these refs exclusively → no double-free /
|
||||||
|
// use-after-free. Frames are unref'd before the device (the frames ctx internally refs the
|
||||||
|
// device; refcounted, so the order is sound regardless).
|
||||||
unsafe {
|
unsafe {
|
||||||
ffi::av_buffer_unref(&mut self.frames_ref);
|
ffi::av_buffer_unref(&mut self.frames_ref);
|
||||||
ffi::av_buffer_unref(&mut self.device_ref);
|
ffi::av_buffer_unref(&mut self.device_ref);
|
||||||
@@ -136,6 +144,13 @@ pub struct NvencEncoder {
|
|||||||
|
|
||||||
// `CudaHw` holds raw `AVBufferRef`s; the encoder lives on a single thread. The CPU encoder is
|
// `CudaHw` holds raw `AVBufferRef`s; the encoder lives on a single thread. The CPU encoder is
|
||||||
// already `Send` via ffmpeg-next; assert it for the CUDA fields too.
|
// already `Send` via ffmpeg-next; assert it for the CUDA fields too.
|
||||||
|
// SAFETY: `NvencEncoder` owns an ffmpeg-next `Encoder`/`VideoFrame` (already `Send`) plus a `CudaHw`
|
||||||
|
// holding raw `AVBufferRef`s, which are not `Send` by default. The encoder is owned and driven by
|
||||||
|
// exactly ONE thread — the per-session encode thread it is moved to — and is only touched through
|
||||||
|
// `&mut self` methods, so it is never aliased or accessed concurrently. The wrapped libav contexts
|
||||||
|
// (and the shared `CUcontext` the `CudaHw` references) have no thread affinity, so transferring
|
||||||
|
// ownership across threads is sound. This asserts `Send` (transfer) only, extending ffmpeg-next's
|
||||||
|
// existing `Send` to the raw CUDA fields; `Sync` (shared `&`) is deliberately NOT implemented.
|
||||||
unsafe impl Send for NvencEncoder {}
|
unsafe impl Send for NvencEncoder {}
|
||||||
|
|
||||||
impl NvencEncoder {
|
impl NvencEncoder {
|
||||||
@@ -162,6 +177,9 @@ impl NvencEncoder {
|
|||||||
}
|
}
|
||||||
ffmpeg::init().context("ffmpeg init")?;
|
ffmpeg::init().context("ffmpeg init")?;
|
||||||
if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() {
|
if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() {
|
||||||
|
// SAFETY: `av_log_set_level` sets libav's global integer log level; `48` (= AV_LOG_DEBUG)
|
||||||
|
// is a valid level with no pointer args, and libav was just initialized by `ffmpeg::init()`
|
||||||
|
// above — always sound.
|
||||||
unsafe { ffi::av_log_set_level(48) }; // AV_LOG_DEBUG — surface NVENC hw-frame rejects
|
unsafe { ffi::av_log_set_level(48) }; // AV_LOG_DEBUG — surface NVENC hw-frame rejects
|
||||||
}
|
}
|
||||||
let name = codec.nvenc_name();
|
let name = codec.nvenc_name();
|
||||||
@@ -195,6 +213,11 @@ impl NvencEncoder {
|
|||||||
.unwrap_or(1.0);
|
.unwrap_or(1.0);
|
||||||
let vbv_bits = ((bitrate_bps as f64 / fps.max(1) as f64) * vbv_frames as f64)
|
let vbv_bits = ((bitrate_bps as f64 / fps.max(1) as f64) * vbv_frames as f64)
|
||||||
.clamp(1.0, i32::MAX as f64);
|
.clamp(1.0, i32::MAX as f64);
|
||||||
|
// SAFETY: `video` is the ffmpeg-next encoder builder wrapping a freshly-allocated
|
||||||
|
// `AVCodecContext` that we hold by value and have not opened yet; `video.as_mut_ptr()` returns
|
||||||
|
// that non-null, properly-aligned, exclusively-owned context. Writing the plain `rc_buffer_size`
|
||||||
|
// int field before `open_with` is the supported way to set a field ffmpeg-next exposes no
|
||||||
|
// setter for. Sole owner → no aliasing; synchronous in-bounds scalar write.
|
||||||
unsafe {
|
unsafe {
|
||||||
(*video.as_mut_ptr()).rc_buffer_size = vbv_bits as i32;
|
(*video.as_mut_ptr()).rc_buffer_size = vbv_bits as i32;
|
||||||
}
|
}
|
||||||
@@ -204,6 +227,9 @@ impl NvencEncoder {
|
|||||||
// "freeze". NVENC emits one IDR at stream start, then P-frames only; `forced-idr` (below)
|
// "freeze". NVENC emits one IDR at stream start, then P-frames only; `forced-idr` (below)
|
||||||
// turns a client recovery request (RFI, via `request_keyframe`) into an IDR on demand.
|
// turns a client recovery request (RFI, via `request_keyframe`) into an IDR on demand.
|
||||||
// This is the Moonlight/Sunshine low-latency model.
|
// This is the Moonlight/Sunshine low-latency model.
|
||||||
|
// SAFETY: same `video` builder as above — a non-null, properly-aligned, sole-owned, not-yet-
|
||||||
|
// opened `AVCodecContext`. We write the plain `gop_size` int field (= -1, infinite GOP) before
|
||||||
|
// `open_with`, which ffmpeg-next has no setter for. No aliasing; synchronous scalar write.
|
||||||
unsafe {
|
unsafe {
|
||||||
(*video.as_mut_ptr()).gop_size = -1;
|
(*video.as_mut_ptr()).gop_size = -1;
|
||||||
}
|
}
|
||||||
@@ -214,6 +240,10 @@ impl NvencEncoder {
|
|||||||
// RGB-input paths leave these unset (NVENC's internal CSC writes its own VUI). Matches the
|
// RGB-input paths leave these unset (NVENC's internal CSC writes its own VUI). Matches the
|
||||||
// Windows NV12 path's BT.709 limited-range signalling.
|
// Windows NV12 path's BT.709 limited-range signalling.
|
||||||
if matches!(format, PixelFormat::Nv12) {
|
if matches!(format, PixelFormat::Nv12) {
|
||||||
|
// SAFETY: same `video` builder — `raw = video.as_mut_ptr()` is the non-null, properly-
|
||||||
|
// aligned, sole-owned, not-yet-opened `AVCodecContext`. We set its four VUI colour enum
|
||||||
|
// fields to valid `AVColorSpace`/`AVColorRange`/`AVColorPrimaries`/
|
||||||
|
// `AVColorTransferCharacteristic` variants before `open_with`. Sole owner → no aliasing; synchronous writes.
|
||||||
unsafe {
|
unsafe {
|
||||||
let raw = video.as_mut_ptr();
|
let raw = video.as_mut_ptr();
|
||||||
(*raw).colorspace = ffi::AVColorSpace::AVCOL_SPC_BT709;
|
(*raw).colorspace = ffi::AVColorSpace::AVCOL_SPC_BT709;
|
||||||
@@ -228,7 +258,17 @@ impl NvencEncoder {
|
|||||||
// *before* open (NVENC derives the device from `hw_frames_ctx`).
|
// *before* open (NVENC derives the device from `hw_frames_ctx`).
|
||||||
let cuda_hw = if cuda {
|
let cuda_hw = if cuda {
|
||||||
let cu_ctx = crate::zerocopy::cuda::context().context("shared CUDA context")?;
|
let cu_ctx = crate::zerocopy::cuda::context().context("shared CUDA context")?;
|
||||||
|
// SAFETY: `CudaHw::new` (an `unsafe fn`) requires libav initialized (the `ffmpeg::init()`
|
||||||
|
// above ran) and a valid `CUcontext`; `cu_ctx` is the shared importer context from
|
||||||
|
// `zerocopy::cuda::context()?`, non-null on the `Ok` path. `nvenc_pixel` is a valid `Pixel`
|
||||||
|
// and `width`/`height` are the validated positive dims. It returns a RAII `CudaHw` wrapping
|
||||||
|
// (not owning) `cu_ctx` and owning two `AVBufferRef`s freed on drop.
|
||||||
let hw = unsafe { CudaHw::new(cu_ctx, nvenc_pixel, width, height)? };
|
let hw = unsafe { CudaHw::new(cu_ctx, nvenc_pixel, width, height)? };
|
||||||
|
// SAFETY: `raw = video.as_mut_ptr()` is the non-null, sole-owned, not-yet-opened
|
||||||
|
// `AVCodecContext`. We set `pix_fmt = CUDA` and attach NEW refs (`av_buffer_ref`) of
|
||||||
|
// `hw.device_ref`/`hw.frames_ref` — both non-null (`CudaHw::new` guarantees) and from the
|
||||||
|
// live `hw`, which is moved into `NvencEncoder.cuda` next to `enc` and so outlives the
|
||||||
|
// encoder. The context owns its own refs (freed when the context closes). No aliasing.
|
||||||
unsafe {
|
unsafe {
|
||||||
let raw = video.as_mut_ptr();
|
let raw = video.as_mut_ptr();
|
||||||
(*raw).pix_fmt = ffi::AVPixelFormat::AV_PIX_FMT_CUDA;
|
(*raw).pix_fmt = ffi::AVPixelFormat::AV_PIX_FMT_CUDA;
|
||||||
@@ -428,6 +468,19 @@ impl NvencEncoder {
|
|||||||
// The device→device copy below uses our shared context directly; make it current on the
|
// The device→device copy below uses our shared context directly; make it current on the
|
||||||
// encode thread (ffmpeg pushes its own around the pool alloc, so order is fine).
|
// encode thread (ffmpeg pushes its own around the pool alloc, so order is fine).
|
||||||
crate::zerocopy::cuda::make_current().context("CUDA context current (encode thread)")?;
|
crate::zerocopy::cuda::make_current().context("CUDA context current (encode thread)")?;
|
||||||
|
// SAFETY: `frames_ref` is the non-null CUDA frames ctx from `self.cuda` (unwrapped via
|
||||||
|
// `.context(..)?` above), and the shared CUDA context was just made current on THIS thread
|
||||||
|
// (`make_current()?`), the precondition for the device-pointer copies below.
|
||||||
|
// * `av_frame_alloc` → `f` (null-checked). `av_hwframe_get_buffer(frames_ref, f, 0)` fills `f`
|
||||||
|
// with a pooled CUDA surface (sets `data[]`/`linesize[]`/`buf[0]`/`hw_frames_ctx`); on
|
||||||
|
// failure we free `f` and bail.
|
||||||
|
// * For NV12 we read `(*f).data[0..2]` / `linesize[0..2]` (Y + interleaved UV), else
|
||||||
|
// `data[0]`/`linesize[0]` — in-struct fields of the non-null `f`, valid for the surface dims
|
||||||
|
// ffmpeg allocated — and pass them to the cuda copy helpers, which device→device copy `buf`
|
||||||
|
// (the imported `DeviceBuffer`, owned by the caller and live for this call) into the surface.
|
||||||
|
// * On copy error we free `f` and return. Otherwise we write `pts`/`pict_type` through `f` and
|
||||||
|
// `avcodec_send_frame` it into the live owned `self.enc` context (which takes its own ref of
|
||||||
|
// the pooled surface), then free our `f` ref exactly once. Single-threaded encoder → no race.
|
||||||
unsafe {
|
unsafe {
|
||||||
let mut f = ffi::av_frame_alloc();
|
let mut f = ffi::av_frame_alloc();
|
||||||
if f.is_null() {
|
if f.is_null() {
|
||||||
|
|||||||
@@ -19,6 +19,8 @@
|
|||||||
//! hwdevice/hwframes/buffersrc/buffersink calls go through `ffmpeg::ffi` (= `ffmpeg_sys_next`),
|
//! hwdevice/hwframes/buffersrc/buffersink calls go through `ffmpeg::ffi` (= `ffmpeg_sys_next`),
|
||||||
//! as the CUDA encode path and the clients' decode paths already do. The encoder is opened
|
//! as the CUDA encode path and the clients' decode paths already do. The encoder is opened
|
||||||
//! *without* a global header, so VPS/SPS/PPS are in-band on every IDR.
|
//! *without* a global header, so VPS/SPS/PPS are in-band on every IDR.
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{Codec, EncodedFrame, Encoder};
|
use super::{Codec, EncodedFrame, Encoder};
|
||||||
use crate::capture::{CapturedFrame, DmabufFrame, FramePayload, PixelFormat};
|
use crate::capture::{CapturedFrame, DmabufFrame, FramePayload, PixelFormat};
|
||||||
@@ -133,6 +135,14 @@ pub fn probe_can_encode(codec: Codec) -> bool {
|
|||||||
if ffmpeg::init().is_err() {
|
if ffmpeg::init().is_err() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// SAFETY: `ffmpeg::init()` returned Ok above, so libav is initialized. `av_log_get_level`/
|
||||||
|
// `av_log_set_level` only read/write libav's global integer log level (no pointer args) and are
|
||||||
|
// always sound to call post-init. `VaapiHw::new` (an `unsafe fn`) builds a VAAPI device + NV12
|
||||||
|
// frames pool from the literal NV12/640x480/pool=2 args and hands back a RAII handle that unrefs
|
||||||
|
// both `AVBufferRef`s on drop. `open_vaapi_encoder` (an `unsafe fn`) borrows `hw.device_ref`/
|
||||||
|
// `hw.frames_ref` — the two non-null refs `VaapiHw::new` just created — and `av_buffer_ref`s them
|
||||||
|
// into the encoder; `hw` is a live local for the whole match arm, so the borrows outlive the
|
||||||
|
// synchronous call, and both `hw` and the probe encoder are dropped (RAII) when the arm ends.
|
||||||
unsafe {
|
unsafe {
|
||||||
// A missing VA device (non-VAAPI host, GPU-less CI) is an expected probe outcome — quiet
|
// A missing VA device (non-VAAPI host, GPU-less CI) is an expected probe outcome — quiet
|
||||||
// ffmpeg's "No VA display found" error for the probe, then restore the level.
|
// ffmpeg's "No VA display found" error for the probe, then restore the level.
|
||||||
@@ -224,6 +234,12 @@ impl VaapiHw {
|
|||||||
|
|
||||||
impl Drop for VaapiHw {
|
impl Drop for VaapiHw {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `frames_ref`/`device_ref` are the two non-null `AVBufferRef`s `VaapiHw::new`
|
||||||
|
// created (it bails before constructing `Self` if either alloc fails, so a live `VaapiHw`
|
||||||
|
// always holds both). `av_buffer_unref` drops one reference and nulls the pointer through the
|
||||||
|
// `&mut`. This `Drop` runs exactly once and `VaapiHw` owns these refs exclusively, so there
|
||||||
|
// is no double-free / use-after-free. Frames are unref'd before the device because the frames
|
||||||
|
// ctx internally holds a ref on the device (refcounted, so the order is sound either way).
|
||||||
unsafe {
|
unsafe {
|
||||||
ffi::av_buffer_unref(&mut self.frames_ref);
|
ffi::av_buffer_unref(&mut self.frames_ref);
|
||||||
ffi::av_buffer_unref(&mut self.device_ref);
|
ffi::av_buffer_unref(&mut self.device_ref);
|
||||||
@@ -252,7 +268,16 @@ impl CpuInner {
|
|||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let src_pixel = vaapi_sws_src(format)?;
|
let src_pixel = vaapi_sws_src(format)?;
|
||||||
const POOL: c_int = 16;
|
const POOL: c_int = 16;
|
||||||
|
// SAFETY: `VaapiHw::new` (an `unsafe fn`) requires libav initialized — guaranteed because the
|
||||||
|
// only path here is `VaapiEncoder::open` → `ensure_inner` → `CpuInner::open`, and `open` ran
|
||||||
|
// `ffmpeg::init()`. The args are valid: NV12 sw_format, the validated positive `width`/`height`,
|
||||||
|
// pool=16. It returns a RAII `VaapiHw` that unrefs its two `AVBufferRef`s on drop.
|
||||||
let hw = unsafe { VaapiHw::new(ffi::AVPixelFormat::AV_PIX_FMT_NV12, width, height, POOL)? };
|
let hw = unsafe { VaapiHw::new(ffi::AVPixelFormat::AV_PIX_FMT_NV12, width, height, POOL)? };
|
||||||
|
// SAFETY: `open_vaapi_encoder` (an `unsafe fn`) borrows `hw.device_ref`/`hw.frames_ref` — both
|
||||||
|
// non-null (`VaapiHw::new` guarantees it) and from the `hw` just built above, which is a live
|
||||||
|
// local that outlives this synchronous call. The fn `av_buffer_ref`s them into the encoder, so
|
||||||
|
// the encoder holds its own references; `hw` is also moved into the returned `CpuInner` next to
|
||||||
|
// `enc`, keeping the device/frames alive for the encoder's whole lifetime.
|
||||||
let enc = unsafe {
|
let enc = unsafe {
|
||||||
open_vaapi_encoder(
|
open_vaapi_encoder(
|
||||||
codec,
|
codec,
|
||||||
@@ -266,6 +291,12 @@ impl CpuInner {
|
|||||||
};
|
};
|
||||||
// swscale RGB→NV12, BT.709 limited (matches the VUI), no rescale.
|
// swscale RGB→NV12, BT.709 limited (matches the VUI), no rescale.
|
||||||
let src_av = pixel_to_av(src_pixel);
|
let src_av = pixel_to_av(src_pixel);
|
||||||
|
// SAFETY: `sws_getContext` allocates a swscale context for the given src/dst dimensions and
|
||||||
|
// pixel formats. All four dims are the encoder's positive `width`/`height` cast to `c_int`;
|
||||||
|
// `src_av` is a valid `AVPixelFormat` (from `pixel_to_av` of the `vaapi_sws_src`-validated
|
||||||
|
// `src_pixel`), the dst is NV12. The three trailing pointers (srcFilter, dstFilter, param) are
|
||||||
|
// explicitly null = "use defaults", which the API documents as accepted. No Rust memory is
|
||||||
|
// borrowed — only by-value ints/enums — and the returned pointer is null-checked just below.
|
||||||
let sws = unsafe {
|
let sws = unsafe {
|
||||||
ffi::sws_getContext(
|
ffi::sws_getContext(
|
||||||
width as c_int,
|
width as c_int,
|
||||||
@@ -283,10 +314,23 @@ impl CpuInner {
|
|||||||
if sws.is_null() {
|
if sws.is_null() {
|
||||||
bail!("sws_getContext(RGB→NV12) failed");
|
bail!("sws_getContext(RGB→NV12) failed");
|
||||||
}
|
}
|
||||||
|
// SAFETY: `sws` is the non-null `SwsContext` from `sws_getContext` above (the `is_null()`
|
||||||
|
// check immediately preceding returned false). `sws_getCoefficients(SWS_CS_ITU709)` returns a
|
||||||
|
// pointer into a libswscale static const coefficient table valid for the whole process, reused
|
||||||
|
// here for both the inverse (src) and forward (dst) matrices. `sws_setColorspaceDetails` only
|
||||||
|
// reads those tables and writes scalar CSC settings into `sws`; the table pointer outlives the
|
||||||
|
// synchronous call and no Rust memory is passed.
|
||||||
unsafe {
|
unsafe {
|
||||||
let cs709 = ffi::sws_getCoefficients(SWS_CS_ITU709);
|
let cs709 = ffi::sws_getCoefficients(SWS_CS_ITU709);
|
||||||
ffi::sws_setColorspaceDetails(sws, cs709, 1, cs709, 0, 0, 1 << 16, 1 << 16);
|
ffi::sws_setColorspaceDetails(sws, cs709, 1, cs709, 0, 0, 1 << 16, 1 << 16);
|
||||||
}
|
}
|
||||||
|
// SAFETY: `av_frame_alloc` returns a fresh, uniquely-owned heap `AVFrame` (null-checked — on
|
||||||
|
// null we free the already-built `sws` and bail). We then write the plain `format`/`width`/
|
||||||
|
// `height` fields through the non-null, properly-aligned `f` (sole owner, not yet shared).
|
||||||
|
// `av_frame_get_buffer(f, 0)` allocates backing storage for those dims/format; on failure we
|
||||||
|
// free `f` and `sws` (unwinding the half-built state) and bail. On success `f` is a fully-owned
|
||||||
|
// NV12 frame stored in `CpuInner.nv12` and freed once in `CpuInner::drop`. `f` is a unique
|
||||||
|
// fresh pointer, so none of these writes alias anything.
|
||||||
let nv12 = unsafe {
|
let nv12 = unsafe {
|
||||||
let f = ffi::av_frame_alloc();
|
let f = ffi::av_frame_alloc();
|
||||||
if f.is_null() {
|
if f.is_null() {
|
||||||
@@ -329,6 +373,18 @@ impl CpuInner {
|
|||||||
let h = self.height as usize;
|
let h = self.height as usize;
|
||||||
let src_row = w * self.src_format.bytes_per_pixel();
|
let src_row = w * self.src_format.bytes_per_pixel();
|
||||||
anyhow::ensure!(bytes.len() >= src_row * h, "captured buffer too small");
|
anyhow::ensure!(bytes.len() >= src_row * h, "captured buffer too small");
|
||||||
|
// SAFETY: The `ensure!`s above guarantee `format == self.src_format` and
|
||||||
|
// `bytes.len() >= src_row * h`. `sws_scale` reads `h` rows of `src_row` bytes from
|
||||||
|
// `src_data[0] = bytes.as_ptr()` (the other planes null/0 — packed RGB is single-plane), all
|
||||||
|
// in bounds; `bytes`, `src_data`, `src_stride` are live locals for this synchronous call.
|
||||||
|
// `self.sws` is the non-null context built in `open`; it writes into `self.nv12` (a non-null
|
||||||
|
// owned frame whose `data`/`linesize` in-struct arrays were sized by `av_frame_get_buffer`).
|
||||||
|
// `av_frame_alloc` (null-checked) yields a fresh `hwf`; `av_hwframe_get_buffer` pulls a pooled
|
||||||
|
// VAAPI surface from the live non-null `self.hw.frames_ref`; `av_hwframe_transfer_data` uploads
|
||||||
|
// the staged NV12 into it — both frames live, failures free `hwf` and bail. We then write
|
||||||
|
// `pts`/`pict_type` through the non-null `hwf` and `avcodec_send_frame` it into the live
|
||||||
|
// owned `self.enc` context (which takes its own ref), then free our `hwf` ref exactly once.
|
||||||
|
// The encoder runs only on this thread (see `unsafe impl Send`), so no aliasing/data race.
|
||||||
unsafe {
|
unsafe {
|
||||||
let src_data: [*const u8; 4] = [bytes.as_ptr(), ptr::null(), ptr::null(), ptr::null()];
|
let src_data: [*const u8; 4] = [bytes.as_ptr(), ptr::null(), ptr::null(), ptr::null()];
|
||||||
let src_stride: [c_int; 4] = [src_row as c_int, 0, 0, 0];
|
let src_stride: [c_int; 4] = [src_row as c_int, 0, 0, 0];
|
||||||
@@ -374,6 +430,12 @@ impl CpuInner {
|
|||||||
|
|
||||||
impl Drop for CpuInner {
|
impl Drop for CpuInner {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `self.nv12` (an owned `AVFrame`) and `self.sws` (an owned `SwsContext`) are each
|
||||||
|
// freed exactly once here, guarded by `is_null()` so a never-set pointer is skipped (no double
|
||||||
|
// free). `CpuInner` owns both exclusively and `Drop` runs once. `av_frame_free` takes `&mut`
|
||||||
|
// and nulls the pointer. `self.enc`/`self.hw` are freed afterward by their own `Drop` impls;
|
||||||
|
// the encoder holds its own `av_buffer_ref`'d device/frames copies, so field-drop order is
|
||||||
|
// irrelevant to soundness.
|
||||||
unsafe {
|
unsafe {
|
||||||
if !self.nv12.is_null() {
|
if !self.nv12.is_null() {
|
||||||
ffi::av_frame_free(&mut self.nv12);
|
ffi::av_frame_free(&mut self.nv12);
|
||||||
@@ -417,6 +479,31 @@ impl DmabufInner {
|
|||||||
let drm_fourcc = crate::zerocopy::drm_fourcc(format)
|
let drm_fourcc = crate::zerocopy::drm_fourcc(format)
|
||||||
.ok_or_else(|| anyhow!("no DRM fourcc for {format:?} (VAAPI zero-copy)"))?;
|
.ok_or_else(|| anyhow!("no DRM fourcc for {format:?} (VAAPI zero-copy)"))?;
|
||||||
let node = render_node();
|
let node = render_node();
|
||||||
|
// SAFETY: libav is initialized (`VaapiEncoder::open` ran `ffmpeg::init()` before
|
||||||
|
// `ensure_inner` → `DmabufInner::open`). Every raw pointer dereferenced below is either freshly
|
||||||
|
// allocated by the immediately-preceding ffmpeg call and null-checked, or an in-struct field of
|
||||||
|
// such an object:
|
||||||
|
// * `node` is a `CString` (from `render_node`) live for the whole block; its `.as_ptr()` is a
|
||||||
|
// NUL-terminated path read only during `av_hwdevice_ctx_create`.
|
||||||
|
// * `av_hwdevice_ctx_create(&mut drm_device, DRM, …)` / `…_create_derived(&mut vaapi_device,
|
||||||
|
// VAAPI, drm_device, …)`: on `r < 0` the out-param stays null and we bail (the derive path
|
||||||
|
// unrefs `drm_device` first); on success each is a non-null owned `AVBufferRef`.
|
||||||
|
// * `av_hwframe_ctx_alloc(drm_device)` → `drm_frames` (null-checked); `(*drm_frames).data` is
|
||||||
|
// its `AVHWFramesContext` payload, written before `av_hwframe_ctx_init`.
|
||||||
|
// * `avfilter_graph_alloc` → `graph` (null-checked); `avfilter_get_by_name` returns a static
|
||||||
|
// const `AVFilter` (process-lifetime) or null; `avfilter_graph_alloc_filter` allocates each
|
||||||
|
// filter ctx inside `graph`; the four are null-checked together. `inst`/arg strings are
|
||||||
|
// 'static C literals.
|
||||||
|
// * `(*hwmap/scale).hw_device_ctx = av_buffer_ref(vaapi_device)` attaches a NEW ref owned by
|
||||||
|
// the filter (freed by `avfilter_graph_free`); our `vaapi_device` ref is untouched.
|
||||||
|
// * `av_buffersink_get_hw_frames_ctx(sink)` → `nv12_ctx` is a borrowed ref owned by the sink,
|
||||||
|
// valid while `graph` lives (and `graph` is moved into the returned `DmabufInner`).
|
||||||
|
// * `open_vaapi_encoder` borrows `vaapi_device` (our live owned ref) and `nv12_ctx` (sink's
|
||||||
|
// live ref) and `av_buffer_ref`s both into the encoder.
|
||||||
|
// Every early-error path unrefs the allocated buffers and frees the graph in the right order
|
||||||
|
// before bailing; on success the four `AVBufferRef`s + `graph` + `src`/`sink` are moved into
|
||||||
|
// `DmabufInner` and freed in its `Drop`. (The two former non-UB leaks — the extra
|
||||||
|
// `av_buffersrc_*` ref and the final `?` — are handled at the call sites below.)
|
||||||
unsafe {
|
unsafe {
|
||||||
// DRM device (source dmabuf frames) + a VAAPI device derived from it (same GPU) for
|
// DRM device (source dmabuf frames) + a VAAPI device derived from it (same GPU) for
|
||||||
// hwmap/scale_vaapi/the encoder.
|
// hwmap/scale_vaapi/the encoder.
|
||||||
@@ -509,7 +596,12 @@ impl DmabufInner {
|
|||||||
num: 1,
|
num: 1,
|
||||||
den: fps as c_int,
|
den: fps as c_int,
|
||||||
};
|
};
|
||||||
(*par).hw_frames_ctx = ffi::av_buffer_ref(drm_frames);
|
// Assign `drm_frames` BORROWED (no extra ref): `av_buffersrc_parameters_set` takes its
|
||||||
|
// own ref of `par->hw_frames_ctx` (via av_buffer_replace), and `av_free(par)` frees only
|
||||||
|
// the struct, not the ref. Our single owned `drm_frames` ref is retained, lives in
|
||||||
|
// `DmabufInner`, and is unref'd in `Drop`. Wrapping it in `av_buffer_ref` here would leak
|
||||||
|
// that extra ref every session (the persistent listener would accumulate them).
|
||||||
|
(*par).hw_frames_ctx = drm_frames;
|
||||||
let r = ffi::av_buffersrc_parameters_set(src, par);
|
let r = ffi::av_buffersrc_parameters_set(src, par);
|
||||||
ffi::av_free(par as *mut _);
|
ffi::av_free(par as *mut _);
|
||||||
if r < 0 {
|
if r < 0 {
|
||||||
@@ -564,7 +656,12 @@ impl DmabufInner {
|
|||||||
ffi::av_buffer_unref(&mut drm_device);
|
ffi::av_buffer_unref(&mut drm_device);
|
||||||
bail!("filter sink has no VAAPI frames context");
|
bail!("filter sink has no VAAPI frames context");
|
||||||
}
|
}
|
||||||
let enc = open_vaapi_encoder(
|
// On encoder-open failure, free the graph + our owned buffer refs before bailing (matching
|
||||||
|
// every error path above) so a failed session doesn't leak them. `nv12_ctx` is borrowed
|
||||||
|
// from the sink (owned by `graph`), so `avfilter_graph_free` reclaims it — don't unref it
|
||||||
|
// separately. On success the encoder takes its own ref of `vaapi_device`, and `drm_frames`/
|
||||||
|
// `vaapi_device`/`drm_device`/`graph` move into `DmabufInner` (freed in `Drop`).
|
||||||
|
let enc = match open_vaapi_encoder(
|
||||||
codec,
|
codec,
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
@@ -572,7 +669,16 @@ impl DmabufInner {
|
|||||||
bitrate_bps,
|
bitrate_bps,
|
||||||
vaapi_device,
|
vaapi_device,
|
||||||
nv12_ctx,
|
nv12_ctx,
|
||||||
)?;
|
) {
|
||||||
|
Ok(enc) => enc,
|
||||||
|
Err(e) => {
|
||||||
|
ffi::avfilter_graph_free(&mut graph);
|
||||||
|
ffi::av_buffer_unref(&mut drm_frames);
|
||||||
|
ffi::av_buffer_unref(&mut vaapi_device);
|
||||||
|
ffi::av_buffer_unref(&mut drm_device);
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
encoder = codec.vaapi_name(),
|
encoder = codec.vaapi_name(),
|
||||||
@@ -600,6 +706,23 @@ impl DmabufInner {
|
|||||||
dmabuf.fourcc,
|
dmabuf.fourcc,
|
||||||
self.fourcc
|
self.fourcc
|
||||||
);
|
);
|
||||||
|
// SAFETY: The `ensure!` above checked `dmabuf.fourcc == self.fourcc`.
|
||||||
|
// * `std::mem::zeroed::<AVDRMFrameDescriptor>()` is sound: it is a `#[repr(C)]` POD of ints and
|
||||||
|
// nested int-struct arrays (no `NonNull`/refs), for which all-zero is a valid bit pattern;
|
||||||
|
// `Box` puts it on the heap with a unique owner.
|
||||||
|
// * `dmabuf.fd.as_raw_fd()` is the fd of the caller's `&DmabufFrame`, which owns it for the
|
||||||
|
// whole synchronous `submit`; we describe one object/layer/plane from its
|
||||||
|
// fourcc/modifier/offset/stride and pass `object.size = 0` (ffmpeg queries the real size).
|
||||||
|
// * `av_frame_alloc` → `drm` (null-checked); we set its scalar fields and
|
||||||
|
// `hw_frames_ctx = av_buffer_ref(self.drm_frames)` (new ref of the live owned ctx).
|
||||||
|
// * `data[0] = Box::into_raw(desc)` transfers the box into the frame; `buf[0] =
|
||||||
|
// av_buffer_create(.., free_desc, ..)` registers a destructor that reclaims it exactly once
|
||||||
|
// when the buffer's refcount hits zero — matched alloc/free, no leak/double-free.
|
||||||
|
// * `av_buffersrc_add_frame_flags(self.src, drm, KEEP_REF)` pushes a ref into the live
|
||||||
|
// buffersrc; KEEP_REF keeps our own `drm` ref, which we then `av_frame_free`. We pull the
|
||||||
|
// converted surface with `av_buffersink_get_frame(self.sink, nv12)` BEFORE returning, so the
|
||||||
|
// dmabuf (owned by the caller) is read while still valid. `nv12` is sent into the live owned
|
||||||
|
// `self.enc` (takes its own ref) and our ref freed once. Single-threaded encoder → no race.
|
||||||
unsafe {
|
unsafe {
|
||||||
// Build a DRM-PRIME AVFrame describing the dmabuf (one object/fd, one layer/plane).
|
// Build a DRM-PRIME AVFrame describing the dmabuf (one object/fd, one layer/plane).
|
||||||
let mut desc: Box<ffi::AVDRMFrameDescriptor> = Box::new(std::mem::zeroed());
|
let mut desc: Box<ffi::AVDRMFrameDescriptor> = Box::new(std::mem::zeroed());
|
||||||
@@ -626,6 +749,11 @@ impl DmabufInner {
|
|||||||
// Own the descriptor so it frees with the frame (the fd is owned by the DmabufFrame,
|
// Own the descriptor so it frees with the frame (the fd is owned by the DmabufFrame,
|
||||||
// which outlives this call — the graph reads the surface before submit returns).
|
// which outlives this call — the graph reads the surface before submit returns).
|
||||||
extern "C" fn free_desc(_opaque: *mut std::ffi::c_void, data: *mut u8) {
|
extern "C" fn free_desc(_opaque: *mut std::ffi::c_void, data: *mut u8) {
|
||||||
|
// SAFETY: `data` is exactly the pointer produced by `Box::into_raw(desc)` and passed as
|
||||||
|
// `av_buffer_create`'s first arg, which libav hands back verbatim to this callback. It
|
||||||
|
// is a valid, uniquely-owned `Box<AVDRMFrameDescriptor>` raw pointer; libav invokes the
|
||||||
|
// callback exactly once (when the last buffer ref drops), so `from_raw` + `drop`
|
||||||
|
// reclaims it exactly once — no double-free. `_opaque` is unused (we passed null).
|
||||||
unsafe { drop(Box::from_raw(data as *mut ffi::AVDRMFrameDescriptor)) };
|
unsafe { drop(Box::from_raw(data as *mut ffi::AVDRMFrameDescriptor)) };
|
||||||
}
|
}
|
||||||
(*drm).buf[0] = ffi::av_buffer_create(
|
(*drm).buf[0] = ffi::av_buffer_create(
|
||||||
@@ -673,6 +801,13 @@ impl DmabufInner {
|
|||||||
|
|
||||||
impl Drop for DmabufInner {
|
impl Drop for DmabufInner {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `graph`/`drm_frames`/`vaapi_device`/`drm_device` are the non-null objects
|
||||||
|
// `DmabufInner::open` built and moved into `self` (open bails before constructing `Self` if any
|
||||||
|
// alloc fails). `avfilter_graph_free` frees the graph (and the per-filter device refs it owns);
|
||||||
|
// each `av_buffer_unref` drops one ref and nulls the pointer via `&mut`. `DmabufInner` owns all
|
||||||
|
// four exclusively and `Drop` runs once → no double-free/use-after-free. The graph is freed
|
||||||
|
// first (it holds refs on the devices), then frames, then the derived VAAPI device, then DRM.
|
||||||
|
// (`self.enc` drops via ffmpeg-next afterward, holding its own refs.)
|
||||||
unsafe {
|
unsafe {
|
||||||
ffi::avfilter_graph_free(&mut self.graph);
|
ffi::avfilter_graph_free(&mut self.graph);
|
||||||
ffi::av_buffer_unref(&mut self.drm_frames);
|
ffi::av_buffer_unref(&mut self.drm_frames);
|
||||||
@@ -703,6 +838,13 @@ pub struct VaapiEncoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Raw FFI pointers; the encoder lives on a single thread (same contract as `NvencEncoder`).
|
// Raw FFI pointers; the encoder lives on a single thread (same contract as `NvencEncoder`).
|
||||||
|
// SAFETY: `VaapiEncoder`'s `Inner` holds raw FFI pointers (`SwsContext`, `AVFrame`, `AVBufferRef`,
|
||||||
|
// `AVFilterContext`, `AVCodecContext`) that are not `Send` by default. The encoder is owned and
|
||||||
|
// driven by exactly ONE thread — the host's per-session encode thread it is moved (transferred) to —
|
||||||
|
// and is only ever touched through `&mut self` methods, so it is never aliased or accessed
|
||||||
|
// concurrently from two threads. None of the underlying libav/libswscale objects have thread
|
||||||
|
// affinity (they are not thread-local), so transferring ownership across threads is sound. This
|
||||||
|
// asserts `Send` (transfer) only; `Sync` (shared `&`) is deliberately NOT implemented.
|
||||||
unsafe impl Send for VaapiEncoder {}
|
unsafe impl Send for VaapiEncoder {}
|
||||||
|
|
||||||
impl VaapiEncoder {
|
impl VaapiEncoder {
|
||||||
@@ -720,6 +862,9 @@ impl VaapiEncoder {
|
|||||||
}
|
}
|
||||||
ffmpeg::init().context("ffmpeg init")?;
|
ffmpeg::init().context("ffmpeg init")?;
|
||||||
if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() {
|
if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() {
|
||||||
|
// SAFETY: `av_log_set_level` sets libav's global integer log level; `48` (= AV_LOG_DEBUG)
|
||||||
|
// is a valid level and there are no pointer args. libav was just initialized by the
|
||||||
|
// `ffmpeg::init()` above, so the call is always sound.
|
||||||
unsafe { ffi::av_log_set_level(48) };
|
unsafe { ffi::av_log_set_level(48) };
|
||||||
}
|
}
|
||||||
// Validate the codec/format up front so a bad request fails at open, not on the first frame.
|
// Validate the codec/format up front so a bad request fails at open, not on the first frame.
|
||||||
|
|||||||
@@ -28,6 +28,8 @@
|
|||||||
//! through `ffmpeg::ffi` (= `ffmpeg_sys_next`), exactly as the Linux CUDA/VAAPI paths do. The
|
//! through `ffmpeg::ffi` (= `ffmpeg_sys_next`), exactly as the Linux CUDA/VAAPI paths do. The
|
||||||
//! `AVD3D11VADeviceContext`/`AVD3D11VAFramesContext` layouts are mirrored (the bindings don't
|
//! `AVD3D11VADeviceContext`/`AVD3D11VAFramesContext` layouts are mirrored (the bindings don't
|
||||||
//! allowlist `hwcontext_d3d11va.h`), as [`super::linux`] mirrors `AVCUDADeviceContext`.
|
//! allowlist `hwcontext_d3d11va.h`), as [`super::linux`] mirrors `AVCUDADeviceContext`.
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{Codec, EncodedFrame, Encoder};
|
use super::{Codec, EncodedFrame, Encoder};
|
||||||
use crate::capture::{dxgi::D3d11Frame, CapturedFrame, FramePayload, PixelFormat};
|
use crate::capture::{dxgi::D3d11Frame, CapturedFrame, FramePayload, PixelFormat};
|
||||||
@@ -243,6 +245,12 @@ pub fn probe_can_encode(vendor: WinVendor, codec: Codec) -> bool {
|
|||||||
if ffmpeg::init().is_err() {
|
if ffmpeg::init().is_err() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// SAFETY: `ffmpeg::init()` succeeded above, so libav's global state is initialised.
|
||||||
|
// `av_log_get_level`/`av_log_set_level` are global scalar getters/setters with no pointer args.
|
||||||
|
// `open_win_encoder` (the `unsafe fn`) is called with null `device_ref`/`frames_ref` (the system
|
||||||
|
// path), so it touches no D3D11/hwcontext — it only allocates and opens a self-contained
|
||||||
|
// libavcodec encoder that is dropped at the end of `.is_ok()`. We restore the prior log level and
|
||||||
|
// no raw pointer escapes the block.
|
||||||
unsafe {
|
unsafe {
|
||||||
// A missing AMF/QSV runtime (wrong-vendor host, GPU-less CI) is an expected probe outcome —
|
// A missing AMF/QSV runtime (wrong-vendor host, GPU-less CI) is an expected probe outcome —
|
||||||
// quiet ffmpeg's open error for the probe, then restore the level.
|
// quiet ffmpeg's open error for the probe, then restore the level.
|
||||||
@@ -337,6 +345,10 @@ impl SystemInner {
|
|||||||
} else {
|
} else {
|
||||||
ffi::AVPixelFormat::AV_PIX_FMT_NV12
|
ffi::AVPixelFormat::AV_PIX_FMT_NV12
|
||||||
};
|
};
|
||||||
|
// SAFETY: calls the `unsafe fn open_win_encoder` with null `device_ref`/`frames_ref`, so the
|
||||||
|
// system path is taken (no hw device/frames context is touched); all other args are scalars.
|
||||||
|
// The returned `encoder::video::Encoder` owns its `AVCodecContext` and frees it on drop; no raw
|
||||||
|
// pointer is aliased.
|
||||||
let enc = unsafe {
|
let enc = unsafe {
|
||||||
open_win_encoder(
|
open_win_encoder(
|
||||||
vendor,
|
vendor,
|
||||||
@@ -352,6 +364,11 @@ impl SystemInner {
|
|||||||
ptr::null_mut(),
|
ptr::null_mut(),
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
// SAFETY: `av_frame_alloc` returns a freshly-allocated, uniquely-owned `AVFrame` (null-checked
|
||||||
|
// before any deref); writing `format`/`width`/`height` through `*f` stays inside that
|
||||||
|
// allocation. `av_frame_get_buffer(f, 0)` allocates the backing planes — on failure we
|
||||||
|
// `av_frame_free` the sole owner (no double-free) and bail; on success the raw `f` is moved into
|
||||||
|
// `self.sw_frame` and freed exactly once in `Drop`.
|
||||||
let sw_frame = unsafe {
|
let sw_frame = unsafe {
|
||||||
let f = ffi::av_frame_alloc();
|
let f = ffi::av_frame_alloc();
|
||||||
if f.is_null() {
|
if f.is_null() {
|
||||||
@@ -467,6 +484,18 @@ impl SystemInner {
|
|||||||
} else {
|
} else {
|
||||||
DXGI_FORMAT_NV12
|
DXGI_FORMAT_NV12
|
||||||
};
|
};
|
||||||
|
// SAFETY: `ensure_staging` builds a STAGING texture (CPU_ACCESS_READ) matching `dxgi_fmt` on
|
||||||
|
// `frame.device` — the same `ID3D11Device` that owns `frame.texture` — and caches that device's
|
||||||
|
// immediate context in `self.ctx`. `src`/`dst` are that device's textures of identical NV12/P010
|
||||||
|
// format and dimensions, so `CopyResource` on the single-threaded immediate context is valid.
|
||||||
|
// `Map(.., D3D11_MAP_READ)` succeeds on a staging texture and yields `map.pData` valid for the
|
||||||
|
// whole resource; for NV12/P010 the luma plane is `H` rows at `RowPitch` and the chroma plane
|
||||||
|
// follows at byte offset `RowPitch*H` (`H/2` rows), so `total = pitch*(H+⌈H/2⌉)` is exactly the
|
||||||
|
// mapped extent and `from_raw_parts(base, total)` stays in-bounds. Each `copy_nonoverlapping`
|
||||||
|
// reads a bounds-checked `mapped[..]` sub-slice (`row_bytes ≤ pitch`) and writes `row_bytes ≤
|
||||||
|
// linesize` into the `av_frame_get_buffer`-allocated plane at row `y < H`, so every destination
|
||||||
|
// offset is inside the frame's plane allocation; src and dst never alias. `Unmap` pairs `Map`,
|
||||||
|
// then `send` (the `unsafe fn`) hands `sw_frame` to the encoder.
|
||||||
unsafe {
|
unsafe {
|
||||||
self.ensure_staging(&frame.device, dxgi_fmt)?;
|
self.ensure_staging(&frame.device, dxgi_fmt)?;
|
||||||
let staging = self.staging.clone().context("staging texture")?;
|
let staging = self.staging.clone().context("staging texture")?;
|
||||||
@@ -510,6 +539,14 @@ impl SystemInner {
|
|||||||
if self.ten_bit {
|
if self.ten_bit {
|
||||||
bail!("ffmpeg_win: BGRA readback is 8-bit only (HDR needs the P010 capture path)");
|
bail!("ffmpeg_win: BGRA readback is 8-bit only (HDR needs the P010 capture path)");
|
||||||
}
|
}
|
||||||
|
// SAFETY: `ensure_staging` builds a B8G8R8A8 STAGING texture on `frame.device` and caches that
|
||||||
|
// device's immediate context; `src`/`dst` are that device's textures of matching BGRA format,
|
||||||
|
// so `CopyResource` on the single-threaded context is valid. `Map(READ)` on the staging texture
|
||||||
|
// yields `base` valid for `pitch` × `h` rows. `ensure_sws` lazily builds the BGRA→NV12 context;
|
||||||
|
// `sws_scale` reads `h` rows of `pitch` bytes from `base` (in-bounds — the staging surface is
|
||||||
|
// `≥ pitch*h`) into the `sw_frame` planes addressed by its `data`/`linesize` (allocated for
|
||||||
|
// `width`×`height` NV12). `Unmap` pairs `Map`; the cached `sws` is freed once in `Drop`. The
|
||||||
|
// mapped read region never aliases the owned encoder frame.
|
||||||
unsafe {
|
unsafe {
|
||||||
self.ensure_staging(&frame.device, DXGI_FORMAT_B8G8R8A8_UNORM)?;
|
self.ensure_staging(&frame.device, DXGI_FORMAT_B8G8R8A8_UNORM)?;
|
||||||
let staging = self.staging.clone().context("staging texture")?;
|
let staging = self.staging.clone().context("staging texture")?;
|
||||||
@@ -552,6 +589,13 @@ impl SystemInner {
|
|||||||
/// R10 shader output instead of P010. DXGI `R10G10B10A2_UNORM` (R in the low 10 bits, X2 alpha in
|
/// R10 shader output instead of P010. DXGI `R10G10B10A2_UNORM` (R in the low 10 bits, X2 alpha in
|
||||||
/// the top 2) == FFmpeg `AV_PIX_FMT_X2BGR10LE`. UNTESTED on glass (no AMD/Intel Windows box).
|
/// the top 2) == FFmpeg `AV_PIX_FMT_X2BGR10LE`. UNTESTED on glass (no AMD/Intel Windows box).
|
||||||
fn readback_rgb10(&mut self, frame: &D3d11Frame, pts: i64, idr: bool) -> Result<()> {
|
fn readback_rgb10(&mut self, frame: &D3d11Frame, pts: i64, idr: bool) -> Result<()> {
|
||||||
|
// SAFETY: same shape as `readback_yuv`/`readback_bgra` — `ensure_staging` builds an
|
||||||
|
// R10G10B10A2 STAGING texture on `frame.device` and caches its immediate context; `src`/`dst`
|
||||||
|
// are that device's matching-format textures, so `CopyResource` on the single-threaded context
|
||||||
|
// is valid. `Map(READ)` yields `base` valid for `pitch` × `h` rows. `ensure_sws` builds the
|
||||||
|
// X2BGR10LE→P010 (BT.2020) context; `sws_scale` reads `h` rows of `pitch` bytes from `base`
|
||||||
|
// (in-bounds) into the `sw_frame` P010 planes (`data`/`linesize`, allocated `width`×`height`).
|
||||||
|
// `Unmap` pairs `Map`; `sws` is freed once in `Drop`. No aliasing between read and write.
|
||||||
unsafe {
|
unsafe {
|
||||||
self.ensure_staging(&frame.device, DXGI_FORMAT_R10G10B10A2_UNORM)?;
|
self.ensure_staging(&frame.device, DXGI_FORMAT_R10G10B10A2_UNORM)?;
|
||||||
let staging = self.staging.clone().context("staging texture")?;
|
let staging = self.staging.clone().context("staging texture")?;
|
||||||
@@ -605,6 +649,12 @@ impl SystemInner {
|
|||||||
let h = self.height as usize;
|
let h = self.height as usize;
|
||||||
let src_row = w * format.bytes_per_pixel();
|
let src_row = w * format.bytes_per_pixel();
|
||||||
anyhow::ensure!(bytes.len() >= src_row * h, "captured buffer too small");
|
anyhow::ensure!(bytes.len() >= src_row * h, "captured buffer too small");
|
||||||
|
// SAFETY: `ensure_sws` lazily builds the (packed RGB/BGR)→NV12 context for this fixed src/dst
|
||||||
|
// format pair. `src_data[0] = bytes.as_ptr()` with `src_stride[0] = src_row`; the `ensure!`
|
||||||
|
// above guarantees `bytes` holds at least `src_row*h` bytes, so `sws_scale` reads `h` rows of
|
||||||
|
// `src_row` bytes in-bounds and writes the `sw_frame` NV12 planes (`data`/`linesize`, allocated
|
||||||
|
// `width`×`height`). `bytes` is borrowed for the call only and never aliases the owned
|
||||||
|
// `sw_frame`. `send` then hands `sw_frame` to the encoder.
|
||||||
unsafe {
|
unsafe {
|
||||||
self.ensure_sws(
|
self.ensure_sws(
|
||||||
pixel_to_av(sws_src(format)?),
|
pixel_to_av(sws_src(format)?),
|
||||||
@@ -667,6 +717,10 @@ impl SystemInner {
|
|||||||
|
|
||||||
impl Drop for SystemInner {
|
impl Drop for SystemInner {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `sw_frame` is the `AVFrame` allocated in `open` (or null) — `av_frame_free` drops it
|
||||||
|
// once and nulls the pointer through the `&mut`; `sws` is the cached `SwsContext` (or null) —
|
||||||
|
// `sws_freeContext` frees it once. This `Drop` runs exactly once and `SystemInner` owns both
|
||||||
|
// exclusively, so there is no double-free or use-after-free.
|
||||||
unsafe {
|
unsafe {
|
||||||
if !self.sw_frame.is_null() {
|
if !self.sw_frame.is_null() {
|
||||||
ffi::av_frame_free(&mut self.sw_frame);
|
ffi::av_frame_free(&mut self.sw_frame);
|
||||||
@@ -745,6 +799,12 @@ impl D3d11Hw {
|
|||||||
|
|
||||||
impl Drop for D3d11Hw {
|
impl Drop for D3d11Hw {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `frames_ref`/`device_ref` are the two non-null `AVBufferRef`s `D3d11Hw::new` created
|
||||||
|
// (it bails before constructing `Self` if either alloc/init fails, so a live `D3d11Hw` always
|
||||||
|
// holds both). `av_buffer_unref` drops one reference and nulls the pointer through the `&mut`.
|
||||||
|
// This `Drop` runs exactly once and `D3d11Hw` owns these refs exclusively → no double-free /
|
||||||
|
// use-after-free. Frames are unref'd before the device because the frames ctx internally holds
|
||||||
|
// a ref on the device (refcounted, so the order is sound either way).
|
||||||
unsafe {
|
unsafe {
|
||||||
ffi::av_buffer_unref(&mut self.frames_ref);
|
ffi::av_buffer_unref(&mut self.frames_ref);
|
||||||
ffi::av_buffer_unref(&mut self.device_ref);
|
ffi::av_buffer_unref(&mut self.device_ref);
|
||||||
@@ -800,6 +860,18 @@ impl ZeroCopyInner {
|
|||||||
WinVendor::Qsv => (D3D11_BIND_DECODER.0 | D3D11_BIND_VIDEO_ENCODER.0) as u32,
|
WinVendor::Qsv => (D3D11_BIND_DECODER.0 | D3D11_BIND_VIDEO_ENCODER.0) as u32,
|
||||||
};
|
};
|
||||||
const POOL: c_int = 8;
|
const POOL: c_int = 8;
|
||||||
|
// SAFETY: `D3d11Hw::new` wraps the capturer's `device` as a D3D11VA hwdevice (handing FFmpeg an
|
||||||
|
// owned AddRef of it, balanced by FFmpeg's teardown Release) and builds an owned
|
||||||
|
// device_ref/frames_ref pair freed by `D3d11Hw::Drop`; `hw` is a local, so it is dropped (and
|
||||||
|
// both refs freed) on every early `return Err`. For QSV, `av_hwdevice_ctx_create_derived` and
|
||||||
|
// `av_hwframe_ctx_create_derived` fill the null-initialised `qsv_device`/`qsv_frames` out-params
|
||||||
|
// only on success (`r >= 0` checked); on the frames-derive failure we unref the already-created
|
||||||
|
// `qsv_device` before bailing. `open_win_encoder` internally `av_buffer_ref`s the dev/frames
|
||||||
|
// refs it is given (so ownership of `hw`'s and the derived refs stays here), and on its failure
|
||||||
|
// we unref the still-owned derived `qsv_frames`/`qsv_device` (null for AMF → skipped) and return
|
||||||
|
// — `hw` then drops its D3D11 refs. On success the derived refs are moved into `ZeroCopyInner`
|
||||||
|
// (freed in its `Drop`) and the encoder holds its own AddRef'd copies. Every `AVBufferRef` is
|
||||||
|
// unref'd exactly once across all paths — no leak, no double-free.
|
||||||
unsafe {
|
unsafe {
|
||||||
let hw = D3d11Hw::new(device, sw_av, bind_flags, width, height, POOL)?;
|
let hw = D3d11Hw::new(device, sw_av, bind_flags, width, height, POOL)?;
|
||||||
let (pix_fmt, dev_ref, frames_ref, mut qsv_device, mut qsv_frames) = match vendor {
|
let (pix_fmt, dev_ref, frames_ref, mut qsv_device, mut qsv_frames) = match vendor {
|
||||||
@@ -887,6 +959,19 @@ impl ZeroCopyInner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn submit(&mut self, frame: &D3d11Frame, pts: i64, idr: bool) -> Result<()> {
|
fn submit(&mut self, frame: &D3d11Frame, pts: i64, idr: bool) -> Result<()> {
|
||||||
|
// SAFETY: `d3d = av_frame_alloc()` is a fresh owned frame (null-checked) and is `av_frame_free`d
|
||||||
|
// exactly once on every path below. `av_hwframe_get_buffer` fills it from the pool — on failure
|
||||||
|
// we free it and bail. `(*d3d).data[0]` is the pool's texture-array and `data[1]` the array
|
||||||
|
// index; `from_raw_borrowed` borrows that `ID3D11Texture2D` WITHOUT taking ownership (no Release
|
||||||
|
// — the frame owns it) and is null-checked. `src` (the captured texture) and `dst` (the pooled
|
||||||
|
// slice) live on the SAME D3D11 device wrapped by `self.hw`, and the caller guarantees
|
||||||
|
// `captured.format == pool_format` before calling, so `CopySubresourceRegion(dst, dst_index, ..,
|
||||||
|
// src, 0, ..)` on the single-threaded immediate context `self.ctx` is a valid same-format GPU
|
||||||
|
// copy. For QSV the mapped `qsv` frame is a fresh owned frame whose `hw_frames_ctx` takes an
|
||||||
|
// `av_buffer_ref` of `self.qsv_frames`; it is `av_frame_free`d (releasing that ref) on both the
|
||||||
|
// map-failure and success paths. `avcodec_send_frame` only internally refs the input frame, so
|
||||||
|
// the `av_frame_free(d3d)`/`av_frame_free(qsv)` afterwards are the sole owning frees — no leak,
|
||||||
|
// no double-free, no use-after-free.
|
||||||
unsafe {
|
unsafe {
|
||||||
// Pull a pooled D3D11 surface; its data[0] is the pool's texture-ARRAY, data[1] the slice.
|
// Pull a pooled D3D11 surface; its data[0] is the pool's texture-ARRAY, data[1] the slice.
|
||||||
let mut d3d = ffi::av_frame_alloc();
|
let mut d3d = ffi::av_frame_alloc();
|
||||||
@@ -959,6 +1044,11 @@ impl ZeroCopyInner {
|
|||||||
|
|
||||||
impl Drop for ZeroCopyInner {
|
impl Drop for ZeroCopyInner {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `qsv_frames`/`qsv_device` are the derived QSV `AVBufferRef`s (or null for AMF); each
|
||||||
|
// is `av_buffer_unref`'d once here (nulling the pointer through the `&mut`) — `ZeroCopyInner`
|
||||||
|
// owns these handles exclusively and this `Drop` runs once, so no double-free. The `enc` and
|
||||||
|
// `hw` fields free the encoder's AddRef'd copies and the D3D11 device/frames refs through their
|
||||||
|
// own `Drop`, so all references stay balanced.
|
||||||
unsafe {
|
unsafe {
|
||||||
if !self.qsv_frames.is_null() {
|
if !self.qsv_frames.is_null() {
|
||||||
ffi::av_buffer_unref(&mut self.qsv_frames);
|
ffi::av_buffer_unref(&mut self.qsv_frames);
|
||||||
@@ -996,6 +1086,13 @@ pub struct FfmpegWinEncoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Raw FFI pointers + COM objects; the encoder lives on a single thread (same contract as NVENC/VAAPI).
|
// Raw FFI pointers + COM objects; the encoder lives on a single thread (same contract as NVENC/VAAPI).
|
||||||
|
// SAFETY: `FfmpegWinEncoder` owns raw libav pointers (`AVFrame`/`SwsContext`/`AVBufferRef`) and
|
||||||
|
// windows-rs COM handles (`ID3D11Device`/`ID3D11DeviceContext`/textures) that are not auto-`Send`. The
|
||||||
|
// session creates the encoder, drives `submit`/`poll`/`flush`, and drops it all on one dedicated encode
|
||||||
|
// thread; it is never shared by reference across threads, and the D3D11 immediate context is only ever
|
||||||
|
// touched from that thread. The only cross-thread action is the initial move to the encode thread,
|
||||||
|
// after which every interior pointer/COM ref is used single-threaded — the same contract the
|
||||||
|
// NVENC/VAAPI encoders rely on. No interior state is accessed concurrently.
|
||||||
unsafe impl Send for FfmpegWinEncoder {}
|
unsafe impl Send for FfmpegWinEncoder {}
|
||||||
|
|
||||||
impl FfmpegWinEncoder {
|
impl FfmpegWinEncoder {
|
||||||
@@ -1012,6 +1109,8 @@ impl FfmpegWinEncoder {
|
|||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
ffmpeg::init().context("ffmpeg init")?;
|
ffmpeg::init().context("ffmpeg init")?;
|
||||||
if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() {
|
if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() {
|
||||||
|
// SAFETY: `ffmpeg::init()` ran on the line above, so libav is initialised; `av_log_set_level`
|
||||||
|
// is a global scalar setter with no pointer arguments.
|
||||||
unsafe { ffi::av_log_set_level(48) };
|
unsafe { ffi::av_log_set_level(48) };
|
||||||
}
|
}
|
||||||
// Make sure the encoder name exists in this libavcodec build up front (clear error vs a
|
// Make sure the encoder name exists in this libavcodec build up front (clear error vs a
|
||||||
|
|||||||
@@ -13,6 +13,9 @@
|
|||||||
//! Needs a real NVIDIA GPU at runtime (session creation fails otherwise) — compiles GPU-less, but
|
//! Needs a real NVIDIA GPU at runtime (session creation fails otherwise) — compiles GPU-less, but
|
||||||
//! `open`/`submit` only succeed on a GPU box. The software encoder (`super::sw`) is the fallback.
|
//! `open`/`submit` only succeed on a GPU box. The software encoder (`super::sw`) is the fallback.
|
||||||
|
|
||||||
|
// Every `unsafe` block / impl in this file carries a `// SAFETY:` proof; enforce it.
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{Codec, EncodedFrame, Encoder, EncoderCaps};
|
use super::{Codec, EncodedFrame, Encoder, EncoderCaps};
|
||||||
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
|
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
|
||||||
use anyhow::{anyhow, bail, Context, Result};
|
use anyhow::{anyhow, bail, Context, Result};
|
||||||
@@ -88,7 +91,15 @@ pub struct NvencD3d11Encoder {
|
|||||||
init_device: *mut c_void,
|
init_device: *mut c_void,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Raw NVENC handle + COM ptrs; confined to the single encode thread (like the Linux encoder).
|
// SAFETY: the `!Send` fields are the raw NVENC session/device handles (`encoder`, `init_device`),
|
||||||
|
// the raw NVENC bitstream/registered/mapped pointers carried in `bitstreams`/`regs`/`pending`, and
|
||||||
|
// the `ID3D11Texture2D` COM refs — none of which may be touched concurrently from two threads. This
|
||||||
|
// encoder is owned by exactly one thread: it is moved onto the host encode thread once at
|
||||||
|
// construction, and every NVENC call and D3D11 access happens only from that thread thereafter
|
||||||
|
// (`submit`/`poll`/`invalidate_ref_frames`/`Drop` all run there, like the Linux encoder). Moving the
|
||||||
|
// handles across that single ownership-transfer boundary is sound because no NVENC/D3D11 call is in
|
||||||
|
// flight during the move and the session and its D3D11 immediate context are never shared (`&`) or
|
||||||
|
// used concurrently — so `Send` introduces no data race on the non-`Send` fields.
|
||||||
unsafe impl Send for NvencD3d11Encoder {}
|
unsafe impl Send for NvencD3d11Encoder {}
|
||||||
|
|
||||||
impl NvencD3d11Encoder {
|
impl NvencD3d11Encoder {
|
||||||
@@ -403,6 +414,17 @@ impl NvencD3d11Encoder {
|
|||||||
|
|
||||||
/// Lazily create the session on the first frame's D3D11 device (so capture + encode share it).
|
/// Lazily create the session on the first frame's D3D11 device (so capture + encode share it).
|
||||||
fn init_session(&mut self, device: &ID3D11Device) -> Result<()> {
|
fn init_session(&mut self, device: &ID3D11Device) -> Result<()> {
|
||||||
|
// SAFETY: every call below goes through a function pointer resolved once from the loaded
|
||||||
|
// `nvidia_video_codec_sdk::ENCODE_API` (`nvEncodeAPI`) table, or through this type's own
|
||||||
|
// `unsafe fn`s whose contract is met here. `query_caps`/`try_open_session` receive `device`,
|
||||||
|
// the live `ID3D11Device` the caller pulled off the first frame; each returns either a valid
|
||||||
|
// open NVENC session handle or an `Err`. `destroy_encoder` is only ever called on a handle a
|
||||||
|
// `try_open_session` just returned (and `best` only when `!best.is_null()`), so it never frees
|
||||||
|
// a dangling or null session. `create_bitstream_buffer` is passed `enc` — the one chosen live
|
||||||
|
// session — and `&mut cb`, a `#[repr(C)] NV_ENC_CREATE_BITSTREAM_BUFFER` whose `version` is set
|
||||||
|
// to `NV_ENC_CREATE_BITSTREAM_BUFFER_VER`; `cb` lives across the synchronous call and its
|
||||||
|
// returned `bitstreamBuffer` is copied into `self.bitstreams` before `cb` drops. No handle
|
||||||
|
// escapes the encode thread.
|
||||||
unsafe {
|
unsafe {
|
||||||
// Probe real GPU caps first (max dims / 10-bit / custom-VBV / RFI) so the config below is
|
// Probe real GPU caps first (max dims / 10-bit / custom-VBV / RFI) so the config below is
|
||||||
// gated on what this card supports and an out-of-range mode fails with a clear error
|
// gated on what this card supports and an out-of-range mode fails with a clear error
|
||||||
@@ -589,6 +611,11 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
new = format!("{}x{}", captured.width, captured.height),
|
new = format!("{}x{}", captured.width, captured.height),
|
||||||
"NVENC: capture device/size/HDR changed — re-initializing session"
|
"NVENC: capture device/size/HDR changed — re-initializing session"
|
||||||
);
|
);
|
||||||
|
// SAFETY: `teardown` (an `unsafe fn`) requires the encode thread with no NVENC call in
|
||||||
|
// flight and a session whose cached regs/bitstreams/pending all belong to `self.encoder`.
|
||||||
|
// All hold: this is the synchronous encode thread, `self.inited` so `self.encoder` is the
|
||||||
|
// live session every cached resource was created against, and the previous frame's encode
|
||||||
|
// has already been polled (synchronous submit→poll), so nothing is mid-encode.
|
||||||
unsafe { self.teardown() };
|
unsafe { self.teardown() };
|
||||||
}
|
}
|
||||||
if !self.inited {
|
if !self.inited {
|
||||||
@@ -609,7 +636,14 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
self.bit_depth = 10;
|
self.bit_depth = 10;
|
||||||
nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ABGR10
|
nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ABGR10
|
||||||
}
|
}
|
||||||
PixelFormat::Nv12 => nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12,
|
PixelFormat::Nv12 => {
|
||||||
|
// NV12 is 8-bit 4:2:0. Force 8-bit so a transition from a prior P010 (10-bit) session
|
||||||
|
// — or a 10-bit-negotiated client on an SDR display — re-inits at the matching depth.
|
||||||
|
// Unlike ARGB (which NVENC upconverts to Main10), NV12 cannot feed a 10-bit session:
|
||||||
|
// `register_resource` rejects it as InvalidParam (the HDR→SDR-toggle stream drop).
|
||||||
|
self.bit_depth = 8;
|
||||||
|
nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12
|
||||||
|
}
|
||||||
_ => nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB,
|
_ => nv::NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB,
|
||||||
};
|
};
|
||||||
let device = frame.device.clone();
|
let device = frame.device.clone();
|
||||||
@@ -618,6 +652,21 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
}
|
}
|
||||||
let slot = self.next % POOL;
|
let slot = self.next % POOL;
|
||||||
self.next += 1;
|
self.next += 1;
|
||||||
|
// SAFETY: every NVENC call goes through a function pointer from the loaded `ENCODE_API` table
|
||||||
|
// and takes `self.encoder`, the live session `init_session` just established (non-null on the
|
||||||
|
// path that reaches here). `NV_ENC_REGISTER_RESOURCE rr` has `version =
|
||||||
|
// NV_ENC_REGISTER_RESOURCE_VER` and registers `frame.texture` — a D3D11 texture from
|
||||||
|
// `frame.device`, which is the SAME device the session was opened against (any device change
|
||||||
|
// tears down and re-inits above, so `init_device == frame.device.as_raw()` here); the cloned
|
||||||
|
// `ID3D11Texture2D` is kept alive in `regs` so NVENC's registration never outlives the texture.
|
||||||
|
// `mp` (`NV_ENC_MAP_INPUT_RESOURCE`, version set) maps that registration and the map is recorded
|
||||||
|
// in `pending` to be unmapped exactly once in `poll`/`teardown`. `pic` (`NV_ENC_PIC_PARAMS`,
|
||||||
|
// version set) points `inputBuffer` at `mp.mappedResource` and `outputBitstream` at the live
|
||||||
|
// pool bitstream `bitstreams[slot]`; the optional SEI scratch (`mastering_sei`/`cll_sei` and the
|
||||||
|
// `sei` Vec whose `as_mut_ptr()` is written into the codec union) are stack locals that outlive
|
||||||
|
// the synchronous `encode_picture`. Every `#[repr(C)]` param is a live local borrowed `&mut`
|
||||||
|
// for the duration of its one synchronous call. (In-place encode without `CopyResource` is
|
||||||
|
// sound because the encode loop is synchronous, as the module docs state.)
|
||||||
unsafe {
|
unsafe {
|
||||||
// Register the capturer's texture with NVENC once (cached by raw pointer), then encode it
|
// Register the capturer's texture with NVENC once (cached by raw pointer), then encode it
|
||||||
// IN PLACE — no `CopyResource` into an encoder-owned pool. This is the zero-copy win: the
|
// IN PLACE — no `CopyResource` into an encoder-owned pool. This is the zero-copy win: the
|
||||||
@@ -774,6 +823,12 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
// We tag each input with `inputTimeStamp = frame_idx` (0,1,2,…), which is also the client's
|
// We tag each input with `inputTimeStamp = frame_idx` (0,1,2,…), which is also the client's
|
||||||
// frame number (the packetizer numbers frames in submit order), so the client's lost-frame
|
// frame number (the packetizer numbers frames in submit order), so the client's lost-frame
|
||||||
// range maps 1:1 onto the timestamps NVENC invalidates here.
|
// range maps 1:1 onto the timestamps NVENC invalidates here.
|
||||||
|
// SAFETY: `invalidate_ref_frames` is a function pointer from the loaded `ENCODE_API` table.
|
||||||
|
// `self.encoder` was checked non-null at the top of this fn and is the live session; this runs
|
||||||
|
// on the encode thread (like submit/poll), so there is no concurrent NVENC use. Each `ts` was
|
||||||
|
// clamped to `[oldest_in_dpb, frame_idx - 1]` above, so it names a frame still in the session's
|
||||||
|
// DPB; the call passes only that `u64` timestamp (no struct), so there is no struct-size or
|
||||||
|
// lifetime concern.
|
||||||
unsafe {
|
unsafe {
|
||||||
for ts in first..=last {
|
for ts in first..=last {
|
||||||
if (API.invalidate_ref_frames)(self.encoder, ts as u64)
|
if (API.invalidate_ref_frames)(self.encoder, ts as u64)
|
||||||
@@ -792,6 +847,16 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
let Some((bs, map, pts_ns)) = self.pending.pop_front() else {
|
let Some((bs, map, pts_ns)) = self.pending.pop_front() else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
|
// SAFETY: a non-empty `pending` implies `submit` ran, so `self.encoder` is the live session
|
||||||
|
// (`teardown` clears `pending` whenever it nulls the handle); all calls below use function
|
||||||
|
// pointers from the loaded `ENCODE_API` table on the encode thread. `NV_ENC_LOCK_BITSTREAM lock`
|
||||||
|
// (version = `NV_ENC_LOCK_BITSTREAM_VER`) locks `bs`, a pool bitstream a prior `encode_picture`
|
||||||
|
// targeted; `lock_bitstream` blocks until that encode finishes, so on success
|
||||||
|
// `lock.bitstreamBufferPtr` is non-null and points at `lock.bitstreamSizeInBytes` bytes of
|
||||||
|
// NVENC-owned, CPU-readable output valid until `unlock_bitstream`. The `from_raw_parts` slice is
|
||||||
|
// only read (copied via `to_vec()`) BEFORE `unlock_bitstream(bs)` — lock and unlock pair on the
|
||||||
|
// same buffer — so it never outlives the lock. `map` (the input resource paired with `bs` in
|
||||||
|
// `pending`) is unmapped here, after the encode completed, exactly once.
|
||||||
unsafe {
|
unsafe {
|
||||||
let mut lock = nv::NV_ENC_LOCK_BITSTREAM {
|
let mut lock = nv::NV_ENC_LOCK_BITSTREAM {
|
||||||
version: nv::NV_ENC_LOCK_BITSTREAM_VER,
|
version: nv::NV_ENC_LOCK_BITSTREAM_VER,
|
||||||
@@ -831,6 +896,11 @@ impl Encoder for NvencD3d11Encoder {
|
|||||||
|
|
||||||
impl Drop for NvencD3d11Encoder {
|
impl Drop for NvencD3d11Encoder {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `teardown` (an `unsafe fn`) needs the owning thread with no NVENC call in flight and
|
||||||
|
// a session whose cached resources all belong to `self.encoder`. At Drop this encoder is owned
|
||||||
|
// exclusively (no other reference can exist), runs on the encode thread it was confined to, and
|
||||||
|
// `teardown` early-returns when `self.encoder` is null; otherwise every cached reg/bitstream/
|
||||||
|
// pending was created against that live session. It runs exactly once (here).
|
||||||
unsafe { self.teardown() };
|
unsafe { self.teardown() };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
//! fallback when NVENC is unavailable). Low-latency screen-content config: single-reference,
|
//! fallback when NVENC is unavailable). Low-latency screen-content config: single-reference,
|
||||||
//! no B-frames (Baseline), bitrate rate-control, in-band SPS/PPS each IDR, BT.709 limited range.
|
//! no B-frames (Baseline), bitrate rate-control, in-band SPS/PPS each IDR, BT.709 limited range.
|
||||||
//! Synchronous: `submit` encodes immediately and stashes the AU for `poll` (no internal queue).
|
//! Synchronous: `submit` encodes immediately and stashes the AU for `poll` (no internal queue).
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{EncodedFrame, Encoder};
|
use super::{EncodedFrame, Encoder};
|
||||||
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
|
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
|
||||||
@@ -30,6 +32,12 @@ pub struct OpenH264Encoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// openh264's Encoder holds a raw C handle (not auto-Send); it lives on the single encode thread.
|
// openh264's Encoder holds a raw C handle (not auto-Send); it lives on the single encode thread.
|
||||||
|
// SAFETY: `OpenH264Encoder` wraps `Oh264` (openh264's `Encoder`), which holds a raw C handle to the
|
||||||
|
// openh264 `ISVCEncoder` and is not auto-`Send`; the other fields (`YUVBuffer`, `Vec`, scalars,
|
||||||
|
// `Option<EncodedFrame>`) are plain owned data. The session creates the encoder, calls
|
||||||
|
// `submit`/`poll`/`flush`, and drops it all on one dedicated encode thread, never sharing it by
|
||||||
|
// reference across threads, so the C handle is only ever touched from a single thread. Moving the
|
||||||
|
// whole value to that thread is therefore sound — there is no concurrent access to the handle.
|
||||||
unsafe impl Send for OpenH264Encoder {}
|
unsafe impl Send for OpenH264Encoder {}
|
||||||
|
|
||||||
impl OpenH264Encoder {
|
impl OpenH264Encoder {
|
||||||
|
|||||||
@@ -17,6 +17,9 @@
|
|||||||
//! data packets are consumed immediately and missing parity only costs loss recovery — so
|
//! data packets are consumed immediately and missing parity only costs loss recovery — so
|
||||||
//! the validated stereo path stays byte-identical (data packets only, exactly as before).
|
//! the validated stereo path stays byte-identical (data packets only, exactly as before).
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it.
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
#[cfg(any(target_os = "linux", target_os = "windows", test))]
|
#[cfg(any(target_os = "linux", target_os = "windows", test))]
|
||||||
use crate::audio::SAMPLE_RATE;
|
use crate::audio::SAMPLE_RATE;
|
||||||
#[cfg(any(target_os = "linux", target_os = "windows"))]
|
#[cfg(any(target_os = "linux", target_os = "windows"))]
|
||||||
@@ -409,7 +412,10 @@ struct MsEncoder {
|
|||||||
st: std::ptr::NonNull<audiopus_sys::OpusMSEncoder>,
|
st: std::ptr::NonNull<audiopus_sys::OpusMSEncoder>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// The raw encoder state has no thread affinity; the session owns it on one thread at a time.
|
// SAFETY: `MsEncoder` owns a unique `OpusMSEncoder` via `NonNull` (it is neither `Clone` nor
|
||||||
|
// `Sync`, so the pointer is never aliased). libopus's multistream encoder state is a self-contained
|
||||||
|
// heap allocation with no thread-local or thread-affine state, so moving ownership to another thread
|
||||||
|
// is sound; every method takes `&mut self`, keeping access single-threaded at any instant.
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
unsafe impl Send for MsEncoder {}
|
unsafe impl Send for MsEncoder {}
|
||||||
|
|
||||||
@@ -418,6 +424,13 @@ impl MsEncoder {
|
|||||||
fn new(layout: &OpusLayout) -> Result<MsEncoder> {
|
fn new(layout: &OpusLayout) -> Result<MsEncoder> {
|
||||||
use std::os::raw::c_int;
|
use std::os::raw::c_int;
|
||||||
let mut err: c_int = 0;
|
let mut err: c_int = 0;
|
||||||
|
// SAFETY: every scalar arg is a valid libopus input (sample rate, channel/stream/coupled
|
||||||
|
// counts, the RESTRICTED_LOWDELAY application constant). `layout.mapping.as_ptr()` addresses
|
||||||
|
// a 'static slice of exactly `layout.channels` bytes (every `OpusLayout` constant upholds
|
||||||
|
// that), which is the element count `opus_multistream_encoder_create` reads through it, and
|
||||||
|
// `&mut err` is a live local the call writes its status into. libopus copies the mapping into
|
||||||
|
// its own allocation, so the pointer need only be valid for the call; the returned pointer is
|
||||||
|
// null/`OPUS_OK`-checked below before any use.
|
||||||
let st = unsafe {
|
let st = unsafe {
|
||||||
audiopus_sys::opus_multistream_encoder_create(
|
audiopus_sys::opus_multistream_encoder_create(
|
||||||
SAMPLE_RATE as i32,
|
SAMPLE_RATE as i32,
|
||||||
@@ -432,6 +445,11 @@ impl MsEncoder {
|
|||||||
let st = std::ptr::NonNull::new(st)
|
let st = std::ptr::NonNull::new(st)
|
||||||
.filter(|_| err == audiopus_sys::OPUS_OK)
|
.filter(|_| err == audiopus_sys::OPUS_OK)
|
||||||
.ok_or_else(|| anyhow::anyhow!("opus_multistream_encoder_create failed ({err})"))?;
|
.ok_or_else(|| anyhow::anyhow!("opus_multistream_encoder_create failed ({err})"))?;
|
||||||
|
// SAFETY: `st` is the non-null encoder `opus_multistream_encoder_create` just returned, owned
|
||||||
|
// exclusively here. Each `opus_multistream_encoder_ctl` call passes a valid request constant
|
||||||
|
// with the single by-value `c_int` argument that request's variadic ABI expects
|
||||||
|
// (`OPUS_SET_BITRATE_REQUEST` → bitrate, `OPUS_SET_VBR_REQUEST` → 0). No pointer escapes the
|
||||||
|
// call and the encoder outlives it.
|
||||||
unsafe {
|
unsafe {
|
||||||
audiopus_sys::opus_multistream_encoder_ctl(
|
audiopus_sys::opus_multistream_encoder_ctl(
|
||||||
st.as_ptr(),
|
st.as_ptr(),
|
||||||
@@ -453,6 +471,13 @@ impl MsEncoder {
|
|||||||
samples_per_channel: usize,
|
samples_per_channel: usize,
|
||||||
out: &mut [u8],
|
out: &mut [u8],
|
||||||
) -> Result<usize> {
|
) -> Result<usize> {
|
||||||
|
// SAFETY: `self.st` is the live encoder from `new`. libopus reads `samples_per_channel *
|
||||||
|
// channels` f32s through `frame.as_ptr()`; every caller passes a `frame` of exactly that
|
||||||
|
// length together with the matching `samples_per_channel` (`audio_body`'s `frame_len =
|
||||||
|
// samples_per_channel * layout.channels`; the round-trip tests size identically), so the read
|
||||||
|
// stays in bounds. `out.as_mut_ptr()` is written for at most `out.len()` bytes, which is
|
||||||
|
// passed as the capacity bound. Both buffers are live locals outliving this synchronous call;
|
||||||
|
// the return value is range-checked before being used as a length.
|
||||||
let n = unsafe {
|
let n = unsafe {
|
||||||
audiopus_sys::opus_multistream_encode_float(
|
audiopus_sys::opus_multistream_encode_float(
|
||||||
self.st.as_ptr(),
|
self.st.as_ptr(),
|
||||||
@@ -470,6 +495,9 @@ impl MsEncoder {
|
|||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
impl Drop for MsEncoder {
|
impl Drop for MsEncoder {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `self.st` is the encoder `opus_multistream_encoder_create` returned; this
|
||||||
|
// `MsEncoder` owns it uniquely and `drop` runs exactly once, so the destroy frees it once
|
||||||
|
// with no subsequent use.
|
||||||
unsafe { audiopus_sys::opus_multistream_encoder_destroy(self.st.as_ptr()) }
|
unsafe { audiopus_sys::opus_multistream_encoder_destroy(self.st.as_ptr()) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -761,6 +789,10 @@ mod tests {
|
|||||||
let client_mapping = client_swap(&digits[3..]);
|
let client_mapping = client_swap(&digits[3..]);
|
||||||
|
|
||||||
let mut err = 0i32;
|
let mut err = 0i32;
|
||||||
|
// SAFETY: scalar args are valid libopus inputs. `client_mapping.as_ptr()` addresses a
|
||||||
|
// `Vec<u8>` of exactly `ch` entries (derived from the advertised surround-params), which is
|
||||||
|
// the element count the decoder reads through it, and `&mut err` is a live local the call
|
||||||
|
// writes. The returned pointer is `OPUS_OK`/non-null-checked immediately below before use.
|
||||||
let dec = unsafe {
|
let dec = unsafe {
|
||||||
audiopus_sys::opus_multistream_decoder_create(
|
audiopus_sys::opus_multistream_decoder_create(
|
||||||
SAMPLE_RATE as i32,
|
SAMPLE_RATE as i32,
|
||||||
@@ -789,6 +821,11 @@ mod tests {
|
|||||||
}
|
}
|
||||||
let n = enc.encode_float(&frame, samples, &mut out).unwrap();
|
let n = enc.encode_float(&frame, samples, &mut out).unwrap();
|
||||||
assert!(n > 0);
|
assert!(n > 0);
|
||||||
|
// SAFETY: `dec` is the non-null decoder asserted above. `out.as_ptr()` is read for
|
||||||
|
// the `n` encoded bytes just produced by `encode_float`; `decoded.as_mut_ptr()` is
|
||||||
|
// written for up to `samples * ch` f32s and `decoded` is exactly that long; `samples`
|
||||||
|
// is the per-channel frame size. All buffers are live locals outliving the call; the
|
||||||
|
// return is checked to equal `samples`.
|
||||||
let got = unsafe {
|
let got = unsafe {
|
||||||
audiopus_sys::opus_multistream_decode_float(
|
audiopus_sys::opus_multistream_decode_float(
|
||||||
dec,
|
dec,
|
||||||
@@ -817,6 +854,8 @@ mod tests {
|
|||||||
(energies: {energy:?})"
|
(energies: {energy:?})"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
// SAFETY: `dec` is the decoder `opus_multistream_decoder_create` returned; the test owns it
|
||||||
|
// and destroys it exactly once here, after the final decode — no later use, no double free.
|
||||||
unsafe { audiopus_sys::opus_multistream_decoder_destroy(dec) };
|
unsafe { audiopus_sys::opus_multistream_decoder_destroy(dec) };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -853,6 +892,9 @@ mod tests {
|
|||||||
let digits: Vec<u8> = s.bytes().map(|b| b - b'0').collect();
|
let digits: Vec<u8> = s.bytes().map(|b| b - b'0').collect();
|
||||||
let client_mapping = client_swap(&digits[3..]);
|
let client_mapping = client_swap(&digits[3..]);
|
||||||
let mut err = 0i32;
|
let mut err = 0i32;
|
||||||
|
// SAFETY: scalar args are valid; `client_mapping.as_ptr()` addresses a 6-entry `Vec<u8>`
|
||||||
|
// (matches the 6-channel layout the decoder reads through it), alive past the call, and
|
||||||
|
// `&mut err` is a live local. The pointer is `OPUS_OK`-checked before use.
|
||||||
let dec = unsafe {
|
let dec = unsafe {
|
||||||
audiopus_sys::opus_multistream_decoder_create(
|
audiopus_sys::opus_multistream_decoder_create(
|
||||||
48000,
|
48000,
|
||||||
@@ -865,6 +907,10 @@ mod tests {
|
|||||||
};
|
};
|
||||||
assert_eq!(err, audiopus_sys::OPUS_OK);
|
assert_eq!(err, audiopus_sys::OPUS_OK);
|
||||||
let mut pcm = vec![0f32; 240 * 6];
|
let mut pcm = vec![0f32; 240 * 6];
|
||||||
|
// SAFETY: `dec` is the non-null decoder from create. `out.as_ptr()` is read for the CBR
|
||||||
|
// packet length passed in (`*sizes.first()`, a real encoded packet size in `out`);
|
||||||
|
// `pcm.as_mut_ptr()` is written for up to `240 * 6` f32s and `pcm` is exactly that long;
|
||||||
|
// `240` is the per-channel frame size. All buffers are live locals outliving the call.
|
||||||
let got = unsafe {
|
let got = unsafe {
|
||||||
audiopus_sys::opus_multistream_decode_float(
|
audiopus_sys::opus_multistream_decode_float(
|
||||||
dec,
|
dec,
|
||||||
@@ -875,6 +921,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
// SAFETY: `dec` is owned by the test; destroyed exactly once here after the final decode.
|
||||||
unsafe { audiopus_sys::opus_multistream_decoder_destroy(dec) };
|
unsafe { audiopus_sys::opus_multistream_decoder_destroy(dec) };
|
||||||
assert_eq!(got, 240);
|
assert_eq!(got, 240);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
//! Pairing crypto primitives (control plane only — distinct from `punktfunk_core`'s AES-GCM
|
//! Pairing crypto primitives (control plane only — distinct from `punktfunk_core`'s AES-GCM
|
||||||
//! data-plane sealing). GameStream pairing uses: AES-128-**ECB** with **no padding**,
|
//! data-plane sealing). GameStream pairing uses: AES-128-**ECB** with **no padding**,
|
||||||
//! SHA-256 (host appversion major ≥ 7), and RSA-PKCS1v15-SHA256 signatures. See the
|
//! SHA-256 (host appversion major ≥ 7), and RSA-PKCS1v15-SHA256 signatures. See the
|
||||||
//! `serverinfo + pairing` section of `docs/research/gamestream-protocol-research.json`.
|
//! `serverinfo + pairing` section of `design/research/gamestream-protocol-research.json`.
|
||||||
|
|
||||||
use aes::cipher::generic_array::GenericArray;
|
use aes::cipher::generic_array::GenericArray;
|
||||||
use aes::cipher::{BlockDecrypt, BlockEncrypt, KeyInit};
|
use aes::cipher::{BlockDecrypt, BlockEncrypt, KeyInit};
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
//! GameStream (P1) control plane — what a stock Moonlight/Artemis client talks to around
|
//! GameStream (P1) control plane — what a stock Moonlight/Artemis client talks to around
|
||||||
//! the media streams: mDNS discovery, the nvhttp serverinfo + pairing HTTP(S) API, RTSP,
|
//! the media streams: mDNS discovery, the nvhttp serverinfo + pairing HTTP(S) API, RTSP,
|
||||||
//! and the ENet control stream. `tokio`/`axum` live here (control plane, I/O-bound — never
|
//! and the ENet control stream. `tokio`/`axum` live here (control plane, I/O-bound — never
|
||||||
//! the per-frame hot path; that is `punktfunk_core`'s P1 wire codec). See `docs/gamestream-host-plan.md`.
|
//! the per-frame hot path; that is `punktfunk_core`'s P1 wire codec). See `design/gamestream-host-plan.md`.
|
||||||
//!
|
//!
|
||||||
//! Status: P1.1 — mDNS `_nvstream._tcp` advertisement + `/serverinfo`. Pairing, RTSP, and
|
//! Status: P1.1 — mDNS `_nvstream._tcp` advertisement + `/serverinfo`. Pairing, RTSP, and
|
||||||
//! the media streams follow (see the GameStream host task list / plan).
|
//! the media streams follow (see the GameStream host task list / plan).
|
||||||
@@ -125,12 +125,21 @@ pub struct AppState {
|
|||||||
/// (avoids a PipeWire stream setup per reconnect); drained on reuse so no stale audio is
|
/// (avoids a PipeWire stream setup per reconnect); drained on reuse so no stale audio is
|
||||||
/// sent, dropped + reopened when a session negotiates a different channel count.
|
/// sent, dropped + reopened when a session negotiates a different channel count.
|
||||||
pub audio_cap: std::sync::Arc<std::sync::Mutex<Option<Box<dyn crate::audio::AudioCapturer>>>>,
|
pub audio_cap: std::sync::Arc<std::sync::Mutex<Option<Box<dyn crate::audio::AudioCapturer>>>>,
|
||||||
|
/// Shared streaming-stats recorder (web-console capture/graph). The GameStream encode loop
|
||||||
|
/// reads `is_armed()` per frame and emits samples; the same `Arc` is shared with the mgmt API
|
||||||
|
/// and the native punktfunk/1 loops so one capture spans whichever path is streaming.
|
||||||
|
pub stats: Arc<crate::stats_recorder::StatsRecorder>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AppState {
|
impl AppState {
|
||||||
/// Fresh control-plane state: no active session; the pairing allow-list is loaded from
|
/// Fresh control-plane state: no active session; the pairing allow-list is loaded from
|
||||||
/// disk (pairings persist across restarts).
|
/// disk (pairings persist across restarts). `stats` is the shared recorder handed to both the
|
||||||
pub fn new(host: Host, identity: cert::ServerIdentity) -> AppState {
|
/// mgmt API and the streaming loops.
|
||||||
|
pub fn new(
|
||||||
|
host: Host,
|
||||||
|
identity: cert::ServerIdentity,
|
||||||
|
stats: Arc<crate::stats_recorder::StatsRecorder>,
|
||||||
|
) -> AppState {
|
||||||
AppState {
|
AppState {
|
||||||
host,
|
host,
|
||||||
identity,
|
identity,
|
||||||
@@ -145,6 +154,7 @@ impl AppState {
|
|||||||
rfi_range: std::sync::Arc::new(std::sync::Mutex::new(None)),
|
rfi_range: std::sync::Arc::new(std::sync::Mutex::new(None)),
|
||||||
video_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
|
video_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
|
||||||
audio_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
|
audio_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
|
||||||
|
stats,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -166,7 +176,10 @@ pub fn serve(
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let host = Host::detect()?;
|
let host = Host::detect()?;
|
||||||
let identity = cert::ServerIdentity::load_or_create().context("host certificate")?;
|
let identity = cert::ServerIdentity::load_or_create().context("host certificate")?;
|
||||||
let state = Arc::new(AppState::new(host, identity));
|
// The shared streaming-stats recorder: one handle for the mgmt API, the GameStream encode loop
|
||||||
|
// (via `AppState`), and the native punktfunk/1 loops (passed to `punktfunk1::serve`).
|
||||||
|
let stats = crate::stats_recorder::StatsRecorder::new(crate::stats_recorder::default_dir());
|
||||||
|
let state = Arc::new(AppState::new(host, identity, stats.clone()));
|
||||||
// The native plane always runs, so the shared native-pairing handle (linking the QUIC ceremony
|
// The native plane always runs, so the shared native-pairing handle (linking the QUIC ceremony
|
||||||
// and the management API) always exists.
|
// and the management API) always exists.
|
||||||
let np = Arc::new(
|
let np = Arc::new(
|
||||||
@@ -206,8 +219,8 @@ pub fn serve(
|
|||||||
);
|
);
|
||||||
tokio::try_join!(
|
tokio::try_join!(
|
||||||
nvhttp::run(state.clone()),
|
nvhttp::run(state.clone()),
|
||||||
crate::mgmt::run(state.clone(), mgmt, Some(np.clone())),
|
crate::mgmt::run(state.clone(), mgmt, Some(np.clone()), stats.clone()),
|
||||||
crate::punktfunk1::serve(native_opts, np),
|
crate::punktfunk1::serve(native_opts, np, stats.clone()),
|
||||||
)?;
|
)?;
|
||||||
} else {
|
} else {
|
||||||
// Secure default: native punktfunk/1 + management API only (no GameStream surface).
|
// Secure default: native punktfunk/1 + management API only (no GameStream surface).
|
||||||
@@ -217,8 +230,8 @@ pub fn serve(
|
|||||||
(GameStream OFF — pass --gamestream for stock-Moonlight compat)"
|
(GameStream OFF — pass --gamestream for stock-Moonlight compat)"
|
||||||
);
|
);
|
||||||
tokio::try_join!(
|
tokio::try_join!(
|
||||||
crate::mgmt::run(state.clone(), mgmt, Some(np.clone())),
|
crate::mgmt::run(state.clone(), mgmt, Some(np.clone()), stats.clone()),
|
||||||
crate::punktfunk1::serve(native_opts, np),
|
crate::punktfunk1::serve(native_opts, np, stats.clone()),
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -291,7 +291,10 @@ mod tests {
|
|||||||
https_port: HTTPS_PORT,
|
https_port: HTTPS_PORT,
|
||||||
};
|
};
|
||||||
let identity = super::super::cert::ServerIdentity::ephemeral().expect("ephemeral identity");
|
let identity = super::super::cert::ServerIdentity::ephemeral().expect("ephemeral identity");
|
||||||
Arc::new(AppState::new(host, identity))
|
let stats = crate::stats_recorder::StatsRecorder::new(
|
||||||
|
std::env::temp_dir().join(format!("pf-nvhttp-stats-{}", std::process::id())),
|
||||||
|
);
|
||||||
|
Arc::new(AppState::new(host, identity, stats))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fp_of(der: &[u8]) -> String {
|
fn fp_of(der: &[u8]) -> String {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
//! The 4-phase GameStream pairing state machine (over HTTP), keyed by `uniqueid`. Proves
|
//! The 4-phase GameStream pairing state machine (over HTTP), keyed by `uniqueid`. Proves
|
||||||
//! both sides know the PIN (via the SHA-256(salt||pin) AES-ECB key) and own their certs
|
//! both sides know the PIN (via the SHA-256(salt||pin) AES-ECB key) and own their certs
|
||||||
//! (RSA signatures), then pins the client cert. The final `pairchallenge` happens over
|
//! (RSA signatures), then pins the client cert. The final `pairchallenge` happens over
|
||||||
//! HTTPS (handled in `nvhttp`). Byte-exact spec: `docs/research/…-research.json`.
|
//! HTTPS (handled in `nvhttp`). Byte-exact spec: `design/research/…-research.json`.
|
||||||
|
|
||||||
use super::cert::ServerIdentity;
|
use super::cert::ServerIdentity;
|
||||||
use super::crypto;
|
use super::crypto;
|
||||||
|
|||||||
@@ -234,6 +234,7 @@ fn handle_request(req: &Request, state: &AppState) -> String {
|
|||||||
state.force_idr.clone(),
|
state.force_idr.clone(),
|
||||||
state.rfi_range.clone(),
|
state.rfi_range.clone(),
|
||||||
state.video_cap.clone(),
|
state.video_cap.clone(),
|
||||||
|
state.stats.clone(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Some(_) => tracing::info!("RTSP PLAY — stream already running"),
|
Some(_) => tracing::info!("RTSP PLAY — stream already running"),
|
||||||
|
|||||||
@@ -3,6 +3,9 @@
|
|||||||
//! either real portal desktop capture (`PUNKTFUNK_VIDEO_SOURCE=portal`, the portal PipeWire path) or
|
//! either real portal desktop capture (`PUNKTFUNK_VIDEO_SOURCE=portal`, the portal PipeWire path) or
|
||||||
//! a synthetic test pattern (default). Runs on its own native thread.
|
//! a synthetic test pattern (default). Runs on its own native thread.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it.
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::video::{FrameType, VideoPacketizer};
|
use super::video::{FrameType, VideoPacketizer};
|
||||||
use super::VIDEO_PORT;
|
use super::VIDEO_PORT;
|
||||||
use crate::capture::{self, Capturer, FastSyntheticCapturer};
|
use crate::capture::{self, Capturer, FastSyntheticCapturer};
|
||||||
@@ -45,6 +48,7 @@ pub fn start(
|
|||||||
force_idr: Arc<AtomicBool>,
|
force_idr: Arc<AtomicBool>,
|
||||||
rfi_range: RfiSlot,
|
rfi_range: RfiSlot,
|
||||||
video_cap: CapturerSlot,
|
video_cap: CapturerSlot,
|
||||||
|
stats: Arc<crate::stats_recorder::StatsRecorder>,
|
||||||
) {
|
) {
|
||||||
let _ = std::thread::Builder::new()
|
let _ = std::thread::Builder::new()
|
||||||
.name("punktfunk-video".into())
|
.name("punktfunk-video".into())
|
||||||
@@ -57,6 +61,7 @@ pub fn start(
|
|||||||
&force_idr,
|
&force_idr,
|
||||||
&rfi_range,
|
&rfi_range,
|
||||||
&video_cap,
|
&video_cap,
|
||||||
|
&stats,
|
||||||
) {
|
) {
|
||||||
tracing::error!(error = %format!("{e:#}"), "video stream failed");
|
tracing::error!(error = %format!("{e:#}"), "video stream failed");
|
||||||
}
|
}
|
||||||
@@ -65,6 +70,7 @@ pub fn start(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn run(
|
fn run(
|
||||||
cfg: StreamConfig,
|
cfg: StreamConfig,
|
||||||
app: Option<&super::apps::AppEntry>,
|
app: Option<&super::apps::AppEntry>,
|
||||||
@@ -72,6 +78,9 @@ fn run(
|
|||||||
force_idr: &AtomicBool,
|
force_idr: &AtomicBool,
|
||||||
rfi_range: &std::sync::Mutex<Option<(i64, i64)>>,
|
rfi_range: &std::sync::Mutex<Option<(i64, i64)>>,
|
||||||
video_cap: &std::sync::Mutex<Option<Box<dyn Capturer>>>,
|
video_cap: &std::sync::Mutex<Option<Box<dyn Capturer>>>,
|
||||||
|
// Shared stats recorder for the web-console capture/graph. Threaded into `stream_body` (the
|
||||||
|
// encode loop); per-frame sample emission is wired by a later pass.
|
||||||
|
stats: &Arc<crate::stats_recorder::StatsRecorder>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// GameStream capture/encode thread: apply Windows session tuning (no-op off Windows).
|
// GameStream capture/encode thread: apply Windows session tuning (no-op off Windows).
|
||||||
crate::session_tuning::on_hot_thread();
|
crate::session_tuning::on_hot_thread();
|
||||||
@@ -97,18 +106,20 @@ fn run(
|
|||||||
sock.connect(client)
|
sock.connect(client)
|
||||||
.context("connect client video endpoint")?;
|
.context("connect client video endpoint")?;
|
||||||
tracing::info!(%client, "video: client endpoint learned");
|
tracing::info!(%client, "video: client endpoint learned");
|
||||||
|
// Short label for web-console stats captures: the client's peer IP.
|
||||||
|
let client_label = client.ip().to_string();
|
||||||
|
|
||||||
// Native client-resolution source: create a compositor virtual output sized to the client's
|
// Native client-resolution source: create a compositor virtual output sized to the client's
|
||||||
// request and capture it (no scaling). Self-contained — deliberately NOT pooled in
|
// request and capture it (no scaling). Self-contained — deliberately NOT pooled in
|
||||||
// `video_cap`, since a reconnect at a different resolution needs a freshly-sized output; the
|
// `video_cap`, since a reconnect at a different resolution needs a freshly-sized output; the
|
||||||
// output is released when this capturer drops at stream end (RAII via its keepalive).
|
// output is released when this capturer drops at stream end (RAII via its keepalive).
|
||||||
if crate::config::config().video_source.as_deref() == Some("virtual") {
|
if crate::config::config().video_source.as_deref() == Some("virtual") {
|
||||||
// The launched app picks the compositor (e.g. gamescope for game entries) and the
|
// Open the virtual-display source: pick the live compositor, normalize the session env
|
||||||
// nested command.
|
// (apply_session_env/apply_input_env — gamescope ATTACH/resize + KWin/Mutter retargeting,
|
||||||
let compositor = app
|
// exactly like the native plane), create a virtual output at the client mode, and capture it.
|
||||||
.and_then(|a| a.compositor)
|
// Re-runnable: the encode loop calls it again on a mid-stream capture loss to FOLLOW a
|
||||||
.map(Ok)
|
// Desktop<->Game switch.
|
||||||
.unwrap_or_else(|| crate::vdisplay::detect().context("detect compositor"))?;
|
let (mut capturer, compositor) = open_gs_virtual_source(cfg, app)?;
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
?compositor,
|
?compositor,
|
||||||
app = ?app.map(|a| &a.title),
|
app = ?app.map(|a| &a.title),
|
||||||
@@ -116,32 +127,41 @@ fn run(
|
|||||||
h = cfg.height,
|
h = cfg.height,
|
||||||
"video source: virtual display (native client resolution)"
|
"video source: virtual display (native client resolution)"
|
||||||
);
|
);
|
||||||
let mut vd = crate::vdisplay::open(compositor).context("open virtual display")?;
|
// Launch the app's command now that capture is live, for the backends that DON'T nest it via
|
||||||
// Carry the resolved launch command on the backend instance (per-session) rather than a
|
// set_launch_command above: Windows (no gamescope) and Linux kwin/mutter/wlroots (which stream
|
||||||
// process-global env var, so concurrent sessions can't stomp each other's launch target.
|
// the existing desktop, so the app must be spawned into the session to land on the streamed
|
||||||
vd.set_launch_command(app.and_then(|a| a.cmd.clone()));
|
// output). Linux gamescope already nested it via set_launch_command, so skip it there.
|
||||||
let vout = vd
|
#[cfg(windows)]
|
||||||
.create(punktfunk_core::Mode {
|
let launch_here = true;
|
||||||
width: cfg.width,
|
#[cfg(target_os = "linux")]
|
||||||
height: cfg.height,
|
let launch_here = compositor != crate::vdisplay::Compositor::Gamescope;
|
||||||
refresh_hz: cfg.fps,
|
#[cfg(any(windows, target_os = "linux"))]
|
||||||
})
|
if launch_here {
|
||||||
.context("create virtual output at client resolution")?;
|
if let Some(cmd) = app
|
||||||
// `want_hdr=false`: the IDD-push backend (opt-in PUNKTFUNK_IDD_PUSH) has no monitor-HDR
|
.and_then(|a| a.cmd.as_deref())
|
||||||
// auto-detection — it converts its always-FP16 ring per this flag — and GameStream HDR is not
|
.filter(|c| !c.trim().is_empty())
|
||||||
// negotiated into StreamConfig here, so an IDD-push GameStream session streams SDR even on an
|
{
|
||||||
// HDR desktop. (The default WGC backend DOES auto-detect HDR from the output colorspace, but
|
if let Err(e) = crate::library::launch_gamestream_command(cmd) {
|
||||||
// IDD-push bypasses WGC.) Acceptable for the experimental IDD-push A/B path; HDR over IDD-push
|
tracing::warn!(command = %cmd, error = %e, "gamestream: could not launch app");
|
||||||
// is wired only for punktfunk/1 (want_hdr = negotiated bit_depth >= 10). TODO: derive want_hdr
|
}
|
||||||
// from a GameStream HDR flag once StreamConfig carries one.
|
}
|
||||||
let mut capturer = capture::capture_virtual_output(
|
}
|
||||||
vout,
|
// Rebuild closure: re-open the source on a mid-stream capture loss, RE-DETECTING the live
|
||||||
capture::OutputFormat::resolve(false),
|
// compositor — so a Desktop<->Game switch (at the client's fixed mode) is FOLLOWED in place
|
||||||
crate::session_plan::CaptureBackend::resolve(),
|
// without a Moonlight reconnect. (A resolution change can't be followed mid-stream on
|
||||||
)
|
// GameStream — WxH is locked at ANNOUNCE — but a session toggle keeps the negotiated mode.)
|
||||||
.context("capture virtual output")?;
|
let rebuild = || open_gs_virtual_source(cfg, app).map(|(c, _)| c);
|
||||||
capturer.set_active(true);
|
return stream_body(
|
||||||
return stream_body(&mut *capturer, &sock, cfg, running, force_idr, rfi_range);
|
&mut capturer,
|
||||||
|
Some(&rebuild),
|
||||||
|
&sock,
|
||||||
|
cfg,
|
||||||
|
running,
|
||||||
|
force_idr,
|
||||||
|
rfi_range,
|
||||||
|
stats,
|
||||||
|
&client_label,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reuse the persistent capturer (one screencast session → clean reconnect); create it on
|
// Reuse the persistent capturer (one screencast session → clean reconnect); create it on
|
||||||
@@ -161,12 +181,70 @@ fn run(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
capturer.set_active(true);
|
capturer.set_active(true);
|
||||||
let result = stream_body(&mut *capturer, &sock, cfg, running, force_idr, rfi_range);
|
// Portal/synthetic source: no compositor virtual output to re-detect, so no rebuild closure.
|
||||||
|
let result = stream_body(
|
||||||
|
&mut capturer,
|
||||||
|
None,
|
||||||
|
&sock,
|
||||||
|
cfg,
|
||||||
|
running,
|
||||||
|
force_idr,
|
||||||
|
rfi_range,
|
||||||
|
stats,
|
||||||
|
&client_label,
|
||||||
|
);
|
||||||
capturer.set_active(false);
|
capturer.set_active(false);
|
||||||
*video_cap.lock().unwrap() = Some(capturer);
|
*video_cap.lock().unwrap() = Some(capturer);
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Open the virtual-display video source for a GameStream session: pick the LIVE compositor + normalize
|
||||||
|
/// the session env (apply_session_env/apply_input_env — gamescope ATTACH/resize, KWin/Mutter
|
||||||
|
/// retargeting) exactly like the native plane (punktfunk1.rs resolve_compositor), create a virtual
|
||||||
|
/// output at the client's mode, and capture it. Returns the capturer (it owns the output's keepalive;
|
||||||
|
/// the stateless VirtualDisplay factory is dropped here) plus the resolved compositor. An apps.json
|
||||||
|
/// entry can PIN a compositor (skips the live detect/retarget). Re-run on a mid-stream capture loss to
|
||||||
|
/// FOLLOW a Desktop<->Game switch: it re-detects the now-live compositor and re-targets at it. Does NOT
|
||||||
|
/// launch the app (that happens once at stream start; a rebuild must not re-spawn it).
|
||||||
|
fn open_gs_virtual_source(
|
||||||
|
cfg: StreamConfig,
|
||||||
|
app: Option<&super::apps::AppEntry>,
|
||||||
|
) -> Result<(Box<dyn Capturer>, crate::vdisplay::Compositor)> {
|
||||||
|
let compositor = if let Some(c) = app.and_then(|a| a.compositor) {
|
||||||
|
c
|
||||||
|
} else {
|
||||||
|
let active = crate::vdisplay::detect_active_session();
|
||||||
|
crate::vdisplay::apply_session_env(&active);
|
||||||
|
let c = crate::vdisplay::compositor_for_kind(active.kind)
|
||||||
|
.map(Ok)
|
||||||
|
.unwrap_or_else(crate::vdisplay::detect)
|
||||||
|
.context("detect compositor")?;
|
||||||
|
crate::vdisplay::apply_input_env(c);
|
||||||
|
c
|
||||||
|
};
|
||||||
|
let mut vd = crate::vdisplay::open(compositor).context("open virtual display")?;
|
||||||
|
// Carry the resolved launch command on the backend instance (per-session) rather than a
|
||||||
|
// process-global env var, so concurrent sessions can't stomp each other's launch target.
|
||||||
|
vd.set_launch_command(app.and_then(|a| a.cmd.clone()));
|
||||||
|
let vout = vd
|
||||||
|
.create(punktfunk_core::Mode {
|
||||||
|
width: cfg.width,
|
||||||
|
height: cfg.height,
|
||||||
|
refresh_hz: cfg.fps,
|
||||||
|
})
|
||||||
|
.context("create virtual output at client resolution")?;
|
||||||
|
// want_hdr=false: GameStream HDR is not negotiated into StreamConfig here (the default WGC backend
|
||||||
|
// still auto-detects HDR from the output colorspace; only the opt-in IDD-push path streams SDR).
|
||||||
|
let capturer = capture::capture_virtual_output(
|
||||||
|
vout,
|
||||||
|
capture::OutputFormat::resolve(false),
|
||||||
|
crate::session_plan::CaptureBackend::resolve(),
|
||||||
|
)
|
||||||
|
.context("capture virtual output")?;
|
||||||
|
capturer.set_active(true);
|
||||||
|
Ok((capturer, compositor))
|
||||||
|
}
|
||||||
|
|
||||||
/// One frame's packets, handed from the encode thread to the send thread.
|
/// One frame's packets, handed from the encode thread to the send thread.
|
||||||
type PacketBatch = Vec<Vec<u8>>;
|
type PacketBatch = Vec<Vec<u8>>;
|
||||||
|
|
||||||
@@ -188,6 +266,10 @@ fn sendmmsg_all(sock: &UdpSocket, pkts: &[Vec<u8>]) -> std::io::Result<()> {
|
|||||||
let mut hdrs: Vec<libc::mmsghdr> = iovs
|
let mut hdrs: Vec<libc::mmsghdr> = iovs
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
.map(|iov| {
|
.map(|iov| {
|
||||||
|
// SAFETY: `libc::mmsghdr` is a plain `#[repr(C)]` struct of integers and raw
|
||||||
|
// pointers, for which an all-zero bit pattern is valid (null pointers / zero
|
||||||
|
// lengths); the fields we rely on (`msg_iov`, `msg_iovlen`) are overwritten on the
|
||||||
|
// next two lines before the struct is handed to the kernel.
|
||||||
let mut h: libc::mmsghdr = unsafe { std::mem::zeroed() };
|
let mut h: libc::mmsghdr = unsafe { std::mem::zeroed() };
|
||||||
h.msg_hdr.msg_iov = iov;
|
h.msg_hdr.msg_iov = iov;
|
||||||
h.msg_hdr.msg_iovlen = 1;
|
h.msg_hdr.msg_iovlen = 1;
|
||||||
@@ -196,6 +278,13 @@ fn sendmmsg_all(sock: &UdpSocket, pkts: &[Vec<u8>]) -> std::io::Result<()> {
|
|||||||
.collect();
|
.collect();
|
||||||
let mut off = 0usize;
|
let mut off = 0usize;
|
||||||
while off < hdrs.len() {
|
while off < hdrs.len() {
|
||||||
|
// SAFETY: `fd` is `sock`'s live raw fd (`sock` outlives the call). `hdrs[off..]
|
||||||
|
// .as_mut_ptr()` is a live slice of `(hdrs.len() - off)` `mmsghdr`s — exactly the count
|
||||||
|
// passed — into which the kernel writes each `msg_len`. Each header's `msg_iov` points
|
||||||
|
// into `iovs` (a local that outlives this call, with `msg_iovlen == 1` matching its one
|
||||||
|
// entry) and each `iovec.iov_base` points into the `chunk` packet buffers (the caller's
|
||||||
|
// `pkts`, alive for the call); the kernel only reads those payloads. Flags 0; the return
|
||||||
|
// is error-/progress-checked before advancing `off`.
|
||||||
let n = unsafe {
|
let n = unsafe {
|
||||||
libc::sendmmsg(fd, hdrs[off..].as_mut_ptr(), (hdrs.len() - off) as u32, 0)
|
libc::sendmmsg(fd, hdrs[off..].as_mut_ptr(), (hdrs.len() - off) as u32, 0)
|
||||||
};
|
};
|
||||||
@@ -293,15 +382,36 @@ fn spawn_sender(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Nearest-rank percentile of `v`. Used for the web-console stats sample's per-stage p50/p99.
///
/// * `v` — the samples; sorted in place (ascending) as a side effect.
/// * `q` — the quantile in `0.0..=1.0`. Out-of-range or NaN values are tolerated: the computed
///   rank is clamped into the slice, so `q <= 0.0` / NaN yields the minimum and `q >= 1.0` the
///   maximum.
///
/// Returns `0` for an empty slice; otherwise the element at 1-based rank `ceil(len * q)`, i.e.
/// the smallest sample with at least a fraction `q` of the samples `<=` it.
fn percentile(v: &mut [u32], q: f64) -> u32 {
    if v.is_empty() {
        return 0;
    }
    v.sort_unstable();
    // Nearest-rank: 1-based rank = ceil(len * q). Using floor(len * q) directly as the 0-based
    // index lands one rank too high whenever len * q is a whole number (e.g. p50 of 4 samples
    // would pick the 3rd, and p99 of exactly 100 samples would pick the maximum).
    // The float -> usize cast saturates (NaN and negatives become 0), so no extra guard is needed.
    let rank = (v.len() as f64 * q).ceil() as usize;
    let idx = rank.saturating_sub(1).min(v.len() - 1);
    v[idx]
}
|
||||||
|
|
||||||
/// The encode → packetize loop, over a borrowed capturer. Sending runs on a dedicated thread
|
/// The encode → packetize loop, over a borrowed capturer. Sending runs on a dedicated thread
|
||||||
/// (see [`spawn_sender`]) so a send spike can never stall capture/encode.
|
/// (see [`spawn_sender`]) so a send spike can never stall capture/encode.
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn stream_body(
|
fn stream_body(
|
||||||
capturer: &mut dyn Capturer,
|
// `&mut Box` (not `&mut dyn`) so a mid-stream capture-loss rebuild can SWAP the capturer in place.
|
||||||
|
capturer: &mut Box<dyn Capturer>,
|
||||||
|
// Re-open the video source on capture loss (virtual-display path → follow a Desktop<->Game switch);
|
||||||
|
// `None` for the portal/synthetic source, which has nothing to re-detect (propagate the error).
|
||||||
|
rebuild: Option<&dyn Fn() -> Result<Box<dyn Capturer>>>,
|
||||||
sock: &UdpSocket,
|
sock: &UdpSocket,
|
||||||
cfg: StreamConfig,
|
cfg: StreamConfig,
|
||||||
running: &Arc<AtomicBool>,
|
running: &Arc<AtomicBool>,
|
||||||
force_idr: &AtomicBool,
|
force_idr: &AtomicBool,
|
||||||
rfi_range: &std::sync::Mutex<Option<(i64, i64)>>,
|
rfi_range: &std::sync::Mutex<Option<(i64, i64)>>,
|
||||||
|
// Shared stats recorder. The encode loop reads `stats.is_armed()` per frame to decide whether
|
||||||
|
// to accumulate the per-stage split, then emits a `StatsSample` at its 1 s aggregation boundary.
|
||||||
|
stats: &Arc<crate::stats_recorder::StatsRecorder>,
|
||||||
|
// Short client label (peer IP) seeded into the capture meta on the first armed registration.
|
||||||
|
client_label: &str,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// The first frame establishes the authoritative size/format for the encoder.
|
// The first frame establishes the authoritative size/format for the encoder.
|
||||||
let mut frame = capturer.next_frame().context("capture first frame")?;
|
let mut frame = capturer.next_frame().context("capture first frame")?;
|
||||||
@@ -365,19 +475,99 @@ fn stream_body(
|
|||||||
let perf = crate::config::config().perf;
|
let perf = crate::config::config().perf;
|
||||||
let (mut mx_cap, mut mx_enc, mut mx_pkt, mut mx_send, mut mx_pkts, mut uniq) =
|
let (mut mx_cap, mut mx_enc, mut mx_pkt, mut mx_send, mut mx_pkts, mut uniq) =
|
||||||
(0u128, 0u128, 0u128, 0u128, 0usize, 0u32);
|
(0u128, 0u128, 0u128, 0u128, 0usize, 0u32);
|
||||||
|
// Web-console stats accumulation (active when `perf` OR a capture is armed): per-stage vectors
|
||||||
|
// for p50/p99, the goodput bytes queued to the sender this window, the previous window's
|
||||||
|
// dropped-frame count for delta computation, and the registration id cached on the first sample.
|
||||||
|
let codec_name = match cfg.codec {
|
||||||
|
Codec::H264 => "h264",
|
||||||
|
Codec::H265 => "hevc",
|
||||||
|
Codec::Av1 => "av1",
|
||||||
|
};
|
||||||
|
let mut sid: Option<u32> = None;
|
||||||
|
let (mut v_cap, mut v_enc, mut v_pkt, mut v_send): (Vec<u32>, Vec<u32>, Vec<u32>, Vec<u32>) =
|
||||||
|
(Vec::new(), Vec::new(), Vec::new(), Vec::new());
|
||||||
|
let mut bytes_win: u64 = 0;
|
||||||
|
let mut last_dropped_batches: u64 = 0;
|
||||||
// Absolute next-frame deadline — the single pacing clock for the loop.
|
// Absolute next-frame deadline — the single pacing clock for the loop.
|
||||||
let mut next_frame = Instant::now();
|
let mut next_frame = Instant::now();
|
||||||
// RFI capability is fixed for the session (probed at encoder open). Query it once so the
|
// RFI capability is fixed for the session (probed at encoder open). Query it once so the
|
||||||
// recovery path skips the always-`false` invalidate call on encoders without NVENC RFI and
|
// recovery path skips the always-`false` invalidate call on encoders without NVENC RFI and
|
||||||
// forces a keyframe directly instead.
|
// forces a keyframe directly instead.
|
||||||
let supports_rfi = enc.caps().supports_rfi;
|
let mut supports_rfi = enc.caps().supports_rfi;
|
||||||
|
|
||||||
|
// Bound consecutive capture-loss rebuilds (a delivered frame clears the counter) so a permanently
|
||||||
|
// dead source can't loop forever — it ends the stream after the cap, falling back to a reconnect.
|
||||||
|
const MAX_REBUILDS: u32 = 5;
|
||||||
|
let mut rebuilds: u32 = 0;
|
||||||
|
|
||||||
while running.load(Ordering::SeqCst) {
|
while running.load(Ordering::SeqCst) {
|
||||||
let tick = Instant::now();
|
let tick = Instant::now();
|
||||||
|
// Measure per-stage timing when `PUNKTFUNK_PERF` is set OR a web-console stats capture is
|
||||||
|
// armed (cheap Relaxed atomic, re-read each frame).
|
||||||
|
let measure = perf || stats.is_armed();
|
||||||
// Advance to the freshest captured frame if one arrived; otherwise reuse the last.
|
// Advance to the freshest captured frame if one arrived; otherwise reuse the last.
|
||||||
if let Some(f) = capturer.try_latest().context("capture frame")? {
|
match capturer.try_latest() {
|
||||||
frame = f;
|
Ok(Some(f)) => {
|
||||||
uniq += 1;
|
frame = f;
|
||||||
|
uniq += 1;
|
||||||
|
rebuilds = 0; // a delivered frame clears the consecutive-loss counter
|
||||||
|
}
|
||||||
|
Ok(None) => {} // no new frame — reuse the last (static/idle desktop)
|
||||||
|
Err(e) => {
|
||||||
|
// The capture source went away — the compositor was torn down on a Desktop<->Game
|
||||||
|
// switch, or the virtual output was removed. On the virtual-display path, re-detect the
|
||||||
|
// now-live compositor and re-attach IN PLACE (the send thread + packetizer + socket +
|
||||||
|
// RTP clock all survive), then force an IDR so Moonlight resyncs — so the stream FOLLOWS
|
||||||
|
// the switch with no client reconnect. Build the new source BEFORE dropping the old.
|
||||||
|
// Bounded by a counter + a ~40s budget; on exhaustion, end the stream (Moonlight
|
||||||
|
// reconnect). The portal/synthetic path has no rebuild closure → propagate as before.
|
||||||
|
let Some(rebuild) = rebuild else {
|
||||||
|
return Err(e).context("capture frame");
|
||||||
|
};
|
||||||
|
rebuilds += 1;
|
||||||
|
if rebuilds > MAX_REBUILDS {
|
||||||
|
return Err(e).context("capture lost — rebuild attempts exhausted");
|
||||||
|
}
|
||||||
|
tracing::warn!(error = %format!("{e:#}"), rebuild = rebuilds,
|
||||||
|
"gamestream: capture lost — rebuilding source in place (following a session switch)");
|
||||||
|
let rebuild_deadline = Instant::now() + Duration::from_secs(40);
|
||||||
|
let new_cap = loop {
|
||||||
|
match rebuild() {
|
||||||
|
Ok(c) => break c,
|
||||||
|
Err(e2) => {
|
||||||
|
if !running.load(Ordering::SeqCst) || Instant::now() >= rebuild_deadline
|
||||||
|
{
|
||||||
|
return Err(e2)
|
||||||
|
.context("capture lost — no source within the rebuild budget");
|
||||||
|
}
|
||||||
|
tracing::warn!(error = %format!("{e2:#}"),
|
||||||
|
"gamestream: source not up yet — retrying");
|
||||||
|
std::thread::sleep(Duration::from_millis(500));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
*capturer = new_cap;
|
||||||
|
capturer.set_active(true);
|
||||||
|
frame = capturer.next_frame().context("first frame after rebuild")?;
|
||||||
|
// Re-open the encoder for the new source (same negotiated WxH → same SPS profile) and
|
||||||
|
// force an IDR so Moonlight resyncs on the first emitted AU.
|
||||||
|
enc = encode::open_video(
|
||||||
|
cfg.codec,
|
||||||
|
frame.format,
|
||||||
|
frame.width,
|
||||||
|
frame.height,
|
||||||
|
cfg.fps,
|
||||||
|
cfg.bitrate_kbps as u64 * 1000,
|
||||||
|
frame.is_cuda(),
|
||||||
|
8,
|
||||||
|
)
|
||||||
|
.context("reopen encoder after rebuild")?;
|
||||||
|
supports_rfi = enc.caps().supports_rfi;
|
||||||
|
enc.request_keyframe();
|
||||||
|
next_frame = Instant::now();
|
||||||
|
tracing::info!("gamestream: source rebuilt — stream continues");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let t_cap = tick.elapsed();
|
let t_cap = tick.elapsed();
|
||||||
// Honor a client recovery request. Prefer reference-frame invalidation (the encoder
|
// Honor a client recovery request. Prefer reference-frame invalidation (the encoder
|
||||||
@@ -414,9 +604,19 @@ fn stream_body(
|
|||||||
// Hand the frame's packets to the send thread; never block here. A full queue means
|
// Hand the frame's packets to the send thread; never block here. A full queue means
|
||||||
// the sender is behind — drop this batch (FEC/RFI covers the client) and keep encoding.
|
// the sender is behind — drop this batch (FEC/RFI covers the client) and keep encoding.
|
||||||
let n = batch.len();
|
let n = batch.len();
|
||||||
|
// Goodput this window = bytes actually queued to the sender (a dropped batch never reaches
|
||||||
|
// the wire, so it's excluded). Summed only when measuring, to keep the idle path free.
|
||||||
|
let batch_bytes: u64 = if measure {
|
||||||
|
batch.iter().map(|p| p.len() as u64).sum()
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
match batch_tx.try_send(batch) {
|
match batch_tx.try_send(batch) {
|
||||||
Ok(()) => sent_batches += 1,
|
Ok(()) => {
|
||||||
|
sent_batches += 1;
|
||||||
|
bytes_win += batch_bytes;
|
||||||
|
}
|
||||||
Err(std::sync::mpsc::TrySendError::Full(_)) => {
|
Err(std::sync::mpsc::TrySendError::Full(_)) => {
|
||||||
dropped_batches += 1;
|
dropped_batches += 1;
|
||||||
if dropped_batches.is_power_of_two() {
|
if dropped_batches.is_power_of_two() {
|
||||||
@@ -428,17 +628,26 @@ fn stream_body(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if perf {
|
if measure {
|
||||||
let t_send = tick.elapsed();
|
let t_send = tick.elapsed();
|
||||||
mx_cap = mx_cap.max(t_cap.as_micros());
|
let cap_us = t_cap.as_micros();
|
||||||
mx_enc = mx_enc.max((t_enc - t_cap).as_micros());
|
let enc_us = (t_enc - t_cap).as_micros();
|
||||||
mx_pkt = mx_pkt.max((t_pkt - t_enc).as_micros());
|
let pkt_us = (t_pkt - t_enc).as_micros();
|
||||||
mx_send = mx_send.max((t_send - t_pkt).as_micros());
|
let send_us = (t_send - t_pkt).as_micros();
|
||||||
|
mx_cap = mx_cap.max(cap_us);
|
||||||
|
mx_enc = mx_enc.max(enc_us);
|
||||||
|
mx_pkt = mx_pkt.max(pkt_us);
|
||||||
|
mx_send = mx_send.max(send_us);
|
||||||
mx_pkts = mx_pkts.max(n);
|
mx_pkts = mx_pkts.max(n);
|
||||||
|
v_cap.push(cap_us as u32);
|
||||||
|
v_enc.push(enc_us as u32);
|
||||||
|
v_pkt.push(pkt_us as u32);
|
||||||
|
v_send.push(send_us as u32);
|
||||||
}
|
}
|
||||||
|
|
||||||
fps_count += 1;
|
fps_count += 1;
|
||||||
if fps_t.elapsed() >= Duration::from_secs(1) {
|
if fps_t.elapsed() >= Duration::from_secs(1) {
|
||||||
|
let secs = fps_t.elapsed().as_secs_f64();
|
||||||
if perf {
|
if perf {
|
||||||
// Max µs/stage this second: cap=drain channel, enc=submit (zero-copy device
|
// Max µs/stage this second: cap=drain channel, enc=submit (zero-copy device
|
||||||
// copy + NVENC), pkt=poll+FEC+packetize, send=paced packet send. `uniq`=new
|
// copy + NVENC), pkt=poll+FEC+packetize, send=paced packet send. `uniq`=new
|
||||||
@@ -453,12 +662,6 @@ fn stream_body(
|
|||||||
max_pkts = mx_pkts,
|
max_pkts = mx_pkts,
|
||||||
"video: streaming (perf)"
|
"video: streaming (perf)"
|
||||||
);
|
);
|
||||||
mx_cap = 0;
|
|
||||||
mx_enc = 0;
|
|
||||||
mx_pkt = 0;
|
|
||||||
mx_send = 0;
|
|
||||||
mx_pkts = 0;
|
|
||||||
uniq = 0;
|
|
||||||
} else {
|
} else {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
fps = fps_count,
|
fps = fps_count,
|
||||||
@@ -467,6 +670,68 @@ fn stream_body(
|
|||||||
"video: streaming"
|
"video: streaming"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
// Web-console capture: build the aggregated sample. The host send side exposes no
|
||||||
|
// receiver-side packet loss / FEC-recovery / send-buffer EAGAIN counters, so those stay
|
||||||
|
// 0 (not fabricated); `frames_dropped` is the per-frame send-queue overflow delta.
|
||||||
|
if stats.is_armed() {
|
||||||
|
let session_id = *sid.get_or_insert_with(|| {
|
||||||
|
stats.register_session(
|
||||||
|
"gamestream",
|
||||||
|
cfg.width,
|
||||||
|
cfg.height,
|
||||||
|
cfg.fps,
|
||||||
|
codec_name,
|
||||||
|
client_label,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
let sample = crate::stats_recorder::StatsSample {
|
||||||
|
t_ms: 0, // stamped by push_sample from the capture's monotonic start
|
||||||
|
session_id,
|
||||||
|
stages: vec![
|
||||||
|
crate::stats_recorder::StageTiming {
|
||||||
|
name: "capture".into(),
|
||||||
|
p50_us: percentile(&mut v_cap, 0.50) as f32,
|
||||||
|
p99_us: percentile(&mut v_cap, 0.99) as f32,
|
||||||
|
},
|
||||||
|
crate::stats_recorder::StageTiming {
|
||||||
|
name: "encode".into(),
|
||||||
|
p50_us: percentile(&mut v_enc, 0.50) as f32,
|
||||||
|
p99_us: percentile(&mut v_enc, 0.99) as f32,
|
||||||
|
},
|
||||||
|
crate::stats_recorder::StageTiming {
|
||||||
|
name: "packetize".into(),
|
||||||
|
p50_us: percentile(&mut v_pkt, 0.50) as f32,
|
||||||
|
p99_us: percentile(&mut v_pkt, 0.99) as f32,
|
||||||
|
},
|
||||||
|
crate::stats_recorder::StageTiming {
|
||||||
|
name: "send".into(),
|
||||||
|
p50_us: percentile(&mut v_send, 0.50) as f32,
|
||||||
|
p99_us: percentile(&mut v_send, 0.99) as f32,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
fps: (uniq as f64 / secs) as f32,
|
||||||
|
repeat_fps: (fps_count.saturating_sub(uniq) as f64 / secs) as f32,
|
||||||
|
mbps: (bytes_win as f64 * 8.0 / secs / 1_000_000.0) as f32,
|
||||||
|
bitrate_kbps: cfg.bitrate_kbps,
|
||||||
|
frames_dropped: dropped_batches.saturating_sub(last_dropped_batches) as u32,
|
||||||
|
packets_dropped: 0,
|
||||||
|
send_dropped: 0,
|
||||||
|
fec_recovered: 0,
|
||||||
|
};
|
||||||
|
stats.push_sample(session_id, sample);
|
||||||
|
}
|
||||||
|
mx_cap = 0;
|
||||||
|
mx_enc = 0;
|
||||||
|
mx_pkt = 0;
|
||||||
|
mx_send = 0;
|
||||||
|
mx_pkts = 0;
|
||||||
|
uniq = 0;
|
||||||
|
v_cap.clear();
|
||||||
|
v_enc.clear();
|
||||||
|
v_pkt.clear();
|
||||||
|
v_send.clear();
|
||||||
|
bytes_win = 0;
|
||||||
|
last_dropped_batches = dropped_batches;
|
||||||
fps_count = 0;
|
fps_count = 0;
|
||||||
fps_t = Instant::now();
|
fps_t = Instant::now();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
//! `RTP_PACKET(12, big-endian) + reserved[4] + NV_VIDEO_PACKET(16, little-endian) + payload`
|
//! `RTP_PACKET(12, big-endian) + reserved[4] + NV_VIDEO_PACKET(16, little-endian) + payload`
|
||||||
//! and the frame's bitstream is prefixed with an 8-byte `video_short_frame_header_t`, then
|
//! and the frame's bitstream is prefixed with an 8-byte `video_short_frame_header_t`, then
|
||||||
//! striped into ≤4 FEC blocks of ≤255 shards. Byte-exact spec:
|
//! striped into ≤4 FEC blocks of ≤255 shards. Byte-exact spec:
|
||||||
//! `docs/research/gamestream-protocol-research.json` (video plane).
|
//! `design/research/gamestream-protocol-research.json` (video plane).
|
||||||
//!
|
//!
|
||||||
//! FEC (P1.5): each block carries `m = ⌈k·pct/100⌉` Reed–Solomon parity shards generated by
|
//! FEC (P1.5): each block carries `m = ⌈k·pct/100⌉` Reed–Solomon parity shards generated by
|
||||||
//! `punktfunk_core::fec::Gf8Coder` (the nanors-compatible Cauchy GF(2⁸) coder). Crucially, RS runs
|
//! `punktfunk_core::fec::Gf8Coder` (the nanors-compatible Cauchy GF(2⁸) coder). Crucially, RS runs
|
||||||
|
|||||||
@@ -24,6 +24,9 @@ pub trait InputInjector {
|
|||||||
pub enum Backend {
|
pub enum Backend {
|
||||||
/// wlroots virtual pointer + keyboard Wayland protocols — the headless-Sway path.
|
/// wlroots virtual pointer + keyboard Wayland protocols — the headless-Sway path.
|
||||||
WlrVirtual,
|
WlrVirtual,
|
||||||
|
/// KWin `org_kde_kwin_fake_input` — direct injection, no RemoteDesktop portal / approval dialog
|
||||||
|
/// (authorized by the host's `.desktop`). The headless KDE-Desktop path; what krdpserver uses.
|
||||||
|
KwinFakeInput,
|
||||||
/// libei via `reis` — Wayland-native (RemoteDesktop portal). Not yet implemented.
|
/// libei via `reis` — Wayland-native (RemoteDesktop portal). Not yet implemented.
|
||||||
Libei,
|
Libei,
|
||||||
/// libei directly against gamescope's own EIS socket (no portal): input lands in the
|
/// libei directly against gamescope's own EIS socket (no portal): input lands in the
|
||||||
@@ -47,6 +50,16 @@ pub fn open(backend: Backend) -> Result<Box<dyn InputInjector>> {
|
|||||||
anyhow::bail!("wlroots virtual input requires Linux + a Wayland compositor")
|
anyhow::bail!("wlroots virtual input requires Linux + a Wayland compositor")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Backend::KwinFakeInput => {
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
{
|
||||||
|
Ok(Box::new(kwin_fake_input::KwinFakeInjector::open()?))
|
||||||
|
}
|
||||||
|
#[cfg(not(target_os = "linux"))]
|
||||||
|
{
|
||||||
|
anyhow::bail!("KWin fake_input requires Linux + a KWin Wayland session")
|
||||||
|
}
|
||||||
|
}
|
||||||
Backend::Libei => {
|
Backend::Libei => {
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
{
|
{
|
||||||
@@ -90,12 +103,18 @@ pub fn open(backend: Backend) -> Result<Box<dyn InputInjector>> {
|
|||||||
/// Pick the injection backend for the current session. gamescope hosts its own EIS server (no
|
/// Pick the injection backend for the current session. gamescope hosts its own EIS server (no
|
||||||
/// portal), so a gamescope session injects directly into it. wlroots/Sway only implements the
|
/// portal), so a gamescope session injects directly into it. wlroots/Sway only implements the
|
||||||
/// ScreenCast portal (no RemoteDesktop), so libei can't run there — use the wlr virtual-input
|
/// ScreenCast portal (no RemoteDesktop), so libei can't run there — use the wlr virtual-input
|
||||||
/// protocols. KWin and GNOME implement RemoteDesktop but not the wlr protocols, so use libei.
|
/// protocols. **KWin** exposes `org_kde_kwin_fake_input` (direct injection, no portal / approval
|
||||||
/// `PUNKTFUNK_INPUT_BACKEND=wlr|libei|gamescope|uinput` overrides the auto-detection.
|
/// dialog — the only headless-capable path; what krdpserver uses), so prefer it there. **GNOME**
|
||||||
|
/// has neither fake_input nor the wlr protocols, so it uses libei via the RemoteDesktop portal
|
||||||
|
/// (which needs a user to approve, or a pre-seeded grant — not truly headless).
|
||||||
|
/// `PUNKTFUNK_INPUT_BACKEND=wlr|kwin|libei|gamescope|uinput` overrides the auto-detection.
|
||||||
pub fn default_backend() -> Backend {
|
pub fn default_backend() -> Backend {
|
||||||
if let Ok(v) = std::env::var("PUNKTFUNK_INPUT_BACKEND") {
|
if let Ok(v) = std::env::var("PUNKTFUNK_INPUT_BACKEND") {
|
||||||
match v.trim().to_ascii_lowercase().as_str() {
|
match v.trim().to_ascii_lowercase().as_str() {
|
||||||
"wlr" | "wlroots" | "wlrvirtual" => return Backend::WlrVirtual,
|
"wlr" | "wlroots" | "wlrvirtual" => return Backend::WlrVirtual,
|
||||||
|
"kwin" | "fakeinput" | "fake_input" | "kwin-fake-input" => {
|
||||||
|
return Backend::KwinFakeInput
|
||||||
|
}
|
||||||
"libei" | "ei" | "portal" => return Backend::Libei,
|
"libei" | "ei" | "portal" => return Backend::Libei,
|
||||||
"gamescope" | "gamescope-ei" => return Backend::GamescopeEi,
|
"gamescope" | "gamescope-ei" => return Backend::GamescopeEi,
|
||||||
"uinput" => return Backend::Uinput,
|
"uinput" => return Backend::Uinput,
|
||||||
@@ -112,16 +131,26 @@ pub fn default_backend() -> Backend {
|
|||||||
}
|
}
|
||||||
#[cfg(not(target_os = "windows"))]
|
#[cfg(not(target_os = "windows"))]
|
||||||
{
|
{
|
||||||
if crate::config::config()
|
// An explicit compositor pick (set per connect / mid-stream) is the strongest signal.
|
||||||
.compositor
|
let compositor = crate::config::config().compositor.clone();
|
||||||
.as_deref()
|
if let Some(c) = compositor.as_deref() {
|
||||||
.is_some_and(|v| v.trim().eq_ignore_ascii_case("gamescope"))
|
let c = c.trim();
|
||||||
{
|
if c.eq_ignore_ascii_case("gamescope") {
|
||||||
return Backend::GamescopeEi;
|
return Backend::GamescopeEi;
|
||||||
|
}
|
||||||
|
if c.eq_ignore_ascii_case("kwin") {
|
||||||
|
return Backend::KwinFakeInput;
|
||||||
|
}
|
||||||
|
if c.eq_ignore_ascii_case("wlroots") || c.eq_ignore_ascii_case("sway") {
|
||||||
|
return Backend::WlrVirtual;
|
||||||
|
}
|
||||||
|
// mutter (GNOME) falls through to the XDG_CURRENT_DESKTOP check below.
|
||||||
}
|
}
|
||||||
let desktop = std::env::var("XDG_CURRENT_DESKTOP").unwrap_or_default();
|
let desktop = std::env::var("XDG_CURRENT_DESKTOP").unwrap_or_default();
|
||||||
let d = desktop.to_ascii_uppercase();
|
let d = desktop.to_ascii_uppercase();
|
||||||
if d.contains("KDE") || d.contains("GNOME") {
|
if d.contains("KDE") {
|
||||||
|
Backend::KwinFakeInput
|
||||||
|
} else if d.contains("GNOME") {
|
||||||
Backend::Libei
|
Backend::Libei
|
||||||
} else {
|
} else {
|
||||||
Backend::WlrVirtual
|
Backend::WlrVirtual
|
||||||
@@ -478,6 +507,9 @@ pub mod gamepad {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
|
#[path = "inject/linux/kwin_fake_input.rs"]
|
||||||
|
mod kwin_fake_input;
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
#[path = "inject/linux/libei.rs"]
|
#[path = "inject/linux/libei.rs"]
|
||||||
mod libei;
|
mod libei;
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
|
|||||||
@@ -15,6 +15,9 @@
|
|||||||
//! `<linux/uinput.h>` on x86_64. `/dev/uinput` needs a udev rule + `input` group membership
|
//! `<linux/uinput.h>` on x86_64. `/dev/uinput` needs a udev rule + `input` group membership
|
||||||
//! (see `scripts/60-punktfunk.rules`); creation fails with a clear error otherwise.
|
//! (see `scripts/60-punktfunk.rules`); creation fails with a clear error otherwise.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use crate::gamestream::gamepad::{self, GamepadFrame, MAX_PADS};
|
use crate::gamestream::gamepad::{self, GamepadFrame, MAX_PADS};
|
||||||
use anyhow::{bail, Result};
|
use anyhow::{bail, Result};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
@@ -215,6 +218,11 @@ const _: () = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
fn ioctl_int(fd: i32, req: libc::c_ulong, arg: libc::c_int, what: &str) -> Result<()> {
|
fn ioctl_int(fd: i32, req: libc::c_ulong, arg: libc::c_int, what: &str) -> Result<()> {
|
||||||
|
// SAFETY: every caller passes one of UI_SET_EVBIT/KEYBIT/FFBIT/UI_DEV_CREATE/UI_DEV_DESTROY as
|
||||||
|
// `req` — all integer-argument ioctls whose third arg the kernel takes BY VALUE, so nothing is
|
||||||
|
// dereferenced through `arg` and no memory must outlive the call. The only precondition is `fd`
|
||||||
|
// being a valid open descriptor; callers pass the live `/dev/uinput` fd, and even a stale fd
|
||||||
|
// would merely return -1/EBADF (reported below), never UB.
|
||||||
if unsafe { libc::ioctl(fd, req, arg) } < 0 {
|
if unsafe { libc::ioctl(fd, req, arg) } < 0 {
|
||||||
bail!("{what}: {}", std::io::Error::last_os_error());
|
bail!("{what}: {}", std::io::Error::last_os_error());
|
||||||
}
|
}
|
||||||
@@ -222,6 +230,12 @@ fn ioctl_int(fd: i32, req: libc::c_ulong, arg: libc::c_int, what: &str) -> Resul
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn ioctl_ptr<T>(fd: i32, req: libc::c_ulong, arg: *mut T, what: &str) -> Result<()> {
|
fn ioctl_ptr<T>(fd: i32, req: libc::c_ulong, arg: *mut T, what: &str) -> Result<()> {
|
||||||
|
// SAFETY: `fd` is the caller's live `/dev/uinput` fd. Every call site passes `&mut x` for a live,
|
||||||
|
// uniquely-borrowed `#[repr(C)]` `x: T` whose size matches the struct the request number encodes
|
||||||
|
// (UI_DEV_SETUP=0x405c_5503 → 0x5c=92=size_of::<UinputSetup>(); UI_ABS_SETUP → 0x1c=28; the FF
|
||||||
|
// upload/erase ioctls → 0x68/0x0c — all pinned by the `size_of` asserts above). The kernel copies
|
||||||
|
// exactly that many bytes in/out through `arg`; the `&mut` keeps the pointee alive and unaliased
|
||||||
|
// for the whole synchronous call.
|
||||||
if unsafe { libc::ioctl(fd, req, arg) } < 0 {
|
if unsafe { libc::ioctl(fd, req, arg) } < 0 {
|
||||||
bail!("{what}: {}", std::io::Error::last_os_error());
|
bail!("{what}: {}", std::io::Error::last_os_error());
|
||||||
}
|
}
|
||||||
@@ -251,6 +265,9 @@ pub struct VirtualPad {
|
|||||||
impl VirtualPad {
|
impl VirtualPad {
|
||||||
pub fn create(index: usize, identity: PadIdentity) -> Result<VirtualPad> {
|
pub fn create(index: usize, identity: PadIdentity) -> Result<VirtualPad> {
|
||||||
use std::os::fd::FromRawFd;
|
use std::os::fd::FromRawFd;
|
||||||
|
// SAFETY: `c"/dev/uinput"` is a 'static NUL-terminated C string literal; `as_ptr()` yields a
|
||||||
|
// valid pointer the kernel only reads as a filesystem path. `open` returns a fresh fd (or -1)
|
||||||
|
// and retains nothing; no Rust memory is aliased or handed to the kernel beyond that 'static path.
|
||||||
let raw = unsafe {
|
let raw = unsafe {
|
||||||
libc::open(
|
libc::open(
|
||||||
c"/dev/uinput".as_ptr(),
|
c"/dev/uinput".as_ptr(),
|
||||||
@@ -264,6 +281,9 @@ impl VirtualPad {
|
|||||||
std::io::Error::last_os_error()
|
std::io::Error::last_os_error()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
// SAFETY: `raw >= 0` here (the `< 0` branch above already bailed), so it is a freshly-opened fd
|
||||||
|
// from `libc::open` that is not stored or owned anywhere else. Transferring it to `OwnedFd` makes
|
||||||
|
// this the unique owner, which will `close` it exactly once on drop (no double-close, no leak).
|
||||||
let fd = unsafe { OwnedFd::from_raw_fd(raw) };
|
let fd = unsafe { OwnedFd::from_raw_fd(raw) };
|
||||||
|
|
||||||
ioctl_int(raw, UI_SET_EVBIT, EV_KEY as i32, "UI_SET_EVBIT(EV_KEY)")?;
|
ioctl_int(raw, UI_SET_EVBIT, EV_KEY as i32, "UI_SET_EVBIT(EV_KEY)")?;
|
||||||
@@ -356,6 +376,11 @@ impl VirtualPad {
|
|||||||
code,
|
code,
|
||||||
value,
|
value,
|
||||||
};
|
};
|
||||||
|
// SAFETY: `ev` is a live local `#[repr(C)]` struct of all-integer fields with no padding bytes
|
||||||
|
// (timeval=16 + u16 + u16 + i32 = 24, the size asserted above), so every byte is initialized and
|
||||||
|
// valid to read as `u8`. The pointer is non-null and `u8`-aligned (align 1), the length is exactly
|
||||||
|
// `size_of::<InputEventRaw>()` so the slice spans precisely `ev`'s bytes (in bounds), and `ev`
|
||||||
|
// outlives `bytes` (used immediately below) with no concurrent mutation (single-threaded local).
|
||||||
let bytes = unsafe {
|
let bytes = unsafe {
|
||||||
std::slice::from_raw_parts(
|
std::slice::from_raw_parts(
|
||||||
&ev as *const _ as *const u8,
|
&ev as *const _ as *const u8,
|
||||||
@@ -363,6 +388,10 @@ impl VirtualPad {
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
// Best-effort: a full kernel queue drops the event; the next frame re-syncs state.
|
// Best-effort: a full kernel queue drops the event; the next frame re-syncs state.
|
||||||
|
// SAFETY: `self.fd` is the live uinput `OwnedFd` (borrowed via `as_raw_fd`, so it stays open for
|
||||||
|
// the call); `bytes` is the slice above backed by the still-live local `ev`. `write` only READS
|
||||||
|
// exactly `bytes.len()` bytes from `bytes.as_ptr()` (in bounds) and retains nothing past return,
|
||||||
|
// so the buffer outlives the synchronous call and the read-only access cannot race or alias.
|
||||||
let _ = unsafe {
|
let _ = unsafe {
|
||||||
libc::write(
|
libc::write(
|
||||||
self.fd.as_raw_fd(),
|
self.fd.as_raw_fd(),
|
||||||
@@ -404,6 +433,10 @@ impl VirtualPad {
|
|||||||
let raw = self.fd.as_raw_fd();
|
let raw = self.fd.as_raw_fd();
|
||||||
let mut buf = [0u8; std::mem::size_of::<InputEventRaw>()];
|
let mut buf = [0u8; std::mem::size_of::<InputEventRaw>()];
|
||||||
loop {
|
loop {
|
||||||
|
// SAFETY: `raw` is the live raw fd of `self.fd` (the non-blocking uinput device). `buf` is a
|
||||||
|
// live local `[u8; size_of::<InputEventRaw>()]`; `buf.as_mut_ptr()` is a valid writable pointer
|
||||||
|
// to its `buf.len()` bytes. `read` writes AT MOST `buf.len()` bytes (in bounds), the buffer
|
||||||
|
// outlives this synchronous call, and `buf` is borrowed uniquely here (no alias/race).
|
||||||
let n = unsafe { libc::read(raw, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
|
let n = unsafe { libc::read(raw, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
|
||||||
if n != buf.len() as isize {
|
if n != buf.len() as isize {
|
||||||
break; // EAGAIN / short read — queue drained
|
break; // EAGAIN / short read — queue drained
|
||||||
@@ -415,6 +448,10 @@ impl VirtualPad {
|
|||||||
unsafe { std::ptr::read_unaligned(buf.as_ptr() as *const InputEventRaw) };
|
unsafe { std::ptr::read_unaligned(buf.as_ptr() as *const InputEventRaw) };
|
||||||
match (ev.type_, ev.code) {
|
match (ev.type_, ev.code) {
|
||||||
(EV_UINPUT, UI_FF_UPLOAD) => {
|
(EV_UINPUT, UI_FF_UPLOAD) => {
|
||||||
|
// SAFETY: `UinputFfUpload` is `#[repr(C)]` over integers (`u32`, `i32`) and two
|
||||||
|
// `FfEffect`s (integers + `[u8; 32]`); all-zero is a valid bit pattern for every field
|
||||||
|
// (no bool/NonZero/enum/reference niche), so `zeroed` yields a fully-initialized valid
|
||||||
|
// value — `request_id` is then set below and the rest filled by UI_BEGIN_FF_UPLOAD.
|
||||||
let mut up: UinputFfUpload = unsafe { std::mem::zeroed() };
|
let mut up: UinputFfUpload = unsafe { std::mem::zeroed() };
|
||||||
up.request_id = ev.value as u32;
|
up.request_id = ev.value as u32;
|
||||||
if ioctl_ptr(raw, UI_BEGIN_FF_UPLOAD, &mut up, "UI_BEGIN_FF_UPLOAD").is_ok() {
|
if ioctl_ptr(raw, UI_BEGIN_FF_UPLOAD, &mut up, "UI_BEGIN_FF_UPLOAD").is_ok() {
|
||||||
@@ -442,6 +479,9 @@ impl VirtualPad {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
(EV_UINPUT, UI_FF_ERASE) => {
|
(EV_UINPUT, UI_FF_ERASE) => {
|
||||||
|
// SAFETY: `UinputFfErase` is `#[repr(C)]` over three integer fields (`u32`, `i32`,
|
||||||
|
// `u32`); all-zero is a valid bit pattern for each, so `zeroed` produces a fully-valid
|
||||||
|
// initialized value — `request_id` is set below and `effect_id` filled by the ioctl.
|
||||||
let mut er: UinputFfErase = unsafe { std::mem::zeroed() };
|
let mut er: UinputFfErase = unsafe { std::mem::zeroed() };
|
||||||
er.request_id = ev.value as u32;
|
er.request_id = ev.value as u32;
|
||||||
if ioctl_ptr(raw, UI_BEGIN_FF_ERASE, &mut er, "UI_BEGIN_FF_ERASE").is_ok() {
|
if ioctl_ptr(raw, UI_BEGIN_FF_ERASE, &mut er, "UI_BEGIN_FF_ERASE").is_ok() {
|
||||||
@@ -492,6 +532,9 @@ impl VirtualPad {
|
|||||||
|
|
||||||
impl Drop for VirtualPad {
|
impl Drop for VirtualPad {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `self.fd` is still the live owned uinput fd here (the `OwnedFd` field is closed only
|
||||||
|
// AFTER this `drop` body returns), borrowed by `as_raw_fd`. UI_DEV_DESTROY takes its argument
|
||||||
|
// (0) BY VALUE, so nothing is dereferenced or aliased; the ioctl just tears down the device.
|
||||||
let _ = unsafe { libc::ioctl(self.fd.as_raw_fd(), UI_DEV_DESTROY, 0) };
|
let _ = unsafe { libc::ioctl(self.fd.as_raw_fd(), UI_DEV_DESTROY, 0) };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,209 @@
|
|||||||
|
//! Headless input injection on KWin via the privileged `org_kde_kwin_fake_input` protocol — the
|
||||||
|
//! exact path KDE's own headless RDP server (`krdpserver`) uses. KWin advertises this restricted
|
||||||
|
//! global only to a client authorized through its installed `.desktop` `X-KDE-Wayland-Interfaces`
|
||||||
|
//! (we ship `io.unom.Punktfunk.Host.desktop`, which lists `org_kde_kwin_fake_input` alongside
|
||||||
|
//! `zkde_screencast_unstable_v1`). Binding the global IS the authorization, so injection needs **no
|
||||||
|
//! RemoteDesktop portal and no "Allow remote control?" dialog** — it works with no user present,
|
||||||
|
//! which the libei/portal path cannot. We connect as an ordinary Wayland client on the KWin session's
|
||||||
|
//! `$WAYLAND_DISPLAY` and translate events into fake-input requests; keyboard keys are raw Linux
|
||||||
|
//! evdev codes that KWin resolves through the session's own keymap (no keymap upload, unlike the wlr
|
||||||
|
//! virtual-keyboard path), and absolute pointer/touch coordinates are global compositor space — which
|
||||||
|
//! on a headless box (single per-session virtual output at the origin, scale 1) equals the streamed
|
||||||
|
//! output's pixels.
|
||||||
|
|
||||||
|
#![allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)]
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
|
use super::{gs_button_to_evdev, vk_to_evdev, InputEvent, InputInjector};
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use punktfunk_core::input::InputKind;
|
||||||
|
use wayland_client::protocol::wl_registry::{self, WlRegistry};
|
||||||
|
use wayland_client::{Connection, Dispatch, EventQueue, Proxy, QueueHandle};
|
||||||
|
|
||||||
|
// Generate the client bindings for the vendored protocol XML inline (no build.rs), exactly like the
|
||||||
|
// KWin virtual-output backend. Path is relative to CARGO_MANIFEST_DIR.
|
||||||
|
#[allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)]
|
||||||
|
pub mod fake {
|
||||||
|
use wayland_client;
|
||||||
|
use wayland_client::protocol::*;
|
||||||
|
|
||||||
|
pub mod __interfaces {
|
||||||
|
use wayland_client::protocol::__interfaces::*;
|
||||||
|
wayland_scanner::generate_interfaces!("protocols/fake-input.xml");
|
||||||
|
}
|
||||||
|
use self::__interfaces::*;
|
||||||
|
|
||||||
|
wayland_scanner::generate_client_code!("protocols/fake-input.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
use fake::org_kde_kwin_fake_input::OrgKdeKwinFakeInput as FakeInput;
|
||||||
|
|
||||||
|
/// Upper bound on the `org_kde_kwin_fake_input` version we bind. The `keyboard_key` request was
/// introduced in v4, and KWin advertises at least that.
const MAX_VERSION: u32 = 4;

/// Axis selectors handed to the `axis` request; the numbering is that of `wl_pointer.axis`.
const AXIS_VERTICAL: u32 = 0;
const AXIS_HORIZONTAL: u32 = 1;

/// A scroll event whose `code` equals this value is horizontal (mirrors `gamestream::input` and
/// the wlr backend).
const SCROLL_HORIZONTAL: u32 = 1;
|
||||||
|
|
||||||
|
/// Registry-bound globals (the Wayland dispatch state).
|
||||||
|
#[derive(Default)]
|
||||||
|
struct State {
|
||||||
|
fake: Option<FakeInput>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Dispatch<WlRegistry, ()> for State {
|
||||||
|
fn event(
|
||||||
|
state: &mut Self,
|
||||||
|
registry: &WlRegistry,
|
||||||
|
event: wl_registry::Event,
|
||||||
|
_: &(),
|
||||||
|
_: &Connection,
|
||||||
|
qh: &QueueHandle<Self>,
|
||||||
|
) {
|
||||||
|
if let wl_registry::Event::Global {
|
||||||
|
name,
|
||||||
|
interface,
|
||||||
|
version,
|
||||||
|
} = event
|
||||||
|
{
|
||||||
|
if interface == "org_kde_kwin_fake_input" {
|
||||||
|
state.fake = Some(registry.bind(name, version.min(MAX_VERSION), qh, ()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fake_input emits no events.
|
||||||
|
impl Dispatch<FakeInput, ()> for State {
|
||||||
|
fn event(
|
||||||
|
_: &mut Self,
|
||||||
|
_: &FakeInput,
|
||||||
|
_: <FakeInput as Proxy>::Event,
|
||||||
|
_: &(),
|
||||||
|
_: &Connection,
|
||||||
|
_: &QueueHandle<Self>,
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct KwinFakeInjector {
|
||||||
|
conn: Connection,
|
||||||
|
queue: EventQueue<State>,
|
||||||
|
state: State,
|
||||||
|
fake: FakeInput,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KwinFakeInjector {
|
||||||
|
pub fn open() -> Result<Self> {
|
||||||
|
let conn = Connection::connect_to_env()
|
||||||
|
.context("connect to KWin Wayland (is WAYLAND_DISPLAY set to the KWin socket?)")?;
|
||||||
|
let mut queue = conn.new_event_queue();
|
||||||
|
let qh = queue.handle();
|
||||||
|
let _registry = conn.display().get_registry(&qh, ());
|
||||||
|
let mut state = State::default();
|
||||||
|
queue
|
||||||
|
.roundtrip(&mut state)
|
||||||
|
.context("Wayland registry roundtrip")?;
|
||||||
|
|
||||||
|
let fake = state.fake.clone().context(
|
||||||
|
"KWin does not expose org_kde_kwin_fake_input to this client — install the host's \
|
||||||
|
.desktop (io.unom.Punktfunk.Host.desktop, X-KDE-Wayland-Interfaces) and re-login so \
|
||||||
|
KWin authorizes it (the grant is cached per-exe on first connect), or this is not a \
|
||||||
|
KWin session",
|
||||||
|
)?;
|
||||||
|
// Authenticate (the legacy handshake; for an interface-authorized client KWin accepts it
|
||||||
|
// without a dialog — same as krdpserver/krfb headless).
|
||||||
|
fake.authenticate("punktfunk".into(), "remote streaming input".into());
|
||||||
|
queue
|
||||||
|
.roundtrip(&mut state)
|
||||||
|
.context("fake_input authenticate roundtrip")?;
|
||||||
|
conn.flush().ok();
|
||||||
|
|
||||||
|
tracing::info!("KWin fake_input ready (headless keyboard/mouse/touch — no portal)");
|
||||||
|
Ok(Self {
|
||||||
|
conn,
|
||||||
|
queue,
|
||||||
|
state,
|
||||||
|
fake,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InputInjector for KwinFakeInjector {
|
||||||
|
fn inject(&mut self, event: &InputEvent) -> Result<()> {
|
||||||
|
match event.kind {
|
||||||
|
InputKind::MouseMove => {
|
||||||
|
self.fake.pointer_motion(event.x as f64, event.y as f64);
|
||||||
|
}
|
||||||
|
InputKind::MouseMoveAbs => {
|
||||||
|
let w = (event.flags >> 16) & 0xffff;
|
||||||
|
let h = event.flags & 0xffff;
|
||||||
|
if w > 0 && h > 0 {
|
||||||
|
let x = event.x.clamp(0, w as i32) as f64;
|
||||||
|
let y = event.y.clamp(0, h as i32) as f64;
|
||||||
|
self.fake.pointer_motion_absolute(x, y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
InputKind::MouseButtonDown | InputKind::MouseButtonUp => {
|
||||||
|
if let Some(btn) = gs_button_to_evdev(event.code) {
|
||||||
|
let st = u32::from(event.kind == InputKind::MouseButtonDown);
|
||||||
|
self.fake.button(btn, st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
InputKind::MouseScroll => {
|
||||||
|
// GameStream sends WHEEL_DELTA(120)-scaled units; a notch ≈ 15px. Vertical flips
|
||||||
|
// sign on the Wayland axis, horizontal passes through — same as the wlr backend.
|
||||||
|
let horizontal = event.code == SCROLL_HORIZONTAL;
|
||||||
|
let axis = if horizontal {
|
||||||
|
AXIS_HORIZONTAL
|
||||||
|
} else {
|
||||||
|
AXIS_VERTICAL
|
||||||
|
};
|
||||||
|
let notches = event.x as f64 / 120.0;
|
||||||
|
let sign = if horizontal { 1.0 } else { -1.0 };
|
||||||
|
self.fake.axis(axis, sign * notches * 15.0);
|
||||||
|
}
|
||||||
|
InputKind::KeyDown | InputKind::KeyUp => {
|
||||||
|
// Raw evdev keycode; KWin resolves it through the session's own keymap (and tracks
|
||||||
|
// modifier state itself, so no separate modifiers request is needed).
|
||||||
|
if let Some(evdev) = vk_to_evdev(event.code as u8) {
|
||||||
|
let st = u32::from(event.kind == InputKind::KeyDown);
|
||||||
|
self.fake.keyboard_key(evdev as u32, st);
|
||||||
|
} else {
|
||||||
|
tracing::debug!(vk = event.code, "unmapped VK keycode — dropped");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Touch: id = event.code, coords in the client surface w×h packed into flags (same
|
||||||
|
// absolute mapping as MouseMoveAbs). Each event is its own frame.
|
||||||
|
InputKind::TouchDown | InputKind::TouchMove => {
|
||||||
|
let w = (event.flags >> 16) & 0xffff;
|
||||||
|
let h = event.flags & 0xffff;
|
||||||
|
if w > 0 && h > 0 {
|
||||||
|
let x = event.x.clamp(0, w as i32) as f64;
|
||||||
|
let y = event.y.clamp(0, h as i32) as f64;
|
||||||
|
if event.kind == InputKind::TouchDown {
|
||||||
|
self.fake.touch_down(event.code, x, y);
|
||||||
|
} else {
|
||||||
|
self.fake.touch_motion(event.code, x, y);
|
||||||
|
}
|
||||||
|
self.fake.touch_frame();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
InputKind::TouchUp => {
|
||||||
|
self.fake.touch_up(event.code);
|
||||||
|
self.fake.touch_frame();
|
||||||
|
}
|
||||||
|
// Gamepads are injected through uinput, not the compositor.
|
||||||
|
InputKind::GamepadButton | InputKind::GamepadAxis => {}
|
||||||
|
}
|
||||||
|
// Surface protocol errors / disconnects, then push the batch to the compositor.
|
||||||
|
self.queue
|
||||||
|
.dispatch_pending(&mut self.state)
|
||||||
|
.context("wayland dispatch")?;
|
||||||
|
self.conn.flush().context("wayland flush")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -5,6 +5,9 @@
|
|||||||
//! keymap, and translate events into virtual pointer/keyboard requests, tracking modifier state
|
//! keymap, and translate events into virtual pointer/keyboard requests, tracking modifier state
|
||||||
//! so the compositor resolves shifted keysyms correctly.
|
//! so the compositor resolves shifted keysyms correctly.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{gs_button_to_evdev, vk_to_evdev, InputEvent, InputInjector};
|
use super::{gs_button_to_evdev, vk_to_evdev, InputEvent, InputInjector};
|
||||||
use anyhow::{bail, Context, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
use punktfunk_core::input::InputKind;
|
use punktfunk_core::input::InputKind;
|
||||||
@@ -264,10 +267,17 @@ impl InputInjector for WlrootsInjector {
|
|||||||
/// Create an anonymous in-memory file holding `s` + a trailing NUL (for the keymap fd).
|
/// Create an anonymous in-memory file holding `s` + a trailing NUL (for the keymap fd).
|
||||||
fn memfd_with(s: &str) -> Result<std::fs::File> {
|
fn memfd_with(s: &str) -> Result<std::fs::File> {
|
||||||
let name = b"punktfunk-keymap\0";
|
let name = b"punktfunk-keymap\0";
|
||||||
|
// SAFETY: `name` is a byte-string literal with an explicit trailing NUL, so `name.as_ptr()` is a
|
||||||
|
// valid NUL-terminated C string; `memfd_create` only reads that name (copying it) and creates an
|
||||||
|
// anonymous file, returning a fresh fd (or -1). `MFD_CLOEXEC` is a valid flag. The 'static literal
|
||||||
|
// outlives the synchronous call and nothing aliases it. The result is checked `< 0` below.
|
||||||
let fd = unsafe { libc::memfd_create(name.as_ptr() as *const libc::c_char, libc::MFD_CLOEXEC) };
|
let fd = unsafe { libc::memfd_create(name.as_ptr() as *const libc::c_char, libc::MFD_CLOEXEC) };
|
||||||
if fd < 0 {
|
if fd < 0 {
|
||||||
bail!("memfd_create failed: {}", std::io::Error::last_os_error());
|
bail!("memfd_create failed: {}", std::io::Error::last_os_error());
|
||||||
}
|
}
|
||||||
|
// SAFETY: `fd` is the fresh memfd `memfd_create` just returned and checked `>= 0`; it is a unique
|
||||||
|
// open fd nothing else owns, so `File` takes sole ownership and closes it exactly once on drop —
|
||||||
|
// no alias, no double-close.
|
||||||
let mut f = unsafe { std::fs::File::from_raw_fd(fd) };
|
let mut f = unsafe { std::fs::File::from_raw_fd(fd) };
|
||||||
f.write_all(s.as_bytes()).context("write keymap")?;
|
f.write_all(s.as_bytes()).context("write keymap")?;
|
||||||
f.write_all(&[0]).context("write keymap NUL")?;
|
f.write_all(&[0]).context("write keymap NUL")?;
|
||||||
|
|||||||
@@ -41,7 +41,8 @@ pub(super) const SHM_MAGIC: u32 = pf_driver_proto::gamepad::PAD_MAGIC; // "PFDS"
|
|||||||
pub(super) const OFF_INPUT: usize = core::mem::offset_of!(pf_driver_proto::gamepad::PadShm, input);
|
pub(super) const OFF_INPUT: usize = core::mem::offset_of!(pf_driver_proto::gamepad::PadShm, input);
|
||||||
pub(super) const OFF_OUT_SEQ: usize =
|
pub(super) const OFF_OUT_SEQ: usize =
|
||||||
core::mem::offset_of!(pf_driver_proto::gamepad::PadShm, out_seq);
|
core::mem::offset_of!(pf_driver_proto::gamepad::PadShm, out_seq);
|
||||||
pub(super) const OFF_OUTPUT: usize = core::mem::offset_of!(pf_driver_proto::gamepad::PadShm, output);
|
pub(super) const OFF_OUTPUT: usize =
|
||||||
|
core::mem::offset_of!(pf_driver_proto::gamepad::PadShm, output);
|
||||||
/// Device-type selector the driver reads to choose which HID identity/descriptor it serves: 0 =
|
/// Device-type selector the driver reads to choose which HID identity/descriptor it serves: 0 =
|
||||||
/// DualSense (the default — the section is zeroed), 1 = DualShock 4.
|
/// DualSense (the default — the section is zeroed), 1 = DualShock 4.
|
||||||
pub(super) const OFF_DEVTYPE: usize =
|
pub(super) const OFF_DEVTYPE: usize =
|
||||||
@@ -108,7 +109,7 @@ pub(super) struct SwDeviceProfile<'a> {
|
|||||||
/// `profile.instance`). The returned `HSWDEVICE` owns it — `SwDeviceClose` removes it on drop, so the
|
/// `profile.instance`). The returned `HSWDEVICE` owns it — `SwDeviceClose` removes it on drop, so the
|
||||||
/// pad appears/disappears with the session and nothing persists.
|
/// pad appears/disappears with the session and nothing persists.
|
||||||
///
|
///
|
||||||
/// **Game-detection identity** (see `docs/windows-dualsense-game-detection.md`). `HIDD_ATTRIBUTES`
|
/// **Game-detection identity** (see `design/windows-dualsense-game-detection.md`). `HIDD_ATTRIBUTES`
|
||||||
/// alone (VID/PID via the IOCTL) satisfies SDL/HIDAPI/RawInput, but a native PS5 path (libScePad-
|
/// alone (VID/PID via the IOCTL) satisfies SDL/HIDAPI/RawInput, but a native PS5 path (libScePad-
|
||||||
/// style raw HID) classifies the *connection type* by walking from the HID child to its parent
|
/// style raw HID) classifies the *connection type* by walking from the HID child to its parent
|
||||||
/// (`CM_Get_Parent`) and string-matching `"USB"`/`"BTHENUM"` in that parent's
|
/// (`CM_Get_Parent`) and string-matching `"USB"`/`"BTHENUM"` in that parent's
|
||||||
|
|||||||
@@ -187,8 +187,10 @@ impl XusbWinPad {
|
|||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn write_state(&mut self, buttons: u16, lt: u8, rt: u8, lx: i16, ly: i16, rx: i16, ry: i16) {
|
fn write_state(&mut self, buttons: u16, lt: u8, rt: u8, lx: i16, ly: i16, rx: i16, ry: i16) {
|
||||||
self.packet = self.packet.wrapping_add(1);
|
self.packet = self.packet.wrapping_add(1);
|
||||||
// SAFETY: base points at SHM_SIZE bytes; all offsets are in range.
|
|
||||||
let base = self.shm.base();
|
let base = self.shm.base();
|
||||||
|
// SAFETY: `base` is the start of the mapped section (`SHM_SIZE` bytes, owned by `Shm`); every
|
||||||
|
// `OFF_*` is a fixed in-range offset into it and `write_unaligned` handles the unaligned field
|
||||||
|
// writes. Single owner (`&mut self`), so no concurrent writer races these stores.
|
||||||
unsafe {
|
unsafe {
|
||||||
std::ptr::write_unaligned(base.add(OFF_BUTTONS) as *mut u16, buttons);
|
std::ptr::write_unaligned(base.add(OFF_BUTTONS) as *mut u16, buttons);
|
||||||
*base.add(OFF_LT) = lt;
|
*base.add(OFF_LT) = lt;
|
||||||
|
|||||||
@@ -5,6 +5,9 @@
|
|||||||
//! thread stays bound to its desktop and only reattaches (`OpenInputDesktop`/`SetThreadDesktop`) when
|
//! thread stays bound to its desktop and only reattaches (`OpenInputDesktop`/`SetThreadDesktop`) when
|
||||||
//! `SendInput` reports a short write (the input desktop switched) — no per-event reattach overhead.
|
//! `SendInput` reports a short write (the input desktop switched) — no per-event reattach overhead.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it.
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use punktfunk_core::input::{InputEvent, InputKind};
|
use punktfunk_core::input::{InputEvent, InputKind};
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
@@ -35,7 +38,12 @@ pub struct SendInputInjector {
|
|||||||
desktop: Option<HDESK>,
|
desktop: Option<HDESK>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only ever used from the host's single injector thread.
|
// SAFETY: `SendInputInjector` holds only an `Option<HDESK>` (a desktop handle). The host creates
|
||||||
|
// and drives it from a single dedicated injector thread; the handle is opened, rebound, and closed
|
||||||
|
// on whichever thread owns the value, and the type is not `Sync`, so there is never concurrent
|
||||||
|
// access. A desktop `HDESK` is not thread-affine for ownership (`CloseDesktop` works from any
|
||||||
|
// thread; `SetThreadDesktop` rebinds the current thread), so transferring ownership via `Send` is
|
||||||
|
// sound.
|
||||||
unsafe impl Send for SendInputInjector {}
|
unsafe impl Send for SendInputInjector {}
|
||||||
|
|
||||||
impl SendInputInjector {
|
impl SendInputInjector {
|
||||||
@@ -49,6 +57,12 @@ impl SendInputInjector {
|
|||||||
/// Bind this thread to the desktop currently receiving input. UAC / lock screen / Ctrl-Alt-Del
|
/// Bind this thread to the desktop currently receiving input. UAC / lock screen / Ctrl-Alt-Del
|
||||||
/// swap the input desktop; `SendInput` silently no-ops unless our thread is on it.
|
/// swap the input desktop; `SendInput` silently no-ops unless our thread is on it.
|
||||||
fn reattach_input_desktop(&mut self) {
|
fn reattach_input_desktop(&mut self) {
|
||||||
|
// SAFETY: `OpenInputDesktop`/`SetThreadDesktop`/`CloseDesktop` are FFI calls passed only
|
||||||
|
// by-value args (constant desktop flags, a `bool`, an access mask). `OpenInputDesktop`
|
||||||
|
// yields an owned `HDESK` only on `Ok`; we then either install it with `SetThreadDesktop`
|
||||||
|
// (closing the previously-owned handle exactly once) or close the fresh handle on failure —
|
||||||
|
// so every handle is closed exactly once and none is used after close. `SetThreadDesktop`
|
||||||
|
// only rebinds this calling thread, which is where the injector runs.
|
||||||
unsafe {
|
unsafe {
|
||||||
match OpenInputDesktop(
|
match OpenInputDesktop(
|
||||||
DESKTOP_CONTROL_FLAGS(0),
|
DESKTOP_CONTROL_FLAGS(0),
|
||||||
@@ -75,12 +89,17 @@ impl SendInputInjector {
|
|||||||
/// switched out from under us, e.g. into UAC/lock) do we reattach to the now-current input desktop
|
/// switched out from under us, e.g. into UAC/lock) do we reattach to the now-current input desktop
|
||||||
/// and retry once. This serves both the normal and secure desktops with no steady-state overhead.
|
/// and retry once. This serves both the normal and secure desktops with no steady-state overhead.
|
||||||
fn send(&mut self, inputs: &[INPUT]) -> Result<()> {
|
fn send(&mut self, inputs: &[INPUT]) -> Result<()> {
|
||||||
|
// SAFETY: `inputs` is a live `&[INPUT]` slice that outlives this synchronous `SendInput`
|
||||||
|
// call; `size_of::<INPUT>()` is the exact per-element stride Win32 requires as `cbSize`. The
|
||||||
|
// call only reads the array (one event per element) and returns the count injected.
|
||||||
let n = unsafe { SendInput(inputs, size_of::<INPUT>() as i32) };
|
let n = unsafe { SendInput(inputs, size_of::<INPUT>() as i32) };
|
||||||
if n as usize == inputs.len() {
|
if n as usize == inputs.len() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
// Short write → the input desktop likely changed. Reattach + retry once.
|
// Short write → the input desktop likely changed. Reattach + retry once.
|
||||||
self.reattach_input_desktop();
|
self.reattach_input_desktop();
|
||||||
|
// SAFETY: same as the first `SendInput` — `inputs` is the identical live slice outliving the
|
||||||
|
// call and `cbSize == size_of::<INPUT>()`; only re-issued after reattaching the input desktop.
|
||||||
let n = unsafe { SendInput(inputs, size_of::<INPUT>() as i32) };
|
let n = unsafe { SendInput(inputs, size_of::<INPUT>() as i32) };
|
||||||
if n as usize != inputs.len() {
|
if n as usize != inputs.len() {
|
||||||
anyhow::bail!(
|
anyhow::bail!(
|
||||||
@@ -95,6 +114,9 @@ impl SendInputInjector {
|
|||||||
impl Drop for SendInputInjector {
|
impl Drop for SendInputInjector {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if let Some(h) = self.desktop.take() {
|
if let Some(h) = self.desktop.take() {
|
||||||
|
// SAFETY: `h` is the `HDESK` this injector owned (moved out of `self.desktop`);
|
||||||
|
// `CloseDesktop` runs once here in `Drop` on that still-valid handle, with no later use —
|
||||||
|
// no double close.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = CloseDesktop(h);
|
let _ = CloseDesktop(h);
|
||||||
}
|
}
|
||||||
@@ -216,7 +238,11 @@ impl InputInjector for SendInputInjector {
|
|||||||
}
|
}
|
||||||
InputKind::KeyDown | InputKind::KeyUp => {
|
InputKind::KeyDown | InputKind::KeyUp => {
|
||||||
let down = event.kind == InputKind::KeyDown;
|
let down = event.kind == InputKind::KeyDown;
|
||||||
let vk = (event.code & 0xff) as u16; // client sends Windows VK
|
// client sends Windows VK
|
||||||
|
let vk = (event.code & 0xff) as u16;
|
||||||
|
// SAFETY: `MapVirtualKeyExW` is a pure value translation (VK → scancode); all three
|
||||||
|
// args are by-value (`u32`, the `MAPVK_VK_TO_VSC_EX` map-type constant, a `None`
|
||||||
|
// HKL). It dereferences no pointer and returns a `u32` — FFI-`unsafe` only.
|
||||||
let sc_ex = unsafe { MapVirtualKeyExW(vk as u32, MAPVK_VK_TO_VSC_EX, None) };
|
let sc_ex = unsafe { MapVirtualKeyExW(vk as u32, MAPVK_VK_TO_VSC_EX, None) };
|
||||||
if sc_ex == 0 {
|
if sc_ex == 0 {
|
||||||
return Ok(()); // unmappable -> drop
|
return Ok(()); // unmappable -> drop
|
||||||
@@ -264,6 +290,8 @@ fn key(ki: KEYBDINPUT) -> INPUT {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn virtual_desktop_rect() -> (i32, i32, i32, i32) {
|
fn virtual_desktop_rect() -> (i32, i32, i32, i32) {
|
||||||
|
// SAFETY: each `GetSystemMetrics` takes a single by-value `SYSTEM_METRICS_INDEX` constant and
|
||||||
|
// returns an `i32`; it dereferences no pointer and has no side effects — FFI-`unsafe` only.
|
||||||
unsafe {
|
unsafe {
|
||||||
(
|
(
|
||||||
GetSystemMetrics(SM_XVIRTUALSCREEN),
|
GetSystemMetrics(SM_XVIRTUALSCREEN),
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -13,6 +13,9 @@
|
|||||||
//! attaches none, the export yields an already-signaled sync_file (poll returns immediately) — no
|
//! attaches none, the export yields an already-signaled sync_file (poll returns immediately) — no
|
||||||
//! wait, no harm, and `waited=false` tells us the driver doesn't fence (so zero-copy would still race).
|
//! wait, no harm, and `waited=false` tells us the driver doesn't fence (so zero-copy would still race).
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use std::os::fd::RawFd;
|
use std::os::fd::RawFd;
|
||||||
|
|
||||||
// linux/dma-buf.h ioctls on the DMA_BUF_BASE ('b' = 0x62) magic. _IOWR = dir(3)<<30 | size<<16 | base<<8 | nr.
|
// linux/dma-buf.h ioctls on the DMA_BUF_BASE ('b' = 0x62) magic. _IOWR = dir(3)<<30 | size<<16 | base<<8 | nr.
|
||||||
@@ -40,6 +43,11 @@ pub fn wait_read_ready(dmabuf_fd: RawFd, timeout_ms: i32) -> std::io::Result<boo
|
|||||||
flags: DMA_BUF_SYNC_READ,
|
flags: DMA_BUF_SYNC_READ,
|
||||||
fd: -1,
|
fd: -1,
|
||||||
};
|
};
|
||||||
|
// SAFETY: `dmabuf_fd` is a live dmabuf fd supplied by the caller (borrowed for this call; we
|
||||||
|
// never close it). `DMA_BUF_IOCTL_EXPORT_SYNC_FILE` encodes `size_of::<DmaBufExportSyncFile>()`
|
||||||
|
// — the exact byte count the kernel copies — and `&mut req` is a live, correctly-sized
|
||||||
|
// `#[repr(C)]` struct the EXPORT_SYNC_FILE ioctl reads (`flags`) and writes (`fd`). `req`
|
||||||
|
// outlives this synchronous call and is not aliased elsewhere.
|
||||||
let r = unsafe { libc::ioctl(dmabuf_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &mut req) };
|
let r = unsafe { libc::ioctl(dmabuf_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &mut req) };
|
||||||
if r < 0 {
|
if r < 0 {
|
||||||
return Err(std::io::Error::last_os_error());
|
return Err(std::io::Error::last_os_error());
|
||||||
@@ -54,11 +62,21 @@ pub fn wait_read_ready(dmabuf_fd: RawFd, timeout_ms: i32) -> std::io::Result<boo
|
|||||||
revents: 0,
|
revents: 0,
|
||||||
};
|
};
|
||||||
// Non-blocking probe: not-yet-signaled (poll==0) means the producer is still rendering.
|
// Non-blocking probe: not-yet-signaled (poll==0) means the producer is still rendering.
|
||||||
|
// SAFETY: `&mut pfd` points at a single live `libc::pollfd` and `nfds == 1` matches that one
|
||||||
|
// element; `pfd.fd` is `sync_fd`, the sync_file fd just exported (already checked `>= 0`).
|
||||||
|
// `poll` reads `fd`/`events` and writes `revents` for this non-blocking (timeout 0) probe, then
|
||||||
|
// returns — `pfd` outlives the call and aliases nothing.
|
||||||
let pending = unsafe { libc::poll(&mut pfd, 1, 0) } == 0;
|
let pending = unsafe { libc::poll(&mut pfd, 1, 0) } == 0;
|
||||||
if pending {
|
if pending {
|
||||||
pfd.revents = 0;
|
pfd.revents = 0;
|
||||||
|
// SAFETY: same live single-element `pfd` (its `revents` reset to 0 just above), `nfds == 1`,
|
||||||
|
// and `sync_fd` still open. This blocking `poll` (up to `timeout_ms`) waits for the render
|
||||||
|
// fence to signal; it reads `fd`/`events`, writes `revents`, and returns before `pfd` ends.
|
||||||
unsafe { libc::poll(&mut pfd, 1, timeout_ms) }; // block until the render fence signals
|
unsafe { libc::poll(&mut pfd, 1, timeout_ms) }; // block until the render fence signals
|
||||||
}
|
}
|
||||||
|
// SAFETY: `sync_fd` is the sync_file fd the EXPORT_SYNC_FILE ioctl created and handed us to own;
|
||||||
|
// this point is reached only when `sync_fd >= 0`, this `close` runs exactly once on it, and it is
|
||||||
|
// never used afterward — no double-close or use-after-close.
|
||||||
unsafe { libc::close(sync_fd) };
|
unsafe { libc::close(sync_fd) };
|
||||||
Ok(pending)
|
Ok(pending)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,8 @@
|
|||||||
//! verified (ioctl numbers + a live signal→wait round trip), ready to wire in the moment a producer
|
//! verified (ioctl numbers + a live signal→wait round trip), ready to wire in the moment a producer
|
||||||
//! gains working `SPA_META_SyncTimeline`.
|
//! gains working `SPA_META_SyncTimeline`.
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
//!
|
//!
|
||||||
//! Compositors that render directly into the PipeWire buffer pool (Mutter's virtual
|
//! Compositors that render directly into the PipeWire buffer pool (Mutter's virtual
|
||||||
//! monitors) hand buffers over at GPU-submit time; on drivers without implicit dmabuf
|
//! monitors) hand buffers over at GPU-submit time; on drivers without implicit dmabuf
|
||||||
@@ -81,6 +83,8 @@ pub struct DrmSync {
|
|||||||
impl DrmSync {
|
impl DrmSync {
|
||||||
pub fn open() -> Result<DrmSync> {
|
pub fn open() -> Result<DrmSync> {
|
||||||
let path = c"/dev/dri/renderD128";
|
let path = c"/dev/dri/renderD128";
|
||||||
|
// SAFETY: `path` is a 'static NUL-terminated C string literal; `open` only reads it as a
|
||||||
|
// filesystem path and returns an fd (or -1). No Rust memory is aliased or handed to the kernel.
|
||||||
let fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) };
|
let fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) };
|
||||||
if fd < 0 {
|
if fd < 0 {
|
||||||
bail!("open /dev/dri/renderD128 for syncobj ops: {}", errno());
|
bail!("open /dev/dri/renderD128 for syncobj ops: {}", errno());
|
||||||
@@ -94,6 +98,9 @@ impl DrmSync {
|
|||||||
fd: syncobj_fd,
|
fd: syncobj_fd,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: `self.fd` is the live render-node fd from `open`; the request number encodes
|
||||||
|
// `size_of::<DrmSyncobjHandle>()` (the bytes the kernel copies), and `&mut req` is a live,
|
||||||
|
// correctly-sized `#[repr(C)]` struct the FD_TO_HANDLE ioctl reads (`fd`) and writes (`handle`).
|
||||||
let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &mut req) };
|
let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &mut req) };
|
||||||
if r < 0 {
|
if r < 0 {
|
||||||
bail!("SYNCOBJ_FD_TO_HANDLE: {}", errno());
|
bail!("SYNCOBJ_FD_TO_HANDLE: {}", errno());
|
||||||
@@ -106,6 +113,8 @@ impl DrmSync {
|
|||||||
handle,
|
handle,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: `self.fd` is the live render-node fd; `DRM_IOCTL_SYNCOBJ_DESTROY` encodes
|
||||||
|
// `size_of::<DrmSyncobjDestroy>()`, and `&mut req` is a live correctly-sized struct the kernel reads.
|
||||||
unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_DESTROY, &mut req) };
|
unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_DESTROY, &mut req) };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,6 +126,8 @@ impl DrmSync {
|
|||||||
tv_sec: 0,
|
tv_sec: 0,
|
||||||
tv_nsec: 0,
|
tv_nsec: 0,
|
||||||
};
|
};
|
||||||
|
// SAFETY: `CLOCK_MONOTONIC` is a valid clock id and `&mut now` is a live `libc::timespec` the
|
||||||
|
// kernel fills in; the call returns before `now` is read, so there is no aliasing/lifetime issue.
|
||||||
unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut now) };
|
unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut now) };
|
||||||
let deadline = now.tv_sec * 1_000_000_000 + now.tv_nsec + timeout_ms as i64 * 1_000_000;
|
let deadline = now.tv_sec * 1_000_000_000 + now.tv_nsec + timeout_ms as i64 * 1_000_000;
|
||||||
let handles = [handle];
|
let handles = [handle];
|
||||||
@@ -129,6 +140,11 @@ impl DrmSync {
|
|||||||
flags: DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
|
flags: DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: `self.fd` is the live render-node fd; the request number encodes
|
||||||
|
// `size_of::<DrmSyncobjTimelineWait>()`; `&mut req` is a live correctly-sized struct. Its
|
||||||
|
// `handles`/`points` u64 fields hold the addresses of the local `handles`/`points` arrays, which
|
||||||
|
// outlive this synchronous call, and `count_handles == 1` matches their length — so every kernel
|
||||||
|
// read through those addresses stays in bounds.
|
||||||
let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &mut req) };
|
let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &mut req) };
|
||||||
let saved = errno();
|
let saved = errno();
|
||||||
self.destroy(handle);
|
self.destroy(handle);
|
||||||
@@ -151,6 +167,10 @@ impl DrmSync {
|
|||||||
count_handles: 1,
|
count_handles: 1,
|
||||||
flags: 0,
|
flags: 0,
|
||||||
};
|
};
|
||||||
|
// SAFETY: `self.fd` is the live render-node fd; the request number encodes
|
||||||
|
// `size_of::<DrmSyncobjTimelineArray>()`; `&mut req` is a live correctly-sized struct whose
|
||||||
|
// `handles`/`points` u64 fields address the local `handles`/`points` arrays (alive for this
|
||||||
|
// synchronous call, `count_handles == 1` matching their length).
|
||||||
let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &mut req) };
|
let r = unsafe { libc::ioctl(self.fd, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &mut req) };
|
||||||
let saved = errno();
|
let saved = errno();
|
||||||
self.destroy(handle);
|
self.destroy(handle);
|
||||||
@@ -163,6 +183,8 @@ impl DrmSync {
|
|||||||
|
|
||||||
impl Drop for DrmSync {
|
impl Drop for DrmSync {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: `self.fd` is the fd `open` returned; this `DrmSync` owns it exclusively and `close`
|
||||||
|
// runs exactly once (here, in `Drop`), so there is no double-close or use-after-close.
|
||||||
unsafe { libc::close(self.fd) };
|
unsafe { libc::close(self.fd) };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -203,14 +225,19 @@ mod tests {
|
|||||||
const CREATE: u64 = iowr(0xBF, std::mem::size_of::<Create>());
|
const CREATE: u64 = iowr(0xBF, std::mem::size_of::<Create>());
|
||||||
const HANDLE_TO_FD: u64 = iowr(0xC1, std::mem::size_of::<DrmSyncobjHandle>());
|
const HANDLE_TO_FD: u64 = iowr(0xC1, std::mem::size_of::<DrmSyncobjHandle>());
|
||||||
let mut c = Create::default();
|
let mut c = Create::default();
|
||||||
|
// SAFETY: `sync.fd` is the live render-node fd; `CREATE` encodes `size_of::<Create>()`, and
|
||||||
|
// `&mut c` is a live correctly-sized struct the kernel fills (`handle`).
|
||||||
assert!(unsafe { libc::ioctl(sync.fd, CREATE, &mut c) } >= 0);
|
assert!(unsafe { libc::ioctl(sync.fd, CREATE, &mut c) } >= 0);
|
||||||
let mut h = DrmSyncobjHandle {
|
let mut h = DrmSyncobjHandle {
|
||||||
handle: c.handle,
|
handle: c.handle,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: `sync.fd` is live; `HANDLE_TO_FD` encodes `size_of::<DrmSyncobjHandle>()`; `&mut h`
|
||||||
|
// is a live correctly-sized struct (the kernel reads `handle`, writes `fd`).
|
||||||
assert!(unsafe { libc::ioctl(sync.fd, HANDLE_TO_FD, &mut h) } >= 0);
|
assert!(unsafe { libc::ioctl(sync.fd, HANDLE_TO_FD, &mut h) } >= 0);
|
||||||
sync.signal_point(h.fd, 1).expect("signal");
|
sync.signal_point(h.fd, 1).expect("signal");
|
||||||
sync.wait_point(h.fd, 1, 100).expect("wait after signal");
|
sync.wait_point(h.fd, 1, 100).expect("wait after signal");
|
||||||
|
// SAFETY: `h.fd` is the fd HANDLE_TO_FD just exported; we own it and close it exactly once here.
|
||||||
unsafe { libc::close(h.fd) };
|
unsafe { libc::close(h.fd) };
|
||||||
sync.destroy(c.handle);
|
sync.destroy(c.handle);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,8 @@
|
|||||||
//! thread) and ffmpeg's `hevc_nvenc` (encode thread); each thread makes it current before use.
|
//! thread) and ffmpeg's `hevc_nvenc` (encode thread); each thread makes it current before use.
|
||||||
|
|
||||||
#![allow(non_camel_case_types, non_snake_case)]
|
#![allow(non_camel_case_types, non_snake_case)]
|
||||||
|
// Every `unsafe` block/impl below carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use anyhow::{bail, Result};
|
use anyhow::{bail, Result};
|
||||||
use std::os::raw::{c_int, c_uint, c_void};
|
use std::os::raw::{c_int, c_uint, c_void};
|
||||||
@@ -128,8 +130,14 @@ struct CudaApi {
|
|||||||
) -> CUresult,
|
) -> CUresult,
|
||||||
cuDestroyExternalMemory: unsafe extern "C" fn(CUexternalMemory) -> CUresult,
|
cuDestroyExternalMemory: unsafe extern "C" fn(CUexternalMemory) -> CUresult,
|
||||||
}
|
}
|
||||||
// The resolved fn pointers are plain addresses into a process-lifetime mapping; safe to share.
|
// SAFETY: every field is a bare `extern "C" fn` address into the leaked, process-lifetime
|
||||||
|
// `libcuda` mapping (`cuda_api` `forget`s the `Library`, so it is never unloaded) — an immutable
|
||||||
|
// value with no interior mutability and no thread affinity. Moving the table to another thread
|
||||||
|
// cannot dangle (the code it points at stays mapped) or race (the fields are read-only).
|
||||||
unsafe impl Send for CudaApi {}
|
unsafe impl Send for CudaApi {}
|
||||||
|
// SAFETY: as above — the table is a set of immutable fn-pointer addresses with no interior
|
||||||
|
// mutability, so concurrent shared reads from multiple threads cannot race; the driver entry
|
||||||
|
// points they address are themselves thread-safe.
|
||||||
unsafe impl Sync for CudaApi {}
|
unsafe impl Sync for CudaApi {}
|
||||||
|
|
||||||
/// `CUresult` returned by the wrappers when `libcuda` isn't loaded (no NVIDIA driver). Non-zero so
|
/// `CUresult` returned by the wrappers when `libcuda` isn't loaded (no NVIDIA driver). Non-zero so
|
||||||
@@ -143,6 +151,14 @@ static CUDA_API: OnceLock<Option<CudaApi>> = OnceLock::new();
|
|||||||
/// (the expected case on AMD/Intel hosts) — logged at debug, not an error.
|
/// (the expected case on AMD/Intel hosts) — logged at debug, not an error.
|
||||||
fn cuda_api() -> Option<&'static CudaApi> {
|
fn cuda_api() -> Option<&'static CudaApi> {
|
||||||
CUDA_API
|
CUDA_API
|
||||||
|
// SAFETY: `Library::new` runs `libcuda.so.1`'s initializers — it is the trusted NVIDIA
|
||||||
|
// driver library, so loading has no unexpected effects; `?`/`None` handle its absence.
|
||||||
|
// Each `lib.get::<T>(name)` asserts the symbol's real ABI equals `T`: every NUL-terminated
|
||||||
|
// name is a documented CUDA Driver API entry point and `T` is the exact
|
||||||
|
// `unsafe extern "C" fn(..)` signature from cuda.h/cudaGL.h (`_v2` for ctx/mem ops). Each
|
||||||
|
// `Symbol` only borrows `lib` until the end of the struct-literal statement; we deref-copy
|
||||||
|
// the raw fn-pointer out first, then `forget(lib)` leaks the mapping so those addresses
|
||||||
|
// stay valid for the whole process. Runs once under the `OnceLock` init — no aliasing.
|
||||||
.get_or_init(|| unsafe {
|
.get_or_init(|| unsafe {
|
||||||
let lib = libloading::Library::new("libcuda.so.1")
|
let lib = libloading::Library::new("libcuda.so.1")
|
||||||
.or_else(|_| libloading::Library::new("libcuda.so"))
|
.or_else(|_| libloading::Library::new("libcuda.so"))
|
||||||
@@ -361,6 +377,12 @@ pub fn read_plane_to_host(
|
|||||||
Height: height,
|
Height: height,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: `copy_blocking` is unsafe because it issues a CUDA copy; its contract is a valid
|
||||||
|
// descriptor with the shared context current (the caller's responsibility — self-test path).
|
||||||
|
// `&copy` is a live local `#[repr(C)] CUDA_MEMCPY2D` that outlives the synchronous call:
|
||||||
|
// `srcDevice`/`srcPitch` are the caller's live pitched device plane, `dstHost` addresses the
|
||||||
|
// freshly-allocated `host` `Vec` of exactly `width_bytes*height` bytes, and `WidthInBytes`×
|
||||||
|
// `Height` fit both. The copy is synchronous, so `host` is fully written before we return it.
|
||||||
unsafe { copy_blocking(&copy, "cuMemcpy2DAsync_v2(dev->host)")? };
|
unsafe { copy_blocking(&copy, "cuMemcpy2DAsync_v2(dev->host)")? };
|
||||||
Ok(host)
|
Ok(host)
|
||||||
}
|
}
|
||||||
@@ -369,7 +391,13 @@ pub fn read_plane_to_host(
|
|||||||
/// in a `OnceLock`; the raw `CUcontext` is thread-safe to make current from any thread.
|
/// in a `OnceLock`; the raw `CUcontext` is thread-safe to make current from any thread.
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct Context(pub CUcontext);
|
pub struct Context(pub CUcontext);
|
||||||
|
// SAFETY: `CUcontext` is an opaque CUDA driver handle, not a dereferenceable Rust pointer. It is
|
||||||
|
// created once and never destroyed (process lifetime), and the only thing done with it is
|
||||||
|
// `cuCtxSetCurrent`, which the Driver API explicitly allows from any thread — so transferring the
|
||||||
|
// handle to another thread cannot dangle or race (the driver owns the synchronization).
|
||||||
unsafe impl Send for Context {}
|
unsafe impl Send for Context {}
|
||||||
|
// SAFETY: as above — the wrapped handle is an immutable opaque address and the driver does all the
|
||||||
|
// synchronization, so sharing `&Context` across threads is sound.
|
||||||
unsafe impl Sync for Context {}
|
unsafe impl Sync for Context {}
|
||||||
|
|
||||||
static CONTEXT: OnceLock<Context> = OnceLock::new();
|
static CONTEXT: OnceLock<Context> = OnceLock::new();
|
||||||
@@ -382,6 +410,12 @@ pub fn context() -> Result<CUcontext> {
|
|||||||
if cuda_api().is_none() {
|
if cuda_api().is_none() {
|
||||||
bail!("libcuda.so.1 not available — no NVIDIA driver (CUDA zero-copy disabled)");
|
bail!("libcuda.so.1 not available — no NVIDIA driver (CUDA zero-copy disabled)");
|
||||||
}
|
}
|
||||||
|
// SAFETY: we returned above unless `cuda_api()` is `Some`, so every wrapper here forwards into
|
||||||
|
// the live, leaked `libcuda` table rather than the not-loaded stub. `cuInit(0)` passes the
|
||||||
|
// API-required flags value 0. `&mut dev`/`&mut ctx` are live, zero/null-initialized stack
|
||||||
|
// out-params the driver writes the device handle / new context into; each outlives its
|
||||||
|
// synchronous call and they are distinct locals (no aliasing). `cuCtxCreate_v2` yields a valid
|
||||||
|
// `CUcontext` on success (`ck` bails otherwise), which becomes the block's value.
|
||||||
let ctx = unsafe {
|
let ctx = unsafe {
|
||||||
ck(cuInit(0), "cuInit")?;
|
ck(cuInit(0), "cuInit")?;
|
||||||
let mut dev: CUdevice = 0;
|
let mut dev: CUdevice = 0;
|
||||||
@@ -401,6 +435,10 @@ pub fn context() -> Result<CUcontext> {
|
|||||||
/// Make the shared context current on the calling thread (required before any CUDA op here).
|
/// Make the shared context current on the calling thread (required before any CUDA op here).
|
||||||
pub fn make_current() -> Result<()> {
|
pub fn make_current() -> Result<()> {
|
||||||
let ctx = context()?;
|
let ctx = context()?;
|
||||||
|
// SAFETY: `ctx` came from `context()?`, so it is the live shared `CUcontext` and the driver
|
||||||
|
// table is present. `cuCtxSetCurrent` binds that opaque handle to the calling thread; it takes
|
||||||
|
// no Rust-memory pointer and is thread-safe (affects only this thread's current context), so
|
||||||
|
// there is no aliasing or lifetime hazard.
|
||||||
unsafe { ck(cuCtxSetCurrent(ctx), "cuCtxSetCurrent") }
|
unsafe { ck(cuCtxSetCurrent(ctx), "cuCtxSetCurrent") }
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -423,6 +461,12 @@ fn copy_stream() -> CUstream {
|
|||||||
if let Some(s) = cell.get() {
|
if let Some(s) = cell.get() {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
// SAFETY: `copy_stream` runs with the shared context current (its doc contract), so the
|
||||||
|
// wrappers forward into the live `libcuda` table. `&mut least`/`&mut greatest` are live
|
||||||
|
// stack `i32`s the driver fills with the priority range; `&mut s` is a live null-init
|
||||||
|
// `CUstream` the driver writes the new stream into. All out-params outlive their
|
||||||
|
// synchronous calls and are distinct locals. On any non-zero result we fall back to a null
|
||||||
|
// (NULL-stream) value and never read an uninitialized handle.
|
||||||
let stream = unsafe {
|
let stream = unsafe {
|
||||||
let (mut least, mut greatest) = (0i32, 0i32);
|
let (mut least, mut greatest) = (0i32, 0i32);
|
||||||
if cuCtxGetStreamPriorityRange(&mut least, &mut greatest) != 0 {
|
if cuCtxGetStreamPriorityRange(&mut least, &mut greatest) != 0 {
|
||||||
@@ -459,6 +503,11 @@ unsafe fn copy_blocking(copy: &CUDA_MEMCPY2D, what: &str) -> Result<()> {
|
|||||||
fn alloc_pitched(width: u32, height: u32) -> Result<(CUdeviceptr, usize)> {
|
fn alloc_pitched(width: u32, height: u32) -> Result<(CUdeviceptr, usize)> {
|
||||||
let mut ptr: CUdeviceptr = 0;
|
let mut ptr: CUdeviceptr = 0;
|
||||||
let mut pitch: usize = 0;
|
let mut pitch: usize = 0;
|
||||||
|
// SAFETY: `cuMemAllocPitch_v2` allocates a pitched device buffer (the wrapper forwards to the
|
||||||
|
// live table on any path that reached allocation). `&mut ptr` (`CUdeviceptr`) and `&mut pitch`
|
||||||
|
// (`usize`) are live, distinct stack out-params the driver writes the allocation pointer and
|
||||||
|
// its pitch into; both outlive the synchronous call. Width/height/element-size are by-value
|
||||||
|
// ints. No aliasing — two separate locals.
|
||||||
unsafe {
|
unsafe {
|
||||||
ck(
|
ck(
|
||||||
cuMemAllocPitch_v2(
|
cuMemAllocPitch_v2(
|
||||||
@@ -486,6 +535,10 @@ fn alloc_pitched_nv12(
|
|||||||
let mut y_pitch: usize = 0;
|
let mut y_pitch: usize = 0;
|
||||||
let mut uv_ptr: CUdeviceptr = 0;
|
let mut uv_ptr: CUdeviceptr = 0;
|
||||||
let mut uv_pitch: usize = 0;
|
let mut uv_pitch: usize = 0;
|
||||||
|
// SAFETY: two independent `cuMemAllocPitch_v2` calls (wrapper → live table). `&mut y_ptr`/
|
||||||
|
// `&mut y_pitch` and `&mut uv_ptr`/`&mut uv_pitch` are live, distinct stack out-params the
|
||||||
|
// driver writes each plane's pointer and pitch into; all outlive their synchronous calls. The
|
||||||
|
// dimension/element-size args are by-value ints. No aliasing — four separate locals.
|
||||||
unsafe {
|
unsafe {
|
||||||
ck(
|
ck(
|
||||||
cuMemAllocPitch_v2(
|
cuMemAllocPitch_v2(
|
||||||
@@ -524,6 +577,13 @@ struct PoolInner {
|
|||||||
|
|
||||||
impl Drop for PoolInner {
|
impl Drop for PoolInner {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: the pool only exists because allocation succeeded, so the driver table is live.
|
||||||
|
// `PoolInner` drops only once every `DeviceBuffer` that referenced it (each holds an `Arc`
|
||||||
|
// clone) has been recycled, so `free`/`free_uv` hold every outstanding allocation exactly
|
||||||
|
// once and nothing else still uses them — no double-free or use-after-free. We make the
|
||||||
|
// shared context current first (drop may run off the allocating thread) so `cuMemFree_v2`
|
||||||
|
// targets the right context. Each `p` is a `CUdeviceptr` previously returned by
|
||||||
|
// `cuMemAllocPitch_v2`; results are ignored (best-effort teardown).
|
||||||
unsafe {
|
unsafe {
|
||||||
if let Some(c) = CONTEXT.get() {
|
if let Some(c) = CONTEXT.get() {
|
||||||
let _ = cuCtxSetCurrent(c.0);
|
let _ = cuCtxSetCurrent(c.0);
|
||||||
@@ -697,6 +757,12 @@ impl Drop for DeviceBuffer {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// The buffer may be freed on the encode thread; cuMemFree needs a current context.
|
// The buffer may be freed on the encode thread; cuMemFree needs a current context.
|
||||||
|
// SAFETY: this is the un-pooled branch (`pool` is `None`), so this `DeviceBuffer`
|
||||||
|
// exclusively owns `self.ptr` (and `self.uv`'s `uv_ptr`), each returned by
|
||||||
|
// `cuMemAllocPitch_v2` and freed exactly once here — `drop` runs once and the
|
||||||
|
// `self.ptr == 0` guard above skips the sentinel/empty case, so no double-free. We set
|
||||||
|
// the shared context current first because drop may run on a thread where it isn't, and
|
||||||
|
// `cuMemFree_v2` needs it. Wrapper → live table; results ignored (teardown).
|
||||||
unsafe {
|
unsafe {
|
||||||
if let Some(c) = CONTEXT.get() {
|
if let Some(c) = CONTEXT.get() {
|
||||||
let _ = cuCtxSetCurrent(c.0);
|
let _ = cuCtxSetCurrent(c.0);
|
||||||
@@ -745,6 +811,16 @@ impl RegisteredTexture {
|
|||||||
/// unmap. The copy is synchronized (on our priority stream) before unmap so `dst` is ready
|
/// unmap. The copy is synchronized (on our priority stream) before unmap so `dst` is ready
|
||||||
/// before the source dmabuf is recycled. Always unmaps, even if the copy errors.
|
/// before the source dmabuf is recycled. Always unmaps, even if the copy errors.
|
||||||
pub fn copy_mapped_to(&mut self, dst: &DeviceBuffer) -> Result<()> {
|
pub fn copy_mapped_to(&mut self, dst: &DeviceBuffer) -> Result<()> {
|
||||||
|
// SAFETY: `self.resource` is the valid `CUgraphicsResource` from a successful `register_gl`
|
||||||
|
// (its only constructor), so the wrappers forward to the live table; the caller holds the
|
||||||
|
// GL+CUDA contexts current (the registration's contract). `cuGraphicsMapResources` maps
|
||||||
|
// `count == 1` resource via `&mut self.resource` (a live field) on the default stream;
|
||||||
|
// `cuGraphicsSubResourceGetMappedArray` writes the mapped `CUarray` into the live local
|
||||||
|
// `array` (index 0, mip 0). On failure we unmap and bail (balanced). `&copy` is a live
|
||||||
|
// local `CUDA_MEMCPY2D` outliving the synchronous `copy_blocking`: `srcArray` is valid
|
||||||
|
// while mapped, `dstDevice`/`dstPitch` are `dst`'s live allocation, `width*4`×`height` fit
|
||||||
|
// both. `copy_blocking` syncs before we unmap, so the array stays valid through the copy;
|
||||||
|
// we always unmap afterward (even on error), keeping the map/unmap pair balanced.
|
||||||
unsafe {
|
unsafe {
|
||||||
ck(
|
ck(
|
||||||
cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()),
|
cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()),
|
||||||
@@ -783,6 +859,14 @@ impl RegisteredTexture {
|
|||||||
width_bytes: usize,
|
width_bytes: usize,
|
||||||
height: usize,
|
height: usize,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
// SAFETY: identical contract to `copy_mapped_to` — `self.resource` is the valid
|
||||||
|
// `CUgraphicsResource` from `register_gl` (wrappers → live table; caller holds GL+CUDA
|
||||||
|
// contexts current). Map `count == 1` resource via the live `&mut self.resource`; the
|
||||||
|
// mapped `CUarray` is written into the live local `array` (index 0, mip 0); on failure we
|
||||||
|
// unmap and bail (balanced). `&copy` is a live local outliving the synchronous
|
||||||
|
// `copy_blocking`: `srcArray` valid while mapped, `dstDevice`/`dstPitch` are the caller's
|
||||||
|
// live plane, `width_bytes`×`height` fit it. We always unmap afterward, even on copy error,
|
||||||
|
// so the map/unmap pair stays balanced and the array outlives the copy.
|
||||||
unsafe {
|
unsafe {
|
||||||
ck(
|
ck(
|
||||||
cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()),
|
cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()),
|
||||||
@@ -847,6 +931,10 @@ pub fn copy_device_to_device(
|
|||||||
Height: src.height as usize,
|
Height: src.height as usize,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: `copy_blocking` is unsafe (issues a CUDA copy); the caller must have the shared
|
||||||
|
// context current (documented). `&copy` is a live local device→device `CUDA_MEMCPY2D` outliving
|
||||||
|
// the synchronous call: `srcDevice`/`srcPitch` are `src`'s live allocation, `dstDevice`/
|
||||||
|
// `dstPitch` the caller's live region, `width*4`×`height` within both. Wrapper → live table.
|
||||||
unsafe { copy_blocking(&copy, "cuMemcpy2DAsync_v2(dev->dev)") }
|
unsafe { copy_blocking(&copy, "cuMemcpy2DAsync_v2(dev->dev)") }
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -888,6 +976,12 @@ pub fn copy_nv12_to_device(
|
|||||||
Height: h / 2,
|
Height: h / 2,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: two unsafe `copy_blocking` device→device copies; the caller must have the shared
|
||||||
|
// context current (documented). `&y`/`&uv` are live local `CUDA_MEMCPY2D`s outliving each
|
||||||
|
// synchronous call. All four device pointers are valid: `src.ptr`/`src_uv_ptr` come from a live
|
||||||
|
// NV12 `DeviceBuffer` (its `.uv` presence was checked via `ok_or_else`), `y_dst`/`uv_dst` are
|
||||||
|
// the caller's live NVENC surface planes; the luma copy is `w`×`h`, the chroma copy
|
||||||
|
// `(w/2)*2`×`h/2`, each within its planes. Wrappers → live table.
|
||||||
unsafe {
|
unsafe {
|
||||||
copy_blocking(&y, "cuMemcpy2DAsync_v2(nv12 Y dev->dev)")?;
|
copy_blocking(&y, "cuMemcpy2DAsync_v2(nv12 Y dev->dev)")?;
|
||||||
copy_blocking(&uv, "cuMemcpy2DAsync_v2(nv12 UV dev->dev)")
|
copy_blocking(&uv, "cuMemcpy2DAsync_v2(nv12 UV dev->dev)")
|
||||||
@@ -897,6 +991,12 @@ pub fn copy_nv12_to_device(
|
|||||||
impl Drop for RegisteredTexture {
|
impl Drop for RegisteredTexture {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if !self.resource.is_null() {
|
if !self.resource.is_null() {
|
||||||
|
// SAFETY: `self.resource` is non-null (just checked) and is the valid
|
||||||
|
// `CUgraphicsResource` from `register_gl`, owned exclusively by this `RegisteredTexture`
|
||||||
|
// and unregistered exactly once here (drop runs once) — no use-after-free or
|
||||||
|
// double-unregister. `cuGraphicsUnregisterResource` releases the GL↔CUDA registration;
|
||||||
|
// wrapper → live table (the resource exists ⇒ the driver was present). Result ignored
|
||||||
|
// (best-effort teardown).
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = cuGraphicsUnregisterResource(self.resource);
|
let _ = cuGraphicsUnregisterResource(self.resource);
|
||||||
}
|
}
|
||||||
@@ -913,7 +1013,11 @@ pub struct ExternalDmabuf {
|
|||||||
pub size: u64,
|
pub size: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Raw driver handles; used from the single capture thread but moved with the importer.
|
// SAFETY: the fields are opaque CUDA driver handles — an external-memory handle and a device
|
||||||
|
// pointer — not dereferenceable Rust memory, and the value is uniquely owned (no `Clone`). It is
|
||||||
|
// used from a single capture thread but constructed on / moved between threads with the importer;
|
||||||
|
// transferring these handles is sound because uniqueness rules out aliasing and they are destroyed
|
||||||
|
// exactly once in `Drop`. Only `Send` (not `Sync`) is asserted, matching the single-thread use.
|
||||||
unsafe impl Send for ExternalDmabuf {}
|
unsafe impl Send for ExternalDmabuf {}
|
||||||
|
|
||||||
impl ExternalDmabuf {
|
impl ExternalDmabuf {
|
||||||
@@ -921,6 +1025,9 @@ impl ExternalDmabuf {
|
|||||||
/// from then on) and map its full `size` bytes to a device pointer. The shared context
|
/// from then on) and map its full `size` bytes to a device pointer. The shared context
|
||||||
/// must be current.
|
/// must be current.
|
||||||
pub fn import(fd: i32, size: u64) -> Result<ExternalDmabuf> {
|
pub fn import(fd: i32, size: u64) -> Result<ExternalDmabuf> {
|
||||||
|
// SAFETY: `libc::dup` only reads the integer `fd` and returns a new descriptor (or -1); it
|
||||||
|
// touches no Rust memory and `fd` is the caller's still-owned dmabuf fd (not consumed
|
||||||
|
// here). No aliasing or lifetime concern — a pure syscall on an integer.
|
||||||
let dup = unsafe { libc::dup(fd) };
|
let dup = unsafe { libc::dup(fd) };
|
||||||
if dup < 0 {
|
if dup < 0 {
|
||||||
bail!("dup(dmabuf fd) failed");
|
bail!("dup(dmabuf fd) failed");
|
||||||
@@ -938,8 +1045,17 @@ impl ExternalDmabuf {
|
|||||||
};
|
};
|
||||||
desc.handle[0] = dup as u32 as u64; // union member `int fd` (little-endian low bytes)
|
desc.handle[0] = dup as u32 as u64; // union member `int fd` (little-endian low bytes)
|
||||||
let mut ext: CUexternalMemory = std::ptr::null_mut();
|
let mut ext: CUexternalMemory = std::ptr::null_mut();
|
||||||
|
// SAFETY: `cuImportExternalMemory` imports the memory described by `&desc`, a live local
|
||||||
|
// `#[repr(C)] CUDA_EXTERNAL_MEMORY_HANDLE_DESC` (cuda.h 64-bit layout) that outlives this
|
||||||
|
// synchronous call: `type_` is OPAQUE_FD, `handle[0]` holds the dup'd fd in the union's
|
||||||
|
// `int fd` low bytes, `size` is set. `&mut ext` is a live null-init out-param the driver
|
||||||
|
// writes the imported handle into. The driver takes ownership of the fd only on success.
|
||||||
|
// Distinct locals → no aliasing. Wrapper → live table (caller holds the context current).
|
||||||
let r = unsafe { cuImportExternalMemory(&mut ext, &desc) };
|
let r = unsafe { cuImportExternalMemory(&mut ext, &desc) };
|
||||||
if r != 0 {
|
if r != 0 {
|
||||||
|
// SAFETY: import failed (`r != 0`), so the driver did NOT take ownership of `dup`; we
|
||||||
|
// still own it and close it exactly once here on the error path (the success path never
|
||||||
|
// closes it — the driver does). `libc::close` acts on the integer fd alone.
|
||||||
unsafe { libc::close(dup) }; // import failed → the driver did not take the fd
|
unsafe { libc::close(dup) }; // import failed → the driver did not take the fd
|
||||||
bail!("cuImportExternalMemory failed ({r}) — LINEAR dmabuf import unsupported?");
|
bail!("cuImportExternalMemory failed ({r}) — LINEAR dmabuf import unsupported?");
|
||||||
}
|
}
|
||||||
@@ -949,8 +1065,17 @@ impl ExternalDmabuf {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let mut ptr: CUdeviceptr = 0;
|
let mut ptr: CUdeviceptr = 0;
|
||||||
|
// SAFETY: maps a device pointer from `ext` (the valid `CUexternalMemory` just imported) per
|
||||||
|
// `&buf`, a live local `CUDA_EXTERNAL_MEMORY_BUFFER_DESC` (offset 0, full `size`) that
|
||||||
|
// outlives this synchronous call. `&mut ptr` is a live zero-init out-param the driver writes
|
||||||
|
// the mapped device address into; distinct locals → no aliasing. Wrapper → live table
|
||||||
|
// (context current).
|
||||||
let r = unsafe { cuExternalMemoryGetMappedBuffer(&mut ptr, ext, &buf) };
|
let r = unsafe { cuExternalMemoryGetMappedBuffer(&mut ptr, ext, &buf) };
|
||||||
if r != 0 {
|
if r != 0 {
|
||||||
|
// SAFETY: mapping failed; `ext` is the valid `CUexternalMemory` we imported and
|
||||||
|
// exclusively own. We destroy it exactly once here on the error path (the success path
|
||||||
|
// instead moves it into the returned `ExternalDmabuf`, whose `Drop` destroys it),
|
||||||
|
// releasing the fd the driver took — no double-destroy or use-after-free.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = cuDestroyExternalMemory(ext);
|
let _ = cuDestroyExternalMemory(ext);
|
||||||
}
|
}
|
||||||
@@ -962,6 +1087,12 @@ impl ExternalDmabuf {
|
|||||||
|
|
||||||
impl Drop for ExternalDmabuf {
|
impl Drop for ExternalDmabuf {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: this `ExternalDmabuf` only exists after a successful import, so the driver table
|
||||||
|
// is live. It exclusively owns `self.ptr` (the mapped buffer) and `self.ext` (the external
|
||||||
|
// memory), each torn down exactly once here (drop runs once; guarded by `!= 0` / `!null`) —
|
||||||
|
// no double-free or use-after-free. We make the shared context current first because drop
|
||||||
|
// may run off the import thread, and we free the mapped buffer before destroying its
|
||||||
|
// backing external memory. Results ignored (best-effort teardown).
|
||||||
unsafe {
|
unsafe {
|
||||||
if let Some(c) = CONTEXT.get() {
|
if let Some(c) = CONTEXT.get() {
|
||||||
let _ = cuCtxSetCurrent(c.0);
|
let _ = cuCtxSetCurrent(c.0);
|
||||||
@@ -996,5 +1127,10 @@ pub fn copy_pitched_to_buffer(
|
|||||||
};
|
};
|
||||||
// copy_blocking syncs our priority stream before returning, so the copy is complete before the
|
// copy_blocking syncs our priority stream before returning, so the copy is complete before the
|
||||||
// dmabuf is requeued to the producer.
|
// dmabuf is requeued to the producer.
|
||||||
|
// SAFETY: `copy_blocking` is unsafe (issues a CUDA copy); the caller must have the shared
|
||||||
|
// context current (documented). `&copy` is a live local device→device `CUDA_MEMCPY2D` outliving
|
||||||
|
// the synchronous call: `srcDevice`/`srcPitch` are the caller's live mapped span (e.g. an
|
||||||
|
// `ExternalDmabuf`), `dstDevice`/`dstPitch` are `dst`'s live allocation, `width*4`×`height`
|
||||||
|
// within both. Wrapper → live table.
|
||||||
unsafe { copy_blocking(&copy, "cuMemcpy2DAsync_v2(ext->dev)") }
|
unsafe { copy_blocking(&copy, "cuMemcpy2DAsync_v2(ext->dev)") }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,8 @@
|
|||||||
//! owned [`DeviceBuffer`] so the dmabuf can be returned to the compositor immediately.
|
//! owned [`DeviceBuffer`] so the dmabuf can be returned to the compositor immediately.
|
||||||
|
|
||||||
#![allow(non_upper_case_globals)]
|
#![allow(non_upper_case_globals)]
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::cuda::{self, DeviceBuffer};
|
use super::cuda::{self, DeviceBuffer};
|
||||||
use anyhow::{bail, ensure, Context as _, Result};
|
use anyhow::{bail, ensure, Context as _, Result};
|
||||||
@@ -415,6 +417,14 @@ impl Nv12Blit {
|
|||||||
|
|
||||||
impl Drop for Nv12Blit {
|
impl Drop for Nv12Blit {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: these GL names (textures/FBOs/VAO/programs) were all created by THIS `Nv12Blit`
|
||||||
|
// in `Nv12Blit::new` on the current GL context, which is still current because the owning
|
||||||
|
// `EglImporter` is dropped on its single capture thread (fields drop after
|
||||||
|
// `EglImporter::drop`, which never releases the context). `glDelete*` takes a count + a
|
||||||
|
// pointer to that many names: `&self.y_tex`/`&self.vao` are `&u32` to one live field (n=1);
|
||||||
|
// `[self.y_fbo, self.uv_fbo].as_ptr()` points at a 2-element temporary that lives for the
|
||||||
|
// whole `glDeleteFramebuffers` call (n=2 matches). The symbols dispatch through libGL
|
||||||
|
// (libglvnd) to the driver for the current context. Each name is deleted exactly once.
|
||||||
unsafe {
|
unsafe {
|
||||||
glDeleteTextures(1, &self.y_tex);
|
glDeleteTextures(1, &self.y_tex);
|
||||||
glDeleteTextures(1, &self.uv_tex);
|
glDeleteTextures(1, &self.uv_tex);
|
||||||
@@ -459,7 +469,14 @@ pub struct EglImporter {
|
|||||||
render_fd: c_int,
|
render_fd: c_int,
|
||||||
}
|
}
|
||||||
|
|
||||||
// The EGL handles are confined to the capture thread; the struct is moved there once.
|
// SAFETY: `EglImporter` owns thread-affine handles — an EGLDisplay/contexts made current on one
|
||||||
|
// thread, a loaded GL proc pointer, a `gbm_device*`, a raw fd, and CUDA-registered GL textures —
|
||||||
|
// none safe to touch concurrently. It is constructed inside `pipewire_thread` on the dedicated
|
||||||
|
// `punktfunk-pipewire` thread, and every method (`import*`, `supported_modifiers`, `Drop`) runs on
|
||||||
|
// that same thread; it is never accessed through a shared `&` from another thread. `Send` asserts
|
||||||
|
// only that transferring *ownership* is sound (needed so the importer can live in the PipeWire
|
||||||
|
// stream's user-data, whose API imposes a `Send` bound) — the live handles are never used
|
||||||
|
// off-thread. `Sync` is deliberately NOT implied.
|
||||||
unsafe impl Send for EglImporter {}
|
unsafe impl Send for EglImporter {}
|
||||||
|
|
||||||
impl EglImporter {
|
impl EglImporter {
|
||||||
@@ -470,16 +487,38 @@ impl EglImporter {
|
|||||||
// to the same DRM device CUDA-GL interop associates with, which the EGL device platform
|
// to the same DRM device CUDA-GL interop associates with, which the EGL device platform
|
||||||
// did not (cuGraphicsGLRegisterImage rejected device-platform GL textures).
|
// did not (cuGraphicsGLRegisterImage rejected device-platform GL textures).
|
||||||
let path = std::ffi::CString::new("/dev/dri/renderD128").unwrap();
|
let path = std::ffi::CString::new("/dev/dri/renderD128").unwrap();
|
||||||
|
// SAFETY: `path` is a live local `CString` (built from a string with no interior NUL, so it
|
||||||
|
// is NUL-terminated); `path.as_ptr()` is a valid pointer to that buffer which outlives this
|
||||||
|
// synchronous `open`. `open` only reads the path and returns a new fd (or -1); it neither
|
||||||
|
// retains the pointer nor writes through it, so there is no aliasing or lifetime hazard.
|
||||||
let render_fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) };
|
let render_fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) };
|
||||||
ensure!(render_fd >= 0, "open /dev/dri/renderD128 for GBM");
|
ensure!(render_fd >= 0, "open /dev/dri/renderD128 for GBM");
|
||||||
|
// SAFETY: `render_fd` is the live DRM render-node fd just returned by `open` and checked
|
||||||
|
// `>= 0`. `gbm_create_device` (libgbm, linked above) builds a `gbm_device` over that fd and
|
||||||
|
// returns a `*mut gbm_device` (or null); it borrows but does not take ownership of the fd,
|
||||||
|
// which `EglImporter` keeps open and closes only in `Drop` after `gbm_device_destroy`. No
|
||||||
|
// Rust-owned memory is passed, so there is nothing to alias.
|
||||||
let gbm = unsafe { gbm_create_device(render_fd) };
|
let gbm = unsafe { gbm_create_device(render_fd) };
|
||||||
if gbm.is_null() {
|
if gbm.is_null() {
|
||||||
|
// SAFETY: reached only when `gbm_create_device` failed (null) — the fd was not consumed
|
||||||
|
// and no `EglImporter` exists yet to close it again, so this `close` runs exactly once on
|
||||||
|
// the live `render_fd`, releasing it before the error return. No double-close.
|
||||||
unsafe { libc::close(render_fd) };
|
unsafe { libc::close(render_fd) };
|
||||||
anyhow::bail!("gbm_create_device failed");
|
anyhow::bail!("gbm_create_device failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SAFETY: `Egl::load_required` dlopens the system libEGL and binds its entry points,
|
||||||
|
// trusting that libEGL (libglvnd) is a genuine EGL 1.5 implementation whose core symbols
|
||||||
|
// match the ABI the `khronos_egl` `EGL1_5` bindings declare. No Rust memory is passed; the
|
||||||
|
// returned instance is afterwards used only through the safe `khronos_egl` wrappers.
|
||||||
let egl: Egl =
|
let egl: Egl =
|
||||||
unsafe { Egl::load_required() }.context("load libEGL (EGL 1.5 dynamic instance)")?;
|
unsafe { Egl::load_required() }.context("load libEGL (EGL 1.5 dynamic instance)")?;
|
||||||
|
// SAFETY: `gbm` is the non-null `gbm_device*` created just above (checked), and
|
||||||
|
// `EGL_PLATFORM_GBM_KHR` is exactly the platform enum that pairs with a GBM device as the
|
||||||
|
// native-display handle, so the `gbm as NativeDisplayType` cast hands EGL a valid native
|
||||||
|
// display for the requested platform. `&[egl::ATTRIB_NONE]` is a properly terminated, empty
|
||||||
|
// attribute array borrowed for this synchronous call; EGL only reads it and returns an
|
||||||
|
// `EGLDisplay`, retaining no pointer into Rust memory.
|
||||||
let display = unsafe {
|
let display = unsafe {
|
||||||
egl.get_platform_display(
|
egl.get_platform_display(
|
||||||
EGL_PLATFORM_GBM_KHR,
|
EGL_PLATFORM_GBM_KHR,
|
||||||
@@ -533,6 +572,13 @@ impl EglImporter {
|
|||||||
.context("eglCreateContext(OpenGL)")?;
|
.context("eglCreateContext(OpenGL)")?;
|
||||||
egl.make_current(display, None, None, Some(gl_ctx))
|
egl.make_current(display, None, None, Some(gl_ctx))
|
||||||
.context("eglMakeCurrent surfaceless (needs EGL_KHR_surfaceless_context)")?;
|
.context("eglMakeCurrent surfaceless (needs EGL_KHR_surfaceless_context)")?;
|
||||||
|
// SAFETY: the GL context was made current on this thread just above, which `eglGetProcAddress`
|
||||||
|
// requires to return a usable pointer. The non-null (`?`-checked) pointer it returns for
|
||||||
|
// "glEGLImageTargetTexture2DOES" is the driver's implementation of that GL-OES entry point,
|
||||||
|
// whose real ABI is `void(GLenum, GLeglImageOES)` = `(u32, *mut c_void)` `extern "system"`.
|
||||||
|
// `EglImageTargetFn` is declared with exactly that signature, so the transmute only retypes a
|
||||||
|
// same-size, same-ABI thin function pointer (no value/representation change). The function is
|
||||||
|
// present because `EGL_EXT_image_dma_buf_import` was asserted on this display above.
|
||||||
let egl_image_target: EglImageTargetFn = unsafe {
|
let egl_image_target: EglImageTargetFn = unsafe {
|
||||||
std::mem::transmute(
|
std::mem::transmute(
|
||||||
egl.get_proc_address("glEGLImageTargetTexture2DOES")
|
egl.get_proc_address("glEGLImageTargetTexture2DOES")
|
||||||
@@ -543,6 +589,10 @@ impl EglImporter {
|
|||||||
// Create the shared CUDA context up front so import() is pure hot path.
|
// Create the shared CUDA context up front so import() is pure hot path.
|
||||||
cuda::context().context("create CUDA context")?;
|
cuda::context().context("create CUDA context")?;
|
||||||
|
|
||||||
|
// SAFETY: `egl::NO_CONTEXT` is EGL's defined sentinel (a null handle) for "no context";
|
||||||
|
// `Context::from_ptr` only stores the handle (it never dereferences it), so wrapping the
|
||||||
|
// null sentinel is sound and yields exactly the `EGL_NO_CONTEXT` value that
|
||||||
|
// `eglCreateImage(EGL_LINUX_DMA_BUF_EXT)` requires as its context argument later.
|
||||||
let no_ctx = unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) };
|
let no_ctx = unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) };
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
"zero-copy EGL importer ready (GBM platform + GL texture interop, dma_buf_import + modifiers)"
|
"zero-copy EGL importer ready (GBM platform + GL texture interop, dma_buf_import + modifiers)"
|
||||||
@@ -602,8 +652,21 @@ impl EglImporter {
|
|||||||
let Some(sym) = self.egl.get_proc_address("eglQueryDmaBufModifiersEXT") else {
|
let Some(sym) = self.egl.get_proc_address("eglQueryDmaBufModifiersEXT") else {
|
||||||
return Vec::new();
|
return Vec::new();
|
||||||
};
|
};
|
||||||
|
// SAFETY: `sym` is the non-null pointer `eglGetProcAddress("eglQueryDmaBufModifiersEXT")`
|
||||||
|
// returned (the `let-else` already bailed on `None`) — the driver's implementation of that
|
||||||
|
// EGL extension entry point. `QueryFn` is declared with that function's exact documented ABI
|
||||||
|
// (`EGLDisplay, EGLint, EGLint, EGLuint64KHR*, EGLBoolean*, EGLint* -> EGLBoolean`), all
|
||||||
|
// `extern "system"`, so the transmute only retypes a same-size, same-ABI thin fn pointer.
|
||||||
let query: QueryFn = unsafe { std::mem::transmute(sym) };
|
let query: QueryFn = unsafe { std::mem::transmute(sym) };
|
||||||
let dpy = self.display.as_ptr();
|
let dpy = self.display.as_ptr();
|
||||||
|
// SAFETY: `dpy` is this importer's live, initialized `EGLDisplay`; `query` is the proc loaded
|
||||||
|
// just above. The first call passes null out-arrays with `max_modifiers == 0`, which the
|
||||||
|
// extension defines as "write only the count" — it writes solely through `&mut count` (a live
|
||||||
|
// local `i32`). For the second call, `mods`/`ext` are freshly allocated `Vec`s of exactly
|
||||||
|
// `count` elements and `max_modifiers == count`, so the driver writes at most `count`
|
||||||
|
// `u64`/`u32` entries (in bounds) plus the actual count through `&mut n` (a live local). All
|
||||||
|
// four Rust addresses outlive these synchronous calls and alias nothing else. `truncate` only
|
||||||
|
// shrinks, so even a misbehaving `n > count` cannot read out of bounds.
|
||||||
unsafe {
|
unsafe {
|
||||||
let mut count: i32 = 0;
|
let mut count: i32 = 0;
|
||||||
if query(
|
if query(
|
||||||
@@ -699,6 +762,10 @@ impl EglImporter {
|
|||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
attrs.push(egl::ATTRIB_NONE);
|
attrs.push(egl::ATTRIB_NONE);
|
||||||
|
// SAFETY: `eglCreateImage(EGL_LINUX_DMA_BUF_EXT, ...)` mandates a NULL `EGLClientBuffer`
|
||||||
|
// (the source is described entirely by the attribute list built above), so wrapping
|
||||||
|
// `null_mut()` is the required value. `from_ptr` only stores the pointer without
|
||||||
|
// dereferencing it, so constructing it from null is sound.
|
||||||
let client = unsafe { egl::ClientBuffer::from_ptr(std::ptr::null_mut()) };
|
let client = unsafe { egl::ClientBuffer::from_ptr(std::ptr::null_mut()) };
|
||||||
let image = self
|
let image = self
|
||||||
.egl
|
.egl
|
||||||
@@ -733,11 +800,21 @@ impl EglImporter {
|
|||||||
) -> Result<DeviceBuffer> {
|
) -> Result<DeviceBuffer> {
|
||||||
cuda::make_current()?;
|
cuda::make_current()?;
|
||||||
if self.blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) {
|
if self.blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) {
|
||||||
|
// SAFETY: `GlBlit::new` requires the GL context current on the calling thread and a
|
||||||
|
// current CUDA context. Both hold: this runs on the capture thread where
|
||||||
|
// `EglImporter::new` made the GL context current and never released it, and
|
||||||
|
// `cuda::make_current()?` ran at the top of this function. `width`/`height` are plain
|
||||||
|
// `Copy` frame dimensions.
|
||||||
self.blit = Some(unsafe { GlBlit::new(width, height)? });
|
self.blit = Some(unsafe { GlBlit::new(width, height)? });
|
||||||
}
|
}
|
||||||
let egl_image_target = self.egl_image_target;
|
let egl_image_target = self.egl_image_target;
|
||||||
let blit = self.blit.as_mut().unwrap();
|
let blit = self.blit.as_mut().unwrap();
|
||||||
// SAFETY: GL + CUDA contexts current on this thread; `image` is a valid EGLImage.
|
// SAFETY: `GlBlit::run` requires a current GL context and a valid `EGLImage`. The GL context
|
||||||
|
// is current on this capture thread (made current in `EglImporter::new`, never released) and
|
||||||
|
// `cuda::make_current()` ran above; `egl_image_target` is the `glEGLImageTargetTexture2DOES`
|
||||||
|
// pointer loaded in `new`; `image` is the raw handle of the live `EGLImage` that
|
||||||
|
// `import_inner` created with `eglCreateImage` and destroys only AFTER this call returns, so
|
||||||
|
// it stays valid for the whole synchronous `run`.
|
||||||
unsafe { blit.run(egl_image_target, image)? };
|
unsafe { blit.run(egl_image_target, image)? };
|
||||||
// Persistent registration (mapped per frame) + a pooled buffer — no per-frame
|
// Persistent registration (mapped per frame) + a pooled buffer — no per-frame
|
||||||
// cuGraphicsGLRegisterImage / cuMemAllocPitch.
|
// cuGraphicsGLRegisterImage / cuMemAllocPitch.
|
||||||
@@ -757,11 +834,21 @@ impl EglImporter {
|
|||||||
) -> Result<DeviceBuffer> {
|
) -> Result<DeviceBuffer> {
|
||||||
cuda::make_current()?;
|
cuda::make_current()?;
|
||||||
if self.nv12_blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) {
|
if self.nv12_blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) {
|
||||||
|
// SAFETY: `Nv12Blit::new` requires the GL context current on the calling thread and a
|
||||||
|
// current CUDA context. Both hold: this runs on the capture thread where
|
||||||
|
// `EglImporter::new` made the GL context current and never released it, and
|
||||||
|
// `cuda::make_current()?` ran at the top of this function. `width`/`height` are plain
|
||||||
|
// `Copy` frame dimensions.
|
||||||
self.nv12_blit = Some(unsafe { Nv12Blit::new(width, height)? });
|
self.nv12_blit = Some(unsafe { Nv12Blit::new(width, height)? });
|
||||||
}
|
}
|
||||||
let egl_image_target = self.egl_image_target;
|
let egl_image_target = self.egl_image_target;
|
||||||
let blit = self.nv12_blit.as_mut().unwrap();
|
let blit = self.nv12_blit.as_mut().unwrap();
|
||||||
// SAFETY: GL + CUDA contexts current on this thread; `image` is a valid EGLImage.
|
// SAFETY: `Nv12Blit::run` requires a current GL context and a valid `EGLImage`. The GL
|
||||||
|
// context is current on this capture thread (made current in `EglImporter::new`, never
|
||||||
|
// released) and `cuda::make_current()` ran above; `egl_image_target` is the
|
||||||
|
// `glEGLImageTargetTexture2DOES` pointer loaded in `new`; `image` is the raw handle of the
|
||||||
|
// live `EGLImage` that `import_inner` created with `eglCreateImage` and destroys only AFTER
|
||||||
|
// this call returns, so it stays valid for the whole synchronous `run`.
|
||||||
unsafe { blit.run(egl_image_target, image)? };
|
unsafe { blit.run(egl_image_target, image)? };
|
||||||
let dst = blit.pool.get()?;
|
let dst = blit.pool.get()?;
|
||||||
cuda::copy_mapped_nv12(&mut blit.y_registered, &mut blit.uv_registered, &dst)?;
|
cuda::copy_mapped_nv12(&mut blit.y_registered, &mut blit.uv_registered, &dst)?;
|
||||||
@@ -787,9 +874,22 @@ impl EglImporter {
|
|||||||
);
|
);
|
||||||
cuda::make_current()?;
|
cuda::make_current()?;
|
||||||
if self.nv12_blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) {
|
if self.nv12_blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) {
|
||||||
|
// SAFETY: `Nv12Blit::new` requires the GL context current on the calling thread and a
|
||||||
|
// current CUDA context. Both hold: this self-test path runs on the thread that owns this
|
||||||
|
// `EglImporter` with its GL context current, and `cuda::make_current()?` ran just above.
|
||||||
|
// `width`/`height` are plain `Copy` scalars.
|
||||||
self.nv12_blit = Some(unsafe { Nv12Blit::new(width, height)? });
|
self.nv12_blit = Some(unsafe { Nv12Blit::new(width, height)? });
|
||||||
}
|
}
|
||||||
let blit = self.nv12_blit.as_mut().unwrap();
|
let blit = self.nv12_blit.as_mut().unwrap();
|
||||||
|
// SAFETY: runs on the thread that owns this `EglImporter` with its GL context current.
|
||||||
|
// `blit.src_tex` is a texture this `Nv12Blit` owns; `glTexStorage2D` allocates immutable
|
||||||
|
// RGBA8 storage exactly once (guarded by `test_src_storage`) sized `width×height`.
|
||||||
|
// `glTexSubImage2D` then uploads exactly `width×height` RGBA8 texels, reading `width*height*4`
|
||||||
|
// bytes from `rgba.as_ptr()`; the caller already asserted `rgba.len() == width*height*4`, rows
|
||||||
|
// are `width*4` bytes (a multiple of the default 4-byte unpack alignment, so no row-padding
|
||||||
|
// over-read), and `rgba` is a live borrow that outlives this synchronous upload. `run_passes`
|
||||||
|
// then needs only the current GL context (no further Rust pointers). All GL names are this
|
||||||
|
// blit's own, alias no other live object, and nothing is retained past the calls.
|
||||||
unsafe {
|
unsafe {
|
||||||
// Upload the host RGBA into `src_tex` (an immutable GL_RGBA8 backing must exist first;
|
// Upload the host RGBA into `src_tex` (an immutable GL_RGBA8 backing must exist first;
|
||||||
// the live path never allocates it — it retargets `src_tex` via EGLImage instead).
|
// the live path never allocates it — it retargets `src_tex` via EGLImage instead).
|
||||||
@@ -824,9 +924,16 @@ impl EglImporter {
|
|||||||
impl Drop for EglImporter {
|
impl Drop for EglImporter {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if !self.gbm.is_null() {
|
if !self.gbm.is_null() {
|
||||||
|
// SAFETY: `self.gbm` is the non-null `gbm_device*` from `gbm_create_device` in `new`
|
||||||
|
// (checked non-null here), owned exclusively by this `EglImporter` and destroyed exactly
|
||||||
|
// once (in `Drop`). It is freed BEFORE `render_fd` is closed below — the correct order,
|
||||||
|
// since the device borrowed that fd for its lifetime.
|
||||||
unsafe { gbm_device_destroy(self.gbm) };
|
unsafe { gbm_device_destroy(self.gbm) };
|
||||||
}
|
}
|
||||||
if self.render_fd >= 0 {
|
if self.render_fd >= 0 {
|
||||||
|
// SAFETY: `self.render_fd` is the fd `open` returned in `new` (checked `>= 0`), owned
|
||||||
|
// exclusively by this `EglImporter`; this `close` runs exactly once, after the gbm device
|
||||||
|
// that borrowed it has been destroyed. No double-close or use-after-close.
|
||||||
unsafe { libc::close(self.render_fd) };
|
unsafe { libc::close(self.render_fd) };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,9 @@
|
|||||||
//! a stream's life). Falls back cleanly: any init/import error disables the importer and the
|
//! a stream's life). Falls back cleanly: any init/import error disables the importer and the
|
||||||
//! CPU mmap path takes over.
|
//! CPU mmap path takes over.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::cuda::{self, DeviceBuffer};
|
use super::cuda::{self, DeviceBuffer};
|
||||||
use anyhow::{anyhow, bail, Context as _, Result};
|
use anyhow::{anyhow, bail, Context as _, Result};
|
||||||
use ash::vk;
|
use ash::vk;
|
||||||
@@ -51,12 +54,27 @@ pub struct VkBridge {
|
|||||||
dst: Option<DstBuf>,
|
dst: Option<DstBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Confined to the capture thread; moved there once.
|
// SAFETY: `VkBridge` owns ash Vulkan handles (instance/device/queue/command pool+buffer/fence), a
|
||||||
|
// CUDA external-memory mapping, and an fd→buffer cache — none `Sync`, and a single queue +
|
||||||
|
// command buffer must be externally synchronized. It is created inside `EglImporter::import_linear`
|
||||||
|
// on the dedicated `punktfunk-pipewire` capture thread and every method (`import_linear`, `Drop`)
|
||||||
|
// runs on that thread; it is never shared via `&` across threads. `Send` asserts only that
|
||||||
|
// transferring ownership is sound (so the bridge can live inside the `Send` `EglImporter`); the live
|
||||||
|
// handles are never touched off-thread, and `Sync` is deliberately NOT implied.
|
||||||
unsafe impl Send for VkBridge {}
|
unsafe impl Send for VkBridge {}
|
||||||
|
|
||||||
impl VkBridge {
|
impl VkBridge {
|
||||||
/// Bring up Vulkan on the NVIDIA GPU with the external-memory extensions.
|
/// Bring up Vulkan on the NVIDIA GPU with the external-memory extensions.
|
||||||
pub fn new() -> Result<VkBridge> {
|
pub fn new() -> Result<VkBridge> {
|
||||||
|
// SAFETY: standard ash bring-up — every call is `unsafe` only because ash cannot statically
|
||||||
|
// verify Vulkan handle/CreateInfo validity. `ash::Entry::load` dlopens a real system
|
||||||
|
// libvulkan. Each `*CreateInfo`/`AllocateInfo` is built by ash's builders from locals (`app`,
|
||||||
|
// `exts`, `prio`, `qci`, and the inline infos) that all live for the duration of the
|
||||||
|
// synchronous `create_*`/`enumerate_*` call that reads them — in particular the
|
||||||
|
// `enabled_extension_names(&exts)` and `queue_priorities(&prio)` borrows outlive their calls.
|
||||||
|
// Every handle passed (`instance`, `phys`, `device`, `qf`, `cmd_pool`) was just created and
|
||||||
|
// checked via `?`/`ok_or_else` in this same function, so no invalid handle is ever used. This
|
||||||
|
// constructor shares nothing across threads.
|
||||||
unsafe {
|
unsafe {
|
||||||
let entry = ash::Entry::load().context("load libvulkan")?;
|
let entry = ash::Entry::load().context("load libvulkan")?;
|
||||||
let app = vk::ApplicationInfo::default().api_version(vk::API_VERSION_1_1);
|
let app = vk::ApplicationInfo::default().api_version(vk::API_VERSION_1_1);
|
||||||
@@ -294,6 +312,19 @@ impl VkBridge {
|
|||||||
height: u32,
|
height: u32,
|
||||||
pool: &cuda::BufferPool,
|
pool: &cuda::BufferPool,
|
||||||
) -> Result<DeviceBuffer> {
|
) -> Result<DeviceBuffer> {
|
||||||
|
// SAFETY: `fd` is the live dmabuf fd handed in by the caller (borrowed; `import_src` dup's it
|
||||||
|
// internally and Vulkan owns the dup). `libc::lseek` only queries the fd's size. The unsafe
|
||||||
|
// `import_src`/`ensure_dst` are called with a valid fd and a checked size. The bounds are
|
||||||
|
// proven: `import_src` asserts `size >= span` (so the cached `src_size >= span`),
|
||||||
|
// `copy_size = src_size.min(span)`, and `ensure_dst(copy_size)` makes `dst` at least
|
||||||
|
// `copy_size` — so the GPU `cmd_copy_buffer` of `copy_size` bytes reads/writes within both
|
||||||
|
// buffers, and the later CUDA pitched copy reading `[offset, span)` from `dst.cuda.ptr` (=
|
||||||
|
// `offset + stride*height = span <= copy_size`) stays inside the freshly-copied region. The
|
||||||
|
// `*Info`/`region`/`cmds`/`submit` are locals that outlive the synchronous calls reading them.
|
||||||
|
// `cmd`/`queue`/`fence` are this bridge's own handles, used on this single thread only. The
|
||||||
|
// host-side `wait_for_fences` fully retires the Vulkan copy BEFORE CUDA reads the shared
|
||||||
|
// memory, so there is no GPU write/read data race. `dst` is an `&self.dst` shared borrow that
|
||||||
|
// does not alias the `&self.device` calls.
|
||||||
unsafe {
|
unsafe {
|
||||||
let span = offset as u64 + stride as u64 * height as u64;
|
let span = offset as u64 + stride as u64 * height as u64;
|
||||||
if !self.src_cache.contains_key(&fd) {
|
if !self.src_cache.contains_key(&fd) {
|
||||||
@@ -347,6 +378,15 @@ impl VkBridge {
|
|||||||
|
|
||||||
impl Drop for VkBridge {
|
impl Drop for VkBridge {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
|
// SAFETY: runs once when the bridge is dropped on its owning capture thread.
|
||||||
|
// `device_wait_idle` first drains all in-flight GPU work, so no queued command still
|
||||||
|
// references these objects. Every handle freed (the `src_cache` buffers+memories, the `dst`
|
||||||
|
// buffer+memory, `fence`, `cmd_pool`, `device`, `instance`) was created by this `VkBridge`
|
||||||
|
// and owned exclusively by it, so each `destroy_*`/`free_*` runs exactly once with no
|
||||||
|
// double-free, in dependency order (child objects before `device`, `device` before
|
||||||
|
// `instance`). `dst.cuda` is dropped after `free_memory`, which is safe because CUDA holds
|
||||||
|
// its own dup'd OPAQUE_FD reference to the underlying allocation. No other thread touches
|
||||||
|
// these handles.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = self.device.device_wait_idle();
|
let _ = self.device.device_wait_idle();
|
||||||
for (_, s) in self.src_cache.drain() {
|
for (_, s) in self.src_cache.drain() {
|
||||||
|
|||||||
@@ -13,6 +13,10 @@
|
|||||||
|
|
||||||
// Scaffold: trait methods and config paths are defined ahead of their backends.
|
// Scaffold: trait methods and config paths are defined ahead of their backends.
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
|
// Unsafe-proof program: every `unsafe {}` / `unsafe impl` in the crate must carry a `// SAFETY:`
|
||||||
|
// proof of why it is sound. This crate-root deny is the permanent, catch-all gate (it also covers
|
||||||
|
// any future module); individual files keep their own `#![deny(...)]` as belt-and-suspenders.
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
mod audio;
|
mod audio;
|
||||||
mod capture;
|
mod capture;
|
||||||
@@ -31,6 +35,9 @@ mod gamestream;
|
|||||||
mod hdr;
|
mod hdr;
|
||||||
mod inject;
|
mod inject;
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
|
#[path = "windows/install.rs"]
|
||||||
|
mod install;
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
#[path = "windows/interactive.rs"]
|
#[path = "windows/interactive.rs"]
|
||||||
mod interactive;
|
mod interactive;
|
||||||
mod library;
|
mod library;
|
||||||
@@ -46,6 +53,7 @@ mod service;
|
|||||||
mod session_plan;
|
mod session_plan;
|
||||||
mod session_tuning;
|
mod session_tuning;
|
||||||
mod spike;
|
mod spike;
|
||||||
|
mod stats_recorder;
|
||||||
mod vdisplay;
|
mod vdisplay;
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
#[path = "windows/wgc_helper.rs"]
|
#[path = "windows/wgc_helper.rs"]
|
||||||
@@ -385,7 +393,7 @@ fn real_main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
// USER-session WGC helper (Windows two-process secure-desktop design): capture the EXISTING
|
// USER-session WGC helper (Windows two-process secure-desktop design): capture the EXISTING
|
||||||
// SudoVDA via WGC + NVENC, stream AUs on stdout to the SYSTEM host. Spawned by the host
|
// SudoVDA via WGC + NVENC, stream AUs on stdout to the SYSTEM host. Spawned by the host
|
||||||
// (CreateProcessAsUser), not run by hand. See docs/windows-secure-desktop.md.
|
// (CreateProcessAsUser), not run by hand. See design/archive/windows-secure-desktop.md.
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
Some("wgc-helper") => {
|
Some("wgc-helper") => {
|
||||||
let get = |flag: &str| {
|
let get = |flag: &str| {
|
||||||
@@ -417,6 +425,12 @@ fn real_main() -> Result<()> {
|
|||||||
// that launches the host into the active interactive session.
|
// that launches the host into the active interactive session.
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
Some("service") => service::main(&args[1..]),
|
Some("service") => service::main(&args[1..]),
|
||||||
|
// Install-time work the Windows installer delegates to the exe instead of locale-parsed
|
||||||
|
// PowerShell *files* (the ANSI-codepage parse-break root fix; see windows/install.rs).
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
Some("driver") => install::driver_main(&args[1..]),
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
Some("web") => install::web_main(&args[1..]),
|
||||||
Some("-h") | Some("--help") | Some("help") | None => {
|
Some("-h") | Some("--help") | Some("help") | None => {
|
||||||
print_usage();
|
print_usage();
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -700,7 +714,7 @@ SPIKE OPTIONS:
|
|||||||
|
|
||||||
NOTES:
|
NOTES:
|
||||||
'portal' needs headless Sway + xdg-desktop-portal-wlr running in this session
|
'portal' needs headless Sway + xdg-desktop-portal-wlr running in this session
|
||||||
(see docs/linux-setup.md). 'synthetic' needs no capture session and always runs.
|
(see design/linux-setup.md). 'synthetic' needs no capture session and always runs.
|
||||||
Encoded AUs are written to a playable file AND (unless --no-loopback) fed through a
|
Encoded AUs are written to a playable file AND (unless --no-loopback) fed through a
|
||||||
punktfunk_core host→client loopback that reassembles and byte-verifies each one.
|
punktfunk_core host→client loopback that reassembles and byte-verifies each one.
|
||||||
Both 'serve' and 'punktfunk1-host' advertise the native service over mDNS
|
Both 'serve' and 'punktfunk1-host' advertise the native service over mDNS
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
//! The API is versioned under `/api/v1` and described by an OpenAPI 3.1 document generated
|
//! The API is versioned under `/api/v1` and described by an OpenAPI 3.1 document generated
|
||||||
//! at compile time with `utoipa` — `punktfunk-host openapi` prints it for client codegen, the
|
//! at compile time with `utoipa` — `punktfunk-host openapi` prints it for client codegen, the
|
||||||
//! running server serves it at `/api/v1/openapi.json` plus interactive docs at `/api/docs`,
|
//! running server serves it at `/api/v1/openapi.json` plus interactive docs at `/api/docs`,
|
||||||
//! and a copy is checked in at `docs/api/openapi.json` (a test fails if it drifts, like the
|
//! and a copy is checked in at `api/openapi.json` (a test fails if it drifts, like the
|
||||||
//! cbindgen header).
|
//! cbindgen header).
|
||||||
//!
|
//!
|
||||||
//! Security: binds loopback by default, serves HTTPS with the host's identity cert, and requires
|
//! Security: binds loopback by default, serves HTTPS with the host's identity cert, and requires
|
||||||
@@ -20,6 +20,7 @@ use crate::gamestream::{
|
|||||||
tls::{serve_https, PeerCertFingerprint},
|
tls::{serve_https, PeerCertFingerprint},
|
||||||
AppState, APP_VERSION, AUDIO_PORT, CONTROL_PORT, GFE_VERSION, RTSP_PORT, VIDEO_PORT,
|
AppState, APP_VERSION, AUDIO_PORT, CONTROL_PORT, GFE_VERSION, RTSP_PORT, VIDEO_PORT,
|
||||||
};
|
};
|
||||||
|
use crate::stats_recorder::{Capture, CaptureMeta, StatsStatus};
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use axum::{
|
use axum::{
|
||||||
extract::{Path, Request, State},
|
extract::{Path, Request, State},
|
||||||
@@ -66,6 +67,9 @@ struct MgmtState {
|
|||||||
/// Native (punktfunk/1) pairing — shared with the QUIC host when the unified `serve --native`
|
/// Native (punktfunk/1) pairing — shared with the QUIC host when the unified `serve --native`
|
||||||
/// runs it. `None` ⇒ GameStream-only host (the native endpoints report `enabled: false`).
|
/// runs it. `None` ⇒ GameStream-only host (the native endpoints report `enabled: false`).
|
||||||
native: Option<Arc<crate::native_pairing::NativePairing>>,
|
native: Option<Arc<crate::native_pairing::NativePairing>>,
|
||||||
|
/// Shared streaming-stats recorder — the same handle the streaming loops emit into, so an
|
||||||
|
/// operator can arm/stop a capture here and review/list/delete saved recordings.
|
||||||
|
stats: Arc<crate::stats_recorder::StatsRecorder>,
|
||||||
token: Option<String>,
|
token: Option<String>,
|
||||||
/// The port we serve on, echoed in [`PortMap`] so a client can persist a full endpoint map.
|
/// The port we serve on, echoed in [`PortMap`] so a client can persist a full endpoint map.
|
||||||
port: u16,
|
port: u16,
|
||||||
@@ -77,6 +81,7 @@ pub async fn run(
|
|||||||
state: Arc<AppState>,
|
state: Arc<AppState>,
|
||||||
opts: Options,
|
opts: Options,
|
||||||
native: Option<Arc<crate::native_pairing::NativePairing>>,
|
native: Option<Arc<crate::native_pairing::NativePairing>>,
|
||||||
|
stats: Arc<crate::stats_recorder::StatsRecorder>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// The mgmt API is HTTPS + token-authenticated ALWAYS (even on loopback): `parse_serve`
|
// The mgmt API is HTTPS + token-authenticated ALWAYS (even on loopback): `parse_serve`
|
||||||
// guarantees a token (CLI flag / env / persisted ~/.config/punktfunk/mgmt-token / generated).
|
// guarantees a token (CLI flag / env / persisted ~/.config/punktfunk/mgmt-token / generated).
|
||||||
@@ -100,7 +105,7 @@ pub async fn run(
|
|||||||
auth = "mTLS (paired cert) or bearer (required)",
|
auth = "mTLS (paired cert) or bearer (required)",
|
||||||
"management API listening over HTTPS (docs at /api/docs, spec at /api/v1/openapi.json)"
|
"management API listening over HTTPS (docs at /api/docs, spec at /api/v1/openapi.json)"
|
||||||
);
|
);
|
||||||
let app = app(state, Some(token), opts.bind.port(), native);
|
let app = app(state, Some(token), opts.bind.port(), native, stats);
|
||||||
serve_https(opts.bind, app, tls).await
|
serve_https(opts.bind, app, tls).await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -110,10 +115,12 @@ fn app(
|
|||||||
token: Option<String>,
|
token: Option<String>,
|
||||||
port: u16,
|
port: u16,
|
||||||
native: Option<Arc<crate::native_pairing::NativePairing>>,
|
native: Option<Arc<crate::native_pairing::NativePairing>>,
|
||||||
|
stats: Arc<crate::stats_recorder::StatsRecorder>,
|
||||||
) -> Router {
|
) -> Router {
|
||||||
let shared = Arc::new(MgmtState {
|
let shared = Arc::new(MgmtState {
|
||||||
app: state,
|
app: state,
|
||||||
native,
|
native,
|
||||||
|
stats,
|
||||||
token,
|
token,
|
||||||
port,
|
port,
|
||||||
});
|
});
|
||||||
@@ -158,13 +165,19 @@ fn api_router_parts() -> (Router<Arc<MgmtState>>, utoipa::openapi::OpenApi) {
|
|||||||
.routes(routes!(request_idr))
|
.routes(routes!(request_idr))
|
||||||
.routes(routes!(get_library))
|
.routes(routes!(get_library))
|
||||||
.routes(routes!(create_custom_game))
|
.routes(routes!(create_custom_game))
|
||||||
.routes(routes!(update_custom_game, delete_custom_game)),
|
.routes(routes!(update_custom_game, delete_custom_game))
|
||||||
|
.routes(routes!(stats_capture_start))
|
||||||
|
.routes(routes!(stats_capture_stop))
|
||||||
|
.routes(routes!(stats_capture_status))
|
||||||
|
.routes(routes!(stats_capture_live))
|
||||||
|
.routes(routes!(stats_recordings_list))
|
||||||
|
.routes(routes!(stats_recording_get, stats_recording_delete)),
|
||||||
)
|
)
|
||||||
.split_for_parts()
|
.split_for_parts()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The OpenAPI document as pretty JSON — what `punktfunk-host openapi` prints and what is
|
/// The OpenAPI document as pretty JSON — what `punktfunk-host openapi` prints and what is
|
||||||
/// checked in at `docs/api/openapi.json` for client codegen.
|
/// checked in at `api/openapi.json` for client codegen.
|
||||||
pub fn openapi_json() -> String {
|
pub fn openapi_json() -> String {
|
||||||
let (_, api) = api_router_parts();
|
let (_, api) = api_router_parts();
|
||||||
let mut json = api.to_pretty_json().expect("serialize OpenAPI document");
|
let mut json = api.to_pretty_json().expect("serialize OpenAPI document");
|
||||||
@@ -190,6 +203,7 @@ pub fn openapi_json() -> String {
|
|||||||
(name = "native", description = "Native punktfunk/1 pairing: arm a window, display the host PIN, manage paired devices"),
|
(name = "native", description = "Native punktfunk/1 pairing: arm a window, display the host PIN, manage paired devices"),
|
||||||
(name = "session", description = "Active streaming session control"),
|
(name = "session", description = "Active streaming session control"),
|
||||||
(name = "library", description = "Game library: installed-store titles (Steam) plus user-curated custom entries"),
|
(name = "library", description = "Game library: installed-store titles (Steam) plus user-curated custom entries"),
|
||||||
|
(name = "stats", description = "Streaming performance-stats capture: arm/stop a recording, read the live + saved time-series for graphing"),
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
struct ApiDoc;
|
struct ApiDoc;
|
||||||
@@ -1218,6 +1232,185 @@ async fn delete_custom_game(Path(id): Path<String>) -> Response {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------
|
||||||
|
// Streaming stats capture (design/stats-capture-plan.md §2)
|
||||||
|
// ---------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Start a stats capture
|
||||||
|
///
|
||||||
|
/// Arms a new performance-stats capture. Idempotent: if a capture is already running this returns
|
||||||
|
/// the current status unchanged. While armed, the streaming loops emit aggregated samples (~ every
|
||||||
|
/// 1–2 s) into the in-progress capture, readable live via `GET /stats/capture/live`.
|
||||||
|
#[utoipa::path(
|
||||||
|
post,
|
||||||
|
path = "/stats/capture/start",
|
||||||
|
tag = "stats",
|
||||||
|
operation_id = "statsCaptureStart",
|
||||||
|
responses(
|
||||||
|
(status = OK, description = "Capture armed (or already running)", body = StatsStatus),
|
||||||
|
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn stats_capture_start(State(st): State<Arc<MgmtState>>) -> Json<StatsStatus> {
|
||||||
|
let status = st.stats.start();
|
||||||
|
tracing::info!(
|
||||||
|
started_unix_ms = status.started_unix_ms,
|
||||||
|
"management API: stats capture armed"
|
||||||
|
);
|
||||||
|
Json(status)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stop the stats capture
|
||||||
|
///
|
||||||
|
/// Disarms the in-progress capture and writes it to disk atomically, returning its summary. If
|
||||||
|
/// nothing was recording, returns `204 No Content`.
|
||||||
|
#[utoipa::path(
|
||||||
|
post,
|
||||||
|
path = "/stats/capture/stop",
|
||||||
|
tag = "stats",
|
||||||
|
operation_id = "statsCaptureStop",
|
||||||
|
responses(
|
||||||
|
(status = OK, description = "Capture stopped and saved", body = CaptureMeta),
|
||||||
|
(status = NO_CONTENT, description = "Nothing was recording"),
|
||||||
|
(status = INTERNAL_SERVER_ERROR, description = "Could not write the recording to disk", body = ApiError),
|
||||||
|
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn stats_capture_stop(State(st): State<Arc<MgmtState>>) -> Response {
|
||||||
|
match st.stats.stop() {
|
||||||
|
Ok(Some(meta)) => {
|
||||||
|
tracing::info!(id = %meta.id, samples = meta.sample_count, "management API: stats capture saved");
|
||||||
|
(StatusCode::OK, Json(meta)).into_response()
|
||||||
|
}
|
||||||
|
Ok(None) => StatusCode::NO_CONTENT.into_response(),
|
||||||
|
Err(e) => api_error(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
&format!("could not save capture: {e}"),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stats capture status
|
||||||
|
///
|
||||||
|
/// Whether a capture is armed, its sample count, and start time. Poll this (e.g. every 2 s) to
|
||||||
|
/// drive the capture-control UI.
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/stats/capture/status",
|
||||||
|
tag = "stats",
|
||||||
|
operation_id = "statsCaptureStatus",
|
||||||
|
responses(
|
||||||
|
(status = OK, description = "In-progress capture status (idle when not armed)", body = StatsStatus),
|
||||||
|
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn stats_capture_status(State(st): State<Arc<MgmtState>>) -> Json<StatsStatus> {
|
||||||
|
Json(st.stats.status())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Live in-progress capture
|
||||||
|
///
|
||||||
|
/// The full sample time-series of the capture currently recording, for live graphing. `404` when
|
||||||
|
/// nothing is armed.
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/stats/capture/live",
|
||||||
|
tag = "stats",
|
||||||
|
operation_id = "statsCaptureLive",
|
||||||
|
responses(
|
||||||
|
(status = OK, description = "The in-progress capture (meta + samples so far)", body = Capture),
|
||||||
|
(status = NOT_FOUND, description = "No capture is currently recording", body = ApiError),
|
||||||
|
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn stats_capture_live(State(st): State<Arc<MgmtState>>) -> Response {
|
||||||
|
match st.stats.live_snapshot() {
|
||||||
|
Some(capture) => Json(capture).into_response(),
|
||||||
|
None => api_error(StatusCode::NOT_FOUND, "no capture is currently recording"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List saved recordings
|
||||||
|
///
|
||||||
|
/// Every saved capture's summary (the `meta` head only — not the sample body), newest first.
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/stats/recordings",
|
||||||
|
tag = "stats",
|
||||||
|
operation_id = "statsRecordingsList",
|
||||||
|
responses(
|
||||||
|
(status = OK, description = "Saved capture summaries, newest first", body = [CaptureMeta]),
|
||||||
|
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn stats_recordings_list(State(st): State<Arc<MgmtState>>) -> Json<Vec<CaptureMeta>> {
|
||||||
|
Json(st.stats.list())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get a saved recording
|
||||||
|
///
|
||||||
|
/// The full capture (meta + samples) for `id`, for graphing or download.
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/stats/recordings/{id}",
|
||||||
|
tag = "stats",
|
||||||
|
operation_id = "statsRecordingGet",
|
||||||
|
params(("id" = String, Path, description = "The recording id (its filename stem)")),
|
||||||
|
responses(
|
||||||
|
(status = OK, description = "The full capture", body = Capture),
|
||||||
|
(status = NOT_FOUND, description = "No recording with that id", body = ApiError),
|
||||||
|
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||||
|
(status = INTERNAL_SERVER_ERROR, description = "The recording file is unreadable", body = ApiError),
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn stats_recording_get(State(st): State<Arc<MgmtState>>, Path(id): Path<String>) -> Response {
|
||||||
|
match st.stats.load(&id) {
|
||||||
|
Ok(capture) => Json(capture).into_response(),
|
||||||
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||||
|
api_error(StatusCode::NOT_FOUND, "no recording with that id")
|
||||||
|
}
|
||||||
|
Err(e) => api_error(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
&format!("could not read recording: {e}"),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delete a saved recording
|
||||||
|
///
|
||||||
|
/// Removes the recording `id` from disk. `404` if there is no such recording.
|
||||||
|
#[utoipa::path(
|
||||||
|
delete,
|
||||||
|
path = "/stats/recordings/{id}",
|
||||||
|
tag = "stats",
|
||||||
|
operation_id = "statsRecordingDelete",
|
||||||
|
params(("id" = String, Path, description = "The recording id (its filename stem)")),
|
||||||
|
responses(
|
||||||
|
(status = NO_CONTENT, description = "Recording deleted"),
|
||||||
|
(status = NOT_FOUND, description = "No recording with that id", body = ApiError),
|
||||||
|
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||||
|
(status = INTERNAL_SERVER_ERROR, description = "Could not delete the recording", body = ApiError),
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn stats_recording_delete(
|
||||||
|
State(st): State<Arc<MgmtState>>,
|
||||||
|
Path(id): Path<String>,
|
||||||
|
) -> Response {
|
||||||
|
match st.stats.delete(&id) {
|
||||||
|
Ok(()) => {
|
||||||
|
tracing::info!(id, "management API: recording deleted");
|
||||||
|
StatusCode::NO_CONTENT.into_response()
|
||||||
|
}
|
||||||
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||||
|
api_error(StatusCode::NOT_FOUND, "no recording with that id")
|
||||||
|
}
|
||||||
|
Err(e) => api_error(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
&format!("could not delete recording: {e}"),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------
|
||||||
// Tests
|
// Tests
|
||||||
// ---------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------
|
||||||
@@ -1231,6 +1424,15 @@ mod tests {
|
|||||||
use std::net::{IpAddr, Ipv4Addr};
|
use std::net::{IpAddr, Ipv4Addr};
|
||||||
use tower::ServiceExt;
|
use tower::ServiceExt;
|
||||||
|
|
||||||
|
/// A throwaway stats recorder rooted in a unique temp dir (never touches the real config dir).
|
||||||
|
fn test_stats() -> Arc<crate::stats_recorder::StatsRecorder> {
|
||||||
|
crate::stats_recorder::StatsRecorder::new(std::env::temp_dir().join(format!(
|
||||||
|
"pf-mgmt-stats-{}-{:p}",
|
||||||
|
std::process::id(),
|
||||||
|
&0u8 as *const u8
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
fn test_state() -> Arc<AppState> {
|
fn test_state() -> Arc<AppState> {
|
||||||
let host = Host {
|
let host = Host {
|
||||||
hostname: "test-host".into(),
|
hostname: "test-host".into(),
|
||||||
@@ -1240,18 +1442,20 @@ mod tests {
|
|||||||
https_port: HTTPS_PORT,
|
https_port: HTTPS_PORT,
|
||||||
};
|
};
|
||||||
let identity = ServerIdentity::ephemeral().expect("ephemeral identity");
|
let identity = ServerIdentity::ephemeral().expect("ephemeral identity");
|
||||||
Arc::new(AppState::new(host, identity))
|
Arc::new(AppState::new(host, identity, test_stats()))
|
||||||
}
|
}
|
||||||
|
|
||||||
// The mgmt API now always requires auth, so the router always has a token. A test that passes
|
// The mgmt API now always requires auth, so the router always has a token. A test that passes
|
||||||
// `None` gets the default "test-secret" (and `send` auto-attaches the matching bearer); a test
|
// `None` gets the default "test-secret" (and `send` auto-attaches the matching bearer); a test
|
||||||
// that passes an explicit token exercises a mismatch (e.g. `bearer_token_is_enforced`).
|
// that passes an explicit token exercises a mismatch (e.g. `bearer_token_is_enforced`).
|
||||||
fn test_app(state: Arc<AppState>, token: Option<&str>) -> Router {
|
fn test_app(state: Arc<AppState>, token: Option<&str>) -> Router {
|
||||||
|
let stats = state.stats.clone();
|
||||||
app(
|
app(
|
||||||
state,
|
state,
|
||||||
Some(token.unwrap_or("test-secret").to_string()),
|
Some(token.unwrap_or("test-secret").to_string()),
|
||||||
DEFAULT_PORT,
|
DEFAULT_PORT,
|
||||||
None,
|
None,
|
||||||
|
stats,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1261,11 +1465,13 @@ mod tests {
|
|||||||
) -> Router {
|
) -> Router {
|
||||||
// Auth required always; the paired-cert tests inject a fingerprint (cert branch wins), the
|
// Auth required always; the paired-cert tests inject a fingerprint (cert branch wins), the
|
||||||
// rest authenticate via the `send`-attached default bearer.
|
// rest authenticate via the `send`-attached default bearer.
|
||||||
|
let stats = state.stats.clone();
|
||||||
app(
|
app(
|
||||||
state,
|
state,
|
||||||
Some("test-secret".to_string()),
|
Some("test-secret".to_string()),
|
||||||
DEFAULT_PORT,
|
DEFAULT_PORT,
|
||||||
Some(np),
|
Some(np),
|
||||||
|
stats,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1580,7 +1786,9 @@ mod tests {
|
|||||||
bind: "127.0.0.1:0".parse().unwrap(),
|
bind: "127.0.0.1:0".parse().unwrap(),
|
||||||
token: Some(" ".into()),
|
token: Some(" ".into()),
|
||||||
};
|
};
|
||||||
let err = run(test_state(), opts, None).await.unwrap_err();
|
let err = run(test_state(), opts, None, test_stats())
|
||||||
|
.await
|
||||||
|
.unwrap_err();
|
||||||
assert!(err.to_string().contains("no token"), "{err}");
|
assert!(err.to_string().contains("no token"), "{err}");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1663,14 +1871,14 @@ mod tests {
|
|||||||
serde_json::json!([{}])
|
serde_json::json!([{}])
|
||||||
);
|
);
|
||||||
|
|
||||||
let checked_in = include_str!("../../../docs/api/openapi.json");
|
let checked_in = include_str!("../../../api/openapi.json");
|
||||||
// Compare content, not line-ending style: the generated `json` is LF (serde_json), but git
|
// Compare content, not line-ending style: the generated `json` is LF (serde_json), but git
|
||||||
// may check the file out CRLF on Windows.
|
// may check the file out CRLF on Windows.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
json.trim().replace('\r', ""),
|
json.trim().replace('\r', ""),
|
||||||
checked_in.trim().replace('\r', ""),
|
checked_in.trim().replace('\r', ""),
|
||||||
"docs/api/openapi.json is stale — regenerate with: \
|
"api/openapi.json is stale — regenerate with: \
|
||||||
cargo run -p punktfunk-host -- openapi > docs/api/openapi.json"
|
cargo run -p punktfunk-host -- openapi > api/openapi.json"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,9 @@
|
|||||||
//! Trust: the host serves with its persistent identity (`~/.config/punktfunk/cert.pem`, shared
|
//! Trust: the host serves with its persistent identity (`~/.config/punktfunk/cert.pem`, shared
|
||||||
//! with GameStream pairing) and logs the SHA-256 fingerprint clients pin.
|
//! with GameStream pairing) and logs the SHA-256 fingerprint clients pin.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use punktfunk_core::config::{CompositorPref, FecConfig, FecScheme, GamepadPref, Role};
|
use punktfunk_core::config::{CompositorPref, FecConfig, FecScheme, GamepadPref, Role};
|
||||||
use punktfunk_core::input::{InputEvent, InputKind};
|
use punktfunk_core::input::{InputEvent, InputKind};
|
||||||
@@ -76,6 +79,9 @@ pub struct Punktfunk1Options {
|
|||||||
|
|
||||||
/// The native (punktfunk/1) trust store + on-demand arming PIN, shared with the management API.
|
/// The native (punktfunk/1) trust store + on-demand arming PIN, shared with the management API.
|
||||||
use crate::native_pairing::NativePairing;
|
use crate::native_pairing::NativePairing;
|
||||||
|
/// The shared streaming-stats recorder (web-console capture/graph), shared with the management API
|
||||||
|
/// and the GameStream loop; threaded into each session's `SessionContext`.
|
||||||
|
use crate::stats_recorder::StatsRecorder;
|
||||||
|
|
||||||
/// Minimum spacing between accepted pairing ceremonies (bounds online PIN guessing — with
|
/// Minimum spacing between accepted pairing ceremonies (bounds online PIN guessing — with
|
||||||
/// SPAKE2 an attacker already gets only one guess per ceremony; this caps the rate).
|
/// SPAKE2 an attacker already gets only one guess per ceremony; this caps the rate).
|
||||||
@@ -111,7 +117,11 @@ pub fn run(opts: Punktfunk1Options) -> Result<()> {
|
|||||||
opts.pairing_pin.clone(),
|
opts.pairing_pin.clone(),
|
||||||
opts.allow_pairing || opts.require_pairing,
|
opts.allow_pairing || opts.require_pairing,
|
||||||
)?);
|
)?);
|
||||||
rt.block_on(serve(opts, np))
|
// Standalone `punktfunk1-host` has no mgmt API to arm capture, so this recorder stays disarmed
|
||||||
|
// (harmless — the loops' `is_armed()` gate is always false). The unified `serve` shares one
|
||||||
|
// recorder across mgmt + both streaming paths instead.
|
||||||
|
let stats = StatsRecorder::new(crate::stats_recorder::default_dir());
|
||||||
|
rt.block_on(serve(opts, np, stats))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fingerprint_hex(fp: &[u8; 32]) -> String {
|
fn fingerprint_hex(fp: &[u8; 32]) -> String {
|
||||||
@@ -154,7 +164,11 @@ pub(crate) fn native_serve_opts(cfg: &NativeServe) -> Punktfunk1Options {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn serve(opts: Punktfunk1Options, np: Arc<NativePairing>) -> Result<()> {
|
pub(crate) async fn serve(
|
||||||
|
opts: Punktfunk1Options,
|
||||||
|
np: Arc<NativePairing>,
|
||||||
|
stats: Arc<StatsRecorder>,
|
||||||
|
) -> Result<()> {
|
||||||
let identity = crate::gamestream::cert::ServerIdentity::load_or_create()
|
let identity = crate::gamestream::cert::ServerIdentity::load_or_create()
|
||||||
.context("load host identity (~/.config/punktfunk)")?;
|
.context("load host identity (~/.config/punktfunk)")?;
|
||||||
let fingerprint = endpoint::fingerprint_of_pem(&identity.cert_pem)
|
let fingerprint = endpoint::fingerprint_of_pem(&identity.cert_pem)
|
||||||
@@ -209,6 +223,10 @@ pub(crate) async fn serve(opts: Punktfunk1Options, np: Arc<NativePairing>) -> Re
|
|||||||
// restores the box's autologin gaming session on idle, not per-disconnect — see
|
// restores the box's autologin gaming session on idle, not per-disconnect — see
|
||||||
// `vdisplay::restore_managed_session`). Held for serve()'s lifetime; dropping it stops it.
|
// `vdisplay::restore_managed_session`). Held for serve()'s lifetime; dropping it stops it.
|
||||||
let _restore_worker = crate::vdisplay::start_restore_worker();
|
let _restore_worker = crate::vdisplay::start_restore_worker();
|
||||||
|
// Host-lifetime cover-art warmer: fetches + caches GOG/Xbox cover art (no-auth api.gog.com /
|
||||||
|
// displaycatalog) off the hot path so `all_games()` (the library list + launch resolve) never
|
||||||
|
// blocks on the network. A no-op on a host whose stores all carry their own art.
|
||||||
|
let _art_warmer = crate::library::start_art_warmer();
|
||||||
// Pairing state (arming PIN + trust store) is shared with the management API. If it was armed
|
// Pairing state (arming PIN + trust store) is shared with the management API. If it was armed
|
||||||
// at startup (the CLI flags), surface the PIN the headless operator reads from the log; the
|
// at startup (the CLI flags), surface the PIN the headless operator reads from the log; the
|
||||||
// web console arms it on demand instead (a fresh, time-limited PIN).
|
// web console arms it on demand instead (a fresh, time-limited PIN).
|
||||||
@@ -269,6 +287,7 @@ pub(crate) async fn serve(opts: Punktfunk1Options, np: Arc<NativePairing>) -> Re
|
|||||||
let audio_cap = audio_cap.clone();
|
let audio_cap = audio_cap.clone();
|
||||||
let np = np.clone();
|
let np = np.clone();
|
||||||
let last_pairing = last_pairing.clone();
|
let last_pairing = last_pairing.clone();
|
||||||
|
let stats = stats.clone();
|
||||||
let inj_tx = injector.sender();
|
let inj_tx = injector.sender();
|
||||||
let mic_tx = mic_service.sender();
|
let mic_tx = mic_service.sender();
|
||||||
sessions.spawn(async move {
|
sessions.spawn(async move {
|
||||||
@@ -282,6 +301,7 @@ pub(crate) async fn serve(opts: Punktfunk1Options, np: Arc<NativePairing>) -> Re
|
|||||||
&fingerprint,
|
&fingerprint,
|
||||||
&np,
|
&np,
|
||||||
&last_pairing,
|
&last_pairing,
|
||||||
|
stats,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
@@ -472,6 +492,7 @@ async fn serve_session(
|
|||||||
host_fp: &[u8; 32],
|
host_fp: &[u8; 32],
|
||||||
np: &NativePairing,
|
np: &NativePairing,
|
||||||
last_pairing: &std::sync::Mutex<Option<std::time::Instant>>,
|
last_pairing: &std::sync::Mutex<Option<std::time::Instant>>,
|
||||||
|
stats: Arc<StatsRecorder>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let peer = conn.remote_address();
|
let peer = conn.remote_address();
|
||||||
|
|
||||||
@@ -928,6 +949,12 @@ async fn serve_session(
|
|||||||
let stop_stream = stop.clone();
|
let stop_stream = stop.clone();
|
||||||
let fec_target_dp = fec_target.clone(); // data-plane handle to the adaptive-FEC target
|
let fec_target_dp = fec_target.clone(); // data-plane handle to the adaptive-FEC target
|
||||||
let conn_stream = conn.clone(); // for sending the source's real HDR metadata (0xCE) mid-stream
|
let conn_stream = conn.clone(); // for sending the source's real HDR metadata (0xCE) mid-stream
|
||||||
|
let stats_dp = stats; // data-plane handle to the shared stats recorder
|
||||||
|
// Short label for web-console stats captures: the client's cert-fingerprint prefix, else its
|
||||||
|
// peer IP (no fingerprint = anonymous TOFU/--open client).
|
||||||
|
let client_label = endpoint::peer_fingerprint(&conn)
|
||||||
|
.map(|fp| fingerprint_hex(&fp)[..12].to_string())
|
||||||
|
.unwrap_or_else(|| conn.remote_address().ip().to_string());
|
||||||
let result: Result<()> = async {
|
let result: Result<()> = async {
|
||||||
tokio::task::spawn_blocking(move || -> Result<()> {
|
tokio::task::spawn_blocking(move || -> Result<()> {
|
||||||
// Wait briefly for the client to hole-punch our data port, then stream to its OBSERVED
|
// Wait briefly for the client to hole-punch our data port, then stream to its OBSERVED
|
||||||
@@ -982,6 +1009,8 @@ async fn serve_session(
|
|||||||
probe_result_tx,
|
probe_result_tx,
|
||||||
fec_target: fec_target_dp,
|
fec_target: fec_target_dp,
|
||||||
conn: conn_stream,
|
conn: conn_stream,
|
||||||
|
stats: stats_dp,
|
||||||
|
client_label,
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
launch: launch_for_dp,
|
launch: launch_for_dp,
|
||||||
})
|
})
|
||||||
@@ -1940,6 +1969,21 @@ struct FrameMsg {
|
|||||||
deadline: std::time::Instant,
|
deadline: std::time::Instant,
|
||||||
/// capture→encoded latency (µs), measured on the encode thread, carried for the perf histogram.
|
/// capture→encoded latency (µs), measured on the encode thread, carried for the perf histogram.
|
||||||
encode_us: u32,
|
encode_us: u32,
|
||||||
|
/// Per-stage µs splits, measured on the capture/encode thread (0 when neither `PUNKTFUNK_PERF`
|
||||||
|
/// nor a stats capture is armed). The send thread accumulates them for the web-console sample:
|
||||||
|
/// `cap_us` = `try_latest` (ring read + colour convert), `submit_us` = NVENC `encode_picture`
|
||||||
|
/// launch, `wait_us` = `lock_bitstream` (the scheduling wait + ASIC encode = the "encode" stage).
|
||||||
|
cap_us: u32,
|
||||||
|
submit_us: u32,
|
||||||
|
wait_us: u32,
|
||||||
|
/// This frame is a re-encoded hold (the source had no fresh frame): a source-starvation signal
|
||||||
|
/// the send thread folds into `repeat_fps`.
|
||||||
|
repeat: bool,
|
||||||
|
/// Whether the per-stage splits (`cap_us`/`submit_us`/`wait_us`) were actually measured at
|
||||||
|
/// capture time (`perf` was on or a stats capture was armed). The send thread trusts this
|
||||||
|
/// instead of re-reading `is_armed()`, so a capture that arms while frames are already in flight
|
||||||
|
/// doesn't fold their zeroed splits into the first window's percentiles.
|
||||||
|
was_measured: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The dedicated send thread: it owns the whole [`Session`] (so no socket clone or shared stats are
|
/// The dedicated send thread: it owns the whole [`Session`] (so no socket clone or shared stats are
|
||||||
@@ -1961,6 +2005,11 @@ pub(crate) fn boost_thread_priority(critical: bool) {
|
|||||||
// capture/encode (critical) and send (non-critical).
|
// capture/encode (critical) and send (non-critical).
|
||||||
crate::session_tuning::on_hot_thread();
|
crate::session_tuning::on_hot_thread();
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
|
// SAFETY: `GetCurrentThread()` returns the constant pseudo-handle for the calling thread — always
|
||||||
|
// valid, thread-local in meaning, and never closed (no leak/double-close). `SetThreadPriority`
|
||||||
|
// takes that handle plus a `THREAD_PRIORITY_*` value the windows crate defines (HIGHEST or
|
||||||
|
// ABOVE_NORMAL here); it only reprioritizes this OS thread, borrows no Rust memory, and its
|
||||||
|
// `Result` is matched (a failure is logged, never UB). No pointers, lifetimes, or aliasing.
|
||||||
unsafe {
|
unsafe {
|
||||||
use windows::Win32::System::Threading::{
|
use windows::Win32::System::Threading::{
|
||||||
GetCurrentThread, SetThreadPriority, THREAD_PRIORITY_ABOVE_NORMAL,
|
GetCurrentThread, SetThreadPriority, THREAD_PRIORITY_ABOVE_NORMAL,
|
||||||
@@ -1988,6 +2037,10 @@ pub(crate) fn boost_thread_priority(critical: bool) {
|
|||||||
// realtime CPU class can preempt the compositor AND the game's own render thread, adding the
|
// realtime CPU class can preempt the compositor AND the game's own render thread, adding the
|
||||||
// very frame-time we refuse to add (opt-in only — see PUNKTFUNK_SCHED_RR).
|
// very frame-time we refuse to add (opt-in only — see PUNKTFUNK_SCHED_RR).
|
||||||
let nice = if critical { -10 } else { -5 };
|
let nice = if critical { -10 } else { -5 };
|
||||||
|
// SAFETY: `setpriority` takes three by-value integers and no pointers, so there is nothing to
|
||||||
|
// alias or outlive. `PRIO_PROCESS` with `who == 0` targets the calling task on Linux and
|
||||||
|
// `nice` is in range; the call only adjusts this thread's scheduling nice value and returns an
|
||||||
|
// `int` we inspect. No memory is touched.
|
||||||
let rc = unsafe { libc::setpriority(libc::PRIO_PROCESS, 0, nice) };
|
let rc = unsafe { libc::setpriority(libc::PRIO_PROCESS, 0, nice) };
|
||||||
if rc == 0 {
|
if rc == 0 {
|
||||||
tracing::debug!(critical, nice, "thread nice raised");
|
tracing::debug!(critical, nice, "thread nice raised");
|
||||||
@@ -2004,6 +2057,19 @@ pub(crate) fn boost_thread_priority(critical: bool) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Everything the send thread needs to emit web-console stats samples at its 2 s aggregation
|
||||||
|
/// boundary: the shared recorder (whose `is_armed()` gates emission) plus the negotiated
|
||||||
|
/// mode/codec/client to seed the capture's `CaptureMeta` on the first armed registration.
|
||||||
|
struct SendStats {
|
||||||
|
rec: Arc<StatsRecorder>,
|
||||||
|
width: u32,
|
||||||
|
height: u32,
|
||||||
|
fps: u32,
|
||||||
|
codec: &'static str,
|
||||||
|
client: String,
|
||||||
|
bitrate_kbps: u32,
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn send_loop(
|
fn send_loop(
|
||||||
mut session: Session,
|
mut session: Session,
|
||||||
@@ -2014,6 +2080,7 @@ fn send_loop(
|
|||||||
perf: bool,
|
perf: bool,
|
||||||
burst_cap: usize,
|
burst_cap: usize,
|
||||||
fec_target: Arc<AtomicU8>,
|
fec_target: Arc<AtomicU8>,
|
||||||
|
stats: SendStats,
|
||||||
) {
|
) {
|
||||||
boost_thread_priority(false); // transmit thread: above-normal (Apollo's encoder-thread level)
|
boost_thread_priority(false); // transmit thread: above-normal (Apollo's encoder-thread level)
|
||||||
let mut last_perf = std::time::Instant::now();
|
let mut last_perf = std::time::Instant::now();
|
||||||
@@ -2022,6 +2089,16 @@ fn send_loop(
|
|||||||
let mut encode_us: Vec<u32> = Vec::new();
|
let mut encode_us: Vec<u32> = Vec::new();
|
||||||
let mut pace_us: Vec<u32> = Vec::new();
|
let mut pace_us: Vec<u32> = Vec::new();
|
||||||
let (mut paced_frames, mut immediate_frames) = (0u64, 0u64);
|
let (mut paced_frames, mut immediate_frames) = (0u64, 0u64);
|
||||||
|
// Web-console stats accumulation (active when `perf` OR the recorder is armed): the per-stage
|
||||||
|
// split carried on each FrameMsg, the new-vs-repeat frame split, the cached registration id, and
|
||||||
|
// the previous window's loss snapshot for delta computation.
|
||||||
|
let mut sid: Option<u32> = None;
|
||||||
|
let (mut cap_v, mut submit_v, mut wait_v): (Vec<u32>, Vec<u32>, Vec<u32>) =
|
||||||
|
(Vec::new(), Vec::new(), Vec::new());
|
||||||
|
let (mut new_frames, mut repeat_frames) = (0u64, 0u64);
|
||||||
|
let mut last_frames_dropped = 0u64;
|
||||||
|
let mut last_packets_dropped = 0u64;
|
||||||
|
let mut last_fec_recovered = 0u64;
|
||||||
loop {
|
loop {
|
||||||
if stop.load(Ordering::SeqCst) {
|
if stop.load(Ordering::SeqCst) {
|
||||||
break;
|
break;
|
||||||
@@ -2042,9 +2119,24 @@ fn send_loop(
|
|||||||
burst_cap,
|
burst_cap,
|
||||||
) {
|
) {
|
||||||
Ok(stat) => {
|
Ok(stat) => {
|
||||||
if perf {
|
if perf || stats.rec.is_armed() {
|
||||||
|
// `encode_us`/`pace_us`/fps are valid for every frame (always measured),
|
||||||
|
// including the Windows relay + tail-drain frames. The cap/submit/wait splits
|
||||||
|
// are only real when the frame was measured at capture time — a frame captured
|
||||||
|
// before this capture armed carries zeroed splits, so skip those (an empty
|
||||||
|
// window → `percentile()` returns 0) rather than pull the percentiles down.
|
||||||
encode_us.push(msg.encode_us);
|
encode_us.push(msg.encode_us);
|
||||||
pace_us.push(stat.spread_us);
|
pace_us.push(stat.spread_us);
|
||||||
|
if msg.was_measured {
|
||||||
|
cap_v.push(msg.cap_us);
|
||||||
|
submit_v.push(msg.submit_us);
|
||||||
|
wait_v.push(msg.wait_us);
|
||||||
|
}
|
||||||
|
if msg.repeat {
|
||||||
|
repeat_frames += 1;
|
||||||
|
} else {
|
||||||
|
new_frames += 1;
|
||||||
|
}
|
||||||
if stat.paced {
|
if stat.paced {
|
||||||
paced_frames += 1;
|
paced_frames += 1;
|
||||||
} else {
|
} else {
|
||||||
@@ -2060,31 +2152,91 @@ fn send_loop(
|
|||||||
Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {}
|
Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {}
|
||||||
Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => break, // encode thread done
|
Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => break, // encode thread done
|
||||||
}
|
}
|
||||||
if perf && last_perf.elapsed() >= std::time::Duration::from_secs(2) {
|
if last_perf.elapsed() >= std::time::Duration::from_secs(2) {
|
||||||
let s = session.stats();
|
let s = session.stats();
|
||||||
let secs = last_perf.elapsed().as_secs_f64();
|
let secs = last_perf.elapsed().as_secs_f64();
|
||||||
// Attempted (sealed) transmit rate; `send_dropped` is what didn't reach the wire.
|
// Attempted (sealed) transmit rate; `send_dropped` is what didn't reach the wire.
|
||||||
let tx_mbps = (s.bytes_sent - last_bytes) as f64 * 8.0 / secs / 1_000_000.0;
|
let tx_mbps = (s.bytes_sent - last_bytes) as f64 * 8.0 / secs / 1_000_000.0;
|
||||||
tracing::info!(
|
if perf {
|
||||||
tx_mbps = format!("{tx_mbps:.0}"),
|
tracing::info!(
|
||||||
send_dropped = s.packets_send_dropped - last_send_dropped,
|
tx_mbps = format!("{tx_mbps:.0}"),
|
||||||
send_dropped_total = s.packets_send_dropped,
|
send_dropped = s.packets_send_dropped - last_send_dropped,
|
||||||
encode_us_p50 = percentile(&mut encode_us, 0.50),
|
send_dropped_total = s.packets_send_dropped,
|
||||||
encode_us_p99 = percentile(&mut encode_us, 0.99),
|
encode_us_p50 = percentile(&mut encode_us, 0.50),
|
||||||
pace_us_p50 = percentile(&mut pace_us, 0.50),
|
encode_us_p99 = percentile(&mut encode_us, 0.99),
|
||||||
pace_us_p99 = percentile(&mut pace_us, 0.99),
|
pace_us_p50 = percentile(&mut pace_us, 0.50),
|
||||||
pace_us_max = pace_us.last().copied().unwrap_or(0),
|
pace_us_p99 = percentile(&mut pace_us, 0.99),
|
||||||
immediate_frames,
|
pace_us_max = pace_us.last().copied().unwrap_or(0),
|
||||||
paced_frames,
|
immediate_frames,
|
||||||
"perf"
|
paced_frames,
|
||||||
);
|
"perf"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Web-console capture: this thread owns `session.stats()`, so it emits the COMPLETE
|
||||||
|
// sample — the cap/submit/encode split carried over from the capture thread plus this
|
||||||
|
// window's pacing/goodput/loss. Loss fields are deltas vs the previous window's snapshot.
|
||||||
|
if stats.rec.is_armed() {
|
||||||
|
let session_id = *sid.get_or_insert_with(|| {
|
||||||
|
stats.rec.register_session(
|
||||||
|
"native",
|
||||||
|
stats.width,
|
||||||
|
stats.height,
|
||||||
|
stats.fps,
|
||||||
|
stats.codec,
|
||||||
|
&stats.client,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
let sample = crate::stats_recorder::StatsSample {
|
||||||
|
t_ms: 0, // stamped by push_sample from the capture's monotonic start
|
||||||
|
session_id,
|
||||||
|
stages: vec![
|
||||||
|
crate::stats_recorder::StageTiming {
|
||||||
|
name: "capture".into(),
|
||||||
|
p50_us: percentile(&mut cap_v, 0.50) as f32,
|
||||||
|
p99_us: percentile(&mut cap_v, 0.99) as f32,
|
||||||
|
},
|
||||||
|
crate::stats_recorder::StageTiming {
|
||||||
|
name: "submit".into(),
|
||||||
|
p50_us: percentile(&mut submit_v, 0.50) as f32,
|
||||||
|
p99_us: percentile(&mut submit_v, 0.99) as f32,
|
||||||
|
},
|
||||||
|
crate::stats_recorder::StageTiming {
|
||||||
|
name: "encode".into(),
|
||||||
|
p50_us: percentile(&mut wait_v, 0.50) as f32,
|
||||||
|
p99_us: percentile(&mut wait_v, 0.99) as f32,
|
||||||
|
},
|
||||||
|
crate::stats_recorder::StageTiming {
|
||||||
|
name: "send".into(),
|
||||||
|
p50_us: percentile(&mut pace_us, 0.50) as f32,
|
||||||
|
p99_us: percentile(&mut pace_us, 0.99) as f32,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
fps: (new_frames as f64 / secs) as f32,
|
||||||
|
repeat_fps: (repeat_frames as f64 / secs) as f32,
|
||||||
|
mbps: tx_mbps as f32,
|
||||||
|
bitrate_kbps: stats.bitrate_kbps,
|
||||||
|
frames_dropped: s.frames_dropped.saturating_sub(last_frames_dropped) as u32,
|
||||||
|
packets_dropped: s.packets_dropped.saturating_sub(last_packets_dropped) as u32,
|
||||||
|
send_dropped: s.packets_send_dropped.saturating_sub(last_send_dropped) as u32,
|
||||||
|
fec_recovered: s.fec_recovered_shards.saturating_sub(last_fec_recovered) as u32,
|
||||||
|
};
|
||||||
|
stats.rec.push_sample(session_id, sample);
|
||||||
|
}
|
||||||
last_perf = std::time::Instant::now();
|
last_perf = std::time::Instant::now();
|
||||||
last_bytes = s.bytes_sent;
|
last_bytes = s.bytes_sent;
|
||||||
last_send_dropped = s.packets_send_dropped;
|
last_send_dropped = s.packets_send_dropped;
|
||||||
|
last_frames_dropped = s.frames_dropped;
|
||||||
|
last_packets_dropped = s.packets_dropped;
|
||||||
|
last_fec_recovered = s.fec_recovered_shards;
|
||||||
encode_us.clear();
|
encode_us.clear();
|
||||||
pace_us.clear();
|
pace_us.clear();
|
||||||
|
cap_v.clear();
|
||||||
|
submit_v.clear();
|
||||||
|
wait_v.clear();
|
||||||
paced_frames = 0;
|
paced_frames = 0;
|
||||||
immediate_frames = 0;
|
immediate_frames = 0;
|
||||||
|
new_frames = 0;
|
||||||
|
repeat_frames = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2104,6 +2256,45 @@ struct SessionSwitch {
|
|||||||
/// read (so no handshake plumbing). Opt-in via `PUNKTFUNK_SESSION_WATCH`; readiness of the new
|
/// read (so no handshake plumbing). Opt-in via `PUNKTFUNK_SESSION_WATCH`; readiness of the new
|
||||||
/// backend is left to the encode thread's `build_pipeline_with_retry` (the watcher never writes
|
/// backend is left to the encode thread's `build_pipeline_with_retry` (the watcher never writes
|
||||||
/// env). Exits when `stop` is set or the channel closes.
|
/// env). Exits when `stop` is set or the channel closes.
|
||||||
|
/// Whether to run the mid-stream session-switch watcher. An explicit `PUNKTFUNK_SESSION_WATCH` wins
|
||||||
|
/// (truthy → on; `0`/`false`/`no`/`off`/empty → off). When unset it defaults **on** for Steam HTPC
|
||||||
|
/// platforms (Bazzite / SteamOS) — which flip Gaming↔Desktop and need the host to follow the switch
|
||||||
|
/// mid-stream — and **off** elsewhere, preserving the opt-in default for plain desktop hosts.
|
||||||
|
fn session_watch_enabled() -> bool {
|
||||||
|
match std::env::var("PUNKTFUNK_SESSION_WATCH") {
|
||||||
|
Ok(v) => {
|
||||||
|
let v = v.trim();
|
||||||
|
!(v.is_empty()
|
||||||
|
|| v == "0"
|
||||||
|
|| v.eq_ignore_ascii_case("false")
|
||||||
|
|| v.eq_ignore_ascii_case("no")
|
||||||
|
|| v.eq_ignore_ascii_case("off"))
|
||||||
|
}
|
||||||
|
Err(_) => is_steam_htpc_platform(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// True on Bazzite or SteamOS (matched against os-release `ID`/`ID_LIKE`) — the platforms that flip
|
||||||
|
/// between Steam Gaming Mode and a Desktop session, where following a mid-stream switch is the
|
||||||
|
/// sensible default. Anything else (incl. non-Linux, where the file is absent) → false.
|
||||||
|
fn is_steam_htpc_platform() -> bool {
|
||||||
|
let Ok(os) = std::fs::read_to_string("/etc/os-release") else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
os.lines().any(|line| {
|
||||||
|
let line = line.trim();
|
||||||
|
let Some(val) = line
|
||||||
|
.strip_prefix("ID=")
|
||||||
|
.or_else(|| line.strip_prefix("ID_LIKE="))
|
||||||
|
else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
val.trim_matches('"')
|
||||||
|
.split_whitespace()
|
||||||
|
.any(|tok| tok.eq_ignore_ascii_case("bazzite") || tok.eq_ignore_ascii_case("steamos"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn session_watcher_loop(tx: std::sync::mpsc::Sender<SessionSwitch>, stop: Arc<AtomicBool>) {
|
fn session_watcher_loop(tx: std::sync::mpsc::Sender<SessionSwitch>, stop: Arc<AtomicBool>) {
|
||||||
use crate::vdisplay;
|
use crate::vdisplay;
|
||||||
const DEBOUNCE: std::time::Duration = std::time::Duration::from_secs(3);
|
const DEBOUNCE: std::time::Duration = std::time::Duration::from_secs(3);
|
||||||
@@ -2185,6 +2376,13 @@ struct SessionContext {
|
|||||||
fec_target: Arc<AtomicU8>,
|
fec_target: Arc<AtomicU8>,
|
||||||
/// The QUIC control connection (carries host→client 0xCE source-HDR metadata mid-stream).
|
/// The QUIC control connection (carries host→client 0xCE source-HDR metadata mid-stream).
|
||||||
conn: quinn::Connection,
|
conn: quinn::Connection,
|
||||||
|
/// Shared streaming-stats recorder. The capture loop reads `is_armed()` per frame to decide
|
||||||
|
/// whether to measure the per-stage split; the send thread builds + pushes the aggregated
|
||||||
|
/// `StatsSample` at its 2 s boundary.
|
||||||
|
stats: Arc<StatsRecorder>,
|
||||||
|
/// Short client label (cert-fingerprint prefix, else peer IP) seeded into the capture meta on
|
||||||
|
/// the first armed stats registration.
|
||||||
|
client_label: String,
|
||||||
/// Windows: the store-qualified library id to launch into the interactive user session once
|
/// Windows: the store-qualified library id to launch into the interactive user session once
|
||||||
/// capture is live (no gamescope nesting on Windows). `None` = no launch requested. Linux uses the
|
/// capture is live (no gamescope nesting on Windows). `None` = no launch requested. Linux uses the
|
||||||
/// gamescope `PUNKTFUNK_GAMESCOPE_APP` path resolved at handshake, so this field is Windows-only.
|
/// gamescope `PUNKTFUNK_GAMESCOPE_APP` path resolved at handshake, so this field is Windows-only.
|
||||||
@@ -2205,7 +2403,7 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
// Windows two-process secure-desktop path: when the host runs as SYSTEM (required for the secure
|
// Windows two-process secure-desktop path: when the host runs as SYSTEM (required for the secure
|
||||||
// desktop + SendInput), WGC can't activate in-process, so we capture the normal desktop via a
|
// desktop + SendInput), WGC can't activate in-process, so we capture the normal desktop via a
|
||||||
// helper spawned in the user session and relay its AUs. (Single-process WGC/DDA is used as the
|
// helper spawned in the user session and relay its AUs. (Single-process WGC/DDA is used as the
|
||||||
// user, and stays the path on Linux.) See docs/windows-secure-desktop.md.
|
// user, and stays the path on Linux.) See design/archive/windows-secure-desktop.md.
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
if plan.topology == crate::session_plan::SessionTopology::TwoProcessRelay {
|
if plan.topology == crate::session_plan::SessionTopology::TwoProcessRelay {
|
||||||
return virtual_stream_relay(ctx);
|
return virtual_stream_relay(ctx);
|
||||||
@@ -2226,6 +2424,8 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
probe_result_tx,
|
probe_result_tx,
|
||||||
fec_target,
|
fec_target,
|
||||||
conn,
|
conn,
|
||||||
|
stats,
|
||||||
|
client_label,
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
launch,
|
launch,
|
||||||
} = ctx;
|
} = ctx;
|
||||||
@@ -2294,6 +2494,17 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
// The bounded channel applies backpressure (the encode thread blocks if the send falls behind,
|
// The bounded channel applies backpressure (the encode thread blocks if the send falls behind,
|
||||||
// so frames slow down rather than a dropped frame freezing the infinite-GOP stream).
|
// so frames slow down rather than a dropped frame freezing the infinite-GOP stream).
|
||||||
let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::<FrameMsg>(3);
|
let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::<FrameMsg>(3);
|
||||||
|
// The send thread emits the web-console stats sample (it owns `session.stats()`); clone the
|
||||||
|
// recorder so the capture loop keeps its own handle for the per-frame `is_armed()` gate.
|
||||||
|
let send_stats = SendStats {
|
||||||
|
rec: stats.clone(),
|
||||||
|
width: mode.width,
|
||||||
|
height: mode.height,
|
||||||
|
fps: mode.refresh_hz,
|
||||||
|
codec: "hevc",
|
||||||
|
client: client_label,
|
||||||
|
bitrate_kbps,
|
||||||
|
};
|
||||||
let send_thread = std::thread::Builder::new()
|
let send_thread = std::thread::Builder::new()
|
||||||
.name("punktfunk-send".into())
|
.name("punktfunk-send".into())
|
||||||
.spawn({
|
.spawn({
|
||||||
@@ -2308,6 +2519,7 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
perf,
|
perf,
|
||||||
burst_cap,
|
burst_cap,
|
||||||
fec_target,
|
fec_target,
|
||||||
|
send_stats,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -2318,9 +2530,9 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
// place when the box flips Gaming↔Desktop. When not spawned, session_rx just stays empty.
|
// place when the box flips Gaming↔Desktop. When not spawned, session_rx just stays empty.
|
||||||
let mut compositor = compositor;
|
let mut compositor = compositor;
|
||||||
let (session_tx, session_rx) = std::sync::mpsc::channel::<SessionSwitch>();
|
let (session_tx, session_rx) = std::sync::mpsc::channel::<SessionSwitch>();
|
||||||
let watch = std::env::var_os("PUNKTFUNK_SESSION_WATCH").is_some()
|
let watch = session_watch_enabled() && crate::config::config().compositor.is_none();
|
||||||
&& crate::config::config().compositor.is_none();
|
|
||||||
let _watcher = if watch {
|
let _watcher = if watch {
|
||||||
|
tracing::info!("session watcher on — following a mid-stream Gaming↔Desktop switch");
|
||||||
let stop = stop.clone();
|
let stop = stop.clone();
|
||||||
std::thread::Builder::new()
|
std::thread::Builder::new()
|
||||||
.name("punktfunk1-watcher".into())
|
.name("punktfunk1-watcher".into())
|
||||||
@@ -2352,6 +2564,12 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
// compositing), NOT an encoder problem. Logged every 2 s when `PUNKTFUNK_PERF`.
|
// compositing), NOT an encoder problem. Logged every 2 s when `PUNKTFUNK_PERF`.
|
||||||
let (mut diag_new, mut diag_repeat) = (0u64, 0u64);
|
let (mut diag_new, mut diag_repeat) = (0u64, 0u64);
|
||||||
let mut diag_at = std::time::Instant::now();
|
let mut diag_at = std::time::Instant::now();
|
||||||
|
// Per-stage latency breakdown (PUNKTFUNK_PERF): per-call µs for the GPU-bound stages so we see
|
||||||
|
// exactly where the capture→encoded latency goes — cap=try_latest (ring read + colour convert),
|
||||||
|
// submit=encode_picture launch, wait=lock_bitstream (the scheduling wait + ASIC encode, the one
|
||||||
|
// that dominates under a GPU-saturating game).
|
||||||
|
let (mut st_cap, mut st_submit, mut st_wait): (Vec<u32>, Vec<u32>, Vec<u32>) =
|
||||||
|
(Vec::new(), Vec::new(), Vec::new());
|
||||||
while !stop.load(Ordering::SeqCst) && std::time::Instant::now() < deadline {
|
while !stop.load(Ordering::SeqCst) && std::time::Instant::now() < deadline {
|
||||||
// Mid-stream session switch (the box flipped Gaming↔Desktop): rebuild the WHOLE backend in
|
// Mid-stream session switch (the box flipped Gaming↔Desktop): rebuild the WHOLE backend in
|
||||||
// place — a different compositor at the SAME client mode — keeping the Session + send thread
|
// place — a different compositor at the SAME client mode — keeping the Session + send thread
|
||||||
@@ -2458,13 +2676,31 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
tracing::debug!("forcing keyframe (client decode recovery)");
|
tracing::debug!("forcing keyframe (client decode recovery)");
|
||||||
enc.request_keyframe();
|
enc.request_keyframe();
|
||||||
}
|
}
|
||||||
match capturer.try_latest() {
|
// Measure the per-stage split when `PUNKTFUNK_PERF` is set OR a web-console stats capture is
|
||||||
|
// armed (a cheap Relaxed atomic, re-read each frame). The values feed the existing perf log
|
||||||
|
// unchanged and ride each FrameMsg to the send thread, which builds the aggregated sample.
|
||||||
|
let measure = perf || stats.is_armed();
|
||||||
|
let t_cap = std::time::Instant::now();
|
||||||
|
let cap_result = capturer.try_latest();
|
||||||
|
let cap_us = if measure {
|
||||||
|
t_cap.elapsed().as_micros() as u32
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
if perf {
|
||||||
|
st_cap.push(cap_us);
|
||||||
|
}
|
||||||
|
let mut repeat = false;
|
||||||
|
match cap_result {
|
||||||
Ok(Some(f)) => {
|
Ok(Some(f)) => {
|
||||||
frame = f;
|
frame = f;
|
||||||
diag_new += 1;
|
diag_new += 1;
|
||||||
capture_rebuilds = 0; // a delivered frame clears the consecutive-loss counter
|
capture_rebuilds = 0; // a delivered frame clears the consecutive-loss counter
|
||||||
}
|
}
|
||||||
Ok(None) => diag_repeat += 1, // no new frame (static desktop / mid-rebuild) — repeat the last
|
Ok(None) => {
|
||||||
|
diag_repeat += 1; // no new frame (static desktop / mid-rebuild) — repeat the last
|
||||||
|
repeat = true;
|
||||||
|
}
|
||||||
// The capture source died (PipeWire/compositor thread ended, virtual output gone). Rather
|
// The capture source died (PipeWire/compositor thread ended, virtual output gone). Rather
|
||||||
// than tear the whole session down — the client has no reconnect path and would have to
|
// than tear the whole session down — the client has no reconnect path and would have to
|
||||||
// cold-restart the handshake — rebuild the pipeline IN PLACE at the current mode, exactly
|
// cold-restart the handshake — rebuild the pipeline IN PLACE at the current mode, exactly
|
||||||
@@ -2478,15 +2714,76 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
}
|
}
|
||||||
tracing::warn!(error = %format!("{e:#}"), rebuild = capture_rebuilds,
|
tracing::warn!(error = %format!("{e:#}"), rebuild = capture_rebuilds,
|
||||||
"capture lost — rebuilding pipeline in place");
|
"capture lost — rebuilding pipeline in place");
|
||||||
let (new_cap, new_enc, new_frame, new_interval) =
|
// A Bazzite/SteamOS Gaming↔Desktop switch tears the old compositor down and can take
|
||||||
build_pipeline_with_retry(&mut vd, cur_mode, bitrate_kbps, bit_depth, plan)
|
// 15s+ to bring the new one up. Don't fail the session over that (the client would
|
||||||
.context("rebuild after capture loss")?;
|
// have to cold-reconnect, surfacing a "session failed") — keep retrying within a
|
||||||
|
// generous budget while the QUIC keepalive (its own thread) holds the connection,
|
||||||
|
// RE-DETECTING the live compositor each attempt so we follow the box to whatever
|
||||||
|
// session comes up: a fresh instance of the same compositor, OR a different one
|
||||||
|
// (the kind-change case the session watcher also handles). The client stays
|
||||||
|
// connected, frozen on the last frame, and the stream resumes when the new output
|
||||||
|
// appears — no reconnect.
|
||||||
|
const REBUILD_BUDGET: std::time::Duration = std::time::Duration::from_secs(40);
|
||||||
|
let rebuild_deadline = std::time::Instant::now() + REBUILD_BUDGET;
|
||||||
|
let (new_cap, new_enc, new_frame, new_interval) = loop {
|
||||||
|
// Follow the active session unless an explicit PUNKTFUNK_COMPOSITOR pin forbids
|
||||||
|
// retargeting (then we stick to the pinned backend and just rebuild it).
|
||||||
|
if crate::config::config().compositor.is_none() {
|
||||||
|
let active = crate::vdisplay::detect_active_session();
|
||||||
|
if let Some(c) = crate::vdisplay::compositor_for_kind(active.kind) {
|
||||||
|
crate::vdisplay::apply_session_env(&active);
|
||||||
|
crate::vdisplay::apply_input_env(c);
|
||||||
|
if c != compositor {
|
||||||
|
if matches!(
|
||||||
|
c,
|
||||||
|
crate::vdisplay::Compositor::Kwin
|
||||||
|
| crate::vdisplay::Compositor::Mutter
|
||||||
|
) {
|
||||||
|
crate::vdisplay::settle_desktop_portal(c);
|
||||||
|
}
|
||||||
|
match crate::vdisplay::open(c) {
|
||||||
|
Ok(v) => {
|
||||||
|
tracing::info!(from = compositor.id(), to = c.id(),
|
||||||
|
"capture loss: active session switched compositor — retargeting");
|
||||||
|
vd = v;
|
||||||
|
compositor = c;
|
||||||
|
}
|
||||||
|
Err(e2) => tracing::warn!(error = %format!("{e2:#}"),
|
||||||
|
"capture loss: opening the newly-detected compositor failed — retrying"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
match build_pipeline_with_retry(
|
||||||
|
&mut vd,
|
||||||
|
cur_mode,
|
||||||
|
bitrate_kbps,
|
||||||
|
bit_depth,
|
||||||
|
plan,
|
||||||
|
) {
|
||||||
|
Ok(p) => break p,
|
||||||
|
Err(e2) => {
|
||||||
|
if stop.load(Ordering::SeqCst)
|
||||||
|
|| std::time::Instant::now() >= rebuild_deadline
|
||||||
|
{
|
||||||
|
return Err(e2)
|
||||||
|
.context("capture lost — no compositor came up within the rebuild budget");
|
||||||
|
}
|
||||||
|
tracing::warn!(error = %format!("{e2:#}"),
|
||||||
|
"capture lost — new session not up yet, retrying");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
capturer = new_cap;
|
capturer = new_cap;
|
||||||
enc = new_enc;
|
enc = new_enc;
|
||||||
frame = new_frame;
|
frame = new_frame;
|
||||||
interval = new_interval;
|
interval = new_interval;
|
||||||
enc.request_keyframe(); // belt-and-suspenders; a fresh encoder opens on an IDR anyway
|
enc.request_keyframe(); // belt-and-suspenders; a fresh encoder opens on an IDR anyway
|
||||||
next = std::time::Instant::now();
|
next = std::time::Instant::now();
|
||||||
|
tracing::info!(
|
||||||
|
compositor = compositor.id(),
|
||||||
|
"capture loss: pipeline rebuilt — stream resumes"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if perf && diag_at.elapsed() >= std::time::Duration::from_secs(2) {
|
if perf && diag_at.elapsed() >= std::time::Duration::from_secs(2) {
|
||||||
@@ -2497,6 +2794,20 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
"capture diag: NEW frames from the source vs REPEATS (low new_fps at high send rate ⇒ \
|
"capture diag: NEW frames from the source vs REPEATS (low new_fps at high send rate ⇒ \
|
||||||
the source isn't producing frames, not an encode stall)"
|
the source isn't producing frames, not an encode stall)"
|
||||||
);
|
);
|
||||||
|
let wait_max = st_wait.iter().copied().max().unwrap_or(0);
|
||||||
|
tracing::info!(
|
||||||
|
cap_us_p50 = percentile(&mut st_cap, 0.50),
|
||||||
|
cap_us_p99 = percentile(&mut st_cap, 0.99),
|
||||||
|
submit_us_p50 = percentile(&mut st_submit, 0.50),
|
||||||
|
submit_us_p99 = percentile(&mut st_submit, 0.99),
|
||||||
|
wait_us_p50 = percentile(&mut st_wait, 0.50),
|
||||||
|
wait_us_p99 = percentile(&mut st_wait, 0.99),
|
||||||
|
wait_us_max = wait_max,
|
||||||
|
"stage perf (µs/call): cap=try_latest(ring+convert) submit=encode_picture wait=lock_bitstream(sched+ASIC)"
|
||||||
|
);
|
||||||
|
st_cap.clear();
|
||||||
|
st_submit.clear();
|
||||||
|
st_wait.clear();
|
||||||
diag_new = 0;
|
diag_new = 0;
|
||||||
diag_repeat = 0;
|
diag_repeat = 0;
|
||||||
diag_at = std::time::Instant::now();
|
diag_at = std::time::Instant::now();
|
||||||
@@ -2515,7 +2826,16 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
// capturer hands a rotating ring of output textures, so it returns >1; other capturers default 1.
|
// capturer hands a rotating ring of output textures, so it returns >1; other capturers default 1.
|
||||||
let depth = capturer.pipeline_depth().max(1);
|
let depth = capturer.pipeline_depth().max(1);
|
||||||
let capture_ns = now_ns();
|
let capture_ns = now_ns();
|
||||||
|
let t_submit = std::time::Instant::now();
|
||||||
enc.submit(&frame).context("encoder submit")?;
|
enc.submit(&frame).context("encoder submit")?;
|
||||||
|
let submit_us = if measure {
|
||||||
|
t_submit.elapsed().as_micros() as u32
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
if perf {
|
||||||
|
st_submit.push(submit_us);
|
||||||
|
}
|
||||||
// This frame's pacing deadline (the next frame's due time); the send thread spreads a big frame
|
// This frame's pacing deadline (the next frame's due time); the send thread spreads a big frame
|
||||||
// up to here. Each in-flight frame carries its own (capture_ns, deadline) for when it's polled.
|
// up to here. Each in-flight frame carries its own (capture_ns, deadline) for when it's polled.
|
||||||
next += interval;
|
next += interval;
|
||||||
@@ -2526,7 +2846,17 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
// the oldest submitted frame's AU — matching `inflight.pop_front()`.
|
// the oldest submitted frame's AU — matching `inflight.pop_front()`.
|
||||||
let mut send_gone = false;
|
let mut send_gone = false;
|
||||||
while inflight.len() >= depth {
|
while inflight.len() >= depth {
|
||||||
let au = match enc.poll().context("encoder poll")? {
|
let t_wait = std::time::Instant::now();
|
||||||
|
let polled = enc.poll().context("encoder poll")?;
|
||||||
|
let wait_us = if measure {
|
||||||
|
t_wait.elapsed().as_micros() as u32
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
if perf {
|
||||||
|
st_wait.push(wait_us);
|
||||||
|
}
|
||||||
|
let au = match polled {
|
||||||
Some(au) => au,
|
Some(au) => au,
|
||||||
None => break, // no AU ready for a submitted frame (shouldn't happen — poll blocks)
|
None => break, // no AU ready for a submitted frame (shouldn't happen — poll blocks)
|
||||||
};
|
};
|
||||||
@@ -2552,6 +2882,11 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
flags,
|
flags,
|
||||||
deadline,
|
deadline,
|
||||||
encode_us,
|
encode_us,
|
||||||
|
cap_us,
|
||||||
|
submit_us,
|
||||||
|
wait_us,
|
||||||
|
repeat,
|
||||||
|
was_measured: measure,
|
||||||
};
|
};
|
||||||
// Hand to the send thread; this blocks (backpressure) if it's behind. An Err means it
|
// Hand to the send thread; this blocks (backpressure) if it's behind. An Err means it
|
||||||
// exited (send failure / stop) — end the encode loop too.
|
// exited (send failure / stop) — end the encode loop too.
|
||||||
@@ -2579,12 +2914,19 @@ fn virtual_stream(ctx: SessionContext) -> Result<()> {
|
|||||||
FLAG_PIC as u32
|
FLAG_PIC as u32
|
||||||
};
|
};
|
||||||
let encode_us = (now_ns().saturating_sub(cap_ns) / 1000) as u32;
|
let encode_us = (now_ns().saturating_sub(cap_ns) / 1000) as u32;
|
||||||
|
// End-of-stream tail drain: the per-stage split isn't measured here (the capture loop has
|
||||||
|
// exited), so leave it zero — these last few frames are negligible for the aggregates.
|
||||||
let msg = FrameMsg {
|
let msg = FrameMsg {
|
||||||
data: au.data,
|
data: au.data,
|
||||||
capture_ns: cap_ns,
|
capture_ns: cap_ns,
|
||||||
flags,
|
flags,
|
||||||
deadline,
|
deadline,
|
||||||
encode_us,
|
encode_us,
|
||||||
|
cap_us: 0,
|
||||||
|
submit_us: 0,
|
||||||
|
wait_us: 0,
|
||||||
|
repeat: false,
|
||||||
|
was_measured: false,
|
||||||
};
|
};
|
||||||
if frame_tx.send(msg).is_err() {
|
if frame_tx.send(msg).is_err() {
|
||||||
break;
|
break;
|
||||||
@@ -2631,6 +2973,8 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> {
|
|||||||
probe_result_tx,
|
probe_result_tx,
|
||||||
fec_target,
|
fec_target,
|
||||||
conn: _conn,
|
conn: _conn,
|
||||||
|
stats,
|
||||||
|
client_label,
|
||||||
launch,
|
launch,
|
||||||
} = ctx;
|
} = ctx;
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
@@ -2669,6 +3013,11 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> {
|
|||||||
// The secure-desktop HDR drop (for the DDA leg) keys off the monitor's real state in the mux loop.
|
// The secure-desktop HDR drop (for the DDA leg) keys off the monitor's real state in the mux loop.
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
if bit_depth >= 10 {
|
if bit_depth >= 10 {
|
||||||
|
// SAFETY: `set_advanced_color` is marked `unsafe` only because it drives the Win32 CCD API
|
||||||
|
// internally; it takes `target_id` by value (Copy `u32` — this session's live SudoVDA
|
||||||
|
// monitor's CCD target id) and sizes + owns every buffer it hands the OS on its own stack.
|
||||||
|
// We pass no pointers, so nothing must outlive the call and there is no aliasing; an
|
||||||
|
// unknown/absent target id simply returns false.
|
||||||
unsafe {
|
unsafe {
|
||||||
if crate::win_display::set_advanced_color(target.target_id, true) {
|
if crate::win_display::set_advanced_color(target.target_id, true) {
|
||||||
// Let the colorspace change settle before WGC creates its capture item / detects HDR.
|
// Let the colorspace change settle before WGC creates its capture item / detects HDR.
|
||||||
@@ -2760,7 +3109,18 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> {
|
|||||||
* 1024;
|
* 1024;
|
||||||
|
|
||||||
// Same encode|send split as the single-process path: this thread relays AUs, a dedicated send
|
// Same encode|send split as the single-process path: this thread relays AUs, a dedicated send
|
||||||
// thread owns the Session and does FEC+seal+paced-send.
|
// thread owns the Session and does FEC+seal+paced-send. The relay encodes in the helper process,
|
||||||
|
// so this path's FrameMsgs carry no cap/submit/encode split (those stages stay 0 in the sample);
|
||||||
|
// the send thread still emits fps/goodput/pacing/loss from `session.stats()`.
|
||||||
|
let send_stats = SendStats {
|
||||||
|
rec: stats,
|
||||||
|
width: mode.width,
|
||||||
|
height: mode.height,
|
||||||
|
fps: effective_hz,
|
||||||
|
codec: "hevc",
|
||||||
|
client: client_label,
|
||||||
|
bitrate_kbps,
|
||||||
|
};
|
||||||
let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::<FrameMsg>(3);
|
let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::<FrameMsg>(3);
|
||||||
let send_thread = std::thread::Builder::new()
|
let send_thread = std::thread::Builder::new()
|
||||||
.name("punktfunk-send".into())
|
.name("punktfunk-send".into())
|
||||||
@@ -2776,6 +3136,7 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> {
|
|||||||
perf,
|
perf,
|
||||||
burst_cap,
|
burst_cap,
|
||||||
fec_target,
|
fec_target,
|
||||||
|
send_stats,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -2838,6 +3199,11 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> {
|
|||||||
flags,
|
flags,
|
||||||
deadline: std::time::Instant::now() + interval,
|
deadline: std::time::Instant::now() + interval,
|
||||||
encode_us,
|
encode_us,
|
||||||
|
cap_us: 0,
|
||||||
|
submit_us: 0,
|
||||||
|
wait_us: 0,
|
||||||
|
repeat: false,
|
||||||
|
was_measured: false,
|
||||||
};
|
};
|
||||||
let ok = frame_tx.send(msg).is_ok();
|
let ok = frame_tx.send(msg).is_ok();
|
||||||
if ok {
|
if ok {
|
||||||
@@ -2904,8 +3270,12 @@ fn virtual_stream_relay(ctx: SessionContext) -> Result<()> {
|
|||||||
// desktop (the drop just churned + still went black). Instead, if the monitor is in HDR,
|
// desktop (the drop just churned + still went black). Instead, if the monitor is in HDR,
|
||||||
// open DDA in HDR (FP16 DuplicateOutput1 → BT.2020 PQ Main10); the normal-desktop DDA
|
// open DDA in HDR (FP16 DuplicateOutput1 → BT.2020 PQ Main10); the normal-desktop DDA
|
||||||
// overlay/flip issues that drove us to WGC don't apply to the composed Winlogon UI.
|
// overlay/flip issues that drove us to WGC don't apply to the composed Winlogon UI.
|
||||||
let hdr =
|
// SAFETY: `advanced_color_enabled` is `unsafe` only because it queries the Win32 CCD
|
||||||
unsafe { crate::win_display::advanced_color_enabled(target.target_id) };
|
// API; it takes `target_id` by value (the live SudoVDA monitor's CCD target id) and
|
||||||
|
// allocates + owns every buffer it passes the OS internally. No caller pointer is
|
||||||
|
// involved, so nothing must outlive the call and there is no aliasing; a missing
|
||||||
|
// target id just yields false.
|
||||||
|
let hdr = unsafe { crate::win_display::advanced_color_enabled(target.target_id) };
|
||||||
dda = None; // reopen to capture the secure desktop
|
dda = None; // reopen to capture the secure desktop
|
||||||
match open_dda(&target, cur_mode.width, cur_mode.height, effective_hz, hdr) {
|
match open_dda(&target, cur_mode.width, cur_mode.height, effective_hz, hdr) {
|
||||||
Ok(mut p) => {
|
Ok(mut p) => {
|
||||||
@@ -3330,12 +3700,27 @@ mod tests {
|
|||||||
unsafe fn pull_verified(conn: *mut punktfunk_core::abi::PunktfunkConnection, count: u32) {
|
unsafe fn pull_verified(conn: *mut punktfunk_core::abi::PunktfunkConnection, count: u32) {
|
||||||
use punktfunk_core::error::PunktfunkStatus;
|
use punktfunk_core::error::PunktfunkStatus;
|
||||||
let mut got = 0u32;
|
let mut got = 0u32;
|
||||||
|
// SAFETY: the inferred type is the `#[repr(C)]` POD `PunktfunkFrame` (a raw `*const u8`, a
|
||||||
|
// `usize`, and integer fields); all-zero is a valid bit pattern for every field (a null
|
||||||
|
// `data`, `len == 0`). It is only ever read after `next_au` below fully overwrites it on `Ok`,
|
||||||
|
// so the zeroed value is never observed.
|
||||||
let mut frame = unsafe { std::mem::zeroed() };
|
let mut frame = unsafe { std::mem::zeroed() };
|
||||||
while got < count {
|
while got < count {
|
||||||
|
// SAFETY: `conn` is the live, non-null `*mut PunktfunkConnection` from `punktfunk_connect`
|
||||||
|
// (the caller asserts non-null and does not close it until after this returns), meeting the
|
||||||
|
// ABI's "valid handle". `&mut frame` is an exclusive, writable borrow of the local
|
||||||
|
// `PunktfunkFrame` that outlives this synchronous call. This single test thread is the only
|
||||||
|
// video puller, satisfying the one-video-thread rule.
|
||||||
match unsafe {
|
match unsafe {
|
||||||
punktfunk_core::abi::punktfunk_connection_next_au(conn, &mut frame, 2000)
|
punktfunk_core::abi::punktfunk_connection_next_au(conn, &mut frame, 2000)
|
||||||
} {
|
} {
|
||||||
PunktfunkStatus::Ok => {
|
PunktfunkStatus::Ok => {
|
||||||
|
// SAFETY: on `Ok`, `next_au` set `frame.data`/`frame.len` to the reassembled AU
|
||||||
|
// buffer the connection owns; per the ABI contract that borrow stays valid until
|
||||||
|
// the NEXT `next_au` call on this handle. We read the whole slice here (the assert
|
||||||
|
// + length-checked indexing) before the loop's next `next_au`, and `conn` outlives
|
||||||
|
// it — so the pointer is live, exactly `len` bytes, read-only, single-threaded (no
|
||||||
|
// aliasing/use-after-free).
|
||||||
let data = unsafe { std::slice::from_raw_parts(frame.data, frame.len) };
|
let data = unsafe { std::slice::from_raw_parts(frame.data, frame.len) };
|
||||||
let idx = u32::from_le_bytes(data[0..4].try_into().unwrap());
|
let idx = u32::from_le_bytes(data[0..4].try_into().unwrap());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -3383,6 +3768,11 @@ mod tests {
|
|||||||
// Session 1: TOFU (no pin) — observe the host fingerprint.
|
// Session 1: TOFU (no pin) — observe the host fingerprint.
|
||||||
let addr = std::ffi::CString::new("127.0.0.1").unwrap();
|
let addr = std::ffi::CString::new("127.0.0.1").unwrap();
|
||||||
let mut observed = [0u8; 32];
|
let mut observed = [0u8; 32];
|
||||||
|
// SAFETY: `addr` is a live `CString` ("127.0.0.1") whose `as_ptr()` is the NUL-terminated
|
||||||
|
// UTF-8 host string the contract requires; `pin_sha256`/cert/key are NULL (all permitted), and
|
||||||
|
// `observed.as_mut_ptr()` is the local `[u8; 32]` — exactly the 32 writable bytes the contract
|
||||||
|
// demands, not aliased during the call. Every pointer references a live local that outlives the
|
||||||
|
// blocking connect.
|
||||||
let conn = unsafe {
|
let conn = unsafe {
|
||||||
punktfunk_connect(
|
punktfunk_connect(
|
||||||
addr.as_ptr(),
|
addr.as_ptr(),
|
||||||
@@ -3401,26 +3791,28 @@ mod tests {
|
|||||||
assert_ne!(observed, [0u8; 32], "fingerprint not reported");
|
assert_ne!(observed, [0u8; 32], "fingerprint not reported");
|
||||||
|
|
||||||
let (mut w, mut h, mut hz) = (0u32, 0u32, 0u32);
|
let (mut w, mut h, mut hz) = (0u32, 0u32, 0u32);
|
||||||
assert_eq!(
|
// SAFETY: `conn` is the live, non-null connection handle just asserted above; `&mut w/h/hz` are
|
||||||
unsafe { punktfunk_connection_mode(conn, &mut w, &mut h, &mut hz) },
|
// exclusive, writable borrows of local `u32`s that outlive this synchronous call — the three
|
||||||
PunktfunkStatus::Ok
|
// writable out-params the contract names.
|
||||||
);
|
let st = unsafe { punktfunk_connection_mode(conn, &mut w, &mut h, &mut hz) };
|
||||||
|
assert_eq!(st, PunktfunkStatus::Ok);
|
||||||
assert_eq!((w, h, hz), (1280, 720, 60));
|
assert_eq!((w, h, hz), (1280, 720, 60));
|
||||||
|
|
||||||
// Mid-stream renegotiation: request a new mode, the host acks on the control
|
// Mid-stream renegotiation: request a new mode, the host acks on the control
|
||||||
// stream, and punktfunk_connection_mode reflects the switch.
|
// stream, and punktfunk_connection_mode reflects the switch.
|
||||||
assert_eq!(
|
// SAFETY: `conn` is the live, non-null connection handle (the only pointer arg); the remaining
|
||||||
unsafe {
|
// arguments are by-value integers. The handle outlives this non-blocking enqueue.
|
||||||
punktfunk_core::abi::punktfunk_connection_request_mode(conn, 1920, 1080, 144)
|
let st = unsafe {
|
||||||
},
|
punktfunk_core::abi::punktfunk_connection_request_mode(conn, 1920, 1080, 144)
|
||||||
PunktfunkStatus::Ok
|
};
|
||||||
);
|
assert_eq!(st, PunktfunkStatus::Ok);
|
||||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
|
||||||
loop {
|
loop {
|
||||||
assert_eq!(
|
// SAFETY: same as the earlier `punktfunk_connection_mode` call — `conn` is the live handle
|
||||||
unsafe { punktfunk_connection_mode(conn, &mut w, &mut h, &mut hz) },
|
// and `&mut w/h/hz` are exclusive writable borrows of locals that outlive this synchronous
|
||||||
PunktfunkStatus::Ok
|
// call.
|
||||||
);
|
let st = unsafe { punktfunk_connection_mode(conn, &mut w, &mut h, &mut hz) };
|
||||||
|
assert_eq!(st, PunktfunkStatus::Ok);
|
||||||
if (w, h, hz) == (1920, 1080, 144) {
|
if (w, h, hz) == (1920, 1080, 144) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -3431,6 +3823,8 @@ mod tests {
|
|||||||
std::thread::sleep(std::time::Duration::from_millis(20));
|
std::thread::sleep(std::time::Duration::from_millis(20));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SAFETY: `pull_verified` requires a live connection handle it alone pulls video from; `conn` is
|
||||||
|
// the open, non-null handle from `punktfunk_connect` and this is the only thread touching it.
|
||||||
unsafe { pull_verified(conn, 25) };
|
unsafe { pull_verified(conn, 25) };
|
||||||
|
|
||||||
let ev = punktfunk_core::input::InputEvent {
|
let ev = punktfunk_core::input::InputEvent {
|
||||||
@@ -3441,13 +3835,19 @@ mod tests {
|
|||||||
y: 2,
|
y: 2,
|
||||||
flags: 0,
|
flags: 0,
|
||||||
};
|
};
|
||||||
assert_eq!(
|
// SAFETY: `conn` is the live handle; `&ev` borrows the local `InputEvent`, valid and immutable
|
||||||
unsafe { punktfunk_connection_send_input(conn, &ev) },
|
// for this synchronous enqueue — the contract's "valid InputEvent" pointer.
|
||||||
PunktfunkStatus::Ok
|
let st = unsafe { punktfunk_connection_send_input(conn, &ev) };
|
||||||
);
|
assert_eq!(st, PunktfunkStatus::Ok);
|
||||||
|
// SAFETY: `conn` was returned by `punktfunk_connect` and is never used after this call (session
|
||||||
|
// 2 below uses a fresh `conn2`); `close` takes ownership and frees the handle exactly once.
|
||||||
unsafe { punktfunk_connection_close(conn) };
|
unsafe { punktfunk_connection_close(conn) };
|
||||||
|
|
||||||
// Session 2 (same host process — the listener survived): pin the fingerprint.
|
// Session 2 (same host process — the listener survived): pin the fingerprint.
|
||||||
|
// SAFETY: as for session 1 — `addr` is the live NUL-terminated host string; here
|
||||||
|
// `observed.as_ptr()` is the 32-byte pin (the fingerprint captured above, a valid `[u8; 32]`),
|
||||||
|
// `observed_sha256_out` is NULL and cert/key are NULL. All pointers reference live locals for
|
||||||
|
// the duration of the blocking connect.
|
||||||
let conn2 = unsafe {
|
let conn2 = unsafe {
|
||||||
punktfunk_connect(
|
punktfunk_connect(
|
||||||
addr.as_ptr(),
|
addr.as_ptr(),
|
||||||
@@ -3463,11 +3863,17 @@ mod tests {
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
assert!(!conn2.is_null(), "pinned reconnect failed");
|
assert!(!conn2.is_null(), "pinned reconnect failed");
|
||||||
|
// SAFETY: `conn2` is the live, non-null pinned handle, pulled only from this thread —
|
||||||
|
// `pull_verified`'s requirement.
|
||||||
unsafe { pull_verified(conn2, 25) };
|
unsafe { pull_verified(conn2, 25) };
|
||||||
|
// SAFETY: `conn2` came from `punktfunk_connect` and is not used after this; `close` frees it once.
|
||||||
unsafe { punktfunk_connection_close(conn2) };
|
unsafe { punktfunk_connection_close(conn2) };
|
||||||
|
|
||||||
// Session 3: a wrong pin must be rejected by the handshake.
|
// Session 3: a wrong pin must be rejected by the handshake.
|
||||||
let bad = [0xAAu8; 32];
|
let bad = [0xAAu8; 32];
|
||||||
|
// SAFETY: same shape as the prior connects — `addr` is the live host string, `bad.as_ptr()` is
|
||||||
|
// the 32-byte `[0xAA; 32]` pin, and out/cert/key are NULL; all reference live locals across the
|
||||||
|
// blocking call. (The handshake is expected to fail and return NULL here, which is sound.)
|
||||||
let conn3 = unsafe {
|
let conn3 = unsafe {
|
||||||
punktfunk_connect(
|
punktfunk_connect(
|
||||||
addr.as_ptr(),
|
addr.as_ptr(),
|
||||||
@@ -3487,6 +3893,8 @@ mod tests {
|
|||||||
// The host saw the rejected handshake attempt as session 3? No — a TLS-failed
|
// The host saw the rejected handshake attempt as session 3? No — a TLS-failed
|
||||||
// handshake never yields a connection, so accept() is still waiting. Connect once
|
// handshake never yields a connection, so accept() is still waiting. Connect once
|
||||||
// more (TOFU) to complete the host's third session and let it exit.
|
// more (TOFU) to complete the host's third session and let it exit.
|
||||||
|
// SAFETY: same as session 1's connect — `addr` is the live host string, pin/out/cert/key all
|
||||||
|
// NULL; the pointers reference live locals for the duration of the blocking connect.
|
||||||
let conn4 = unsafe {
|
let conn4 = unsafe {
|
||||||
punktfunk_connect(
|
punktfunk_connect(
|
||||||
addr.as_ptr(),
|
addr.as_ptr(),
|
||||||
@@ -3502,7 +3910,9 @@ mod tests {
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
assert!(!conn4.is_null());
|
assert!(!conn4.is_null());
|
||||||
|
// SAFETY: `conn4` is the live, non-null handle, pulled only from this thread.
|
||||||
unsafe { pull_verified(conn4, 25) };
|
unsafe { pull_verified(conn4, 25) };
|
||||||
|
// SAFETY: `conn4` came from `punktfunk_connect` and is unused after this; `close` frees it once.
|
||||||
unsafe { punktfunk_connection_close(conn4) };
|
unsafe { punktfunk_connection_close(conn4) };
|
||||||
|
|
||||||
host.join().unwrap().unwrap();
|
host.join().unwrap().unwrap();
|
||||||
@@ -3546,6 +3956,9 @@ mod tests {
|
|||||||
paired_store: None, // unused: the shared `np` IS the store handle
|
paired_store: None, // unused: the shared `np` IS the store handle
|
||||||
},
|
},
|
||||||
np_host,
|
np_host,
|
||||||
|
StatsRecorder::new(
|
||||||
|
std::env::temp_dir().join(format!("pf-approval-stats-{}", std::process::id())),
|
||||||
|
),
|
||||||
))
|
))
|
||||||
});
|
});
|
||||||
std::thread::sleep(std::time::Duration::from_millis(500));
|
std::thread::sleep(std::time::Duration::from_millis(500));
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
//! `SessionPlan` — the per-session capture / topology / encoder decision, resolved **once** from
|
//! `SessionPlan` — the per-session capture / topology / encoder decision, resolved **once** from
|
||||||
//! [`HostConfig`](crate::config) (+ the handshake-negotiated bit depth) into a typed, logged value.
|
//! [`HostConfig`](crate::config) (+ the handshake-negotiated bit depth) into a typed, logged value.
|
||||||
//!
|
//!
|
||||||
//! **Goal-1 stage 3** (`docs/windows-host-rewrite.md` §2.2): before this, the Windows session decision was
|
//! **Goal-1 stage 3** (`design/windows-host-rewrite.md` §2.2): before this, the Windows session decision was
|
||||||
//! re-derived at three call sites — the capture backend inside `capture::capture_virtual_output`, the
|
//! re-derived at three call sites — the capture backend inside `capture::capture_virtual_output`, the
|
||||||
//! process topology in `punktfunk1::should_use_helper`, and the encode backend in
|
//! process topology in `punktfunk1::should_use_helper`, and the encode backend in
|
||||||
//! `encode::windows_resolved_backend` — each reading [`config`](crate::config) independently, with no
|
//! `encode::windows_resolved_backend` — each reading [`config`](crate::config) independently, with no
|
||||||
@@ -138,9 +138,7 @@ fn resolve_topology() -> SessionTopology {
|
|||||||
let cfg = crate::config::config();
|
let cfg = crate::config::config();
|
||||||
// `NO_HELPER`/`NO_WGC` force single-process; IDD-push captures in-process in Session 0 (no helper);
|
// `NO_HELPER`/`NO_WGC` force single-process; IDD-push captures in-process in Session 0 (no helper);
|
||||||
// otherwise the helper runs when forced or when we're SYSTEM (in-process WGC can't activate there).
|
// otherwise the helper runs when forced or when we're SYSTEM (in-process WGC can't activate there).
|
||||||
let helper = if cfg.no_helper || crate::capture::wgc_disabled() {
|
let helper = if cfg.no_helper || crate::capture::wgc_disabled() || cfg.idd_push {
|
||||||
false
|
|
||||||
} else if cfg.idd_push {
|
|
||||||
false
|
false
|
||||||
} else {
|
} else {
|
||||||
cfg.force_helper || crate::capture::wgc_relay::running_as_system()
|
cfg.force_helper || crate::capture::wgc_relay::running_as_system()
|
||||||
|
|||||||
@@ -9,7 +9,10 @@
|
|||||||
//! Raw C-ABI FFI (winmm/kernel32/dwmapi/avrt) rather than the `windows` crate so it builds without
|
//! Raw C-ABI FFI (winmm/kernel32/dwmapi/avrt) rather than the `windows` crate so it builds without
|
||||||
//! pulling new windows-rs features. No-op on non-Windows. Per-thread effects (MMCSS, execution
|
//! pulling new windows-rs features. No-op on non-Windows. Per-thread effects (MMCSS, execution
|
||||||
//! state) auto-revert at thread exit (= session end); the process-wide bits revert at process exit.
|
//! state) auto-revert at thread exit (= session end); the process-wide bits revert at process exit.
|
||||||
//! See `docs/host-latency-plan.md` Tier 3A.
|
//! See `design/host-latency-plan.md` Tier 3A.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
mod imp {
|
mod imp {
|
||||||
@@ -49,6 +52,10 @@ mod imp {
|
|||||||
/// Process-wide tuning, applied exactly once. Reverts at process exit. Best-effort: each call is
|
/// Process-wide tuning, applied exactly once. Reverts at process exit. Best-effort: each call is
|
||||||
/// independent and a failure is ignored (e.g. a non-elevated host may not get HIGH class).
|
/// independent and a failure is ignored (e.g. a non-elevated host may not get HIGH class).
|
||||||
fn tune_process_once() {
|
fn tune_process_once() {
|
||||||
|
// SAFETY: each call is a C-ABI FFI into winmm/kernel32/dwmapi declared with a matching
|
||||||
|
// `extern "system"` signature; every argument is a plain integer (no pointers/buffers escape),
|
||||||
|
// and `GetCurrentProcess()` returns the current-process pseudo-handle (a constant, always valid,
|
||||||
|
// never closed). The body runs inside `get_or_init`, so it executes exactly once per process.
|
||||||
PROCESS_TUNED.get_or_init(|| unsafe {
|
PROCESS_TUNED.get_or_init(|| unsafe {
|
||||||
// 1 ms timer granularity (default ~15.6 ms) — the floor for precise frame pacing and the
|
// 1 ms timer granularity (default ~15.6 ms) — the floor for precise frame pacing and the
|
||||||
// encode|send split's sub-ms sleeps.
|
// encode|send split's sub-ms sleeps.
|
||||||
@@ -70,6 +77,11 @@ mod imp {
|
|||||||
/// thread exits, so a session that ends tears them down without explicit bookkeeping.
|
/// thread exits, so a session that ends tears them down without explicit bookkeeping.
|
||||||
pub fn on_hot_thread() {
|
pub fn on_hot_thread() {
|
||||||
tune_process_once();
|
tune_process_once();
|
||||||
|
// SAFETY: C-ABI FFI declared with matching `extern "system"` signatures. SetThreadExecutionState
|
||||||
|
// takes only flag bits. `task` is a local NUL-terminated UTF-16 buffer ("Games\0") alive for the
|
||||||
|
// whole block, so `task.as_ptr()` is a valid LPCWSTR for the call, and `&mut idx` is a live local
|
||||||
|
// u32 the call writes the task index into. The returned MMCSS handle is intentionally leaked (the
|
||||||
|
// OS reverts the characteristics at thread exit), so there is nothing to free or double-free.
|
||||||
unsafe {
|
unsafe {
|
||||||
SetThreadExecutionState(ES_CONTINUOUS | ES_DISPLAY_REQUIRED | ES_SYSTEM_REQUIRED);
|
SetThreadExecutionState(ES_CONTINUOUS | ES_DISPLAY_REQUIRED | ES_SYSTEM_REQUIRED);
|
||||||
let task: Vec<u16> = "Games\0".encode_utf16().collect();
|
let task: Vec<u16> = "Games\0".encode_utf16().collect();
|
||||||
|
|||||||
@@ -0,0 +1,553 @@
|
|||||||
|
//! Shared streaming-stats recorder (`design/stats-capture-plan.md` §1). One
|
||||||
|
//! [`StatsRecorder`] handle is created once in the unified host entry
|
||||||
|
//! (`gamestream::serve`) alongside [`crate::native_pairing::NativePairing`], and shared with
|
||||||
|
//! **both** the management API ([`crate::mgmt`]) and the streaming loops (threaded through
|
||||||
|
//! [`crate::punktfunk1::serve`] → `SessionContext` and into the GameStream encode loop). The
|
||||||
|
//! operator arms a capture from the web console, plays a session, stops, and reviews the
|
||||||
|
//! captured time-series as graphs; captures are saved to disk and survive a host restart.
|
||||||
|
//!
|
||||||
|
//! Hot-path discipline: [`StatsRecorder::is_armed`] is a cheap `Relaxed` atomic load (re-read
|
||||||
|
//! per frame); sample construction happens only at the loops' existing ~2 s / ~1 s aggregation
|
||||||
|
//! boundary, never per frame. Memory is bounded ([`MAX_SAMPLES`]); the on-disk write is atomic
|
||||||
|
//! (temp + rename); and capture ids are path-traversal-safe.
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
use std::time::Instant;
|
||||||
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
|
/// Cap on samples kept in one capture: ≈ 3 h at one sample / 2 s. On overflow we stop appending
|
||||||
|
/// (keeping the oldest — a saved recording must keep its start), never dropping the front and never
|
||||||
|
/// growing unbounded.
|
||||||
|
const MAX_SAMPLES: usize = 5400;
|
||||||
|
|
||||||
|
/// One pipeline stage's latency in an aggregation window (microseconds).
|
||||||
|
#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
|
||||||
|
pub struct StageTiming {
|
||||||
|
/// `"capture" | "submit" | "encode" | "packetize" | "send"` (path-dependent).
|
||||||
|
pub name: String,
|
||||||
|
pub p50_us: f32,
|
||||||
|
pub p99_us: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One aggregated sample (~ every 2 s native, ~ every 1 s GameStream).
|
||||||
|
#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
|
||||||
|
pub struct StatsSample {
|
||||||
|
/// Milliseconds since capture start (monotonic; stamped by [`StatsRecorder::push_sample`]).
|
||||||
|
pub t_ms: u64,
|
||||||
|
/// Disambiguates concurrent sessions (usually constant).
|
||||||
|
pub session_id: u32,
|
||||||
|
/// Ordered pipeline stages for this path.
|
||||||
|
pub stages: Vec<StageTiming>,
|
||||||
|
/// Genuine NEW frames/s from the source.
|
||||||
|
pub fps: f32,
|
||||||
|
/// Re-encoded holds/s (source-starvation indicator).
|
||||||
|
pub repeat_fps: f32,
|
||||||
|
/// Transmit goodput (Mb/s).
|
||||||
|
pub mbps: f32,
|
||||||
|
/// Configured target bitrate.
|
||||||
|
pub bitrate_kbps: u32,
|
||||||
|
/// Frames dropped this window (delta).
|
||||||
|
pub frames_dropped: u32,
|
||||||
|
/// Packets dropped this window (receiver-side / reassembler, where known).
|
||||||
|
pub packets_dropped: u32,
|
||||||
|
/// Host send-buffer overflow / EAGAIN this window (delta).
|
||||||
|
pub send_dropped: u32,
|
||||||
|
/// FEC shards recovered this window (delta).
|
||||||
|
pub fec_recovered: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Capture summary — the filename stem plus the negotiated mode/codec/client. Stored at the head
|
||||||
|
/// of each on-disk recording and listed standalone (without the sample body) by
|
||||||
|
/// [`StatsRecorder::list`].
|
||||||
|
#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
|
||||||
|
pub struct CaptureMeta {
|
||||||
|
/// e.g. `"2026-06-26T20-14-03Z_5120x1440"` — also the filename stem.
|
||||||
|
pub id: String,
|
||||||
|
pub started_unix_ms: u64,
|
||||||
|
pub duration_ms: u64,
|
||||||
|
/// `"native" | "gamestream"`.
|
||||||
|
pub kind: String,
|
||||||
|
pub width: u32,
|
||||||
|
pub height: u32,
|
||||||
|
pub fps: u32,
|
||||||
|
/// `"h264" | "hevc" | "av1"`.
|
||||||
|
pub codec: String,
|
||||||
|
/// Short label / fingerprint prefix, or `""` if unknown.
|
||||||
|
pub client: String,
|
||||||
|
pub sample_count: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A full capture: summary + the sample time-series. The wire + on-disk shape.
|
||||||
|
#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
|
||||||
|
pub struct Capture {
|
||||||
|
pub meta: CaptureMeta,
|
||||||
|
pub samples: Vec<StatsSample>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Snapshot of the in-progress capture for the management API.
|
||||||
|
#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
|
||||||
|
pub struct StatsStatus {
|
||||||
|
/// Capture currently running.
|
||||||
|
pub armed: bool,
|
||||||
|
/// Samples in the in-progress capture.
|
||||||
|
pub sample_count: u32,
|
||||||
|
/// Unix start time of the in-progress capture (`0` if idle).
|
||||||
|
pub started_unix_ms: u64,
|
||||||
|
/// Path of the in-progress capture (`""` if idle).
|
||||||
|
pub kind: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mode/codec/client seeded on the first [`StatsRecorder::register_session`] of a capture.
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct MetaSeed {
|
||||||
|
kind: String,
|
||||||
|
width: u32,
|
||||||
|
height: u32,
|
||||||
|
fps: u32,
|
||||||
|
codec: String,
|
||||||
|
client: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The in-progress capture (present iff armed).
|
||||||
|
struct Live {
|
||||||
|
/// Monotonic clock origin for sample `t_ms`.
|
||||||
|
started: Instant,
|
||||||
|
started_unix_ms: u64,
|
||||||
|
/// Seeded once, on the first session registration.
|
||||||
|
meta: Option<MetaSeed>,
|
||||||
|
samples: Vec<StatsSample>,
|
||||||
|
/// Set once the sample cap was hit (further samples dropped). Read so it isn't dead.
|
||||||
|
truncated: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shared streaming-stats recorder: an arm/disarm flag (the hot-path gate), the in-progress
|
||||||
|
/// capture, and the on-disk capture directory.
|
||||||
|
pub struct StatsRecorder {
|
||||||
|
dir: PathBuf,
|
||||||
|
/// The hot-path gate — a `Relaxed` load per frame; never blocks the frame thread.
|
||||||
|
armed: AtomicBool,
|
||||||
|
/// The in-progress capture. Locks recover a poisoned guard (`unwrap_or_else(|e| e.into_inner())`,
|
||||||
|
/// as in `vdisplay::gamescope`) rather than `unwrap()`: a panic somewhere must never make stats
|
||||||
|
/// recording crash an otherwise-healthy stream. The critical sections only push/clone/format, so
|
||||||
|
/// poisoning is near-impossible anyway — this is belt-and-suspenders.
|
||||||
|
live: Mutex<Option<Live>>,
|
||||||
|
next_sid: AtomicU32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The default captures directory: `~/.config/punktfunk/captures/` (next to `cert.pem`),
|
||||||
|
/// resolved via the same config-dir helper the rest of the host uses.
|
||||||
|
pub fn default_dir() -> PathBuf {
|
||||||
|
crate::gamestream::config_dir().join("captures")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Path-safety gate for a capture `id`: accepts exactly `^[A-Za-z0-9._-]+$`, minus the two
/// dot-only names `.` and `..`.
///
/// This is the charset `capture_id` emits (dashes rather than colons, so the stem stays a valid
/// Windows filename). Neither `/` nor `\` is in the charset, so `dir.join("<id>.json")` is always
/// a single child of `dir`; `.` and `..` are rejected on top of that because the charset alone
/// would let bare dots through. Defense in depth — the endpoints are bearer-authed.
fn valid_id(id: &str) -> bool {
    // Guard clauses first: empty and dot-only names never qualify.
    if id.is_empty() || id == "." || id == ".." {
        return false;
    }
    id.bytes().all(|byte| match byte {
        b'.' | b'_' | b'-' => true,
        other => other.is_ascii_alphanumeric(),
    })
}
|
||||||
|
|
||||||
|
/// Wall-clock milliseconds since the Unix epoch, or `0` when the system clock reads earlier than
/// the epoch (`duration_since` fails in that case).
fn unix_ms_now() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        Err(_) => 0,
    }
}
|
||||||
|
|
||||||
|
/// A human-readable, filesystem-safe capture id from the start time + mode, e.g.
|
||||||
|
/// `2026-06-26T20-14-03Z_5120x1440`. Dashes (not colons) in the time so it's a valid Windows
|
||||||
|
/// filename; matches [`valid_id`].
|
||||||
|
fn capture_id(unix_ms: u64, width: u32, height: u32) -> String {
|
||||||
|
let secs = (unix_ms / 1000) as i64;
|
||||||
|
let days = secs.div_euclid(86_400);
|
||||||
|
let tod = secs.rem_euclid(86_400);
|
||||||
|
let (y, mo, d) = civil_from_days(days);
|
||||||
|
let (h, mi, s) = (tod / 3600, (tod % 3600) / 60, tod % 60);
|
||||||
|
format!("{y:04}-{mo:02}-{d:02}T{h:02}-{mi:02}-{s:02}Z_{width}x{height}")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Civil (Y, M, D) from a count of days since the Unix epoch (Howard Hinnant's `civil_from_days`),
/// in the proleptic Gregorian calendar.
///
/// `z` is the day number relative to 1970-01-01 (negative for earlier dates). Returns
/// `(year, month in 1..=12, day in 1..=31)`.
fn civil_from_days(z: i64) -> (i64, u32, u32) {
    // Shift the origin to 0000-03-01 so each 400-year era starts right after a leap day.
    let z = z + 719_468;
    // Floor division. The reference C++ subtracts 146096 from negative `z` only to emulate a floor
    // with truncating `/`; `div_euclid` already floors, so applying that adjustment as well would
    // push `era` one too low (and `doe` out of range) for every `z < -1`.
    let era = z.div_euclid(146_097);
    let doe = z.rem_euclid(146_097); // [0, 146096]
    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; // [0, 399]
    let y = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // [0, 365]
    let mp = (5 * doy + 2) / 153; // [0, 11], month index counted from March
    let d = (doy - (153 * mp + 2) / 5 + 1) as u32; // [1, 31]
    let m = if mp < 10 { mp + 3 } else { mp - 9 }; // [1, 12]
    // January and February belong to the following civil year in the March-based scheme.
    (if m <= 2 { y + 1 } else { y }, m as u32, d)
}
|
||||||
|
|
||||||
|
impl StatsRecorder {
|
||||||
|
/// Create the recorder, creating `dir` (owner-private, best-effort) if missing.
|
||||||
|
pub fn new(dir: PathBuf) -> Arc<Self> {
|
||||||
|
if let Err(e) = crate::gamestream::create_private_dir(&dir) {
|
||||||
|
tracing::warn!(dir = %dir.display(), error = %e, "could not create stats captures dir");
|
||||||
|
}
|
||||||
|
Arc::new(StatsRecorder {
|
||||||
|
dir,
|
||||||
|
armed: AtomicBool::new(false),
|
||||||
|
live: Mutex::new(None),
|
||||||
|
next_sid: AtomicU32::new(0),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The hot-path gate: cheap `Relaxed` load, called per frame to decide whether to measure.
|
||||||
|
pub fn is_armed(&self) -> bool {
|
||||||
|
self.armed.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Arm a new capture. No-op if already armed (returns the current status).
|
||||||
|
pub fn start(&self) -> StatsStatus {
|
||||||
|
let mut guard = self.live.lock().unwrap_or_else(|e| e.into_inner());
|
||||||
|
if guard.is_none() {
|
||||||
|
*guard = Some(Live {
|
||||||
|
started: Instant::now(),
|
||||||
|
started_unix_ms: unix_ms_now(),
|
||||||
|
meta: None,
|
||||||
|
samples: Vec::new(),
|
||||||
|
truncated: false,
|
||||||
|
});
|
||||||
|
// Publish AFTER the live capture exists, so a frame thread that observes `armed` always
|
||||||
|
// finds a capture to push into.
|
||||||
|
self.armed.store(true, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
status_of(guard.as_ref())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A streaming loop announces itself when it first records while armed. Seeds the capture's
|
||||||
|
/// `CaptureMeta` (kind/w/h/fps/codec/client) on the FIRST registration; returns a session id
|
||||||
|
/// to stamp on the loop's samples.
|
||||||
|
pub fn register_session(
|
||||||
|
&self,
|
||||||
|
kind: &'static str,
|
||||||
|
w: u32,
|
||||||
|
h: u32,
|
||||||
|
fps: u32,
|
||||||
|
codec: &str,
|
||||||
|
client: &str,
|
||||||
|
) -> u32 {
|
||||||
|
let sid = self.next_sid.fetch_add(1, Ordering::Relaxed);
|
||||||
|
let mut guard = self.live.lock().unwrap_or_else(|e| e.into_inner());
|
||||||
|
if let Some(live) = guard.as_mut() {
|
||||||
|
if live.meta.is_none() {
|
||||||
|
live.meta = Some(MetaSeed {
|
||||||
|
kind: kind.to_string(),
|
||||||
|
width: w,
|
||||||
|
height: h,
|
||||||
|
fps,
|
||||||
|
codec: codec.to_string(),
|
||||||
|
client: client.to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sid
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append one aggregated sample (called from the loops' existing ~2 s / ~1 s boundary). The
|
||||||
|
/// `t_ms` is (re)stamped here from the capture's monotonic start, so callers may leave it `0`.
|
||||||
|
/// Bounded at [`MAX_SAMPLES`]: on overflow we stop appending (oldest kept) and flag truncation.
|
||||||
|
/// A no-op when nothing is armed (e.g. a `stop()` raced the frame boundary).
|
||||||
|
pub fn push_sample(&self, session_id: u32, mut sample: StatsSample) {
|
||||||
|
let mut guard = self.live.lock().unwrap_or_else(|e| e.into_inner());
|
||||||
|
let Some(live) = guard.as_mut() else { return };
|
||||||
|
if live.samples.len() >= MAX_SAMPLES {
|
||||||
|
if !live.truncated {
|
||||||
|
live.truncated = true;
|
||||||
|
tracing::warn!(
|
||||||
|
max = MAX_SAMPLES,
|
||||||
|
"stats capture hit the sample cap — further samples dropped (oldest kept)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sample.session_id = session_id;
|
||||||
|
sample.t_ms = live.started.elapsed().as_millis() as u64;
|
||||||
|
live.samples.push(sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Disarm + finalize: write `<dir>/<id>.json` atomically (temp + rename) and return its meta.
|
||||||
|
/// `Ok(None)` if nothing was recording.
|
||||||
|
pub fn stop(&self) -> std::io::Result<Option<CaptureMeta>> {
|
||||||
|
// Clear the hot-path gate first so frame threads stop building samples immediately.
|
||||||
|
self.armed.store(false, Ordering::Relaxed);
|
||||||
|
let Some(live) = self.live.lock().unwrap_or_else(|e| e.into_inner()).take() else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
let meta = meta_of(&live);
|
||||||
|
let capture = Capture {
|
||||||
|
meta: meta.clone(),
|
||||||
|
samples: live.samples,
|
||||||
|
};
|
||||||
|
let bytes = serde_json::to_vec(&capture).map_err(std::io::Error::other)?;
|
||||||
|
// Atomic replace: write a sibling temp then rename, so a crash mid-write can't leave a half
|
||||||
|
// file. The id is generated (always `valid_id`), so this only ever names a child of `dir`.
|
||||||
|
let path = self.dir.join(format!("{}.json", meta.id));
|
||||||
|
let tmp = self.dir.join(format!("{}.json.tmp", meta.id));
|
||||||
|
std::fs::write(&tmp, &bytes)?;
|
||||||
|
std::fs::rename(&tmp, &path)?;
|
||||||
|
Ok(Some(meta))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The in-progress capture status (idle = `armed: false`, zeroed fields).
|
||||||
|
pub fn status(&self) -> StatsStatus {
|
||||||
|
status_of(self.live.lock().unwrap_or_else(|e| e.into_inner()).as_ref())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A clone of the in-progress capture for live graphing (`None` when idle).
|
||||||
|
pub fn live_snapshot(&self) -> Option<Capture> {
|
||||||
|
let guard = self.live.lock().unwrap_or_else(|e| e.into_inner());
|
||||||
|
let live = guard.as_ref()?;
|
||||||
|
Some(Capture {
|
||||||
|
meta: meta_of(live),
|
||||||
|
samples: live.samples.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// All saved recordings, newest first, parsing each file's `meta` head only (not the samples).
|
||||||
|
pub fn list(&self) -> Vec<CaptureMeta> {
|
||||||
|
/// Parse only the `meta` head — serde skips the (large) `samples` array.
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct MetaOnly {
|
||||||
|
meta: CaptureMeta,
|
||||||
|
}
|
||||||
|
let mut out: Vec<CaptureMeta> = Vec::new();
|
||||||
|
let Ok(entries) = std::fs::read_dir(&self.dir) else {
|
||||||
|
return out;
|
||||||
|
};
|
||||||
|
for entry in entries.flatten() {
|
||||||
|
let path = entry.path();
|
||||||
|
if path.extension().and_then(|e| e.to_str()) != Some("json") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Ok(bytes) = std::fs::read(&path) {
|
||||||
|
if let Ok(parsed) = serde_json::from_slice::<MetaOnly>(&bytes) {
|
||||||
|
out.push(parsed.meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.sort_by_key(|m| std::cmp::Reverse(m.started_unix_ms));
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load a saved recording by id. Rejects a path-unsafe id (and a missing file) as `NotFound`.
|
||||||
|
pub fn load(&self, id: &str) -> std::io::Result<Capture> {
|
||||||
|
let path = self.recording_path(id)?;
|
||||||
|
let bytes = std::fs::read(&path)?;
|
||||||
|
serde_json::from_slice(&bytes)
|
||||||
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delete a saved recording by id. Rejects a path-unsafe id (and a missing file) as `NotFound`.
|
||||||
|
pub fn delete(&self, id: &str) -> std::io::Result<()> {
|
||||||
|
let path = self.recording_path(id)?;
|
||||||
|
std::fs::remove_file(&path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve `dir/<id>.json` after validating `id`. A rejected id is `NotFound` (defense in
|
||||||
|
/// depth: never let an attacker-shaped id escape `dir`).
|
||||||
|
fn recording_path(&self, id: &str) -> std::io::Result<PathBuf> {
|
||||||
|
if !valid_id(id) {
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::NotFound,
|
||||||
|
"invalid recording id",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(self.dir.join(format!("{id}.json")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the live `StatsStatus` from the optional in-progress capture.
|
||||||
|
fn status_of(live: Option<&Live>) -> StatsStatus {
|
||||||
|
match live {
|
||||||
|
Some(l) => StatsStatus {
|
||||||
|
armed: true,
|
||||||
|
sample_count: l.samples.len() as u32,
|
||||||
|
started_unix_ms: l.started_unix_ms,
|
||||||
|
kind: l.meta.as_ref().map(|m| m.kind.clone()).unwrap_or_default(),
|
||||||
|
},
|
||||||
|
None => StatsStatus {
|
||||||
|
armed: false,
|
||||||
|
sample_count: 0,
|
||||||
|
started_unix_ms: 0,
|
||||||
|
kind: String::new(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the `CaptureMeta` for an in-progress or finalizing capture (id derived from the start
|
||||||
|
/// time + negotiated mode; duration from the monotonic start).
|
||||||
|
fn meta_of(live: &Live) -> CaptureMeta {
|
||||||
|
let (kind, width, height, fps, codec, client) = match &live.meta {
|
||||||
|
Some(m) => (
|
||||||
|
m.kind.clone(),
|
||||||
|
m.width,
|
||||||
|
m.height,
|
||||||
|
m.fps,
|
||||||
|
m.codec.clone(),
|
||||||
|
m.client.clone(),
|
||||||
|
),
|
||||||
|
None => (String::new(), 0, 0, 0, String::new(), String::new()),
|
||||||
|
};
|
||||||
|
CaptureMeta {
|
||||||
|
id: capture_id(live.started_unix_ms, width, height),
|
||||||
|
started_unix_ms: live.started_unix_ms,
|
||||||
|
duration_ms: live.started.elapsed().as_millis() as u64,
|
||||||
|
kind,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
fps,
|
||||||
|
codec,
|
||||||
|
client,
|
||||||
|
sample_count: live.samples.len() as u32,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn temp_dir() -> PathBuf {
|
||||||
|
// A per-call unique dir: a process-wide counter (NOT a timestamp, which collides when tests
|
||||||
|
// run in parallel within the same millisecond — one test's cleanup would then wipe another's
|
||||||
|
// dir mid-run).
|
||||||
|
static COUNTER: AtomicU32 = AtomicU32::new(0);
|
||||||
|
let n = COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||||
|
let p = std::env::temp_dir().join(format!("pf-stats-{}-{}", std::process::id(), n));
|
||||||
|
let _ = std::fs::remove_dir_all(&p);
|
||||||
|
p
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sample() -> StatsSample {
|
||||||
|
StatsSample {
|
||||||
|
t_ms: 0,
|
||||||
|
session_id: 0,
|
||||||
|
stages: vec![StageTiming {
|
||||||
|
name: "capture".into(),
|
||||||
|
p50_us: 100.0,
|
||||||
|
p99_us: 200.0,
|
||||||
|
}],
|
||||||
|
fps: 60.0,
|
||||||
|
repeat_fps: 0.0,
|
||||||
|
mbps: 25.0,
|
||||||
|
bitrate_kbps: 20_000,
|
||||||
|
frames_dropped: 0,
|
||||||
|
packets_dropped: 0,
|
||||||
|
send_dropped: 0,
|
||||||
|
fec_recovered: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn arm_record_save_load_delete() {
|
||||||
|
let dir = temp_dir();
|
||||||
|
let rec = StatsRecorder::new(dir.clone());
|
||||||
|
assert!(!rec.is_armed());
|
||||||
|
assert!(!rec.status().armed);
|
||||||
|
// A push while idle is a no-op (no live capture).
|
||||||
|
rec.push_sample(0, sample());
|
||||||
|
|
||||||
|
let st = rec.start();
|
||||||
|
assert!(st.armed);
|
||||||
|
assert!(rec.is_armed());
|
||||||
|
let sid = rec.register_session("native", 5120, 1440, 240, "hevc", "abcd");
|
||||||
|
rec.push_sample(sid, sample());
|
||||||
|
rec.push_sample(sid, sample());
|
||||||
|
assert_eq!(rec.status().sample_count, 2);
|
||||||
|
assert_eq!(rec.status().kind, "native");
|
||||||
|
assert!(rec.live_snapshot().is_some());
|
||||||
|
|
||||||
|
let meta = rec.stop().unwrap().expect("a capture was recording");
|
||||||
|
assert_eq!(meta.sample_count, 2);
|
||||||
|
assert_eq!(meta.kind, "native");
|
||||||
|
assert_eq!(meta.width, 5120);
|
||||||
|
assert!(meta.id.ends_with("_5120x1440"), "id was {}", meta.id);
|
||||||
|
assert!(!rec.is_armed());
|
||||||
|
assert!(rec.live_snapshot().is_none());
|
||||||
|
// Stop with nothing recording → Ok(None).
|
||||||
|
assert!(rec.stop().unwrap().is_none());
|
||||||
|
|
||||||
|
// It is listed and loadable.
|
||||||
|
let list = rec.list();
|
||||||
|
assert_eq!(list.len(), 1);
|
||||||
|
assert_eq!(list[0].id, meta.id);
|
||||||
|
let loaded = rec.load(&meta.id).unwrap();
|
||||||
|
assert_eq!(loaded.samples.len(), 2);
|
||||||
|
assert_eq!(loaded.meta.codec, "hevc");
|
||||||
|
|
||||||
|
// Delete removes it; a second delete is NotFound.
|
||||||
|
rec.delete(&meta.id).unwrap();
|
||||||
|
assert!(rec.list().is_empty());
|
||||||
|
assert_eq!(
|
||||||
|
rec.delete(&meta.id).unwrap_err().kind(),
|
||||||
|
std::io::ErrorKind::NotFound
|
||||||
|
);
|
||||||
|
let _ = std::fs::remove_dir_all(&dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rejects_path_traversal_ids() {
|
||||||
|
let dir = temp_dir();
|
||||||
|
let rec = StatsRecorder::new(dir.clone());
|
||||||
|
for bad in [
|
||||||
|
"../secret",
|
||||||
|
"..",
|
||||||
|
".",
|
||||||
|
"a/b",
|
||||||
|
"a\\b",
|
||||||
|
"",
|
||||||
|
"/etc/passwd",
|
||||||
|
"x/../../y",
|
||||||
|
] {
|
||||||
|
assert_eq!(
|
||||||
|
rec.load(bad).unwrap_err().kind(),
|
||||||
|
std::io::ErrorKind::NotFound,
|
||||||
|
"load({bad:?}) must be rejected as NotFound"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
rec.delete(bad).unwrap_err().kind(),
|
||||||
|
std::io::ErrorKind::NotFound,
|
||||||
|
"delete({bad:?}) must be rejected as NotFound"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let _ = std::fs::remove_dir_all(&dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn samples_are_bounded() {
|
||||||
|
let dir = temp_dir();
|
||||||
|
let rec = StatsRecorder::new(dir.clone());
|
||||||
|
rec.start();
|
||||||
|
for _ in 0..(MAX_SAMPLES + 50) {
|
||||||
|
rec.push_sample(0, sample());
|
||||||
|
}
|
||||||
|
assert_eq!(rec.status().sample_count as usize, MAX_SAMPLES);
|
||||||
|
let _ = std::fs::remove_dir_all(&dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn start_is_idempotent_while_armed() {
|
||||||
|
let dir = temp_dir();
|
||||||
|
let rec = StatsRecorder::new(dir.clone());
|
||||||
|
rec.start();
|
||||||
|
rec.register_session("native", 1920, 1080, 60, "hevc", "");
|
||||||
|
rec.push_sample(0, sample());
|
||||||
|
// A second start must NOT wipe the in-progress capture.
|
||||||
|
let st = rec.start();
|
||||||
|
assert!(st.armed);
|
||||||
|
assert_eq!(st.sample_count, 1);
|
||||||
|
let _ = std::fs::remove_dir_all(&dir);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -13,6 +13,9 @@
|
|||||||
//! owned keepalive whose `Drop` releases the output (RAII — no explicit `destroy`). Capture
|
//! owned keepalive whose `Drop` releases the output (RAII — no explicit `destroy`). Capture
|
||||||
//! consumes the node via [`crate::capture::capture_virtual_output`].
|
//! consumes the node via [`crate::capture::capture_virtual_output`].
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
pub use punktfunk_core::Mode;
|
pub use punktfunk_core::Mode;
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
@@ -225,6 +228,8 @@ pub fn compositor_for_kind(kind: ActiveKind) -> Option<Compositor> {
|
|||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
fn default_runtime_dir() -> String {
|
fn default_runtime_dir() -> String {
|
||||||
std::env::var("XDG_RUNTIME_DIR").unwrap_or_else(|_| {
|
std::env::var("XDG_RUNTIME_DIR").unwrap_or_else(|_| {
|
||||||
|
// SAFETY: `getuid()` is a parameterless POSIX call that always succeeds and touches no
|
||||||
|
// memory — it just returns the calling process's real uid. Nothing is aliased or freed.
|
||||||
let uid = unsafe { libc::getuid() };
|
let uid = unsafe { libc::getuid() };
|
||||||
format!("/run/user/{uid}")
|
format!("/run/user/{uid}")
|
||||||
})
|
})
|
||||||
@@ -245,6 +250,8 @@ fn default_bus(runtime: &str) -> String {
|
|||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
pub fn detect_active_session() -> ActiveSession {
|
pub fn detect_active_session() -> ActiveSession {
|
||||||
use std::os::unix::fs::MetadataExt;
|
use std::os::unix::fs::MetadataExt;
|
||||||
|
// SAFETY: `getuid()` is a parameterless POSIX call that always succeeds and touches no memory —
|
||||||
|
// it just returns the calling process's real uid. Nothing is aliased or freed.
|
||||||
let uid = unsafe { libc::getuid() };
|
let uid = unsafe { libc::getuid() };
|
||||||
let xdg_runtime_dir = default_runtime_dir();
|
let xdg_runtime_dir = default_runtime_dir();
|
||||||
let dbus = default_bus(&xdg_runtime_dir);
|
let dbus = default_bus(&xdg_runtime_dir);
|
||||||
@@ -450,7 +457,11 @@ pub fn settle_desktop_portal(_chosen: Compositor) {}
|
|||||||
pub fn apply_input_env(chosen: Compositor) {
|
pub fn apply_input_env(chosen: Compositor) {
|
||||||
let backend = match chosen {
|
let backend = match chosen {
|
||||||
Compositor::Gamescope => "gamescope",
|
Compositor::Gamescope => "gamescope",
|
||||||
Compositor::Kwin | Compositor::Mutter => "libei",
|
// KWin: org_kde_kwin_fake_input — direct injection, no RemoteDesktop portal / approval
|
||||||
|
// dialog (headless, the krdpserver path), authorized by the host's shipped .desktop.
|
||||||
|
Compositor::Kwin => "kwin",
|
||||||
|
// GNOME has neither fake_input nor the wlr protocols → RemoteDesktop portal via libei.
|
||||||
|
Compositor::Mutter => "libei",
|
||||||
Compositor::Wlroots => "wlr",
|
Compositor::Wlroots => "wlr",
|
||||||
};
|
};
|
||||||
std::env::set_var("PUNKTFUNK_INPUT_BACKEND", backend);
|
std::env::set_var("PUNKTFUNK_INPUT_BACKEND", backend);
|
||||||
@@ -615,12 +626,12 @@ mod gamescope;
|
|||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
#[path = "vdisplay/linux/kwin.rs"]
|
#[path = "vdisplay/linux/kwin.rs"]
|
||||||
mod kwin;
|
mod kwin;
|
||||||
#[cfg(target_os = "linux")]
|
|
||||||
#[path = "vdisplay/linux/mutter.rs"]
|
|
||||||
mod mutter;
|
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
#[path = "vdisplay/windows/manager.rs"]
|
#[path = "vdisplay/windows/manager.rs"]
|
||||||
pub(crate) mod manager;
|
pub(crate) mod manager;
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
#[path = "vdisplay/linux/mutter.rs"]
|
||||||
|
mod mutter;
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
#[path = "vdisplay/windows/pf_vdisplay.rs"]
|
#[path = "vdisplay/windows/pf_vdisplay.rs"]
|
||||||
pub(crate) mod pf_vdisplay;
|
pub(crate) mod pf_vdisplay;
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
//! `inject/libei.rs`) — wired and live-validated.
|
//! `inject/libei.rs`) — wired and live-validated.
|
||||||
|
|
||||||
use super::{Mode, VirtualDisplay, VirtualOutput};
|
use super::{Mode, VirtualDisplay, VirtualOutput};
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, bail, Context, Result};
|
||||||
use std::process::{Child, Command, Stdio};
|
use std::process::{Child, Command, Stdio};
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
@@ -110,12 +110,11 @@ impl VirtualDisplay for GamescopeDisplay {
|
|||||||
// PUNKTFUNK_GAMESCOPE_NODE=<id|auto>; "auto" discovers the gamescope `Video/Source` node.
|
// PUNKTFUNK_GAMESCOPE_NODE=<id|auto>; "auto" discovers the gamescope `Video/Source` node.
|
||||||
if let Ok(id) = std::env::var("PUNKTFUNK_GAMESCOPE_NODE") {
|
if let Ok(id) = std::env::var("PUNKTFUNK_GAMESCOPE_NODE") {
|
||||||
let node_id: u32 = if id.trim().eq_ignore_ascii_case("auto") {
|
let node_id: u32 = if id.trim().eq_ignore_ascii_case("auto") {
|
||||||
find_gamescope_node().ok_or_else(|| {
|
// Attach to the box-owned game-mode session, but FIRST make it run at the connecting
|
||||||
anyhow!(
|
// client's resolution (the box is headless, so its game-mode mode is ours to set).
|
||||||
"PUNKTFUNK_GAMESCOPE_NODE=auto but no running gamescope Video/Source node \
|
// Reuse if it already matches (fast, no restart); otherwise relaunch the box's own
|
||||||
was found — is the headless gamescope/Steam session up?"
|
// session at the client mode. Without this the client gets the box's default mode.
|
||||||
)
|
ensure_box_gamescope_mode(mode)?
|
||||||
})?
|
|
||||||
} else {
|
} else {
|
||||||
id.parse()
|
id.parse()
|
||||||
.context("PUNKTFUNK_GAMESCOPE_NODE must be a node id or 'auto'")?
|
.context("PUNKTFUNK_GAMESCOPE_NODE must be a node id or 'auto'")?
|
||||||
@@ -368,6 +367,150 @@ fn create_managed_session_steamos(mode: Mode) -> Result<VirtualOutput> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// ATTACH at the CLIENT's resolution: ensure the box's own game-mode session is running at `mode`'s
|
||||||
|
/// output size, then return its capture node. Reuses the running session if it already matches (no
|
||||||
|
/// restart — the rock-solid fast path a stable client always hits); otherwise reconfigures + restarts
|
||||||
|
/// the box's OWN autologin `gamescope-session-plus@<client>` unit at the client mode. Restarting the
|
||||||
|
/// box's own unit (rather than spawning a competing one) avoids the autologin-respawn fight the old
|
||||||
|
/// MANAGED path hit. A headless box has no physical panel, so its game-mode resolution is ours to set;
|
||||||
|
/// Steam restarts only on an actual resolution CHANGE.
|
||||||
|
fn ensure_box_gamescope_mode(mode: Mode) -> Result<u32> {
|
||||||
|
let target = (mode.width, mode.height);
|
||||||
|
// Fast path: already at the client's resolution — just attach to the live node.
|
||||||
|
if current_gamescope_output_size() == Some(target) {
|
||||||
|
if let Some(node) = find_gamescope_node() {
|
||||||
|
tracing::info!(
|
||||||
|
w = mode.width,
|
||||||
|
h = mode.height,
|
||||||
|
node,
|
||||||
|
"gamescope: box game-mode session already at the client's resolution — reusing"
|
||||||
|
);
|
||||||
|
return Ok(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let Some(unit) = running_autologin_gamescope_unit() else {
|
||||||
|
// No box-owned autologin session to reconfigure (a bare/foreign gamescope): attach to
|
||||||
|
// whatever node exists, accepting its resolution.
|
||||||
|
return find_gamescope_node().ok_or_else(|| {
|
||||||
|
anyhow!(
|
||||||
|
"no running gamescope Video/Source node — is the headless game mode up? \
|
||||||
|
(put the box into Steam Game Mode)"
|
||||||
|
)
|
||||||
|
});
|
||||||
|
};
|
||||||
|
tracing::info!(
|
||||||
|
from = ?current_gamescope_output_size(),
|
||||||
|
to_w = mode.width,
|
||||||
|
to_h = mode.height,
|
||||||
|
hz = mode.refresh_hz,
|
||||||
|
%unit,
|
||||||
|
"gamescope: relaunching the box game-mode session at the client's resolution"
|
||||||
|
);
|
||||||
|
// The session reads SCREEN_WIDTH/HEIGHT (+ CUSTOM_REFRESH_RATES) from the user-manager
|
||||||
|
// environment; set them and restart the box's own unit.
|
||||||
|
systemctl_user(&[
|
||||||
|
"set-environment",
|
||||||
|
&format!("SCREEN_WIDTH={}", mode.width),
|
||||||
|
&format!("SCREEN_HEIGHT={}", mode.height),
|
||||||
|
&format!("CUSTOM_REFRESH_RATES={}", mode.refresh_hz.max(1)),
|
||||||
|
]);
|
||||||
|
systemctl_user(&["restart", &unit]);
|
||||||
|
// Wait for the relaunched session to come up at the new size and publish its capture node. The
|
||||||
|
// node appears when gamescope is up (well before Steam finishes booting); the caller's
|
||||||
|
// first-frame retry absorbs Steam's cold start.
|
||||||
|
let deadline = Instant::now() + Duration::from_secs(45);
|
||||||
|
loop {
|
||||||
|
if current_gamescope_output_size() == Some(target) {
|
||||||
|
if let Some(node) = find_gamescope_node() {
|
||||||
|
tracing::info!(
|
||||||
|
node,
|
||||||
|
w = mode.width,
|
||||||
|
h = mode.height,
|
||||||
|
"gamescope: box game-mode session relaunched at the client's resolution"
|
||||||
|
);
|
||||||
|
return Ok(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if Instant::now() >= deadline {
|
||||||
|
bail!(
|
||||||
|
"box game-mode session did not come up at {}x{} within 45s after relaunch \
|
||||||
|
(Steam may still be booting)",
|
||||||
|
mode.width,
|
||||||
|
mode.height
|
||||||
|
);
|
||||||
|
}
|
||||||
|
std::thread::sleep(Duration::from_millis(500));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Output (capture) resolution of the running `gamescope` binary, parsed from its
/// `/proc/<pid>/cmdline`. `None` if no gamescope is running or the flags aren't present.
///
/// Recognised spellings, for width and height alike: `-W <n>`, `-W<n>`, `--output-width <n>`,
/// `--output-width=<n>` (and the `-H` / `--output-height` equivalents). Only gamescope's own
/// options are inspected: scanning stops at the first `--`, after which the argv belongs to the
/// nested command. The first process whose argv yields both a width and a height wins.
fn current_gamescope_output_size() -> Option<(u32, u32)> {
    for entry in std::fs::read_dir("/proc").ok()?.flatten() {
        let name = entry.file_name();
        let Some(pid) = name.to_str() else { continue };
        // Only the numeric entries of /proc are processes.
        if !pid.bytes().all(|b| b.is_ascii_digit()) {
            continue;
        }
        let Ok(raw) = std::fs::read(format!("/proc/{pid}/cmdline")) else {
            continue;
        };
        // cmdline is the NUL-separated argv.
        let args: Vec<String> = raw
            .split(|&b| b == 0)
            .filter(|s| !s.is_empty())
            .map(|s| String::from_utf8_lossy(s).into_owned())
            .collect();
        // Match the gamescope BINARY by argv[0]'s basename — NOT /proc/<pid>/exe, which is commonly
        // unreadable for the gamescope process (returns empty). The session wrapper scripts run as
        // bash/sh (argv[0] != gamescope), so they're excluded; the -W/-H presence check below is the
        // final filter.
        let is_gamescope = args
            .first()
            .map(|a0| a0.rsplit('/').next().unwrap_or(a0) == "gamescope")
            .unwrap_or(false);
        if !is_gamescope {
            continue;
        }
        // Everything after `--` is the nested command's argv, not gamescope's options.
        let own: &[String] = match args.iter().position(|a| a == "--") {
            Some(end) => &args[..end],
            None => &args[..],
        };
        // Value of an option given as `<flag> <n>`, `--long=<n>` or `-S<n>`; an occurrence whose
        // value does not parse as `u32` is skipped and the scan continues.
        let flag = |short: &str, long: &str| -> Option<u32> {
            own.iter().enumerate().find_map(|(i, arg)| {
                let arg = arg.as_str();
                let value = if arg == short || arg == long {
                    own.get(i + 1).map(String::as_str)
                } else if let Some(rest) = arg.strip_prefix(long) {
                    rest.strip_prefix('=')
                } else {
                    arg.strip_prefix(short)
                };
                value.and_then(|v| v.parse().ok())
            })
        };
        if let (Some(w), Some(h)) = (
            flag("-W", "--output-width"),
            flag("-H", "--output-height"),
        ) {
            return Some((w, h));
        }
    }
    None
}
|
||||||
|
|
||||||
|
/// Name of the running autologin gaming-mode unit (`gamescope-session-plus@<client>.service`), if
/// any — the box's own game-mode session, which [`ensure_box_gamescope_mode`] reconfigures +
/// restarts. `None` when `systemctl` cannot be run or lists no matching unit.
fn running_autologin_gamescope_unit() -> Option<String> {
    const LIST_RUNNING: [&str; 7] = [
        "--user",
        "list-units",
        "--type=service",
        "--state=running",
        "--no-legend",
        "--plain",
        "gamescope-session-plus@*.service",
    ];

    let listing = Command::new("systemctl").args(LIST_RUNNING).output().ok()?;
    let stdout = String::from_utf8_lossy(&listing.stdout);
    for line in stdout.lines() {
        // The unit name is the first whitespace-separated column of each row.
        let Some(unit) = line.split_whitespace().next() else {
            continue;
        };
        if unit.starts_with("gamescope-session-plus@") && unit.ends_with(".service") {
            return Some(unit.to_string());
        }
    }
    None
}
|
||||||
|
|
||||||
/// Stop every running autologin gaming-mode session (`gamescope-session-plus@*.service`) so its
|
/// Stop every running autologin gaming-mode session (`gamescope-session-plus@*.service`) so its
|
||||||
/// single-instance Steam is free for our own host-managed session. Records the units so
|
/// single-instance Steam is free for our own host-managed session. Records the units so
|
||||||
/// [`schedule_restore_tv_session`] can restart them on disconnect. Our own session is the transient
|
/// [`schedule_restore_tv_session`] can restart them on disconnect. Our own session is the transient
|
||||||
|
|||||||
@@ -6,8 +6,14 @@
|
|||||||
//! node for it. The node lives on the user's default PipeWire daemon, so [`VirtualOutput::remote_fd`]
|
//! node for it. The node lives on the user's default PipeWire daemon, so [`VirtualOutput::remote_fd`]
|
||||||
//! is `None` and capture connects to that daemon directly.
|
//! is `None` and capture connects to that daemon directly.
|
||||||
//!
|
//!
|
||||||
//! Requirements: KWin must expose the privileged `zkde_screencast` global — a real Plasma session
|
//! Requirements: KWin must expose the privileged `zkde_screencast` global. It is a *restricted*
|
||||||
//! authorizes it for its own clients; the headless test exposes it to bare clients via
|
//! protocol — KWin advertises it only to a client whose installed `.desktop` lists it under
|
||||||
|
//! `X-KDE-Wayland-Interfaces` (KWin maps the connecting client to a `.desktop` by resolving
|
||||||
|
//! `/proc/<pid>/exe` against `Exec=`, then caches the grant per-executable for the session's life).
|
||||||
|
//! So an interactive Plasma session does NOT hand it to a bare client — the host packages ship
|
||||||
|
//! `io.unom.Punktfunk.Host.desktop` (`Exec=/usr/bin/punktfunk-host`,
|
||||||
|
//! `X-KDE-Wayland-Interfaces=zkde_screencast_unstable_v1,…`) so it is present before the host first
|
||||||
|
//! connects. The headless test path instead exposes it to bare clients via
|
||||||
//! `KWIN_WAYLAND_NO_PERMISSION_CHECKS=1`. The compositor backend must implement
|
//! `KWIN_WAYLAND_NO_PERMISSION_CHECKS=1`. The compositor backend must implement
|
||||||
//! `createVirtualOutput`: the **DRM backend** (any version) or the **VirtualBackend since KWin
|
//! `createVirtualOutput`: the **DRM backend** (any version) or the **VirtualBackend since KWin
|
||||||
//! 6.5.6** (`kwin_wayland --virtual`); on `--virtual` < 6.5.6 the request fails with
|
//! 6.5.6** (`kwin_wayland --virtual`); on `--virtual` < 6.5.6 the request fails with
|
||||||
@@ -15,6 +21,8 @@
|
|||||||
//! the KWin session's environment.
|
//! the KWin session's environment.
|
||||||
|
|
||||||
#![allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)]
|
#![allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)]
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use super::{Mode, VirtualDisplay, VirtualOutput};
|
use super::{Mode, VirtualDisplay, VirtualOutput};
|
||||||
use anyhow::{anyhow, bail, Context, Result};
|
use anyhow::{anyhow, bail, Context, Result};
|
||||||
@@ -404,9 +412,11 @@ pub fn probe() -> Result<()> {
|
|||||||
queue.roundtrip(&mut state).context("registry roundtrip")?;
|
queue.roundtrip(&mut state).context("registry roundtrip")?;
|
||||||
if state.screencast.is_none() {
|
if state.screencast.is_none() {
|
||||||
bail!(
|
bail!(
|
||||||
"KWin is up but does not (yet) expose zkde_screencast_unstable_v1 — needs a real \
|
"KWin is up but does not expose zkde_screencast_unstable_v1 to this client — KWin gates \
|
||||||
KDE session (or KWIN_WAYLAND_NO_PERMISSION_CHECKS=1), and KWin ≥ 6.5.6 for the \
|
it on the host's .desktop X-KDE-Wayland-Interfaces (install \
|
||||||
headless virtual output"
|
io.unom.Punktfunk.Host.desktop with Exec=/usr/bin/punktfunk-host, then re-login so KWin \
|
||||||
|
re-reads it — the grant is cached per-exe on first connect), or set \
|
||||||
|
KWIN_WAYLAND_NO_PERMISSION_CHECKS=1 for the headless test; needs KWin ≥ 6.5.6"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -435,8 +445,9 @@ fn run(
|
|||||||
|
|
||||||
let screencast = state.screencast.clone().ok_or_else(|| {
|
let screencast = state.screencast.clone().ok_or_else(|| {
|
||||||
anyhow!(
|
anyhow!(
|
||||||
"KWin does not expose zkde_screencast_unstable_v1 (need a real KDE session, or run \
|
"KWin does not expose zkde_screencast_unstable_v1 to this client — install the host's \
|
||||||
KWin with KWIN_WAYLAND_NO_PERMISSION_CHECKS=1 for the headless test)"
|
.desktop (io.unom.Punktfunk.Host.desktop, X-KDE-Wayland-Interfaces) and re-login so \
|
||||||
|
KWin authorizes it, or run KWin with KWIN_WAYLAND_NO_PERMISSION_CHECKS=1 (headless test)"
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@@ -495,6 +506,11 @@ fn run(
|
|||||||
events: libc::POLLIN,
|
events: libc::POLLIN,
|
||||||
revents: 0,
|
revents: 0,
|
||||||
};
|
};
|
||||||
|
// SAFETY: `&mut pfd` points at a single live, fully-initialized `libc::pollfd` on the stack, and
|
||||||
|
// the count `1` matches that one-element array, so `poll` reads `fd`/`events` and writes `revents`
|
||||||
|
// strictly within `pfd`. `pfd.fd` is the Wayland connection's fd, valid because `conn` (and the
|
||||||
|
// `prepare_read` guard) are alive across the call. `poll` blocks up to 200 ms and writes only
|
||||||
|
// `revents`; `pfd` outlives the synchronous call and aliases nothing (a fresh local).
|
||||||
let r = unsafe { libc::poll(&mut pfd, 1, 200) };
|
let r = unsafe { libc::poll(&mut pfd, 1, 200) };
|
||||||
if r > 0 && (pfd.revents & libc::POLLIN) != 0 {
|
if r > 0 && (pfd.revents & libc::POLLIN) != 0 {
|
||||||
let _ = guard.read();
|
let _ = guard.read();
|
||||||
|
|||||||
@@ -13,7 +13,9 @@
|
|||||||
//! its `Drop` releases the refcount (a *stale* lease — its monitor was preempted + recreated under it —
|
//! its `Drop` releases the refcount (a *stale* lease — its monitor was preempted + recreated under it —
|
||||||
//! is a no-op, so it can never tear down the live monitor).
|
//! is a no-op, so it can never tear down the live monitor).
|
||||||
|
|
||||||
use std::ffi::c_void;
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use std::os::windows::io::{AsRawHandle, OwnedHandle};
|
use std::os::windows::io::{AsRawHandle, OwnedHandle};
|
||||||
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
|
||||||
use std::sync::{Arc, Mutex, Once, OnceLock};
|
use std::sync::{Arc, Mutex, Once, OnceLock};
|
||||||
@@ -25,7 +27,8 @@ use windows::Win32::Foundation::{HANDLE, LUID};
|
|||||||
|
|
||||||
use super::{Mode, VirtualOutput};
|
use super::{Mode, VirtualOutput};
|
||||||
use crate::win_display::{
|
use crate::win_display::{
|
||||||
isolate_displays_ccd, resolve_gdi_name, restore_displays_ccd, set_active_mode, SavedConfig,
|
force_extend_topology, isolate_displays_ccd, resolve_gdi_name, restore_displays_ccd,
|
||||||
|
set_active_mode, SavedConfig,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// The per-backend REMOVE key the driver stamps on ADD and consumes on REMOVE. SudoVDA keys monitors by
|
/// The per-backend REMOVE key the driver stamps on ADD and consumes on REMOVE. SudoVDA keys monitors by
|
||||||
@@ -61,8 +64,12 @@ pub(crate) trait VdisplayDriver: Send + Sync {
|
|||||||
///
|
///
|
||||||
/// # Safety
|
/// # Safety
|
||||||
/// `dev` must be the live control handle from [`open`](Self::open).
|
/// `dev` must be the live control handle from [`open`](Self::open).
|
||||||
unsafe fn add_monitor(&self, dev: HANDLE, mode: Mode, render_luid: Option<LUID>)
|
unsafe fn add_monitor(
|
||||||
-> Result<AddedMonitor>;
|
&self,
|
||||||
|
dev: HANDLE,
|
||||||
|
mode: Mode,
|
||||||
|
render_luid: Option<LUID>,
|
||||||
|
) -> Result<AddedMonitor>;
|
||||||
/// REMOVE the monitor identified by `key`.
|
/// REMOVE the monitor identified by `key`.
|
||||||
///
|
///
|
||||||
/// # Safety
|
/// # Safety
|
||||||
@@ -148,7 +155,8 @@ pub(crate) fn init(driver: Box<dyn VdisplayDriver>) -> &'static VirtualDisplayMa
|
|||||||
/// The process-wide manager. Panics if reached before a backend called [`init`] — by construction a
|
/// The process-wide manager. Panics if reached before a backend called [`init`] — by construction a
|
||||||
/// session is only ever created after `vdisplay::open` constructed the backend (which calls `init`).
|
/// session is only ever created after `vdisplay::open` constructed the backend (which calls `init`).
|
||||||
pub(crate) fn vdm() -> &'static VirtualDisplayManager {
|
pub(crate) fn vdm() -> &'static VirtualDisplayManager {
|
||||||
VDM.get().expect("VirtualDisplayManager used before a backend initialised it")
|
VDM.get()
|
||||||
|
.expect("VirtualDisplayManager used before a backend initialised it")
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VirtualDisplayManager {
|
impl VirtualDisplayManager {
|
||||||
@@ -160,11 +168,15 @@ impl VirtualDisplayManager {
|
|||||||
/// double-open.
|
/// double-open.
|
||||||
fn ensure_device(&self) -> Result<HANDLE> {
|
fn ensure_device(&self) -> Result<HANDLE> {
|
||||||
if let Some(d) = self.device.get() {
|
if let Some(d) = self.device.get() {
|
||||||
return Ok(HANDLE(d.as_raw_handle() as *mut c_void));
|
return Ok(HANDLE(d.as_raw_handle()));
|
||||||
}
|
}
|
||||||
|
// SAFETY: `VdisplayDriver::open` is `unsafe` only because it issues SetupAPI + `DeviceIoControl`
|
||||||
|
// FFI in the caller's apartment; `ensure_device` runs that on the acquiring thread under the
|
||||||
|
// `state` lock (callers hold it), so there is no concurrent open. `open` has no handle
|
||||||
|
// precondition to uphold, and the `OwnedHandle` it returns is the sole owner of the device.
|
||||||
let (handle, watchdog_s) = unsafe { self.driver.open()? };
|
let (handle, watchdog_s) = unsafe { self.driver.open()? };
|
||||||
self.watchdog_s.store(watchdog_s, Ordering::Relaxed);
|
self.watchdog_s.store(watchdog_s, Ordering::Relaxed);
|
||||||
let raw = HANDLE(handle.as_raw_handle() as *mut c_void);
|
let raw = HANDLE(handle.as_raw_handle());
|
||||||
let _ = self.device.set(Arc::new(handle));
|
let _ = self.device.set(Arc::new(handle));
|
||||||
Ok(raw)
|
Ok(raw)
|
||||||
}
|
}
|
||||||
@@ -172,9 +184,7 @@ impl VirtualDisplayManager {
|
|||||||
/// The live control handle for the pinger/linger threads (lock-free: the device never changes once
|
/// The live control handle for the pinger/linger threads (lock-free: the device never changes once
|
||||||
/// opened). `None` only before the first acquire opened it.
|
/// opened). `None` only before the first acquire opened it.
|
||||||
fn device_handle(&self) -> Option<HANDLE> {
|
fn device_handle(&self) -> Option<HANDLE> {
|
||||||
self.device
|
self.device.get().map(|d| HANDLE(d.as_raw_handle()))
|
||||||
.get()
|
|
||||||
.map(|d| HANDLE(d.as_raw_handle() as *mut c_void))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Open + initialise the backend (validates the driver is present). Mirrors the old
|
/// Open + initialise the backend (validates the driver is present). Mirrors the old
|
||||||
@@ -197,8 +207,7 @@ impl VirtualDisplayManager {
|
|||||||
// client is gone). A REUSED IddCx swap-chain is DEAD, so joining it hands a black screen —
|
// client is gone). A REUSED IddCx swap-chain is DEAD, so joining it hands a black screen —
|
||||||
// PREEMPT: tear the old monitor down (its key/topology are restored) and create a fresh one. The
|
// PREEMPT: tear the old monitor down (its key/topology are restored) and create a fresh one. The
|
||||||
// old session's lease is gen-stamped, so its later drop is a no-op and can't tear down the new one.
|
// old session's lease is gen-stamped, so its later drop is a no-op and can't tear down the new one.
|
||||||
if idd_push_mode()
|
if idd_push_mode() && matches!(*state, MgrState::Active { .. } | MgrState::Lingering { .. })
|
||||||
&& matches!(*state, MgrState::Active { .. } | MgrState::Lingering { .. })
|
|
||||||
{
|
{
|
||||||
if let MgrState::Active { mon, .. } | MgrState::Lingering { mon, .. } =
|
if let MgrState::Active { mon, .. } | MgrState::Lingering { mon, .. } =
|
||||||
std::mem::replace(&mut *state, MgrState::Idle)
|
std::mem::replace(&mut *state, MgrState::Idle)
|
||||||
@@ -207,6 +216,10 @@ impl VirtualDisplayManager {
|
|||||||
old_target = mon.target_id,
|
old_target = mon.target_id,
|
||||||
"IDD-push reconnect — preempting the prior session, recreating a fresh monitor"
|
"IDD-push reconnect — preempting the prior session, recreating a fresh monitor"
|
||||||
);
|
);
|
||||||
|
// SAFETY: `teardown` requires `dev` to be the live control handle; `dev` is the value
|
||||||
|
// `ensure_device()` returned above (the device is cached in the `OnceLock` and never
|
||||||
|
// closed for the manager's lifetime). `mon` was moved out of the prior `Active`/
|
||||||
|
// `Lingering` state by `mem::replace`, so it is exclusively owned here — no aliasing.
|
||||||
unsafe { self.teardown(dev, mon) };
|
unsafe { self.teardown(dev, mon) };
|
||||||
// Let the OS finish the ASYNC monitor departure before the next ADD; a back-to-back
|
// Let the OS finish the ASYNC monitor departure before the next ADD; a back-to-back
|
||||||
// REMOVE→ADD races the teardown and the ADD IOCTL is rejected under reconnect churn.
|
// REMOVE→ADD races the teardown and the ADD IOCTL is rejected under reconnect churn.
|
||||||
@@ -220,21 +233,37 @@ impl VirtualDisplayManager {
|
|||||||
if let MgrState::Active { mon, refs } = &mut *state {
|
if let MgrState::Active { mon, refs } = &mut *state {
|
||||||
*refs += 1;
|
*refs += 1;
|
||||||
if mon.mode != mode {
|
if mon.mode != mode {
|
||||||
|
// SAFETY: `reconfigure` only manipulates the live display topology via the CCD/GDI
|
||||||
|
// helpers and needs an exclusive `&mut Monitor`. `mon` is the `&mut` into the current
|
||||||
|
// `Active` state, held under the `state` lock, so nothing else reconfigures it concurrently.
|
||||||
unsafe { self.reconfigure(mon, mode) };
|
unsafe { self.reconfigure(mon, mode) };
|
||||||
}
|
}
|
||||||
tracing::info!(refs = *refs, backend = self.driver.name(), "virtual monitor reused (concurrent / reconfigure session)");
|
tracing::info!(
|
||||||
|
refs = *refs,
|
||||||
|
backend = self.driver.name(),
|
||||||
|
"virtual monitor reused (concurrent / reconfigure session)"
|
||||||
|
);
|
||||||
return Ok(self.output_for(mon));
|
return Ok(self.output_for(mon));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Idle or Lingering: repurpose a lingering monitor / create a fresh one → Active{refs:1}.
|
// Idle or Lingering: repurpose a lingering monitor / create a fresh one → Active{refs:1}.
|
||||||
let mon = match std::mem::replace(&mut *state, MgrState::Idle) {
|
let mon = match std::mem::replace(&mut *state, MgrState::Idle) {
|
||||||
MgrState::Lingering { mut mon, .. } => {
|
MgrState::Lingering { mut mon, .. } => {
|
||||||
tracing::info!(backend = self.driver.name(), "virtual monitor reused (reconnect within the linger window)");
|
tracing::info!(
|
||||||
|
backend = self.driver.name(),
|
||||||
|
"virtual monitor reused (reconnect within the linger window)"
|
||||||
|
);
|
||||||
if mon.mode != mode {
|
if mon.mode != mode {
|
||||||
|
// SAFETY: `reconfigure` needs an exclusive `&mut Monitor` and only touches the live
|
||||||
|
// display topology. `mon` is the local monitor just moved out of the `Lingering`
|
||||||
|
// state (sole owner), and we hold the `state` lock — no concurrent reconfigure.
|
||||||
unsafe { self.reconfigure(&mut mon, mode) };
|
unsafe { self.reconfigure(&mut mon, mode) };
|
||||||
}
|
}
|
||||||
mon
|
mon
|
||||||
}
|
}
|
||||||
|
// SAFETY: `create_monitor` requires `dev` to be the live control handle; `dev` is the
|
||||||
|
// handle `ensure_device()` returned above (cached in the `OnceLock`, never closed for the
|
||||||
|
// manager's lifetime), and we hold the `state` lock.
|
||||||
MgrState::Idle => unsafe { self.create_monitor(dev, mode)? },
|
MgrState::Idle => unsafe { self.create_monitor(dev, mode)? },
|
||||||
MgrState::Active { .. } => unreachable!("handled above"),
|
MgrState::Active { .. } => unreachable!("handled above"),
|
||||||
};
|
};
|
||||||
@@ -263,17 +292,27 @@ impl VirtualDisplayManager {
|
|||||||
/// # Safety
|
/// # Safety
|
||||||
/// `dev` must be the live control handle.
|
/// `dev` must be the live control handle.
|
||||||
unsafe fn create_monitor(&'static self, dev: HANDLE, mode: Mode) -> Result<Monitor> {
|
unsafe fn create_monitor(&'static self, dev: HANDLE, mode: Mode) -> Result<Monitor> {
|
||||||
|
// SAFETY: `create_monitor`'s own `# Safety` contract guarantees `dev` is the live control
|
||||||
|
// handle; we forward it unchanged to `add_monitor`, whose precondition is exactly that.
|
||||||
|
// `resolve_render_pin()` returns an `Option<LUID>` by value (plain `Copy`), so no borrowed
|
||||||
|
// memory crosses the call.
|
||||||
let added = unsafe { self.driver.add_monitor(dev, mode, resolve_render_pin())? };
|
let added = unsafe { self.driver.add_monitor(dev, mode, resolve_render_pin())? };
|
||||||
|
|
||||||
// Mandatory keepalive: ping inside the watchdog window or the driver tears all displays down.
|
// Mandatory keepalive: ping inside the watchdog window or the driver tears all displays down.
|
||||||
// The pinger reaches the singleton for both the device + the driver — no raw-handle smuggle.
|
// The pinger reaches the singleton for both the device + the driver — no raw-handle smuggle.
|
||||||
let stop = Arc::new(AtomicBool::new(false));
|
let stop = Arc::new(AtomicBool::new(false));
|
||||||
let interval = Duration::from_millis(self.watchdog_s.load(Ordering::Relaxed) as u64 * 1000 / 3);
|
let interval =
|
||||||
|
Duration::from_millis(self.watchdog_s.load(Ordering::Relaxed) as u64 * 1000 / 3);
|
||||||
let stop_t = stop.clone();
|
let stop_t = stop.clone();
|
||||||
let pinger = thread::spawn(move || {
|
let pinger = thread::spawn(move || {
|
||||||
let mut warned = false;
|
let mut warned = false;
|
||||||
while !stop_t.load(Ordering::Relaxed) {
|
while !stop_t.load(Ordering::Relaxed) {
|
||||||
if let Some(h) = vdm().device_handle() {
|
if let Some(h) = vdm().device_handle() {
|
||||||
|
// SAFETY: `ping` requires `dev` to be the live control handle. `h` is from
|
||||||
|
// `device_handle()` (the `Some` branch) — the `OnceLock<Arc<OwnedHandle>>` that,
|
||||||
|
// once set, is never cleared or closed for the process lifetime, so the handle is
|
||||||
|
// live for this call. The pinger thread only spins while the `&'static` manager
|
||||||
|
// singleton (and thus the device) lives.
|
||||||
match unsafe { vdm().driver.ping(h) } {
|
match unsafe { vdm().driver.ping(h) } {
|
||||||
Ok(()) => warned = false,
|
Ok(()) => warned = false,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
@@ -288,11 +327,23 @@ impl VirtualDisplayManager {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Windows defaults a new IddCx monitor into CLONE mode when a physical display is already
|
||||||
|
// active (a laptop panel, an attached monitor): the cloned IDD shares that display's source, so
|
||||||
|
// the OS never commits a distinct path for it and capture sees no frames. Force EXTEND first so
|
||||||
|
// the IDD comes up as its OWN active path; the resolve loop below then finds it. Idempotent /
|
||||||
|
// no-op on a sole-display box, so it's safe on the headless single-GPU path too.
|
||||||
|
// SAFETY: `force_extend_topology` only calls `SetDisplayConfig` (a CCD topology apply) with no
|
||||||
|
// borrowed caller memory; it runs under the manager `state` lock, the sole topology mutator.
|
||||||
|
unsafe { force_extend_topology() };
|
||||||
|
|
||||||
// Resolve the capture target. May be None on a GPU-less box (target added but not WDDM-activated);
|
// Resolve the capture target. May be None on a GPU-less box (target added but not WDDM-activated);
|
||||||
// the capture backend re-resolves once a GPU is present.
|
// the capture backend re-resolves once a GPU is present.
|
||||||
let mut gdi_name = None;
|
let mut gdi_name = None;
|
||||||
for _ in 0..15 {
|
for _ in 0..15 {
|
||||||
thread::sleep(Duration::from_millis(200));
|
thread::sleep(Duration::from_millis(200));
|
||||||
|
// SAFETY: `resolve_gdi_name` is `unsafe` for its CCD (QueryDisplayConfig) FFI; it takes a
|
||||||
|
// plain `Copy` `u32` target id by value and returns an owned `String`, so no caller memory
|
||||||
|
// is borrowed across the call.
|
||||||
if let Some(n) = unsafe { resolve_gdi_name(added.target_id) } {
|
if let Some(n) = unsafe { resolve_gdi_name(added.target_id) } {
|
||||||
gdi_name = Some(n);
|
gdi_name = Some(n);
|
||||||
break;
|
break;
|
||||||
@@ -309,6 +360,9 @@ impl VirtualDisplayManager {
|
|||||||
// display(s) first via the atomic CCD path promotes the IDD to a composited primary with no
|
// display(s) first via the atomic CCD path promotes the IDD to a composited primary with no
|
||||||
// MODE_CHANGE storm. Opt out with PUNKTFUNK_NO_ISOLATE=1.
|
// MODE_CHANGE storm. Opt out with PUNKTFUNK_NO_ISOLATE=1.
|
||||||
if std::env::var("PUNKTFUNK_NO_ISOLATE").is_err() {
|
if std::env::var("PUNKTFUNK_NO_ISOLATE").is_err() {
|
||||||
|
// SAFETY: `isolate_displays_ccd` is `unsafe` for its CCD topology FFI; it takes a
|
||||||
|
// `Copy` `u32` by value and returns an owned `SavedConfig` snapshot (no borrowed
|
||||||
|
// memory crosses). It runs under the `state` lock, the sole mutator of the topology.
|
||||||
ccd_saved = unsafe { isolate_displays_ccd(added.target_id) };
|
ccd_saved = unsafe { isolate_displays_ccd(added.target_id) };
|
||||||
} else {
|
} else {
|
||||||
tracing::info!("display isolation skipped (PUNKTFUNK_NO_ISOLATE) — IDD stays extended");
|
tracing::info!("display isolation skipped (PUNKTFUNK_NO_ISOLATE) — IDD stays extended");
|
||||||
@@ -340,10 +394,15 @@ impl VirtualDisplayManager {
|
|||||||
/// Touches the live display topology via the CCD/GDI helpers.
|
/// Touches the live display topology via the CCD/GDI helpers.
|
||||||
unsafe fn reconfigure(&self, mon: &mut Monitor, mode: Mode) {
|
unsafe fn reconfigure(&self, mon: &mut Monitor, mode: Mode) {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
old = format!("{}x{}@{}", mon.mode.width, mon.mode.height, mon.mode.refresh_hz),
|
old = format!(
|
||||||
|
"{}x{}@{}",
|
||||||
|
mon.mode.width, mon.mode.height, mon.mode.refresh_hz
|
||||||
|
),
|
||||||
new = format!("{}x{}@{}", mode.width, mode.height, mode.refresh_hz),
|
new = format!("{}x{}@{}", mode.width, mode.height, mode.refresh_hz),
|
||||||
"virtual-display: reconfiguring reused monitor to the new client mode"
|
"virtual-display: reconfiguring reused monitor to the new client mode"
|
||||||
);
|
);
|
||||||
|
// SAFETY: `resolve_gdi_name` is `unsafe` for its CCD FFI; it takes the `Copy` `u32`
|
||||||
|
// `mon.target_id` by value and returns an owned `String`, so nothing borrowed crosses the call.
|
||||||
if let Some(n) = unsafe { resolve_gdi_name(mon.target_id) } {
|
if let Some(n) = unsafe { resolve_gdi_name(mon.target_id) } {
|
||||||
mon.gdi_name = Some(n);
|
mon.gdi_name = Some(n);
|
||||||
}
|
}
|
||||||
@@ -366,10 +425,16 @@ impl VirtualDisplayManager {
|
|||||||
if let Some(saved) = &mon.ccd_saved {
|
if let Some(saved) = &mon.ccd_saved {
|
||||||
restore_displays_ccd(saved);
|
restore_displays_ccd(saved);
|
||||||
}
|
}
|
||||||
|
// SAFETY: `teardown`'s own `# Safety` contract guarantees `dev` is the live control handle, and
|
||||||
|
// `remove_monitor` requires exactly that. `&mon.key` borrows the `MonitorKey` inside the
|
||||||
|
// still-owned `mon`, alive for this synchronous IOCTL, so the pointer the driver reads stays valid.
|
||||||
if let Err(e) = unsafe { self.driver.remove_monitor(dev, &mon.key) } {
|
if let Err(e) = unsafe { self.driver.remove_monitor(dev, &mon.key) } {
|
||||||
tracing::warn!("virtual-display REMOVE failed: {e:#}");
|
tracing::warn!("virtual-display REMOVE failed: {e:#}");
|
||||||
} else {
|
} else {
|
||||||
tracing::info!(backend = self.driver.name(), "virtual-display monitor removed");
|
tracing::info!(
|
||||||
|
backend = self.driver.name(),
|
||||||
|
"virtual-display monitor removed"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -386,10 +451,16 @@ impl VirtualDisplayManager {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
*state = match std::mem::replace(&mut *state, MgrState::Idle) {
|
*state = match std::mem::replace(&mut *state, MgrState::Idle) {
|
||||||
MgrState::Active { mon, refs } if refs > 1 => MgrState::Active { mon, refs: refs - 1 },
|
MgrState::Active { mon, refs } if refs > 1 => MgrState::Active {
|
||||||
|
mon,
|
||||||
|
refs: refs - 1,
|
||||||
|
},
|
||||||
MgrState::Active { mon, .. } => {
|
MgrState::Active { mon, .. } => {
|
||||||
let ms = linger_ms();
|
let ms = linger_ms();
|
||||||
tracing::info!(linger_ms = ms, "virtual-display: last session left — lingering before teardown");
|
tracing::info!(
|
||||||
|
linger_ms = ms,
|
||||||
|
"virtual-display: last session left — lingering before teardown"
|
||||||
|
);
|
||||||
MgrState::Lingering {
|
MgrState::Lingering {
|
||||||
mon,
|
mon,
|
||||||
until: Instant::now() + Duration::from_millis(ms),
|
until: Instant::now() + Duration::from_millis(ms),
|
||||||
@@ -471,6 +542,10 @@ impl VirtualDisplayManager {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
if let Some(mon) = taken {
|
if let Some(mon) = taken {
|
||||||
|
// SAFETY: `teardown` requires `dev` to be the live control handle; `dev` is from
|
||||||
|
// `self.device_handle()` (the `Some` checked just above), i.e. the cached
|
||||||
|
// `OwnedHandle` live for the process lifetime. `mon` was moved out of the
|
||||||
|
// `Lingering` state under the `state` lock, so it is exclusively owned here.
|
||||||
unsafe { self.teardown(dev, mon) };
|
unsafe { self.teardown(dev, mon) };
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -504,9 +579,13 @@ fn idd_push_mode() -> bool {
|
|||||||
/// ACCESS_LOST storm SudoVDA hit when pinned).
|
/// ACCESS_LOST storm SudoVDA hit when pinned).
|
||||||
fn resolve_render_pin() -> Option<LUID> {
|
fn resolve_render_pin() -> Option<LUID> {
|
||||||
if crate::config::config().render_adapter.is_some() {
|
if crate::config::config().render_adapter.is_some() {
|
||||||
|
// SAFETY: `resolve_render_adapter_luid` is `unsafe` only for its DXGI factory FFI; it takes no
|
||||||
|
// arguments and returns an `Option<LUID>` by value, so there is no input/borrow to keep valid.
|
||||||
unsafe { crate::win_adapter::resolve_render_adapter_luid() }
|
unsafe { crate::win_adapter::resolve_render_adapter_luid() }
|
||||||
} else if crate::config::config().idd_push {
|
} else if crate::config::config().idd_push {
|
||||||
tracing::info!("IDD push: pinning the discrete render GPU (SET_RENDER_ADAPTER)");
|
tracing::info!("IDD push: pinning the discrete render GPU (SET_RENDER_ADAPTER)");
|
||||||
|
// SAFETY: as above — `resolve_render_adapter_luid` takes no arguments and returns an
|
||||||
|
// `Option<LUID>` by value; the `unsafe` covers only its DXGI factory enumeration FFI.
|
||||||
unsafe { crate::win_adapter::resolve_render_adapter_luid() }
|
unsafe { crate::win_adapter::resolve_render_adapter_luid() }
|
||||||
} else {
|
} else {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
//!
|
//!
|
||||||
//! Control surface: a device-interface-GUID + `CreateFileW` + `DeviceIoControl` IOCTL protocol, with
|
//! Control surface: a device-interface-GUID + `CreateFileW` + `DeviceIoControl` IOCTL protocol, with
|
||||||
//! the wire contract OWNED by [`pf_driver_proto::control`] (versioned + `#[repr(C)] Pod` structs,
|
//! the wire contract OWNED by [`pf_driver_proto::control`] (versioned + `#[repr(C)] Pod` structs,
|
||||||
//! NOT the SudoVDA ABI). No DLL, no named pipe. See `docs/windows-host-rewrite.md`.
|
//! NOT the SudoVDA ABI). No DLL, no named pipe. See `design/windows-host-rewrite.md`.
|
||||||
//!
|
//!
|
||||||
//! This is a faithful clone of [`super::sudovda`] (the shipping fallback) repointed at the new driver:
|
//! This is a faithful clone of [`super::sudovda`] (the shipping fallback) repointed at the new driver:
|
||||||
//! same reference-counted/lingering monitor lifecycle, same CCD isolation + active-mode forcing — those
|
//! same reference-counted/lingering monitor lifecycle, same CCD isolation + active-mode forcing — those
|
||||||
@@ -14,6 +14,9 @@
|
|||||||
//! target id, so the CCD/DXGI code works unchanged). Only the driver-specific bits (GUID, IOCTL codes,
|
//! target id, so the CCD/DXGI code works unchanged). Only the driver-specific bits (GUID, IOCTL codes,
|
||||||
//! request/reply structs, the version handshake) differ, per `pf_driver_proto`.
|
//! request/reply structs, the version handshake) differ, per `pf_driver_proto`.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use std::ffi::c_void;
|
use std::ffi::c_void;
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
use std::os::windows::io::{FromRawHandle, OwnedHandle};
|
use std::os::windows::io::{FromRawHandle, OwnedHandle};
|
||||||
@@ -144,15 +147,26 @@ impl VdisplayDriver for PfVdisplayDriver {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn open(&self) -> Result<(OwnedHandle, u32)> {
|
unsafe fn open(&self) -> Result<(OwnedHandle, u32)> {
|
||||||
|
// SAFETY: `open_device` is `unsafe` only because it issues SetupAPI enumeration + `CreateFileW`
|
||||||
|
// FFI; it takes no arguments and returns an owned raw `HANDLE` (or `Err`). Called here on the
|
||||||
|
// backend-init thread, with no precondition beyond a valid thread context.
|
||||||
let device = unsafe { open_device()? };
|
let device = unsafe { open_device()? };
|
||||||
// HARD protocol-version check (unlike SudoVDA's best-effort log): a mismatched host/driver pair
|
// HARD protocol-version check (unlike SudoVDA's best-effort log): a mismatched host/driver pair
|
||||||
// fails loudly here rather than corrupting the IOCTL stream.
|
// fails loudly here rather than corrupting the IOCTL stream.
|
||||||
let mut info_buf = [0u8; size_of::<control::InfoReply>()];
|
let mut info_buf = [0u8; size_of::<control::InfoReply>()];
|
||||||
|
// SAFETY: `ioctl` requires `h` to be a valid device handle and its slices to be valid for the
|
||||||
|
// call. `device` is the live handle just returned by `open_device`. `IOCTL_GET_INFO` takes no
|
||||||
|
// input (`&[]`) and writes into `info_buf`, a stack `[u8; size_of::<InfoReply>()]` whose length
|
||||||
|
// is passed as the output size — so `DeviceIoControl` can't write OOB — and which outlives this
|
||||||
|
// synchronous call.
|
||||||
unsafe { ioctl(device, control::IOCTL_GET_INFO, &[], &mut info_buf) }
|
unsafe { ioctl(device, control::IOCTL_GET_INFO, &[], &mut info_buf) }
|
||||||
.context("pf-vdisplay IOCTL_GET_INFO (version handshake)")?;
|
.context("pf-vdisplay IOCTL_GET_INFO (version handshake)")?;
|
||||||
let info: control::InfoReply =
|
let info: control::InfoReply =
|
||||||
bytemuck::pod_read_unaligned(&info_buf[..size_of::<control::InfoReply>()]);
|
bytemuck::pod_read_unaligned(&info_buf[..size_of::<control::InfoReply>()]);
|
||||||
if info.protocol_version != pf_driver_proto::PROTOCOL_VERSION {
|
if info.protocol_version != pf_driver_proto::PROTOCOL_VERSION {
|
||||||
|
// SAFETY: `device` is the valid raw handle from `open_device` and has NOT yet been wrapped
|
||||||
|
// in an `OwnedHandle` (that happens only on the success path below), so this error path is
|
||||||
|
// the sole owner closing it exactly once — no double-close.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = CloseHandle(device);
|
let _ = CloseHandle(device);
|
||||||
}
|
}
|
||||||
@@ -171,12 +185,19 @@ impl VdisplayDriver for PfVdisplayDriver {
|
|||||||
);
|
);
|
||||||
// Reap monitors orphaned by a crashed previous host — a FIRST-CLASS op (driver returns SUCCESS).
|
// Reap monitors orphaned by a crashed previous host — a FIRST-CLASS op (driver returns SUCCESS).
|
||||||
let mut none: [u8; 0] = [];
|
let mut none: [u8; 0] = [];
|
||||||
|
// SAFETY: `device` is the live handle from `open_device` (still owned here, before it is wrapped
|
||||||
|
// below). `IOCTL_CLEAR_ALL` has no input and no output: `&[]` and the empty `none` slice pass
|
||||||
|
// zero-length buffers, so nothing is read or written through them.
|
||||||
if unsafe { ioctl(device, control::IOCTL_CLEAR_ALL, &[], &mut none) }.is_ok() {
|
if unsafe { ioctl(device, control::IOCTL_CLEAR_ALL, &[], &mut none) }.is_ok() {
|
||||||
tracing::info!("cleared orphaned virtual monitors on host startup");
|
tracing::info!("cleared orphaned virtual monitors on host startup");
|
||||||
} else {
|
} else {
|
||||||
tracing::warn!("pf-vdisplay IOCTL_CLEAR_ALL failed on startup (continuing)");
|
tracing::warn!("pf-vdisplay IOCTL_CLEAR_ALL failed on startup (continuing)");
|
||||||
}
|
}
|
||||||
Ok((
|
Ok((
|
||||||
|
// SAFETY: `device` is the valid handle from `open_device`, still owned here and NOT closed
|
||||||
|
// on this success path (the error paths above close it and return). `from_raw_handle`'s
|
||||||
|
// contract — caller owns a valid handle — holds, so ownership transfers cleanly into the
|
||||||
|
// `OwnedHandle`: exactly one owner, which `CloseHandle`s it on drop.
|
||||||
unsafe { OwnedHandle::from_raw_handle(device.0 as _) },
|
unsafe { OwnedHandle::from_raw_handle(device.0 as _) },
|
||||||
watchdog_s,
|
watchdog_s,
|
||||||
))
|
))
|
||||||
@@ -199,6 +220,9 @@ impl VdisplayDriver for PfVdisplayDriver {
|
|||||||
// SET_RENDER_ADAPTER (opt-in; pf-vdisplay IMPLEMENTS it). Non-fatal on failure: the driver reports
|
// SET_RENDER_ADAPTER (opt-in; pf-vdisplay IMPLEMENTS it). Non-fatal on failure: the driver reports
|
||||||
// its real render LUID in the shared header, so the host binds correctly even if this is ignored.
|
// its real render LUID in the shared header, so the host binds correctly even if this is ignored.
|
||||||
if let Some(luid) = render_luid {
|
if let Some(luid) = render_luid {
|
||||||
|
// SAFETY: `add_monitor`'s `# Safety` contract guarantees `dev` is the live control handle,
|
||||||
|
// which is `set_render_adapter`'s precondition; we forward it unchanged. `luid` is a plain
|
||||||
|
// `Copy` `LUID` passed by value — no borrow crosses the call.
|
||||||
match unsafe { set_render_adapter(dev, luid) } {
|
match unsafe { set_render_adapter(dev, luid) } {
|
||||||
Ok(()) => tracing::info!(
|
Ok(()) => tracing::info!(
|
||||||
luid = format!("{:08x}:{:08x}", luid.HighPart, luid.LowPart),
|
luid = format!("{:08x}:{:08x}", luid.HighPart, luid.LowPart),
|
||||||
@@ -210,14 +234,17 @@ impl VdisplayDriver for PfVdisplayDriver {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut out = [0u8; size_of::<control::AddReply>()];
|
let mut out = [0u8; size_of::<control::AddReply>()];
|
||||||
unsafe { ioctl(dev, control::IOCTL_ADD, bytemuck::bytes_of(&add), &mut out) }.with_context(
|
// SAFETY: per `add_monitor`'s contract `dev` is the live control handle. `bytemuck::bytes_of(&add)`
|
||||||
|| {
|
// borrows the local `AddRequest` (alive across this synchronous call) as the input bytes, and
|
||||||
|
// `out` is a stack `[u8; size_of::<AddReply>()]` whose length bounds the kernel's write — both
|
||||||
|
// buffers outlive the call.
|
||||||
|
unsafe { ioctl(dev, control::IOCTL_ADD, bytemuck::bytes_of(&add), &mut out) }
|
||||||
|
.with_context(|| {
|
||||||
format!(
|
format!(
|
||||||
"pf-vdisplay ADD {}x{}@{}",
|
"pf-vdisplay ADD {}x{}@{}",
|
||||||
mode.width, mode.height, mode.refresh_hz
|
mode.width, mode.height, mode.refresh_hz
|
||||||
)
|
)
|
||||||
},
|
})?;
|
||||||
)?;
|
|
||||||
// `pod_read_unaligned` (NOT `from_bytes`): `out` is a stack `[u8; N]` with no guaranteed 4-byte
|
// `pod_read_unaligned` (NOT `from_bytes`): `out` is a stack `[u8; N]` with no guaranteed 4-byte
|
||||||
// alignment, and `from_bytes` PANICS on a mismatch. This copies into an aligned `AddReply`.
|
// alignment, and `from_bytes` PANICS on a mismatch. This copies into an aligned `AddReply`.
|
||||||
let reply: control::AddReply =
|
let reply: control::AddReply =
|
||||||
@@ -260,11 +287,24 @@ impl VdisplayDriver for PfVdisplayDriver {
|
|||||||
session_id: *session_id,
|
session_id: *session_id,
|
||||||
};
|
};
|
||||||
let mut none: [u8; 0] = [];
|
let mut none: [u8; 0] = [];
|
||||||
unsafe { ioctl(dev, control::IOCTL_REMOVE, bytemuck::bytes_of(&req), &mut none) }.map(|_| ())
|
// SAFETY: per `remove_monitor`'s contract `dev` is the live control handle. `bytes_of(&req)`
|
||||||
|
// borrows the local `RemoveRequest` for the duration of this synchronous call as the input
|
||||||
|
// bytes; `none` is empty, so there is no output buffer.
|
||||||
|
unsafe {
|
||||||
|
ioctl(
|
||||||
|
dev,
|
||||||
|
control::IOCTL_REMOVE,
|
||||||
|
bytemuck::bytes_of(&req),
|
||||||
|
&mut none,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
.map(|_| ())
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn ping(&self, dev: HANDLE) -> Result<()> {
|
unsafe fn ping(&self, dev: HANDLE) -> Result<()> {
|
||||||
let mut none: [u8; 0] = [];
|
let mut none: [u8; 0] = [];
|
||||||
|
// SAFETY: per `ping`'s contract `dev` is the live control handle. `IOCTL_PING` has no input
|
||||||
|
// (`&[]`) and no output (`none` is empty), so no memory is read or written through the buffers.
|
||||||
unsafe { ioctl(dev, control::IOCTL_PING, &[], &mut none) }.map(|_| ())
|
unsafe { ioctl(dev, control::IOCTL_PING, &[], &mut none) }.map(|_| ())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -292,7 +332,11 @@ impl VirtualDisplay for PfVdisplayDisplay {
|
|||||||
|
|
||||||
/// Readiness probe: can we open the pf-vdisplay control device?
|
/// Readiness probe: can we open the pf-vdisplay control device?
|
||||||
pub fn probe() -> Result<()> {
|
pub fn probe() -> Result<()> {
|
||||||
|
// SAFETY: `open_device` is `unsafe` only for its SetupAPI + `CreateFileW` FFI; no arguments, returns
|
||||||
|
// an owned raw `HANDLE` (or `Err`).
|
||||||
let h = unsafe { open_device()? };
|
let h = unsafe { open_device()? };
|
||||||
|
// SAFETY: `h` is the handle just opened by `open_device` in this function, owned here and not yet
|
||||||
|
// handed anywhere else, so this closes it exactly once — no double-close, no use-after-close.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = CloseHandle(h);
|
let _ = CloseHandle(h);
|
||||||
}
|
}
|
||||||
@@ -301,6 +345,9 @@ pub fn probe() -> Result<()> {
|
|||||||
|
|
||||||
/// Is the pf-vdisplay driver present (device interface enumerable)?
|
/// Is the pf-vdisplay driver present (device interface enumerable)?
|
||||||
pub fn is_available() -> bool {
|
pub fn is_available() -> bool {
|
||||||
|
// SAFETY: `open_device` returns an owned raw `HANDLE`; on `Ok(h)` the handle is moved into the
|
||||||
|
// closure (sole owner) and closed exactly once via `CloseHandle`, on `Err` there is nothing to
|
||||||
|
// close — so no double-close and no leak of an opened handle. The `unsafe` covers both FFI calls.
|
||||||
unsafe { open_device().map(|h| CloseHandle(h)).is_ok() }
|
unsafe { open_device().map(|h| CloseHandle(h)).is_ok() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,402 @@
|
|||||||
|
//! `punktfunk-host driver install` / `web setup` - the install-time work the Windows installer's Inno
|
||||||
|
//! `[Run]` section delegates to the host EXE instead of locale-parsed PowerShell *files*.
|
||||||
|
//!
|
||||||
|
//! Why: Windows PowerShell 5.1 reads a BOM-less `.ps1` *file* in the machine's ANSI codepage, so on a
|
||||||
|
//! non-English locale a stray non-ASCII byte mis-decodes and the script aborts "unterminated string" -
|
||||||
|
//! exactly how the pf-vdisplay driver install silently failed on a German box. A compiled subcommand has
|
||||||
|
//! no such surface: the external tools it drives (`certutil`/`pnputil`/`nefconc`/`schtasks`/`netsh`/
|
||||||
|
//! `icacls`) are fixed string literals, not a file parsed in some codepage. (The installer's *inline*
|
||||||
|
//! `-Command` PowerShell in the `.iss` is unaffected - that's a command-line string, not a file read -
|
||||||
|
//! so it stays.) Sits next to `service install` (`service.rs`), the established Rust-owns-install pattern.
|
||||||
|
//!
|
||||||
|
//! Everything here is BEST-EFFORT: a hiccup warns but returns `Ok` - a non-zero exit would abort the
|
||||||
|
//! whole installer, and a missing driver only degrades the host to a physical display.
|
||||||
|
|
||||||
|
use anyhow::{bail, Context, Result};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::process::{Command, Stdio};
|
||||||
|
|
||||||
|
// ── arg + command helpers ──────────────────────────────────────────────────────────────────────
|
||||||
|
/// Return the argument that immediately follows the first occurrence of `name`.
///
/// Yields `None` when `name` is absent or is the last argument (no value after it).
fn flag_val(args: &[String], name: &str) -> Option<String> {
    let mut rest = args.iter();
    while let Some(arg) = rest.next() {
        if arg == name {
            // The iterator now points just past the flag: its next item is the value.
            return rest.next().cloned();
        }
    }
    None
}
|
||||||
|
/// Whether `name` appears anywhere in `args` (exact, case-sensitive match).
fn flag_present(args: &[String], name: &str) -> bool {
    for arg in args {
        if arg == name {
            return true;
        }
    }
    false
}
|
||||||
|
/// Spawn `cmd` with `args`, sending its stdout and stderr to the null device, and wait for it.
///
/// Returns `true` only when the process ran and reported a successful exit status; a spawn
/// failure (e.g. the program does not exist) counts as `false`.
fn run_quiet(cmd: &str, args: &[&str]) -> bool {
    let mut child = Command::new(cmd);
    child
        .args(args)
        .stdout(Stdio::null())
        .stderr(Stdio::null());
    match child.status() {
        Ok(status) => status.success(),
        Err(_) => false,
    }
}
|
||||||
|
/// Run `cmd` with `args` to completion and return its stdout, decoded as lossy UTF-8.
///
/// Returns an empty string when the process could not be spawned. The exit status is not
/// inspected: whatever the process wrote to stdout is returned even if it exited non-zero.
fn run_capture(cmd: &str, args: &[&str]) -> String {
    match Command::new(cmd).args(args).output() {
        Ok(out) => String::from_utf8_lossy(&out.stdout).into_owned(),
        Err(_) => String::new(),
    }
}
|
||||||
|
|
||||||
|
// ── `driver install [--gamepad] --dir <stage>` ─────────────────────────────────────────────────
|
||||||
|
pub fn driver_main(args: &[String]) -> Result<()> {
|
||||||
|
match args.first().map(String::as_str) {
|
||||||
|
Some("install") => driver_install(&args[1..]),
|
||||||
|
_ => bail!("usage: punktfunk-host driver install --dir <stage> [--gamepad]"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn driver_install(args: &[String]) -> Result<()> {
|
||||||
|
let dir =
|
||||||
|
PathBuf::from(flag_val(args, "--dir").context("driver install: --dir <stage> required")?);
|
||||||
|
let gamepad = flag_present(args, "--gamepad");
|
||||||
|
let (what, res) = if gamepad {
|
||||||
|
("gamepad", install_gamepad(&dir))
|
||||||
|
} else {
|
||||||
|
("pf-vdisplay", install_pf_vdisplay(&dir))
|
||||||
|
};
|
||||||
|
if let Err(e) = res {
|
||||||
|
// Never abort the installer on a driver failure (matches the old best-effort PS scripts).
|
||||||
|
eprintln!("warning: {what} driver install: {e:#} (the host degrades without it)");
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Trust the bundled self-signed driver cert: machine `Root` (so the chain validates) + `TrustedPublisher`
|
||||||
|
/// (so PnP installs without a prompt).
|
||||||
|
fn trust_cert(dir: &Path) {
|
||||||
|
match first_with_ext(dir, "cer") {
|
||||||
|
Some(cer) => {
|
||||||
|
let cer = cer.to_string_lossy().into_owned();
|
||||||
|
for store in ["Root", "TrustedPublisher"] {
|
||||||
|
if !run_quiet("certutil", &["-addstore", "-f", store, &cer]) {
|
||||||
|
eprintln!("warning: certutil -addstore {store} failed for {cer}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!("trusted driver cert {cer} (Root + TrustedPublisher)");
|
||||||
|
}
|
||||||
|
None => eprintln!(
|
||||||
|
"warning: no .cer in {} - driver may not install silently",
|
||||||
|
dir.display()
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn install_pf_vdisplay(dir: &Path) -> Result<()> {
|
||||||
|
let inf = dir.join("pf_vdisplay.inf");
|
||||||
|
if !inf.exists() {
|
||||||
|
bail!("no pf_vdisplay.inf in {}", dir.display());
|
||||||
|
}
|
||||||
|
trust_cert(dir);
|
||||||
|
// Create the ROOT device node only if absent (a blind re-create spawns a phantom duplicate, and the
|
||||||
|
// host binds interface index 0). ALWAYS nefconc (a clean ROOT\DISPLAY node), NEVER devgen (which makes
|
||||||
|
// persistent SWD\DEVGEN software devices that survive reboot + registry deletion).
|
||||||
|
if pf_vdisplay_present() {
|
||||||
|
println!("pf-vdisplay device node already present - leaving it.");
|
||||||
|
} else if let Some(nef) = first_named(dir, "nefconc.exe") {
|
||||||
|
let (class, guid) = inf_class(&inf);
|
||||||
|
let ok = run_quiet(
|
||||||
|
&nef.to_string_lossy(),
|
||||||
|
&[
|
||||||
|
"--create-device-node",
|
||||||
|
"--hardware-id",
|
||||||
|
"root\\pf_vdisplay",
|
||||||
|
"--class-name",
|
||||||
|
&class,
|
||||||
|
"--class-guid",
|
||||||
|
&guid,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
if ok {
|
||||||
|
println!("created root\\pf_vdisplay device node (nefconc)");
|
||||||
|
} else {
|
||||||
|
eprintln!("warning: nefconc --create-device-node failed");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
eprintln!(
|
||||||
|
"warning: nefconc.exe not found in {} - cannot create the device node",
|
||||||
|
dir.display()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Stage + bind the driver (idempotent; re-staging the same .inf is harmless).
|
||||||
|
if run_quiet(
|
||||||
|
"pnputil",
|
||||||
|
&["/add-driver", &inf.to_string_lossy(), "/install"],
|
||||||
|
) {
|
||||||
|
println!("pnputil /add-driver pf_vdisplay.inf /install ok");
|
||||||
|
} else {
|
||||||
|
eprintln!("warning: pnputil /add-driver /install failed (driver may not have installed)");
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn install_gamepad(dir: &Path) -> Result<()> {
|
||||||
|
let infs: Vec<PathBuf> = std::fs::read_dir(dir)
|
||||||
|
.with_context(|| format!("read {}", dir.display()))?
|
||||||
|
.flatten()
|
||||||
|
.map(|e| e.path())
|
||||||
|
.filter(|p| p.extension().is_some_and(|x| x.eq_ignore_ascii_case("inf")))
|
||||||
|
.collect();
|
||||||
|
if infs.is_empty() {
|
||||||
|
bail!("no driver .inf in {}", dir.display());
|
||||||
|
}
|
||||||
|
trust_cert(dir);
|
||||||
|
// Add each package to the store - no /install, no device node: the host SwDeviceCreate's the
|
||||||
|
// per-session devnode when a client forwards a pad, so PnP binds the store driver on demand.
|
||||||
|
for inf in &infs {
|
||||||
|
if run_quiet("pnputil", &["/add-driver", &inf.to_string_lossy()]) {
|
||||||
|
println!("pnputil /add-driver {} ok", file_name(inf));
|
||||||
|
} else {
|
||||||
|
eprintln!("warning: pnputil /add-driver {} failed", inf.display());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Is a punktfunk virtual-display device already enumerated? Matches the device ID / description, which
|
||||||
|
/// are NOT localized, so the substring check is locale-safe.
|
||||||
|
fn pf_vdisplay_present() -> bool {
|
||||||
|
let lo = run_capture("pnputil", &["/enum-devices", "/class", "Display"]).to_ascii_lowercase();
|
||||||
|
lo.contains("pf_vdisplay") || lo.contains("punktfunk virtual display")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read `Class` + `ClassGuid` from an INF's `[Version]` section so the node matches the shipped
/// driver; falls back to the Display class for anything missing or unreadable.
///
/// Robustness notes:
/// - The file is decoded by BOM (UTF-16LE/BE or UTF-8) and otherwise as lossy UTF-8, so a UTF-16
///   INF, or one with a stray non-UTF-8 byte in a comment, no longer silently yields the fallback.
/// - Only keys inside `[Version]` are considered, so an unrelated `Class = ...` entry in another
///   section cannot override the real value.
/// - Trailing `; comments` and surrounding double quotes are stripped from the value.
///
/// Returns `(class_name, class_guid)`; never fails.
fn inf_class(inf: &Path) -> (String, String) {
    let text = std::fs::read(inf)
        .map(|bytes| decode_inf_bytes(&bytes))
        .unwrap_or_default();

    let (mut class, mut guid) = (None, None);
    let mut in_version = false;
    for line in text.lines() {
        // Drop any trailing comment first, then surrounding whitespace.
        let t = line.split(';').next().unwrap_or("").trim();
        if t.starts_with('[') {
            // Section header: track whether we are inside [Version] (names are case-insensitive).
            let section = t[1..].split(']').next().unwrap_or("").trim();
            in_version = section.eq_ignore_ascii_case("version");
            continue;
        }
        if !in_version {
            continue;
        }
        if let Some((key, val)) = t.split_once('=') {
            let val = val.trim().trim_matches('"').trim().to_string();
            match key.trim().to_ascii_lowercase().as_str() {
                "class" => class = Some(val),
                "classguid" => guid = Some(val),
                _ => {}
            }
        }
    }
    (
        class
            .filter(|c| !c.is_empty())
            .unwrap_or_else(|| "Display".into()),
        guid.filter(|g| !g.is_empty())
            .unwrap_or_else(|| "{4d36e968-e325-11ce-bfc1-08002be10318}".into()),
    )
}

/// Decode raw INF bytes to text: honour a UTF-16LE/UTF-16BE/UTF-8 byte-order mark, otherwise
/// treat the bytes as (lossy) UTF-8. Invalid sequences become U+FFFD instead of failing.
fn decode_inf_bytes(bytes: &[u8]) -> String {
    fn utf16(body: &[u8], unit: fn([u8; 2]) -> u16) -> String {
        // A dangling odd byte at the end is ignored by `chunks_exact`.
        let units: Vec<u16> = body.chunks_exact(2).map(|c| unit([c[0], c[1]])).collect();
        String::from_utf16_lossy(&units)
    }
    match bytes {
        [0xFF, 0xFE, rest @ ..] => utf16(rest, u16::from_le_bytes),
        [0xFE, 0xFF, rest @ ..] => utf16(rest, u16::from_be_bytes),
        [0xEF, 0xBB, 0xBF, rest @ ..] => String::from_utf8_lossy(rest).into_owned(),
        other => String::from_utf8_lossy(other).into_owned(),
    }
}
|
||||||
|
|
||||||
|
// ── `web setup --app-dir <app> [--password-file <file>]` ────────────────────────────────────────
|
||||||
|
/// Name of the scheduled task for the web console; passed as `/tn` to every `schtasks` call in this
/// file (`/create`, `/run`, `/end`).
const WEB_TASK: &str = "PunktfunkWeb";
|
||||||
|
|
||||||
|
pub fn web_main(args: &[String]) -> Result<()> {
|
||||||
|
match args.first().map(String::as_str) {
|
||||||
|
Some("setup") => web_setup(&args[1..]),
|
||||||
|
_ => bail!("usage: punktfunk-host web setup --app-dir <app> [--password-file <file>]"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn web_setup(args: &[String]) -> Result<()> {
|
||||||
|
let app_dir =
|
||||||
|
PathBuf::from(flag_val(args, "--app-dir").context("web setup: --app-dir <app> required")?);
|
||||||
|
let pw_file = flag_val(args, "--password-file");
|
||||||
|
let data_dir = crate::gamestream::config_dir();
|
||||||
|
std::fs::create_dir_all(&data_dir).ok();
|
||||||
|
let pw_path = data_dir.join("web-password");
|
||||||
|
let token_path = data_dir.join("mgmt-token");
|
||||||
|
|
||||||
|
// 1. login password
|
||||||
|
set_web_password(&pw_path, pw_file.as_deref());
|
||||||
|
// 2. (upgrade-safe) stop any running console so the new task binds :3000 + the files unlock
|
||||||
|
stop_web_console();
|
||||||
|
// 3. register the PunktfunkWeb scheduled task
|
||||||
|
let cmd = app_dir.join("web").join("web-run.cmd");
|
||||||
|
if !cmd.exists() {
|
||||||
|
bail!("web launcher missing: {}", cmd.display());
|
||||||
|
}
|
||||||
|
register_web_task(&cmd)?;
|
||||||
|
// 4. firewall: inbound TCP 3000
|
||||||
|
if !run_quiet(
|
||||||
|
"netsh",
|
||||||
|
&[
|
||||||
|
"advfirewall",
|
||||||
|
"firewall",
|
||||||
|
"add",
|
||||||
|
"rule",
|
||||||
|
"name=punktfunk web console (TCP 3000)",
|
||||||
|
"dir=in",
|
||||||
|
"action=allow",
|
||||||
|
"protocol=TCP",
|
||||||
|
"localport=3000",
|
||||||
|
],
|
||||||
|
) {
|
||||||
|
eprintln!("warning: could not add the firewall rule for TCP 3000");
|
||||||
|
}
|
||||||
|
// 5. wait briefly for the host's mgmt token, then start (restart-on-failure picks it up otherwise)
|
||||||
|
for _ in 0..30 {
|
||||||
|
if token_path.exists() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
std::thread::sleep(std::time::Duration::from_secs(1));
|
||||||
|
}
|
||||||
|
run_quiet("schtasks", &["/run", "/tn", WEB_TASK]);
|
||||||
|
println!("web console set up + started (http://<host-ip>:3000)");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Source: a non-empty `--password-file` (fresh install) > keep existing (upgrade) > random fallback.
|
||||||
|
/// Writes `PUNKTFUNK_UI_PASSWORD=<pw>\n` (LF, no BOM) + ACLs it to Administrators + SYSTEM only.
|
||||||
|
fn set_web_password(pw_path: &Path, pw_file: Option<&str>) {
|
||||||
|
let password = pw_file
|
||||||
|
.and_then(|f| std::fs::read_to_string(f).ok())
|
||||||
|
.map(|s| s.trim().to_string())
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
.or_else(|| {
|
||||||
|
if pw_path.exists() {
|
||||||
|
println!("keeping existing web console password");
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(random_password())
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if let Some(pw) = password {
|
||||||
|
if std::fs::write(pw_path, format!("PUNKTFUNK_UI_PASSWORD={pw}\n")).is_err() {
|
||||||
|
eprintln!("warning: could not write {}", pw_path.display());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Lock down: drop inheritance, grant only Administrators (S-1-5-32-544) + SYSTEM (S-1-5-18).
|
||||||
|
let p = pw_path.to_string_lossy();
|
||||||
|
run_quiet(
|
||||||
|
"icacls",
|
||||||
|
&[
|
||||||
|
&p,
|
||||||
|
"/inheritance:r",
|
||||||
|
"/grant:r",
|
||||||
|
"*S-1-5-32-544:F",
|
||||||
|
"*S-1-5-18:F",
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 20-char URL/shell-safe password (no `/ + =`), like web-init.sh / the old web-setup.ps1.
|
||||||
|
fn random_password() -> String {
|
||||||
|
use base64::Engine;
|
||||||
|
use rand::RngCore;
|
||||||
|
let mut b = [0u8; 24];
|
||||||
|
rand::thread_rng().fill_bytes(&mut b);
|
||||||
|
base64::engine::general_purpose::STANDARD
|
||||||
|
.encode(b)
|
||||||
|
.chars()
|
||||||
|
.filter(|c| !matches!(c, '/' | '+' | '='))
|
||||||
|
.take(20)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stop + reap a running console before re-registering (upgrade-safe): end the task AND kill the :3000
|
||||||
|
/// listener owner (runtime-agnostic - a prior install may have run node vs the current bun). The listener
|
||||||
|
/// is identified by the wildcard foreign address (`0.0.0.0:0`/`[::]:0`), so the localized state word
|
||||||
|
/// ("LISTENING"/"ABHOEREN"/...) is never parsed.
|
||||||
|
fn stop_web_console() {
|
||||||
|
run_quiet("schtasks", &["/end", "/tn", WEB_TASK]);
|
||||||
|
for line in run_capture("netstat", &["-ano", "-p", "tcp"]).lines() {
|
||||||
|
let toks: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if toks.len() >= 5
|
||||||
|
&& toks[0].eq_ignore_ascii_case("tcp")
|
||||||
|
&& toks[1].ends_with(":3000")
|
||||||
|
&& (toks[2] == "0.0.0.0:0" || toks[2] == "[::]:0")
|
||||||
|
{
|
||||||
|
let pid = toks[toks.len() - 1];
|
||||||
|
if !pid.is_empty() && pid.bytes().all(|b| b.is_ascii_digit()) {
|
||||||
|
run_quiet("taskkill", &["/PID", pid, "/F"]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::thread::sleep(std::time::Duration::from_secs(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Register the boot/SYSTEM/restart-on-failure task via a generated Task Scheduler XML (`schtasks /xml`,
|
||||||
|
/// no COM). The XML declares UTF-16, so it's written UTF-16LE+BOM.
|
||||||
|
fn register_web_task(cmd: &Path) -> Result<()> {
|
||||||
|
let xml = format!(
|
||||||
|
"<?xml version=\"1.0\" encoding=\"UTF-16\"?>\n\
|
||||||
|
<Task version=\"1.2\" xmlns=\"http://schemas.microsoft.com/windows/2004/02/mit/task\">\n\
|
||||||
|
<RegistrationInfo><Description>punktfunk web management console (Nitro SSR on bun, :3000)</Description></RegistrationInfo>\n\
|
||||||
|
<Triggers><BootTrigger><Enabled>true</Enabled></BootTrigger></Triggers>\n\
|
||||||
|
<Principals><Principal id=\"Author\"><UserId>S-1-5-18</UserId><RunLevel>HighestAvailable</RunLevel></Principal></Principals>\n\
|
||||||
|
<Settings>\n\
|
||||||
|
<MultipleInstancesPolicy>IgnoreNew</MultipleInstancesPolicy>\n\
|
||||||
|
<DisallowStartIfOnBatteries>false</DisallowStartIfOnBatteries>\n\
|
||||||
|
<StopIfGoingOnBatteries>false</StopIfGoingOnBatteries>\n\
|
||||||
|
<StartWhenAvailable>true</StartWhenAvailable>\n\
|
||||||
|
<ExecutionTimeLimit>PT0S</ExecutionTimeLimit>\n\
|
||||||
|
<RestartOnFailure><Interval>PT1M</Interval><Count>10</Count></RestartOnFailure>\n\
|
||||||
|
</Settings>\n\
|
||||||
|
<Actions Context=\"Author\"><Exec><Command>{}</Command></Exec></Actions>\n\
|
||||||
|
</Task>",
|
||||||
|
xml_escape(&cmd.to_string_lossy())
|
||||||
|
);
|
||||||
|
let xml_path = std::env::temp_dir().join("punktfunk-web-task.xml");
|
||||||
|
write_utf16le_bom(&xml_path, &xml)?;
|
||||||
|
let ok = run_quiet(
|
||||||
|
"schtasks",
|
||||||
|
&[
|
||||||
|
"/create",
|
||||||
|
"/tn",
|
||||||
|
WEB_TASK,
|
||||||
|
"/xml",
|
||||||
|
&xml_path.to_string_lossy(),
|
||||||
|
"/f",
|
||||||
|
],
|
||||||
|
);
|
||||||
|
let _ = std::fs::remove_file(&xml_path);
|
||||||
|
if ok {
|
||||||
|
println!("registered scheduled task {WEB_TASK} -> {}", cmd.display());
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
bail!("schtasks /create {WEB_TASK} failed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_utf16le_bom(path: &Path, s: &str) -> Result<()> {
|
||||||
|
let mut bytes = vec![0xFFu8, 0xFE]; // UTF-16LE BOM
|
||||||
|
for u in s.encode_utf16() {
|
||||||
|
bytes.extend_from_slice(&u.to_le_bytes());
|
||||||
|
}
|
||||||
|
std::fs::write(path, bytes).with_context(|| format!("write {}", path.display()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Escape `s` for use as XML element content: `&` -> `&amp;`, `<` -> `&lt;`, `>` -> `&gt;`.
///
/// Done in a single pass so an already-produced `&` of an entity is never escaped again.
/// Quotes are left untouched, so the result is meant for element text, not attribute values.
fn xml_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            other => out.push(other),
        }
    }
    out
}
|
||||||
|
|
||||||
|
/// Return the first entry of `dir` (in directory-iteration order) whose extension equals `ext`,
/// ignoring ASCII case. `None` when `dir` cannot be read or nothing matches; unreadable entries
/// are skipped.
fn first_with_ext(dir: &Path, ext: &str) -> Option<PathBuf> {
    let entries = std::fs::read_dir(dir).ok()?;
    for entry in entries.flatten() {
        let candidate = entry.path();
        let matches = match candidate.extension() {
            Some(found) => found.eq_ignore_ascii_case(ext),
            None => false,
        };
        if matches {
            return Some(candidate);
        }
    }
    None
}
|
||||||
|
/// `dir/name` if that path exists, otherwise `None`.
fn first_named(dir: &Path, name: &str) -> Option<PathBuf> {
    let candidate = dir.join(name);
    if candidate.exists() {
        Some(candidate)
    } else {
        None
    }
}
|
||||||
|
/// The final path component of `p` as a (lossy UTF-8) `String`; empty when `p` has no file name
/// (for example a root, or a path ending in `..`).
fn file_name(p: &Path) -> String {
    match p.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::new(),
    }
}
|
||||||
@@ -15,6 +15,9 @@
|
|||||||
//! that is correct for launching *our own* streamer, but a store launcher needs the real user's token
|
//! that is correct for launching *our own* streamer, but a store launcher needs the real user's token
|
||||||
//! for activation + auth). The host process itself stays SYSTEM.
|
//! for activation + auth). The host process itself stays SYSTEM.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use anyhow::{bail, Context, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use windows::core::{PCWSTR, PWSTR};
|
use windows::core::{PCWSTR, PWSTR};
|
||||||
@@ -40,6 +43,8 @@ use windows::Win32::System::Threading::{
|
|||||||
/// user is logged on (a pre-login / freshly-booted box can stream the login desktop but cannot
|
/// user is logged on (a pre-login / freshly-booted box can stream the login desktop but cannot
|
||||||
/// auto-launch a store title until someone signs in).
|
/// auto-launch a store title until someone signs in).
|
||||||
pub fn spawn_in_active_session(cmdline: &str, workdir: Option<&Path>) -> Result<u32> {
|
pub fn spawn_in_active_session(cmdline: &str, workdir: Option<&Path>) -> Result<u32> {
|
||||||
|
// SAFETY: `spawn_inner` is unsafe only for its Win32 FFI; it has no caller-side preconditions — it
|
||||||
|
// validates the session/token itself and owns every handle it opens — so calling it is always sound.
|
||||||
unsafe { spawn_inner(cmdline, workdir) }
|
unsafe { spawn_inner(cmdline, workdir) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -21,11 +21,14 @@
|
|||||||
//! loaded into the service's environment and carried to the host child. Logs land in
|
//! loaded into the service's environment and carried to the host child. Logs land in
|
||||||
//! `%ProgramData%\punktfunk\logs\`.
|
//! `%ProgramData%\punktfunk\logs\`.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use anyhow::{bail, Context, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
use std::ffi::{c_void, OsString};
|
use std::ffi::{c_void, OsString};
|
||||||
use std::os::windows::io::{AsRawHandle, FromRawHandle, OwnedHandle};
|
use std::os::windows::io::{AsRawHandle, FromRawHandle, OwnedHandle};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::atomic::{AtomicIsize, Ordering};
|
use std::sync::OnceLock;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use windows::core::{PCWSTR, PWSTR};
|
use windows::core::{PCWSTR, PWSTR};
|
||||||
@@ -65,18 +68,19 @@ const SERVICE_DESCRIPTION: &str =
|
|||||||
/// legacy GCM nonce reuse — security-review #5/#9; native clients only).
|
/// legacy GCM nonce reuse — security-review #5/#9; native clients only).
|
||||||
const DEFAULT_HOST_CMD: &str = "serve --gamestream";
|
const DEFAULT_HOST_CMD: &str = "serve --gamestream";
|
||||||
|
|
||||||
/// Event handles shared between the SCM control handler (which signals them) and the supervision loop
|
/// The STOP and SESSION manual-reset events, shared between the SCM control handler (a capture-free
|
||||||
/// (which waits on them). Stored as raw `isize` so the `'static + Send` handler can reach them without
|
/// `'static` closure that SIGNALS them) and the supervision loop (which WAITS on them). They live in
|
||||||
/// a non-`Send` `HANDLE` capture. Set once in `run_service`.
|
/// `OnceLock`s — a static the handler can reach without capturing a non-`Send` `HANDLE` — and each owns
|
||||||
///
|
/// its handle (`OwnedHandle`) for the process lifetime: the service process exits right after
|
||||||
/// Intentionally left as raw-`isize` statics + their explicit `CloseHandle` in `run_service` (not
|
/// `run_service` returns, so the OS reaps them at exit, and owning them past the handler's last possible
|
||||||
/// `OwnedHandle`): they're smuggled across the C SCM control-handler boundary, so converting them is a
|
/// call avoids the close-then-signal window the old raw-`isize` statics had. Set once, in `run_service`.
|
||||||
/// separate, riskier redesign out of scope for the process/job-handle ownership change here.
|
static STOP_EVENT: OnceLock<OwnedHandle> = OnceLock::new();
|
||||||
static STOP_EVENT: AtomicIsize = AtomicIsize::new(0);
|
static SESSION_EVENT: OnceLock<OwnedHandle> = OnceLock::new();
|
||||||
static SESSION_EVENT: AtomicIsize = AtomicIsize::new(0);
|
|
||||||
|
|
||||||
fn load_event(a: &AtomicIsize) -> HANDLE {
|
/// Borrow an event's handle for the control handler's `SetEvent`. `None` until `run_service` creates the
|
||||||
HANDLE(a.load(Ordering::Relaxed) as *mut c_void)
|
/// events — but the handler is registered only AFTER they're set, so in practice this is always `Some`.
|
||||||
|
fn event_handle(ev: &OnceLock<OwnedHandle>) -> Option<HANDLE> {
|
||||||
|
ev.get().map(|h| HANDLE(h.as_raw_handle()))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Dispatch `service <sub>`.
|
/// Dispatch `service <sub>`.
|
||||||
@@ -204,12 +208,24 @@ fn run_service() -> Result<()> {
|
|||||||
|
|
||||||
// Two manual-reset events: STOP (set once, never reset) and SESSION (set on a console
|
// Two manual-reset events: STOP (set once, never reset) and SESSION (set on a console
|
||||||
// connect/disconnect, reset by the supervisor after it reacts).
|
// connect/disconnect, reset by the supervisor after it reacts).
|
||||||
let stop =
|
// SAFETY: CreateEventW with null attributes (None), manual-reset=true, initial-state=false and a null
|
||||||
|
// name passes no pointers into Rust memory; it returns a fresh, owned event HANDLE (or Err, via `?`).
|
||||||
|
// Nothing aliases or outlives the call.
|
||||||
|
let stop_raw =
|
||||||
unsafe { CreateEventW(None, true, false, PCWSTR::null()) }.context("CreateEvent stop")?;
|
unsafe { CreateEventW(None, true, false, PCWSTR::null()) }.context("CreateEvent stop")?;
|
||||||
let session = unsafe { CreateEventW(None, true, false, PCWSTR::null()) }
|
// SAFETY: as above — a second fresh manual-reset event; no pointers into Rust memory, no aliasing.
|
||||||
|
let session_raw = unsafe { CreateEventW(None, true, false, PCWSTR::null()) }
|
||||||
.context("CreateEvent session")?;
|
.context("CreateEvent session")?;
|
||||||
STOP_EVENT.store(stop.0 as isize, Ordering::Relaxed);
|
// Own each event handle (the OS reaps them at process exit); the handler reaches them through the
|
||||||
SESSION_EVENT.store(session.0 as isize, Ordering::Relaxed);
|
// OnceLocks, while `supervise` waits on the borrowed `HANDLE`s. SAFETY: each is a fresh CreateEventW
|
||||||
|
// handle we own — take ownership exactly once.
|
||||||
|
let stop_owned = unsafe { OwnedHandle::from_raw_handle(stop_raw.0) };
|
||||||
|
// SAFETY: `session_raw` is the other fresh CreateEventW handle nothing else owns — take ownership once.
|
||||||
|
let session_owned = unsafe { OwnedHandle::from_raw_handle(session_raw.0) };
|
||||||
|
let stop = HANDLE(stop_owned.as_raw_handle());
|
||||||
|
let session = HANDLE(session_owned.as_raw_handle());
|
||||||
|
let _ = STOP_EVENT.set(stop_owned); // set once per process
|
||||||
|
let _ = SESSION_EVENT.set(session_owned);
|
||||||
|
|
||||||
// The control handler captures nothing — it reaches the events through the statics, so it stays
|
// The control handler captures nothing — it reaches the events through the statics, so it stays
|
||||||
// `Fn + Send + 'static`. Session lock/unlock are handled inside the host (DesktopWatcher), so we
|
// `Fn + Send + 'static`. Session lock/unlock are handled inside the host (DesktopWatcher), so we
|
||||||
@@ -217,7 +233,12 @@ fn run_service() -> Result<()> {
|
|||||||
let handler = move |control| -> ServiceControlHandlerResult {
|
let handler = move |control| -> ServiceControlHandlerResult {
|
||||||
match control {
|
match control {
|
||||||
ServiceControl::Stop | ServiceControl::Preshutdown | ServiceControl::Shutdown => {
|
ServiceControl::Stop | ServiceControl::Preshutdown | ServiceControl::Shutdown => {
|
||||||
unsafe { SetEvent(load_event(&STOP_EVENT)) }.ok();
|
if let Some(h) = event_handle(&STOP_EVENT) {
|
||||||
|
// SAFETY: `h` borrows the STOP event HANDLE from the STOP_EVENT OwnedHandle, set for
|
||||||
|
// the whole process lifetime and never closed before exit, so it is open here; SetEvent
|
||||||
|
// only signals the event and passes no Rust memory.
|
||||||
|
unsafe { SetEvent(h) }.ok();
|
||||||
|
}
|
||||||
ServiceControlHandlerResult::NoError
|
ServiceControlHandlerResult::NoError
|
||||||
}
|
}
|
||||||
ServiceControl::SessionChange(param) => {
|
ServiceControl::SessionChange(param) => {
|
||||||
@@ -226,7 +247,12 @@ fn run_service() -> Result<()> {
|
|||||||
param.reason,
|
param.reason,
|
||||||
ConsoleConnect | ConsoleDisconnect | SessionLogon
|
ConsoleConnect | ConsoleDisconnect | SessionLogon
|
||||||
) {
|
) {
|
||||||
unsafe { SetEvent(load_event(&SESSION_EVENT)) }.ok();
|
if let Some(h) = event_handle(&SESSION_EVENT) {
|
||||||
|
// SAFETY: `h` borrows the SESSION event HANDLE from the SESSION_EVENT OwnedHandle,
|
||||||
|
// alive for the whole process lifetime and never closed before exit; SetEvent only
|
||||||
|
// signals the event and passes no Rust memory.
|
||||||
|
unsafe { SetEvent(h) }.ok();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ServiceControlHandlerResult::NoError
|
ServiceControlHandlerResult::NoError
|
||||||
}
|
}
|
||||||
@@ -263,10 +289,8 @@ fn run_service() -> Result<()> {
|
|||||||
controls_accepted: ServiceControlAccept::empty(),
|
controls_accepted: ServiceControlAccept::empty(),
|
||||||
..running
|
..running
|
||||||
});
|
});
|
||||||
unsafe {
|
// The STOP/SESSION events stay owned by the OnceLocks for the process lifetime (the OS reaps them at
|
||||||
let _ = CloseHandle(stop);
|
// exit); NOT closing them while the SCM handler could still fire avoids a use-after-close.
|
||||||
let _ = CloseHandle(session);
|
|
||||||
}
|
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -287,6 +311,8 @@ fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
|
|||||||
// Kill-on-close job so a service crash never orphans the SYSTEM host; BREAKAWAY_OK lets the host
|
// Kill-on-close job so a service crash never orphans the SYSTEM host; BREAKAWAY_OK lets the host
|
||||||
// still spawn the WGC helper. Owned: dropping it at function exit (KILL_ON_JOB_CLOSE) reaps any
|
// still spawn the WGC helper. Owned: dropping it at function exit (KILL_ON_JOB_CLOSE) reaps any
|
||||||
// straggler still inside it — no manual CloseHandle(job).
|
// straggler still inside it — no manual CloseHandle(job).
|
||||||
|
// SAFETY: `make_job` is unsafe only for its Win32 FFI; it has no caller preconditions and creates +
|
||||||
|
// immediately takes RAII ownership of the job object, so calling it here is sound.
|
||||||
let job = unsafe { make_job() }.context("create job object")?;
|
let job = unsafe { make_job() }.context("create job object")?;
|
||||||
|
|
||||||
let mut restarts: u32 = 0;
|
let mut restarts: u32 = 0;
|
||||||
@@ -294,6 +320,8 @@ fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
|
|||||||
if wait_one(stop, 0) {
|
if wait_one(stop, 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// SAFETY: WTSGetActiveConsoleSessionId takes no arguments and returns the active console session
|
||||||
|
// id (or 0xFFFFFFFF); it passes no pointers, so the call is always sound.
|
||||||
let session = unsafe { WTSGetActiveConsoleSessionId() };
|
let session = unsafe { WTSGetActiveConsoleSessionId() };
|
||||||
if session == 0xFFFF_FFFF {
|
if session == 0xFFFF_FFFF {
|
||||||
// No interactive session yet (boot / fully logged out). Wait, but wake on stop/session.
|
// No interactive session yet (boot / fully logged out). Wait, but wake on stop/session.
|
||||||
@@ -301,12 +329,17 @@ fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
|
|||||||
if wait_any(&[stop, session_ev], 3000) == Some(0) {
|
if wait_any(&[stop, session_ev], 3000) == Some(0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// SAFETY: `session_ev` is the SESSION event HANDLE borrowed from the SESSION_EVENT OwnedHandle,
|
||||||
|
// alive for the process lifetime; ResetEvent only clears its signalled state, no Rust memory.
|
||||||
unsafe { ResetEvent(session_ev) }.ok();
|
unsafe { ResetEvent(session_ev) }.ok();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// BORROW the owned job handle for AssignProcessToJobObject inside spawn_host.
|
// BORROW the owned job handle for AssignProcessToJobObject inside spawn_host.
|
||||||
let job_h = HANDLE(job.as_raw_handle() as *mut c_void);
|
let job_h = HANDLE(job.as_raw_handle());
|
||||||
|
// SAFETY: `spawn_host` is unsafe only for its Win32 FFI. `session` is a valid console session id
|
||||||
|
// (checked != 0xFFFFFFFF above), `cmdline`/`workdir` are live borrows for the call, and `job_h`
|
||||||
|
// borrows the still-live `job` OwnedHandle — every argument is valid for the call's duration.
|
||||||
let child = match unsafe { spawn_host(session, &cmdline, &workdir, job_h) } {
|
let child = match unsafe { spawn_host(session, &cmdline, &workdir, job_h) } {
|
||||||
Ok(child) => child,
|
Ok(child) => child,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
@@ -323,13 +356,16 @@ fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
|
|||||||
// `proc_h` is a plain copy that does NOT close it). `child` owns the process + thread handles
|
// `proc_h` is a plain copy that does NOT close it). `child` owns the process + thread handles
|
||||||
// and auto-closes BOTH when it drops — at the end of this iteration, on `continue`, or on
|
// and auto-closes BOTH when it drops — at the end of this iteration, on `continue`, or on
|
||||||
// `break` — so every match arm below only stops/terminates and lets the drop do the closing.
|
// `break` — so every match arm below only stops/terminates and lets the drop do the closing.
|
||||||
let proc_h = HANDLE(child.process.as_raw_handle() as *mut c_void);
|
let proc_h = HANDLE(child.process.as_raw_handle());
|
||||||
|
|
||||||
// Wait on stop / session-change / child-exit.
|
// Wait on stop / session-change / child-exit.
|
||||||
let reason = wait_any(&[stop, session_ev, proc_h], INFINITE);
|
let reason = wait_any(&[stop, session_ev, proc_h], INFINITE);
|
||||||
match reason {
|
match reason {
|
||||||
Some(0) => {
|
Some(0) => {
|
||||||
// Stop: terminate the child and exit (the `child` drop closes its handles).
|
// Stop: terminate the child and exit (the `child` drop closes its handles).
|
||||||
|
// SAFETY: `proc_h` is a HANDLE copy of the still-live `child.process` OwnedHandle (not
|
||||||
|
// dropped until end of iteration), so the process handle is open; TerminateProcess only
|
||||||
|
// signals termination by handle and passes no Rust memory.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = TerminateProcess(proc_h, 0);
|
let _ = TerminateProcess(proc_h, 0);
|
||||||
}
|
}
|
||||||
@@ -337,7 +373,10 @@ fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
|
|||||||
}
|
}
|
||||||
Some(1) => {
|
Some(1) => {
|
||||||
// Session change: relaunch only if the active console session actually moved.
|
// Session change: relaunch only if the active console session actually moved.
|
||||||
|
// SAFETY: `session_ev` borrows the process-lifetime SESSION_EVENT OwnedHandle; ResetEvent
|
||||||
|
// only clears its signalled state and passes no Rust memory.
|
||||||
unsafe { ResetEvent(session_ev) }.ok();
|
unsafe { ResetEvent(session_ev) }.ok();
|
||||||
|
// SAFETY: WTSGetActiveConsoleSessionId takes no arguments and passes no pointers.
|
||||||
let now = unsafe { WTSGetActiveConsoleSessionId() };
|
let now = unsafe { WTSGetActiveConsoleSessionId() };
|
||||||
if now != session {
|
if now != session {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
@@ -345,6 +384,8 @@ fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
|
|||||||
new = now,
|
new = now,
|
||||||
"console session changed — relaunching host"
|
"console session changed — relaunching host"
|
||||||
);
|
);
|
||||||
|
// SAFETY: `proc_h` copies the still-live `child.process` OwnedHandle (dropped only at
|
||||||
|
// end of iteration), so the handle is open; TerminateProcess only signals by handle.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = TerminateProcess(proc_h, 0);
|
let _ = TerminateProcess(proc_h, 0);
|
||||||
}
|
}
|
||||||
@@ -353,6 +394,8 @@ fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
|
|||||||
}
|
}
|
||||||
// Same session (e.g. a stray notification) — keep waiting on the same child.
|
// Same session (e.g. a stray notification) — keep waiting on the same child.
|
||||||
let r = wait_any(&[stop, proc_h], INFINITE);
|
let r = wait_any(&[stop, proc_h], INFINITE);
|
||||||
|
// SAFETY: `proc_h` copies the still-live `child.process` OwnedHandle (dropped only at end
|
||||||
|
// of iteration), so the handle is open; TerminateProcess only signals by handle.
|
||||||
unsafe {
|
unsafe {
|
||||||
let _ = TerminateProcess(proc_h, 0);
|
let _ = TerminateProcess(proc_h, 0);
|
||||||
}
|
}
|
||||||
@@ -384,11 +427,17 @@ fn supervise(stop: HANDLE, session_ev: HANDLE) -> Result<()> {
|
|||||||
|
|
||||||
/// `true` if `h` is signalled within `ms`.
|
/// `true` if `h` is signalled within `ms`.
|
||||||
fn wait_one(h: HANDLE, ms: u32) -> bool {
|
fn wait_one(h: HANDLE, ms: u32) -> bool {
|
||||||
|
// SAFETY: `&[h]` is a live one-element HANDLE slice the caller keeps open across the wait; the kernel
|
||||||
|
// reads exactly one handle (the binding derives the count from the slice length), bWaitAll=false,
|
||||||
|
// `ms` is a timeout — no pointers escape and the array is only read for this synchronous call.
|
||||||
unsafe { WaitForMultipleObjects(&[h], false, ms) == WAIT_OBJECT_0 }
|
unsafe { WaitForMultipleObjects(&[h], false, ms) == WAIT_OBJECT_0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Wait on several handles; returns the index of the first signalled, or `None` on timeout.
|
/// Wait on several handles; returns the index of the first signalled, or `None` on timeout.
|
||||||
fn wait_any(handles: &[HANDLE], ms: u32) -> Option<usize> {
|
fn wait_any(handles: &[HANDLE], ms: u32) -> Option<usize> {
|
||||||
|
// SAFETY: `handles` is a live slice the caller keeps open across the wait; WaitForMultipleObjects
|
||||||
|
// reads exactly `handles.len()` handles (the binding derives the count from the slice), bWaitAll=false,
|
||||||
|
// `ms` is a timeout — the array is only read for this synchronous call and no pointers escape it.
|
||||||
let r = unsafe { WaitForMultipleObjects(handles, false, ms) };
|
let r = unsafe { WaitForMultipleObjects(handles, false, ms) };
|
||||||
let idx = r.0.wrapping_sub(WAIT_OBJECT_0.0);
|
let idx = r.0.wrapping_sub(WAIT_OBJECT_0.0);
|
||||||
(idx < handles.len() as u32).then_some(idx as usize)
|
(idx < handles.len() as u32).then_some(idx as usize)
|
||||||
@@ -403,7 +452,7 @@ unsafe fn make_job() -> Result<OwnedHandle> {
|
|||||||
info.BasicLimitInformation.LimitFlags =
|
info.BasicLimitInformation.LimitFlags =
|
||||||
JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK;
|
JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK;
|
||||||
SetInformationJobObject(
|
SetInformationJobObject(
|
||||||
HANDLE(job.as_raw_handle() as *mut c_void),
|
HANDLE(job.as_raw_handle()),
|
||||||
JobObjectExtendedLimitInformation,
|
JobObjectExtendedLimitInformation,
|
||||||
&info as *const _ as *const c_void,
|
&info as *const _ as *const c_void,
|
||||||
std::mem::size_of::<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>() as u32,
|
std::mem::size_of::<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>() as u32,
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//! USER-session WGC helper (Windows) — part of the two-process secure-desktop design
|
//! USER-session WGC helper (Windows) — part of the two-process secure-desktop design
|
||||||
//! (docs/windows-secure-desktop.md).
|
//! (design/archive/windows-secure-desktop.md).
|
||||||
//!
|
//!
|
||||||
//! WGC won't activate under the SYSTEM account, but the host must run as SYSTEM for the secure
|
//! WGC won't activate under the SYSTEM account, but the host must run as SYSTEM for the secure
|
||||||
//! desktop. So the SYSTEM host spawns THIS helper in the interactive user session
|
//! desktop. So the SYSTEM host spawns THIS helper in the interactive user session
|
||||||
@@ -12,6 +12,9 @@
|
|||||||
//!
|
//!
|
||||||
//! Wire framing on stdout, per AU: `[u32 len LE][u64 pts_ns LE][u8 keyframe][len bytes data]`.
|
//! Wire framing on stdout, per AU: `[u32 len LE][u64 pts_ns LE][u8 keyframe][len bytes data]`.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use crate::capture::{dxgi::WinCaptureTarget, wgc::WgcCapturer, Capturer};
|
use crate::capture::{dxgi::WinCaptureTarget, wgc::WgcCapturer, Capturer};
|
||||||
use crate::encode::{self, Codec};
|
use crate::encode::{self, Codec};
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
@@ -72,6 +75,9 @@ pub fn run(opts: HelperOptions) -> Result<()> {
|
|||||||
.name("pf-present-trigger".into())
|
.name("pf-present-trigger".into())
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
tracing::info!("present-trigger: starting D3D present loop on the virtual display");
|
tracing::info!("present-trigger: starting D3D present loop on the virtual display");
|
||||||
|
// SAFETY: `present_trigger` is unsafe only for its Win32/D3D11 FFI; it has no caller
|
||||||
|
// preconditions (it creates and exclusively owns its own window, device, and swapchain on
|
||||||
|
// this dedicated thread), so the call is sound.
|
||||||
if let Err(e) = unsafe { present_trigger(w, h) } {
|
if let Err(e) = unsafe { present_trigger(w, h) } {
|
||||||
tracing::warn!("present-trigger error: {e:#}");
|
tracing::warn!("present-trigger error: {e:#}");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,9 @@
|
|||||||
//! them, which let the SudoVDA backend be dropped without losing them (audit §9 / Goal 2 — done). The
|
//! them, which let the SudoVDA backend be dropped without losing them (audit §9 / Goal 2 — done). The
|
||||||
//! plan's `windows/display_ccd.rs`. Extracted verbatim from the former SudoVDA backend before its removal.
|
//! plan's `windows/display_ccd.rs`. Extracted verbatim from the former SudoVDA backend before its removal.
|
||||||
|
|
||||||
|
// Every `unsafe` block in this file carries a `// SAFETY:` proof; enforce it (unsafe-proof program).
|
||||||
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
|
||||||
use windows::core::PCWSTR;
|
use windows::core::PCWSTR;
|
||||||
@@ -16,9 +19,9 @@ use windows::Win32::Devices::Display::{
|
|||||||
QueryDisplayConfig, SetDisplayConfig, DISPLAYCONFIG_DEVICE_INFO_GET_ADVANCED_COLOR_INFO,
|
QueryDisplayConfig, SetDisplayConfig, DISPLAYCONFIG_DEVICE_INFO_GET_ADVANCED_COLOR_INFO,
|
||||||
DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME, DISPLAYCONFIG_DEVICE_INFO_SET_ADVANCED_COLOR_STATE,
|
DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME, DISPLAYCONFIG_DEVICE_INFO_SET_ADVANCED_COLOR_STATE,
|
||||||
DISPLAYCONFIG_GET_ADVANCED_COLOR_INFO, DISPLAYCONFIG_MODE_INFO, DISPLAYCONFIG_PATH_INFO,
|
DISPLAYCONFIG_GET_ADVANCED_COLOR_INFO, DISPLAYCONFIG_MODE_INFO, DISPLAYCONFIG_PATH_INFO,
|
||||||
DISPLAYCONFIG_SET_ADVANCED_COLOR_STATE, DISPLAYCONFIG_SOURCE_DEVICE_NAME, QDC_ONLY_ACTIVE_PATHS,
|
DISPLAYCONFIG_SET_ADVANCED_COLOR_STATE, DISPLAYCONFIG_SOURCE_DEVICE_NAME,
|
||||||
SDC_ALLOW_CHANGES, SDC_APPLY, SDC_FORCE_MODE_ENUMERATION, SDC_SAVE_TO_DATABASE,
|
QDC_ONLY_ACTIVE_PATHS, SDC_ALLOW_CHANGES, SDC_APPLY, SDC_FORCE_MODE_ENUMERATION,
|
||||||
SDC_USE_SUPPLIED_DISPLAY_CONFIG,
|
SDC_SAVE_TO_DATABASE, SDC_TOPOLOGY_EXTEND, SDC_USE_SUPPLIED_DISPLAY_CONFIG,
|
||||||
};
|
};
|
||||||
use windows::Win32::Graphics::Gdi::{
|
use windows::Win32::Graphics::Gdi::{
|
||||||
ChangeDisplaySettingsExW, EnumDisplaySettingsW, CDS_TEST, CDS_UPDATEREGISTRY, DEVMODEW,
|
ChangeDisplaySettingsExW, EnumDisplaySettingsW, CDS_TEST, CDS_UPDATEREGISTRY, DEVMODEW,
|
||||||
@@ -28,6 +31,29 @@ use windows::Win32::Graphics::Gdi::{
|
|||||||
|
|
||||||
use crate::vdisplay::Mode;
|
use crate::vdisplay::Mode;
|
||||||
|
|
||||||
|
/// Force the desktop into EXTEND topology - the programmatic equivalent of the Win+P / DisplaySwitch
|
||||||
|
/// "Extend" shortcut. Windows defaults a FRESHLY-ADDED monitor into CLONE/duplicate mode when a
|
||||||
|
/// physical display is already active (e.g. a laptop panel): a cloned IddCx output shares the panel's
|
||||||
|
/// source, so the OS never commits a distinct path for it, never calls ASSIGN_SWAPCHAIN, and capture
|
||||||
|
/// sees no frames (`resolve_gdi_name` stays `None` and the session fails "not an active display path").
|
||||||
|
/// Applying the EXTEND preset across the live set of connected displays makes the new IddCx monitor its
|
||||||
|
/// OWN active path, so the rest of bring-up (`resolve_gdi_name` -> `set_active_mode` ->
|
||||||
|
/// `isolate_displays_ccd`) proceeds. Best-effort + idempotent: a no-op on a single-display (already
|
||||||
|
/// sole/extended) box, so it is safe to call unconditionally. `rc == 0` is success.
|
||||||
|
pub(crate) unsafe fn force_extend_topology() {
|
||||||
|
// A topology flag with no supplied path/mode arrays tells the OS to recompute + apply that preset
|
||||||
|
// for the currently-connected displays (the same code path DisplaySwitch.exe drives).
|
||||||
|
let rc = SetDisplayConfig(None, None, SDC_APPLY | SDC_TOPOLOGY_EXTEND);
|
||||||
|
if rc == 0 {
|
||||||
|
tracing::info!(
|
||||||
|
"display topology forced to EXTEND (a new IddCx monitor would otherwise be CLONED onto the \
|
||||||
|
existing panel -> no distinct source -> no frames)"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
tracing::warn!("display force-EXTEND topology: SetDisplayConfig rc={rc:#x}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Resolve the `\\.\DisplayN` GDI name for a SudoVDA target id via the CCD API. Returns `None`
|
/// Resolve the `\\.\DisplayN` GDI name for a SudoVDA target id via the CCD API. Returns `None`
|
||||||
/// until the OS activates the target into the desktop topology (needs a real WDDM GPU; on a
|
/// until the OS activates the target into the desktop topology (needs a real WDDM GPU; on a
|
||||||
/// GPU-less box this stays `None` even though ADD succeeded).
|
/// GPU-less box this stays `None` even though ADD succeeded).
|
||||||
@@ -202,6 +228,10 @@ pub(crate) fn set_active_mode(gdi_name: &str, mode: Mode) {
|
|||||||
dmSize: size_of::<DEVMODEW>() as u16,
|
dmSize: size_of::<DEVMODEW>() as u16,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: `wname` is a live NUL-terminated UTF-16 device name (built above) whose pointer stays
|
||||||
|
// valid for the call; `&mut dm` is a live DEVMODEW with `dmSize` set that EnumDisplaySettingsW
|
||||||
|
// fills in for mode index `i`. Both outlive this synchronous call; the API only reads the name
|
||||||
|
// and writes `dm`, so nothing aliases.
|
||||||
let ok = unsafe {
|
let ok = unsafe {
|
||||||
EnumDisplaySettingsW(
|
EnumDisplaySettingsW(
|
||||||
PCWSTR(wname.as_ptr()),
|
PCWSTR(wname.as_ptr()),
|
||||||
@@ -269,6 +299,9 @@ pub(crate) fn set_active_mode(gdi_name: &str, mode: Mode) {
|
|||||||
dmDisplayFrequency: chosen_hz,
|
dmDisplayFrequency: chosen_hz,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
// SAFETY: `wname` is a live NUL-terminated UTF-16 device name and `&dm` is a live DEVMODEW describing
|
||||||
|
// the requested mode; both outlive the call. CDS_TEST only validates the mode (no apply), the two
|
||||||
|
// trailing args are null, and the API only reads its inputs.
|
||||||
let test = unsafe {
|
let test = unsafe {
|
||||||
ChangeDisplaySettingsExW(PCWSTR(wname.as_ptr()), Some(&dm), None, CDS_TEST, None)
|
ChangeDisplaySettingsExW(PCWSTR(wname.as_ptr()), Some(&dm), None, CDS_TEST, None)
|
||||||
};
|
};
|
||||||
@@ -282,6 +315,9 @@ pub(crate) fn set_active_mode(gdi_name: &str, mode: Mode) {
|
|||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// SAFETY: same inputs as the CDS_TEST call above — `wname` (live NUL-terminated device name) and
|
||||||
|
// `&dm` (live DEVMODEW) both outlive the call; CDS_UPDATEREGISTRY applies the already-validated mode,
|
||||||
|
// and the API only reads its inputs.
|
||||||
let apply = unsafe {
|
let apply = unsafe {
|
||||||
ChangeDisplaySettingsExW(
|
ChangeDisplaySettingsExW(
|
||||||
PCWSTR(wname.as_ptr()),
|
PCWSTR(wname.as_ptr()),
|
||||||
|
|||||||
@@ -0,0 +1,85 @@
|
|||||||
|
# design/ — design notes & deep-dive plans
|
||||||
|
|
||||||
|
Repo-internal design docs: architecture rationale, investigations, and the *why* behind decisions that
|
||||||
|
the code and [`../CLAUDE.md`](../CLAUDE.md) don't capture. **Authoritative current status lives in
|
||||||
|
[`../CLAUDE.md`](../CLAUDE.md)** ("Where the work stands" / "What's left"); the user-facing guides live in
|
||||||
|
`docs-site/`. These docs are kept trimmed: once work ships, the redundant implementation detail is dropped
|
||||||
|
(the code is the source of truth) and only the durable rationale + still-open items remain. Git history
|
||||||
|
holds the full originals.
|
||||||
|
|
||||||
|
## Index
|
||||||
|
|
||||||
|
| Doc | What it is | Status |
|
||||||
|
|-----|-----------|--------|
|
||||||
|
| [`implementation-plan.md`](implementation-plan.md) | Master design thesis (why GF(2¹⁶) FEC + Linux virtual displays; three-phase de-risking), architecture invariants, latency budget, risk register | **Design reference** — §0–7,9 kept; milestones → CLAUDE.md |
|
||||||
|
| [`apollo-comparison.md`](apollo-comparison.md) | Apollo↔punktfunk architecture map + file index + 96-item transferable-improvement backlog (Windows-host focus) | **Reference + open backlog** — ~⅓ shipped or obsolete (collapsed); ~63 still open |
|
||||||
|
| [`security-review.md`](security-review.md) | Whole-project security audit (2026-06-21), 12 findings | **Audit trail** — 11 fixed/inherent; **#12 open** |
|
||||||
|
| [`ci.md`](ci.md) | CI/CD architecture: Gitea workflows, runners, release model, signing | **Evergreen reference** |
|
||||||
|
| [`linux-setup.md`](linux-setup.md) | Linux host bring-up (NVIDIA/headless) + troubleshooting | **Setup guide** (evergreen) |
|
||||||
|
| [`gamestream-host-plan.md`](gamestream-host-plan.md) | GameStream/Moonlight-compat host (P1.1–P1.6) | **Shipped** — stub + the 2 deferral decisions |
|
||||||
|
| [`stats-capture-plan.md`](stats-capture-plan.md) | Web-console performance capture | **Shipped** — stub |
|
||||||
|
| [`session-aware-host-followups.md`](session-aware-host-followups.md) | Session-aware host known limitations | **Open items** — #2/#3 shipped; #1,#4–8 parked |
|
||||||
|
| [`gamescope-multiuser.md`](gamescope-multiuser.md) | Per-session gamescope isolation (the 4 plumbing items) | **Deferred** — reference spec |
|
||||||
|
| [`host-latency-plan.md`](host-latency-plan.md) | Latency under GPU contention — 4-tier plan | **Partly shipped** — superseded by ↓; diagnostics + open tiers kept |
|
||||||
|
| [`gpu-contention-investigation.md`](gpu-contention-investigation.md) | GPU-contention root-cause + ranked levers (supersedes ↑) | **Active plan** — §5.A shipped; §5.B/C/E/F/G open |
|
||||||
|
| [`hdr-pipeline-plan.md`](hdr-pipeline-plan.md) | Glass-to-glass HDR | **Steps 0–3 shipped**; Step 4 (Linux) open |
|
||||||
|
| [`windows-host-rewrite.md`](windows-host-rewrite.md) | **Windows host — the single architecture/status/reference doc** (validated invariants, ops, open work) | **Active reference** |
|
||||||
|
| [`windows-build-and-packaging.md`](windows-build-and-packaging.md) | How the Windows host is built, signed, packaged (drivers-from-source, Inno, CI) | **Evergreen reference** |
|
||||||
|
| [`windows-service.md`](windows-service.md) | SYSTEM SCM service + secure-desktop deployment model | **Shipped** — stub + graceful-stop open item |
|
||||||
|
| [`windows-host.md`](windows-host.md) | (original 2026-06 plan) | **Redirect** → `windows-host-rewrite.md` |
|
||||||
|
| [`windows-virtual-display-rust-port.md`](windows-virtual-display-rust-port.md) | pf-vdisplay IddCx port + the "IDD-push is impossible on bare metal" finding | **Shipped** — P2 do-not-retry record kept |
|
||||||
|
| [`windows-dualsense-scoping.md`](windows-dualsense-scoping.md) | Virtual DualSense (UMDF2) decision + M0 bug lessons | **Shipped (M0–M4)** — public signing open |
|
||||||
|
| [`windows-dualsense-game-detection.md`](windows-dualsense-game-detection.md) | Native game-detection fix (SwDeviceCreate identity) | **Shipped** — on-glass test + GameInput open |
|
||||||
|
| [`windows-client-bootstrap.md`](windows-client-bootstrap.md) | Windows client architecture record + HDR guide + build gotchas | **Shipped** — on-glass validation open |
|
||||||
|
| [`apple-stage2-presenter.md`](apple-stage2-presenter.md) | Apple stage-2 (VTDecompressionSession + CAMetalLayer) presenter | **Shipped (opt-in)** — make-default + iOS open |
|
||||||
|
| [`game-library-stores.md`](game-library-stores.md) | Multi-store game library | **Phases 1–4 shipped** — 6 providers + 8 Qs open |
|
||||||
|
| [`dualsense-haptics.md`](dualsense-haptics.md) | DualSense advanced-haptics feasibility | **HID shipped**; audio haptics deferred (3 walls) |
|
||||||
|
| [`archive/windows-secure-desktop.md`](archive/windows-secure-desktop.md) | Two-process WGC secure-desktop design | **Archived** — shipped but now a fallback (IDD-push primary) |
|
||||||
|
|
||||||
|
Plus `research/gamestream-protocol-research.json` — raw Moonlight/GameStream wire reference (data, not prose).
|
||||||
|
|
||||||
|
## Consolidated open items
|
||||||
|
|
||||||
|
Still-open work scattered across the docs above, rolled up by theme so nothing is tracked in only one
|
||||||
|
buried doc. CLAUDE.md "What's left" is the headline list; this is the design-level detail. (→ names the
|
||||||
|
owning doc.)
|
||||||
|
|
||||||
|
**Latency / performance**
|
||||||
|
- Sub-frame pipelining — overlap encode+transmit within a frame; needs a direct NVENC SDK wrapper (~2–4 ms). → `implementation-plan`, `gamestream-host-plan`
|
||||||
|
- GPU-contention levers: correct async NVENC pipeline, auto-gated REALTIME GPU priority, clock/P-state pinning, frame-source escape (swapchain-hook/NvFBC/compose-flip), iGPU encode offload, PERF uniq-vs-fps instrumentation. → `gpu-contention-investigation` (§5.B/C/E/F/G), `host-latency-plan` (Tiers 1A/1B/3B/3C/3D/4)
|
||||||
|
- Apple stage-2 as default (after resolution/HDR checks) + smoothing/pacing policy + glass-to-glass numbers via `tools/latency-probe`. → `apple-stage2-presenter`
|
||||||
|
|
||||||
|
**HDR**
|
||||||
|
- Linux 10-bit HDR (Step 4): 8-bit→Main10 shim, true 10-bit PipeWire capture (blocked upstream — gamescope #2126), Linux-client P010 + GTK color management. → `hdr-pipeline-plan`
|
||||||
|
- GameStream HDR/10-bit (capture + metadata plumbing). → `gamestream-host-plan`
|
||||||
|
- Open Qs: MaxCLL source, GameStream SS_HDR_METADATA vs deliberate SDR, HLG sources, mid-session SDR-downgrade + SDR-for-SDR-client validation. → `hdr-pipeline-plan`
|
||||||
|
|
||||||
|
**Clients**
|
||||||
|
- Windows client on-glass validation (D3D11VA decode + HDR present + GUI on the RTX box) + RAWINPUT relative-mouse pointer-lock + per-host speed-test UI. → `windows-client-bootstrap`, `implementation-plan`
|
||||||
|
- iOS/iPadOS/tvOS stage-2 presenter variants. → `apple-stage2-presenter`, `implementation-plan`
|
||||||
|
- Android real-device validation (gamepad rumble/lightbar/DualSense, HDR10). → `implementation-plan`
|
||||||
|
|
||||||
|
**Windows host**
|
||||||
|
- Graceful stop signal — host is killed via TerminateProcess (skips RAII teardown → a stale virtual monitor can linger). → `windows-service`
|
||||||
|
- pf-vdisplay slot-reclaim on-glass reconnect-storm A/B; M4 driver-unification source-build validation; P2/P3 cleanup (D1-host lints, M6 scaffolding, M5 reshape WGC/DDA onto session/pipeline). → `windows-host-rewrite`
|
||||||
|
- Session-aware follow-ups: F44 gamescope teardown GPU-context corruption (#1, SIGKILL hypothesis); mid-stream-switch input-loss window; NVENC InitializeEncoder noise at 5K@240; NVENC HEVC ~800 Mbps cap (prefer AV1 above it); restore-guard/keep-warm coupling; Feature B (`PUNKTFUNK_SESSION_WATCH`) opt-in → default. → `session-aware-host-followups`
|
||||||
|
- Apollo backlog (~63 open) — highest-value: #9 Windows app launch (CreateProcessAsUserW), #7/#18 WASAPI device-loss recovery, #3 per-frame `IDXGIFactory::IsCurrent()`, #15 watchdog escalation, #14/#30/#56 abs-mouse through the real output rect, #10/#20/#32/#33 tray + browser-UI + in-binary service install + logs endpoint, #67/#68 frame pacing. → `apollo-comparison`
|
||||||
|
|
||||||
|
**Windows gamepads**
|
||||||
|
- DualSense public-distribution signing (EV cert + Microsoft Partner Center attestation — blocks public release); GameInput detection (reads VID/PID 0x0000 — may need a rank-3 KMDF USB-emulating bus driver); HidHide integration; minimum-OS / UMDFVERSION targeting; on-glass Cyberpunk glyph test. → `windows-dualsense-scoping`, `windows-dualsense-game-detection`
|
||||||
|
|
||||||
|
**GameStream**
|
||||||
|
- AV1 + surround 5.1/7.1 live stock-Moonlight confirmation (incl. FEC-under-loss); reconnect-at-new-mode robustness. → `gamestream-host-plan`, `implementation-plan`
|
||||||
|
|
||||||
|
**Game library**
|
||||||
|
- 6 remaining providers (Desktop/Flatpak, itch.io, Ubisoft Connect, Amazon Games, Battle.net, EA app); the `/library/art/<entryId>/<slot>` mgmt endpoint; refactor `library.rs` into a `library/` dir; 8 open design questions; optional SteamGridDB v2 enrichment. → `game-library-stores`
|
||||||
|
|
||||||
|
**Multi-user / sessions**
|
||||||
|
- gamescope per-session input/audio isolation (independent desktops) — the 4 plumbing items, deferred. → `gamescope-multiuser`, `implementation-plan`
|
||||||
|
|
||||||
|
**Security**
|
||||||
|
- **#12** — scope `NODE_TLS_REJECT_UNAUTHORIZED` to a per-request pinned agent (needs `bun add undici`); latent-only today, but **must fix before the web app gains any off-loopback server-side TLS**. → `security-review`
|
||||||
|
|
||||||
|
**Deferred / do-not-retry records** (kept so the dead ends aren't re-explored)
|
||||||
|
- DualSense audio-driven haptics — deferred until all 3 GO conditions are met. → `dualsense-haptics`
|
||||||
|
- IDD-push direct frame-push on bare-metal console capture — architecturally impossible (no presentation consumer for the swapchain). → `windows-virtual-display-rust-port`
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
# Apollo vs punktfunk — architecture map & transferable improvements
|
# Apollo vs punktfunk — architecture map & transferable improvements
|
||||||
|
|
||||||
|
> **Status:** Reference doc — an Apollo↔punktfunk architecture map plus a 96-item transferable-improvement backlog. About a third of the backlog has since shipped or gone obsolete (those items are collapsed to one-liners below); the rest is still open with full citations. The **Re-verified status (2026-06-20)** section is the authoritative shipped-status record.
|
||||||
|
|
||||||
> Generated 2026-06-16 by the `apollo-vs-punktfunk` multi-agent workflow, then reconstructed from
|
> Generated 2026-06-16 by the `apollo-vs-punktfunk` multi-agent workflow, then reconstructed from
|
||||||
> the run journal after the live run was interrupted. **Apollo** = `~/Apollo` (commit `adc5c5a0`),
|
> the run journal after the live run was interrupted. **Apollo** = `~/Apollo` (commit `adc5c5a0`),
|
||||||
> a C++ fork of Sunshine — a Moonlight-compatible streaming **host only** (no client of its own).
|
> a C++ fork of Sunshine — a Moonlight-compatible streaming **host only** (no client of its own).
|
||||||
@@ -43,7 +45,7 @@ Apollo is host-only. A stream flows: **nvhttp** (HTTPS pairing + serverinfo/appl
|
|||||||
| Apollo — Audio capture, encode, transport (Windows host) | `audio.cpp`; `audio.h`; `audio.cpp`; `common.h`; `stream.cpp` | `audio.rs`; `audio/wasapi_cap.rs`; `audio/linux.rs`; `gamestream/audio.rs`; `punktfunk1.rs` |
|
| Apollo — Audio capture, encode, transport (Windows host) | `audio.cpp`; `audio.h`; `audio.cpp`; `common.h`; `stream.cpp` | `audio.rs`; `audio/wasapi_cap.rs`; `audio/linux.rs`; `gamestream/audio.rs`; `punktfunk1.rs` |
|
||||||
| Apollo (Sunshine fork) — Input handling & injection | `input.cpp`; `input.cpp`; `keylayout.h`; `misc.cpp` | — |
|
| Apollo (Sunshine fork) — Input handling & injection | `input.cpp`; `input.cpp`; `keylayout.h`; `misc.cpp` | — |
|
||||||
| Apollo: App/process launch & display configuration (Windows host) | `process.cpp`; `display_device.cpp`; `process.h`; `virtual_display.h`; `misc.cpp`; `utils.cpp` | `vdisplay/sudovda.rs`; `vdisplay.rs`; `gamestream/apps.rs`; `library.rs`; `punktfunk1.rs`; `capture/wgc_relay.rs` |
|
| Apollo: App/process launch & display configuration (Windows host) | `process.cpp`; `display_device.cpp`; `process.h`; `virtual_display.h`; `misc.cpp`; `utils.cpp` | `vdisplay/sudovda.rs`; `vdisplay.rs`; `gamestream/apps.rs`; `library.rs`; `punktfunk1.rs`; `capture/wgc_relay.rs` |
|
||||||
| Apollo: Config, management/web UI, system tray | `config.h`; `config.cpp`; `confighttp.cpp`; `confighttp.h`; `system_tray.cpp`; `system_tray.h` | `mgmt.rs`; `mgmt_token.rs`; `main.rs`; `native_pairing.rs`; `library.rs`; `docs/windows-host.md` |
|
| Apollo: Config, management/web UI, system tray | `config.h`; `config.cpp`; `confighttp.cpp`; `confighttp.h`; `system_tray.cpp`; `system_tray.h` | `mgmt.rs`; `mgmt_token.rs`; `main.rs`; `native_pairing.rs`; `library.rs`; `design/windows-host.md` |
|
||||||
|
|
||||||
### Apollo — Protocol & streaming (RTP/FEC/ENet/RTSP/crypto)
|
### Apollo — Protocol & streaming (RTP/FEC/ENet/RTSP/crypto)
|
||||||
|
|
||||||
@@ -354,7 +356,7 @@ The `formats[]` table (258-277) maps 2/6/8 channels to Stereo/5.1/7.1 with the G
|
|||||||
- **Decouple ingest from injection via task-pool queue with lock-then-release batching** — The control-stream thread only enqueues bytes and schedules a task (src/input.cpp:1639-1643). A pool thread pops one packet, coalesces later same-type packets into it while holding the queue lock, then RELEASES the lock before the (potentially slow) SendInput/ViGEm call (src/input.cpp:1486-1520). — _For a low-latency streaming host this is the core anti-head-of-line-blocking pattern: a slow OS input call (e.g. SendInput crossing a desktop switch) never stalls the network/control thread, and bursts of mouse/scroll/controller packets collapse to one OS event per drain. punktfunk should mirror this: never call SendInput on the QUIC/control thread._
|
- **Decouple ingest from injection via task-pool queue with lock-then-release batching** — The control-stream thread only enqueues bytes and schedules a task (src/input.cpp:1639-1643). A pool thread pops one packet, coalesces later same-type packets into it while holding the queue lock, then RELEASES the lock before the (potentially slow) SendInput/ViGEm call (src/input.cpp:1486-1520). — _For a low-latency streaming host this is the core anti-head-of-line-blocking pattern: a slow OS input call (e.g. SendInput crossing a desktop switch) never stalls the network/control thread, and bursts of mouse/scroll/controller packets collapse to one OS event per drain. punktfunk should mirror this: never call SendInput on the QUIC/control thread._
|
||||||
- **Type-aware packet batching with batch_result_e (batched / not_batchable / terminate_batch)** — batch() overloads (src/input.cpp:1208-1475) sum relative-mouse deltas and scroll amounts (with __builtin_add_overflow guards that terminate the batch on 16-bit overflow), take the latest absolute position, and collapse controller/touch/pen move/hover runs. terminate_batch stops at a state-changing event (button change, eventType change, active-mask change) so ordering semantics are preserved; not_batchable skips a non-matching controller but keeps scanning. — _Moonlight 'spams controller packets even when not necessary' (src/input.cpp:282). Batching cuts injected-event count under load without dropping state transitions — directly reduces input-to-screen jitter and OS overhead._
|
- **Type-aware packet batching with batch_result_e (batched / not_batchable / terminate_batch)** — batch() overloads (src/input.cpp:1208-1475) sum relative-mouse deltas and scroll amounts (with __builtin_add_overflow guards that terminate the batch on 16-bit overflow), take the latest absolute position, and collapse controller/touch/pen move/hover runs. terminate_batch stops at a state-changing event (button change, eventType change, active-mask change) so ordering semantics are preserved; not_batchable skips a non-matching controller but keeps scanning. — _Moonlight 'spams controller packets even when not necessary' (src/input.cpp:282). Batching cuts injected-event count under load without dropping state transitions — directly reduces input-to-screen jitter and OS overhead._
|
||||||
- **VK→scancode injection with normalization fallback ladder** — keyboard_update (src/platform/windows/input.cpp:608) prefers KEYEVENTF_SCANCODE using the static US-English VK_TO_SCANCODE_MAP (keylayout.h). If the client flagged the VK as non-normalized (SS_KBE_FLAG_NON_NORMALIZED) it falls back to MapVirtualKey under config::input.always_send_scancodes (excluding VK_LWIN/RWIN/PAUSE which misbehave), else sends a raw VK event. A curated switch adds KEYEVENTF_EXTENDEDKEY for the extended-key set (arrows, nav cluster, RWIN/RMENU/RCONTROL, numpad divide, apps). — _Many games read DirectInput/raw scancodes, not VK events; sending scancodes is essential for in-game key compatibility. The extended-key flag is required or arrow keys / right-modifiers misfire. This is a concrete table+logic punktfunk's Windows VK path can adopt verbatim._
|
- **VK→scancode injection with normalization fallback ladder** — keyboard_update (src/platform/windows/input.cpp:608) prefers KEYEVENTF_SCANCODE using the static US-English VK_TO_SCANCODE_MAP (keylayout.h). If the client flagged the VK as non-normalized (SS_KBE_FLAG_NON_NORMALIZED) it falls back to MapVirtualKey under config::input.always_send_scancodes (excluding VK_LWIN/RWIN/PAUSE which misbehave), else sends a raw VK event. A curated switch adds KEYEVENTF_EXTENDEDKEY for the extended-key set (arrows, nav cluster, RWIN/RMENU/RCONTROL, numpad divide, apps). — _Many games read DirectInput/raw scancodes, not VK events; sending scancodes is essential for in-game key compatibility. The extended-key flag is required or arrow keys / right-modifiers misfire. This is a concrete table+logic punktfunk's Windows VK path can adopt verbatim._
|
||||||
- **Desktop-switch retry on every SendInput / InjectSyntheticPointerInput** — send_input (src/platform/windows/input.cpp:477) and inject_synthetic_pointer_input (line 499) retry once after calling syncThreadDesktop() (misc.cpp:251 — OpenInputDesktop(DF_ALLOWOTHERACCOUNTHOOK)+SetThreadDesktop) when the call fails and the input desktop handle changed, tracked in a thread_local _lastKnownInputDesktop. — _On Windows the input desktop changes on UAC prompts, lock screen, and Ctrl+Alt+Del (secure desktop / Winlogon). Without re-binding the thread to the new desktop, all injected input silently fails. This is exactly the secure-desktop problem area called out in punktfunk's docs/memory — Apollo solves it cheaply per-call rather than with a second process._
|
- **Desktop-switch retry on every SendInput / InjectSyntheticPointerInput** — send_input (src/platform/windows/input.cpp:477) and inject_synthetic_pointer_input (line 499) retry once after calling syncThreadDesktop() (misc.cpp:251 — OpenInputDesktop(DF_ALLOWOTHERACCOUNTHOOK)+SetThreadDesktop) when the call fails and the input desktop handle changed, tracked in a thread_local _lastKnownInputDesktop. — _On Windows the input desktop changes on UAC prompts, lock screen, and Ctrl+Alt+Del (secure desktop / Winlogon). Without re-binding the thread to the new desktop, all injected input silently fails. This is exactly the secure-desktop problem area called out in punktfunk's design/memory — Apollo solves it cheaply per-call rather than with a second process._
|
||||||
- **ViGEm dual-target gamepad with client-negotiated type selection** — alloc_gamepad (src/platform/windows/input.cpp:1175) picks X360 vs DS4 by precedence: explicit config (x360/ds4) > client-reported LI_CTYPE_PS/XBOX > motion_as_ds4 if accel/gyro present > touchpad_as_ds4 > default X360. It warns when capabilities (motion/touchpad/RGB) will be lost on X360. DS4 path packs motion, touchpad, and battery into DS4_REPORT_EX. — _DS4 is the only ViGEm target that carries gyro/accel, touchpad, and lightbar; X360 is the safe default. punktfunk already does client-negotiated pad type — Apollo's capability-driven auto-selection (motion/touchpad presence → DS4) and the explicit 'feature will be lost' warnings are a more refined policy worth porting._
|
- **ViGEm dual-target gamepad with client-negotiated type selection** — alloc_gamepad (src/platform/windows/input.cpp:1175) picks X360 vs DS4 by precedence: explicit config (x360/ds4) > client-reported LI_CTYPE_PS/XBOX > motion_as_ds4 if accel/gyro present > touchpad_as_ds4 > default X360. It warns when capabilities (motion/touchpad/RGB) will be lost on X360. DS4 path packs motion, touchpad, and battery into DS4_REPORT_EX. — _DS4 is the only ViGEm target that carries gyro/accel, touchpad, and lightbar; X360 is the safe default. punktfunk already does client-negotiated pad type — Apollo's capability-driven auto-selection (motion/touchpad presence → DS4) and the explicit 'feature will be lost' warnings are a more refined policy worth porting._
|
||||||
- **DS4 timestamped resend loop (ds4_update_ts_and_send)** — Every DS4 report advances wTimestamp by elapsed time in 5.333µs units and re-arms a 100ms repeat_task (src/platform/windows/input.cpp:1454-1481), so the 16-bit timestamp never stalls/overflows even when no new input arrives. — _'Some applications require updated timestamp values to register DS4 input' (line 1450). Without the heartbeat, motion-aware games ignore a held DS4. Non-obvious gotcha that any DS4-emulating host must replicate._
|
- **DS4 timestamped resend loop (ds4_update_ts_and_send)** — Every DS4 report advances wTimestamp by elapsed time in 5.333µs units and re-arms a 100ms repeat_task (src/platform/windows/input.cpp:1454-1481), so the 16-bit timestamp never stalls/overflows even when no new input arrives. — _'Some applications require updated timestamp values to register DS4 input' (line 1450). Without the heartbeat, motion-aware games ignore a held DS4. Non-obvious gotcha that any DS4-emulating host must replicate._
|
||||||
- **Synthetic pen/touch via InjectSyntheticPointerInput with periodic refresh and slot compaction** — Per-client synthetic pointer devices (CreateSyntheticPointerDevice, Win10 1809+). Touch slots are kept contiguous via perform_touch_compaction (line 715, required by the API), edge-triggered flags (DOWN/UP/CANCELED/UPDATE) are cleared after each frame (line 900/1020), and a 50ms repeat task (ISPI_REPEAT_INTERVAL) re-injects held state because Windows auto-cancels untouched interactions after ~1s. — _Touch/pen are stateful, slot-indexed, and self-cancelling — a fundamentally different injection model than mouse/keyboard. If punktfunk grows touch/pen, this is the reference for the Windows-specific contiguity + refresh requirements._
|
- **Synthetic pen/touch via InjectSyntheticPointerInput with periodic refresh and slot compaction** — Per-client synthetic pointer devices (CreateSyntheticPointerDevice, Win10 1809+). Touch slots are kept contiguous via perform_touch_compaction (line 715, required by the API), edge-triggered flags (DOWN/UP/CANCELED/UPDATE) are cleared after each frame (line 900/1020), and a 50ms repeat task (ISPI_REPEAT_INTERVAL) re-injects held state because Windows auto-cancels untouched interactions after ~1s. — _Touch/pen are stateful, slot-indexed, and self-cancelling — a fundamentally different injection model than mouse/keyboard. If punktfunk grows touch/pen, this is the reference for the Windows-specific contiguity + refresh requirements._
|
||||||
@@ -479,7 +481,7 @@ A single static `struct tray` (l.112) holds icon path, tooltip, a fixed menu arr
|
|||||||
- **Per-vendor encoder enum string translators** — Whole namespaces (nv/amd/qsv/vt/sw, config.cpp l.53-357) map human strings ('ultralowlatency','cqp','superfast') to encoder SDK integer constants, with low-latency presets as the DEFAULTS (e.g. amd usage = ultralowlatency l.469-471, sw preset 'superfast'/'zerolatency' l.451-453, nvenc realtime HAGS + high-power mode on by default l.457-459). — _Defaults are explicitly tuned for latency, not quality — the encoder is configured ultra-low-latency out of the box. A low-latency host's config defaults should bias the same way; this is the concrete table punktfunk can port for AMD/QSV/VT vendor parity._
|
- **Per-vendor encoder enum string translators** — Whole namespaces (nv/amd/qsv/vt/sw, config.cpp l.53-357) map human strings ('ultralowlatency','cqp','superfast') to encoder SDK integer constants, with low-latency presets as the DEFAULTS (e.g. amd usage = ultralowlatency l.469-471, sw preset 'superfast'/'zerolatency' l.451-453, nvenc realtime HAGS + high-power mode on by default l.457-459). — _Defaults are explicitly tuned for latency, not quality — the encoder is configured ultra-low-latency out of the box. A low-latency host's config defaults should bias the same way; this is the concrete table punktfunk can port for AMD/QSV/VT vendor parity._
|
||||||
- **Embedded HTTPS server sharing the host TLS identity** — confighttp uses SimpleWeb::Server<HTTPS> seeded with nvhttp.cert/pkey (confighttp.cpp l.1511) — the SAME cert the Moonlight/GameStream pairing uses — on a fixed port offset (PORT_HTTPS=1 → base+1). — _One identity, one cert, management UI and stream control on adjacent ports. punktfunk already shares its cert.pem between GameStream pairing and punktfunk/1; the lesson is the web console can reuse it rather than carrying a separate mgmt TLS story._
|
- **Embedded HTTPS server sharing the host TLS identity** — confighttp uses SimpleWeb::Server<HTTPS> seeded with nvhttp.cert/pkey (confighttp.cpp l.1511) — the SAME cert the Moonlight/GameStream pairing uses — on a fixed port offset (PORT_HTTPS=1 → base+1). — _One identity, one cert, management UI and stream control on adjacent ports. punktfunk already shares its cert.pem between GameStream pairing and punktfunk/1; the lesson is the web console can reuse it rather than carrying a separate mgmt TLS story._
|
||||||
- **Single-string session cookie with salted-hash validation** — authenticate() (l.179) validates hex(hash(cookie + salt)) against an in-memory sessionCookie with a 15-day steady_clock expiry; login (l.1469) generates the raw cookie with rand_alphabet(64) and stores only its hash. checkIPOrigin gates by pc/lan/wan BEFORE auth. — _Contrast with punktfunk's mgmt API (bearer token in ~/.config/punktfunk/mgmt-token + web login gate). Apollo's cookie+IP-origin model is simpler for a desktop single-operator host and avoids a static long-lived token; worth considering for the web console's UX._
|
- **Single-string session cookie with salted-hash validation** — authenticate() (l.179) validates hex(hash(cookie + salt)) against an in-memory sessionCookie with a 15-day steady_clock expiry; login (l.1469) generates the raw cookie with rand_alphabet(64) and stores only its hash. checkIPOrigin gates by pc/lan/wan BEFORE auth. — _Contrast with punktfunk's mgmt API (bearer token in ~/.config/punktfunk/mgmt-token + web login gate). Apollo's cookie+IP-origin model is simpler for a desktop single-operator host and avoids a static long-lived token; worth considering for the web console's UX._
|
||||||
- **Windows service↔UI self-elevation handshake** — config::parse (l.1490-1534): a non-admin Start-Menu shortcut self-relaunches as admin (ShellExecuteExW 'runas' --shortcut-admin l.1511), starts the service, wait_for_ui_ready() polls the Win32 TCP table for the LISTEN socket (entry_handler.cpp l.236), then launch_ui(), and returns 1 so the shortcut process never starts a stream. — _This is the mature answer to the exact problem punktfunk's Windows host hit (docs/windows-host.md 'secure-desktop two-process design', Session-0 vs interactive session). Apollo solves UI-launch-from-service cleanly; the TCP-table readiness poll is directly portable._
|
- **Windows service↔UI self-elevation handshake** — config::parse (l.1490-1534): a non-admin Start-Menu shortcut self-relaunches as admin (ShellExecuteExW 'runas' --shortcut-admin l.1511), starts the service, wait_for_ui_ready() polls the Win32 TCP table for the LISTEN socket (entry_handler.cpp l.236), then launch_ui(), and returns 1 so the shortcut process never starts a stream. — _This is the mature answer to the exact problem punktfunk's Windows host hit (design/windows-host.md 'secure-desktop two-process design', Session-0 vs interactive session). Apollo solves UI-launch-from-service cleanly; the TCP-table readiness poll is directly portable._
|
||||||
- **Tray thread DACL hardening for SYSTEM-context survival** — init_tray() (l.143-197) adds an EXPLICIT_ACCESS ACE granting SYNCHRONIZE to Everyone on the current thread handle before registering the icon, and busy-waits for GetShellWindow() (l.201) so the icon registers reliably across logoff/logon. — _When the host runs as a Windows service (SYSTEM), Explorer can't open the thread to detect termination → ghost tray icons forever. punktfunk's Windows host, if it ever runs as a service with a tray, needs this exact DACL fix._
|
- **Tray thread DACL hardening for SYSTEM-context survival** — init_tray() (l.143-197) adds an EXPLICIT_ACCESS ACE granting SYNCHRONIZE to Everyone on the current thread handle before registering the icon, and busy-waits for GetShellWindow() (l.201) so the icon registers reliably across logoff/logon. — _When the host runs as a Windows service (SYSTEM), Explorer can't open the thread to detect termination → ghost tray icons forever. punktfunk's Windows host, if it ever runs as a service with a tray, needs this exact DACL fix._
|
||||||
- **JSON-list config values parsed via ptree wrapping** — Multi-line bracketed values (global_prep_cmd, server_cmd, dd_mode_remapping) are extracted as raw strings by the flat parser, wrapped in a synthetic JSON object, then parsed by boost ptree (list_prep_cmd_f l.949, mode_remapping_from_view l.411). — _A pragmatic hybrid: flat key=value for the human-editable 90%, embedded JSON for structured fields, without committing to full-JSON config. Shows how to grow a flat config without a rewrite._
|
- **JSON-list config values parsed via ptree wrapping** — Multi-line bracketed values (global_prep_cmd, server_cmd, dd_mode_remapping) are extracted as raw strings by the flat parser, wrapped in a synthetic JSON object, then parsed by boost ptree (list_prep_cmd_f l.949, mode_remapping_from_view l.411). — _A pragmatic hybrid: flat key=value for the human-editable 90%, embedded JSON for structured fields, without committing to full-JSON config. Shows how to grow a flat config without a rewrite._
|
||||||
|
|
||||||
@@ -680,7 +682,7 @@ Both transports use the persistent `AudioCapSlot` (gamestream/audio.rs:251-257)
|
|||||||
|
|
||||||
### Input handling & injection — 🔴 Apollo ahead
|
### Input handling & injection — 🔴 Apollo ahead
|
||||||
|
|
||||||
For the Windows host specifically, Apollo is ahead on input breadth and robustness. Apollo covers mouse (rel+abs), keyboard (with a static US-layout VK→scancode table for game compatibility), Unicode text, scroll, **touch + pen via CreateSyntheticPointerDevice**, and **both X360 and DS4** gamepads with rumble/LED/motion/touchpad/battery feedback (Apollo src/platform/windows/input.cpp). punktfunk's Windows host covers mouse/keyboard/scroll/X360-only; touch and pen are explicit no-ops (sendinput.rs:231-237), there is no Unicode text path (gamestream/input.rs:83-84), and only the Xbox 360 virtual pad exists on Windows. Apollo also has the more efficient secure-desktop model (retry-only) vs punktfunk's per-event reattach (sendinput.rs:97), and Apollo's task-pool queue + type-aware batching (Apollo src/input.cpp:1481-1571, 1208-1475) coalesces input spam off the network thread — punktfunk's GameStream path injects inline on the ENet thread (control.rs:207-211) with no batching anywhere. punktfunk's design is cleaner and its m3 path's session-end held-key release + backend-follow logic is genuinely nicer than Apollo's, but those are punktfunk/1-specific; on the shared Windows-host injection surface Apollo is the more complete, battle-tested implementation. punktfunk's docs/windows-secure-desktop.md already flags the retry-only refactor as planned-but-unshipped, confirming the gap.
|
For the Windows host specifically, Apollo is ahead on input breadth and robustness. Apollo covers mouse (rel+abs), keyboard (with a static US-layout VK→scancode table for game compatibility), Unicode text, scroll, **touch + pen via CreateSyntheticPointerDevice**, and **both X360 and DS4** gamepads with rumble/LED/motion/touchpad/battery feedback (Apollo src/platform/windows/input.cpp). punktfunk's Windows host covers mouse/keyboard/scroll/X360-only; touch and pen are explicit no-ops (sendinput.rs:231-237), there is no Unicode text path (gamestream/input.rs:83-84), and only the Xbox 360 virtual pad exists on Windows. Apollo also has the more efficient secure-desktop model (retry-only) vs punktfunk's per-event reattach (sendinput.rs:97), and Apollo's task-pool queue + type-aware batching (Apollo src/input.cpp:1481-1571, 1208-1475) coalesces input spam off the network thread — punktfunk's GameStream path injects inline on the ENet thread (control.rs:207-211) with no batching anywhere. punktfunk's design is cleaner and its m3 path's session-end held-key release + backend-follow logic is genuinely nicer than Apollo's, but those are punktfunk/1-specific; on the shared Windows-host injection surface Apollo is the more complete, battle-tested implementation. punktfunk's design/archive/windows-secure-desktop.md already flags the retry-only refactor as planned-but-unshipped, confirming the gap.
|
||||||
|
|
||||||
|
|
||||||
**How punktfunk does it.**
|
**How punktfunk does it.**
|
||||||
@@ -748,7 +750,7 @@ For the Windows host specifically, Apollo is clearly ahead on this subsystem. Ap
|
|||||||
- punktfunk has TWO app surfaces by design: the GameStream apps.json catalog (Moonlight compat) AND a richer punktfunk/1 library (Steam local scan + custom store + CDN art + uniform GameEntry grid). Apollo has only the apps.json catalog because it ships no client.
|
- punktfunk has TWO app surfaces by design: the GameStream apps.json catalog (Moonlight compat) AND a richer punktfunk/1 library (Steam local scan + custom store + CDN art + uniform GameEntry grid). Apollo has only the apps.json catalog because it ships no client.
|
||||||
- punktfunk's launch security model is deliberately client-can't-inject: the client sends only a store-qualified id and the host resolves it against its OWN library (library.rs:394-412), with steam appid validated digits-only. Apollo trusts its own apps.json cmds (it has no untrusted remote launch id).
|
- punktfunk's launch security model is deliberately client-can't-inject: the client sends only a store-qualified id and the host resolves it against its OWN library (library.rs:394-412), with steam appid validated digits-only. Apollo trusts its own apps.json cmds (it has no untrusted remote launch id).
|
||||||
- punktfunk keeps NO async on the per-frame path; the SudoVDA watchdog pinger and capture are native threads. Apollo's libdisplaydevice RetryScheduler is its own machinery; punktfunk has no equivalent scheduler by choice (yet — see candidate improvements).
|
- punktfunk keeps NO async on the per-frame path; the SudoVDA watchdog pinger and capture are native threads. Apollo's libdisplaydevice RetryScheduler is its own machinery; punktfunk has no equivalent scheduler by choice (yet — see candidate improvements).
|
||||||
- punktfunk's Windows virtual display is the SOLE primary output (isolate_displays + CDS_SET_PRIMARY) specifically to capture the secure/Winlogon desktop — a deliberate, documented design (docs/windows-secure-desktop.md) that goes beyond what stock Apollo needs.
|
- punktfunk's Windows virtual display is the SOLE primary output (isolate_displays + CDS_SET_PRIMARY) specifically to capture the secure/Winlogon desktop — a deliberate, documented design (design/archive/windows-secure-desktop.md) that goes beyond what stock Apollo needs.
|
||||||
|
|
||||||
**Transfer candidates from Apollo (6):** _Actually launch the app/game on Windows (CreateProcessAsUserW into the user session)_, _Display-config apply/revert with a retry scheduler and guaranteed revert on disconnect_, _Set HDR on the virtual display and advertise IsHdrSupported when the client requests it_, _Per-(app,client) stable virtual-display GUID instead of one fixed MONITOR_GUID_, _Inject per-app launch env (client res/fps/HDR/audio + status) for launch scripts_, _auto_detach heuristic for launcher-style apps (Steam/UWP) that exit immediately_ — see Part 4.
|
**Transfer candidates from Apollo (6):** _Actually launch the app/game on Windows (CreateProcessAsUserW into the user session)_, _Display-config apply/revert with a retry scheduler and guaranteed revert on disconnect_, _Set HDR on the virtual display and advertise IsHdrSupported when the client requests it_, _Per-(app,client) stable virtual-display GUID instead of one fixed MONITOR_GUID_, _Inject per-app launch env (client res/fps/HDR/audio + status) for launch scripts_, _auto_detach heuristic for launcher-style apps (Steam/UWP) that exit immediately_ — see Part 4.
|
||||||
|
|
||||||
@@ -765,7 +767,7 @@ On the API itself punktfunk is arguably ahead (versioned `/api/v1`, compile-time
|
|||||||
punktfunk splits the control surface into three pieces and deliberately keeps them OUT of the host binary where Apollo bundles them in.
|
punktfunk splits the control surface into three pieces and deliberately keeps them OUT of the host binary where Apollo bundles them in.
|
||||||
|
|
||||||
##### 1. Management plane = a versioned REST API only (`crates/punktfunk-host/src/mgmt.rs`)
|
##### 1. Management plane = a versioned REST API only (`crates/punktfunk-host/src/mgmt.rs`)
|
||||||
- An axum `Router` (`mgmt.rs:166` `fn app`) under `/api/v1`, single source of truth shared between the live server and the `openapi` subcommand (`mgmt.rs:195` `api_router_parts`, `main.rs:86`). The OpenAPI 3.1 doc is generated at compile time with `utoipa` and a checked-in copy is drift-tested against `docs/api/openapi.json` (`mgmt.rs:1582` `openapi_document_is_complete_and_checked_in`). This is a real maturity advantage over Apollo, which has no machine-readable API spec.
|
- An axum `Router` (`mgmt.rs:166` `fn app`) under `/api/v1`, single source of truth shared between the live server and the `openapi` subcommand (`mgmt.rs:195` `api_router_parts`, `main.rs:86`). The OpenAPI 3.1 doc is generated at compile time with `utoipa` and a checked-in copy is drift-tested against `api/openapi.json` (`mgmt.rs:1582` `openapi_document_is_complete_and_checked_in`). This is a real maturity advantage over Apollo, which has no machine-readable API spec.
|
||||||
- Routes: host info/capabilities/port map (`mgmt.rs:590`), live status (`mgmt.rs:671`), paired GameStream clients list/unpair (`mgmt.rs:707`,`752`), the GameStream PIN flow (`mgmt.rs:789`,`814`), the native punktfunk/1 pairing surface — arm/disarm/status/list/unpair (`mgmt.rs:870`-`994`), **delegated pairing approval** via a pending-device queue (`mgmt.rs:1011`,`1049`,`1094`), session stop + force-IDR (`mgmt.rs:1120`,`1144`), and game-library CRUD (`mgmt.rs:1171`-`1252`).
|
- Routes: host info/capabilities/port map (`mgmt.rs:590`), live status (`mgmt.rs:671`), paired GameStream clients list/unpair (`mgmt.rs:707`,`752`), the GameStream PIN flow (`mgmt.rs:789`,`814`), the native punktfunk/1 pairing surface — arm/disarm/status/list/unpair (`mgmt.rs:870`-`994`), **delegated pairing approval** via a pending-device queue (`mgmt.rs:1011`,`1049`,`1094`), session stop + force-IDR (`mgmt.rs:1120`,`1144`), and game-library CRUD (`mgmt.rs:1171`-`1252`).
|
||||||
- **HTTPS always, even on loopback** (`mgmt.rs:75` `run`): it runs the rustls handshake itself via tokio-rustls so it can surface the verified peer cert to handlers (`mgmt.rs:115` `serve_https`), reusing the host's persistent identity cert that clients already pin (`mgmt.rs:90`).
|
- **HTTPS always, even on loopback** (`mgmt.rs:75` `run`): it runs the rustls handshake itself via tokio-rustls so it can surface the verified peer cert to handlers (`mgmt.rs:115` `serve_https`), reusing the host's persistent identity cert that clients already pin (`mgmt.rs:90`).
|
||||||
- **Dual auth** (`mgmt.rs:518` `require_auth`): a paired native client authenticates by its **mTLS certificate fingerprint** (matched against the native paired store, no token needed); everyone else (the web console / admin) uses a bearer token compared in constant time (`mgmt.rs:551` `token_eq` via SHA-256 digest compare). `/api/v1/health` is the only unauthenticated route. This is stronger than Apollo's single-global-session-cookie scheme (Apollo `confighttp.cpp` has exactly one `std::string sessionCookie`).
|
- **Dual auth** (`mgmt.rs:518` `require_auth`): a paired native client authenticates by its **mTLS certificate fingerprint** (matched against the native paired store, no token needed); everyone else (the web console / admin) uses a bearer token compared in constant time (`mgmt.rs:551` `token_eq` via SHA-256 digest compare). `/api/v1/health` is the only unauthenticated route. This is stronger than Apollo's single-global-session-cookie scheme (Apollo `confighttp.cpp` has exactly one `std::string sessionCookie`).
|
||||||
@@ -784,7 +786,7 @@ A token always exists with zero operator steps: env `PUNKTFUNK_MGMT_TOKEN` wins,
|
|||||||
There is no system tray, no balloon notifications, and no "open the UI in the browser" entry point anywhere in `crates/punktfunk-host`. Apollo has a full cross-platform tray (`system_tray.cpp`) with state-driven icon/notification updates and menu callbacks.
|
There is no system tray, no balloon notifications, and no "open the UI in the browser" entry point anywhere in `crates/punktfunk-host`. Apollo has a full cross-platform tray (`system_tray.cpp`) with state-driven icon/notification updates and menu callbacks.
|
||||||
|
|
||||||
##### 6. Windows launch story = scripts, not in-binary
|
##### 6. Windows launch story = scripts, not in-binary
|
||||||
The two-process secure-desktop design exists for *capture* (`main.rs:204` `wgc-helper` subcommand + `capture/wgc_relay.rs` `CreateProcessAsUserW`), but the service/desktop launch dance is handled by external scripts (scheduled task -> PsExec64 -> launch.vbs -> host-run.cmd; `docs/windows-host.md:77-96`). punktfunk has no in-binary service install, no self-elevation, no "launch UI in browser", and no tray — all of which Apollo bakes into `config.cpp`/`entry_handler.cpp`/`system_tray.cpp`.
|
The two-process secure-desktop design exists for *capture* (`main.rs:204` `wgc-helper` subcommand + `capture/wgc_relay.rs` `CreateProcessAsUserW`), but the service/desktop launch dance is handled by external scripts (scheduled task -> PsExec64 -> launch.vbs -> host-run.cmd; `design/windows-host.md:77-96`). punktfunk has no in-binary service install, no self-elevation, no "launch UI in browser", and no tray — all of which Apollo bakes into `config.cpp`/`entry_handler.cpp`/`system_tray.cpp`.
|
||||||
|
|
||||||
|
|
||||||
**Intentional divergences (by design, not gaps):**
|
**Intentional divergences (by design, not gaps):**
|
||||||
@@ -897,11 +899,11 @@ QPC values from `LastPresentTime`/`LastMouseUpdateTime` are translated to `stead
|
|||||||
#### Transfer opportunities
|
#### Transfer opportunities
|
||||||
|
|
||||||
- **Treat S_OK-with-no-change frames as timeouts via DXGI update flags** (sev high, medium) — In dxgi.rs acquire(), after a successful AcquireNextFrame, compute frame_update_flag = info.LastPresentTime != 0 (and/or info.AccumulatedFrames != 0) and mouse_update_flag from LastMouseUpdateTime/PointerShapeBufferSize. Always call update_cursor (mouse). If !frame_update_flag, ReleaseFrame and return Ok(None) (so next_frame repeats last_present) UNLESS the cursor moved and we need a recomposite — in which case recomposite onto the existing last_present texture instead of CopyResource'ing the source. This cuts idle/cursor-only GPU load and avoids re-encoding unchanged content.
|
- **Treat S_OK-with-no-change frames as timeouts via DXGI update flags** (sev high, medium) — In dxgi.rs acquire(), after a successful AcquireNextFrame, compute frame_update_flag = info.LastPresentTime != 0 (and/or info.AccumulatedFrames != 0) and mouse_update_flag from LastMouseUpdateTime/PointerShapeBufferSize. Always call update_cursor (mouse). If !frame_update_flag, ReleaseFrame and return Ok(None) (so next_frame repeats last_present) UNLESS the cursor moved and we need a recomposite — in which case recomposite onto the existing last_present texture instead of CopyResource'ing the source. This cuts idle/cursor-only GPU load and avoids re-encoding unchanged content.
|
||||||
- **Detect resolution/format change on the acquire hot path, not only during rebuild** (sev high, small) — In acquire(), after res.cast::<ID3D11Texture2D>(), call GetDesc and compare Width/Height/Format against self.width/height and the expected format (BGRA8 vs R16G16B16A16_FLOAT). On mismatch, ReleaseFrame and run the existing recreate_dupl path (or drop gpu_copy/staging/fp16/hdr10 textures and update width/height/hdr_fp16) so the encoder re-inits cleanly. This makes live resolution + HDR-toggle changes robust even when DDA doesn't fault.
|
- ✅ **Detect resolution/format change on the acquire hot path, not only during rebuild** — SHIPPED (2026-06-20). [#2]
|
||||||
- **Release the duplication device lock during idle to avoid encoder starvation** (sev medium, small) — Cap the per-acquire DDA timeout to a small value (e.g. 8-16ms) and, when it returns WAIT_TIMEOUT, std::thread::sleep a few ms with no outstanding AcquireNextFrame before retrying — so the encode thread can grab the device for NVENC setup/reinit. Keep the generous timeout only for first_frame. Low risk, directly mirrors Apollo's documented fix.
|
- ⊘ **Release the duplication device lock during idle to avoid encoder starvation** — OBSOLETE / not-a-bug (2026-06-20). [#34]
|
||||||
- **Add client-framerate frame pacing with a high-precision timer** (sev medium, large) — Add an optional pacing layer (in dxgi.rs or the encode-loop caller in punktfunk1.rs/encode.rs) keyed on the negotiated client framerate: track a group start from the frame pts, sleep to the computed target with a Windows high-resolution timer (timeBeginPeriod or CREATE_WAITABLE_TIMER_HIGH_RESOLUTION), and snap near-integral refresh to integer divisors. This is the lever for steady pacing on odd refresh rates without changing the zero-copy design.
|
- **Add client-framerate frame pacing with a high-precision timer** (sev medium, large) — Add an optional pacing layer (in dxgi.rs or the encode-loop caller in punktfunk1.rs/encode.rs) keyed on the negotiated client framerate: track a group start from the frame pts, sleep to the computed target with a Windows high-resolution timer (timeBeginPeriod or CREATE_WAITABLE_TIMER_HIGH_RESOLUTION), and snap near-integral refresh to integer divisors. This is the lever for steady pacing on odd refresh rates without changing the zero-copy design.
|
||||||
- **Harden GPU scheduling priority + SetMaximumFrameLatency + NVIDIA-HAGS NVENC-realtime avoidance** (sev medium, medium) — After D3D11CreateDevice in dxgi.rs (and the NVENC encoder device wherever it's built), query IDXGIDevice1::SetMaximumFrameLatency(1) and SetGPUThreadPriority; load gdi32 D3DKMTSetProcessSchedulingPriorityClass and request HIGH (not REALTIME) when the adapter is NVIDIA (VendorId 0x10DE) with HAGS on, REALTIME otherwise. Mirror the privilege-enable. Guard behind admin/SYSTEM (host already relaunches as SYSTEM).
|
- ✅ **Harden GPU scheduling priority + SetMaximumFrameLatency + NVIDIA-HAGS NVENC-realtime avoidance** — SHIPPED (2026-06-20). [#47]
|
||||||
- **Retry DuplicateOutput at startup and request encoder-supported formats via Output5** (sev medium, small) — In open() wrap DuplicateOutput in a short retry (2-3 tries, ~200ms apart, re-attach_input_desktop between) before bailing. Optionally cast the output to IDXGIOutput5 and call DuplicateOutput1 with an explicit format list (BGRA8 for SDR, R16G16B16A16_FLOAT for HDR) so the capture format is intentional rather than incidental, falling back to DuplicateOutput when Output5 is absent.
|
- ✅ **Retry DuplicateOutput at startup and request encoder-supported formats via Output5** — SHIPPED (2026-06-20). [#35]
|
||||||
|
|
||||||
### Windows.Graphics.Capture (WGC) path — Apollo vs punktfunk
|
### Windows.Graphics.Capture (WGC) path — Apollo vs punktfunk
|
||||||
|
|
||||||
@@ -1099,10 +1101,10 @@ punktfunk's cursor handling lives in `crates/punktfunk-host/src/capture/dxgi.rs`
|
|||||||
|
|
||||||
#### Transfer opportunities
|
#### Transfer opportunities
|
||||||
|
|
||||||
- ✅ **DONE (2026-06-16)** — **Split every cursor shape into an alpha image + an XOR image (two-pass composite)** (sev high, medium) — Refactor convert_pointer_shape in dxgi.rs to return two optional images (alpha, xor) mirroring Apollo's split. Store cursor_shape as Option<(alpha, xor)>, upload up to two SRVs in CursorCompositor, and in composite_cursor_gpu run the alpha pass with self.blend then the xor pass with self.blend_invert (skip empties). Drop the single cursor_invert flag.
|
- ✅ **Split every cursor shape into an alpha image + an XOR image (two-pass composite)** — SHIPPED (2026-06-16; capture/dxgi.rs). [#13]
|
||||||
- **Render the monochrome 'inverse of screen' pixels via the XOR pass instead of dropping them** (sev medium, small) — In convert_pointer_shape's monochrome branch (dxgi.rs:628-654), once the dual-pass split (above) exists, route code (1,1) to the XOR image as white and codes (0,0)/(0,1) to the alpha image as opaque black/white, matching Apollo's case mapping.
|
- ✅ **Render the monochrome 'inverse of screen' pixels via the XOR pass instead of dropping them** — SHIPPED (2026-06-20). [#37]
|
||||||
- ⊘ **ALREADY-HANDLED (2026-06-16; premise incorrect — DDA returns S_OK on pointer-only updates, punktfunk recomposites)** — **Composite the moved cursor onto a clean copy even when DDA returns no new desktop frame** (sev high, large) — Keep a clean intermediate copy of the last desktop frame (an extra DEFAULT texture). In acquire (dxgi.rs:1341), when AcquireNextFrame times out but update_cursor saw a position change (LastMouseUpdateTime changed) and the cursor is visible, copy the clean intermediate into gpu_copy and re-run composite_cursor_gpu, then return that as a fresh frame instead of repeating last_present.
|
- ⊘ **Composite the moved cursor onto a clean copy even when DDA returns no new desktop frame** — NOT-A-BUG (2026-06-16; DDA returns S_OK on pointer-only updates and punktfunk recomposites). [#21]
|
||||||
- **Stop baking the cursor destructively into the repeated gpu_copy texture** (sev medium, medium) — Add a clean base texture: CopyResource(duplication -> clean_base), then CopyResource(clean_base -> gpu_copy) and composite onto gpu_copy. Repeat clean_base (cursor-free) plus a re-composite on repeats. Also create the cursor RTV once per gpu_copy and cache it rather than CreateRenderTargetView every composite (dxgi.rs:1181-1184).
|
- ✅ **Stop baking the cursor destructively into the repeated gpu_copy texture** — SHIPPED (2026-06-20). [#49]
|
||||||
- **Handle rotated outputs in cursor positioning** (sev low, medium) — Read rotation from DXGI_OUTDUPL_DESC.Rotation when opening/rebuilding the duplication (around dxgi.rs:888 and 1298), store it on DuplCapturer, and apply Apollo's rotation transform when computing the NDC rect in CursorCompositor::draw and when sampling the cursor texture in the VS.
|
- **Handle rotated outputs in cursor positioning** (sev low, medium) — Read rotation from DXGI_OUTDUPL_DESC.Rotation when opening/rebuilding the duplication (around dxgi.rs:888 and 1298), store it on DuplCapturer, and apply Apollo's rotation transform when computing the NDC rect in CursorCompositor::draw and when sampling the cursor texture in the VS.
|
||||||
- **Validate masked-color mask bytes and log illegal values** (sev low, small) — In the MASKED_COLOR branch of convert_pointer_shape (dxgi.rs:594-627), branch explicitly on mask==0x00 vs mask==0xFF and emit a tracing::warn! once for any other value, matching Apollo's guard, so future cursor-render bugs are observable.
|
- **Validate masked-color mask bytes and log illegal values** (sev low, small) — In the MASKED_COLOR branch of convert_pointer_shape (dxgi.rs:594-627), branch explicitly on mask==0x00 vs mask==0xFF and emit a tracing::warn! once for any other value, matching Apollo's guard, so future cursor-render bugs are observable.
|
||||||
|
|
||||||
@@ -1295,10 +1297,10 @@ punktfunk drives the **raw NVENC API** via `nvidia_video_codec_sdk::{sys, ENCODE
|
|||||||
|
|
||||||
#### Transfer opportunities
|
#### Transfer opportunities
|
||||||
|
|
||||||
- **Add real reference-frame invalidation (RFI) instead of always forcing IDR** (sev high, large) — In nvenc.rs add `maxNumRefFramesInDPB`/`numRefL0=1` to the HEVC/H264/AV1 config in init_session, gate on a new caps query NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION, track last_encoded_frame_index + last_rfi_range, and add an `invalidate_ref_frames(first,last)` method on the Encoder trait (encode.rs:41-51) that calls API.invalidate_ref_frames per index with Apollo's dedup/escalate-to-IDR-on-overflow logic. Wire punktfunk1.rs RFI requests to it, falling back to request_keyframe() only when it returns false.
|
- ✅ **Add real reference-frame invalidation (RFI) instead of always forcing IDR** — SHIPPED (2026-06-20; NVENC impl CI-pending). [#22]
|
||||||
- **Query nvEncGetEncodeCaps and gate config on real GPU capabilities** (sev medium, medium) — Add a `get_cap(cap: NV_ENC_CAPS) -> i32` helper in nvenc.rs after open_encode_session_ex (using API.get_encode_caps), verify codec_guid is in get_encode_guids, reject out-of-range WxH up front, and use SUPPORT_10BIT_ENCODE / SUPPORT_REF_PIC_INVALIDATION / SUPPORT_CUSTOM_VBV_BUF_SIZE to gate the corresponding config rather than assuming support. Surfaces clear errors instead of opaque InvalidParam.
|
- ✅ **Query nvEncGetEncodeCaps and gate config on real GPU capabilities** — SHIPPED (2026-06-20; CI-pending). [#51]
|
||||||
- **Use async encode with a Win32 completion event + timeout** (sev medium, medium) — In nvenc.rs, gate on NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT, create a per-bitstream Win32 Event (windows::Win32::System::Threading::CreateEventW), set init.enableEncodeAsync=1, store the event in `pending`, set pic.completionEvent + lock.doNotWait=1, and in poll() WaitForSingleObject(ev, 100ms) before lock_bitstream — returning a clear timeout error instead of blocking forever.
|
- **Use async encode with a Win32 completion event + timeout** (sev medium, medium) — In nvenc.rs, gate on NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT, create a per-bitstream Win32 Event (windows::Win32::System::Threading::CreateEventW), set init.enableEncodeAsync=1, store the event in `pending`, set pic.completionEvent + lock.doNotWait=1, and in poll() WaitForSingleObject(ev, 100ms) before lock_bitstream — returning a clear timeout error instead of blocking forever.
|
||||||
- **Minimize NvEnc API/struct versions per codec for older-driver compatibility** (sev medium, medium) — Add a `min_api_version(codec)` (v11 for H264/HEVC, v12 for AV1) and a helper that rewrites the version word (and optionally the struct-revision byte) before each NvEnc struct is passed, mirroring nvenc_base.cpp:666-680. Set apiVersion in open_encode_session_ex (nvenc.rs:186) from it. Maximizes driver compatibility for the field.
|
- ⊘ **Minimize NvEnc API/struct versions per codec for older-driver compatibility** — OBSOLETE (2026-06-20; handled by the SDK crate). [#53]
|
||||||
- **Add zeroReorderDelay/lookahead-off/lowDelayKeyFrameScale and always emit SDR VUI** (sev low, small) — In init_session set cfg.rcParams.zeroReorderDelay=1, enableLookahead=0, lowDelayKeyFrameScale=1 right after the CBR/VBV block (nvenc.rs:220-227). Add an SDR VUI branch (BT.709 primaries/transfer/matrix, limited range) alongside the existing HDR branch (:243) so every HEVC/H264 stream signals its colorspace.
|
- **Add zeroReorderDelay/lookahead-off/lowDelayKeyFrameScale and always emit SDR VUI** (sev low, small) — In init_session set cfg.rcParams.zeroReorderDelay=1, enableLookahead=0, lowDelayKeyFrameScale=1 right after the CBR/VBV block (nvenc.rs:220-227). Add an SDR VUI branch (BT.709 primaries/transfer/matrix, limited range) alongside the existing HDR branch (:243) so every HEVC/H264 stream signals its colorspace.
|
||||||
- **Honor client slices-per-frame and offer NVENC intra-refresh** (sev low, medium) — Thread a slices-per-frame value from session negotiation into NvencD3d11Encoder::open and set hevcConfig/h264Config sliceMode=3 + sliceModeData in init_session; for AV1 set numTileRows/numTileColumns as nearest powers of two. Optionally add an intra-refresh config branch gated on NV_ENC_CAPS_SUPPORT_INTRA_REFRESH as an alternative recovery mode to RFI.
|
- **Honor client slices-per-frame and offer NVENC intra-refresh** (sev low, medium) — Thread a slices-per-frame value from session negotiation into NvencD3d11Encoder::open and set hevcConfig/h264Config sliceMode=3 + sliceModeData in init_session; for AV1 set numTileRows/numTileColumns as nearest powers of two. Optionally add an intra-refresh config branch gated on NV_ENC_CAPS_SUPPORT_INTRA_REFRESH as an alternative recovery mode to RFI.
|
||||||
|
|
||||||
@@ -1492,8 +1494,8 @@ punktfunk's SudoVDA backend lives in `crates/punktfunk-host/src/vdisplay/sudovda
|
|||||||
- **Detect watchdog ping failures and escalate (re-open the device)** (sev high, medium) — In the pinger thread in sudovda.rs (around 485-494), track a consecutive-failure counter; after N (3) failures set a shared AtomicBool 'driver_dead' on SudoVdaDisplay/keepalive and stop pinging. Surface it so the session loop in punktfunk1.rs treats a dead virtual display like ACCESS_LOST and re-opens (re-run open_device + re-create). Add a DriverStatus enum mirroring Apollo's DRIVER_STATUS.
|
- **Detect watchdog ping failures and escalate (re-open the device)** (sev high, medium) — In the pinger thread in sudovda.rs (around 485-494), track a consecutive-failure counter; after N (3) failures set a shared AtomicBool 'driver_dead' on SudoVdaDisplay/keepalive and stop pinging. Surface it so the session loop in punktfunk1.rs treats a dead virtual display like ACCESS_LOST and re-opens (re-run open_device + re-create). Add a DriverStatus enum mirroring Apollo's DRIVER_STATUS.
|
||||||
- **Gate on SudoVDA protocol-version compatibility instead of only logging it** (sev medium, small) — In SudoVdaDisplay::new (sudovda.rs:412-432) parse {Major,Minor,Incremental} and compare against a compiled-in EXPECTED_PROTOCOL {Major:0,Minor:2}. If Major differs or our Minor > driver Minor, return Err with a 'driver too old / incompatible — update SudoVDA' message (and a distinct error variant the mgmt API can surface, like Apollo's VirtualDisplayDriverReady in nvhttp.cpp:936).
|
- **Gate on SudoVDA protocol-version compatibility instead of only logging it** (sev medium, small) — In SudoVdaDisplay::new (sudovda.rs:412-432) parse {Major,Minor,Incremental} and compare against a compiled-in EXPECTED_PROTOCOL {Major:0,Minor:2}. If Major differs or our Minor > driver Minor, return Err with a 'driver too old / incompatible — update SudoVDA' message (and a distinct error variant the mgmt API can surface, like Apollo's VirtualDisplayDriverReady in nvhttp.cpp:936).
|
||||||
- **Retry device open with exponential backoff** (sev medium, small) — Wrap open_device in SudoVdaDisplay::new (sudovda.rs:412-413) in a 20→320ms backoff loop matching Apollo; on a session-time re-open after watchdog failure, allow a few retries with ~1s spacing.
|
- **Retry device open with exponential backoff** (sev medium, small) — Wrap open_device in SudoVdaDisplay::new (sudovda.rs:412-413) in a 20→320ms backoff loop matching Apollo; on a session-time re-open after watchdog failure, allow a few retries with ~1s spacing.
|
||||||
- **Add SET_RENDER_ADAPTER (IOCTL 0x802) to bind the IDD render GPU to the capture/encode GPU** (sev high, medium) — Add `const IOCTL_SET_RENDER_ADAPTER: u32 = ctl(0x802);` and a `#[repr(C)] struct SetRenderAdapterParams { luid: LUID }` in sudovda.rs. Before ADD in create() (sudovda.rs:448), enumerate DXGI adapters (reuse capture/dxgi.rs adapter-by-LUID/name helpers) to match the configured/encoder GPU and issue the IOCTL so the IDD's AddOut LUID matches the capture device's adapter.
|
- ✅ **Add SET_RENDER_ADAPTER (IOCTL 0x802) to bind the IDD render GPU to the capture/encode GPU** — SHIPPED (2026-06-20). [#16]
|
||||||
- **Derive a stable per-client MonitorGuid instead of one global constant** (sev medium, medium) — Pass a client/session identifier into create() (thread it from the m3 handshake) and derive the GUID deterministically from it (e.g. hash the client cert fingerprint into a u128), replacing the constant at sudovda.rs:452-456 and the RemoveParams guid at sudovda.rs:568. Keep a fixed probe GUID for the startup encoder probe like Apollo's PROBE_DISPLAY_UUID.
|
- ✅ **Derive a stable per-client MonitorGuid instead of one global constant** — SHIPPED (2026-06-20). [#55]
|
||||||
- **Add millihertz CCD mode-set with ±1 Hz fallback and SDC_SAVE_TO_DATABASE persistence** (sev medium, medium) — In set_active_mode (sudovda.rs:146-265), after the integer DEVMODE attempt add a CCD path: QueryDisplayConfig(QDC_ONLY_ACTIVE_PATHS), match the path by GDI name, set sourceMode width/height and targetInfo.refreshRate = {hz,1000}, and call SetDisplayConfig with SDC_APPLY|SDC_USE_SUPPLIED_DISPLAY_CONFIG|SDC_SAVE_TO_DATABASE. Add an alt-rate (±1) retry mirroring virtual_display.cpp:294-300.
|
- **Add millihertz CCD mode-set with ±1 Hz fallback and SDC_SAVE_TO_DATABASE persistence** (sev medium, medium) — In set_active_mode (sudovda.rs:146-265), after the integer DEVMODE attempt add a CCD path: QueryDisplayConfig(QDC_ONLY_ACTIVE_PATHS), match the path by GDI name, set sourceMode width/height and targetInfo.refreshRate = {hz,1000}, and call SetDisplayConfig with SDC_APPLY|SDC_USE_SUPPLIED_DISPLAY_CONFIG|SDC_SAVE_TO_DATABASE. Add an alt-rate (±1) retry mirroring virtual_display.cpp:294-300.
|
||||||
|
|
||||||
### Windows host: running as SYSTEM, secure-desktop capture, session/desktop switching + D3D recreation, NVIDIA driver prefs (nvprefs), GPU/adapter preference, display isolation, mDNS publish
|
### Windows host: running as SYSTEM, secure-desktop capture, session/desktop switching + D3D recreation, NVIDIA driver prefs (nvprefs), GPU/adapter preference, display isolation, mDNS publish
|
||||||
@@ -1555,24 +1557,24 @@ punktfunk's **secure-desktop / desktop-switch capture recovery is genuinely matu
|
|||||||
|
|
||||||
##### Where punktfunk is weaker / missing / fragile
|
##### Where punktfunk is weaker / missing / fragile
|
||||||
|
|
||||||
1. **No real Windows service — relies on a PsExec scheduled task.** The launch chain is a scheduled task → `PsExec64 -s -i 1` → `wscript.exe launch.vbs` → hidden `host-run.cmd` (`docs/windows-host.md:78-84`). There is **no `SERVICE_CONTROL_SESSIONCHANGE` relaunch** — the doc even lists it as unimplemented "step 6" (`docs/windows-secure-desktop.md:89`). PsExec is a 3rd-party SysInternals tool, not redistributable cleanly, and `-s -i 1` hard-codes session 1. None of the launch scripts (`launch.vbs`, `host-run.cmd`) are checked into the repo (only `scripts/headless/win-build.cmd` exists). This is the single biggest fragility vs Apollo's `sunshinesvc.cpp`.
|
1. **No real Windows service — relies on a PsExec scheduled task.** The launch chain is a scheduled task → `PsExec64 -s -i 1` → `wscript.exe launch.vbs` → hidden `host-run.cmd` (`design/windows-host.md:78-84`). There is **no `SERVICE_CONTROL_SESSIONCHANGE` relaunch** — the doc even lists it as unimplemented "step 6" (`design/archive/windows-secure-desktop.md:89`). PsExec is a third-party Sysinternals tool that is not cleanly redistributable, and `-s -i 1` hard-codes session 1. None of the launch scripts (`launch.vbs`, `host-run.cmd`) are checked into the repo (only `scripts/headless/win-build.cmd` exists). This is the single biggest fragility vs Apollo's `sunshinesvc.cpp`.
|
||||||
2. **No nvprefs / NvAPI at all.** `grep` for `nvprefs|NvAPI|DRS_|PREFERRED_PSTATE|DXPRESENT` across the host returns nothing. No PREFERRED_PSTATE_MAX for the encoder, no OGL_CPL_PREFER_DXPRESENT (so GL/Vulkan fullscreen apps may not be capturable via WGC/DDA), and no undo-file crash safety.
|
2. **No nvprefs / NvAPI at all.** `grep` for `nvprefs|NvAPI|DRS_|PREFERRED_PSTATE|DXPRESENT` across the host returns nothing. No PREFERRED_PSTATE_MAX for the encoder, no OGL_CPL_PREFER_DXPRESENT (so GL/Vulkan fullscreen apps may not be capturable via WGC/DDA), and no undo-file crash safety.
|
||||||
3. **No DXGI GPU-preference / output-reparenting hook.** No MinHook of `NtGdiDdDDIGetCachedHybridQueryValue`. On a hybrid/Optimus box DXGI can reparent the SudoVDA output onto the render GPU and break DDA. punktfunk's "search all adapters" partly papers over this but does not prevent the reparenting itself.
|
3. **No DXGI GPU-preference / output-reparenting hook.** No MinHook of `NtGdiDdDDIGetCachedHybridQueryValue`. On a hybrid/Optimus box DXGI can reparent the SudoVDA output onto the render GPU and break DDA. punktfunk's "search all adapters" partly papers over this but does not prevent the reparenting itself.
|
||||||
4. **mDNS uses the cross-platform `mdns-sd` crate, not Windows-native `DnsServiceRegister`** (`discovery.rs:17`). It works, but it does NOT carry Apollo's RFC-1035 empty-TXT fix — and the GameStream/Moonlight mDNS path on Windows is unverified (`docs/windows-host.md:46`). A non-RFC-compliant TXT can be rejected by Apple's resolver.
|
4. **mDNS uses the cross-platform `mdns-sd` crate, not Windows-native `DnsServiceRegister`** (`discovery.rs:17`). It works, but it does NOT carry Apollo's RFC-1035 empty-TXT fix — and the GameStream/Moonlight mDNS path on Windows is unverified (`design/windows-host.md:46`). A non-RFC-compliant TXT can be rejected by Apple's resolver.
|
||||||
5. **No stream-start system tuning.** No `NtSetTimerResolution`/`timeBeginPeriod`, no `DwmEnableMMCSS`, no `SetPriorityClass(HIGH_PRIORITY_CLASS)`, no `SetThreadExecutionState(ES_DISPLAY_REQUIRED)`, no WLAN media-streaming mode, no Mouse-Keys-on-headless trick. (Linux has none of this either, but on Windows these are real latency/jitter levers Apollo proves out.)
|
5. **No stream-start system tuning.** No `NtSetTimerResolution`/`timeBeginPeriod`, no `DwmEnableMMCSS`, no `SetPriorityClass(HIGH_PRIORITY_CLASS)`, no `SetThreadExecutionState(ES_DISPLAY_REQUIRED)`, no WLAN media-streaming mode, no Mouse-Keys-on-headless trick. (Linux has none of this either, but on Windows these are real latency/jitter levers Apollo proves out.)
|
||||||
6. **No `factory->IsCurrent()` per-frame check.** punktfunk reacts to errors from `AcquireNextFrame` but does not proactively detect HDR/topology changes the way Apollo does each frame (`display_base.cpp:235`) — it relies on ACCESS_LOST firing, which it usually does, but IsCurrent is the cleaner signal.
|
6. **No `factory->IsCurrent()` per-frame check.** punktfunk reacts to errors from `AcquireNextFrame` but does not proactively detect HDR/topology changes the way Apollo does each frame (`display_base.cpp:235`) — it relies on ACCESS_LOST firing, which it usually does, but IsCurrent is the cleaner signal.
|
||||||
7. **No `is_user_session_locked()` / CCD pre-flight.** Before a mode-set or isolation, Apollo checks `WTSQuerySessionInformationW` + `SetDisplayConfig(SDC_VALIDATE)` (`utils.cpp:184-237`); punktfunk just attempts and handles failure, which can thrash the display during a lock.
|
7. **No `is_user_session_locked()` / CCD pre-flight.** Before a mode-set or isolation, Apollo checks `WTSQuerySessionInformationW` + `SetDisplayConfig(SDC_VALIDATE)` (`utils.cpp:184-237`); punktfunk just attempts and handles failure, which can thrash the display during a lock.
|
||||||
8. **Clock epoch is `SystemTime::now()` (`dxgi.rs:1530`), not `GetSystemTimePreciseAsFileTime`.** The doc itself flags this as a cross-machine-latency risk (`docs/windows-host.md:284-286`); std SystemTime on Windows historically has coarser (~1–15 ms) resolution than the precise FILETIME API, which can corrupt the ClockProbe/ClockEcho skew handshake.
|
8. **Clock epoch is `SystemTime::now()` (`dxgi.rs:1530`), not `GetSystemTimePreciseAsFileTime`.** The doc itself flags this as a cross-machine-latency risk (`design/windows-host.md:284-286`); std SystemTime on Windows historically has coarser (~1–15 ms) resolution than the precise FILETIME API, which can corrupt the ClockProbe/ClockEcho skew handshake.
|
||||||
|
|
||||||
|
|
||||||
#### Transfer opportunities
|
#### Transfer opportunities
|
||||||
|
|
||||||
- **Replace the PsExec scheduled-task launch with a real Windows service that relaunches the host on session change** (sev high, large) — Add a small Rust service binary (new crate or punktfunk-host `service` subcommand) using windows::Win32::System::Services (RegisterServiceCtrlHandlerEx, StartServiceCtrlDispatcher) that mirrors sunshinesvc.cpp: WTSGetActiveConsoleSessionId -> DuplicateTokenEx+SetTokenInformation(TokenSessionId) -> CreateProcessAsUserW(lpDesktop=winsta0\\default) into a kill-on-close job, accept SERVICE_ACCEPT_SESSIONCHANGE, and relaunch the host on a genuine console-session change. Ship an installer and drop the PsExec dependency.
|
- ✅ **Replace the PsExec scheduled-task launch with a real Windows service that relaunches the host on session change** — SHIPPED (2026-06-20). [#24]
|
||||||
- **Add an NvAPI driver-settings manager (PREFERRED_PSTATE_MAX + OGL_CPL_PREFER_DXPRESENT) with a crash-safe undo file** (sev medium, large) — Add a windows-only nvprefs module wrapping NvAPI DRS (load nvapi64 dynamically, treat NvAPI_Initialize failure as 'no NVIDIA, skip'). Create a 'punktfunk' app profile with PREFERRED_PSTATE_PREFER_MAX, set OGL_CPL_PREFER_DXPRESENT_ENABLED on the base profile behind a config flag, write an undo file under %ProgramData%\\punktfunk before global changes, and call it on session start (the new stream_will_start hook below).
|
- **Add an NvAPI driver-settings manager (PREFERRED_PSTATE_MAX + OGL_CPL_PREFER_DXPRESENT) with a crash-safe undo file** (sev medium, large) — Add a windows-only nvprefs module wrapping NvAPI DRS (load nvapi64 dynamically, treat NvAPI_Initialize failure as 'no NVIDIA, skip'). Create a 'punktfunk' app profile with PREFERRED_PSTATE_PREFER_MAX, set OGL_CPL_PREFER_DXPRESENT_ENABLED on the base profile behind a config flag, write an undo file under %ProgramData%\\punktfunk before global changes, and call it on session start (the new stream_will_start hook below).
|
||||||
- **Hook win32u!NtGdiDdDDIGetCachedHybridQueryValue to stop DXGI output-reparenting on hybrid/Optimus GPUs** (sev medium, medium) — Add a once-init in the Windows capture path (capture/dxgi.rs open) that installs the same hook via a minhook-rs/detour crate (or a manual IAT/inline hook) on NtGdiDdDDIGetCachedHybridQueryValue forcing STATE_UNSPECIFIED, plus SetProcessDpiAwarenessContext(PER_MONITOR_AWARE_V2). Gate it to NVIDIA/hybrid boxes; it's process-lifetime so no teardown needed.
|
- ✅ **Hook win32u!NtGdiDdDDIGetCachedHybridQueryValue to stop DXGI output-reparenting on hybrid/Optimus GPUs** — SHIPPED (2026-06-20). [#57]
|
||||||
- **Add a Windows stream_will_start/stop hook: timer resolution, MMCSS, HIGH_PRIORITY_CLASS, display-required, headless Mouse Keys** (sev medium, medium) — Add a windows-only RAII guard invoked when a session starts (punktfunk1.rs/pipeline session setup) that raises timer resolution (NtSetTimerResolution or timeBeginPeriod(1)), DwmEnableMMCSS(true), SetPriorityClass(HIGH_PRIORITY_CLASS), and wraps the DXGI capture loop in SetThreadExecutionState(ES_CONTINUOUS|ES_DISPLAY_REQUIRED) (capture/dxgi.rs next_frame loop), reverting on drop. Optionally the headless Mouse-Keys trick for cursor visibility.
|
- **Add a Windows stream_will_start/stop hook: timer resolution, MMCSS, HIGH_PRIORITY_CLASS, display-required, headless Mouse Keys** (sev medium, medium) — Add a windows-only RAII guard invoked when a session starts (punktfunk1.rs/pipeline session setup) that raises timer resolution (NtSetTimerResolution or timeBeginPeriod(1)), DwmEnableMMCSS(true), SetPriorityClass(HIGH_PRIORITY_CLASS), and wraps the DXGI capture loop in SetThreadExecutionState(ES_CONTINUOUS|ES_DISPLAY_REQUIRED) (capture/dxgi.rs next_frame loop), reverting on drop. Optionally the headless Mouse-Keys trick for cursor visibility.
|
||||||
- **Use Windows-native DnsServiceRegister (or fix the TXT record) so Apple's mDNS resolver accepts the host** (sev low, medium) — Either (a) verify mdns-sd always emits an RFC-1035-valid TXT (never zero strings) and add a regression test, or (b) add a windows-only discovery backend using DnsServiceRegister via the windows crate's DNS APIs mirroring publish.cpp, including the single-empty-TXT workaround, so Apple NWBrowser/Moonlight discover the host reliably.
|
- ✅ **Use Windows-native DnsServiceRegister (or fix the TXT record) so Apple's mDNS resolver accepts the host** — SHIPPED (2026-06-20). [#87]
|
||||||
- **Add per-frame IDXGIFactory::IsCurrent reinit detection and switch the host clock to GetSystemTimePreciseAsFileTime** (sev medium, small) — In capture/dxgi.rs next_frame, query the cached IDXGIFactory's IsCurrent() once per loop and trigger the existing recreate path when it goes false (catches HDR/topology changes cleanly). Replace now_ns() on Windows with GetSystemTimePreciseAsFileTime converted to Unix-epoch ns so ClockProbe/ClockEcho skew correction stays accurate cross-machine.
|
- ✅ **Add per-frame IDXGIFactory::IsCurrent reinit detection and switch the host clock to GetSystemTimePreciseAsFileTime** — SHIPPED (2026-06-20). [#42]
|
||||||
|
|
||||||
### Completeness critic — areas flagged as under-covered
|
### Completeness critic — areas flagged as under-covered
|
||||||
|
|
||||||
@@ -1769,18 +1771,10 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
|
|
||||||
|
|
||||||
#### 1. Switch SendInput to retry-on-failure desktop reattach (drop per-event OpenInputDesktop)
|
#### 1. Switch SendInput to retry-on-failure desktop reattach (drop per-event OpenInputDesktop)
|
||||||
*Area:* `cmp:input` · *Windows-host:* yes · *Severity:* high · *Effort:* small
|
✅ **SHIPPED (2026-06-20)** — per-event OpenInputDesktop dropped for inject-first + retry-on-failure desktop reattach.
|
||||||
|
|
||||||
- **Apollo does:** send_input() / inject_synthetic_pointer_input() call SendInput FIRST, and only on failure (0 injected) re-run syncThreadDesktop() (OpenInputDesktop(DF_ALLOWOTHERACCOUNTHOOK)+SetThreadDesktop) and retry once, tracking the desktop in a thread_local _lastKnownInputDesktop — src/platform/windows/input.cpp:477,499 + src/platform/windows/misc.cpp:251
|
|
||||||
- **punktfunk gap:** SendInputInjector::inject() calls reattach_input_desktop() (an OpenInputDesktop+SetThreadDesktop+CloseDesktop) at the TOP of EVERY event — crates/punktfunk-host/src/inject/sendinput.rs:97,50-69. This is a syscall triple per mouse-move; punktfunk's own docs/windows-secure-desktop.md:78-80 lists this exact refactor (step 2) as planned but unshipped.
|
|
||||||
- **Proposal:** Inject first; cache the HDESK thread-local; only on a 0/partial SendInput result call reattach_input_desktop() and retry once. Use DF_ALLOWOTHERACCOUNTHOOK in the OpenInputDesktop access (sendinput.rs:52-56 currently passes DESKTOP_CONTROL_FLAGS(0)) so the secure desktop is reachable. Keeps the steady-state hot path to a single SendInput call.
|
|
||||||
|
|
||||||
#### 2. Detect resolution/format change on the acquire hot path, not only during rebuild
|
#### 2. Detect resolution/format change on the acquire hot path, not only during rebuild
|
||||||
*Area:* `win:capture-dxgi-dd` · *Windows-host:* yes · *Severity:* high · *Effort:* small
|
✅ **SHIPPED (2026-06-20)** — acquire-path GetDesc check now catches resolution/format changes that don't raise ACCESS_LOST.
|
||||||
|
|
||||||
- **Apollo does:** Every frame Apollo reads src->GetDesc() and reinits if desc.Width/Height != width_before_rotation/height_before_rotation or capture_format != desc.Format (display_vram.cpp:1215-1236, display_ram.cpp:253-265, wgc 1662-1674).
|
|
||||||
- **punktfunk gap:** punktfunk only re-reads dimensions inside recreate_dupl (dxgi.rs:1298-1313). On the normal acquire path (dxgi.rs:1426-1492) it never validates the acquired texture's desc, so a mode change that doesn't raise ACCESS_LOST leads to CopyResource of a mismatched-size/format source into a stale gpu_copy/staging/fp16_src — silent corruption or a hard copy failure.
|
|
||||||
- **Proposal:** In acquire(), after res.cast::<ID3D11Texture2D>(), call GetDesc and compare Width/Height/Format against self.width/height and the expected format (BGRA8 vs R16G16B16A16_FLOAT). On mismatch, ReleaseFrame and run the existing recreate_dupl path (or drop gpu_copy/staging/fp16/hdr10 textures and update width/height/hdr_fp16) so the encoder re-inits cleanly. This makes live resolution + HDR-toggle changes robust even when DDA doesn't fault.
|
|
||||||
|
|
||||||
#### 3. Per-frame IsCurrent() check to catch HDR/GPU/mode changes
|
#### 3. Per-frame IsCurrent() check to catch HDR/GPU/mode changes
|
||||||
*Area:* `win:capture-wgc` · *Windows-host:* yes · *Severity:* high · *Effort:* small
|
*Area:* `win:capture-wgc` · *Windows-host:* yes · *Severity:* high · *Effort:* small
|
||||||
@@ -1790,36 +1784,13 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
- **Proposal:** Hold an IDXGIFactory1 in WgcCapturer (from the same adapter as make_device) and call IsCurrent() at the top of next_frame/wait_and_drain; on false, return the reinit signal. This pairs with wgc-size-format-reinit to give a complete change-detection story.
|
- **Proposal:** Hold an IDXGIFactory1 in WgcCapturer (from the same adapter as make_device) and call IsCurrent() at the top of next_frame/wait_and_drain; on false, return the reinit signal. This pairs with wgc-size-format-reinit to give a complete change-detection story.
|
||||||
|
|
||||||
#### 4. Batched/GSO send for the GameStream video plane on Windows
|
#### 4. Batched/GSO send for the GameStream video plane on Windows
|
||||||
*Area:* `cmp:protocol-streaming` · *Windows-host:* yes · *Severity:* high · *Effort:* medium · **✓ verified · ✅ DONE (2026-06-16)**
|
✅ **SHIPPED (2026-06-16)** — Windows USO batched send for the GameStream video plane via the reusable `punktfunk_core::transport::send_uso_all` helper (one WSASendMsg per 16-packet paced burst, PUNKTFUNK_GSO=0 kill-switch + auto-fallback); Host Windows compile CI-pending.
|
||||||
|
|
||||||
> **Resolution:** Implemented per the refined proposal. Added a reusable Windows-only
|
|
||||||
> `punktfunk_core::transport::send_uso_all(&UdpSocket, &[&[u8]]) -> io::Result<usize>` that reuses the
|
|
||||||
> native plane's proven `send_one_uso` + `uso` on/off latch + `uso_unsupported`, with the same
|
|
||||||
> uniform-size guard and ≤512-segment chunking. `gamestream/stream.rs` `sendmmsg_all` now has a
|
|
||||||
> `#[cfg(target_os="windows")]` arm that calls it per 16-packet paced burst (one `WSASendMsg` instead
|
|
||||||
> of 16 `send`s) and sends any remainder scalar; the Linux `sendmmsg` arm and a generic scalar arm are
|
|
||||||
> unchanged. PUNKTFUNK_GSO=0 kill-switch + auto-fallback inherited. Linux build unaffected;
|
|
||||||
> punktfunk-core type-checks for x86_64-pc-windows-msvc. Host Windows compile deferred to CI/dev box.
|
|
||||||
|
|
||||||
- **Apollo does:** Apollo sends every plane through platf::send_batch / send (one code path for all OSes; on Windows it uses real batched socket writes), and the video broadcast thread is the single transmit path (stream.cpp:1327, send batching at stream.cpp:1337 send_batch latency logger).
|
|
||||||
- **punktfunk gap:** The GameStream video sender's batched path is Linux-only: sendmmsg_all has a #[cfg(target_os="linux")] real implementation (stream.rs:147) and a #[cfg(not(target_os="linux"))] fallback that does one sock.send() per packet (stream.rs:185-191). On a Windows GameStream-compat host (capture IS wired for Windows via DXGI/WGC, capture.rs:261) every video datagram is an individual syscall — the native punktfunk/1 plane got Windows USO (transport/udp.rs:135) but the GameStream plane did not.
|
|
||||||
- **Proposal:** Route the GameStream video send thread through the same Windows WSASendMsg/USO + WSASend-batch path the native plane already implements in punktfunk-core transport/udp.rs (or factor that send helper into a shared module and call it from gamestream/stream.rs). Keeps GameStream-on-Windows from being syscall-bound at high bitrate.
|
|
||||||
- **Verify verdict:** `confirmed_gap` — PUNKTFUNK gap is real. The GameStream video send path uses a private `sendmmsg_all`: real `sendmmsg` only under `#[cfg(target_os="linux")]` (crates/punktfunk-host/src/gamestream/stream.rs:147-181), and a `#[cfg(not(target_os="linux"))]` fallback that does one `sock.send(p)` per packet (stream.rs:185-191). The paced sender calls it in PACE_CHUNK=16 bursts (stream.rs:230). It operates on a raw `std::net::UdpSocket` (stream.rs:66, cloned at :310), NOT the core `Transport` trait, so it does NOT pick up the native plane's USO. The GameStream host genuinely runs on Windows: `serve`/`gamestream` are not OS-gated (main.rs:81-83 dispatch is uncfg'd; gamestream/mod.rs declares `mod stream;` with no cfg), capture is wired for Windows (capture.rs:261-279 `capture_virtual_output` via SudoVDA+WGC/DXGI), and the module has explicit Windows handling (gamestream/mod.rs:209-210 APPDATA, :216-217 COMPUTERNAME). So on a Windows GameStream-compat host every video datagram is its own syscall. Meanwhile the native plane already has the answer: crates/punktfunk-core/src/transport/udp.rs:141-246 (`uso` state + `send_one_uso` via `WSASendMsg`+`UDP_SEND_MSG_SIZE`), wired default-on at udp.rs:610-647 (`send_gso`), called by session.rs:182. Also note GameStream video datagrams are uniform `blocksize` (= packet_size+16): data shards, the zero-padded last data shard, and FEC parity shards are all full blocksize (gamestream/video.rs:41-42,76,111-166) — the exact uniform-size precondition USO/GSO needs. APOLLO confirms the claimed unified path: `platf::send_batch` (src/platform/common.h:697) is the single video transmit call (src/stream.cpp:1598, in videoBroadcastThread, latency-logged at stream.cpp:1337); its Windows impl is real USO — `WSASendMsg` with a `UDP_SEND_MSG_SIZE` cmsg of `header_size+payload_size` (src/platform/windows/misc.cpp:1408,1499,1508), with a per-packet `send()` fallback (misc.cpp:1510-1587) "if USO is not supported ... 
caller will fall back to unbatched sends" (misc.cpp:1504-1505).
|
|
||||||
- **Refined:** Route the GameStream Windows video send through USO instead of per-packet `send`. Do NOT duplicate the WSASendMsg code — factor the native plane's USO helper out of `UdpTransport`. Extract `send_one_uso` + the `uso` enable/latch state + `uso_unsupported` + the uniform-size chunking loop (currently udp.rs:185-246 and the `send_gso` Windows body udp.rs:610-647) into a small `pub(crate)` free function in punktfunk-core, e.g. `transport::udp::send_packets_uso(socket: &UdpSocket, packets: &[&[u8]]) -> io::Result<usize>` that takes a raw connected `std::net::UdpSocket` (the GameStream sender already owns one) and applies USO with the same default-on + auto-fallback-to-per-packet + PUNKTFUNK_GSO=0 kill-switch semantics. Then rewrite gamestream/stream.rs `sendmmsg_all` so the `#[cfg(target_os="windows")]` arm calls that helper (the Linux arm keeps its sendmmsg; a `not(any(linux,windows))` arm keeps the scalar loop). GameStream packets are already uniform blocksize per the packetizer, so the USO uniform-size guard passes; the existing PACE_CHUNK=16 microburst pacing is unaffected (each chunk becomes one WSASendMsg). Add a Linux GSO arm too while there (same helper pattern) for parity, but USO/Windows is the point of this item. Keep the change inside punktfunk-core for the helper (one core, C-ABI-stable — no new public ABI surface needed, it's pub(crate)) and a ~10-line edit in the host. This respects: no async on frame path (native sockets only), no protocol change, no scaling change.
|
|
||||||
|
|
||||||
#### 5. Gate the GameStream HTTPS plane on the paired-cert allow-list
|
#### 5. Gate the GameStream HTTPS plane on the paired-cert allow-list
|
||||||
*Area:* `cmp:gamestream-http-pairing` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
✅ **SHIPPED (2026-06-20)** — gamestream/tls.rs surfaces the verified peer cert (PeerCertFingerprint) and nvhttp.rs gates /launch /resume /applist /cancel on the paired-fingerprint set (closes the "any TLS client can launch" hole).
|
||||||
|
|
||||||
- **Apollo does:** Apollo defers TLS verification (nvhttp.cpp:88 sets verify_peer|verify_fail_if_no_peer_cert with a permissive OpenSSL cb, then the accept() override runs cert_chain.verify() post-handshake and stashes the matched named_cert_t into request->userp; every authenticated handler calls get_verified_cert(request) — nvhttp.cpp:665-667,915,1086,1172,1360 — so an unpaired cert is rejected with a proper XML body, not just accepted).
|
|
||||||
- **punktfunk gap:** punktfunk pins the client cert at pairing (pairing.rs:230-236) and loads it into AppState.paired (mod.rs:134) but NEVER consults it: tls.rs:38-45 verify_client_cert always returns assertion(), and /launch (nvhttp.rs:87-109) does no identity check. Any client that completed a TLS handshake — paired or not — can launch a session.
|
|
||||||
- **Proposal:** After the handshake, recover the peer cert (axum_server exposes the rustls connection / peer certs), SHA-256 it, and check it against AppState.paired in /launch, /resume, /applist, /cancel (and reflect the real result in serverinfo PairStatus). Keep verify_client_cert lenient for the handshake but reject unpaired identities at the handler with an XML error, mirroring Apollo's get_verified_cert pattern. This is the single highest-value GameStream-compat hardening item and applies equally to the Windows host.
|
|
||||||
|
|
||||||
#### 6. Query NVENC encode capabilities before init and degrade gracefully
|
#### 6. Query NVENC encode capabilities before init and degrade gracefully
|
||||||
*Area:* `cmp:video-encode` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
✅ **SHIPPED (2026-06-20)** — encode/nvenc.rs query_caps probes nvEncGetEncodeCaps and degrades gracefully (over-range reject, 10-bit→8-bit fallback, custom-VBV gate, RFI flag); Windows compile CI-pending.
|
||||||
|
|
||||||
- **Apollo does:** nvenc_base.cpp:175-220 builds a get_encoder_cap lambda over nvEncGetEncodeCaps and checks NV_ENC_CAPS_WIDTH_MAX/HEIGHT_MAX (rejects with a clear message), SUPPORT_10BIT_ENCODE, SUPPORT_YUV444_ENCODE, SUPPORT_REF_PIC_INVALIDATION (toggles encoder_params.rfi), SUPPORT_CUSTOM_VBV_BUF_SIZE (nvenc_base.cpp:250-255), SUPPORT_CABAC (nvenc_base.cpp:311-315), SUPPORT_WEIGHTED_PREDICTION (nvenc_base.cpp:220), and SUPPORT_INTRA_REFRESH/SINGLE_SLICE_INTRA_REFRESH (nvenc_base.cpp:334-345). Each missing cap downgrades a feature instead of failing.
|
|
||||||
- **punktfunk gap:** crates/punktfunk-host/src/encode/nvenc.rs:131-323 init_session never calls nvEncGetEncodeCaps. Max W/H is only checked against a static per-codec constant (encode.rs:57-62), not the GPU's real cap; 10-bit Main10 is forced (nvenc.rs:233-237) without checking SUPPORT_10BIT_ENCODE; custom VBV (nvenc.rs:224-227) is set without checking SUPPORT_CUSTOM_VBV_BUF_SIZE. On an unsupported card these surface as opaque InvalidParam handled only by bitrate step-down, which masks the real cause.
|
|
||||||
- **Proposal:** Add a caps query in NvencD3d11Encoder::init_session right after open_encode_session_ex: build a get_cap(NV_ENC_CAPS) helper over nvEncGetEncodeCaps, validate encodeWidth/Height against WIDTH_MAX/HEIGHT_MAX with a clear error, gate the 10-bit path on SUPPORT_10BIT_ENCODE (fall back to 8-bit with a warning instead of failing), gate custom VBV on SUPPORT_CUSTOM_VBV_BUF_SIZE, and record an rfi-supported flag for the RFI work below.
|
|
||||||
|
|
||||||
#### 7. Detect default-render-device changes and reinit WASAPI capture
|
#### 7. Detect default-render-device changes and reinit WASAPI capture
|
||||||
*Area:* `cmp:audio` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
*Area:* `cmp:audio` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
||||||
@@ -1829,11 +1800,7 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
- **Proposal:** In wasapi_cap.rs, register a device-notification callback on the DeviceEnumerator; on default-render change, break the capture loop and reopen get_default_device(Render) + a fresh loopback IAudioClient (re-running the init block at wasapi_cap.rs:105-133). Surface it through the existing thread without tearing down the WasapiLoopbackCapturer handle so the session keeps streaming.
|
- **Proposal:** In wasapi_cap.rs, register a device-notification callback on the DeviceEnumerator; on default-render change, break the capture loop and reopen get_default_device(Render) + a fresh loopback IAudioClient (re-running the init block at wasapi_cap.rs:105-133). Surface it through the existing thread without tearing down the WasapiLoopbackCapturer handle so the session keeps streaming.
|
||||||
|
|
||||||
#### 8. Move GameStream input injection off the ENet service thread
|
#### 8. Move GameStream input injection off the ENet service thread
|
||||||
*Area:* `cmp:input` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
✅ **SHIPPED (2026-06-20)** — on_receive forwards to a shared crate::inject InjectorService thread (+ relative-mouse/scroll coalescing, #45); the ENet thread no longer blocks on injection.
|
||||||
|
|
||||||
- **Apollo does:** The control thread only enqueues bytes + schedules a task; a pool thread pops one packet, batches later same-type packets while holding the queue lock, then RELEASES the lock before the (slow) SendInput/ViGEm call — src/input.cpp:1481-1520, 1639-1643. A slow OS input call never stalls the network thread.
|
|
||||||
- **punktfunk gap:** on_receive() calls inj.inject(&ev) synchronously inside the host.service() ENet loop — crates/punktfunk-host/src/gamestream/control.rs:84-91,207-211. A SendInput that blocks crossing a desktop switch (or a slow ViGEm update) head-blocks ENet handshake/keepalive/retransmit servicing. The m3 path already does this right (punktfunk1.rs:1300 → injector_service_thread).
|
|
||||||
- **Proposal:** Mirror the m3 design in the GameStream control thread: push decoded InputEvents onto an mpsc channel drained by a dedicated injector thread (reuse injector_service_thread or a sibling), so the ENet thread never blocks on SendInput/ViGEm. No async needed — native thread + std::sync::mpsc, consistent with the invariant.
|
|
||||||
|
|
||||||
#### 9. Actually launch the app/game on Windows (CreateProcessAsUserW into the user session)
|
#### 9. Actually launch the app/game on Windows (CreateProcessAsUserW into the user session)
|
||||||
*Area:* `cmp:process-launch` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
*Area:* `cmp:process-launch` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
||||||
@@ -1846,7 +1813,7 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
*Area:* `cmp:config-management` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
*Area:* `cmp:config-management` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
||||||
|
|
||||||
- **Apollo does:** system_tray.cpp builds a single static tray struct with a menu (Open/Force-stop/Reset-display/Restart/Quit, l.112-141) and pushes state changes from the streaming pipeline — update_tray_playing/pausing/stopped/launch_error/require_pin/paired/client_connected (l.238-412) each swap the icon + raise a balloon notification; init_tray hardens the thread DACL so the icon survives running as SYSTEM (l.143-204); a 50 ms polling thread drives it (tray_thread_worker l.415).
|
- **Apollo does:** system_tray.cpp builds a single static tray struct with a menu (Open/Force-stop/Reset-display/Restart/Quit, l.112-141) and pushes state changes from the streaming pipeline — update_tray_playing/pausing/stopped/launch_error/require_pin/paired/client_connected (l.238-412) each swap the icon + raise a balloon notification; init_tray hardens the thread DACL so the icon survives running as SYSTEM (l.143-204); a 50 ms polling thread drives it (tray_thread_worker l.415).
|
||||||
- **punktfunk gap:** No tray code exists anywhere in crates/punktfunk-host (grep for tray/notify-rust/balloon returns nothing). On Windows the host runs windowless as SYSTEM in Session 1 via external scripts (docs/windows-host.md:77-84) with the only operator feedback being a redirected log file — there is no visible, clickable status/control surface for a desktop user.
|
- **punktfunk gap:** No tray code exists anywhere in crates/punktfunk-host (grep for tray/notify-rust/balloon returns nothing). On Windows the host runs windowless as SYSTEM in Session 1 via external scripts (design/windows-host.md:77-84) with the only operator feedback being a redirected log file — there is no visible, clickable status/control surface for a desktop user.
|
||||||
- **Proposal:** Add an optional system-tray plane behind a feature/flag using a Rust tray crate (e.g. tray-icon) spawned on its own native thread (no async on the per-frame path). Drive it from the existing AppState atomics/locks already exposed by mgmt.rs get_status (streaming/audio_streaming/pin_pending/session) — poll or push on state change to swap icon + show balloons (connected, pairing PIN, launch error). Menu items call the SAME primitives the API uses (stop_session, force_idr, native arm-pairing, quit). On Windows replicate Apollo's thread-DACL hardening so the icon shows when launched as SYSTEM in the interactive session.
|
- **Proposal:** Add an optional system-tray plane behind a feature/flag using a Rust tray crate (e.g. tray-icon) spawned on its own native thread (no async on the per-frame path). Drive it from the existing AppState atomics/locks already exposed by mgmt.rs get_status (streaming/audio_streaming/pin_pending/session) — poll or push on state change to swap icon + show balloons (connected, pairing PIN, launch error). Menu items call the SAME primitives the API uses (stop_session, force_idr, native arm-pairing, quit). On Windows replicate Apollo's thread-DACL hardening so the icon shows when launched as SYSTEM in the interactive session.
|
||||||
|
|
||||||
#### 11. Treat S_OK-with-no-change frames as timeouts via DXGI update flags
|
#### 11. Treat S_OK-with-no-change frames as timeouts via DXGI update flags
|
||||||
@@ -1864,18 +1831,7 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
- **Proposal:** In WgcCapturer::process_frame, call src.GetDesc() and compare Width/Height/Format against self.width/height and the expected format. On mismatch, return a Reinit error (add a capture_e::Reinit-equivalent to the Capturer contract or bail with a recognizable error the m3/stream loop maps to a capturer rebuild). Drop and re-create fp16_src/hdr10_out/bgra_copy when size changes.
|
- **Proposal:** In WgcCapturer::process_frame, call src.GetDesc() and compare Width/Height/Format against self.width/height and the expected format. On mismatch, return a Reinit error (add a capture_e::Reinit-equivalent to the Capturer contract or bail with a recognizable error the m3/stream loop maps to a capturer rebuild). Drop and re-create fp16_src/hdr10_out/bgra_copy when size changes.
|
||||||
|
|
||||||
#### 13. Split every cursor shape into an alpha image + an XOR image (two-pass composite)
|
#### 13. Split every cursor shape into an alpha image + an XOR image (two-pass composite)
|
||||||
*Area:* `win:cursor-compositing` · *Windows-host:* yes · *Severity:* high · *Effort:* medium · **✅ DONE (2026-06-16)**
|
✅ **SHIPPED (2026-06-16)** — two-pass cursor composite in capture/dxgi.rs (CursorShape alpha/xor layers, CursorCompositor draw_layer; MASKED_COLOR→alpha, MONOCHROME (1,1)→XOR; cursor_invert flag removed). Windows CI/dev-VM compile pending.
|
||||||
|
|
||||||
> **Resolution:** Implemented in `capture/dxgi.rs`. `convert_pointer_shape` now returns a `CursorShape`
|
|
||||||
> with optional `alpha`/`xor` layers; `CursorCompositor` holds `tex_alpha`/`tex_xor` and `draw_layer`
|
|
||||||
> renders each with its own blend (alpha = src-over + HDR scale; XOR = inversion, unscaled). MASKED_COLOR
|
|
||||||
> opaque pixels now go through the alpha pass (not the invert blend), and MONOCHROME `(1,1)` invert pixels
|
|
||||||
> now feed the XOR layer (previously approximated as solid black). CPU path blends both layers too.
|
|
||||||
> The `cursor_invert` flag was removed. Independently reviewed (ship); pending Windows CI/dev-VM compile.
|
|
||||||
|
|
||||||
- **Apollo does:** Apollo emits two BGRA images per shape — make_cursor_alpha_image (display_vram.cpp:279) and make_cursor_xor_image (display_vram.cpp:210) — and runs both an alpha-blend pass and an invert-blend pass in blend_cursor (display_vram.cpp:1448-1469), each skipped if its image is empty. MASKED_COLOR and MONOCHROME shapes legitimately need both.
|
|
||||||
- **punktfunk gap:** convert_pointer_shape (dxgi.rs:566) produces ONE image and cursor_invert (dxgi.rs:1133-1134) picks ONE blend for the whole shape, so a cursor mixing opaque and screen-inverting pixels (common I-beams and themed arrows) renders wrong; masked-color opaque pixels are even forced through the invert blend (dxgi.rs:612-624 + 1205).
|
|
||||||
- **Proposal:** Refactor convert_pointer_shape in dxgi.rs to return two optional images (alpha, xor) mirroring Apollo's split. Store cursor_shape as Option<(alpha, xor)>, upload up to two SRVs in CursorCompositor, and in composite_cursor_gpu run the alpha pass with self.blend then the xor pass with self.blend_invert (skip empties). Drop the single cursor_invert flag.
|
|
||||||
|
|
||||||
#### 14. Map absolute mouse through the real virtual-desktop / output rect, not a blind 0..65535 normalize
|
#### 14. Map absolute mouse through the real virtual-desktop / output rect, not a blind 0..65535 normalize
|
||||||
*Area:* `win:input-sendinput-vigem` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
*Area:* `win:input-sendinput-vigem` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
||||||
@@ -1892,11 +1848,7 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
- **Proposal:** In the pinger thread in sudovda.rs (around 485-494), track a consecutive-failure counter; after N (3) failures set a shared AtomicBool 'driver_dead' on SudoVdaDisplay/keepalive and stop pinging. Surface it so the session loop in punktfunk1.rs treats a dead virtual display like ACCESS_LOST and re-opens (re-run open_device + re-create). Add a DriverStatus enum mirroring Apollo's DRIVER_STATUS.
|
- **Proposal:** In the pinger thread in sudovda.rs (around 485-494), track a consecutive-failure counter; after N (3) failures set a shared AtomicBool 'driver_dead' on SudoVdaDisplay/keepalive and stop pinging. Surface it so the session loop in punktfunk1.rs treats a dead virtual display like ACCESS_LOST and re-opens (re-run open_device + re-create). Add a DriverStatus enum mirroring Apollo's DRIVER_STATUS.
|
||||||
|
|
||||||
#### 16. Add SET_RENDER_ADAPTER (IOCTL 0x802) to bind the IDD render GPU to the capture/encode GPU
|
#### 16. Add SET_RENDER_ADAPTER (IOCTL 0x802) to bind the IDD render GPU to the capture/encode GPU
|
||||||
*Area:* `win:virtual-display-sudovda` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
✅ **SHIPPED (2026-06-20)** — SET_RENDER_ADAPTER (IOCTL 0x802) now binds the IDD render GPU to the capture/encode adapter on hybrid/multi-GPU boxes.
|
||||||
|
|
||||||
- **Apollo does:** setRenderAdapterByName enumerates DXGI adapters, matches desc.Description, and issues SET_RENDER_ADAPTER with that adapter's LUID before every create (virtual_display.cpp:624-654, sudovda.h:109-128, called at main.cpp:369-371 and process.cpp:250-252).
|
|
||||||
- **punktfunk gap:** punktfunk defines no IOCTL_SET_RENDER_ADAPTER and never binds the render adapter (sudovda.rs:47-54). On a hybrid/multi-GPU box the IDD may render on the iGPU while NVENC + Desktop Duplication run on the dGPU, breaking or slowing zero-copy.
|
|
||||||
- **Proposal:** Add `const IOCTL_SET_RENDER_ADAPTER: u32 = ctl(0x802);` and a `#[repr(C)] struct SetRenderAdapterParams { luid: LUID }` in sudovda.rs. Before ADD in create() (sudovda.rs:448), enumerate DXGI adapters (reuse capture/dxgi.rs adapter-by-LUID/name helpers) to match the configured/encoder GPU and issue the IOCTL so the IDD's AddOut LUID matches the capture device's adapter.
|
|
||||||
|
|
||||||
#### 17. Add streaming_will_start/stop session-level latency tuning on Windows
|
#### 17. Add streaming_will_start/stop session-level latency tuning on Windows
|
||||||
*Area:* `win:critic` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
*Area:* `win:critic` · *Windows-host:* yes · *Severity:* high · *Effort:* medium
|
||||||
@@ -1913,43 +1865,16 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
- **Proposal:** On the capture thread, register an IMMNotificationClient (or poll GetDefaultAudioEndpoint) and treat a default-render change OR a device-invalidated error as a re-open: tear down the IAudioClient and re-acquire the new default endpoint in-place, like the Linux PipeWire reconnect discipline. Lives entirely in audio/wasapi_cap.rs.
|
- **Proposal:** On the capture thread, register an IMMNotificationClient (or poll GetDefaultAudioEndpoint) and treat a default-render change OR a device-invalidated error as a re-open: tear down the IAudioClient and re-acquire the new default endpoint in-place, like the Linux PipeWire reconnect discipline. Lives entirely in audio/wasapi_cap.rs.
|
||||||
|
|
||||||
#### 19. Implement true reference-frame invalidation with a multi-ref DPB instead of always-full-IDR
|
#### 19. Implement true reference-frame invalidation with a multi-ref DPB instead of always-full-IDR
|
||||||
*Area:* `cmp:video-encode` · *Windows-host:* yes · *Severity:* high · *Effort:* large
|
✅ **SHIPPED (2026-06-20)** — Encoder::invalidate_ref_frames added (Windows NVENC multi-ref DPB + nvEncInvalidateRefFrames; GameStream 0x0301 routes to invalidate); Linux degrades to IDR; NVENC impl CI-pending. See also #22.
|
||||||
|
|
||||||
- **Apollo does:** nvenc_base.cpp:268-281 sets maxNumRefFrames/maxNumRefFramesInDPB to 5 (HEVC/H264) and L0 to 1, enabling a deep DPB; invalidate_ref_frames (nvenc_base.cpp:574-610) calls nvEncInvalidateRefFrames per lost frame range, dedupes already-done ranges, falls back to IDR only when the range exceeds the DPB, and sets rfi_needs_confirmation so the next encoded frame is marked as the RFI fulfilment (nvenc_base.cpp:551-557, 490-491).
|
|
||||||
- **punktfunk gap:** crates/punktfunk-host/src/encode/nvenc.rs leaves ref frames at the preset default and exposes only request_keyframe (nvenc.rs:465-467) which always emits a full FORCE_IDR. gamestream/control.rs:163-177 collapses both RFI (0x0301) and request-IDR (0x0302) into the same full-IDR. A full IDR at high resolution is the multi-millisecond spike punktfunk's own infinite-GOP comments call out (linux.rs:197-201) — true RFI avoids it for recoverable loss.
|
|
||||||
- **Proposal:** Extend the Encoder trait with an invalidate_ref_frames(first,last) method (default: fall back to request_keyframe). In the Windows NVENC config set maxNumRefFramesInDPB/maxNumRefFrames>1 (and numRefL0=1) gated on SUPPORT_MULTIPLE_REF_FRAMES, implement invalidate_ref_frames via nvEncInvalidateRefFrames with the dedupe + IDR-fallback logic, and route control.rs 0x0301 to invalidate (carrying the lost frame range) while 0x0302 stays full-IDR.
|
|
||||||
|
|
||||||
#### 20. In-binary Windows service install + interactive-session launch
|
#### 20. In-binary Windows service install + interactive-session launch
|
||||||
*Area:* `cmp:config-management` · *Windows-host:* yes · *Severity:* high · *Effort:* large
|
✅ **SHIPPED (2026-06-20)** — in-binary punktfunk-host service subcommand installs/launches the host into the interactive session (PsExec chain dropped). See also #24.
|
||||||
|
|
||||||
- **Apollo does:** config.cpp:1490-1534 handles the Windows shortcut/service launch dance inside the binary: --shortcut/--shortcut-admin handling, ShellExecuteExW(runas, --shortcut-admin) to self-elevate when the service isn't running, waits for the service, wait_for_ui_ready(), launch_ui(), then returns 1 so the foreground process does NOT also start a stream host. This is Sunshine/Apollo's mature service<->UI two-process split that makes one-click launch work.
|
|
||||||
- **punktfunk gap:** punktfunk has no service-install / self-elevation / interactive-session bring-up in the binary. Deployment is documented as a manual chain of external scripts — scheduled task -> PsExec64 -i 1 -> launch.vbs -> host-run.cmd (docs/windows-host.md:77-96) — fragile and operator-hostile. main.rs has no install/service subcommand.
|
|
||||||
- **Proposal:** Add `punktfunk-host install`/`uninstall`/`service` subcommands (Windows-gated) that register a service or an Interactive/Highest scheduled task to launch the host in Session 1 (the documented requirement for DXGI duplication + SendInput), and the self-elevate-if-not-running shortcut path. Reuse the existing capture/wgc_relay CreateProcessAsUserW machinery already in the crate. This codifies the script chain into the binary without touching the per-frame path or core.
|
|
||||||
|
|
||||||
#### 21. Composite the moved cursor onto a clean copy even when DDA returns no new desktop frame
|
#### 21. Composite the moved cursor onto a clean copy even when DDA returns no new desktop frame
|
||||||
*Area:* `win:cursor-compositing` · *Windows-host:* yes · *Severity:* high · *Effort:* large · **⊘ ALREADY-HANDLED (2026-06-16)**
|
⊘ **NOT-A-BUG (2026-06-16)** — premise incorrect: DXGI returns S_OK for pointer-only updates (LastMouseUpdateTime != 0, LastPresentTime == 0) and acquire() recomposites the cursor at its new position; last_present is repeated only on a genuine WAIT_TIMEOUT. Only an optional perf micro-opt remains (Apollo re-blends just the cursor rect to avoid a full CopyResource per pointer update).
|
||||||
|
|
||||||
> **Resolution — not a bug for punktfunk.** The gap below assumes a cursor moving over a static screen
|
|
||||||
> produces `AcquireNextFrame` **timeouts**. It does not: DXGI returns **S_OK for pointer-only updates**
|
|
||||||
> (`FrameInfo.LastMouseUpdateTime != 0`, `LastPresentTime == 0`), with the resource holding the
|
|
||||||
> (unchanged) desktop. `acquire()` always re-runs `present_acquired` on S_OK (`dxgi.rs:1407,1474`), which
|
|
||||||
> re-copies the desktop and recomposites the cursor at its new position. `last_present` is repeated only
|
|
||||||
> on a genuine `WAIT_TIMEOUT` (nothing changed) or a mid-rebuild gap — correct. The agent that raised this
|
|
||||||
> didn't account for DDA's pointer-update S_OK semantics, and the run was killed before the verify phase
|
|
||||||
> reached it. The only real delta from Apollo is a **perf** micro-opt (Apollo retains a clean copy and
|
|
||||||
> re-blends just the cursor rect, avoiding a full ~29 MB `CopyResource` per pointer update) — deferred as
|
|
||||||
> optional, pending evidence of GPU-copy pressure.
|
|
||||||
|
|
||||||
- **Apollo does:** Apollo treats a mouse-only update as a real update (display_vram.cpp:1162-1168) and keeps an intermediate D3D surface of the last desktop frame so it can copy surface->fresh image and re-blend the cursor at its new position with no new DDA frame (last_frame_variant state machine, display_vram.cpp:1239-1306).
|
|
||||||
- **punktfunk gap (as originally filed — see Resolution above; premise incorrect):** punktfunk only composites on a fresh AcquireNextFrame (dxgi.rs:1477); on timeout it repeats last_present (dxgi.rs:1547-1561) which has the OLD cursor position baked in, so a cursor moving over a static screen stutters/lags.
|
|
||||||
- **Proposal (superseded; only the perf variant remains):** Keep a clean intermediate copy of the last desktop frame (an extra DEFAULT texture). In acquire (dxgi.rs:1341), when AcquireNextFrame times out but update_cursor saw a position change (LastMouseUpdateTime changed) and the cursor is visible, copy the clean intermediate into gpu_copy and re-run composite_cursor_gpu, then return that as a fresh frame instead of repeating last_present.
|
|
||||||
|
|
||||||
#### 22. Add real reference-frame invalidation (RFI) instead of always forcing IDR
|
#### 22. Add real reference-frame invalidation (RFI) instead of always forcing IDR
|
||||||
*Area:* `win:nvenc-d3d11` · *Windows-host:* yes · *Severity:* high · *Effort:* large
|
✅ **SHIPPED (2026-06-20)** — real RFI via nvEncInvalidateRefFrames with dedup + IDR-on-overflow; control plane 0x0301 routes to invalidate. NVENC impl CI-pending. See #19.
|
||||||
|
|
||||||
- **Apollo does:** Apollo keeps a deep DPB (maxNumRefFrames 5/HEVC, 8/AV1) but pins L0 ref to 1 (nvenc_base.cpp:268-281), then on a loss event calls nvEncInvalidateRefFrames per-frame over the requested range, dedups against the last range, expands to the last-encoded index, escalates to IDR only if the range exceeds DPB depth, and tags the next frame rfi_needs_confirmation (nvenc_base.cpp:574-610). This lets the encoder re-reference an older still-valid frame rather than emit a multi-millisecond keyframe.
|
|
||||||
- **punktfunk gap:** punktfunk has NO invalidate path — request_keyframe() always forces a full IDR (nvenc.rs:437-442,465-467); punktfunk1.rs:2153 / gamestream/stream.rs:336 wire 'RFI' straight to a keyframe. Every recovery is a costly IDR spike, defeating the infinite-GOP design.
|
|
||||||
- **Proposal:** In nvenc.rs add `maxNumRefFramesInDPB`/`numRefL0=1` to the HEVC/H264/AV1 config in init_session, gate on a new caps query NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION, track last_encoded_frame_index + last_rfi_range, and add an `invalidate_ref_frames(first,last)` method on the Encoder trait (encode.rs:41-51) that calls API.invalidate_ref_frames per index with Apollo's dedup/escalate-to-IDR-on-overflow logic. Wire punktfunk1.rs RFI requests to it, falling back to request_keyframe() only when it returns false.
|
|
||||||
|
|
||||||
#### 23. Add a DS4 (DualShock4) ViGEm target on Windows with type auto-selection, motion, touchpad, battery and timestamp pump
|
#### 23. Add a DS4 (DualShock4) ViGEm target on Windows with type auto-selection, motion, touchpad, battery and timestamp pump
|
||||||
*Area:* `win:input-sendinput-vigem` · *Windows-host:* yes · *Severity:* high · *Effort:* large
|
*Area:* `win:input-sendinput-vigem` · *Windows-host:* yes · *Severity:* high · *Effort:* large
|
||||||
@@ -1959,38 +1884,16 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
- **Proposal:** In gamepad_windows.rs, add a DS4Wired branch via vigem_client::DualShock4Wired with a union/enum PadEntry. Resolve type from the decoded Arrival (precedence: explicit env/client choice > PS type > motion/touchpad caps > X360), mirroring the existing GAMEPAD-preference negotiation. Port Apollo's wTimestamp pump (5.333us units, re-send every 100ms), motion calibration constants (:157-170), and the touchpad byte packing (:1604-1608). Surface the LED color via the existing 0xCA/feedback plane.
|
- **Proposal:** In gamepad_windows.rs, add a DS4Wired branch via vigem_client::DualShock4Wired with a union/enum PadEntry. Resolve type from the decoded Arrival (precedence: explicit env/client choice > PS type > motion/touchpad caps > X360), mirroring the existing GAMEPAD-preference negotiation. Port Apollo's wTimestamp pump (5.333us units, re-send every 100ms), motion calibration constants (:157-170), and the touchpad byte packing (:1604-1608). Surface the LED color via the existing 0xCA/feedback plane.
|
||||||
|
|
||||||
#### 24. Replace the PsExec scheduled-task launch with a real Windows service that relaunches the host on session change
|
#### 24. Replace the PsExec scheduled-task launch with a real Windows service that relaunches the host on session change
|
||||||
*Area:* `win:system-secure-desktop` · *Windows-host:* yes · *Severity:* high · *Effort:* large
|
✅ **SHIPPED (2026-06-20)** — real Windows service relaunches the host on console-session change (SERVICE_ACCEPT_SESSIONCHANGE); PsExec scheduled-task dropped. See also #20.
|
||||||
|
|
||||||
- **Apollo does:** SunshineSvc.exe runs as LocalSystem in Session 0, loops on WTSGetActiveConsoleSessionId, clones its own token with DuplicateTokenEx(TokenPrimary)+SetTokenInformation(TokenSessionId) and CreateProcessAsUserW into winsta0\\default inside a per-session job object (JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE|BREAKAWAY_OK); opts into SERVICE_ACCEPT_SESSIONCHANGE and on WTS_CONSOLE_CONNECT terminates+relaunches the host in the new session (tools/sunshinesvc.cpp:95,111,239,256,267,276-294).
|
|
||||||
- **punktfunk gap:** punktfunk has no Windows service; launch is a PsExec64 -s -i 1 scheduled task hard-coded to session 1 (docs/windows-host.md:78-84), with the SERVICE_CONTROL_SESSIONCHANGE relaunch listed as unimplemented step 6 (docs/windows-secure-desktop.md:89). Launch scripts are not even in the repo.
|
|
||||||
- **Proposal:** Add a small Rust service binary (new crate or punktfunk-host `service` subcommand) using windows::Win32::System::Services (RegisterServiceCtrlHandlerEx, StartServiceCtrlDispatcher) that mirrors sunshinesvc.cpp: WTSGetActiveConsoleSessionId -> DuplicateTokenEx+SetTokenInformation(TokenSessionId) -> CreateProcessAsUserW(lpDesktop=winsta0\\default) into a kill-on-close job, accept SERVICE_ACCEPT_SESSIONCHANGE, and relaunch the host on a genuine console-session change. Ship an installer and drop the PsExec dependency.
|
|
||||||
|
|
||||||
#### 25. Elevate capture/encode/send thread priority on the host hot path
|
#### 25. Elevate capture/encode/send thread priority on the host hot path
|
||||||
*Area:* `cmp:protocol-streaming` · *Windows-host:* yes · *Severity:* medium · *Effort:* small · **✓ verified**
|
✅ **SHIPPED (2026-06-20)** — hot-path capture/encode/send threads now elevate priority (Windows SetThreadPriority HIGHEST for send / ABOVE_NORMAL for capture+encode; best-effort niceness on Linux, no-ops without privilege), per the verified plan.
|
||||||
|
|
||||||
- **Apollo does:** Apollo raises the transmit/capture thread priority: platf::adjust_thread_priority(thread_priority_e::critical) in the video broadcast thread (stream.cpp:1122) and ::high in the audio/control paths (stream.cpp:1333, 1672); the Windows impl is SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST/ABOVE_NORMAL) (platform/windows/misc.cpp:1081-1102).
|
|
||||||
- **punktfunk gap:** punktfunk names its hot-path threads (stream.rs:44 video, stream.rs:204 send, punktfunk1.rs:1804 send_loop, punktfunk1.rs:2017/2328 send threads) but never sets a scheduling priority — every host capture/encode/send thread runs at default priority. Only the macOS client elevates (client.rs:169). On a loaded Windows desktop the encode/send thread can be preempted, adding jitter the frame-pacing logic can't recover.
|
|
||||||
- **Proposal:** Add a cross-platform raise_current_thread_priority() helper (SetThreadPriority on Windows, optionally AvSetMmThreadCharacteristics for MMCSS; sched/nice on Linux) and call it at the top of the GameStream send thread, the native send_loop, and the encode thread. Cheap, high-value jitter reduction, no design impact.
|
|
||||||
- **Verify verdict:** `confirmed_gap` — punktfunk: NO thread-priority call exists anywhere in the workspace (grep for SetThreadPriority/sched_setscheduler/setpriority/AvSetMm/THREAD_PRIORITY across crates/ returned zero hits). Hot-path threads are named-only at default priority: GameStream video thread crates/punktfunk-host/src/gamestream/stream.rs:44-53 (thread::Builder name "punktfunk-video") and GameStream send thread stream.rs:204-206 ("punktfunk-send"); native send threads crates/punktfunk-host/src/punktfunk1.rs:2017-2033 and punktfunk1.rs:2328-2333 ("punktfunk-send"), and the native send_loop at punktfunk1.rs:1804 — all spawned with no priority set. The encode work shares the capture thread (punktfunk1.rs:2011-2013 "this thread captures+encodes ... and hands each AU to a dedicated send thread"), also default priority. The windows crate is ALREADY a dependency with the needed feature: crates/punktfunk-host/Cargo.toml:141 enables "Win32_System_Threading" (SetThreadPriority/GetCurrentThread available, zero new deps). Apollo: confirmed it raises priority on every hot-path thread — capture src/video.cpp:1295 (critical), encode src/video.cpp:2359 and 2396 (high), video send src/stream.cpp:1333 (high), control src/stream.cpp:1122 (critical), audio src/stream.cpp:1672 + src/audio.cpp:94/208. Windows impl is SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST/ABOVE_NORMAL) at src/platform/windows/misc.cpp:1081-1102, plus DwmEnableMMCSS(true) (misc.cpp:1139) and AvSetMmThreadCharacteristics("Pro Audio") for the audio-capture thread (src/platform/windows/audio.cpp:540). CRITICAL NUANCE: Apollo's adjust_thread_priority is effectively Windows-only — src/platform/linux/misc.cpp:362-364 is "// Unimplemented" and src/platform/macos/misc.mm:218-220 is "// Unimplemented".
|
|
||||||
- **Refined:** Add a small cross-platform helper raise_current_thread_priority(level) and call it at the TOP of each hot-path thread body (so the calling thread itself is elevated): the GameStream send thread (stream.rs:206), the GameStream video/capture+encode thread (stream.rs:46), the native send threads (punktfunk1.rs:2021 and punktfunk1.rs:2331 closures, before/at the start of send_loop), and the native capture+encode thread (the punktfunk1.rs run body that owns capture+encode, punktfunk1.rs ~2011+). Windows: SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST) for the send/network thread (latency-critical, matches Apollo's video-send=high but the punktfunk send thread also does FEC+seal so HIGHEST is defensible) and THREAD_PRIORITY_ABOVE_NORMAL for capture+encode — using the windows crate already on Cargo.toml:141, no new deps. Optionally associate the network/encode thread with MMCSS via AvSetMmThreadCharacteristics (needs the Win32_System_Threading "Games"/"Pro Audio" task + AVRT feature) for higher-fidelity scheduling under DWM load; treat as a follow-up, not the first cut. Linux (net-new beyond Apollo, since Apollo leaves it unimplemented and punktfunk is Linux-first): best-effort nice(-10)/setpriority on the send+encode threads — note SCHED_FIFO/RR requires CAP_SYS_NICE/rtprio limits the host won't have by default, so do NOT default to realtime; a plain niceness bump is the safe portable choice and silently no-ops without privilege. Make every priority call best-effort (log-and-continue on failure, exactly as Apollo does at misc.cpp:1104). No async, no per-frame allocation, no ABI surface change — purely thread-setup, so no design invariant is touched.
|
|
||||||
|
|
||||||
#### 43. Socket QoS / DSCP marking on the media sockets
|
#### 43. Socket QoS / DSCP marking on the media sockets
|
||||||
*Area:* `cmp:protocol-streaming` · *Windows-host:* yes · *Severity:* medium · *Effort:* medium · **✓ verified**
|
✅ **SHIPPED (2026-06-20)** — punktfunk_core::transport::qos set_media_qos marks the native + GameStream media sockets (DSCP CS5 video / CS6 audio via IP_TOS + Linux SO_PRIORITY 5/6, opt-in PUNKTFUNK_DSCP=1). Windows caveat: plain IP_TOS is a no-op on the wire without a qWAVE policy — porting Apollo's qWAVE path (QOSAddSocketToFlow) remains a documented follow-up.
|
||||||
|
|
||||||
- **Apollo does:** Apollo tags video and audio sockets for prioritized delivery: enable_socket_qos(...qos_data_type_e::video...) and (...audio...) called per session (stream.cpp:1917, stream.cpp:1938); the Windows impl uses qWAVE QOSCreateHandle/QOSAddSocketToFlow with DSCP tagging (platform/windows/misc.cpp:1616-1652), with Linux/macOS equivalents.
|
|
||||||
- **punktfunk gap:** punktfunk sets NO QoS/DSCP anywhere — grep for qos/DSCP/IP_TOS across crates/punktfunk-host and crates/punktfunk-core finds only the x-nv-vqos ANNOUNCE keys (rtsp.rs:278) and a macOS *client* pthread QoS (client.rs:169). Neither the GameStream sockets (stream.rs:66 bind, audio) nor the native data socket (transport/udp.rs) request link-layer/router priority.
|
|
||||||
- **Proposal:** Add a small per-OS helper to mark the video/audio/data UDP sockets: DSCP EF/AF41 via IP_TOS/IPV6_TCLASS on Linux/macOS, qWAVE QOSAddSocketToFlow on Windows (gated behind an env/config opt-in). Wire it into stream.rs socket setup and the native transport socket creation. Directly improves latency under contended Wi-Fi / shared uplink.
|
|
||||||
- **Verify verdict:** `confirmed_gap` — PUNKTFUNK — gap is real on every media socket. Native data plane crates/punktfunk-core/src/transport/udp.rs:359-365 (UdpTransport::connect) and :374-414 (connect_via_punch) grow SO_SNDBUF/SO_RCVBUF (:431-447 grow_buffers via socket2::SockRef) and set GSO/USO, but never set IP_TOS/IPV6_TCLASS/SO_PRIORITY/qWAVE. GameStream sockets are bare std UdpSocket with no QoS: video crates/punktfunk-host/src/gamestream/stream.rs:66, audio audio.rs:305, control control.rs:36. RTSP does NOT parse the GameStream qosTrafficType keys at all (grep qosTrafficType in crates/punktfunk-host → exit 1), and rtsp.rs only reads x-nv-vqos bitrate/fec/codec (rtsp.rs:278). The only QoS in the tree is a macOS *client* pthread QoS-class (core/src/client.rs:156-169) — unrelated to link-layer marking. socket2 is already a punktfunk-core dep (Cargo.toml:34), so DSCP via SockRef::set_tos is trivial to add. APOLLO — confirmed it does exactly this, on by default. Per-session calls: src/stream.cpp:1917 (video) and :1938 (audio) → platf::enable_socket_qos(..., videoQosType/audioQosType != 0). Those flags come from RTSP src/rtsp.cpp:1005-1006 and are DEFAULTED non-zero at src/rtsp.cpp:982-983 (x-nv-vqos qosTrafficType="5", x-nv-aqos="4"), so QoS is on for stock Moonlight. Linux impl: src/platform/linux/misc.cpp:797-851 sets IP_TOS/IPV6_TCLASS (DSCP 40=CS5 video, 48=CS6 audio, shifted <<2) plus SO_PRIORITY 5/6. Windows impl: src/platform/windows/misc.cpp:1616-1722 dynamically loads qwave.dll and uses QOSCreateHandle/QOSAddSocketToFlow with QOSTrafficTypeAudioVideo/Voice — and crucially returns nullptr (no-op) unless dscp_tagging is set (:1622-1625). macOS: src/platform/macos/misc.mm:446.
|
|
||||||
- **Refined:** Add a per-OS set_media_qos(socket, kind) helper. Linux/macOS: use the already-present socket2 — SockRef::set_tos(AF41<<2) for IPv4 / set_tclass_v6 for IPv6, plus SO_PRIORITY on Linux (video=5, audio=6, the max without CAP_NET_ADMIN; set AFTER TOS since TOS resets it — Apollo linux/misc.cpp:841-845). Wire it into UdpTransport::connect / connect_via_punch (the native punktfunk/1 data plane — the primary, highest-value target) behind an opt-in env (PUNKTFUNK_DSCP=1) and optionally a Config field, plus the GameStream stream.rs:66 / audio.rs:305 / control.rs:36 sockets. IMPORTANT Windows-host caveat (this is the user's focus and where the naive version fails): on Windows, plain IP_TOS setsockopt is silently stripped by the OS unless a registry/group-policy QoS policy ('Do not use NLA') is configured — which is exactly why Apollo uses qWAVE (QOSAddSocketToFlow) instead. So a one-line socket2 set_tos does NOT tag on the wire on Windows. To actually deliver value on the Windows host, port Apollo's qWAVE path (runtime LoadLibraryExA qwave.dll, QOSCreateHandle once, QOSAddSocketToFlow per socket with QOSTrafficTypeAudioVideo/Voice) including the dual-stack v4-mapped connect() workaround (windows/misc.cpp:1675-1700) — note our data socket is already connect()ed (udp.rs:361), which sidesteps most of that hack. Keep RAII teardown (QOSRemoveSocketFromFlow on drop) like Apollo's qos_t/deinit_t. This is purely socket-setup, off the per-frame path, no core C-ABI change, no async — fully compatible with all three design invariants.
|
|
||||||
|
|
||||||
#### 90. Bitrate-derived rate-control pacing (vs frame-interval-only)
|
#### 90. Bitrate-derived rate-control pacing (vs frame-interval-only)
|
||||||
*Area:* `cmp:protocol-streaming` · *Windows-host:* no · *Severity:* medium · *Effort:* medium · **✓ verified**
|
⊘ **REJECTED / OBSOLETE (2026-06-20)** — proposal premise is false: Apollo paces to a hardcoded ~80%-of-1Gbps FIXED link ceiling (stream.cpp:1464), NOT the negotiated bitrate, and punktfunk is pixel-rate-bound by design (VBR/IDR spikes legitimately exceed average bitrate). Existing frame-interval + burst-cap pacing already covers the cited microburst risk; defer unless a measured rate-limited-link regression appears. (If anything, port the FIXED link-ceiling concept via an env knob like PUNKTFUNK_PACE_BURST_KB, not bitrate-derived pacing.)
|
||||||
|
|
||||||
- **Apollo does (as originally filed — characterization incorrect; see Verify verdict below):** Apollo paces each frame's packets at the *negotiated bitrate*: ratecontrol_packets_in_1ms = giga*80/100/1000/blocksize/8 (stream.cpp:1464) and sleeps the send loop to that per-millisecond budget across the frame (stream.cpp:1578-1627), so the sender shapes to the link's allotted rate, not just the frame deadline.
|
|
||||||
- **punktfunk gap:** Both punktfunk send pacers spread purely over the FRAME INTERVAL: the GameStream sender uses budget = frame_interval * 0.75 (stream.rs:209) and the native paced_submit uses budget to next frame's deadline * 0.9 (punktfunk1.rs:1752) — neither derives a packets-per-ms budget from cfg.bitrate_kbps (the bitrate is only used to open NVENC, stream.rs:275). A spiky IDR or VBR overshoot can still microburst above the negotiated rate within its frame window.
|
|
||||||
- **Proposal:** Compute a bitrate-derived per-millisecond send budget (like Apollo's ratecontrol_packets_in_1ms) from the negotiated bitrate and pace overflow to THAT rate inside paced_submit / spawn_sender, taking the min of the frame-interval budget and the bitrate budget. Smooths VBR bursts on rate-limited links without breaking the existing microburst fast-path.
|
|
||||||
- **Verify verdict:** `partial` — PUNKTFUNK gap is real: both pacers spread over the FRAME INTERVAL only, never the bitrate. GameStream sender: `let budget = frame_interval.mul_f32(0.75)` (crates/punktfunk-host/src/gamestream/stream.rs:209). Native paced_submit: `let budget = deadline.checked_duration_since(pace_start)...mul_f32(0.9)` (crates/punktfunk-host/src/punktfunk1.rs:1752-1755) where deadline = `next += interval` (punktfunk1.rs:2162) and `interval = Duration::from_secs_f64(1.0 / effective_hz...)` (punktfunk1.rs:2357). bitrate_kbps only configures NVENC (stream.rs:275; punktfunk1.rs:2306, 2694) and is never fed to the pacer. So far the gap claim holds. BUT the Apollo characterization in the proposal is FACTUALLY WRONG: Apollo's `size_t ratecontrol_packets_in_1ms = std::giga::num * 80 / 100 / 1000 / blocksize / 8;` (src/stream.cpp:1464) is a HARDCODED 80% of 1 Gigabit/sec — a fixed constant. grep across stream.cpp shows the negotiated/session bitrate never enters this formula (only std::giga::num, blocksize, and the 80/100 constant appear at lines 1464/1578-1582/1625-1627). Apollo paces to a FIXED ~800 Mbps link ceiling regardless of negotiated bitrate; it is NOT "negotiated-bitrate pacing." punktfunk's own design notes deliberately reject clamping to negotiated bitrate: "The encoder is pixel-rate bound, not bitrate bound" (punktfunk1.rs:321) and the whole 1Gbps+ effort raised the ceiling (punktfunk1.rs:1617-1619, MAX_BITRATE_KBPS ~2 Gbps).
|
|
||||||
- **Refined:** Reject the proposal AS WRITTEN — its premise ("Apollo paces to the negotiated bitrate") is false; Apollo paces to a hardcoded 80%-of-1Gbps fixed link ceiling (stream.cpp:1464), and pacing to negotiated bitrate would actively regress punktfunk (VBR/IDR spikes legitimately exceed average bitrate, and punktfunk explicitly treats the encoder as pixel-rate-bound, not bitrate-bound — punktfunk1.rs:321). If anything is worth porting, it is the FIXED per-millisecond link-rate ceiling concept, not bitrate-derived pacing: optionally compute a fixed packets-per-ms budget from a configurable link-rate ceiling (default high, e.g. matching MAX_BITRATE_KBPS, env-overridable like PUNKTFUNK_PACE_BURST_KB) and take min(frame-interval budget, link-ceiling budget) inside paced_submit/spawn_sender — purely as a microburst smoother for rate-limited links, NOT tied to cfg.bitrate_kbps. Note punktfunk already has the microburst fast-path (burst_cap, punktfunk1.rs:2005-2009 / paced_submit:1734-1743) and frame-interval spreading, which together already address the "spiky IDR microburst" symptom the proposal cites. Recommend deferring unless a measured rate-limited-link regression appears; the current frame-interval + burst-cap pacing covers the cited risk.
|
|
||||||
|
|
||||||
#### 94. Consume the GameStream client loss-stats report
|
#### 94. Consume the GameStream client loss-stats report
|
||||||
*Area:* `cmp:protocol-streaming` · *Windows-host:* no · *Severity:* low · *Effort:* small · **✓ verified**
|
*Area:* `cmp:protocol-streaming` · *Windows-host:* no · *Severity:* low · *Effort:* small · **✓ verified**
|
||||||
@@ -2001,5 +1904,5 @@ GameStream `SO_SNDBUF`), **#8** (move GameStream input injection off the ENet se
|
|||||||
- **Verify verdict:** `confirmed_gap` — PUNKTFUNK gap is real. crates/punktfunk-host/src/gamestream/control.rs:165-177 — after decrypt, the only inner-type dispatch is `if matches!(inner, 0x0301 | 0x0302 | 0x0305)` → force_idr; everything else falls through to gamepad::decode (returns None for non-controller) then input::decode, which at crates/punktfunk-host/src/gamestream/input.rs:35 returns empty unless `type == 0x0206`. So a loss-stats packet (`0x0201`) is silently dropped — `on_receive` has no branch for it. A broad grep across crates/ for loss-stats/last-good-frame/0x0201 found nothing (only DXGI's unrelated "last good frame" comment at capture/dxgi.rs:751). The native plane has only end-of-burst ProbeResult bandwidth/loss telemetry (crates/punktfunk-core/src/client.rs:436, abi.rs:1499) — a one-shot speed test, NOT continuous in-stream loss feedback. APOLLO confirms the claim: src/stream.cpp:41 `#define IDX_LOSS_STATS 3`, src/stream.cpp:61 maps it to wire type `0x0201`, and src/stream.cpp:943-957 reads `int32_t *stats` with stats[0]=count, stats[1]=time-window ms, stats[3]=lastGoodFrame (logged at BOOST verbose). Wire offset confirmed: the map callback receives `next_payload = plaintext.data()+4` (src/stream.cpp:1104), i.e. the body AFTER the 4-byte `[type][payloadLength]` header — so stats[0..] is at body offset 0. Note: Apollo only LOGS it; it does not yet drive adaptive FEC/bitrate off it either.
|
- **Verify verdict:** `confirmed_gap` — PUNKTFUNK gap is real. crates/punktfunk-host/src/gamestream/control.rs:165-177 — after decrypt, the only inner-type dispatch is `if matches!(inner, 0x0301 | 0x0302 | 0x0305)` → force_idr; everything else falls through to gamepad::decode (returns None for non-controller) then input::decode, which at crates/punktfunk-host/src/gamestream/input.rs:35 returns empty unless `type == 0x0206`. So a loss-stats packet (`0x0201`) is silently dropped — `on_receive` has no branch for it. A broad grep across crates/ for loss-stats/last-good-frame/0x0201 found nothing (only DXGI's unrelated "last good frame" comment at capture/dxgi.rs:751). The native plane has only end-of-burst ProbeResult bandwidth/loss telemetry (crates/punktfunk-core/src/client.rs:436, abi.rs:1499) — a one-shot speed test, NOT continuous in-stream loss feedback. APOLLO confirms the claim: src/stream.cpp:41 `#define IDX_LOSS_STATS 3`, src/stream.cpp:61 maps it to wire type `0x0201`, and src/stream.cpp:943-957 reads `int32_t *stats` with stats[0]=count, stats[1]=time-window ms, stats[3]=lastGoodFrame (logged at BOOST verbose). Wire offset confirmed: the map callback receives `next_payload = plaintext.data()+4` (src/stream.cpp:1104), i.e. the body AFTER the 4-byte `[type][payloadLength]` header — so stats[0..] is at body offset 0. Note: Apollo only LOGS it; it does not yet drive adaptive FEC/bitrate off it either.
|
||||||
- **Refined:** Add one branch to control.rs `on_receive`: when the decrypted `pt` inner type (LE u16 at pt[0..2]) == 0x0201 and pt.len() >= 20, decode the body as four LE i32, of which three are used — pt[4..8]=loss_count, pt[8..12]=time_window_ms, pt[16..20]=last_good_frame (mirroring Apollo's stats[0]/stats[1]/stats[3]; pt[12..16], i.e. stats[2], is not among the fields Apollo logs, so skip it; verify endianness against a real Moonlight capture — moonlight-common-c writes these as host-order/LE, and punktfunk already treats control inner fields as LE). Initially log at debug/trace and optionally surface via an AtomicU32 in AppState or the mgmt API so the web console can show client-observed loss. Keep it read-only first. Caveat for the backlog: this is a low-value telemetry hook, NOT adaptive control. The actual lever (adaptive FEC % / bitrate de-rating) is a separate, larger piece of work that Apollo itself does not implement off this signal — do not over-scope. Place it next to the existing 0x0301/0x0302/0x0305 dispatch so the control hot path stays a single decrypt + cheap type match. windowsHost=false is correct: this is GameStream-plane, OS-independent, and the punktfunk/1 native plane is the higher-priority protocol — so prioritize accordingly.
|
- **Refined:** Add one branch to control.rs `on_receive`: when the decrypted `pt` inner type (LE u16 at pt[0..2]) == 0x0201 and pt.len() >= 20, decode the body as four LE i32, of which three are used — pt[4..8]=loss_count, pt[8..12]=time_window_ms, pt[16..20]=last_good_frame (mirroring Apollo's stats[0]/stats[1]/stats[3]; pt[12..16], i.e. stats[2], is not among the fields Apollo logs, so skip it; verify endianness against a real Moonlight capture — moonlight-common-c writes these as host-order/LE, and punktfunk already treats control inner fields as LE). Initially log at debug/trace and optionally surface via an AtomicU32 in AppState or the mgmt API so the web console can show client-observed loss. Keep it read-only first. Caveat for the backlog: this is a low-value telemetry hook, NOT adaptive control. The actual lever (adaptive FEC % / bitrate de-rating) is a separate, larger piece of work that Apollo itself does not implement off this signal — do not over-scope. Place it next to the existing 0x0301/0x0302/0x0305 dispatch so the control hot path stays a single decrypt + cheap type match. windowsHost=false is correct: this is GameStream-plane, OS-independent, and the punktfunk/1 native plane is the higher-priority protocol — so prioritize accordingly.
|
||||||
|
|
||||||
_(28 detailed; remaining 68 medium/low items are in the table above with citations available in Parts 2–3.)_
|
_(28 items had detail subsections — 16 shipped/obsolete ones are now collapsed to one-liners above, 12 still-open ones keep full citations; the remaining 68 medium/low items are in the table above with citations available in Parts 2–3.)_
|
||||||
|
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
---
|
||||||
|
title: "Apple Stage-2 Presenter (handoff)"
|
||||||
|
description: "Design rationale + open items for the explicit VTDecompressionSession → CAMetalLayer presenter. Implementation shipped; this page is trimmed to the why + what's left."
|
||||||
|
---
|
||||||
|
|
||||||
|
> **Status:** SHIPPED behind the opt-in `punktfunk.presenter` flag (`AVSampleBufferDisplayLayer`
|
||||||
|
> stage-1 remains the default known-good path). Live-validated ~11 ms p50 capture→present (commit
|
||||||
|
> `7b10714`). Code: `clients/apple/Sources/PunktfunkKit/{Stage2Pipeline,MetalVideoPresenter,VideoDecoder,LatencyMeter}.swift`;
|
||||||
|
> Settings has a presenter picker (`DefaultsKey.presenter`, `SettingsView.swift`). This doc is trimmed
|
||||||
|
> to design rationale + open items — the shipped `.swift` code is the source of truth for the
|
||||||
|
> decode/present/measurement walkthrough.
|
||||||
|
|
||||||
|
## Why stage 2 (design rationale)
|
||||||
|
|
||||||
|
The **stage-1** presenter feeds compressed HEVC straight into `AVSampleBufferDisplayLayer`, which
|
||||||
|
hardware-decodes **and presents internally with no per-frame callback** — so we can't stamp decode or
|
||||||
|
present, and we can't hand-pace. **Stage-2** takes explicit control: decode with
|
||||||
|
`VTDecompressionSession`, present decoded frames through a `CAMetalLayer` driven by a display link.
|
||||||
|
|
||||||
|
Two wins justify the extra machinery:
|
||||||
|
|
||||||
|
- **~0.5 refresh off the present tail** — the present tail is the biggest client latency term at 60 Hz;
|
||||||
|
display-link-driven present pops the newest-ready frame each vsync instead of letting the layer present
|
||||||
|
on its own internal schedule.
|
||||||
|
- **True decode→present / glass-to-glass measurement** — explicit decode-completion and present
|
||||||
|
timestamps make `capture→present` measurable (modulo the still-unmeasured host render→capture term).
|
||||||
|
|
||||||
|
All of this is **macOS/iOS/tvOS-only** — build + validate on a Mac (`swift build && swift test`, then
|
||||||
|
live against a Linux host). The host + connector side is already done:
|
||||||
|
`PunktfunkConnection.clockOffsetNs` (the connect-time skew offset, host minus client) is what makes the
|
||||||
|
present timestamp cross-machine valid. `skewCorrected` stays false when `clockOffsetNs == 0` (old host)
|
||||||
|
— then the numbers are same-host-only.
|
||||||
|
|
||||||
|
## Architecture pattern (worth recording)
|
||||||
|
|
||||||
|
Async `VTDecompressionSession` callback → **1-slot newest-ready ring** → display-link-driven present:
|
||||||
|
|
||||||
|
- VT decode is **async**; the output callback runs on a VT-managed thread — don't block it, just stamp
|
||||||
|
decode-completion (`CLOCK_REALTIME` ns) + enqueue. Retain the `CVPixelBuffer` until presented (the ring
|
||||||
|
owns it).
|
||||||
|
- Each vsync pops the **newest** ready frame and drops older undisplayed ones — low-latency default, no
|
||||||
|
smoothing buffer.
|
||||||
|
- Three per-frame instants (all `CLOCK_REALTIME` ns, all shifted by `clockOffsetNs` to the host clock):
|
||||||
|
**capture→decoded** = `decodedNs + offset − pts_ns`; **decode→present** = `presentedNs − decodedNs`
|
||||||
|
(the tail stage-2 shortens); **capture→present** = `presentedNs + offset − pts_ns` — the glass-to-glass
|
||||||
|
number.
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
- **Make stage 2 the default** — after resolution / HDR edge-case checks (HDR = BT.2020/PQ, 10-bit
|
||||||
|
`…10BiPlanar` + EDR `CAMetalLayer.wantsExtendedDynamicRangeContent`; ties in with the HDR roadmap).
|
||||||
|
- **Glass-to-glass numbers via `tools/latency-probe`** — close the still-unmeasured host render→capture
|
||||||
|
term.
|
||||||
|
- **Smoothing / pacing policy** — present newest-ready for lowest latency today; a pacing policy can come
|
||||||
|
later if frames look uneven.
|
||||||
|
- **iOS / iPadOS / tvOS stage-2 variants.**
|
||||||
@@ -1,5 +1,9 @@
|
|||||||
# Windows secure-desktop capture — two-process design
|
# Windows secure-desktop capture — two-process design
|
||||||
|
|
||||||
|
> **ARCHIVED 2026-06-26** — this two-process WGC secure-desktop design shipped but is now a
|
||||||
|
> *fallback*; IDD-push is the primary secure-desktop capture path (2026-06-25). Kept for its
|
||||||
|
> constraint analysis + architectural rationale. Current status: design/windows-host-rewrite.md.
|
||||||
|
|
||||||
Status: **all steps (1–6) implemented and live-validated on the RTX 4090 (2026-06-16).** The
|
Status: **all steps (1–6) implemented and live-validated on the RTX 4090 (2026-06-16).** The
|
||||||
two-process path works end to end (host as SYSTEM): the user-session WGC helper relays video, the mux
|
two-process path works end to end (host as SYSTEM): the user-session WGC helper relays video, the mux
|
||||||
switches to the host's DDA on the secure desktop, a dead helper is rebuilt automatically, and the
|
switches to the host's DDA on the secure desktop, a dead helper is rebuilt automatically, and the
|
||||||
@@ -40,7 +40,7 @@ the GPU/compositor stack of the box it runs on). What is:
|
|||||||
|
|
||||||
| Image | Source | Notes |
|
| Image | Source | Notes |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `git.unom.io/unom/punktfunk-web` | `web/Dockerfile` (repo-root context — orval needs `docs/api/openapi.json`) | Nitro `bun` bundle; `PORT` (3000) and `PUNKTFUNK_MGMT_URL` env at runtime |
|
| `git.unom.io/unom/punktfunk-web` | `web/Dockerfile` (repo-root context — orval needs `api/openapi.json`) | Nitro `bun` bundle; `PORT` (3000) and `PUNKTFUNK_MGMT_URL` env at runtime |
|
||||||
| `git.unom.io/unom/punktfunk-docs` | `docs-site/Dockerfile` | This site; `PORT` (3000) |
|
| `git.unom.io/unom/punktfunk-docs` | `docs-site/Dockerfile` | This site; `PORT` (3000) |
|
||||||
| `git.unom.io/unom/punktfunk-rust-ci` | `ci/rust-ci.Dockerfile` | Ubuntu 26.04 + FFmpeg 8/PipeWire/GL/GBM dev libs + a libcuda **link stub** (driver userspace, no kernel module) + pinned rustup — the container `ci.yml`'s Rust job runs in |
|
| `git.unom.io/unom/punktfunk-rust-ci` | `ci/rust-ci.Dockerfile` | Ubuntu 26.04 + FFmpeg 8/PipeWire/GL/GBM dev libs + a libcuda **link stub** (driver userspace, no kernel module) + pinned rustup — the container `ci.yml`'s Rust job runs in |
|
||||||
|
|
||||||
@@ -3,14 +3,16 @@ title: "DualSense Haptics"
|
|||||||
description: "Feasibility and scoping for audio-driven DualSense haptics."
|
description: "Feasibility and scoping for audio-driven DualSense haptics."
|
||||||
---
|
---
|
||||||
|
|
||||||
|
> **Status:** Audio-driven advanced (voice-coil) haptics — **NO-GO, DEFERRED.** The reachable
|
||||||
|
> HID work it scoped instead — **adaptive triggers + two-motor rumble** — **SHIPPED** (commit
|
||||||
|
> `59edeed`; see CLAUDE.md gamepad section, `inject/dualsense.rs`). This doc is trimmed to the
|
||||||
|
> deferral rationale (the three walls) + the conditions that would trigger a revisit.
|
||||||
|
|
||||||
**Status: scoped, NO-GO for now (deferred).** Advanced voice-coil haptics on the DualSense are
|
Advanced voice-coil haptics on the DualSense are driven by the controller's **USB audio interface**
|
||||||
driven by the controller's **USB audio interface** (4-channel surround, the back two channels carry
|
(4-channel surround, the back two channels carry the haptic waveform), *not* by HID reports.
|
||||||
the haptic waveform), *not* by HID reports. Emulating that on a Linux host and faithfully replaying
|
Emulating that on a Linux host and faithfully replaying it on the Apple client both hit hard walls,
|
||||||
it on the Apple client both hit hard walls, and the supply of software that actually *emits* these
|
and the supply of software that actually *emits* these haptics on a Linux host is essentially zero.
|
||||||
haptics on a Linux host is essentially zero. We defer the audio-haptics feature and instead land the
|
We defer the audio-haptics feature.
|
||||||
parts of "really supporting the DualSense" that *are* reachable: **adaptive triggers (HID) and
|
|
||||||
two-motor rumble.**
|
|
||||||
|
|
||||||
(Grounded in a 4-agent feasibility read — host USB-gadget viability, DualSense audio descriptors,
|
(Grounded in a 4-agent feasibility read — host USB-gadget viability, DualSense audio descriptors,
|
||||||
Linux game demand, Apple client render path — 2026-06-10.)
|
Linux game demand, Apple client render path — 2026-06-10.)
|
||||||
@@ -72,22 +74,6 @@ Even with a captured waveform, the primary client (macOS/iOS) can't render it we
|
|||||||
- There is **no public macOS API** to route CoreAudio to the DualSense's channels 3–4. Doing it
|
- There is **no public macOS API** to route CoreAudio to the DualSense's channels 3–4. Doing it
|
||||||
anyway means private/reverse-engineered APIs that break across OS updates.
|
anyway means private/reverse-engineered APIs that break across OS updates.
|
||||||
|
|
||||||
## What we *can* ship instead ("really supporting the DualSense" minus audio haptics)
|
|
||||||
|
|
||||||
The HID DualSense we built is the foundation, and the high-value parts are within reach:
|
|
||||||
|
|
||||||
1. **Adaptive triggers — GO.** `dualsense.rs` already parses the L2/R2 trigger effects out of HID
|
|
||||||
output report `0x02`. Finishing this is the paused HID work: route them over the `0xCD`
|
|
||||||
HID-output back-channel and render on the client. This delivers the headline "DualSense feel"
|
|
||||||
(trigger resistance/weapon tension) for any source that emits it — and it's pure HID, no audio
|
|
||||||
interface, no kernel rebuild.
|
|
||||||
2. **Two-motor rumble — already done.** Parsed host-side; the Apple client already has
|
|
||||||
`nextRumble()`. Wire it to `GCDeviceHaptics`/`CHHapticEngine` as discrete patterns (API-clean,
|
|
||||||
no private APIs).
|
|
||||||
3. **LED / player-LED / touchpad / motion** — already parsed; finish the `0xCC`/`0xCD` routing.
|
|
||||||
|
|
||||||
This is the resume-able HID DualSense Phase C/D/E work — it stands on its own and was never blocked.
|
|
||||||
|
|
||||||
## Conditions for a future GO on audio haptics
|
## Conditions for a future GO on audio haptics
|
||||||
|
|
||||||
Revisit if **all three** change:
|
Revisit if **all three** change:
|
||||||
@@ -103,9 +89,9 @@ Revisit if **all three** change:
|
|||||||
Until then the cost/benefit is upside-down: three hard subsystems (kernel, USB gadget, audio
|
Until then the cost/benefit is upside-down: three hard subsystems (kernel, USB gadget, audio
|
||||||
routing) to serve ~5–10 Proton titles, rendered lossily on the one client we ship.
|
routing) to serve ~5–10 Proton titles, rendered lossily on the one client we ship.
|
||||||
|
|
||||||
## Recommendation
|
## Open items
|
||||||
|
|
||||||
**Defer audio-driven advanced haptics. Land adaptive triggers (HID) + rumble instead** — that's the
|
- **Audio-driven advanced (voice-coil) haptics — DEFERRED.** Explicitly blocked until **all three**
|
||||||
reachable 80% of "really supporting the DualSense," needs no kernel work, and the parsing is already
|
"Conditions for a future GO" above are met (a real DualSense to capture the UAC layout, a host UDC
|
||||||
written. Keep this doc as the down payment for the audio-haptics feature whenever the three
|
or grown Linux haptic-title supply, and a client that can render PCM haptics). This doc is the down
|
||||||
conditions above are met.
|
payment to revisit then.
|
||||||
@@ -0,0 +1,300 @@
|
|||||||
|
# Game library: more game stores
|
||||||
|
|
||||||
|
> **Status:** Phases 1–4 SHIPPED — 6 `LibraryProvider` impls (Steam, Lutris, Heroic, Epic, GOG,
|
||||||
|
> Xbox) in [`crates/punktfunk-host/src/library.rs`](../crates/punktfunk-host/src/library.rs)
|
||||||
|
> (1869 lines; commits `5f8c6b6` Lutris+Heroic, `b657452` Epic+GOG, `aed0bf0` Xbox, `5acc12d` shared
|
||||||
|
> art cache, `7e9023f` GameStream/Windows+non-gamescope launch wiring, `203ad80` web store badges).
|
||||||
|
> Phases 5–6 (the remaining 6 providers + the `/library/art` endpoint) are **pending**. This doc is
|
||||||
|
> trimmed to design rationale + open items; the shipped code is the source of truth.
|
||||||
|
|
||||||
|
Goal: extend the unified game library so it enumerates and launches titles from more stores —
|
||||||
|
on **Windows** Xbox / Game Pass, Epic, EA app (and GOG / Ubisoft / Battle.net / Amazon);
|
||||||
|
on **Linux** Heroic (Epic+GOG+Amazon), Lutris, and a `.desktop`/Flatpak catch-all.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. The extension point
|
||||||
|
|
||||||
|
The library lives in [`library.rs`](../crates/punktfunk-host/src/library.rs) and is a plug-in system:
|
||||||
|
adding a store is a new `LibraryProvider`, not a rewrite.
|
||||||
|
|
||||||
|
```rust
|
||||||
|
pub trait LibraryProvider {
|
||||||
|
fn store(&self) -> &'static str; // "steam", ...
|
||||||
|
fn list(&self) -> Vec<GameEntry>; // best-effort: empty (not Err) if the store is absent
|
||||||
|
}
|
||||||
|
pub struct GameEntry { id: String /* "<store>:<localid>" */, store, title, art: Artwork, launch: Option<LaunchSpec> }
|
||||||
|
pub struct Artwork { portrait, hero, logo, header: Option<String> } // URLs the CLIENT fetches
|
||||||
|
pub struct LaunchSpec{ kind: String, value: String }
|
||||||
|
```
|
||||||
|
|
||||||
|
**The "read the launcher's own on-disk files, no auth" approach is the gold standard we replicate per
|
||||||
|
store.** The launcher need not be running unless noted.
|
||||||
|
|
||||||
|
> The legacy GameStream `apps.json` ([`gamestream/apps.rs`](../crates/punktfunk-host/src/gamestream/apps.rs))
|
||||||
|
> is a **separate** Moonlight surface (session recipes: compositor + nested command) and stays as-is.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. The two cross-cutting pieces (this is the real work)
|
||||||
|
|
||||||
|
Per-store enumeration is mostly easy. Two shared problems gate everything — especially Windows.
|
||||||
|
|
||||||
|
### 2a. Launch abstraction + the Windows launch gap
|
||||||
|
|
||||||
|
- **Linux** runs the chosen title as a shell command **nested in the per-session gamescope**
|
||||||
|
(`set_launch_command` / `PUNKTFUNK_GAMESCOPE_APP`). Works today.
|
||||||
|
- **Windows** captures the whole desktop (DXGI/WGC); there is no nesting, and
|
||||||
|
`VirtualDisplay::set_launch_command` is a **no-op** ([`vdisplay.rs:57`](../crates/punktfunk-host/src/vdisplay.rs)).
|
||||||
|
So on Windows **nothing is auto-started** — the user just sees the desktop.
|
||||||
|
|
||||||
|
**Plan.** Stop returning a single Linux shell string from `command_for`; introduce an internal enum and
|
||||||
|
an OS-aware resolver:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
enum LaunchAction { Shell(String), Spawn { exe: PathBuf, args: Vec<String>, workdir: Option<PathBuf> } }
|
||||||
|
fn resolve_launch(&LaunchSpec) -> Option<LaunchAction> // cfg-aware
|
||||||
|
fn launch_command(id) -> Option<String> // Linux: thin Shell wrapper (back-compat)
|
||||||
|
#[cfg(windows)] fn launch_title(id) -> Result<()> // resolve Spawn + run in interactive session
|
||||||
|
```
|
||||||
|
|
||||||
|
**The Windows launcher already exists in the codebase — reuse it.**
|
||||||
|
[`capture/windows/wgc_relay.rs:196-204`](../crates/punktfunk-host/src/capture/windows/wgc_relay.rs)
|
||||||
|
does exactly the needed sequence:
|
||||||
|
`WTSGetActiveConsoleSessionId → WTSQueryUserToken → DuplicateTokenEx(TokenPrimary) →
|
||||||
|
CreateEnvironmentBlock → CreateProcessAsUserW(lpDesktop="winsta0\\default")`.
|
||||||
|
|
||||||
|
- Factor that into `windows/interactive.rs::spawn_in_active_session(exe, args, workdir) -> u32`.
|
||||||
|
- **Critical:** use the **logged-in user token** (`WTSQueryUserToken`, as `wgc_relay` does) — **not**
|
||||||
|
`windows/service.rs:449-510`'s variant, which duplicates the **SYSTEM** token and only retargets its
|
||||||
|
session id. UWP/appx activation, the user-hive protocol handlers (`HKCU\Software\Classes`), and each
|
||||||
|
launcher's auth/entitlement context all require the *real user's* token. The host process stays SYSTEM.
|
||||||
|
- For URI-handoff kinds (Epic/Steam/EA/Amazon/GOG-Galaxy) build a **concrete EXE + the URI as a separate
|
||||||
|
argv element**. `CreateProcessAsUserW` does **no** shell/protocol resolution — never `cmd /c`, never a
|
||||||
|
bare URI. For schemes with no exe-argv form (`amazon-games://`, `origin2://`), add an impersonate-token
|
||||||
|
`ShellExecuteEx` fallback (`ImpersonateLoggedOnUser` on a worker thread + `CoInitialize`).
|
||||||
|
- **Order:** launch the title **after** the interactive capture pipeline is live, so the game renders onto
|
||||||
|
the already-captured desktop and grabs foreground.
|
||||||
|
- **Caveats:** `WTSQueryUserToken` fails when no interactive user is logged on (a pre-login box can stream
|
||||||
|
the login/secure desktop but can't auto-launch a title); on the lock/secure desktop a launch may queue
|
||||||
|
until unlock. **Needs on-glass validation** (RTX box) that each launcher EXE accepts its URI on argv and
|
||||||
|
that post-capture launch grabs foreground.
|
||||||
|
|
||||||
|
### 2b. Artwork: a layered, no-auth-first `ArtResolver`
|
||||||
|
|
||||||
|
Steam gets free CDN art keyed by appid. Most stores don't. Layered ladder, degrade to a title-only card:
|
||||||
|
|
||||||
|
1. **Steam** → public Steam CDN by appid (unchanged, client fetches directly).
|
||||||
|
2. **Stores that already hold public CDN URLs** → emit verbatim, **no host endpoint**: Heroic
|
||||||
|
`store_cache` `art_*` (Epic/GOG/Amazon CDN), itch `cover_url`, GOG via public `api.gog.com/products/<id>?expand=images`
|
||||||
|
(one cached lookup), Epic via local `catcache.bin` keyImages.
|
||||||
|
3. **Xbox** → one **unofficial** no-auth `displaycatalog.mp.microsoft.com` lookup by StoreId, cached,
|
||||||
|
degrade to no-art offline. (Not a stable contract — tolerate drift.)
|
||||||
|
4. **Genuinely-local art** (Lutris `coverart`/`banners` JPEGs, Flatpak/.desktop icons, Bottles) → a
|
||||||
|
**new host-served endpoint is required**, because `Artwork` carries URLs the client fetches and a file
|
||||||
|
on the host has no public URL.
|
||||||
|
5. **Opt-in SteamGridDB** enrichment (v2 API `https://www.steamgriddb.com/api/v2`, `Authorization: Bearer
|
||||||
|
<operator key>`, **off by default**) to fill gaps. Not no-auth; never blocks listing.
|
||||||
|
6. **None** → existing title-only card.
|
||||||
|
|
||||||
|
**New endpoint (still pending):** `GET /library/art/<entryId>/<slot>` (slot ∈ `portrait|hero|logo|header`)
|
||||||
|
on `mgmt.rs`. It resolves `entryId` in the host library to a **known on-disk absolute path** (never
|
||||||
|
interpolates raw client input into a filesystem path), sanitizes the slot, rejects `..`, streams the bytes
|
||||||
|
with the right content-type. Reserve `data:` URLs for tiny logos only (don't bloat the catalog JSON that
|
||||||
|
crosses the control plane). See open question on whether this GET bypasses the mgmt bearer (images are
|
||||||
|
non-sensitive and the streaming client connects over punktfunk/1, not the bearer-gated REST).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Security model (preserved and extended)
|
||||||
|
|
||||||
|
The invariant is unchanged: **the client sends only a store-qualified `GameEntry.id`** (e.g. `lutris:42`,
|
||||||
|
`xbox:9NBLGGH4R315`, `epic:fn:4fe…:Fortnite`) in `Hello.launch`. The host looks it up in its **own**
|
||||||
|
enumerated library, reads the **host-derived** `LaunchSpec`, and resolves it. The client never sends a
|
||||||
|
`LaunchSpec`, command, URI, or path.
|
||||||
|
|
||||||
|
Per-kind charset validators are belt-and-suspenders before any interpolation (values are already
|
||||||
|
host-derived from local files the host owns):
|
||||||
|
|
||||||
|
| kind | guard |
|
||||||
|
|---|---|
|
||||||
|
| `steam_appid`, `lutris_id`, `uplay` | digits only |
|
||||||
|
| `battlenet` | `^[A-Za-z0-9]+$` (case-sensitive) |
|
||||||
|
| `amazon` | `^[A-Za-z0-9-]+$` |
|
||||||
|
| `aumid` | `^[A-Za-z0-9._-]+![A-Za-z0-9._-]+$` (the `!` separator) |
|
||||||
|
| `epic` | ≤3 `:`-split parts, each `^[A-Za-z0-9._-]+$`, then URL-encode colons |
|
||||||
|
| `heroic` | runner ∈ {legendary,gog,nile} + appName `^[A-Za-z0-9._-]+$` |
|
||||||
|
| `ea_offer_ids` | `^[A-Za-z0-9._,-]+$` (allow comma) |
|
||||||
|
|
||||||
|
On **Windows never route a client-influenced string through `cmd /c start`.** `resolve_launch` yields
|
||||||
|
`Spawn{exe,args,workdir}`; `CreateProcessAsUserW` launches a concrete EXE with the URI/flags as separate
|
||||||
|
argv elements. The operator-only `command` kind (custom store + provider-generated Linux shell lines for
|
||||||
|
`desktop`/`itch`) is host-derived/operator-typed, never client-set.
|
||||||
|
|
||||||
|
The one net-new surface is `GET /library/art` — covered in §2b (id-resolved path, no traversal).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. `LaunchSpec` kinds
|
||||||
|
|
||||||
|
| kind | value holds | maps to |
|
||||||
|
|---|---|---|
|
||||||
|
| `lutris_id` | `pga.db` `games.id` (digits) | Linux Shell `lutris lutris:rungameid/<id>` (nests in gamescope) |
|
||||||
|
| `heroic` | `<runner>:<appName>` | Linux argv `heroic --no-gui "heroic://launch?appName=<app>&runner=<runner>"` |
|
||||||
|
| `aumid` | `<PFN>!<AppId>` | Windows Spawn `explorer.exe "shell:AppsFolder\<aumid>"` (interactive session) |
|
||||||
|
| `epic` | `<namespace>:<catalogItemId>:<appName>` | Windows Spawn `EpicGamesLauncher.exe` + `com.epicgames.launcher://apps/<ns>%3A<cat>%3A<app>?action=launch&silent=true` |
|
||||||
|
| `gog` | host-resolved `exe \t args \t workdir` | Windows Spawn `CreateProcessAsUserW(exe,args,workdir)` (direct exe, no Galaxy) |
|
||||||
|
| `uplay` | Ubisoft gameId (digits) | Windows `uplay://launch/<gameId>/0` |
|
||||||
|
| `battlenet` | product code (e.g. `WTCG`, `Fen`, `OSI`) | Windows Spawn `Battle.net.exe --exec="launch <code>"` |
|
||||||
|
| `amazon` | Amazon Games `DbSet.Id` | Windows `amazon-games://play/<Id>` (impersonate ShellExecute) |
|
||||||
|
| `ea_offer_ids` | comma-joined contentID list | Windows `origin2://game/launch/?offerIds=<list>&autoDownload=1` |
|
||||||
|
| `command` (existing) | host-derived shell line | Linux gamescope-nested (desktop/flatpak/itch reuse this) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Per-store provider catalog
|
||||||
|
|
||||||
|
All enumeration is no-auth, local. Confidence is **after** adversarial web-verification.
|
||||||
|
|
||||||
|
### Shipped (phases 1–4)
|
||||||
|
|
||||||
|
| store | OS | enumerate | launch kind | art |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| **Steam** | both | local `.acf`/`.vdf` | `steam_appid` | Steam CDN (client-direct) |
|
||||||
|
| **Lutris** | Linux | read-only `pga.db` (`installed=1`) | `lutris_id` | local JPEGs (needs `/library/art`, still pending) |
|
||||||
|
| **Heroic** | Linux | `store_cache/{legendary,gog,nile}_library.json` | `heroic` | free public CDN (`art_*`) |
|
||||||
|
| **Epic** | Windows | `…\Manifests\*.item` | `epic` | local `catcache.bin` keyImages |
|
||||||
|
| **GOG** | Windows | registry + `goggame-<id>.info` | `gog` (direct-exe) | `api.gog.com/products/<id>?expand=images` |
|
||||||
|
| **Xbox / Game Pass** | Windows | `XboxGames\*\Content\MicrosoftGame.config` + AppRepository PFN | `aumid` | unofficial `displaycatalog` lookup |
|
||||||
|
|
||||||
|
The hard-won corrections folded into these (keep when revisiting): Epic uses Playnite's **exclusion**
|
||||||
|
filter (skip `UE_`, DLC `addons` w/o `addons/launchable`), builds the namespace:catalog:app **triple** when
|
||||||
|
ids exist else **falls back to the bare `appName` URI** (don't set launch=None); GOG launches the
|
||||||
|
**direct exe** (dodges Galaxy cold-start/anti-cheat); Xbox **reads** the PackageFamilyName from the
|
||||||
|
`AppRepository\Packages\<PackageFullName>` dir name (**never** hash the publisher), scans `XboxGames` rather
|
||||||
|
than parse the undocumented `.GamingRoot`, and UWP `aumid` activation is load-bearing on the interactive
|
||||||
|
user token; Heroic `gui=false` is inert (`--no-gui` does it) and single-instance Electron forwards-and-exits
|
||||||
|
(launch was gated). The Xbox provider misses pure-UWP (non-GDK) Store games under ACL-locked `WindowsApps` — accepted for v1.
|
||||||
|
|
||||||
|
### Remaining providers (phases 5–6)
|
||||||
|
|
||||||
|
#### Desktop (`.desktop` + Flatpak) — Linux, P1, effort M, confidence medium (universal catch-all)
|
||||||
|
- **Enumerate:** scan `{/var/lib/flatpak/exports/share/applications,
|
||||||
|
~/.local/share/flatpak/.../applications, /usr/share/applications, /usr/local/share/applications,
|
||||||
|
~/.local/share/applications}/*.desktop`. Require `Type=Application` + `Categories` contains `Game`; skip
|
||||||
|
`NoDisplay`/`Hidden`/`Terminal=true` and known launcher app-ids (Steam/Heroic/Lutris/Bottles/RetroArch)
|
||||||
|
to avoid recursion/dupes.
|
||||||
|
- **Launch:** reuse `command` (host-derived shell line, nested in gamescope): cleaned `Exec` (strip
|
||||||
|
`%U/%F/%f/%u/%i/%c/%k`) else `flatpak run <app-id>`.
|
||||||
|
- **Artwork:** local — resolve `Icon=` via the hicolor theme / flatpak exported icons → `/library/art`.
|
||||||
|
App icons are low-res, not box art (acceptable header fallback).
|
||||||
|
- **Notes:** run **last** and dedup by install path / drop ids already surfaced by Steam/Heroic/Lutris.
|
||||||
|
|
||||||
|
#### itch.io — Linux + Windows, P3, effort S, confidence medium
|
||||||
|
- **Enumerate:** read-only `rusqlite` of `butler.db` (`~/.config/itch/db/butler.db`; Flatpak
|
||||||
|
`io.itch.itch`; Windows `%AppData%\itch\db`, per-user). JOIN `caves`→`games`. **Key on `cave.ID`** (a
|
||||||
|
game can have multiple caves; install location + verdict are per-cave). Read game title / `cover_url`;
|
||||||
|
resolve install dir from `InstallLocationID`+`InstallFolderName`||`CustomInstallFolder` + the Verdict
|
||||||
|
candidate. Confirm exact column names on-box.
|
||||||
|
- **Launch:** `command` → direct binary `basePath`+`candidate.path`, **only** for Verdict candidates with
|
||||||
|
`flavor==native` (html/jar/love need itch's runtime — fall back to custom).
|
||||||
|
- **Artwork:** **free** — `games.cover_url` is a public itch CDN URL.
|
||||||
|
|
||||||
|
#### Ubisoft Connect — Windows, P2, effort S, confidence medium
|
||||||
|
- **Enumerate:** registry `HKLM\SOFTWARE\WOW6432Node\Ubisoft\Launcher\Installs\<gameId>` (both reg views),
|
||||||
|
read `InstallDir`; title = install-dir leaf folder (primary) else the `Uplay Install <gameId>` Uninstall
|
||||||
|
`DisplayName`.
|
||||||
|
- **Launch:** `uplay` → `uplay://launch/<gameId>/0`. **Artwork:** none → title-only.
|
||||||
|
- **Notes:** smallest effort once the Windows URI-launch wiring exists; hive+scheme unchanged across the
|
||||||
|
Uplay→Ubisoft Connect rebrand.
|
||||||
|
|
||||||
|
#### Amazon Games — Windows, P2, effort S, confidence medium
|
||||||
|
- **Enumerate:** read-only `rusqlite` of
|
||||||
|
`%LocalAppData%\Amazon Games\Data\Games\Sql\GameInstallInfo.sqlite`:
|
||||||
|
`SELECT Id,ProductTitle,InstallDirectory FROM DbSet WHERE Installed=1`. **Per-user path** — the SYSTEM
|
||||||
|
service must resolve the **active session user's** profile (not the SYSTEM profile).
|
||||||
|
- **Launch:** `amazon` → `amazon-games://play/<Id>` (impersonate-token ShellExecute; no clean exe-argv form).
|
||||||
|
- **Artwork:** `ProductIconUrl`/`ProductLogoUrl` columns when present, else none.
|
||||||
|
|
||||||
|
#### Battle.net — Windows, P2, effort **L**, confidence medium (high catalog value: WoW/Diablo IV/Overwatch 2/CoD)
|
||||||
|
- **Enumerate:** hand-roll a ~4-field protobuf decode of `C:\ProgramData\Battle.net\Agent\product.db`
|
||||||
|
(`product_install{ uid, product_code, settings.install_path, cached_product_state.base_product_state.installed }`).
|
||||||
|
Registry fallback: Uninstall keys whose `UninstallString` matches `Battle.net.exe --uid=<uid>`.
|
||||||
|
`product.db` has **no titles** → maintain a ~30-entry `product_code`→name map (source from
|
||||||
|
bnetlauncher/Lutris/Heroic; codes are **case-sensitive**).
|
||||||
|
- **Launch:** `battlenet` → `Battle.net.exe --exec="launch <code>"` (more reliable than the
|
||||||
|
`battlenet://<code>` URI, which only hands off). **Artwork:** none → title-only.
|
||||||
|
- **Notes:** the protobuf + name map + no-art make it L; pin the `.proto` and decode defensively.
|
||||||
|
|
||||||
|
#### EA app — Windows, P2, effort M, confidence medium (most closed/fragile — ship last)
|
||||||
|
- **Enumerate:** registry `HKLM\SOFTWARE\WOW6432Node\{EA Games,Origin Games}\<id>` (Install Dir /
|
||||||
|
DisplayName), parse `<dir>\__Installer\installerdata.xml` for the **full** `<contentIDs>` list +
|
||||||
|
`<gameTitle locale='en_US'>`. Registry under-reports for EA-app (vs legacy Origin) installs — known
|
||||||
|
completeness gap. Keep the AES-256 encrypted `IS`-file decrypt **out** of the default path (optional
|
||||||
|
feature flag for completeness).
|
||||||
|
- **Launch:** `ea_offer_ids` → `origin2://game/launch/?offerIds=<full,comma,list>&autoDownload=1`. **Emit
|
||||||
|
the full contentID list** — a single offerId generally no longer launches under the EA app.
|
||||||
|
- **Artwork:** none no-auth → title-only.
|
||||||
|
|
||||||
|
#### Rockstar — P3, fold into custom
|
||||||
|
- Registry `HKLM\SOFTWARE\WOW6432Node\Rockstar Games\<Title>\InstallFolder`; direct-exe Spawn; no art.
|
||||||
|
Tiny catalog, most titles now bought on Steam/Epic.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Structure & phasing
|
||||||
|
|
||||||
|
**Structure (still pending refactor).** Split the 1869-line `library.rs` → a `library/` dir before it
|
||||||
|
balloons further: `mod.rs` (trait, wire types, `LaunchAction`, custom CRUD, `all_games`, `resolve_launch`,
|
||||||
|
`launch_command`/`launch_title`), `steam.rs`, one file per provider, `art.rs` (ArtResolver +
|
||||||
|
displaycatalog/gog-api/steamgriddb helpers), `win_util.rs` (HKLM subkey enumerator, read-only SQLite
|
||||||
|
opener, tiny read-only XML reader). Deps in play: `rusqlite` (bundled, read-only) for lutris/itch/amazon
|
||||||
|
DBs; `roxmltree`/`quick-xml` for the Windows manifests; registry via the `windows` crate's
|
||||||
|
`Win32_System_Registry` feature (no new crate). Avoid `prost` — hand-roll the ~4 Battle.net fields.
|
||||||
|
|
||||||
|
- **Phases 1–4 — DONE:** launch abstraction + Windows interactive-session spawn; Steam/Lutris/Heroic
|
||||||
|
providers + Linux art; Epic/GOG providers; Xbox / Game Pass provider; shared art warmer + cache; web
|
||||||
|
store badge generalized per `game.store`.
|
||||||
|
- **Phase 5 — future:** Linux Desktop catch-all (last + dedup, icons via `/library/art`), Ubisoft
|
||||||
|
(`UplayProvider`), Amazon (`AmazonProvider` + per-user-profile-under-SYSTEM helper); land the
|
||||||
|
`GET /library/art/<id>/<slot>` endpoint that Lutris/Desktop local art still needs.
|
||||||
|
- **Phase 6 — future:** Battle.net (hand-rolled protobuf + code→name map), EA app, itch.io; Rockstar/
|
||||||
|
Bottles → custom; optional SteamGridDB v2 enrichment behind an operator key.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Open questions
|
||||||
|
|
||||||
|
- **Art delivery auth:** the streaming client connects over punktfunk/1 (QUIC), not the bearer-gated mgmt
|
||||||
|
REST, yet already fetches Steam CDN URLs over plain HTTP. Should `GET /library/art/*` be an
|
||||||
|
unauthenticated read-only image GET on the mgmt listener (bearer bypass for that path only), a separate
|
||||||
|
tiny image server, or should local-art bytes ride the punktfunk/1 control plane?
|
||||||
|
- **Windows launch ordering** needs on-glass RTX-box validation: confirm launching *after* capture is live
|
||||||
|
grabs foreground+capture, and that `CreateProcessAsUserW(EpicGamesLauncher.exe/steam.exe, URI-as-argv)`
|
||||||
|
actually starts the game per launcher (vs needing the impersonate-ShellExecute fallback).
|
||||||
|
- **Per-user-profile resolution under SYSTEM** for Amazon (`%LocalAppData%`) and itch (`%AppData%`): add
|
||||||
|
`WTSQueryUserToken` + `GetUserProfileDirectoryW` (or read `USERPROFILE` from `CreateEnvironmentBlock`)?
|
||||||
|
- **`rusqlite` bundled SQLite** — acceptable for deb/rpm/flatpak and no link conflict? Otherwise fall back
|
||||||
|
to `lutris -l -j` (fragile: single-instance D-Bus forwarding).
|
||||||
|
- **Battle.net** product-code→name map source/maintenance, and `product.db` `.proto` drift across Agent versions.
|
||||||
|
- **Unofficial art sources** (Xbox displaycatalog): best-effort with aggressive caching + no-art degrade,
|
||||||
|
or Xbox-art local-tile-only for v1?
|
||||||
|
- **Heroic launch:** ship enumeration+art only at first, or invest in direct legendary/gogdl/nile CLI
|
||||||
|
launch (needs the user's on-disk auth tokens) to dodge the single-instance-Electron / gamescope-escape problem?
|
||||||
|
- **`config_dir()` consistency:** `library.rs` uses an XDG/HOME-based dir; confirm the Windows SYSTEM host
|
||||||
|
lands its art cache + custom store under `%ProgramData%\punktfunk` (there's a separate
|
||||||
|
`gamestream::config_dir()` that already does this).
|
||||||
|
- Should provider-generated Linux shell lines (`desktop`/`itch`) reuse the `command` kind (documented
|
||||||
|
"operator-only") or get a distinct internal kind to keep the mgmt-UI `command` semantics clean?
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Open items (what's left)
|
||||||
|
|
||||||
|
- **6 remaining providers:** Desktop/Flatpak (Linux), itch.io (Linux+Windows), Ubisoft Connect, Amazon
|
||||||
|
Games, Battle.net, EA app (recipes in §5).
|
||||||
|
- **`GET /library/art/<entryId>/<slot>` mgmt endpoint** — still missing; Lutris local JPEGs (and the future
|
||||||
|
Desktop icons) have no public URL without it.
|
||||||
|
- **Refactor `library.rs` (1869 lines) into a `library/` directory** (structure in §6).
|
||||||
|
- The **9 open questions** in §7.
|
||||||
|
- **Optional SteamGridDB v2 enrichment** behind an operator key (off by default; never blocks listing).
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
title: "GameStream Host"
|
||||||
|
description: "Stream to a stock Moonlight client on a client-sized virtual display."
|
||||||
|
---
|
||||||
|
|
||||||
|
> **Status:** SHIPPED — works end-to-end with a stock Moonlight/Artemis client (initial
|
||||||
|
> merge `ab6dda2`, June 2026). Code: [`crates/punktfunk-host/src/gamestream/`](../crates/punktfunk-host/src/gamestream/).
|
||||||
|
> Byte-level wire reference: [`research/gamestream-protocol-research.json`](research/gamestream-protocol-research.json)
|
||||||
|
> (distilled from Sunshine + moonlight-common-c). This doc is trimmed to design rationale +
|
||||||
|
> open items; the shipped code is the source of truth for wire/packet detail.
|
||||||
|
|
||||||
|
A stock Moonlight client discovers this host, pairs, launches, and gets video + input + audio
|
||||||
|
on a client-sized virtual display.
|
||||||
|
|
||||||
|
## Architecture (respects the "one core" invariant)
|
||||||
|
|
||||||
|
- **punktfunk-core** holds the **P1 GameStream wire codec** (`ProtocolPhase::P1GameStream`):
|
||||||
|
the RTP+`NV_VIDEO_PACKET` framing, the GameStream FEC shard layout, and the video/audio
|
||||||
|
AES-GCM/CBC paths. Hot path, native threads, **no async**. Kept beside punktfunk's native
|
||||||
|
internal format (P2), selected by phase.
|
||||||
|
- **punktfunk-host** holds the **control plane** (tokio/axum OK — I/O-bound, *not* the hot
|
||||||
|
path): mDNS discovery, nvhttp serverinfo + the 4-phase pairing, the RTSP handshake, the ENet
|
||||||
|
control stream + input injection, the virtual-display lifecycle, and Opus audio encode.
|
||||||
|
|
||||||
|
## Why we shipped in this order (the two highest interop risks)
|
||||||
|
|
||||||
|
These two mitigations are why early bring-up deliberately skipped crypto and FEC — both turn
|
||||||
|
out to be unnecessary on a clean LAN, and both have a wire-incompatibility that would have
|
||||||
|
silently broken interop if done naively.
|
||||||
|
|
||||||
|
1. **RS-FEC matrix incompatibility — clean-LAN first.** Sunshine + Moonlight both use
|
||||||
|
**nanors** (GF(2⁸), poly 0x11d, Vandermonde systematic). punktfunk-core uses
|
||||||
|
`reed-solomon-erasure` (Cauchy) — parity bytes **don't match**, so Moonlight silently fails
|
||||||
|
to recover any frame with a lost data shard. Mitigation: **on a clean LAN with no loss the
|
||||||
|
client never runs RS decode**, so we deferred it — get a frame decoded first, then port
|
||||||
|
nanors for loss recovery.
|
||||||
|
2. **Crypto layout incompatibility — plaintext video first.** punktfunk's `SessionCrypto`
|
||||||
|
(salt + seq-as-AAD) is wire-incompatible with GameStream's GCM; P1 needs a separate
|
||||||
|
GameStream GCM path (key = raw 16-byte RIKEY, IV = `counter_le[8]||0,0,0||'V'(0x56)`, **no
|
||||||
|
AAD**, **FEC first, then encrypt per shard**). Mitigation: **video encryption is negotiated
|
||||||
|
and usually off on LAN** — we implemented plaintext video first and added GCM later.
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
- **HDR / 10-bit.** Needs HDR capture + metadata plumbing. (`av1_nvenc -highbitdepth 1`
|
||||||
|
already encodes Main10 from 8-bit input on this box.)
|
||||||
|
- **Reconnect-at-new-mode robustness.**
|
||||||
|
- **AV1 negotiation.** Implemented + unit/live-capture tested; needs a **live confirmation
|
||||||
|
with a stock Moonlight client** (select AV1 in a stock client).
|
||||||
|
- **Surround 5.1/7.1 audio.** Implemented + tested; needs a **real listen** including FEC
|
||||||
|
under loss, plus a live Moonlight confirmation.
|
||||||
|
|
||||||
|
## Testing note
|
||||||
|
|
||||||
|
The host is headless; end-to-end needs a **stock Moonlight client on the LAN** pointed at this
|
||||||
|
box (manual "add host" by IP works without mDNS). `/serverinfo` + the pair flow are testable
|
||||||
|
with `curl`; video needs a client that can display.
|
||||||
@@ -0,0 +1,450 @@
|
|||||||
|
# GPU-contention performance investigation — why a saturating game starves the stream (2026-06-25)
|
||||||
|
|
||||||
|
> **Status:** Investigation / plan. §5.A (NV12/P010 on the IDD-push default path) is **SHIPPED**
|
||||||
|
> — `3514702`, `capture/windows/idd_push.rs` + `encode/windows/nvenc.rs`. All other levers
|
||||||
|
> (§5.B/§5.E/§5.F/§5.G) are **OPEN**; §5.C is **PARTIAL** (REALTIME knob exists, no auto-gate).
|
||||||
|
> Paired with [`host-latency-plan.md`](host-latency-plan.md) (mutual cross-refs — keep both).
|
||||||
|
> Trimmed to design rationale + open items; git history holds the full original.
|
||||||
|
|
||||||
|
> The headache, stated precisely:
|
||||||
|
> a game renders ~140 fps on the host GPU; the client requests 120/240; in a GPU-light scene the
|
||||||
|
> stream tracks; the moment the game pins the GPU the **stream collapses to 40–50 fps** while the
|
||||||
|
> game keeps rendering 140. Capping the game's fps raises the stream back up (clearest in light
|
||||||
|
> titles like CS2). **Capping is not an acceptable fix** — demanding titles exhaust the GPU even
|
||||||
|
> when capped.
|
||||||
|
|
||||||
|
This is the second, deeper pass on the problem. The first pass is
|
||||||
|
[`host-latency-plan.md`](host-latency-plan.md) (a 25-agent investigation, 2026-06-18). **This doc
|
||||||
|
supersedes several of that doc's conclusions** — the codebase moved a lot in the week since
|
||||||
|
(the Windows-host rewrite landed IDD-push as the default capture path, split-encode shipped, the
|
||||||
|
GPU-priority knob got configurable), and a fresh, adversarially-verified research pass overturned
|
||||||
|
two of the old plan's premises. Read §1 (corrections) before acting on the old doc.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 0. TL;DR — the corrected mental model and the action list
|
||||||
|
|
||||||
|
**The governing fact:** NVENC is a **dedicated ASIC on its own GPU runlist**, physically separate
|
||||||
|
from the SM/CUDA/graphics cores a 3D game saturates. The game does **not** steal the encode block.
|
||||||
|
It steals everything that *feeds* the block — capture-acquire, the **RGB→YUV colour-convert**, the
|
||||||
|
copy into the encoder's input surface, the readback — **and the GPU-scheduler time** to run that
|
||||||
|
feed work, which is queued behind the game's graphics context.
|
||||||
|
([NVENC app-note](https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/nvenc-application-note/index.html),
|
||||||
|
[engine-table proof, UNC RTAS'24](https://www.cs.unc.edu/~jbakita/rtas24.pdf))
|
||||||
|
|
||||||
|
**Therefore there are two different bottlenecks with opposite fixes, and you must tell them apart
|
||||||
|
before writing code:**
|
||||||
|
|
||||||
|
| Bottleneck | Symptom | Fix family |
|
||||||
|
|---|---|---|
|
||||||
|
| **(a) feed-scheduling contention** | `uniq`≈`fps`, both ~50; `encode_ms` 13–17 | shrink the host's contended-engine footprint; raise GPU scheduling priority; pipeline correctly; in the limit, a second GPU |
|
||||||
|
| **(b) frame-source ceiling** | `fps`≈240 (held re-encodes) but `uniq`→40–50 | capture the game's real frames (swapchain hook); compose-flip for the DLSS-FG case |
|
||||||
|
|
||||||
|
**The single hardest truth:** on one saturated GPU there is **no free lunch**. Any host GPU work
|
||||||
|
either *preempts* the game (and steals its frames) or *waits* behind it. Capping the game works
|
||||||
|
only because it cuts the game's **total** GPU demand and opens idle gaps. The non-capping
|
||||||
|
equivalents are exactly three: **need less GPU** (footprint shrink), **take more** (priority — which
|
||||||
|
costs the game fps), or **use a different GPU** (real isolation). Anything pitched as "make the game
|
||||||
|
politely yield without losing anything" — Reflex, render-queue tricks — is a **placebo** here (§7).
|
||||||
|
|
||||||
|
**Action list, highest leverage first** (detail in §5–§6):
|
||||||
|
|
||||||
|
1. **Diagnose first** (§3). Read `uniq`-vs-`fps` under the real workload + PresentMon presentation
|
||||||
|
mode. Half a day; decides whether you're fighting (a) or (b). The repo already prints the counter.
|
||||||
|
2. **Stop feeding NVENC RGB on the default path** — **DONE** for IDD-push (`3514702`): the install
|
||||||
|
default now converts BGRA→NV12 (SDR) / FP16→P010 (HDR) before NVENC — off the SM for SDR; the HDR shader still runs on the SM. Linux NV12-default
|
||||||
|
and a video-engine HDR P010 are still open. (§5.A)
|
||||||
|
3. **Build a *correct* async encode pipeline** — submit on one thread, blocking-retrieve on another,
|
||||||
|
deep surface pool, Windows completion events. Our past "pipelining didn't help" was a *same-thread*
|
||||||
|
implementation that can't overlap; the two-thread pattern the NVENC guide mandates was never
|
||||||
|
tried. Recovers the throughput lost to the depth-1 serialization that produces ~50 fps, up to the priority ceiling. (§5.B)
|
||||||
|
4. **Auto-gated REALTIME GPU priority.** Our `LocalSystem` service *can* grant it (most apps can't).
|
||||||
|
Gate on HAGS-state + VRAM headroom to dodge the documented NVENC freeze. (§5.C)
|
||||||
|
5. **Lock clocks / pin P-state** for jitter (cheap; fixes the light-scene "200-not-240", not the
|
||||||
|
collapse). (§5.E)
|
||||||
|
6. **If source-bound: swapchain-hook capture** (OBS-style) — the real escape from the compose
|
||||||
|
ceiling. Big lift, anti-cheat tradeoffs. (§5.F)
|
||||||
|
7. **The honest endgame for demanding titles: encode on a second GPU / the iGPU.** The only approach
|
||||||
|
that *removes* contention instead of re-prioritizing it. We already have AMF/QSV paths. (§5.G)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Corrections to `host-latency-plan.md` (read before reusing it)
|
||||||
|
|
||||||
|
The old doc was right about the shape but several specifics are now wrong or stale:
|
||||||
|
|
||||||
|
- **"Windows already feeds NVENC YUV on the video engine, so it does the right thing."** True for the
|
||||||
|
DDA and WGC paths — **was false for IDD-push, which became the install default and fed NVENC
|
||||||
|
RGB**, paying the SM-side CSC the old doc said Windows had eliminated. The default path *regressed*
|
||||||
|
on the exact axis the doc celebrated. **Since fixed** (`3514702`, §5.A): IDD-push now converts
|
||||||
|
BGRA→NV12 on the video engine (FP16→P010 shader for HDR) and feeds NVENC native YUV.
|
||||||
|
- **"`PUNKTFUNK_ENCODE_DEPTH` (default 4, ≤6) deep-pipelines."** **There is no such knob.** It exists
|
||||||
|
only in two stale comments (`encode/windows/nvenc.rs:30`, `capture/windows/wgc.rs:57`) and is never
|
||||||
|
parsed. The real depth knob is `PUNKTFUNK_IDD_DEPTH` (default 2), used only by IDD-push on the
|
||||||
|
native path; GameStream and the WGC helper are hardcoded depth-1.
|
||||||
|
- **"Async NVENC is measure-gated and probably stacks latency (Tier 3D)."** The measurement that
|
||||||
|
produced that verdict (`capture/windows/wgc_helper.rs:131-135`) pipelined **on a single thread** —
|
||||||
|
it queued more frames but still blocked `lock_bitstream` inline, so it added queue latency with
|
||||||
|
**zero overlap**. That is not the pattern the NVENC guide prescribes (submit/retrieve on
|
||||||
|
*separate* threads). The correct async pipeline is **untried**, not disproven. (§5.B)
|
||||||
|
- **"More GPU priority is maxed and hits a hard preemption wall with no recourse."** Half right.
|
||||||
|
Priority *is* near-maxed (HIGH), but the "no recourse" intuition is wrong: a **higher-priority GPU
|
||||||
|
context does preempt a saturating graphics context at pixel granularity** — that is precisely how
|
||||||
|
NVIDIA VR Async-TimeWarp injects a frame into a busy game
|
||||||
|
([VRWorks Context Priority](https://developer.nvidia.com/vrworks/headset/contextpriority)). And we
|
||||||
|
default to HIGH, leaving **REALTIME unused** even though our SYSTEM service can grant it. (§5.C)
|
||||||
|
- **"Force Composed Flip / double-refresh recovers the 'capture sees half the frames' loss."** The
|
||||||
|
"half the frames" effect is **specifically a DLSS-Frame-Generation flip-metering artifact**
|
||||||
|
(FG v310.x+ / RTX 50-series), *not* a general property of independent-flip games — normal
|
||||||
|
fullscreen flip games are captured at full rate by DDA. So composed-flip is a **narrow** fix, not a
|
||||||
|
general lever. ([Apollo #676 — DDA captured a flip game at full 120 fps](https://github.com/ClassicOldSong/Apollo/issues/676),
|
||||||
|
[Sunshine #3621 — version-pinned to FG 310.x](https://github.com/LizardByte/Sunshine/issues/3621))
|
||||||
|
- **"NvFBC is a possible low-overhead capture path."** **Dead on Windows** — deprecated, frozen at
|
||||||
|
Capture SDK 7.1 / Win10-1803
|
||||||
|
([NVIDIA deprecation bulletin](https://developer.download.nvidia.com/designworks/capture-sdk/docs/NVFBC_Win10_Deprecation_Tech_Bulletin.pdf)).
|
||||||
|
Linux-only, and on consumer GeForce cards only via the `keylase` patch.
|
||||||
|
|
||||||
|
What the old doc got right and still holds: feeding NVENC RGB is backwards; the source/compose ceiling
|
||||||
|
is real and upstream of encode; split-encode is a pixel-rate lever not a contention lever; the
|
||||||
|
honest residual ceiling at 100% GPU remains. Those carry forward.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. How the pipeline serializes today — the key insight
|
||||||
|
|
||||||
|
The capture→encode loop is a **fixed-cadence pacer** (`gamestream/stream.rs`, `punktfunk1.rs`): every
|
||||||
|
`1/target_fps` tick it grabs the freshest frame with a **non-blocking** `try_latest()`, and **if
|
||||||
|
nothing new arrived it re-encodes the held frame** (a near-empty P-frame). So the **outbound fps is
|
||||||
|
pinned at `target_fps` no matter what the source did** — which is *why the raw fps counter lies* under
|
||||||
|
contention. The only honest signal is the `uniq` / `diag_new` counter; the code itself states the
|
||||||
|
diagnostic: *"low new_fps at high send rate ⇒ the source isn't producing frames, not an encode stall."*
|
||||||
|
|
||||||
|
The NVENC round-trip (the dominant path) is **depth-1 synchronous**: `encode_picture` is a
|
||||||
|
non-blocking ASIC launch, but `lock_bitstream` **blocks the same thread** until that frame completes
|
||||||
|
(no `enableEncodeAsync`, no completion event). The only thread split is encode-vs-network-send, never
|
||||||
|
submit-vs-retrieve. So under contention the loop is strictly serial — `capture (+convert) → submit →
|
||||||
|
block in lock_bitstream → hand AU to the send thread` — and the arithmetic matches the symptom:
|
||||||
|
`1000/17 ≈ 59` and `1000/13 ≈ 77` fps (from the 13–17 ms `encode_ms`) are the loop's upper bound, just above the observed ~50, the signature of **one frame in
|
||||||
|
flight per round-trip**, not an ASIC throughput wall.
|
||||||
|
([independent NVENC latency study: ~7 frames across all presets](https://arxiv.org/html/2511.18688v2))
|
||||||
|
|
||||||
|
Where the per-frame GPU work lands, by path (the crux of contention — **lower contended-engine load is
|
||||||
|
better**):
|
||||||
|
|
||||||
|
| Path | Colour-convert | NVENC input | Contended-engine load/frame |
|
||||||
|
|---|---|---|---|
|
||||||
|
| **IDD-push** (install default) | **NV12 on the video engine** for SDR; FP16→P010 via shader for HDR (`3514702`) | NV12/P010 | low (SDR) / shader-CSC on SM (HDR) |
|
||||||
|
| **WGC** (fallback default) | `VideoProcessorBlt` → NV12 on the **video engine** | NV12/P010 | low |
|
||||||
|
| **DDA** | `VideoProcessorBlt` → NV12 on the **video engine** | NV12/P010 | medium (one 3D `CopyResource` to release the dup fast) |
|
||||||
|
| **Linux NVENC** | **none → NVENC internal RGB→YUV on the SM** (default) | RGBZ/BGRZ (NV12 only if `PUNKTFUNK_NV12` *and* `PUNKTFUNK_ZEROCOPY`) | high |
|
||||||
|
|
||||||
|
Measured magnitude of "RGB vs NV12 to the encoder":
|
||||||
|
[**RGB input ≈ video-engine 40% + 3D/CUDA 15%; NV12 input ≈ video 26% + 3D 2%**](https://hardforum.com/threads/can-someone-explain-to-me-how-nvenc-obs-work-with-nvidia-gpus-and-the-gpu-load-they-cause.2025896/).
|
||||||
|
NVENC's guide confirms the mechanism: *"Encoding of RGB contents"* is on the explicit list of
|
||||||
|
features that **internally use CUDA**
|
||||||
|
([NVENC prog-guide §Encoder Features using CUDA](https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/nvenc-video-encoder-api-prog-guide/index.html)).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Diagnose first — cheap, decisive, do before any code
|
||||||
|
|
||||||
|
Everything in §5 is gated on knowing whether you're fighting bottleneck (a) or (b). The dev VM
|
||||||
|
cannot reproduce this — run on the **RTX 4090 Windows box** (and a real NVIDIA Linux box) with an
|
||||||
|
actual saturating game.
|
||||||
|
|
||||||
|
1. **Run with `PUNKTFUNK_PERF=1` and read `uniq` vs `fps`** under CS2 at GPU-100%:
|
||||||
|
- `fps`≈target but `uniq`→40–50 ⇒ **(b) source ceiling** — the compositor/IDD only produced
|
||||||
|
40–50 unique frames. No encode/priority fix exceeds that number. Go to §5.F.
|
||||||
|
- both `fps` and `uniq`→40–50, with `encode_ms` 13–17 ⇒ **(a) feed contention** — the round-trip
|
||||||
|
is starving. Go to §5.A/B/C.
|
||||||
|
2. **Classify the game's presentation with [PresentMon](https://github.com/GameTechDev/PresentMon)** —
|
||||||
|
"Presented FPS" vs "Displayed FPS" and **Presentation Mode** (Hardware: Independent Flip vs
|
||||||
|
Composed: Flip). Independent-Flip + `uniq` ≪ Presented ⇒ source/flip problem; **Presented FPS
|
||||||
|
itself** collapsed ⇒ the game is genuinely GPU-bound and no capture trick invents the missing
|
||||||
|
frames.
|
||||||
|
3. Log `cap_us` / `enc_us` / `pace_us` p50/p99 alongside to localise the stall. (Per-stage
|
||||||
|
`cap`/`submit`/`wait` µs instrumentation landed under `PUNKTFUNK_PERF` in `3514702`.)
|
||||||
|
|
||||||
|
> **Necessary-but-not-sufficient caveat:** if the game only *rendered* 50 frames because it's
|
||||||
|
> GPU-bound, **nothing downstream creates the other 90**. Source fixes address (b) only; the
|
||||||
|
> throughput of a saturated single GPU is split between game and host no matter what.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Current-state audit (what's shipped / regressed / missing)
|
||||||
|
|
||||||
|
| Area | State | Where |
|
||||||
|
|---|---|---|
|
||||||
|
| Thread priority (Win) | HIGH class + MMCSS "Games" + 1 ms timer | `session_tuning.rs` ✅ |
|
||||||
|
| Thread priority (Linux) | `setpriority` −10/−5 — **native path only; GameStream Linux threads get none** | `punktfunk1.rs:1977` ⚠ |
|
||||||
|
| GPU sched priority | `D3DKMTSetProcessSchedulingPriorityClass` **HIGH(4)** default; `realtime` opt-in, no auto-gate; cross-process onto WGC helper | `capture/windows/dxgi.rs:208-330` ⚠ |
|
||||||
|
| GPU thread/latency | `SetGPUThreadPriority(0x4000001E)`, `SetMaximumFrameLatency(1)` | `dxgi.rs:193-200` ✅ |
|
||||||
|
| CSC off-SM (Win SDR) | WGC/DDA video-engine NV12 ✅ — **IDD-push (default) now video-engine NV12** (`3514702`) ✅ | `wgc.rs:631` / `idd_push.rs` |
|
||||||
|
| CSC off-SM (Win HDR) | IDD-push HDR via FP16→P010 **shader** (on-SM); other paths on-SM unless `PUNKTFUNK_HDR_SHADER_P010` | `wgc.rs:603` ⚠ |
|
||||||
|
| CSC off-SM (Linux) | RGB→SM by default; NV12 is **double-opt-in** (`PUNKTFUNK_NV12`+`PUNKTFUNK_ZEROCOPY`) | `encode/linux/mod.rs:104` ⚠ |
|
||||||
|
| Encode pipeline | depth-1 synchronous, inline `lock_bitstream`; IDD-push native = depth-2 same-thread | `nvenc.rs:801` ⚠ |
|
||||||
|
| Split-encode | 2-way >1 Gpix/s (HEVC/AV1); disabled 10-bit (correct); proper enum | `nvenc.rs:424-447` ✅ |
|
||||||
|
| Zero-copy register-in-place | yes; IDD-push out-ring is now the convert target (NV12/P010), no extra copy | `nvenc.rs:623` ✅ |
|
||||||
|
| AMF tuning | `usage=ultralowlatency`, `preanalysis=false` | `ffmpeg_win.rs:215-219` ✅ |
|
||||||
|
| QSV tuning | `async_depth=1`, `low_power=1` (VDEnc) | `ffmpeg_win.rs:226-227` ✅ |
|
||||||
|
| Intra-refresh / infinite GOP | yes (killed the periodic-IDR freeze) | ✅ |
|
||||||
|
| encode\|send split + paced send + sendmmsg + 32 MB sockbuf | yes | `stream.rs`, `transport/qos.rs` ✅ |
|
||||||
|
| **Clock / P-state pin** | **none** (zero hits repo-wide) | ✗ |
|
||||||
|
| **Async NVENC (2-thread)** | **none** | ✗ |
|
||||||
|
| **Frame-source escape (hook/NvFBC-Linux)** | **none** | ✗ |
|
||||||
|
| **Second-GPU / iGPU encode offload** | **none** | ✗ |
|
||||||
|
| DSCP/QoS | implemented, `PUNKTFUNK_DSCP` opt-in (default off) | `transport/qos.rs` ⚠ |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. The levers, ranked, with honest verdicts
|
||||||
|
|
||||||
|
### A. Stop feeding NVENC RGB on the default path — **DONE for Windows IDD-push** (`3514702`)
|
||||||
|
|
||||||
|
The default Windows IDD-push path used to hand NVENC packed RGB, forcing NVENC's internal RGB→YUV CSC
|
||||||
|
onto the SM the game saturates. `3514702` makes the out-ring the convert target: a D3D11 **video-engine**
|
||||||
|
`VideoConverter` does BGRA→NV12 (SDR, BT.709 limited) in place, so NVENC gets native NV12 and skips its
|
||||||
|
SM-side CSC; HDR uses the FP16→P010 shader (NVIDIA's VideoProcessor can't do RGB→P010). NV12 input forces
|
||||||
|
`bit_depth=8`, so an HDR↔SDR toggle re-inits the session at the matching depth (NV12 can't feed a 10-bit
|
||||||
|
session). This also removed the separate `CopyResource` (the convert writes the ring directly).
|
||||||
|
|
||||||
|
**Verdict: REAL, but honestly *conditional*** — the convert has to land **off** the SM to fully pay off.
|
||||||
|
`VideoProcessorBlt` is *designed* to use fixed-function video hardware and the hardforum numbers back the
|
||||||
|
15%→2% drop, **but no NVIDIA doc explicitly confirms `VideoProcessorBlt` runs off-SM on GeForce** — treat
|
||||||
|
the "video engine" claim as well-founded-but-unverified and confirm on-box with `nvidia-smi dmon` (watch
|
||||||
|
the `enc`/`sm` columns) before and after. Do **not** convert with a CUDA/3D shader and call it done — that
|
||||||
|
just relocates the CSC to the same SM (this is why the HDR P010 *shader* path is still on-SM; Sunshine's
|
||||||
|
RGB→NV12 CUDA kernel still contends).
|
||||||
|
|
||||||
|
**Still open in §A:**
|
||||||
|
- **Linux:** make NV12 the **default** for the tiled zero-copy path (gated behind `PUNKTFUNK_NV12` *and*
|
||||||
|
`PUNKTFUNK_ZEROCOPY` today — `encode/linux/mod.rs:104`, `linux/zerocopy/egl.rs:272`), feeding NVENC
|
||||||
|
`NV_ENC_BUFFER_FORMAT_NV12`. The GL detile already runs; emitting NV12 from it replaces the swizzle at
|
||||||
|
~equal cost and deletes NVENC's CSC.
|
||||||
|
- **Windows HDR:** move the FP16→P010 convert onto the video engine where the VP supports it (today's
|
||||||
|
shader keeps it on-SM), or flip `PUNKTFUNK_HDR_SHADER_P010` on by default for the non-IDD paths.
|
||||||
|
|
||||||
|
### B. A *correct* async encode pipeline (the untried encoder lever) — **OPEN**
|
||||||
|
|
||||||
|
The NVENC Programming Guide is explicit: *"The main encoder thread should be used only to submit
|
||||||
|
work… (non-blocking `NvEncEncodePicture`). Output buffer processing — waiting on the completion
|
||||||
|
event in asynchronous mode, or calling `NvEncLockBitstream` in synchronous mode — should be done in
|
||||||
|
the **secondary thread**."*
|
||||||
|
([NVENC prog-guide, threading model](https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/nvenc-video-encoder-api-prog-guide/index.html))
|
||||||
|
We do the opposite — submit and blocking-retrieve on **one** thread. Queuing more `pending` entries
|
||||||
|
(IDD-push depth-2, or the abandoned wgc_helper experiment) adds queue latency with **no overlap**,
|
||||||
|
which is exactly the "deeper pipeline only stacks latency" result we recorded. It was the wrong
|
||||||
|
implementation, not a disproof.
|
||||||
|
|
||||||
|
The fix: **submit on the capture/encode thread; do `lock_bitstream` on a dedicated retrieve thread;
|
||||||
|
hold a deep input+output surface pool (≈4–8); on Windows register a `completionEvent` per output
|
||||||
|
buffer (`enableEncodeAsync=1`) — on Linux async events are unsupported, so use the same two-thread
|
||||||
|
split with a blocking retrieve.**
|
||||||
|
([async is Windows/WDDM-only](https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/nvenc-video-encoder-api-prog-guide/index.html);
|
||||||
|
FFmpeg models the same knob as `delay`/`async_depth`,
|
||||||
|
[libavcodec/nvenc.c](https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/nvenc.c)).
|
||||||
|
|
||||||
|
This lets the WDDM scheduler find a **backlog** when it finally grants the encoder context a slice,
|
||||||
|
and drain several frames back-to-back, while the ASIC encodes frame N as the contended engines do
|
||||||
|
frame N+1's convert.
|
||||||
|
|
||||||
|
**Verdict: REAL throughput recovery for the depth-1 collapse, latency cost +1–2 frames, ceiling-bounded.**
|
||||||
|
The honest bound (and why this is *second* to §A/§C): pipelining cannot manufacture GPU time — if the
|
||||||
|
scheduler grants the encode context only X% under load, depth only guarantees work is *ready* for
|
||||||
|
each grant; it can't raise X. That is why Sunshine's documented lever for "GPU heavily loaded" is
|
||||||
|
**priority**, not depth. So §B recovers the serialization loss; §A/§C raise the share it's bounded by.
|
||||||
|
Watch out: this **forecloses sub-frame slice output** (mutually exclusive with `enableEncodeAsync`),
|
||||||
|
and HAGS can spike the *submit* call itself
|
||||||
|
([100–200 ms `nvEncEncodePicture` stalls under HAGS](https://forums.developer.nvidia.com/t/windows-11-hardware-accelerated-gpu-scheduling-issue/286128)).
|
||||||
|
|
||||||
|
### C. Auto-gated REALTIME GPU scheduling priority — **PARTIAL** (knob exists, no auto-gate)
|
||||||
|
|
||||||
|
Raising the host process's WDDM GPU priority is **the** proven single-PC production lever — OBS and
|
||||||
|
Sunshine both set `D3DKMT_SCHEDULINGPRIORITYCLASS_REALTIME` to stop being descheduled behind
|
||||||
|
fullscreen games
|
||||||
|
([OBS commit](https://github.com/obsproject/obs-studio/commit/ec769ef008b748f7dfba211daec9eb203ea4bea0),
|
||||||
|
[Sunshine `display_base.cpp`](https://raw.githubusercontent.com/LizardByte/Sunshine/master/src/platform/windows/display_base.cpp)).
|
||||||
|
It works **independently of HAGS** (HAGS does *not* reassign cross-process priority — Microsoft:
|
||||||
|
*"Windows continues to control prioritization"*
|
||||||
|
[DirectX devblog](https://devblogs.microsoft.com/directx/hardware-accelerated-gpu-scheduling/)).
|
||||||
|
|
||||||
|
We ship only **HIGH(4)** by default with a static `realtime` opt-in (`PUNKTFUNK_GPU_PRIORITY_CLASS`,
|
||||||
|
`dxgi.rs:208-330`) and **no auto-gate**. Two things to change:
|
||||||
|
|
||||||
|
- **We can actually grant REALTIME.** It needs `SeIncreaseBasePriorityPrivilege`, which an unelevated
|
||||||
|
app lacks (OBS logs the failure) — **but our host runs as a `LocalSystem` service, which holds it.**
|
||||||
|
The lever is available to us specifically.
|
||||||
|
- **Gate it to dodge the freeze.** REALTIME + NVIDIA + HAGS-on + near-full-VRAM is a **documented
|
||||||
|
NVENC hang** (Sunshine ships `nvenc_realtime_hags` to downgrade to HIGH for exactly this;
|
||||||
|
[Sunshine config](https://docs.lizardbyte.dev/projects/sunshine/latest/md_docs_2configuration.html),
|
||||||
|
[NVIDIA repro](https://forums.developer.nvidia.com/t/bug-report-nvenc-encoder-hangs-on-windows-when-using-d3d11-in-real-time-mode/357466)).
|
||||||
|
Implement the old plan's "Tier 3B": probe HAGS via `D3DKMTQueryAdapterInfo` and VRAM headroom via
|
||||||
|
`IDXGIAdapter3::QueryVideoMemoryInfo` (continuously); use REALTIME only when HAGS-off, or HAGS-on
|
||||||
|
with comfortable VRAM headroom; downgrade to HIGH the instant VRAM tightens.
|
||||||
|
|
||||||
|
**Verdict: REAL — the genuine ceiling-raiser — but it is the no-free-lunch lever.** Priority is how
|
||||||
|
the host *takes* GPU time from the game; it measurably **costs the game fps**
|
||||||
|
([Doom Eternal 121→60 with Sunshine running](https://github.com/LizardByte/Sunshine/issues/3703)).
|
||||||
|
That's acceptable for a streaming host (the remote view is the product), but say so plainly and make
|
||||||
|
the class operator-configurable (we already expose `PUNKTFUNK_GPU_PRIORITY_CLASS`).
|
||||||
|
|
||||||
|
### D. Multi-vendor encoder hygiene (AMF/QSV) — **stable / mostly done, one caveat**
|
||||||
|
|
||||||
|
Our `*_amf`/`*_qsv` libavcodec config already follows the research's advice: AMF
|
||||||
|
`usage=ultralowlatency` + `preanalysis=false` (`ffmpeg_win.rs:215`), QSV `async_depth=1` +
|
||||||
|
`low_power=1` VDEnc path (`:226`). Keep them. Two notes:
|
||||||
|
|
||||||
|
- **AMF/QSV suffer contention *worse* than NVENC.** OBS: *"For Intel and AMD GPUs, the hardware
|
||||||
|
encoder requires significant resources of the same type a 3D app/game requires… different from
|
||||||
|
NVIDIA's NVENC, which has dedicated encoding circuits"*
|
||||||
|
([OBS forum](https://obsproject.com/forum/threads/how-to-debug-encoding-overloaded.168625/)). So on an
|
||||||
|
AMD/Intel host the collapse is *expected to be harder* — and §G (iGPU offload) is even more
|
||||||
|
attractive there.
|
||||||
|
- **The AMF busy-poll floor** (a fixed-sleep `QueryOutput` poll imposes ~15 ms via timer
|
||||||
|
granularity) is fixed in FFmpeg's amf wrapper (Cameron Gutman's `QUERY_TIMEOUT` patch); since we
|
||||||
|
go through libavcodec we inherit it — just **confirm the pinned FFmpeg build includes it**.
|
||||||
|
([ffmpeg-devel](https://www.mail-archive.com/ffmpeg-devel@ffmpeg.org/msg170489.html))
|
||||||
|
|
||||||
|
**Verdict: REAL but largely already captured.** No big win left here except via §G.
|
||||||
|
|
||||||
|
### E. Lock clocks / pin P-state — cheap jitter fix, not a collapse fix — **OPEN**
|
||||||
|
|
||||||
|
NVIDIA's adaptive clocking downclocks between our small bursty frames and pays a ramp tax every
|
||||||
|
frame — most visible in the *light* scene (the "200-not-240"). Pin it:
|
||||||
|
|
||||||
|
- **Windows:** NvAPI per-application DRS `PREFERRED_PSTATE = PREFER_MAX` scoped to our exe (this is
|
||||||
|
exactly Sunshine's `nvenc_latency_over_power`,
|
||||||
|
[Sunshine nvprefs](https://github.com/LizardByte/Sunshine/blob/master/src/platform/windows/nvprefs/driver_settings.cpp)).
|
||||||
|
**Crash-safe undo is mandatory** — persist an undo record to `%ProgramData%\punktfunk\` *before*
|
||||||
|
applying, revert a stale profile on next start, so a crash never leaves the user's control panel
|
||||||
|
modified.
|
||||||
|
- **Linux:** `nvidia-smi -lgc`/NVML `nvmlDeviceSetGpuLockedClocks` (needs root/`CAP_SYS_ADMIN`; query
|
||||||
|
`nvmlDeviceGetMaxClockInfo`, lock to that, restore on teardown *and* SIGTERM). Plus the newly-added
|
||||||
|
`CudaNoStablePerfLimit` driver profile — *new in R580/595, so usable on the 595 box* — to defeat
|
||||||
|
the CUDA "Force P2" memory-clock clamp.
|
||||||
|
- Gate behind `PUNKTFUNK_PIN_CLOCKS`; **default off on battery / Steam Deck** (pinning is harmful
|
||||||
|
there).
|
||||||
|
|
||||||
|
**Verdict: REAL for latency *stability*, marginal for the saturated collapse** (at 100% util the game
|
||||||
|
already pins P0). Cheap, low risk, do it for the light-scene win.
|
||||||
|
|
||||||
|
### F. Escape the frame-source ceiling — only if §3 says (b) — **OPEN**
|
||||||
|
|
||||||
|
If `uniq` is the wall, no encoder/priority work helps — you need a better frame source.
|
||||||
|
|
||||||
|
- **Swapchain-hook capture (the real fix).** Inject a hook on `IDXGISwapChain::Present`/`Present1`,
|
||||||
|
`vkQueuePresentKHR`, `wglSwapBuffers` and copy the backbuffer to a shared texture *before* the
|
||||||
|
compositor — OBS Game Capture's mechanism. Sees **every presented frame**, no compose/refresh
|
||||||
|
gating.
|
||||||
|
([OBS dxgi-capture](https://github.com/obsproject/obs-studio/blob/master/plugins/win-capture/graphics-hook/dxgi-capture.cpp))
|
||||||
|
**Tradeoffs are serious:** anti-cheat (EAC/BattlEye/Vanguard) flags injection — needs
|
||||||
|
whitelisting/compat handling; per-graphics-API hooks; fragility across game updates. Scope it as an
|
||||||
|
opt-in "game capture" mode, not the default.
|
||||||
|
- **NvFBC:** **not an option on Windows** (dead, §1). On **Linux** it's viable via the consumer
|
||||||
|
keylase patch and captures below composition — worth a flag for the Linux NVIDIA host.
|
||||||
|
- **Compose-flip (narrow):** the topmost 1×1 layered-window trick (we already have
|
||||||
|
`composed_flip.rs`) forces DWM composition and fixes specifically the **DLSS-Frame-Gen** half-rate
|
||||||
|
case. Adds host-display latency; don't enable globally.
|
||||||
|
- **WGC "deliver 2× rate":** Apollo sets `MinUpdateInterval = 1e7/(fps*2)` so the pacer always has a
|
||||||
|
fresh frame to pick ([Apollo](https://github.com/ClassicOldSong/Apollo/pull/785)); we set it to 1×
|
||||||
|
refresh (`wgc.rs:310`). Cheap tweak to try on the WGC path.
|
||||||
|
|
||||||
|
**Verdict: swapchain-hook is REAL and the only general escape; the rest are narrow.** None invents
|
||||||
|
frames the game didn't render.
|
||||||
|
|
||||||
|
### G. The honest endgame — encode on a second GPU / the iGPU — **OPEN**
|
||||||
|
|
||||||
|
For *demanding* titles that saturate the GPU even when capped, the only thing that **removes**
|
||||||
|
contention rather than re-prioritizing it is to run the capture→convert→encode pipeline on a
|
||||||
|
**different** GPU — a second dGPU or, more realistically, the **iGPU** (Intel QuickSync / AMD VCN),
|
||||||
|
which most desktops already have. Render on the gaming GPU, copy the frame across the adapter once,
|
||||||
|
encode on the iGPU's independent media engine. This is the textbook "stream on a separate encoder"
|
||||||
|
play, and the OBS "second GPU is harmful" verdict does **not** apply — that verdict is about moving
|
||||||
|
*only the NVENC block*; moving capture + CSC + copies off the gaming GPU genuinely frees it.
|
||||||
|
([OBS forum](https://obsproject.com/forum/threads/can-you-use-a-2nd-gpu-to-eliminate-encoder-overload.149644/))
|
||||||
|
|
||||||
|
We're unusually well-placed for this: we already have working AMF and QSV backends
|
||||||
|
(`encode/windows/ffmpeg_win.rs`) and the Linux VAAPI backend. The missing piece is a capture/topology
|
||||||
|
mode that pins capture to the gaming adapter and the encoder to the iGPU adapter, with one
|
||||||
|
cross-adapter shared-texture copy. Cost: that copy still shares VRAM bandwidth, so it's not free, but
|
||||||
|
it's the only path that lets a demanding game and a clean stream coexist on one machine.
|
||||||
|
|
||||||
|
**Verdict: REAL — the cleanest isolation, and the right answer to "even capped it collapses."**
|
||||||
|
Datacenter stacks (GeForce NOW, Stadia) "solve" this with one dedicated GPU + encoder per session;
|
||||||
|
the consumer analogue is the iGPU.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Recommended order of attack
|
||||||
|
|
||||||
|
1. **§3 Diagnose** on the RTX box + a real game. Settles (a) vs (b). *(half a day, decisive)*
|
||||||
|
2. **§5.A NV12/P010 on the default paths** — IDD-push **DONE** (`3514702`); remaining: Linux NV12
|
||||||
|
default-on, Windows HDR P010 off-SM. Confirm off-SM with `nvidia-smi dmon`.
|
||||||
|
3. **§5.C Auto-gated REALTIME** priority (HAGS + VRAM gate). Cheap, big, we can uniquely grant it.
|
||||||
|
4. **§5.E Clock pin** both OSes (crash-safe undo). Cheap light-scene win.
|
||||||
|
5. **§5.B Correct two-thread async pipeline.** Structural; recovers the depth-1 serialization.
|
||||||
|
6. **§3-gated §5.F** source escape (swapchain hook) — only if `uniq` is the wall.
|
||||||
|
7. **§5.G iGPU encode offload** — the strategic answer for demanding titles; larger build.
|
||||||
|
|
||||||
|
After 2–5 the light-scene gap closes and the saturated floor rises materially. But report the
|
||||||
|
honest ceiling: **on one saturated GPU the game and the host split a fixed pie** — coarse WDDM
|
||||||
|
graphics preemption caps how much priority can claw back, and a genuinely GPU-bound game that only
|
||||||
|
*rendered* 50 frames cannot also yield 140 unique frames to capture. The only escapes from that pie
|
||||||
|
are reducing the game's demand (cap — rejected), taking a bigger slice (priority — costs game fps),
|
||||||
|
or a second slice of silicon (§G). Don't chase the rest with encoder micro-optimisation.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Placebos & dead ends (so we don't re-propose them)
|
||||||
|
|
||||||
|
| Candidate | Verdict | Why |
|
||||||
|
|---|---|---|
|
||||||
|
| **NVIDIA Reflex / Ultra-Low-Latency / max-pre-rendered-frames** as a "non-capping yield" | ✗ placebo | Shrinks the *game's* render queue but the game still demands ~99% GPU → frees ≈0 SM headroom. Reflex needs in-game SDK (host can't force it); ULLM is host-forceable only on DX11/DX9 (DX12 since driver 551.23) and is NVIDIA's weaker mechanism. Only honest effect: µs of tail-jitter smoothing. ([Battle(non)sense LDAT data](https://forums.guru3d.com/threads/battle-non-sense-youtuber-claims-low-latency-mode-only-helps-when-gpu-load-is-99.429074/)) |
|
||||||
|
| **HAGS on, as a contention fix** | ✗ neutral→harmful | Doesn't reassign cross-process priority (Microsoft); OBS reports it *causes* NVENC latency spikes; it's the freeze-hazard variable. Needed only to enable the VK/D3D12 realtime *queue*. ([OBS KB](https://obsproject.com/kb/hags)) |
|
||||||
|
| **Split-frame encode (2/3/4-way) to fix contention** | ✗ (pixel-rate only) | Parallelizes the ASIC, not the contended copy/CSC; measured **zero** latency change at 4K. Correct use = raise the single-session pixel ceiling (5K@240). `splitEncodeMode=15` is the legit *disable* sentinel, not a bug. ([SDK header](https://raw.githubusercontent.com/FFmpeg/nv-codec-headers/master/include/ffnvcodec/nvEncodeAPI.h)) |
|
||||||
|
| **Move the encoded-bitstream readback to a copy engine** | ✗ placebo | Output is KB-scale; the cost of `lock_bitstream` is the completion *wait*, not copy bandwidth. (The *input* full-frame copy is the real one — but D3D11 can't target the copy engine; zero-copy already avoids it.) |
|
||||||
|
| **CUDA stream priority / `CUDA_DEVICE_MAX_CONNECTIONS` / `CU_CTX_SCHED_*`** | ✗ placebo cross-process | Intra-context only; the game is a *separate* context. Stream priority "will not preempt already executing work". ([CUDA docs](https://docs.nvidia.com/cuda/cuda-programming-guide/02-basics/asynchronous-execution.html)) |
|
||||||
|
| **VK/EGL global-priority REALTIME on Linux NVIDIA** | ✗ | Not reliably granted on the proprietary driver, and moot anyway — our Linux NVENC is driven via CUDA/NVENC-SDK, not a Vulkan queue. |
|
||||||
|
| **Windows "High performance" GPU preference** | ✗ single-GPU placebo | Only selects an adapter; it has a real effect only when work is split across adapters (→ that's §G). |
|
||||||
|
| **MIG / MPS / vGPU** | ✗ N/A | MIG/vGPU are datacenter/pro + hypervisor/license; MPS is Linux-CUDA-only with no graphics notion. None apply to a consumer GPU. |
|
||||||
|
| **NvFBC on Windows** | ✗ dead | Deprecated, frozen at Capture SDK 7.1 / Win10-1803. |
|
||||||
|
| **Frame Generation / Smooth Motion** to "make more frames" | ✗ red herring | We stream *rendered* frames; FG adds optical-flow/tensor + present load to the same GPU → amplifies contention. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Open items / what's left
|
||||||
|
|
||||||
|
Diagnostics + still-unbuilt levers (verbatim, highest leverage first):
|
||||||
|
|
||||||
|
- **§3 automation** — instrument the `uniq`-vs-`fps` heuristic + a PresentMon probe so (a)/(b) is
|
||||||
|
decided automatically, not by hand. (Per-stage `cap`/`submit`/`wait` µs already land under
|
||||||
|
`PUNKTFUNK_PERF` from `3514702`; the uniq/PresentMon classifier is not yet automated.)
|
||||||
|
- **§5.A residual** — Linux NV12 default-on for the tiled zero-copy path (drop the
|
||||||
|
`PUNKTFUNK_NV12`+`PUNKTFUNK_ZEROCOPY` double-opt-in); move the Windows HDR FP16→P010 convert off the
|
||||||
|
SM (today it's a shader). Windows IDD-push SDR/HDR NV12/P010 is **DONE** (`3514702`).
|
||||||
|
- **§5.B** — build a *correct* async NVENC pipeline: submit on one thread, blocking-`lock_bitstream`
|
||||||
|
on a dedicated retrieve thread, deep input+output surface pool (≈4–8), Windows per-buffer
|
||||||
|
`completionEvent` (`enableEncodeAsync=1`), same two-thread split on Linux.
|
||||||
|
- **§5.C** — auto-gate REALTIME GPU priority: probe HAGS (`D3DKMTQueryAdapterInfo`) + VRAM headroom
|
||||||
|
(`IDXGIAdapter3::QueryVideoMemoryInfo`) continuously; REALTIME only when HAGS-off or HAGS-on with
|
||||||
|
comfortable headroom, downgrade to HIGH the instant VRAM tightens. (Static `realtime` opt-in exists
|
||||||
|
in `dxgi.rs`; no auto-gate.)
|
||||||
|
- **§5.E** — clock / P-state pinning: Windows NvAPI DRS `PREFERRED_PSTATE=PREFER_MAX` (crash-safe undo
|
||||||
|
to `%ProgramData%\punktfunk\`); Linux `nvidia-smi -lgc` / `nvmlDeviceSetGpuLockedClocks` (+
|
||||||
|
`CudaNoStablePerfLimit` on R580/595). Gate `PUNKTFUNK_PIN_CLOCKS`, default off on battery/Deck.
|
||||||
|
- **§5.F** — frame-source escape (only if §3 says (b)): swapchain-hook capture (OBS-style, anti-cheat
|
||||||
|
tradeoffs); NvFBC on Linux (keylase patch); compose-flip for the DLSS-FG half-rate case; WGC
|
||||||
|
`MinUpdateInterval = 1e7/(fps*2)` 2×-rate tweak.
|
||||||
|
- **§5.G** — iGPU / second-GPU encode offload: pin capture to the gaming adapter, encoder to the iGPU
|
||||||
|
adapter, one cross-adapter shared-texture copy. Reuses the AMF/QSV/VAAPI backends.
|
||||||
|
|
||||||
|
### Open evidence gaps (verify on-box)
|
||||||
|
|
||||||
|
- Whether `ID3D11VideoProcessor::VideoProcessorBlt` (BGRA→NV12) runs **off the SM on GeForce** is not
|
||||||
|
confirmed by any NVIDIA document — it's the linchpin of §5.A's full payoff. **Verify on-box** with
|
||||||
|
`nvidia-smi dmon` (sm% vs enc%) on the IDD-push/WGC path before assuming the win landed.
|
||||||
|
- The exact share of the 13–17 ms `encode_ms` that is *convert-on-SM* vs *scheduling-wait* is
|
||||||
|
unmeasured. §3 + an A/B of IDD-push-RGB (pre-`3514702`) vs IDD-push-NV12 on the same scene settles it
|
||||||
|
and tells you whether §5.A alone is enough or whether §5.C is doing the heavy lifting.
|
||||||
|
- AMD VCN "degrades worse under contention" is practitioner-consensus + architecture, not an AMD
|
||||||
|
whitepaper; treat the *direction* as solid, the magnitude as TBD.
|
||||||
@@ -0,0 +1,132 @@
|
|||||||
|
# HDR pipeline — investigation & implementation plan
|
||||||
|
|
||||||
|
> **Status:** Steps 0–3 SHIPPED — protocol/ABI/host in `3526517`, client apply + display
|
||||||
|
> capability-gate in `551012b`. Windows HDR live-validated; Apple/Android CI-compiled (on-glass
|
||||||
|
> pending). Step 4 (Linux) is OPEN, blocked upstream on capture. This doc is trimmed to design
|
||||||
|
> rationale + open items; the shipped code is the source of truth. The original audit (full gap
|
||||||
|
> list, per-file line refs, blocker walkthroughs) is in git history before this trim.
|
||||||
|
|
||||||
|
Goal: **true, correct HDR glass-to-glass** for punktfunk, across the host (Windows today; Linux
|
||||||
|
blocked upstream) and every client (Windows / Apple / Android / Linux). The plan was produced from a
|
||||||
|
full read of every HDR-touching subsystem cross-checked against the HDR10 standards (CICP/H.273 VUI,
|
||||||
|
SMPTE ST.2086 mastering, CTA-861.3 (formerly CEA-861.3) MaxCLL/MaxFALL) and the Sunshine/Apollo/Moonlight reference.
|
||||||
|
|
||||||
|
The original diagnosis: pixel math + the HEVC VUI we emitted were already correct (self-test
|
||||||
|
validated, matches Apollo), but nothing **measured, signalled, transported, or applied the static HDR
|
||||||
|
metadata** (mastering display colour volume + content light level). The fix was a metadata chain,
|
||||||
|
protocol-first.
|
||||||
|
|
||||||
|
## What shipped (Steps 0–3)
|
||||||
|
|
||||||
|
- **Step 0 — protocol + ABI carry colour end to end (`3526517`).** `ColorInfo` (4 CICP bytes on
|
||||||
|
`Welcome`) + `HdrMeta` (`0xCE` datagram, bounds-checked); `NativeClient` `color`/`bit_depth` fields
|
||||||
|
and an `HdrMeta` receiver/demux + `next_hdr_meta`. C ABI: `punktfunk_connect_ex5(... video_caps)`,
|
||||||
|
`next_hdr_meta`, `color_info`, and **fixed `abi.rs:896` `video_caps = 0`** — the one-line root cause
|
||||||
|
that had made Apple's complete (and correct) HDR pipeline dead code. Header regenerated. No
|
||||||
|
rendering changes, CI-testable (round-trip + truncation + SDR back-compat).
|
||||||
|
- **Step 1 — host in-band SEI + complete VUI (`3526517`, live-validated on the RTX box).**
|
||||||
|
Cross-platform byte logic in unit-tested `src/hdr.rs`: `hdr_meta_from_display`,
|
||||||
|
`hevc_mastering_display_sei` SEI **type 137**, `hevc_content_light_level_sei` SEI **type 144**
|
||||||
|
(note: NOT "type 4" — that was a drafting error). Windows `dxgi.rs`/`wgc.rs` read
|
||||||
|
`IDXGIOutput6::GetDesc1` at capture init / output change → `HdrMeta` (MaxCLL/MaxFALL left 0, like
|
||||||
|
Apollo); `nvenc.rs` attaches mastering + CLL SEI on every IDR for HEVC/H.264 and sends the real
|
||||||
|
`0xCE` datagram, re-sent on each keyframe. In-band SEI is read directly by decoders, so this fixed correctness
|
||||||
|
before clients consumed the protocol and gave an Apollo on-glass parity gate. *Follow-ups:* AV1
|
||||||
|
mastering rides METADATA OBUs (`HDR_MDCV`/`HDR_CLL`), not SEI; the Windows secure-desktop relay
|
||||||
|
still sends only the generic baseline `0xCE` (the helper's in-band SEI carries the real grade).
|
||||||
|
- **Step 2 — clients apply the metadata (`551012b`; Apple/Android CI-compiled).** Each client drains
|
||||||
|
`next_hdr_meta`/`nextHdrMeta` and remaps from the wire form (ST.2086 **G,B,R** order, mastering
|
||||||
|
luminance in 0.0001 cd/m²) to the platform layout: **Windows** `SetHDRMetaData`
|
||||||
|
(`hdr_meta_to_dxgi`: G,B,R→R,G,B reorder, 0.0001-nit→nit), dropping the 1000/1000/400 hardcode;
|
||||||
|
**Apple** `CVBufferSetAttachment` of `kCVImageBufferMasteringDisplayColorVolumeKey` (24-byte BE) +
|
||||||
|
`kCVImageBufferContentLightLevelInfoKey` (4-byte BE) per HDR pixel buffer — the correct path for the
|
||||||
|
`itur_2100_PQ` layer (`CAEDRMetadata` on a PQ layer is ambiguous, deliberately avoided); **Android**
|
||||||
|
`MediaFormat` `KEY_HDR_STATIC_INFO`, a 25-byte CTA-861.3 Type-1 blob (LE, **R,G,B** order, max-lum
|
||||||
|
in **nits-u16**). Apple's connect also flips to `connect_ex5` advertising `videoCap10Bit|videoCapHDR`
|
||||||
|
— the fix that resurrects Apple's previously-dead pipeline.
|
||||||
|
- **Step 3 — display-capability gate (`551012b`).** **Chosen approach: capability-gate, not an
|
||||||
|
in-shader BT.2390 tone-map.** Rationale: with Steps 1–2 the host sends *correct* mastering metadata
|
||||||
|
so an HDR display self-tone-maps; the remaining gap is SDR displays, best fixed by **not advertising
|
||||||
|
HDR you can't present** — the host then sends a proper BT.709 SDR stream instead of PQ the panel
|
||||||
|
mis-tone-maps (washed-out/dark). No guessed curve, deterministic. **Windows**
|
||||||
|
`display_supports_hdr` (any `IDXGIOutput6` colour space == `G2084`); **Apple**
|
||||||
|
`NSScreen.maximumExtendedDynamicRangeColorComponentValue > 1` (macOS) /
|
||||||
|
`UIScreen.main.potentialEDRHeadroom > 1` (iOS); **Android** `Display.getHdrCapabilities`
|
||||||
|
HDR10/HDR10+. Each ANDs with the user's HDR setting before advertising caps and logs when it drops
|
||||||
|
to SDR.
|
||||||
|
|
||||||
|
### Wire format — design decisions worth keeping
|
||||||
|
|
||||||
|
Two layers, both back-compat-safe via the established trailing-bytes / new-datagram-tag patterns.
|
||||||
|
|
||||||
|
- **(A) Per-session colorimetry** — 4 trailing bytes on `Welcome` (offsets 60..64):
|
||||||
|
`colour_primaries` (1=BT.709, 9=BT.2020) · `transfer_characteristics` (1=BT.709, 16=PQ/SMPTE2084,
|
||||||
|
18=HLG) · `matrix_coeffs` (1=BT.709, 9=BT.2020-NCL — **never emit 10 (CL): no client decodes it**) ·
|
||||||
|
`video_full_range_flag`. Decoded with `b.get(60).unwrap_or(1)` so an older host that omits them →
|
||||||
|
BT.709 limited SDR (today's behaviour). A future mirror on `Reconfigured` announces a mid-stream
|
||||||
|
SDR↔HDR / BT.709↔BT.2020 flip (deferred; today a mode switch never changes colour, and `0xCE`
|
||||||
|
re-send covers mastering changes).
|
||||||
|
- **(B) Per-change mastering + CLL** — host→client datagram tag **`0xCE`**, 28 bytes, standard SEI
|
||||||
|
fixed-point (display primaries G,B,R + white point in 1/50000 units; max/min mastering luminance in
|
||||||
|
0.0001 cd/m²; MaxCLL/MaxFALL in nits). ST.2086 metadata can change mid-session, so it rides a datagram rather than the
|
||||||
|
Welcome. **Re-sent on every IDR/RFI keyframe** so a client that dropped the best-effort datagram
|
||||||
|
converges within a GOP; until first receipt the client uses the Welcome transfer + a documented
|
||||||
|
generic default. **Bounds-check length before reading** (reassembler-bounds security invariant —
|
||||||
|
truncation test required). **Omitted entirely for HLG.** Units map straight to DXGI
|
||||||
|
`DXGI_HDR_METADATA_HDR10`, Android `KEY_HDR_STATIC_INFO`, Apple `CAEDRMetadata.hdr10` (the shipped Apple client uses the `kCVImageBuffer…` attachments instead — see Step 2); the
|
||||||
|
libavcodec/Linux side needs conversion — `AVMasteringDisplayMetadata` stores `AVRational`, not raw
|
||||||
|
fixed-point.
|
||||||
|
|
||||||
|
## Out of scope (accepted — call out, don't build)
|
||||||
|
|
||||||
|
- **Dynamic metadata:** HDR10+ (ST.2094-40) and Dolby Vision RPU. We handle *static* ST.2086 only,
|
||||||
|
with mid-stream changes carried by re-sending the static block.
|
||||||
|
- **HLG:** the transfer enum carries `18` from day one (free), but the `0xCE` mastering datagram is
|
||||||
|
omitted for HLG (scene-referred, no mastering metadata).
|
||||||
|
|
||||||
|
## Step 4 — Linux (last; capture blocked upstream) — OPEN
|
||||||
|
|
||||||
|
- **(4a) 8-bit→Main10 NVENC upconvert shim** (`encode/linux.rs`) — Main10 transport with correct
|
||||||
|
VUI/SEI without HDR capture (gated so we don't claim HDR transfer on SDR content).
|
||||||
|
- **Linux encode colour + side-data (the deferred Step 1c):** set
|
||||||
|
`color_primaries/trc/colorspace/range` from the negotiated `ColorInfo` and attach
|
||||||
|
`AV_FRAME_DATA_MASTERING_DISPLAY_METADATA` / `CONTENT_LIGHT_LEVEL` side-data (with the `AVRational`
|
||||||
|
conversion) in `encode/linux.rs` + `vaapi.rs` — only once the encoder actually produces 10-bit, so
|
||||||
|
the signalling matches the bits. (Linux capture is 8-bit only, so signalling BT.2020 PQ + attaching
|
||||||
|
mastering side-data on a downconverted 8-bit stream would be *incorrect* — hence deferred out of
|
||||||
|
Step 1.)
|
||||||
|
- **(4b) True 10-bit capture:** offer `ABGR2101010`/`P010` PipeWire formats + read colorimetry; pilot
|
||||||
|
on Sway/wlroots; **blocked on gamescope #2126** (portals don't wire PipeWire 1.6 BT.2020/PQ).
|
||||||
|
**Don't block the rest of the plan on it.**
|
||||||
|
- **(4c) Linux client:** `ex5` caps, P010 decode, GdkDmabufTexture CICP from Welcome,
|
||||||
|
`wp_color_management` when GTK ≥ 4.14. (Also a standalone SDR bug: software path applies BT.601 coefficients to
|
||||||
|
BT.709 — needs a BT.601→BT.709 sws + texture `color_state`.)
|
||||||
|
|
||||||
|
## Deferred validation (need on-glass / the RTX box)
|
||||||
|
|
||||||
|
- The mid-session `Reconfigure` "downgrade to SDR" for a monitor move HDR↔SDR.
|
||||||
|
- Confirm the **host produces SDR for an SDR client even off an HDR desktop** — on the native path the
|
||||||
|
per-session SudoVDA follows the negotiated depth (SDR client → SDR virtual display → SDR stream), so
|
||||||
|
it should hold end to end; verify the stale-HDR-SudoVDA edge case.
|
||||||
|
|
||||||
|
## Open questions
|
||||||
|
|
||||||
|
- **MaxCLL source:** `GetDesc1` doesn't expose it (Apollo zeroes). Static default, or measure
|
||||||
|
per-frame peak in the PQ shader (the only truly correct option, but it adds a readback)?
|
||||||
|
- **GameStream:** implement `SS_HDR_METADATA` (Moonlight's HDR-metadata blob on the ENet control
|
||||||
|
channel) for parity, or keep it deliberately SDR and steer HDR users to punktfunk/1?
|
||||||
|
- **HLG:** carry the enum from day one (free) — but do any sources actually produce HLG?
|
||||||
|
- **Linux:** is shipping the 8-bit→Main10 shim as "HDR-capable transport" acceptable, or does it risk
|
||||||
|
advertising HDR we can't truly deliver?
|
||||||
|
|
||||||
|
## Ordering rationale
|
||||||
|
|
||||||
|
Step 0 first: it's the keystone (metadata transport is the dominant cross-cutter; the ABI
|
||||||
|
`video_caps = 0` line is a one-line root cause) and needs no hardware. Step 1 next: in-band SEI is
|
||||||
|
read directly by decoders, so it fixes correctness even before our clients consume the protocol, and
|
||||||
|
gives an Apollo-parity on-glass gate. Steps 2–3 are mechanical per-client wiring once metadata flows.
|
||||||
|
Linux is last because capture is gated on upstream we don't control; the shim delivers Main10
|
||||||
|
transport without that dependency.
|
||||||
|
|
||||||
|
Hardware dependencies: Step 0 = none (CI); Step 1 = RTX Windows host; Steps 2–3 = a real HDR display
|
||||||
|
per platform; Step 4 = a Linux GPU box + HDR-capable Wayland compositor.
|
||||||
@@ -0,0 +1,228 @@
|
|||||||
|
# Host latency & the GPU-contention collapse — analysis + prioritized plan
|
||||||
|
|
||||||
|
> **Status:** PARTLY SHIPPED. Tier 2A (Linux NV12 convert) = `1fc6f73`; Tier 2B (Linux
|
||||||
|
> scheduling) + Tier 3A (Windows session tuning) = `112a054`. Tiers 1A, 1B, 3B, 3C, 3D, 4 are
|
||||||
|
> still open. This doc is trimmed to design rationale + open items; the shipped code is the
|
||||||
|
> source of truth for the landed tiers.
|
||||||
|
|
||||||
|
> **⚠ Partially superseded (2026-06-25) by [`gpu-contention-investigation.md`](gpu-contention-investigation.md).**
|
||||||
|
> That follow-up re-verified this plan against the current code and overturned several specifics:
|
||||||
|
> the default Windows path (IDD-push) now feeds NVENC **RGB** (regressing the §0A "Windows does it
|
||||||
|
> right" claim); `PUNKTFUNK_ENCODE_DEPTH` never existed (phantom knob); the "async NVENC stacks
|
||||||
|
> latency" result was a *same-thread* implementation, not a disproof of a correct two-thread pipeline;
|
||||||
|
> "capture sees half the frames" is DLSS-Frame-Gen-specific, not general; and NvFBC is dead on
|
||||||
|
> Windows. **For current action prioritization see `gpu-contention-investigation.md`.** The
|
||||||
|
> tiers/dropped-placebo analysis below remains a useful record.
|
||||||
|
|
||||||
|
Scope: Windows + Linux GameStream/punktfunk1 hosts. Priority: **latency**, and specifically the
|
||||||
|
"saturating game starves the stream" headache:
|
||||||
|
|
||||||
|
> CS2 runs 400+ fps. Client requests 240. In an easy scene the client gets ~200; in a demanding
|
||||||
|
> (GPU-100%) scene it collapses to 40-50. Capping the game is **not** an acceptable fix.
|
||||||
|
|
||||||
|
This doc is the synthesis of a multi-agent investigation (deep read of our pipeline + the
|
||||||
|
[Apollo comparison](apollo-comparison.md) + external NVIDIA/streaming research) followed by an
|
||||||
|
**adversarial verification pass** — every candidate fix was attacked, against our actual code, to
|
||||||
|
separate real levers from placebo. The "Dropped / why" section exists so we don't re-propose the
|
||||||
|
placebos.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Mental model (§0A–0C) — see the follow-up
|
||||||
|
|
||||||
|
The original three-correction mental model (A: feeding NVENC RGB is backwards; B: GPU priority is
|
||||||
|
maxed on Windows and hits a preemption-granularity ceiling; C: a chunk of the collapse is upstream
|
||||||
|
of the encoder at the compositor compose-rate, with Independent/Direct Flip bypassing DWM) is
|
||||||
|
**partly corrected by `gpu-contention-investigation.md` §1** — notably that the default Windows
|
||||||
|
IDD-push path now feeds NVENC RGB (so §0A's "Windows already does the right thing" no longer holds),
|
||||||
|
and "capture sees half the frames" is DLSS-Frame-Gen-specific rather than general. Read the
|
||||||
|
follow-up doc for the corrected model. The durable takeaways still stand: **do less work on the
|
||||||
|
contended graphics/3D engine**, **overlap the unavoidable per-frame scheduling wait across frames**,
|
||||||
|
and **measure source-vs-pipeline before blaming encode**.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tier 0 — Diagnose first (cheap, decisive, do before writing code)
|
||||||
|
|
||||||
|
Everything below is gated on knowing *which* bucket the collapse is in. We already have the tooling.
|
||||||
|
|
||||||
|
1. **Run the workload with `PUNKTFUNK_PERF=1` and read `uniq` vs `fps`.** The `uniq` counter
|
||||||
|
(genuinely-new captured frames vs re-encoded holds) already exists
|
||||||
|
(`gamestream/stream.rs:332-336,403`; `wgc_helper.rs:122-183`). Under CS2 at GPU-100%:
|
||||||
|
- **`fps`≈240 but `uniq`→40-50** ⇒ the *source/compositor* only produced 40-50 unique frames.
|
||||||
|
No encode/priority/cadence fix on our side exceeds that — it is the game's effective
|
||||||
|
present-to-compose rate at 100% GPU. The lever there is **reducing our own per-frame GPU
|
||||||
|
steal** (Tier 2) so the game keeps more headroom, plus the cadence work (Tier 1A).
|
||||||
|
- **both `fps` and `uniq`→40-50** ⇒ our capture→convert→encode round-trip is being starved (the
|
||||||
|
`lock_bitstream` scheduling stall). The Tier 1/2 contention levers apply directly.
|
||||||
|
2. **Confirm the game's flip mode on Windows.** If the game is on Independent/Direct Flip (MPO),
|
||||||
|
capture is bypassing DWM and can miss frames (the "half the frames" figure is DLSS-Frame-Gen-specific, per the follow-up doc). We already have `capture/composed_flip.rs`
|
||||||
|
— verify ForceComposedFlip is actually engaged on the game path, and watch `cap_us`.
|
||||||
|
3. Capture `cap_us` / `enc_us` / `pace_us` p50/p99 alongside, to localise the stall.
|
||||||
|
|
||||||
|
Run this on the real-GPU boxes (RTX 4090 Windows host; a Linux NVIDIA box with a real game). This
|
||||||
|
headless dev VM cannot reproduce the contention.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tier 1 — The two under-weighted, cross-platform levers (OPEN — confirmed by research, not yet done)
|
||||||
|
|
||||||
|
### 1A. Capture-source / compose-rate cadence (where "200 not 240" actually lives)
|
||||||
|
The capture ceiling is the compositor's compose rate, and under load the compositor gets starved.
|
||||||
|
Levers, in order:
|
||||||
|
- **Force Composed Flip on Windows** for the game path (defeat MPO/flip-metering frame loss).
|
||||||
|
Machinery exists (`composed_flip.rs`); confirm it engages and measure the unique-frame delta.
|
||||||
|
- **Opt-in "double-refresh" virtual output**: create the per-session virtual output at ~2× the
|
||||||
|
client's rate to break the game-present-vs-compose beat (community-validated; cheap for us since
|
||||||
|
we already mint arbitrary-mode virtual outputs). Gate **off** by default and **never** on the
|
||||||
|
gamescope/SudoVDA game-attach path (no DWM beat there; it just adds compose work to the saturated
|
||||||
|
engine). `PUNKTFUNK_OUTPUT_HZ_MULTIPLIER`.
|
||||||
|
- **Reflex / render-queue=0 style headroom** (non-capping): documented as the substitute for an fps
|
||||||
|
cap — removes render-queue backpressure so the compositor/capture get scheduled. Investigate what
|
||||||
|
we can influence from the host side.
|
||||||
|
|
||||||
|
Risk: the double-refresh trick can be a net regression under saturation (doubles compose + our
|
||||||
|
capture work on the saturated engine) — measure (Tier 0) before shipping it on by default.
|
||||||
|
|
||||||
|
### 1B. Pin GPU power / clock state for the session (kills the per-frame downclock tax)
|
||||||
|
NVIDIA's adaptive P-state downclocks between our small bursty frames and pays a ramp every frame —
|
||||||
|
a hidden latency tax, *most visible in easy scenes* (the ~200-should-be-240 case). Sunshine ships
|
||||||
|
this as `nvenc_latency_over_power` and calls it decisive. **Neither host does it.**
|
||||||
|
- **Windows**: NvAPI **per-application DRS profile** `PREFERRED_PSTATE = PREFER_MAX` scoped to our
|
||||||
|
exe (not a global override). Load `nvapi64.dll` dynamically; treat `NvAPI_Initialize` failure as
|
||||||
|
"no NVIDIA, skip" (covers AMD/Intel + the WARP dev VM). **Crash-safe undo is mandatory**: write
|
||||||
|
an undo record to `%ProgramData%\punktfunk\` *before* applying and revert a stale profile on next
|
||||||
|
startup — a crash must not leave the user's control panel modified.
|
||||||
|
- **Linux**: prefer the **root-free** path — disable the CUDA "Force P2 State" downclock that
|
||||||
|
context creation triggers (env/per-context), and `nvidia-smi -pm 1` (persistence) where
|
||||||
|
permitted. `nvmlDeviceSetGpuLockedClocks` needs root/CAP_SYS_ADMIN (our host runs as a normal
|
||||||
|
user → silent no-op) and is brittle across SKUs; if used, query `nvmlDeviceGetMaxClockInfo`, lock
|
||||||
|
to *that*, and restore on teardown **and** via a SIGTERM/panic handler.
|
||||||
|
- Gate behind `PUNKTFUNK_PIN_CLOCKS`; **default OFF on battery / Steam Deck** (thermal/power caps
|
||||||
|
make pinning actively harmful there).
|
||||||
|
|
||||||
|
Impact: reliable, modest p99 / easy-scene win on both OSes. Does **not** fix the saturated-scene
|
||||||
|
collapse (at 100% util the clock is already maxed). Low cost.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tier 2 — Linux work-deletion + scheduling hygiene
|
||||||
|
|
||||||
|
### 2A. Linux NV12 convert — **SHIPPED (`1fc6f73`)**
|
||||||
|
GL de-tile blit emits NV12 (BT.709 limited) on the GPU and feeds NVENC native YUV, deleting NVENC's
|
||||||
|
internal RGB→YUV CSC off the contended SM. Gated `PUNKTFUNK_NV12` (default OFF). Tiled EGL/GL path
|
||||||
|
only; LINEAR/Vulkan-bridge (gamescope) stays RGB. Validated colour-correct on RTX 5070 Ti. Open
|
||||||
|
follow-up: glass-to-glass latency + CS2 fps-under-saturation A/B before flipping the default, and
|
||||||
|
the **P010** variant for the HDR/10-bit path. Code is the source of truth (`zerocopy/egl.rs`,
|
||||||
|
`encode/linux.rs`).
|
||||||
|
|
||||||
|
### 2B. Linux scheduling hygiene — **SHIPPED (`112a054`)**
|
||||||
|
`boost_thread_priority` nices capture/encode/send on Linux (best-effort `setpriority`);
|
||||||
|
CUDA context uses `CU_CTX_SCHED_BLOCKING_SYNC`; copies run on a per-thread highest-priority CUDA
|
||||||
|
stream (`cuStreamCreateWithPriority`, NULL-stream fallback). The stream-priority hint is
|
||||||
|
**measure-then-keep** (NVIDIA Linux may ignore it). **Do not** default to SCHED_RR/FIFO (can starve
|
||||||
|
the compositor + the game's render thread); opt-in only behind `PUNKTFUNK_SCHED_RR=1`. Code is the
|
||||||
|
source of truth (`punktfunk1.rs`).
|
||||||
|
|
||||||
|
> Explicitly **not** doing on Linux: Vulkan `VK_EXT_global_priority` as "the" lever (it only touches
|
||||||
|
> the minority gamescope/LINEAR copy, not the convert; likely a silent no-op on consumer NVIDIA).
|
||||||
|
> Replacing `cuCtxSynchronize` with a per-stream event chain for *contention* reasons (it's
|
||||||
|
> per-context, so it never waits on the game's separate context — a non-fix; keep the full sync where it
|
||||||
|
> guards dmabuf recycle, `egl.rs:491`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tier 3 — Windows parity polish (Windows is already strong)
|
||||||
|
|
||||||
|
### 3A. Host-process session tuning — **SHIPPED (`112a054`)**
|
||||||
|
`session_tuning.rs` (raw C-ABI FFI, no-op off Windows): each capture/encode/send thread applies
|
||||||
|
process-wide tuning once (1 ms timer, `DwmEnableMMCSS`, `HIGH_PRIORITY_CLASS`) + per-thread MMCSS
|
||||||
|
"Games" + keep-display-awake; reverts on stop. Wired into both native (`boost_thread_priority`) and
|
||||||
|
GameStream (`stream.rs`) paths. FFI validated on the real MSVC toolchain.
|
||||||
|
|
||||||
|
### 3B. Auto-gated REALTIME D3DKMT class (OPEN)
|
||||||
|
Instead of fixed HIGH (the realtime opt-in already exists at `dxgi.rs:199-207`): probe HAGS
|
||||||
|
(`D3DKMTQueryAdapterInfo` `HwSchEnabled`) **and** VRAM headroom (`IDXGIAdapter3::QueryVideoMemoryInfo`,
|
||||||
|
continuously), allow REALTIME(5) only when safe (HAGS off, or HAGS on + VRAM comfortably below
|
||||||
|
budget), downgrade to HIGH the moment VRAM pressure rises — Sunshine's actual gate avoids the
|
||||||
|
HAGS+near-full-VRAM NVENC freeze/crash. Marginal (one scheduling rung, same preemption ceiling), so
|
||||||
|
rank it as cheap parity, not a fix.
|
||||||
|
|
||||||
|
### 3C. `VideoProcessorBlt` directly from the DDA surface (OPEN — cheap experiment)
|
||||||
|
Skip the same-format `gpu_copy` at `dxgi.rs:2375`, then `ReleaseFrame`, *iff* it doesn't
|
||||||
|
re-serialize `AcquireNextFrame` (the existing decouple-copy was measured 40-200 fps vs ~60 fps, but
|
||||||
|
that note predates confirming the Blt is on the video engine). One-line source-texture change;
|
||||||
|
benchmark only. Do **not** build a D3D11↔D3D12 copy-queue offload — the convert is already off-3D,
|
||||||
|
the remaining copy is intra-VRAM (~5% 3D, no PCIe), not worth the interop rebuild.
|
||||||
|
|
||||||
|
### 3D. Async NVENC + off-thread retrieve (OPEN — measure-gated, uncertain)
|
||||||
|
Today retrieve (`lock_bitstream`) runs **inline on the submit thread** (`nvenc.rs:524-558`), which
|
||||||
|
is *why* `depth>1` was measured to regress (`wgc_helper.rs:111-114`). The NVENC guide mandates
|
||||||
|
submit/retrieve on separate threads with completion events + a deep surface pool; doing that *could*
|
||||||
|
let per-frame scheduling waits **overlap across frames** and recover *throughput* — at a per-frame
|
||||||
|
*latency* cost (depth × frame time). This is the one place the research and our own prior
|
||||||
|
measurement disagree, so it is **strictly measure-first**, and it forecloses slice output
|
||||||
|
(`reportSliceOffsets` needs `enableEncodeAsync=0`). Treat as a structural experiment, not a
|
||||||
|
committed win. (The follow-up doc notes the prior "async stacks latency" result was a *same-thread*
|
||||||
|
implementation, not a disproof of a correct two-thread pipeline.)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tier 4 — Deferred 2nd-order latency (OPEN — not contention fixes; do after Tiers 0-2)
|
||||||
|
|
||||||
|
- **GL2 — Intra-refresh for RFI/recovery** (`enableIntraRefresh` + recovery-point SEI) instead of a
|
||||||
|
forced full-IDR: spreads a moving intra band across N frames, killing the 20-40× keyframe size
|
||||||
|
spike and the VBV-overshoot drops it causes. Preconditions (infinite GOP, P-only) already met.
|
||||||
|
Medium; needs all 4 clients to trust the recovery-point SEI and stop demanding IDRs. Real p99 win,
|
||||||
|
orthogonal to the collapse.
|
||||||
|
- **GL1 + GL6 — Sub-frame slice output + per-slice paced send** (the roadmap's "~2-4 ms lever"):
|
||||||
|
`enableSubFrameWrite` + `sliceMode` + transmit each slice as it completes. **Big**: needs the
|
||||||
|
direct NVENC SDK on Linux (libavcodec emits whole AUs) **and** a per-slice wire/FEC redesign in
|
||||||
|
`punktfunk-core` (today `PacketHeader`/`Packetizer`/reassembler are whole-AU; per-slice FEC blocks
|
||||||
|
wreck Leopard efficiency) **and** client slice-granular submit. Gate on
|
||||||
|
`NV_ENC_CAPS_SUPPORT_SUBFRAME_READBACK` (often absent on consumer GeForce). The paced-send half is
|
||||||
|
**already shipped** (`stream.rs spawn_sender`, `punktfunk1.rs paced_submit`) — don't re-implement.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Dropped / why (so we don't re-propose placebo)
|
||||||
|
|
||||||
|
| Candidate | Verdict | Why |
|
||||||
|
|---|---|---|
|
||||||
|
| Feed NVENC ARGB to "offload CSC to ASIC" | ✗ backwards | RGB input forces CSC onto the SM; YUV-native is correct (see §0A). |
|
||||||
|
| Replace `cuCtxSynchronize` with per-stream event chain *for contention* | ✗ | `cuCtxSynchronize` is per-context, so it never waits on the game's separate process; single null stream = no overlap to win. Keep the full sync where it guards dmabuf recycle. |
|
||||||
|
| Vulkan `VK_EXT_global_priority` as the Linux priority lever | ✗ | Touches only the minority gamescope/LINEAR `vkCmdCopyBuffer`, not the convert; consumer NVIDIA denies realtime / ignores it. Retarget to CUDA/EGL priority. |
|
||||||
|
| Async NVENC as a *throughput/collapse* fix | ✗ (→ measure-gated 3D) | Async is CPU-thread-only (NVIDIA guide); Apollo's own PR #3629 measured no gain; our `depth>1` regressed; Linux-impossible. Kept only as the structural pipelining experiment (§3D). |
|
||||||
|
| D3D12 copy-queue offload of the DDA copy | ✗ | Convert already off-3D; remaining copy is intra-VRAM ~5%, no PCIe — not worth a D3D11↔D3D12 interop rebuild. |
|
||||||
|
| Empty-frame (`LastPresentTime==0`) skip | ✗ for this | Static desktop already coalesced via WAIT_TIMEOUT; under a 400 fps game there are no empty frames to skip. |
|
||||||
|
| GL5 — set ULL RC knobs explicitly | ✗ (audit only) | ULL preset already sets `zeroReorderDelay=1`, lookahead/multipass/AQ off; ffmpeg defaults match + we set `bf=0`. Only `lowDelayKeyFrameScale=1` is non-redundant → fold into GL2 (Windows SDK path only). |
|
||||||
|
| GL3 — true ref-frame invalidation | ✗ for this | No lost-range protocol signal (both control planes collapse to a bool/unit); libavcodec exposes no `nvEncInvalidateRefFrames`; deeper DPB adds per-frame cost. Revisit only as loss-recovery robustness. |
|
||||||
|
| GL4 — move input injection off the ENet thread | ✗ for this | CPU-side, orthogonal to GPU contention; the blocking case is a once-per-UAC desktop switch. Demote to control-plane robustness. |
|
||||||
|
| SCHED_RR/FIFO by default (Linux) | ✗ default | Can preempt the compositor + the game's render thread → adds game frame-time the user refuses. Opt-in only. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Open items / What's left
|
||||||
|
|
||||||
|
For current action prioritization see [`gpu-contention-investigation.md`](gpu-contention-investigation.md).
|
||||||
|
Still-open work tracked by this doc:
|
||||||
|
|
||||||
|
- **Tier 0** — run the `PUNKTFUNK_PERF=1` uniq-vs-fps + flip-mode diagnosis on the real-GPU boxes
|
||||||
|
(gate for everything below).
|
||||||
|
- **Tier 1A** — capture-source / compose-rate cadence levers (ForceComposedFlip verify;
|
||||||
|
`PUNKTFUNK_OUTPUT_HZ_MULTIPLIER` double-refresh; Reflex/render-queue=0 headroom).
|
||||||
|
- **Tier 1B** — GPU clock/power pinning (`PUNKTFUNK_PIN_CLOCKS`; NvAPI per-app DRS on Windows w/
|
||||||
|
crash-safe undo; root-free CUDA-P2/persistence on Linux; default OFF on battery/Deck).
|
||||||
|
- **Tier 2A follow-up** — glass-to-glass + CS2-floor A/B before defaulting `PUNKTFUNK_NV12`, and the
|
||||||
|
**P010** HDR/10-bit variant.
|
||||||
|
- **Tier 3B** — auto-gated REALTIME D3DKMT class (HAGS + VRAM-headroom gate).
|
||||||
|
- **Tier 3C** — `VideoProcessorBlt` directly from the DDA surface (benchmark-only experiment).
|
||||||
|
- **Tier 3D** — correct async NVENC two-thread submit/retrieve pipeline (strictly measure-first).
|
||||||
|
- **Tier 4** — GL2 intra-refresh for RFI/recovery; GL1/GL6 sub-frame slice output + per-slice paced
|
||||||
|
send (paced-send half already shipped).
|
||||||
|
|
||||||
|
Honest expectation: with the work-deletion + cadence + power-pin levers stacked, the easy-scene gap
|
||||||
|
closes and the saturated floor rises, but a residual ceiling remains — at 100% utilisation the GPU
|
||||||
|
physically cannot also render the game *and* compose 240 unique frames, and WDDM/NVIDIA preemption
|
||||||
|
granularity caps how far priority can claw back. Report that ceiling honestly rather than chasing it
|
||||||
|
with encoder micro-optimisations.
|
||||||
@@ -6,6 +6,8 @@ description: "The full design: protocol core, milestones, and architecture."
|
|||||||
|
|
||||||
*A ground-up low-latency desktop streaming stack, built Linux-first, with a shared Rust protocol core and native clients per platform.*
|
*A ground-up low-latency desktop streaming stack, built Linux-first, with a shared Rust protocol core and native clients per platform.*
|
||||||
|
|
||||||
|
> **Status:** SHIPPED — M0–M5 complete, M6 largely shipped. This is the project's canonical design doc; it is trimmed to the load-bearing design (thesis, scope, architecture, protocol strategy, C ABI, virtual-display orchestration, latency budget, risk register) plus still-open items. For current shipped-feature status see CLAUDE.md "Where the work stands"; for build/test/run, repo layout, and next actions see CLAUDE.md. Git history holds the full original milestone acceptance criteria.
|
||||||
|
|
||||||
> The name `punktfunk` fits the lowercase house style (`unom`, `played`, `remplir`) and reads as "glass-to-glass light," which is the whole point.
|
> The name `punktfunk` fits the lowercase house style (`unom`, `played`, `remplir`) and reads as "glass-to-glass light," which is the whole point.
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -31,7 +33,7 @@ Two concrete gaps justify a new project rather than another fork:
|
|||||||
- Native clients: Rust (Linux), Swift (macOS/iOS), Kotlin (Android) — all linking the same core.
|
- Native clients: Rust (Linux), Swift (macOS/iOS), Kotlin (Android) — all linking the same core.
|
||||||
|
|
||||||
**Explicit non-goals (at least at first):**
|
**Explicit non-goals (at least at first):**
|
||||||
- Windows *host* support (Sunshine/Apollo already do this well; no gap to fill).
|
- Windows *host* support (Sunshine/Apollo already do this well; no gap to fill). *(Note: this non-goal was later reversed — a Windows host shipped; see CLAUDE.md.)*
|
||||||
- Internet/NAT-traversal relay infrastructure (LAN/VPN first; lean on an existing mesh VPN such as Headscale/NetBird/Tailscale).
|
- Internet/NAT-traversal relay infrastructure (LAN/VPN first; lean on an existing mesh VPN such as Headscale/NetBird/Tailscale).
|
||||||
- Reinventing encoders/decoders (bind to FFmpeg + vendor SDKs; never rewrite codecs).
|
- Reinventing encoders/decoders (bind to FFmpeg + vendor SDKs; never rewrite codecs).
|
||||||
- A bespoke compositor (drive existing ones; only consider a dedicated headless compositor as a *deployment mode*, see §6).
|
- A bespoke compositor (drive existing ones; only consider a dedicated headless compositor as a *deployment mode*, see §6).
|
||||||
@@ -213,23 +215,18 @@ client: recv → core[reorder+FEC recover+jitter] → decode → present
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 8. Milestones
|
## 8. Milestones — status
|
||||||
|
|
||||||
Sizing is rough and relative (Spike / S / M / L) for a focused solo dev; treat as ordering, not deadlines.
|
M0–M5 complete; M6 (feature surface) largely shipped. The original per-milestone acceptance criteria (M0 pipeline spike → M1 core+C ABI → M2 P1 host to stock Moonlight → M3 measurement harness → M4 P2 GF(2¹⁶) wall-breaker → M5 Apple client → M6 mic/HDR/per-client identity) are in git history. Live status — what is validated, what is partial — lives in CLAUDE.md "Where the work stands." The bet held: M2 (virtual-display streaming to stock Moonlight on Linux) shipped first as a complete, gap-filling release; the wall-breaking transport, native clients, and mic-done-right were unlocked from that position, resting on a FEC core that makes the 1 Gbps ceiling a thing of the past rather than a thing to hack around.
|
||||||
|
|
||||||
**M0 — Pipeline spike (S).** wlroots headless output → PipeWire capture → VAAPI/NVENC encode → dump H.265 to a file that plays. *Acceptance:* a valid encoded file from a virtual output, no streaming yet. Proves the Linux capture+encode chain end-to-end.
|
### Open items (still in flight)
|
||||||
|
|
||||||
**M1 — `punktfunk-core` skeleton + C ABI (M).** Session lifecycle, GameStream-compatible packetization and GF(2⁸) FEC (P1), AES-GCM, `cbindgen` header, a tiny C test harness. *Acceptance:* core links from C; round-trips packets in a loopback test with simulated loss.
|
- **Sub-frame pipelining**: overlap encode and transmit within a frame. Requires a direct NVENC SDK wrapper (libavcodec only emits whole AUs) — the next big latency lever (~2–4 ms at high res).
|
||||||
|
- **Apple stage-2 presenter as the default** (`VTDecompressionSession` + `CAMetalLayer`, live-validated behind the opt-in `punktfunk.presenter` flag at ~11 ms p50) after a few resolution/HDR checks, plus **iOS/iPadOS/tvOS variants**.
|
||||||
**M2 — P1 host: stream to stock Moonlight (L).** Wire M0's pipeline into the core; implement `serverinfo`/pairing/RTSP enough for a real Moonlight client to connect, with a KWin virtual output created on connect and destroyed on disconnect. Input via `reis`/uinput. *Acceptance:* **you play a game on your KDE box streamed to a stock Moonlight client on a virtual display, no dummy plug, no kernel args.** This is the shippable milestone and the project's reason to exist.
|
- **Windows client on-glass validation**: D3D11VA zero-copy decode + HDR present + the WinUI GUI polish are written against the windows-rs/reactor APIs but not yet validated on a real display+GPU (the dev VM is headless/Session-0/WARP); needs the RTX box. Then RAWINPUT relative-mouse pointer-lock and a per-host speed test in the UI.
|
||||||
|
- **Android real-device validation**: gamepad rumble/HID feedback and HDR10 (Main10/BT.2020 PQ) live-verify; presenter/latency polish.
|
||||||
**M3 — Measurement harness (S).** Glass-to-glass latency measurement (on-screen QR/timestamp or photodiode), packet-loss injection, frame-pacing and stall metrics surfaced in the web UI. *Acceptance:* you can quantify a regression. Build this before optimizing anything.
|
- **gamescope multi-user isolation**: per-session input/audio so concurrent sessions are independent desktops (§8b-2 peer-push approval from a paired device's own app is the related open protocol-growth item).
|
||||||
|
- **GameStream AV1 + surround audio live confirmation**: both are implemented and unit/live-capture tested but still need a live Moonlight confirmation (select AV1 in a stock client; a real 5.1/7.1 listen including FEC under loss).
|
||||||
**M4 — P2 transport: break the wall (L).** Add `punktfunk/1` negotiation; swap to `reed-solomon-simd` GF(2¹⁶) with multi-block per-frame framing; optional QUIC control/audio. Write a minimal **Rust** reference client (decode via VAAPI, present via wgpu/Vulkan) to exercise it. *Acceptance:* a stable stream above 1.4 Gbps at 5120×1440@240 with loss recovery working; latency unchanged vs. M2.
|
|
||||||
|
|
||||||
**M5 — Apple client (L).** Swift + VideoToolbox + Metal + SwiftUI, linking `punktfunk-core` via the C header. *Acceptance:* a Mac plays a stream at native resolution/refresh.
|
|
||||||
|
|
||||||
**M6 — Feature surface (M, ongoing).** Mic passthrough as a proper encrypted, per-client reverse audio stream (the thing the upstream PR got wrong); HDR signalling; per-client identity/permissions; pause/resume. *Acceptance:* feature parity with Apollo on the items you care about, plus mic done right.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -244,57 +241,3 @@ Sizing is rough and relative (Spike / S / M / L) for a focused solo dev; treat a
|
|||||||
| Frame pacing eats more time than expected | High | Med | M3 measurement harness first; treat pacing as a first-class subsystem, not a polish step |
|
| Frame pacing eats more time than expected | High | Med | M3 measurement harness first; treat pacing as a first-class subsystem, not a polish step |
|
||||||
| Scope creep into a full Moonlight replacement | High | High | P1 (stock-client compat) is the firewall: it forces you to ship value before writing a client |
|
| Scope creep into a full Moonlight replacement | High | High | P1 (stock-client compat) is the firewall: it forces you to ship value before writing a client |
|
||||||
| Solo bandwidth vs. other projects | High | Med | M2 is a complete, useful artifact on its own; the plan is safe to pause after any milestone |
|
| Solo bandwidth vs. other projects | High | Med | M2 is a complete, useful artifact on its own; the plan is safe to pause after any milestone |
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 10. Testing & measurement
|
|
||||||
|
|
||||||
- **Loopback correctness:** core encodes→FEC→loss-inject→recover→decode in-process; property tests over loss patterns and shard counts (proptest).
|
|
||||||
- **Glass-to-glass latency:** rendered timestamp/QR on host, read back on client capture; or a photodiode for true photons. Track p50/p99.
|
|
||||||
- **Loss resilience:** `tc netem` to inject loss/jitter/reorder; verify FEC recovery and graceful degradation.
|
|
||||||
- **Pacing:** log present timestamps vs. client vsync; alert on stalls and duplicate/dropped frames.
|
|
||||||
- **Soak:** multi-hour streams; watch for buffer growth, fd leaks, encoder session exhaustion.
|
|
||||||
- **Hardware matrix:** an NVIDIA box (NVENC), an AMD/Intel box (VAAPI), a Mac (VideoToolbox decode). Catch driver quirks early.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 11. Repo / workspace structure
|
|
||||||
|
|
||||||
```
|
|
||||||
punktfunk/
|
|
||||||
├── Cargo.toml # workspace
|
|
||||||
├── crates/
|
|
||||||
│ ├── punktfunk-core/ # protocol, FEC, pacing, crypto — C ABI (cdylib + staticlib)
|
|
||||||
│ │ ├── src/abi.rs # #[no_mangle] extern "C" surface
|
|
||||||
│ │ ├── src/fec.rs # GF(2^16) blocking over reed-solomon-simd
|
|
||||||
│ │ ├── src/transport/ # udp+fec video, quinn control/audio
|
|
||||||
│ │ ├── src/protocol/ # gamestream-compat (P1) + punktfunk/1 (P2)
|
|
||||||
│ │ └── cbindgen.toml
|
|
||||||
│ ├── punktfunk-host/ # Linux host binary
|
|
||||||
│ │ ├── src/capture/ # pipewire / portal
|
|
||||||
│ │ ├── src/encode/ # ffmpeg vaapi/nvenc
|
|
||||||
│ │ ├── src/vdisplay/ # trait + kwin/wlroots/mutter impls
|
|
||||||
│ │ ├── src/input/ # reis + uinput
|
|
||||||
│ │ └── src/web/ # axum config/pairing API
|
|
||||||
│ └── punktfunk-probe/ # reference Rust client (M4)
|
|
||||||
├── clients/
|
|
||||||
│ ├── apple/ # Swift package, imports punktfunk_core.h (M5)
|
|
||||||
│ └── android/ # Kotlin + JNI (later)
|
|
||||||
├── include/ # generated punktfunk_core.h
|
|
||||||
└── tools/
|
|
||||||
├── latency-probe/
|
|
||||||
└── loss-harness/
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 12. Immediate next actions (first week)
|
|
||||||
|
|
||||||
1. **Stand up the workspace** with `punktfunk-core` (empty ABI + `cbindgen`) and `punktfunk-host` skeletons; wire up CI (Gitea Actions, BuildKit-based pipelines).
|
|
||||||
2. **M0 spike on wlroots:** headless output → PipeWire capture → NVENC/VAAPI encode → playable file. This validates the riskiest *pipeline* assumptions in days, on real GPU hardware.
|
|
||||||
3. **Read KRdp's source** for how KDE creates virtual outputs and casts them — it's the closest existing reference for the KWin path needed in M2.
|
|
||||||
4. **Decide P1 protocol depth:** confirm exactly which `serverinfo`/RTSP/pairing messages a current Moonlight client requires for a successful connect, so M2's compat surface is scoped precisely.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*The shape of the bet: M2 alone — virtual-display streaming to stock Moonlight clients on Linux — is a complete, useful, gap-filling release. Everything after it (the wall-breaking transport, native clients, mic-done-right) is upside you unlock from a position of having already shipped, with the hard transport work resting on a FEC core that makes the 1 Gbps ceiling a thing of the past rather than a thing to hack around.*
|
|
||||||
@@ -1,5 +1,10 @@
|
|||||||
# Linux host setup — NVIDIA GPU VM (pipeline spike + GameStream host)
|
# Linux host setup — NVIDIA GPU VM (pipeline spike + GameStream host)
|
||||||
|
|
||||||
|
> **Status:** Setup guide — still current and in active use (referenced from
|
||||||
|
> `clients/apple/README.md`). The pipeline spike and the GameStream host are **shipped**
|
||||||
|
> (see `design/gamestream-host-plan.md` + `CLAUDE.md`). This doc is trimmed to the bring-up
|
||||||
|
> steps + gotchas; the §4 crate-API walkthrough was folded into `CLAUDE.md` "Pinned crate facts".
|
||||||
|
|
||||||
How to bring up the build environment for the punktfunk Linux host on an NVIDIA-GPU Ubuntu VM
|
How to bring up the build environment for the punktfunk Linux host on an NVIDIA-GPU Ubuntu VM
|
||||||
and run the **pipeline spike** (capture→encode). `punktfunk-core` already builds and is tested
|
and run the **pipeline spike** (capture→encode). `punktfunk-core` already builds and is tested
|
||||||
cross-platform; this is about the platform backends in `crates/punktfunk-host`.
|
cross-platform; this is about the platform backends in `crates/punktfunk-host`.
|
||||||
@@ -103,48 +108,26 @@ source /tmp/punktfunk-sway-env.sh
|
|||||||
swaymsg exec foot # animated content
|
swaymsg exec foot # animated content
|
||||||
# Live portal capture → NVENC HEVC → playable file, with each AU also round-tripped
|
# Live portal capture → NVENC HEVC → playable file, with each AU also round-tripped
|
||||||
# through a punktfunk_core host→client Session (FEC + packetize + reassemble) and verified:
|
# through a punktfunk_core host→client Session (FEC + packetize + reassemble) and verified:
|
||||||
cargo run -p punktfunk-host -- m0 --source portal --seconds 5 --out /tmp/punktfunk-m0.h265
|
cargo run -p punktfunk-host -- spike --source portal --seconds 5 --out /tmp/punktfunk-spike.h265
|
||||||
ffprobe /tmp/punktfunk-m0.h265
|
ffprobe /tmp/punktfunk-spike.h265
|
||||||
# No capture session needed (encode + core only): --source synthetic
|
# No capture session needed (encode + core only): --source synthetic
|
||||||
```
|
```
|
||||||
|
|
||||||
Verified result: `1920x1080` HEVC, ~300 frames in 5s, `punktfunk-core loopback … 0 mismatches`.
|
Verified result: `1920x1080` HEVC, ~300 frames in 5s, `punktfunk-core loopback … 0 mismatches`.
|
||||||
The portal negotiates packed **`RGB` (24-bit, 3 bpp)** on wlroots; the encoder expands it to
|
The portal negotiates packed **`RGB` (24-bit, 3 bpp)** on wlroots; the encoder expands it to
|
||||||
`rgb0` (one pad byte/pixel, no colour math) since NVENC accepts `rgb0`/`bgr0` but not
|
`rgb0` (one pad byte/pixel, no colour math) since NVENC accepts `rgb0`/`bgr0` but not
|
||||||
`rgb24`. dmabuf zero-copy import is still deferred (plan §9) — this is the CPU-copy path.
|
`rgb24`. **GPU zero-copy is now implemented on all paths** (tiled dmabuf → EGL/GL → CUDA;
|
||||||
|
LINEAR dmabuf → Vulkan bridge → CUDA → NVENC — see `CLAUDE.md`); the `capture` module keeps a
|
||||||
|
`cpu_bytes` fallback for inputs that can't be imported.
|
||||||
|
|
||||||
Crate choices, verified current:
|
Crate/API details (`ashpd` 0.13 screencast handshake, `pipewire` 0.9 frame pull, `ffmpeg-next`
|
||||||
- **Capture (portal path):** [`ashpd`](https://docs.rs/ashpd) **0.13** with the
|
8.x encoder selection, `reis`/uinput input) now live in `CLAUDE.md` "Pinned crate facts", which
|
||||||
`screencast` feature (the `pipewire` feature is *not* needed — `open_pipe_wire_remote`
|
is the source of truth. The FFmpeg-prefix override (`export FFMPEG_DIR=/that/prefix`) and
|
||||||
is unconditional). Flow (0.13 API, verified against the vendored source): `Screencast::new`
|
the bindgen `LIBCLANG_PATH` knob are covered in the troubleshooting table below.
|
||||||
→ `create_session(Default)` → `select_sources(&session, SelectSourcesOptions::default()
|
|
||||||
.set_sources(BitFlags::from_flag(SourceType::Monitor))…)` → `start(&session, None,
|
|
||||||
Default)` → `.response()?` → `Stream::pipe_wire_node_id()` + `open_pipe_wire_remote()`.
|
|
||||||
Note 0.13 takes **options structs**, not the old positional args, and defaults to the
|
|
||||||
**tokio** runtime — drive the handshake on a *multi-thread* tokio runtime (a
|
|
||||||
current-thread one starves zbus's reader and the portal reports "Invalid session").
|
|
||||||
Pull frames with [`pipewire`](https://docs.rs/pipewire) **0.9** — it must match the
|
|
||||||
pipewire crate ashpd 0.13 links (the `pipewire-sys` `links` key is unique per build, so
|
|
||||||
`0.10` fails to resolve). 0.9 uses `MainLoopRc`/`ContextRc::connect_fd_rc(OwnedFd)`/
|
|
||||||
`StreamBox`. Only request `SourceType::Monitor` — the wlr backend's
|
|
||||||
`AvailableSourceTypes` is `1` (Monitor only); asking for `Window`/`Virtual` invalidates
|
|
||||||
the session. Set `XDG_CURRENT_DESKTOP=sway` so the wlr portal backend is chosen, and
|
|
||||||
import it into the portal's environment (see "Portal bring-up" below).
|
|
||||||
- **Encode:** [`ffmpeg-next`](https://crates.io/crates/ffmpeg-next) **8.x** (binds the
|
|
||||||
system FFmpeg 8.x via pkg-config; needs `clang`/`libclang`). Select the encoder by
|
|
||||||
name — `encoder::find_by_name("hevc_nvenc")`, *not* by codec id (that's the SW encoder).
|
|
||||||
Low-latency opts: `preset=p1`, `tune=ull`, `rc=cbr`, `bf=0`, `delay=0`, large `g`.
|
|
||||||
If your FFmpeg is in a non-standard prefix, `export FFMPEG_DIR=/that/prefix`.
|
|
||||||
- **Zero-copy is the hard part.** There's no direct dmabuf→CUDA import in FFmpeg.
|
|
||||||
**Start with the CPU-copy fallback** (download frame → `hwupload_cuda` → `hevc_nvenc`)
|
|
||||||
to get an end-to-end stream, then chase true dmabuf zero-copy. The plan flags this
|
|
||||||
(§9) and the `capture` module already has a `cpu_bytes` fallback field.
|
|
||||||
- **Input (GameStream host):** [`reis`](https://crates.io/crates/reis) (pure-Rust libei — no native
|
|
||||||
`libei` needed) with `input-linux`/uinput as the universal fallback.
|
|
||||||
|
|
||||||
Then continue toward the **GameStream host**: `serverinfo`/RTSP/pairing enough for a stock Moonlight client
|
The **GameStream host** built on this spike is shipped — `serverinfo`/RTSP/pairing for a stock
|
||||||
to connect, a KWin virtual output created on connect, input via reis/uinput — the
|
Moonlight client, a per-compositor virtual output created on connect, input via reis/uinput.
|
||||||
shippable milestone.
|
See `design/gamestream-host-plan.md` + `CLAUDE.md`.
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
@@ -157,3 +140,8 @@ shippable milestone.
|
|||||||
| `Cannot load libnvidia-encode.so.1` | NVENC runtime lib missing (driver) or unlicensed vGPU |
|
| `Cannot load libnvidia-encode.so.1` | NVENC runtime lib missing (driver) or unlicensed vGPU |
|
||||||
| `cargo build` can't find FFmpeg | `export FFMPEG_DIR=$(pkg-config --variable=prefix libavcodec)` or point `PKG_CONFIG_PATH` at the custom build |
|
| `cargo build` can't find FFmpeg | `export FFMPEG_DIR=$(pkg-config --variable=prefix libavcodec)` or point `PKG_CONFIG_PATH` at the custom build |
|
||||||
| bindgen: libclang not found | `export LIBCLANG_PATH=$(llvm-config --libdir)` |
|
| bindgen: libclang not found | `export LIBCLANG_PATH=$(llvm-config --libdir)` |
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
None — the pipeline spike and the GameStream host it seeds are both shipped (see
|
||||||
|
`design/gamestream-host-plan.md` + `CLAUDE.md`). This file remains as the host-box bring-up guide.
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
# punktfunk — security audit (2026-06-21)
|
# punktfunk — security audit (2026-06-21)
|
||||||
|
|
||||||
|
> **Status:** AUDIT COMPLETE (2026-06-21). 11 of 12 confirmed findings are FIXED (`3526517` · `3c55ec3`) or accepted-risk DOCUMENTED-INHERENT (#5/#9); **one remains OPEN: #12** (global `NODE_TLS_REJECT_UNAUTHORIZED`, DEFERRED). This doc is trimmed to the audit trail (executive summary · threat model · per-finding status · cross-cutting themes · positives · refuted findings) plus the accepted-risk rationale and the open item. Per-finding remediation prose for the FIXED findings is collapsed to one line + commit — the shipped code is the source of truth and git history holds the full original.
|
||||||
|
|
||||||
Whole-project audit by a 10-surface multi-agent review; every finding adversarially verified (reachability, attacker-control, existing mitigation). **10 surfaces · 20 raw findings → 18 confirmed/partial, 2 refuted.** Threat model: a malicious network client (pre- and post-pairing) is the primary adversary; also an on-path MITM and a local unprivileged user (the host is privileged).
|
Whole-project audit by a 10-surface multi-agent review; every finding adversarially verified (reachability, attacker-control, existing mitigation). **10 surfaces · 20 raw findings → 18 confirmed/partial, 2 refuted.** Threat model: a malicious network client (pre- and post-pairing) is the primary adversary; also an on-path MITM and a local unprivileged user (the host is privileged).
|
||||||
|
|
||||||
## Remediation status (2026-06-21)
|
## Remediation status (2026-06-21)
|
||||||
@@ -11,97 +13,65 @@ All 12 confirmed findings have been addressed — fixed, or documented where a f
|
|||||||
| #1 | high | **FIXED** (3526517) — secret files 0600 + dir 0700 / Windows icacls DACL |
|
| #1 | high | **FIXED** (3526517) — secret files 0600 + dir 0700 / Windows icacls DACL |
|
||||||
| #2 | high | **FIXED** (3526517) — single-use SPAKE2 PIN (consumed at the host key-confirmation) |
|
| #2 | high | **FIXED** (3526517) — single-use SPAKE2 PIN (consumed at the host key-confirmation) |
|
||||||
| #3 | med | **FIXED** (3526517) — RTSP packetSize bounded + saturating packetizer math |
|
| #3 | med | **FIXED** (3526517) — RTSP packetSize bounded + saturating packetizer math |
|
||||||
| #4 | low | **FIXED** — mgmt mTLS-cert auth restricted to a read-only allowlist; admin/state-changing routes require the bearer token |
|
| #4 | low | **FIXED** (3c55ec3) — mgmt mTLS-cert auth restricted to a read-only allowlist; admin/state-changing routes require the bearer token |
|
||||||
| #5 | low | **DOCUMENTED (won't-fix on legacy)** — legacy GameStream GCM nonce reuse is inherent to Nvidia's old-style control encryption (Apollo/Moonlight identical); the GCM key is client-known. Real fix = V2 control-encryption negotiation; use punktfunk/1 for untrusted nets. Code comment at `control.rs` rumble loop. |
|
| #5 | low | **DOCUMENTED (won't-fix on legacy)** — legacy GameStream GCM nonce reuse is inherent to Nvidia's old-style control encryption (Apollo/Moonlight identical); the GCM key is client-known. Real fix = V2 control-encryption negotiation; use punktfunk/1 for untrusted nets. Code comment at `control.rs` rumble loop. |
|
||||||
| #6 | low | **FIXED** — RTSP Content-Length/header caps + per-read timeout + concurrent-connection cap |
|
| #6 | low | **FIXED** (3c55ec3) — RTSP Content-Length/header caps + per-read timeout + concurrent-connection cap |
|
||||||
| #7 | low | **FIXED (GameStream) / DOCUMENTED (native)** — new `VirtualDisplay::set_launch_command` carries the launch command per-session (GameStream); native path keeps the env (safe under today's single-session model; plumb per-session with concurrent sessions) |
|
| #7 | low | **FIXED (3c55ec3, GameStream) / DOCUMENTED (native)** — new `VirtualDisplay::set_launch_command` carries the launch command per-session (GameStream); native path keeps the env (safe under today's single-session model; plumb per-session with concurrent sessions) |
|
||||||
| #8 | info | **FIXED** — constant-time GameStream phase-4 hash compare (`crypto::ct_eq`) |
|
| #8 | info | **FIXED** (3c55ec3) — constant-time GameStream phase-4 hash compare (`crypto::ct_eq`) |
|
||||||
| #9 | info | **DOCUMENTED** — GameStream pairing over plain HTTP is inherent to GFE compat; steer untrusted networks to the SPAKE2 native plane |
|
| #9 | info | **DOCUMENTED** — GameStream pairing over plain HTTP is inherent to GFE compat; steer untrusted networks to the SPAKE2 native plane |
|
||||||
| #10 | info | **FIXED** — fixed ALPN (`pkf1`) on both QUIC endpoints (coordinated client+host upgrade) |
|
| #10 | info | **FIXED** (3c55ec3) — fixed ALPN (`pkf1`) on both QUIC endpoints (coordinated client+host upgrade) |
|
||||||
| #11 | info | **FIXED** — FEC reconstruction failure is now a counted drop, not stream-fatal |
|
| #11 | info | **FIXED** (3c55ec3) — FEC reconstruction failure is now a counted drop, not stream-fatal |
|
||||||
| #12 | low | **DEFERRED (fix ready, reverted)** — the scoped-dispatcher fix (undici `Agent` on `proxyRequest`'s `fetch` option) is designed and the mechanism verified sound (h3 honors the fetch option), but it needs `undici` added as a web dependency (`bun add undici` + lockfile regen), which requires the web build env — not available here. Reverted to keep the web build/proxy working. Latent-only: the loopback mgmt fetch is the web console's ONLY outbound TLS, so the global env weakens nothing today. Apply with: `cd web && bun add undici`, then scope `rejectUnauthorized:false` to the mgmt fetch and drop the global env. |
|
| #12 | low | **DEFERRED (fix ready, reverted)** — the scoped-dispatcher fix (undici `Agent` on `proxyRequest`'s `fetch` option) is designed and the mechanism verified sound (h3 honors the fetch option), but it needs `undici` added as a web dependency (`bun add undici` + lockfile regen), which requires the web build env — not available here. Reverted to keep the web build/proxy working. Latent-only: the loopback mgmt fetch is the web console's ONLY outbound TLS, so the global env weakens nothing today. Apply with: `cd web && bun add undici`, then scope `rejectUnauthorized:false` to the mgmt fetch and drop the global env. |
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
Exactly **one** finding is still open — everything else is FIXED or accepted-risk DOCUMENTED (see the table above and #5/#9 below).
|
||||||
|
|
||||||
|
- **#12 [LOW] Web console sets `NODE_TLS_REJECT_UNAUTHORIZED=0` process-globally — DEFERRED.** Latent-only today: the only server-side outbound TLS hop is the loopback proxy to `https://127.0.0.1:47990`, which cannot be MITM'd, so impact is nil now. The fix is designed and verified sound (scope `rejectUnauthorized:false` to a per-request `https.Agent`/undici `Agent` pinned to the host cert on `proxyRequest`'s `fetch`, and drop the global env) but was reverted because it needs `bun add undici` (+ lockfile regen) in the web build env, not available here. **Guard: this MUST be fixed before the web app gains ANY off-loopback server-side outbound TLS** — an update check, webhook, metadata fetch, or pointing `PUNKTFUNK_MGMT_URL` off-loopback would make it silently exploitable. Full detail in the findings section below.
|
||||||
|
|
||||||
## Executive summary
|
## Executive summary
|
||||||
|
|
||||||
Overall the punktfunk host is a security-conscious codebase with a strong cryptographic and wire-parsing core: the FEC/reassembler path bounds every attacker-controlled length field before allocation, AES-GCM is used correctly with per-direction nonce separation and seq-as-AAD on the native plane, and the native trust model (SPAKE2 PIN binding both cert fingerprints, fingerprint pinning that still verifies the real TLS handshake signature) is genuinely sound. The most serious real defects are (1) local secret-disclosure of the host's master private key (key.pem) — written with no restrictive mode/ACL while the far-less-sensitive mgmt token is carefully 0600 — which on Windows (%ProgramData% default Users-read ACL, LocalSystem service) is a near-certain cross-privilege host-impersonation primitive, and (2) the native SPAKE2 PIN ceremony permitting unlimited online guesses against a static, non-rotating 4-digit PIN (no disarm-on-failure, no lockout), which contradicts the documented "one online guess" guarantee and lets a pre-auth LAN attacker brute-force pairing of a fully-trusted rogue client in a few hours against the default standalone/CLI flow. Dominant themes: file-permission hygiene on secrets is inconsistent (the secure pattern exists but is applied selectively), pairing throttling relies on a single global rate-limit rather than attempt-bounding, and authorization is overbroad (any streaming-paired cert is also a full mgmt admin). The remaining findings are a contained pre-auth RTSP video-thread DoS (unbounded packetSize and Content-Length), a legacy GameStream control-stream GCM nonce-reuse that is muted by modern V2 negotiation and being key-gated, and several defense-in-depth nits (non-constant-time GameStream hash compare, no QUIC ALPN, cross-session env-var launch confusion, global NODE_TLS_REJECT_UNAUTHORIZED). No memory-unsafety or RCE was found on attacker wire bytes; panics are safe Rust and isolated by panic=unwind. 
Net: a solid foundation whose highest-leverage fixes are tightening secret file permissions and making the PIN single-use/lockout-bounded.
|
Overall the punktfunk host is a security-conscious codebase with a strong cryptographic and wire-parsing core: the FEC/reassembler path bounds every attacker-controlled length field before allocation, AES-GCM is used correctly with per-direction nonce separation and seq-as-AAD on the native plane, and the native trust model (SPAKE2 PIN binding both cert fingerprints, fingerprint pinning that still verifies the real TLS handshake signature) is genuinely sound. The most serious real defects are (1) local secret-disclosure of the host's master private key (key.pem) — written with no restrictive mode/ACL while the far-less-sensitive mgmt token is carefully 0600 — which on Windows (%ProgramData% default Users-read ACL, LocalSystem service) is a near-certain cross-privilege host-impersonation primitive, and (2) the native SPAKE2 PIN ceremony permitting unlimited online guesses against a static, non-rotating 4-digit PIN (no disarm-on-failure, no lockout), which contradicts the documented "one online guess" guarantee and lets a pre-auth LAN attacker brute-force pairing of a fully-trusted rogue client in a few hours against the default standalone/CLI flow. Dominant themes: file-permission hygiene on secrets is inconsistent (the secure pattern exists but is applied selectively), pairing throttling relies on a single global rate-limit rather than attempt-bounding, and authorization is overbroad (any streaming-paired cert is also a full mgmt admin). The remaining findings are a contained pre-auth RTSP video-thread DoS (unbounded packetSize and Content-Length), a legacy GameStream control-stream GCM nonce-reuse that is muted by modern V2 negotiation and being key-gated, and several defense-in-depth nits (non-constant-time GameStream hash compare, no QUIC ALPN, cross-session env-var launch confusion, global NODE_TLS_REJECT_UNAUTHORIZED). No memory-unsafety or RCE was found on attacker wire bytes; panics are safe Rust and isolated by panic=unwind. 
Net: a solid foundation whose highest-leverage fixes are tightening secret file permissions and making the PIN single-use/lockout-bounded.
|
||||||
|
|
||||||
|
> The executive summary describes the **pre-fix** state captured by the audit; it is retained verbatim as the historical record. See the status table for what shipped.
|
||||||
|
|
||||||
## Findings (ranked by severity × exploitability)
|
## Findings (ranked by severity × exploitability)
|
||||||
|
|
||||||
|
FIXED findings are collapsed to one line + commit. The two accepted-risk findings (#5, #9) and the one open finding (#12) keep their full rationale.
|
||||||
|
|
||||||
### 🟠 #1 [HIGH] Host master private key (key.pem) written with no restrictive file mode / ACL — local secret disclosure enabling full host impersonation
|
### 🟠 #1 [HIGH] Host master private key (key.pem) written with no restrictive file mode / ACL — local secret disclosure enabling full host impersonation
|
||||||
|
**Surface:** `secrets-availability` · **FIXED (3526517).** key.pem is the single trust root for ALL surfaces (GameStream TLS cert + pairing signing key, the punktfunk/1 QUIC identity every client pins, the mgmt HTTPS cert); `ServerIdentity::load_or_create` wrote it with a bare `fs::write`/`create_dir_all` while the less-sensitive mgmt token used `OpenOptions::mode(0o600)`. On Windows (%ProgramData% Users-read ACL + LocalSystem service) this was a near-certain cross-privilege host-impersonation primitive; on Linux it landed at umask (verified world-readable). Fix: 0600 file + 0700 dir on Unix mirroring `mgmt_token.rs`, SYSTEM+Administrators-only DACL on the Windows subtree, extended to `client-key.pem`/trust stores, + a 0600 regression test. Refs `gamestream/cert.rs`, `gamestream/mod.rs`, `mgmt_token.rs`, `service.rs`, `native_pairing.rs`.
|
||||||
**Surface:** `secrets-availability`
|
|
||||||
**Refs:** `crates/punktfunk-host/src/gamestream/cert.rs:36-44`, `crates/punktfunk-host/src/gamestream/mod.rs:216-232`, `crates/punktfunk-host/src/mgmt_token.rs:58-70`, `crates/punktfunk-host/src/service.rs:605-627`, `crates/punktfunk-host/src/native_pairing.rs:116-126`
|
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #1 because it is the highest verdict-adjusted severity (high, three corroborating findings merged) and the most reliably exploitable post-foothold: key.pem is the single trust root for ALL surfaces — GameStream TLS server cert, GameStream pairing signing key, the punktfunk/1 QUIC identity every client pins, and the mgmt HTTPS cert — so its disclosure yields full host impersonation/MITM that defeats client fingerprint pinning, plus the mgmt bearer token is likewise unprotected on Windows. ServerIdentity::load_or_create writes it with a bare fs::write (no mode) and create_dir_all (no DACL). On Windows the leak is near-certain and umask-independent: config_dir() is %ProgramData%\punktfunk, whose default ACL grants BUILTIN\Users read, and the host runs as LocalSystem — any local unprivileged user reads the SYSTEM service's key; the mgmt-token 0o600 hardening is #[cfg(unix)] so it is a no-op there. On Linux the file lands at umask (commonly 0664/0644, verified live as world-readable) and is reachable cross-user whenever the home/config chain is traversable. The project demonstrably knows the secure pattern (mgmt_token.rs uses OpenOptions::mode(0o600)+set_permissions) but applies it to the less-sensitive token and not the master key. Local-only (adversary #3), not pre-auth/network, which caps it below critical.
|
|
||||||
|
|
||||||
**Fix:** Write key.pem (and cert.pem) via OpenOptions::mode(0o600) + a follow-up set_permissions(0o600) on Unix, mirroring mgmt_token.rs; create config_dir() with DirBuilder::mode(0o700). On Windows set an explicit DACL granting only SYSTEM+Administrators on the punktfunk %ProgramData% subtree and per-file on key.pem / mgmt-token / *paired.json (or relocate the key under a SYSTEM-only path), since the default ProgramData ACL is Users-readable. Extend the same hardening to client-key.pem and the persisted trust stores. Add a regression test asserting 0600 on key.pem on Unix.
|
|
||||||
|
|
||||||
### 🟠 #2 [HIGH] Native SPAKE2 PIN ceremony allows unlimited online guesses against a static 4-digit PIN — pre-auth brute-force to a fully-trusted rogue client
|
### 🟠 #2 [HIGH] Native SPAKE2 PIN ceremony allows unlimited online guesses against a static 4-digit PIN — pre-auth brute-force to a fully-trusted rogue client
|
||||||
|
**Surface:** `pairing-pin` · **FIXED (3526517).** `pair_ceremony` logged+errored on a wrong PIN but never called `np.disarm()`/rotated; `current_pin()` returned the same value forever, throttled only by one process-wide 2s `PAIRING_COOLDOWN` over a 10,000-PIN space (~2.8h avg → permanently-pinned rogue cert with input/capture/launch). The standalone `punktfunk1-host` default (`--require-pairing` arms `expires_at:None`) made the indefinite static-PIN window the DEFAULT, not opt-in — contradicting the documented "one online guess". Fix: a failed confirmation now consumes/disarms the PIN (the actual "one online guess"); disarm after a successful pair too. Refs `punktfunk1.rs`, `native_pairing.rs`, `mgmt.rs`.
|
||||||
|
|
||||||
**Surface:** `pairing-pin`
|
### 🟡 #3 [MEDIUM] Pre-auth RTSP ANNOUNCE packetSize underflows/panics the GameStream video pipeline
|
||||||
**Refs:** `crates/punktfunk-host/src/punktfunk1.rs:388-446`, `crates/punktfunk-host/src/punktfunk1.rs:475-491`, `crates/punktfunk-host/src/punktfunk1.rs:82`, `crates/punktfunk-host/src/native_pairing.rs:189-234`, `crates/punktfunk-host/src/native_pairing.rs:128-131`, `crates/punktfunk-host/src/mgmt.rs:841-842`
|
**Surface:** `gamestream-parsing` · **FIXED (3526517).** The RTSP listener on TCP 48010 does no TLS/pairing/auth; `x-nv-video[0].packetSize` flowed unbounded into `VideoPacketizer::new` where `payload_per_shard = packet_size - 16`: packetSize==16 → div-by-zero panic, packetSize<16 → underflow → OOB-slice panic, packetSize==17 → one byte/shard → per-frame datagram flood; the safe-Rust panic unwound before `running.store(false)`, wedging the session until restart (not RCE — isolated by panic=unwind). Fix: clamp packetSize (floor ~64 / cap ~2048) + checked/saturating packetizer math + `store(false)` on the unwind path + a `{0,15,16,17}` regression test. Refs `gamestream/rtsp.rs`, `gamestream/video.rs`, `gamestream/stream.rs`.
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #2: high severity AND pre-auth + fully attacker-controlled, the strongest exploitability combination among the high-rated issues — gated only on pairing being armed and an hours-long active window. Merges the three pairing-pin brute-force findings (they share one root cause: no disarm/rotate-on-failure and no attempt budget). pair_ceremony logs a warning and returns Err on a wrong PIN but never calls np.disarm() or rotates the PIN; current_pin() returns the same value forever (cleared only by TTL or operator); the only throttle is one process-wide 2s PAIRING_COOLDOWN. The PIN space is 10,000. Critically the standalone punktfunk1-host default (--require-pairing forces allow_pairing) arms with expires_at:None at startup, so the indefinite static-PIN window is the DEFAULT for that binary, not an opt-in. At ~1 guess/2s the space exhausts in ~5.5h worst / ~2.8h avg, and on success the attacker's cert is permanently pinned, granting input injection, screen capture and app launch. This directly contradicts the documented 'one online guess, no offline dictionary' claim — the offline-dictionary resistance from SPAKE2 holds, but the online single-guess limit is simply not implemented. Mitigations partial: the web/mgmt arm path is TTL-bounded (15..600s), confining the worst case to the CLI/standalone mode.
|
### 🔵 #4 [LOW] Any paired punktfunk/1 streaming client gets full management-API authority via the mTLS-paired-cert auth path
|
||||||
|
**Surface:** `authz-trust` · **FIXED (3c55ec3).** `require_auth` granted any verified peer cert in the native paired store full unscoped `/api/v1` access — the SAME set that admits a device to stream — so a watch-only device could `DELETE /clients/{fp}`, arm pairing + read the PIN, approve knocks, `DELETE /session`, CRUD the library. Fix: mTLS-cert auth restricted to a read-only allowlist; state-changing/admin/pairing-administration routes require the bearer token. Refs `mgmt.rs:459-488`.
|
||||||
**Fix:** Make a failed confirmation consume the PIN: on ok==false in pair_ceremony, call np.disarm() (or rotate to a fresh random PIN) so a single wrong guess closes the window — this is what actually delivers the documented 'one online guess'. Add a per-window failed-attempt budget (auto-disarm after N>=1 failures), give the CLI no-expiry arm path a default expiry, and disarm after a SUCCESSFUL pair too. Keep the 2s cooldown as defence-in-depth and raise the web-armed PIN to 6 digits.
|
|
||||||
|
|
||||||
### 🟡 #3 [MEDIUM] Pre-auth RTSP ANNOUNCE packetSize underflows/panics the GameStream video pipeline (div-by-zero / OOB slice / allocation amplification)
|
|
||||||
|
|
||||||
**Surface:** `gamestream-parsing`
|
|
||||||
**Refs:** `crates/punktfunk-host/src/gamestream/rtsp.rs:275`, `crates/punktfunk-host/src/gamestream/video.rs:55-89`, `crates/punktfunk-host/src/gamestream/stream.rs:322`
|
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #3: medium and fully pre-auth + attacker-controlled — the highest-exploitability of the medium-and-below tier. The RTSP listener on TCP 48010 performs no TLS/pairing/auth; an unauthenticated peer drives OPTIONS→ANNOUNCE→PLAY (+ a UDP ping to the video port) and the video thread starts on state.stream alone, no paired session required. x-nv-video[0].packetSize is read with no bound and flows into VideoPacketizer::new where payload_per_shard = packet_size - 16: packetSize==16 → pps==0 → div-by-zero panic; packetSize<16 → underflow → OOB slice panic; packetSize==17 → one byte/shard → per-frame datagram flood. Reliable remote pre-auth DoS of a privileged media service, made stickier because the panic unwinds before running.store(false) leaving the session wedged until restart. Calibrated medium (not higher) because it is a SAFE Rust panic (checked slice access, no memory corruption/UB) isolated to the punktfunk-video thread by panic=unwind — the host process and other listeners survive; not RCE.
|
|
||||||
|
|
||||||
**Fix:** Validate packet_size in stream_config() before building StreamConfig: reject packetSize below a sane floor (e.g. < 64) and clamp to a sane max (e.g. <= 2048). Additionally harden VideoPacketizer::new to use checked/saturating arithmetic and refuse construction (or fall back to a default) when packet_size < SHARD_HEADER-16 so the per-frame path never sees pps==0 or a wrapped payload_per_shard. Also store(false) on the unwind path so a panic doesn't wedge the session. Add a regression test over packetSize in {0,15,16,17}.
|
|
||||||
|
|
||||||
### 🔵 #4 [LOW] Any paired punktfunk/1 streaming client gets full management-API authority via the mTLS-paired-cert auth path (no streaming-vs-admin separation)
|
|
||||||
|
|
||||||
**Surface:** `authz-trust`
|
|
||||||
**Refs:** `crates/punktfunk-host/src/mgmt.rs:459-488`, `crates/punktfunk-host/src/mgmt.rs:466-470`
|
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #4: low but a genuine post-auth privilege over-broadening with concrete admin impact. require_auth grants any verified peer cert whose fingerprint is in the native paired store full unscoped access to every /api/v1 route — the SAME paired set that admits a device to stream. So a device paired purely to watch the screen can DELETE /clients/{fp} (unpair others), POST /native/pair/arm (open a pairing window and read the PIN), approve arbitrary knocking devices, DELETE /session, and CRUD the library; there is no role/scope check anywhere in the router. The native client presents its identity via TLS client auth on both ports, so the credential is genuinely usable against mgmt. Bounded to low because it requires being an already-paired (operator-trusted) device and the mgmt port binds loopback by default — remote reach needs an explicit routable --mgmt-bind (and the mTLS path then bypasses the token requirement).
|
|
||||||
|
|
||||||
**Fix:** Separate streaming trust from management trust: keep a distinct admin allow-list (or an admin flag on a paired entry) for the mTLS mgmt path, or restrict mTLS-cert auth to read-only endpoints and require the bearer token for state-changing/admin routes. At minimum gate the pairing-administration endpoints (arm/approve/unpair) and session/library mutation behind the bearer token only.
|
|
||||||
|
|
||||||
### 🔵 #5 [LOW] GameStream legacy control-stream AES-GCM nonce reuse across directions (host rumble vs client input share key+nonce)
|
### 🔵 #5 [LOW] GameStream legacy control-stream AES-GCM nonce reuse across directions (host rumble vs client input share key+nonce)
|
||||||
|
|
||||||
**Surface:** `crypto`
|
**Surface:** `crypto` · **DOCUMENTED (won't-fix on legacy).**
|
||||||
**Refs:** `crates/punktfunk-host/src/gamestream/control.rs:373-400`, `crates/punktfunk-host/src/gamestream/control.rs:257-266`, `crates/punktfunk-host/src/gamestream/control.rs:67,106-114`
|
**Refs:** `crates/punktfunk-host/src/gamestream/control.rs:373-400`, `crates/punktfunk-host/src/gamestream/control.rs:257-266`, `crates/punktfunk-host/src/gamestream/control.rs:67,106-114`
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #5: a real, correctly-identified catastrophic-class crypto defect (AES-GCM (key,nonce) reuse) but adjusted to low because reachability and impact are heavily muted. The legacy NonceKind branches apply no direction separation (other => other), so host rumble (rumble_seq from 0) and client control (seq from 0) under the shared rikey produce identical (key,nonce). BUT: (1) it only triggers on the legacy auto-detected scheme — modern moonlight-common-c negotiates the V2 scheme which flips marker[0] to 'H' and is direction-separated, so the default path is safe; the doc claim 'the legacy path — which we hit' is stale; (2) the rikey is delivered only over the mTLS /launch, so a pure MITM cannot derive the key — only a paired client can; (3) a paired client can already legitimately send any client→host control message (in-scope-by-design), so forgery is largely redundant and the only genuinely new gain is recovering low-value rumble keystream / forging rumble to its own client. Post-auth, conditional path.
|
**Why it ranks here / impact:** Ranked #5: a real, correctly-identified catastrophic-class crypto defect (AES-GCM (key,nonce) reuse) but adjusted to low because reachability and impact are heavily muted. The legacy NonceKind branches apply no direction separation (other => other), so host rumble (rumble_seq from 0) and client control (seq from 0) under the shared rikey produce identical (key,nonce). BUT: (1) it only triggers on the legacy auto-detected scheme — modern moonlight-common-c negotiates the V2 scheme which flips marker[0] to 'H' and is direction-separated, so the default path is safe; the doc claim 'the legacy path — which we hit' is stale; (2) the rikey is delivered only over the mTLS /launch, so a pure MITM cannot derive the key — only a paired client can; (3) a paired client can already legitimately send any client→host control message (in-scope-by-design), so forgery is largely redundant and the only genuinely new gain is recovering low-value rumble keystream / forging rumble to its own client. Post-auth, conditional path.
|
||||||
|
|
||||||
**Fix:** Separate the two directions' nonce spaces for the legacy schemes too — set a reserved high bit/byte of the legacy IV for host-originated packets (mirror the V2 'H' marker), or better, HKDF-derive an independent host→client key from the rikey with a direction label so host and client never share a GCM key. Never let host rumble and client input share (key,nonce).
|
**Fix:** Separate the two directions' nonce spaces for the legacy schemes too — set a reserved high bit/byte of the legacy IV for host-originated packets (mirror the V2 'H' marker), or better, HKDF-derive an independent host→client key from the rikey with a direction label so host and client never share a GCM key. Never let host rumble and client input share (key,nonce). (Inherent to the legacy Nvidia wire; the real fix is the V2 control-encryption / punktfunk/1 path. A code comment marks it at the `control.rs` rumble loop.)
|
||||||
|
|
||||||
### 🔵 #6 [LOW] RTSP request Content-Length / header size unbounded with no read timeout or connection cap — pre-auth slow-loris / memory-growth DoS
|
### 🔵 #6 [LOW] RTSP request Content-Length / header size unbounded with no read timeout or connection cap — pre-auth slow-loris / memory-growth DoS
|
||||||
|
**Surface:** `gamestream-parsing` · **FIXED (3c55ec3).** `read_message` computed `total = end+4+content_len` with no cap and looped `extend_from_slice`; the header scan was unbounded and one unbounded native thread spawned per connection with no global limit (slow exhaustion + thread/FD exhaustion on a privileged plaintext LAN listener). Fix: Content-Length/total-header caps + per-read timeout + concurrent-connection cap. Refs `gamestream/rtsp.rs`.
|
||||||
|
|
||||||
**Surface:** `gamestream-parsing`
|
### 🔵 #7 [LOW] Per-session launch command carried via process-global PUNKTFUNK_GAMESCOPE_APP env var, stomped under concurrent native sessions
|
||||||
**Refs:** `crates/punktfunk-host/src/gamestream/rtsp.rs:82-106`, `crates/punktfunk-host/src/gamestream/rtsp.rs:24-48`
|
**Surface:** `privilege-process-launch` · **FIXED (3c55ec3, GameStream) / DOCUMENTED (native).** `serve_session` did `std::env::set_var(PUNKTFUNK_GAMESCOPE_APP)` per connection under `DEFAULT_MAX_CONCURRENT=4` — a TOCTOU where client B's launch overwrote what client A's gamescope bare-spawn read (and a never-cleared value leaked to a later no-launch client). NOT command injection: `cmd` always resolves through `library::launch_command` (digit-validated Steam appids / operator-only custom store), so the worst case is a different operator-approved title, and only the gamescope bare-spawn backend reads the var. Fix: `VirtualDisplay::set_launch_command` carries it per-session for GameStream; **the native path keeps the env — safe under today's single-session model, plumb per-session as concurrent sessions land** (still-open follow-up). Refs `punktfunk1.rs`, `vdisplay/gamescope.rs`.
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #6: low, pre-auth and attacker-controlled but a rate-limited resource DoS, not unsafety or auth bypass. read_message parses content-length and computes total = end+4+content_len with no cap, looping buf.extend_from_slice until buf.len()>=total; the header scan is likewise unbounded and there is no body/header cap, no read/write timeout, and one unbounded native thread is spawned per connection with no global limit. Growth is bounded by attacker send rate (no pre-allocation), so it is slow exhaustion rather than instant OOM; the stronger lever is thread/FD exhaustion from many idle slow-loris connections at near-zero bandwidth. On a privileged LAN-facing plaintext listener with zero defensive caps.
|
|
||||||
|
|
||||||
**Fix:** Cap Content-Length and total header size to small constants (e.g. reject content_len > 64 KiB, total header > 16 KiB) and close on violation. Add a read timeout so a slow-loris connection cannot pin a thread indefinitely, and bound concurrent RTSP connections.
|
|
||||||
|
|
||||||
### 🔵 #7 [LOW] Per-session launch command carried via process-global PUNKTFUNK_GAMESCOPE_APP env var, stomped under concurrent native sessions (cross-session launch confusion)
|
|
||||||
|
|
||||||
**Surface:** `privilege-process-launch`
|
|
||||||
**Refs:** `crates/punktfunk-host/src/punktfunk1.rs:560-571`, `crates/punktfunk-host/src/punktfunk1.rs:140`, `crates/punktfunk-host/src/vdisplay/gamescope.rs:629-647`
|
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #7: low, post-auth cross-session isolation bug, explicitly NOT command injection. serve_session does std::env::set_var(PUNKTFUNK_GAMESCOPE_APP) per accepted connection with a stale comment claiming 'one session at a time', but DEFAULT_MAX_CONCURRENT=4 sessions run concurrently and the var is read in gamescope::spawn during VirtualDisplay::create — a genuine TOCTOU where client B's launch overwrites what client A's bare-spawn reads, and the never-cleared value leaks into a later no-launch client. Impact is capped because cmd always resolves through library::launch_command (digit-validated Steam appids / operator-only custom store), so the worst case is launching a DIFFERENT operator-approved title or a stale title — and it only affects the gamescope bare-spawn backend (kwin/mutter/wlroots/attach ignore the var).
|
|
||||||
|
|
||||||
**Fix:** Stop carrying the per-session launch command in a process-global env var. Plumb the resolved command through the VirtualDisplay::create call / per-session context (e.g. a field on Mode or a per-session GamescopeDisplay), and on the bare-spawn path pass it explicitly to spawn(); clear/scope it so a stale value never leaks to the next client.
|
|
||||||
|
|
||||||
### ⚪ #8 [INFO] GameStream pairing phase-4 hash compare is not constant-time
|
### ⚪ #8 [INFO] GameStream pairing phase-4 hash compare is not constant-time
|
||||||
|
**Surface:** `pairing-pin` · **FIXED (3c55ec3).** Variable-time `==` on attacker-influenced 32-byte phase-4 SHA-256 digests; not weaponizable (`expected` mixes undisclosed host-random `server_challenge`, a mismatch `map.remove`s the session forcing fresh randomness — no stable secret, no timing→PIN path). Fixed for consistency with the native ceremony: `crypto::ct_eq`. Refs `gamestream/pairing.rs:226-247`.
|
||||||
**Surface:** `pairing-pin`
|
|
||||||
**Refs:** `crates/punktfunk-host/src/gamestream/pairing.rs:226-247`
|
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #8: info / hardening only — a real variable-time `==` on attacker-influenced 32-byte SHA-256 digests, but not weaponizable. The compared `expected` mixes in host-random server_challenge that is never disclosed (so the attacker can neither compute nor aim at the target), the attacker cannot steer client_hash to a chosen value without the PIN key, and any mismatch removes the session (map.remove) forcing a fresh ceremony with new randomness — so there is no stable secret to recover prefix-by-prefix and no path from timing to PIN recovery or match forgery. Worth fixing for consistency since the codebase already has ct_eq for the native ceremony.
|
|
||||||
|
|
||||||
**Fix:** Use a constant-time comparator (subtle::ConstantTimeEq or the project's existing ct_eq) for hash_ok, matching the constant-time discipline already used in the native SPAKE2 ceremony.
|
|
||||||
|
|
||||||
### ⚪ #9 [INFO] GameStream pairing ceremony runs over plain HTTP — inherited GFE brute-forceable-PIN / MITM weakness
|
### ⚪ #9 [INFO] GameStream pairing ceremony runs over plain HTTP — inherited GFE brute-forceable-PIN / MITM weakness
|
||||||
|
|
||||||
**Surface:** `authz-trust`
|
**Surface:** `authz-trust` · **DOCUMENTED (inherent to GameStream compat).**
|
||||||
**Refs:** `crates/punktfunk-host/src/gamestream/nvhttp.rs:33`, `crates/punktfunk-host/src/gamestream/nvhttp.rs:215-264`, `crates/punktfunk-host/src/gamestream/pairing.rs:102-247`
|
**Refs:** `crates/punktfunk-host/src/gamestream/nvhttp.rs:33`, `crates/punktfunk-host/src/gamestream/nvhttp.rs:215-264`, `crates/punktfunk-host/src/gamestream/pairing.rs:102-247`
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #9: info — real but intentional Moonlight-compat behavior, on record rather than a regression. The whole /pair flow (incl. phase-4 cert pinning) is on plain HTTP 47989 with no transport confidentiality and no rate-limiting; the AES key is pin_key(salt,pin) = SHA-256(salt||pin)[..16] feeding AES-128-ECB, so an on-path attacker observing a legitimate pairing can offline-brute-force the 4-digit PIN and forge a clientpairingsecret to get a cert pinned. This is the well-known GFE/Sunshine construction, fixed by interop, and is precisely why punktfunk/1's SPAKE2 path exists; it requires an active MITM during an operator-initiated pairing within the 300s window. A paired GameStream client is in-scope-by-design.
|
**Why it ranks here / impact:** Ranked #9: info — real but intentional Moonlight-compat behavior, on record rather than a regression. The whole /pair flow (incl. phase-4 cert pinning) is on plain HTTP 47989 with no transport confidentiality and no rate-limiting; the AES key is pin_key(salt,pin) = SHA-256(salt||pin)[..16] feeding AES-128-ECB, so an on-path attacker observing a legitimate pairing can offline-brute-force the 4-digit PIN and forge a clientpairingsecret to get a cert pinned. This is the well-known GFE/Sunshine construction, dictated by interop requirements (it cannot be changed without breaking Moonlight compatibility), and is precisely why punktfunk/1's SPAKE2 path exists; it requires an active MITM during an operator-initiated pairing within the 300s window. A paired GameStream client is in-scope-by-design.
|
||||||
@@ -109,31 +79,19 @@ Overall the punktfunk host is a security-conscious codebase with a strong crypto
|
|||||||
**Fix:** Inherent to GameStream compatibility — document it and steer users to punktfunk/1 (SPAKE2) for untrusted networks. Optionally rate-limit pairing sessions per uniqueid/IP and tighten/expire the awaiting-PIN window aggressively.
|
**Fix:** Inherent to GameStream compatibility — document it and steer users to punktfunk/1 (SPAKE2) for untrusted networks. Optionally rate-limit pairing sessions per uniqueid/IP and tighten/expire the awaiting-PIN window aggressively.
|
||||||
|
|
||||||
### ⚪ #10 [INFO] No ALPN configured on the native QUIC server/client (cross-protocol confusion hardening absent)
|
### ⚪ #10 [INFO] No ALPN configured on the native QUIC server/client (cross-protocol confusion hardening absent)
|
||||||
|
**Surface:** `cert-tls-identity` · **FIXED (3c55ec3).** No `alpn_protocols` was set on either endpoint, but no reachable confusion attack: ALPACA needs two TLS services sharing a cert on the SAME transport — GameStream is TLS-over-TCP, punktfunk/1 is TLS-in-QUIC (UDP), and there is exactly one QUIC server, with trust already enforced by fingerprint pinning + Hello/Welcome magic. Fixed as cheap future-proofing: fixed ALPN `pkf1` on both endpoints (coordinated client+host upgrade). Refs `quic.rs:1335-1448`.
|
||||||
**Surface:** `cert-tls-identity`
|
|
||||||
**Refs:** `crates/punktfunk-core/src/quic.rs:1335-1354`, `crates/punktfunk-core/src/quic.rs:1412-1448`
|
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #10: info — factually correct (no alpn_protocols set on either endpoint; the cert.pem identity is shared with GameStream TLS) but no reachable confusion attack. ALPACA-style attacks need two TLS services sharing a cert on the SAME transport; here GameStream is TLS-over-TCP and punktfunk/1 is TLS-in-QUIC (UDP) — not cross-reachable — and there is exactly one QUIC server so ALPN would make no authorization decision. Trust is already enforced by fingerprint pinning + app-layer Hello/Welcome magic. Cheap future-proofing only.
|
|
||||||
|
|
||||||
**Fix:** Set a fixed ALPN on both endpoints (e.g. rustls_cfg.alpn_protocols = vec![b"pkf1".to_vec()]) so a mismatched protocol is rejected during the TLS handshake — defense-in-depth against ever multiplexing protocols on the QUIC endpoint.
|
|
||||||
|
|
||||||
### ⚪ #11 [INFO] FEC reconstruct error on the receive path is stream-fatal — code-contract inconsistency (not an exploitable DoS)
|
### ⚪ #11 [INFO] FEC reconstruct error on the receive path is stream-fatal — code-contract inconsistency (not an exploitable DoS)
|
||||||
|
**Surface:** `core-wire-deser` · **FIXED (3c55ec3).** `Reassembler::push` propagated `coder.reconstruct(...)?` and both receive-side callers treated any non-`NoFrame` error as fatal — inconsistent with the surrounding "malformed = silent drop, never fatal" discipline. Every Err arm was traced unreachable from hostile input (header firewall + block-geometry pinning guarantee equal-length correctly-counted shards; `Config::validate` rejects odd/zero `shard_payload`; MDS Reed-Solomon decodes any `data_shards` distinct shards; reaching the reassembler needs an AES-GCM-decryptable packet; client-side only). Fixed as defense-in-depth: a reconstruct failure is now a counted drop returning `Ok(None)`, reserving `Err` for genuinely fatal conditions. Refs `packet.rs`, `session.rs`, `clients/probe/src/main.rs`, `spike.rs`.
|
||||||
**Surface:** `core-wire-deser`
|
|
||||||
**Refs:** `crates/punktfunk-core/src/packet.rs:411`, `crates/punktfunk-core/src/session.rs:283-289`, `clients/probe/src/main.rs:959`, `crates/punktfunk-host/src/spike.rs:251`
|
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked last: info — a correctly-identified contract inconsistency with NO demonstrable exploit. Reassembler::push propagates coder.reconstruct(...)? and both real receive-side callers treat any non-NoFrame error as fatal, inconsistent with the surrounding 'malformed = silent drop, never fatal' discipline. But every Err arm was traced unreachable from hostile input: header firewall + block-geometry pinning guarantee equal-length, correctly-counted shards; reconstruct is only called once received>=data_shards; Config::validate rejects odd/zero shard_payload before any decode; and MDS Reed-Solomon decodes any data_shards distinct shards. Reaching the reassembler also requires an AES-GCM-decryptable packet, so it is the connected host (not a port-sprayer), and it is client-side only — the privileged host never runs the reassembler on attacker bytes. Pure defense-in-depth hardening.
|
|
||||||
|
|
||||||
**Fix:** Make a FEC reconstruction failure a counted drop rather than stream-fatal: in Reassembler::push match coder.reconstruct(...) and on Err bump packets_dropped (or a fec_failed counter), discard the block, and return Ok(None). Reserve poll_frame's Err for genuinely fatal conditions (role misuse, transport teardown), matching the discipline documented at packet.rs:298-300.
|
|
||||||
|
|
||||||
### 🔵 #12 [LOW] Web console sets NODE_TLS_REJECT_UNAUTHORIZED=0 process-globally — latent footgun disabling all outbound TLS verification
|
### 🔵 #12 [LOW] Web console sets NODE_TLS_REJECT_UNAUTHORIZED=0 process-globally — latent footgun disabling all outbound TLS verification
|
||||||
|
|
||||||
**Surface:** `deps-config-exposure`
|
**Surface:** `deps-config-exposure` · **DEFERRED — THE ONE STILL-OPEN ITEM.**
|
||||||
**Refs:** `web/.env.example:22-24`, `web/web.env.example:11-14`, `web/server/util/auth.ts:17-22`, `web/vite.config.ts:23`
|
**Refs:** `web/.env.example:22-24`, `web/web.env.example:11-14`, `web/server/util/auth.ts:17-22`, `web/vite.config.ts:23`
|
||||||
|
|
||||||
**Why it ranks here / impact:** Ranked #12: low and not currently exploitable (attackerControlled false), included as a latent defense-in-depth defect. NODE_TLS_REJECT_UNAUTHORIZED=0 disables certificate validation for every outbound TLS connection the Node process makes, but the only current server-side outbound hop is the loopback proxy to https://127.0.0.1:47990 (CDN/art fetches are browser-side), and a loopback connection cannot be MITM'd — so impact is nil today. Real impact materializes silently if anyone later adds a server-side off-host HTTPS call (update check, webhook, metadata fetch) or points PUNKTFUNK_MGMT_URL off-loopback.
|
**Why it ranks here / impact:** Ranked #12: low and not currently exploitable (attackerControlled false), included as a latent defense-in-depth defect. NODE_TLS_REJECT_UNAUTHORIZED=0 disables certificate validation for every outbound TLS connection the Node process makes, but the only current server-side outbound hop is the loopback proxy to https://127.0.0.1:47990 (CDN/art fetches are browser-side), and a loopback connection cannot be MITM'd — so impact is nil today. Real impact materializes silently if anyone later adds a server-side off-host HTTPS call (update check, webhook, metadata fetch) or points PUNKTFUNK_MGMT_URL off-loopback.
|
||||||
|
|
||||||
**Fix:** Do not disable TLS verification globally. Pin the host's self-signed cert for the single loopback fetch: pass an https.Agent with the host cert as `ca` (or rejectUnauthorized:false on that one Agent only) to the proxyRequest fetch in server/routes/api/[...].ts, and drop NODE_TLS_REJECT_UNAUTHORIZED from the deployment env.
|
**Fix:** Do not disable TLS verification globally. Pin the host's self-signed cert for the single loopback fetch: pass an https.Agent with the host cert as `ca` (or rejectUnauthorized:false on that one Agent only) to the proxyRequest fetch in server/routes/api/[...].ts, and drop NODE_TLS_REJECT_UNAUTHORIZED from the deployment env. **Status:** this change was **reverted** because it needs `undici` added as a web dependency (`bun add undici` + lockfile regen) in the web build env. To re-apply: run `cd web && bun add undici`, then scope `rejectUnauthorized:false` to the mgmt fetch and drop the global env.
|
||||||
|
|
||||||
## Cross-cutting themes
|
## Cross-cutting themes
|
||||||
|
|
||||||
@@ -144,17 +102,6 @@ Overall the punktfunk host is a security-conscious codebase with a strong crypto
|
|||||||
- Stale concurrency assumptions and process-global mutable state (legacy GCM nonce direction, PUNKTFUNK_GAMESCOPE_APP env var) that were safe under a since-removed 'one session at a time' invariant and now cause cross-session confusion / crypto reuse.
|
- Stale concurrency assumptions and process-global mutable state (legacy GCM nonce direction, PUNKTFUNK_GAMESCOPE_APP env var) that were safe under a since-removed 'one session at a time' invariant and now cause cross-session confusion / crypto reuse.
|
||||||
- Strong, well-tested cryptographic and memory-safety core (bounded wire parsing, correct AEAD/SPAKE2/pinning, catch_unwind FFI, panic=unwind isolation) — the foundation is solid; the residual risk is in operational hardening and trust-tier granularity, not in unsafe/RCE.
|
- Strong, well-tested cryptographic and memory-safety core (bounded wire parsing, correct AEAD/SPAKE2/pinning, catch_unwind FFI, panic=unwind isolation) — the foundation is solid; the residual risk is in operational hardening and trust-tier granularity, not in unsafe/RCE.
|
||||||
|
|
||||||
## Prioritized remediation (do in this order)
|
|
||||||
|
|
||||||
1. Lock down secret files: write key.pem (and cert.pem) 0600 + create config_dir 0700 on Unix using the existing mgmt_token OpenOptions::mode pattern, and set an explicit SYSTEM+Administrators-only DACL on the punktfunk %ProgramData% subtree / key.pem / mgmt-token / *paired.json on Windows. Extend to client-key.pem; add a 0600 regression test.
|
|
||||||
2. Make the native PIN single-use and lockout-bounded: disarm or rotate the PIN on a failed SPAKE2 confirmation, add a per-window failed-attempt budget, give the CLI no-expiry arm path a default expiry, and disarm after a successful pair — this is what delivers the documented 'one online guess'.
|
|
||||||
3. Bound the RTSP video path: validate/clamp x-nv-video[0].packetSize (floor ~64, cap ~2048) in stream_config() and use checked/saturating arithmetic in VideoPacketizer::new so pps==0 / underflow can never occur; store(false) on the unwind path; add a {0,15,16,17} regression test.
|
|
||||||
4. Cap RTSP request parsing: enforce a Content-Length and total-header-size limit, add a read timeout, and bound concurrent connections so a pre-auth peer cannot slow-loris exhaust threads/memory.
|
|
||||||
5. Separate streaming trust from management trust: require the mgmt bearer token (not just a paired streaming cert) for state-changing and pairing-administration routes (arm/approve/unpair/session/library), or keep a distinct admin allow-list.
|
|
||||||
6. Fix the legacy GameStream GCM nonce reuse: HKDF-derive an independent host→client key from the rikey (direction label), or mirror the V2 'H' direction marker into the legacy IV so host rumble and client input never share (key,nonce).
|
|
||||||
7. Stop carrying the per-session gamescope launch command in a process-global env var: plumb it through the per-session VirtualDisplay::create/context and clear it when no launch is requested, eliminating cross-session stomping under concurrency.
|
|
||||||
8. Apply the cheap hardening nits: constant-time compare for the GameStream phase-4 hash (use ct_eq), set a fixed ALPN ('pkf1') on both QUIC endpoints, make FEC reconstruct failures a counted drop instead of stream-fatal, and replace the global NODE_TLS_REJECT_UNAUTHORIZED with a cert-pinned https.Agent scoped to the loopback mgmt fetch.
|
|
||||||
|
|
||||||
## Security controls done right (positives)
|
## Security controls done right (positives)
|
||||||
|
|
||||||
- Defense-in-depth wire parsing: every attacker-controllable FEC/reassembler header field is bounded against negotiated limits BEFORE any allocation keyed on it (packet.rs:328-343) — shard_bytes exact-match, data/total/block counts in range, indices in bounds, frame_bytes<=max — with no integer overflow in the size math and regression tests (rejects_oversized_shard_counts, rejects_inconsistent_block_geometry_without_panicking).
|
- Defense-in-depth wire parsing: every attacker-controllable FEC/reassembler header field is bounded against negotiated limits BEFORE any allocation keyed on it (packet.rs:328-343) — shard_bytes exact-match, data/total/block counts in range, indices in bounds, frame_bytes<=max — with no integer overflow in the size math and regression tests (rejects_oversized_shard_counts, rejects_inconsistent_block_geometry_without_panicking).
|
||||||
@@ -1,25 +1,24 @@
|
|||||||
# Session-aware host — known limitations & follow-ups
|
# Session-aware host — known limitations & follow-ups
|
||||||
|
|
||||||
Status: 2026-06-14. The host auto-detects the live session (Gaming / KDE / GNOME / wlroots) **per
|
> **Status:** Session detection SHIPPED — host auto-detects the live session (Gaming / KDE / GNOME /
|
||||||
connect** and routes both video and input at it — managed gamescope at the client's resolution in
|
> wlroots) **per connect** and routes video+input at it; opt-in mid-stream switch watcher
|
||||||
Steam Gaming Mode, a KWin/Mutter virtual output at the client's resolution on a Desktop. A watcher
|
> (`PUNKTFUNK_SESSION_WATCH=1`). Code: `crates/punktfunk-host/src/vdisplay.rs` +
|
||||||
(opt-in: `PUNKTFUNK_SESSION_WATCH=1`) follows a Gaming↔Desktop switch **mid-stream** and rebuilds the
|
> `vdisplay/linux/{gamescope,kwin,mutter}.rs`. Items #2 + #3 resolved in code (`3363576`); this doc is
|
||||||
backend in place without a reconnect.
|
> trimmed to the still-open limitations + their design rationale.
|
||||||
|
|
||||||
|
The host auto-detects the live session per connect and routes both video and input at it — managed
|
||||||
|
gamescope at the client's resolution in Steam Gaming Mode, a KWin/Mutter virtual output at the
|
||||||
|
client's resolution on a Desktop. A watcher (opt-in: `PUNKTFUNK_SESSION_WATCH=1`) follows a
|
||||||
|
Gaming↔Desktop switch **mid-stream** and rebuilds the backend in place without a reconnect.
|
||||||
|
|
||||||
Live-validated on the Bazzite F44 box (`bazzite-deck-nvidia:testing`, RTX 4090): Desktop KDE at
|
Live-validated on the Bazzite F44 box (`bazzite-deck-nvidia:testing`, RTX 4090): Desktop KDE at
|
||||||
5120×1440 + input; Gaming managed at 5120×1440; warm-session reuse on quick reconnect; Feature B
|
5120×1440 + input; Gaming managed at 5120×1440; warm-session reuse on quick reconnect; Feature B
|
||||||
video-switch both directions.
|
video-switch both directions.
|
||||||
|
|
||||||
## Resolved (2026-06-15, `3363576`)
|
(Resolved: **#2 mid-stream-switch input** — `vdisplay::settle_desktop_portal()`; **#3 KWin/Mutter
|
||||||
|
virtual output primary** — `apply_session_env` defaults `PUNKTFUNK_KWIN_VIRTUAL_PRIMARY` /
|
||||||
- **#2 — mid-stream-switch input** ✅ `vdisplay::settle_desktop_portal()` pushes the live session env
|
`PUNKTFUNK_MUTTER_VIRTUAL_PRIMARY` on for the auto desktop path. Both shipped in `3363576`; details in
|
||||||
into the systemd/D-Bus activation environment and restarts the KWin portal on a switch, so input
|
git history.)
|
||||||
lands without a reconnect. Validated live: `settled desktop portal env … compositor=kwin` →
|
|
||||||
`libei: portal granted devices` → `device RESUMED` on a Gaming→Desktop mid-stream switch.
|
|
||||||
- **#3 — KWin/Mutter virtual output primary** ✅ `apply_session_env` defaults
|
|
||||||
`PUNKTFUNK_KWIN_VIRTUAL_PRIMARY` / `PUNKTFUNK_MUTTER_VIRTUAL_PRIMARY` on for the auto desktop path.
|
|
||||||
Validated live: `KWin: streamed output set as the sole desktop also_disabled=["HDMI-A-1"]` — panels
|
|
||||||
now render on the streamed screen.
|
|
||||||
|
|
||||||
## Still parked
|
## Still parked
|
||||||
|
|
||||||
@@ -32,17 +31,15 @@ but don't eliminate it. Options, in order of preference:
|
|||||||
- **SIGKILL the gamescope on teardown** instead of `systemctl stop` (SIGTERM). Hypothesis: skipping
|
- **SIGKILL the gamescope on teardown** instead of `systemctl stop` (SIGTERM). Hypothesis: skipping
|
||||||
gamescope's buggy SIGTERM teardown handler (the part that SIGSEGVs, exit 139) lets the process die
|
gamescope's buggy SIGTERM teardown handler (the part that SIGSEGVs, exit 139) lets the process die
|
||||||
hard and the driver reclaim its GPU resources cleanly via normal process exit — no half-torn-down
|
hard and the driver reclaim its GPU resources cleanly via normal process exit — no half-torn-down
|
||||||
context. Change `stop_autologin_sessions` + `stop_session` (`vdisplay/gamescope.rs`) to
|
context. Change `stop_autologin_sessions` + `stop_session` (`vdisplay/linux/gamescope.rs`, both
|
||||||
`systemctl --user kill --signal=SIGKILL <unit>` (+ a follow-up `stop`/`reset-failed` to clear unit
|
still use `systemctl --user stop` = SIGTERM) to `systemctl --user kill --signal=SIGKILL <unit>`
|
||||||
state). **Untested** — this is the first thing to try; it would preserve "managed client-res
|
(+ a follow-up `stop`/`reset-failed` to clear unit state). **Untested** — this is the first thing
|
||||||
gaming AND TV-shows-gaming-when-idle".
|
to try; it would preserve "managed client-res gaming AND TV-shows-gaming-when-idle".
|
||||||
- **Keep the managed session warm** (no per-disconnect restore): spawn once, reuse forever, never
|
- **Keep the managed session warm** (no per-disconnect restore): spawn once, reuse forever, never
|
||||||
tear down → ~1 teardown per host lifetime. Tradeoff: the TV is blank/idle when no client is
|
tear down → ~1 teardown per host lifetime. Tradeoff: the TV is blank/idle when no client is
|
||||||
connected (the autologin is never restored; return to gaming manually).
|
connected (the autologin is never restored; return to gaming manually).
|
||||||
- Upstream gamescope/driver fix.
|
- Upstream gamescope/driver fix.
|
||||||
|
|
||||||
(#2 mid-stream-switch input and #3 virtual-output-primary are **resolved** — see the Resolved section above.)
|
|
||||||
|
|
||||||
## Lower priority / polish
|
## Lower priority / polish
|
||||||
|
|
||||||
### 4. Mid-stream-switch input loss window (~6 s)
|
### 4. Mid-stream-switch input loss window (~6 s)
|
||||||
@@ -68,3 +65,18 @@ the TV session). Resolve together with the keep-warm decision in #1.
|
|||||||
### 8. Feature B is opt-in
|
### 8. Feature B is opt-in
|
||||||
The mid-stream watcher is gated behind `PUNKTFUNK_SESSION_WATCH=1` pending broader validation. Promote
|
The mid-stream watcher is gated behind `PUNKTFUNK_SESSION_WATCH=1` pending broader validation. Promote
|
||||||
to default-on once #2 (mid-stream input) lands and it's exercised on more boxes.
|
to default-on now that #2 (mid-stream input) has landed, once it's exercised on more boxes.
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
1. **F44 gamescope teardown corrupts the GPU context** (#1) — try SIGKILL on teardown
|
||||||
|
(`stop_autologin_sessions` / `stop_session` in `vdisplay/linux/gamescope.rs`), else keep the
|
||||||
|
managed session warm, else upstream fix.
|
||||||
|
2. **Mid-stream-switch input-loss window (~6 s)** (#4) — pre-warm the portal or buffer/hold events
|
||||||
|
instead of dropping during the device-resume window.
|
||||||
|
3. **NVENC `InitializeEncoder failed: invalid param` noise at 5120×1440@240** (#5) — recovers via
|
||||||
|
split-encode; investigate the first-attempt failure / silence the log.
|
||||||
|
4. **NVENC HEVC ~800 Mbps cap on the RTX 4090** (#6) — consider preferring AV1 above it + surface the
|
||||||
|
cap in the speed-test / bitrate UI.
|
||||||
|
5. **Restore-guard / keep-warm interaction** (#7) — couples to #1; resolve together.
|
||||||
|
6. **Feature B (`PUNKTFUNK_SESSION_WATCH`) still opt-in** (#8) — #2 has landed (`3363576`); promote to
|
||||||
|
default-on once it's exercised on more boxes.
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
# Stats capture & graphing — design
|
||||||
|
|
||||||
|
> **Status:** SHIPPED (commit `5bf787e`) — host `crates/punktfunk-host/src/stats_recorder.rs`,
|
||||||
|
> mgmt endpoints `/api/v1/stats/*` (`mgmt.rs`), web console Performance page
|
||||||
|
> (`web/src/sections/Stats/`). Implemented; not yet on-glass validated. This doc is trimmed to
|
||||||
|
> design rationale + open items; the shipped code is the source of truth (data models, recorder
|
||||||
|
> API, endpoint list, and UI layout all live there).
|
||||||
|
|
||||||
|
Goal: let an operator **enable performance-stats capture from the web console**, play a session,
|
||||||
|
**stop**, and **review the captured time-series as graphs**. Captures are **saved to disk**
|
||||||
|
(browse/compare past sessions; survive host restart) and cover **both** streaming paths: native
|
||||||
|
punktfunk/1 (`virtual_stream`) and GameStream/Moonlight (`gamestream/stream.rs`).
|
||||||
|
|
||||||
|
## Why / design rationale
|
||||||
|
|
||||||
|
- **Reuse the existing per-stage instrumentation** that was startup-gated by `PUNKTFUNK_PERF=1`
|
||||||
|
(stdout-only, read once at startup). The key behavioral change: make the per-frame
|
||||||
|
**measurement** predicate `perf || recorder.is_armed()`, re-evaluated each frame via a cheap
|
||||||
|
`Relaxed` atomic. `PUNKTFUNK_PERF=1` still emits its `tracing::info!` log line exactly as
|
||||||
|
before; the web toggle additionally builds a `StatsSample` at the aggregation boundary — so
|
||||||
|
the web toggle works at runtime with **zero startup flags**.
|
||||||
|
- **No async on the per-frame path.** `is_armed()` is a `Relaxed` atomic load; sample
|
||||||
|
construction happens only at the existing **~2 s native / ~1 s GameStream** aggregation
|
||||||
|
boundary, never per frame. One shared `Arc<StatsRecorder>` is created once in the unified host
|
||||||
|
entry and threaded into both streaming loops + `MgmtState`, mirroring the existing
|
||||||
|
`Arc<NativePairing>` sharing pattern.
|
||||||
|
- **Stage sets are the per-frame critical path so stacking is meaningful.** native:
|
||||||
|
`capture` / `submit` (NVENC enqueue) / `encode` (`lock_bitstream` = NVENC schedule + ASIC, the
|
||||||
|
dominant stage under GPU load) / `send` (paced_submit: seal + FEC + pace + sendmmsg).
|
||||||
|
gamestream: `capture` / `encode` / `packetize` / `send`. Native source vectors map
|
||||||
|
`st_cap`→`capture`, `st_submit`→`submit`, `st_wait`→`encode`, `pace_us`→`send`; `encode_us`
|
||||||
|
total ≈ capture+submit+encode and is **not** emitted as its own stage to avoid double-counting.
|
||||||
|
- **Gotchas / accepted-risk decisions:**
|
||||||
|
- **`id` is path-traversal-safe.** `load`/`delete` reject any id not matching
|
||||||
|
`^[A-Za-z0-9._-]+$` (no `/`, no `..`, no `:` — keep it a valid Windows filename) and only ever
|
||||||
|
join `dir/<id>.json`. Endpoints are bearer-authed, but defend in depth.
|
||||||
|
- **Bounded memory, keep the start.** `MAX_SAMPLES` cap (~5400 ≈ 3 h @ 2 s); on overflow stop
|
||||||
|
appending and set a `truncated` flag — **do NOT drop oldest**, a saved recording must keep
|
||||||
|
its start.
|
||||||
|
- **Atomic disk write.** Write `<id>.json.tmp` then rename so a crash mid-write can't leave a
|
||||||
|
half file. Captures dir `~/.config/punktfunk/captures/` (0700), next to `cert.pem`.
|
||||||
|
- Counters that a path doesn't expose are recorded as `0` — **do NOT fabricate**.
|
||||||
|
- mgmt endpoints are **bearer-token only** (operator actions) — deliberately NOT in the mTLS
|
||||||
|
`cert_may_access` read-only allowlist.
|
||||||
|
- Charts render **client-only** (mounted guard) so SSR doesn't choke on `ResponsiveContainer`'s
|
||||||
|
0-width measure.
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
- **On-glass validation.** Implemented but not yet validated on real hardware end-to-end (arm
|
||||||
|
from the console, play, stop, review graphs across both native + GameStream paths).
|
||||||
@@ -0,0 +1,224 @@
|
|||||||
|
---
|
||||||
|
title: "Windows build & packaging"
|
||||||
|
description: "How the punktfunk Windows host is built, signed, and packaged: the all-Rust driver workspace built from source in CI, the Inno Setup installer, the web console bundle, the CI workflows, and the dev-iteration helpers. Repo-internal source of truth - not part of the user-facing docs-site."
|
||||||
|
---
|
||||||
|
|
||||||
|
# Windows build & packaging
|
||||||
|
|
||||||
|
Single source of truth for **how the Windows host ships**: what artifacts are built, the all-Rust
|
||||||
|
driver workspace and why we build it from source in CI, the Inno Setup installer, the web console
|
||||||
|
bundle, the CI workflows, signing, and the dev loop. Architecture lives in
|
||||||
|
[`windows-host-rewrite.md`](windows-host-rewrite.md); deployment/runtime in
|
||||||
|
[`windows-service.md`](windows-service.md). This doc is repo-internal (do **not** mirror into
|
||||||
|
`docs-site/`).
|
||||||
|
|
||||||
|
> **x64-only by design.** The host is coupled to NVENC (`nvEncodeAPI64.dll`) and the pf-vdisplay IddCx
|
||||||
|
> driver, neither of which exists on Windows ARM64 (no ARM64 NVIDIA driver / IddCx path). The *client*
|
||||||
|
> ships x64 + ARM64 MSIX; the *host* does not.
|
||||||
|
|
||||||
|
## 1. What ships
|
||||||
|
|
||||||
|
The signed `punktfunk-host-setup-<ver>.exe` (Inno Setup) lays down, under `C:\Program Files\punktfunk\`:
|
||||||
|
|
||||||
|
| Component | What it is |
|
||||||
|
|-----------|------------|
|
||||||
|
| `punktfunk-host.exe` | the host binary (`--features nvenc,amf-qsv` = NVIDIA + AMD/Intel in one build) |
|
||||||
|
| `pf-vdisplay` driver | all-Rust UMDF IddCx virtual display (per-session client-resolution output) |
|
||||||
|
| `pf-dualsense` driver | virtual DualSense / DualShock 4 (one type-aware HID minidriver) |
|
||||||
|
| `pf-xusb` driver | virtual Xbox 360 / XInput companion |
|
||||||
|
| `pf-vkhdr-layer` | Vulkan implicit layer that advertises HDR formats on the virtual display |
|
||||||
|
| web console | self-contained Nitro `.output` + a portable `bun` runtime (the `PunktfunkWeb` task) |
|
||||||
|
| FFmpeg DLLs | `avcodec`/`avutil`/`swscale`/... - the AMD/Intel (AMF/QSV) encode backend link-imports them |
|
||||||
|
| `nefconc.exe` | nefarius' nefcon (creates the `root\pf_vdisplay` device node; pnputil can't) |
|
||||||
|
|
||||||
|
All three drivers and the HDR layer are **bundled, not external** - no ViGEmBus, no SudoVDA, no separate
|
||||||
|
driver download. The host installs a `LocalSystem` SCM service that `CreateProcessAsUserW`s into the
|
||||||
|
interactive session for secure-desktop capture (why MSIX is unusable - see
|
||||||
|
[`windows-service.md`](windows-service.md)).
|
||||||
|
|
||||||
|
## 2. Component map (source -> artifact)
|
||||||
|
|
||||||
|
| Source | Built by | Artifact |
|
||||||
|
|--------|----------|----------|
|
||||||
|
| `crates/punktfunk-host/` | `cargo build --release -p punktfunk-host --features nvenc,amf-qsv` | `punktfunk-host.exe` |
|
||||||
|
| `packaging/windows/drivers/pf-vdisplay/` | `build-pf-vdisplay.ps1` (workspace `cargo build` + sign) | `pf_vdisplay.{dll,inf,cat}` + `.cer` |
|
||||||
|
| `packaging/windows/drivers/pf-dualsense/` `pf-xusb/` | `build-gamepad-drivers.ps1` (sign the workspace build) | `pf_{dualsense,xusb}.{dll,inf,cat}` + shared `.cer` |
|
||||||
|
| `packaging/windows/pf-vkhdr-layer/` | `pack-host-installer.ps1` (`cargo build --release`) | `pf_vkhdr_layer.dll` + `.json` |
|
||||||
|
| `web/` | `scripts/windows/build-web.ps1` (`bun run build`) | self-contained `.output` |
|
||||||
|
| `packaging/windows/nvenc/nvenc.def` | `gen-nvenc-importlib.ps1` (llvm-dlltool) | `nvencodeapi.lib` (link import, no GPU/SDK) |
|
||||||
|
|
||||||
|
## 3. The driver workspace - `packaging/windows/drivers/`
|
||||||
|
|
||||||
|
A **separate cargo workspace** (its own `[workspace]` root) because driver crates are `cdylib`s built
|
||||||
|
with the WDK toolchain on Windows only. Members:
|
||||||
|
|
||||||
|
- `pf-vdisplay` - the IddCx virtual display (the real driver).
|
||||||
|
- `pf-dualsense`, `pf-xusb` - the virtual gamepad HID/XUSB minidrivers.
|
||||||
|
- `wdk-iddcx` - hand-written IddCx DDI wrappers (the `iddcx` ApiSubset bindgen reuses `wdk_default`).
|
||||||
|
- `wdk-probe` - a toolchain/surface-assert probe crate.
|
||||||
|
- `vendor/wdk-sys` + `vendor/wdk-build` - **vendored** microsoft/windows-drivers-rs 0.5.1 (the published
|
||||||
|
crates) + an added `iddcx` ApiSubset. A `[patch.crates-io]` redirects every `wdk-sys`/`wdk-build`
|
||||||
|
reference (incl. `wdk` 0.4.1's transitive deps) to these copies, so the graph has exactly one
|
||||||
|
iddcx-capable `wdk-sys`. **Pinned - do not chase upstream.**
|
||||||
|
|
||||||
|
The workspace path-depends on the owned ABI crate `crates/pf-driver-proto` (the host<->driver control protocol).
|
||||||
|
`.cargo/config.toml` sets an explicit `--target x86_64-pc-windows-msvc` + `target-feature=+crt-static` (UMDF
|
||||||
|
needs the static CRT; the explicit target keeps `crt-static` off host build-scripts/proc-macros).
|
||||||
|
`[workspace.metadata.wdk.driver-model]` sets UMDF 2.31 once for all members.
|
||||||
|
|
||||||
|
Driver-specific gotchas (handled by the build scripts):
|
||||||
|
|
||||||
|
- **`/INTEGRITYCHECK` (FORCE_INTEGRITY).** `wdk-build` links `/INTEGRITYCHECK`, which a non-EV
|
||||||
|
(self-signed) cert can't satisfy, so the driver won't load. `clear-force-integrity.ps1` clears the PE
|
||||||
|
`DllCharacteristics` bit (offset `0x5e`) **before** signing.
|
||||||
|
- **Self-signed cert.** The drivers are signed with a self-signed CodeSigning cert; the installer trusts
|
||||||
|
the bundled `.cer` (machine `Root` + `TrustedPublisher`) at install time so PnP loads them silently.
|
||||||
|
Validated to load with Secure Boot enabled. (CI can use a stable `DRIVER_CERT_PFX_B64` secret instead.)
|
||||||
|
- **Device node via nefcon, never devgen.** The `root\pf_vdisplay` node is created with `nefconc`
|
||||||
|
(a clean `ROOT\DISPLAY` node). `devgen` leaves persistent `SWD\DEVGEN` phantoms that survive reboot +
|
||||||
|
registry deletion. The gamepad drivers create their per-session nodes from the host via
|
||||||
|
`SwDeviceCreate` (no install-time node).
|
||||||
|
- **Strictly-increasing `DriverVer`.** `9.9.MMdd.HHmm` (stampinf). pnputil silently keeps the old binary
|
||||||
|
on a non-increasing version; a later-minute redeploy always wins.
|
||||||
|
|
||||||
|
## 4. Drivers are BUILT FROM SOURCE - the anti-stale decision
|
||||||
|
|
||||||
|
The drivers used to ship as **checked-in prebuilt binaries** (`packaging/windows/pf-vdisplay/` +
|
||||||
|
`gamepad-drivers/`). That model went stale and shipped two field bugs on a fresh install:
|
||||||
|
|
||||||
|
1. A repo-wide rename edited `pf_vdisplay.inf` (a comment) but never re-signed `pf_vdisplay.cat`. A
|
||||||
|
catalog hashes the INF+DLL byte-for-byte, so `pnputil /add-driver` failed
|
||||||
|
`SPAPI_E_FILE_HASH_NOT_IN_CATALOG` **on every box** - the driver never installed, every session died
|
||||||
|
"pf-vdisplay driver interface not found".
|
||||||
|
2. The frozen binary predated `IOCTL_SET_RENDER_ADAPTER`, which the host needs to pin the IddCx render
|
||||||
|
GPU on hybrid/Optimus boxes.
|
||||||
|
|
||||||
|
Fix: **build from source every release.** `pack-host-installer.ps1` calls `build-pf-vdisplay.ps1` (which
|
||||||
|
`cargo build`s the *whole* workspace) then `build-gamepad-drivers.ps1 -SkipBuild` (sign the already-built
|
||||||
|
gamepad cdylibs), so `.dll`/`.inf`/`.cat` are always in lockstep and current driver features ship. The
|
||||||
|
checked-in binaries were deleted. Re-introducing a vendored binary is the bug; if you must, a catalog
|
||||||
|
guard (`Test-FileCatalog` hash-membership) belongs in the build script.
|
||||||
|
|
||||||
|
The build scripts share the same shape (WDK env -> build -> clear FORCE_INTEGRITY -> sign DLL ->
|
||||||
|
stampinf -> Inf2Cat -> sign cat -> export `.cer`); `build-gamepad-drivers.ps1` loops over the two gamepad
|
||||||
|
drivers and signs both with one shared cert. (A `_driver-pack-common.ps1` helper to dedup the ~90% they
|
||||||
|
share is a known TODO - keep behavior identical and re-run `windows-host` if you do it.)
|
||||||
|
|
||||||
|
## 5. Toolchain / build env
|
||||||
|
|
||||||
|
The drivers build with **plain `cargo build`** against the vendored windows-drivers-rs - **no cargo-make,
|
||||||
|
no cargo-wdk for the build** (cargo-wdk is only provisioned + probed by `windows-drivers.yml`). The build
|
||||||
|
needs, on the runner:
|
||||||
|
|
||||||
|
- **WDK 26100** - `Version_Number=10.0.26100.0` pins the SDK version `wdk-build` uses (it otherwise picks
|
||||||
|
`10.0.28000.0`, which has no `km`/`crt`, and bindgen fails). Provisioned by
|
||||||
|
`scripts/ci/provision-windows-wdk.ps1` (iddcx headers are the "WDK present" signal).
|
||||||
|
- **clang 22 + bindgen 0.72** - the vendored `bindgen` is `0.72.1`, which builds clean on the runner's
|
||||||
|
**default** LLVM (`C:\Program Files\LLVM`, currently clang 22). `LIBCLANG_PATH` is left unset (defaults
|
||||||
|
to the runner default). *History:* LLVM 21.1.2 was briefly pinned (`C:\llvm-21`) to dodge a
|
||||||
|
bindgen-0.71 layout-test overflow on clang 22; the 0.72 bump retired that pin, so there's now one
|
||||||
|
toolchain for both driver builds (the pack and `windows-drivers.yml`).
|
||||||
|
- NVENC import lib synthesised from a 2-export `.def` via `llvm-dlltool` (`gen-nvenc-importlib.ps1`) -
|
||||||
|
no GPU or NVIDIA SDK at build time.
|
||||||
|
- `FFMPEG_DIR` (the BtbN gpl-shared x64 tree) for the AMD/Intel AMF/QSV link; NASM + CMake +
|
||||||
|
`CMAKE_POLICY_VERSION_MINIMUM=3.5` for the CMake-from-source deps (aws-lc, opus).
|
||||||
|
- **Gotcha:** `CARGO_HOME` must be an ASCII path (a non-ASCII username breaks SDL3's MSVC precompiled
|
||||||
|
header). The runner uses `C:\Users\Public\.cargo`.
|
||||||
|
- **`CARGO_TARGET_DIR` for the driver build must be the DEFAULT (in-tree) dir.** `wdk-build`'s
|
||||||
|
`find_top_level_cargo_manifest()` walks up from `OUT_DIR` to the first ancestor with a `Cargo.lock`; a
|
||||||
|
relocated `C:\t` target dir hides the workspace lock and the build-script panics "a Cargo.lock file
|
||||||
|
should exist...". The driver deps have no deep CMake crates, so the in-tree target stays under MAX_PATH.
|
||||||
|
(The host/client builds *do* relocate to `C:\t` to dodge MAX_PATH - that's the opposite need.)
|
||||||
|
|
||||||
|
## 6. The installer - Inno Setup
|
||||||
|
|
||||||
|
`pack-host-installer.ps1` orchestrates, in order: resolve a code-signing cert -> sign `punktfunk-host.exe`
|
||||||
|
-> **build + sign the drivers from source** (`build-pf-vdisplay.ps1` + `build-gamepad-drivers.ps1`,
|
||||||
|
staged via `stage-pf-vdisplay.ps1` which also fetches/verifies pinned nefcon) -> stage FFmpeg DLLs + the
|
||||||
|
web console + a portable bun -> build + sign the HDR Vulkan layer -> run `ISCC` on `punktfunk-host.iss`
|
||||||
|
-> sign `setup.exe`.
|
||||||
|
|
||||||
|
`punktfunk-host.iss` (Inno) lays down `{app}`, runs the install steps, and registers things. **Optional
|
||||||
|
tasks** (all default-checked): install the pf-vdisplay driver, install the gamepad drivers, install the
|
||||||
|
HDR Vulkan layer, start the service. Silent install: `/VERYSILENT` (omit a task with
|
||||||
|
`/MERGETASKS="!installdriver"`).
|
||||||
|
|
||||||
|
Install-time work runs from `punktfunk-host.exe` subcommands, **not** locale-parsed PowerShell *files* -
|
||||||
|
the `[Run]` section calls `driver install [--gamepad] --dir <stage>` and `web setup --app-dir <app>
|
||||||
|
[--password-file <f>]` (`crates/punktfunk-host/src/windows/install.rs`). This is the ANSI-codepage
|
||||||
|
root fix: PowerShell 5.1 reads a BOM-less `.ps1` *file* in the machine codepage, so a stray non-ASCII
|
||||||
|
byte aborted the install on a non-English box; a compiled subcommand drives the same external tools as
|
||||||
|
fixed string literals (the `service install` precedent, see [`windows-service.md`](windows-service.md)).
|
||||||
|
The `.iss`'s *inline* `-Command` PowerShell is a command-line string, not a file read, so it's unaffected
|
||||||
|
and stays. Each subcommand is best-effort (a hiccup warns, never aborts the installer):
|
||||||
|
|
||||||
|
- **Driver install:** trust the bundled `.cer` (Root + TrustedPublisher), create the `root\pf_vdisplay`
|
||||||
|
node if absent (nefconc, gated so a re-create can't spawn a phantom), `pnputil /add-driver /install`
|
||||||
|
(pf-vdisplay) or `pnputil /add-driver` per-inf (gamepads - the host SwDeviceCreate's the devnodes).
|
||||||
|
A driver hiccup never aborts the install (the host degrades to a physical display).
|
||||||
|
- **Web console (`web setup`):** write the ACL'd `web-password`, register the `PunktfunkWeb` task (boot,
|
||||||
|
SYSTEM, restart-on-failure -> `bun` on `:3000`, via a generated UTF-16 Task Scheduler XML), open TCP
|
||||||
|
3000, start it. Upgrade-safe: stop + reap any old console (by the `:3000` listener owner,
|
||||||
|
runtime-agnostic - identified by the wildcard foreign address, so the localized state word is never parsed)
|
||||||
|
before re-registering so the new one can bind.
|
||||||
|
|
||||||
|
**Signing:** the exe/setup/HDR-layer use the **`MSIX_CERT_PFX_B64`/`MSIX_CERT_PASSWORD`** secrets
|
||||||
|
(`CN=unom`, shared with the client); the **drivers** use a separate cert (self-signed per build, or a
|
||||||
|
stable `DRIVER_CERT_PFX_B64`) and their own bundled `.cer` - the two never collide. Without the MSIX
|
||||||
|
secrets, an ephemeral self-signed cert is generated and its `.cer` published next to the installer.
|
||||||
|
|
||||||
|
## 7. The web console bundle
|
||||||
|
|
||||||
|
The console is a TanStack Start / Nitro SSR app (`web/`). `vite.config.ts` sets `noExternals: true`, so
|
||||||
|
`bun run build` emits a **self-contained `.output`** (~75 files, deps bundled + tree-shaken, no
|
||||||
|
`node_modules`/`.npmrc`). The installer ships that `.output` + a portable `bun.exe`; the `PunktfunkWeb`
|
||||||
|
task runs `bun .output/server/index.mjs` on `:3000`, auto-wired to the host's loopback mgmt API via
|
||||||
|
`web-run.cmd` (sources `%ProgramData%\punktfunk\mgmt-token` + `web-password`). No node, no node_modules
|
||||||
|
forest. (`build-web.ps1` is the dev-box rebuild-and-restart helper.)
|
||||||
|
|
||||||
|
## 8. CI workflows (`.gitea/workflows/`)
|
||||||
|
|
||||||
|
All run on the single self-hosted `windows-amd64` runner (`home-windows-1`), which **serializes** the
|
||||||
|
whole Windows fleet - a `Cargo.lock`/`packaging/windows/**` touch queues several builds back-to-back.
|
||||||
|
|
||||||
|
| Workflow | Trigger | Does |
|
||||||
|
|----------|---------|------|
|
||||||
|
| `windows-host.yml` | `crates/punktfunk-host`, `packaging/windows`, `scripts/windows`, `web`, tags `v*` | build host + clippy + HDR layer + web smoke-boot -> pack + sign installer -> publish (canary/latest) |
|
||||||
|
| `windows-drivers.yml` | `packaging/windows/drivers`, `crates/pf-driver-proto` | probe the driver toolchain + build/test/clippy `pf-driver-proto` + `cargo build` the driver workspace + inspect FORCE_INTEGRITY (the fast driver-only gate; coverage the pack lacks) |
|
||||||
|
| `windows-drivers-provision.yml` | `provision-windows-wdk.ps1` | one-shot WDK + cargo-wdk provisioning onto the persistent runner |
|
||||||
|
| `windows.yml` / `windows-msix.yml` | client | build the Windows *client* + its signed MSIX (x64 + ARM64) |
|
||||||
|
|
||||||
|
`windows-host.yml` also builds the drivers from source (in pack), so it overlaps `windows-drivers.yml` on
|
||||||
|
a `drivers/**` edit (two driver builds on the serialized runner). They're kept separate on purpose -
|
||||||
|
`windows-drivers.yml` is the fast pre-pack gate. **CI builds, never launches the exe** (no GPU on the
|
||||||
|
runner), so AMF/QSV + on-glass behavior are validated on a real box, not in CI.
|
||||||
|
|
||||||
|
## 9. Dev iteration
|
||||||
|
|
||||||
|
- **Host:** `scripts/windows/deploy-host.ps1` (build + redeploy the exe to a box), `build-web.ps1`
|
||||||
|
(rebuild + restart the console).
|
||||||
|
- **pf-vdisplay driver:** `packaging/windows/drivers/deploy-dev.ps1` (build -> clear FORCE_INTEGRITY ->
|
||||||
|
sign -> stampinf a strictly-increasing `DriverVer` -> Inf2Cat -> sign -> `-Install`);
|
||||||
|
`redeploy-pf-vdisplay.ps1` (one-shot: stop host -> install -> reload adapter -> start);
|
||||||
|
`reset-pf-vdisplay.ps1` (recover a wedged driver: reap ghost monitor nodes + cycle the adapter, no
|
||||||
|
reboot). Run elevated; default to the `PunktfunkHost` service.
|
||||||
|
- Drive any of these from Linux over SSH:
|
||||||
|
`ssh user@box 'powershell -ExecutionPolicy Bypass -File C:\...\reset-pf-vdisplay.ps1'`.
|
||||||
|
- The RTX/on-glass box is where NVENC encode + IDD-push frame flow are validated (CI can't).
|
||||||
|
|
||||||
|
## 10. Release
|
||||||
|
|
||||||
|
Push a `vX.Y.Z` tag (one tag releases every platform): `windows-host.yml` builds + signs
|
||||||
|
`punktfunk-host-setup-X.Y.Z.exe` + the public `.cer`, refreshes the `latest/` alias, and attaches them to
|
||||||
|
the unified Gitea Release. Main pushes publish rolling `0.3.<run>` **canary** builds to `canary/`.
|
||||||
|
Download: `https://git.unom.io/api/packages/unom/generic/punktfunk-host-windows/{latest,canary}/punktfunk-host-setup.exe`.
|
||||||
|
|
||||||
|
## 11. See also
|
||||||
|
|
||||||
|
- [`windows-host-rewrite.md`](windows-host-rewrite.md) - host architecture (capture/encode/vdisplay
|
||||||
|
backends, IDD-push, the rewrite milestones). The architecture source of truth.
|
||||||
|
- [`windows-service.md`](windows-service.md) - the SYSTEM service + secure-desktop deployment model.
|
||||||
|
- [`windows-virtual-display-rust-port.md`](windows-virtual-display-rust-port.md) - history of the all-Rust
|
||||||
|
IddCx driver port (SUPERSEDED in its conclusion: IDD-push became the primary capture path).
|
||||||
|
- `packaging/windows/pf-vkhdr-layer/README.md` - the HDR Vulkan layer.
|
||||||
|
- `packaging/windows/README.md` - the file index for `packaging/windows/`.
|
||||||
@@ -0,0 +1,106 @@
|
|||||||
|
# Windows native client — bootstrap handoff
|
||||||
|
|
||||||
|
> **Status:** SHIPPED — `clients/windows` (binary `punktfunk-client`), WinUI 3 via `windows-reactor`;
|
||||||
|
> commits `0a3b92d..0cc36fa`. Build + clippy + fmt green on `x86_64-pc-windows-msvc` and
|
||||||
|
> `aarch64-pc-windows-msvc` (ARM64 cross-compiled off the one x64 runner; signed MSIX for both arches
|
||||||
|
> via `windows-msix.yml`). This doc is trimmed to design rationale, the HDR reference, hard-won
|
||||||
|
> gotchas, and open items. The shipped source under `clients/windows/src/` is the truth.
|
||||||
|
|
||||||
|
The native Windows punktfunk/1 client connects to a host (`serve` / `punktfunk1-host`), decodes HEVC,
|
||||||
|
presents it low-latency on a `SwapChainPanel`, plays Opus audio, and captures local
|
||||||
|
mouse/keyboard/gamepad to send back — the Windows analogue of the GTK4 Linux client
|
||||||
|
(`clients/linux`), which was the architectural template. The locked decisions below are the durable
|
||||||
|
"why"; the HDR section is the evergreen present reference.
|
||||||
|
|
||||||
|
## Locked decisions (the "why")
|
||||||
|
|
||||||
|
- **Pure Rust.** `windows-rs` + **Windows App SDK "Reactor"** (WinUI 3 from Rust, merged windows-rs
|
||||||
|
PR #4479). No C++/C#. Reactor + `SwapChainPanel` was the only novel/uncertain piece and was
|
||||||
|
de-risked first; everything else is a known-good port of the Linux client.
|
||||||
|
- Reactor is viable: windows-rs [PR #4499](https://github.com/microsoft/windows-rs/pull/4499)
|
||||||
|
(merged 2026-06-01) added a `SwapChainPanel` widget to `windows-reactor` with `set_swap_chain`
|
||||||
|
over `CreateSwapChainForComposition`, so a DXGI presenter *can* be hosted. (An earlier read that
|
||||||
|
Reactor had no swapchain hatch was wrong/stale.) The UI is a declarative React-like tree
|
||||||
|
(`App::new().render(app)`, `use_state`/`use_resource`/`use_effect` hooks); the video page is
|
||||||
|
`swap_chain_panel().on_ready(|p| p.set_swap_chain(&sc))` driven by `on_rendering`.
|
||||||
|
- **Links `punktfunk-core` directly** (Cargo path dep, `features = ["quic"]`) — **no C ABI**, exactly
|
||||||
|
like the GTK client, *unlike* the Apple path. `NativeClient` is already `Sync` (mutexed plane
|
||||||
|
receivers), so it drops into a UI app cleanly. The C ABI (`punktfunk_connect` + `next_au`/
|
||||||
|
`next_audio`/`next_rumble`/`next_hidout`/`send_input`/`send_rich_input`) is the *Apple* path; the
|
||||||
|
native Rust clients call `crates/punktfunk-core/src/client.rs` (`NativeClient`) methods directly.
|
||||||
|
- **Video widget = WinUI 3 `SwapChainPanel`** (built-in), fed a D3D11 swapchain via
|
||||||
|
`ISwapChainPanelNative::SetSwapChain`. `present.rs` owns the D3D11 composition swapchain (WARP
|
||||||
|
fallback, runtime shaders, Contain-fit) — the same renderer, bound to the panel instead of an HWND.
|
||||||
|
- **Decode = FFmpeg-next + D3D11VA** (HEVC; **Main10** for 10-bit/HDR — see below).
|
||||||
|
- **Audio playback = WASAPI render** + Opus decode (`opus` crate, vendors libopus via cmake).
|
||||||
|
- **Input capture→send**: the client captures LOCAL input and sends it. Mouse (abs + relative) +
|
||||||
|
keyboard via the **inverse VK table** (Windows VK is the native source, so simpler than Linux);
|
||||||
|
gamepad via **SDL3** (already a workspace dep, cross-platform) → `NativeClient::send_input`/
|
||||||
|
`send_rich_input`. (`SendInput`/`ViGEm` are HOST-side injection — not used by the client.)
|
||||||
|
- **Stream input is Win32 low-level hooks**, not XAML: reactor exposes only keyboard *accelerators* and
|
||||||
|
pointer *button-state* (no raw key-down/up, no pointer position, no wheel), insufficient for a
|
||||||
|
game stream. `input.rs` installs `WH_KEYBOARD_LL`/`WH_MOUSE_LL` on the stream page (uninstalled
|
||||||
|
on exit), maps the pointer through the window client rect, sends native VK + abs mouse + wheel,
|
||||||
|
with a Ctrl+Alt+Shift+Q capture toggle. (A future alternative: generate
|
||||||
|
`Microsoft.UI.Xaml.UIElement` bindings from the staged winmd and subscribe to `KeyDown`/
|
||||||
|
`PointerMoved` — scoped to the panel.)
|
||||||
|
- **Discovery = `mdns-sd`** (cross-platform, browses `_punktfunk._udp`).
|
||||||
|
- **Trust = shared client identity + SPAKE2 PIN pairing + TOFU** (`trust.rs`; same identity
|
||||||
|
files/logic as the other native clients).
|
||||||
|
|
||||||
|
## 10-bit + HDR (the present reference)
|
||||||
|
|
||||||
|
The host negotiates and emits **HEVC Main10 + BT.2020 PQ HDR10** when the captured desktop is HDR
|
||||||
|
(and 10-bit SDR Main10 when negotiated). The Windows client mirrors the Apple present:
|
||||||
|
|
||||||
|
- **Advertise caps** in the `Hello`: `video_caps = VIDEO_CAP_10BIT | VIDEO_CAP_HDR`
|
||||||
|
(`crates/punktfunk-core/src/quic.rs`). The host enables 10-bit only if the client advertised it.
|
||||||
|
- **Detect HDR in-band** from the HEVC VUI (transfer characteristics = SMPTE ST 2084 / PQ), exactly
|
||||||
|
like the Apple client's `VideoDecoder.isHDRFormat` (`clients/apple/Sources/PunktfunkKit/`). This
|
||||||
|
handles a mid-session HDR toggle without renegotiation. `Welcome.bit_depth` (8/10) is also
|
||||||
|
available.
|
||||||
|
- **Decode** Main10 → **P010** (10-bit) via D3D11VA.
|
||||||
|
- **Present HDR**: swapchain in `DXGI_FORMAT_R10G10B10A2_UNORM` (or `R16G16B16A16_FLOAT`),
|
||||||
|
`IDXGISwapChain3::SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020)` + `SetHDRMetaData`
|
||||||
|
for HDR10; the host's stream is BT.2020 PQ, so present PQ. For SDR, the existing
|
||||||
|
`DXGI_FORMAT_B8G8R8A8_UNORM` + BT.709 path. (The host-side HDR conversion math is in
|
||||||
|
`crates/punktfunk-host/src/capture/dxgi.rs` `HDR_PS`/`HdrConverter` if you need the inverse.)
|
||||||
|
|
||||||
|
## Build gotchas (hard-won)
|
||||||
|
|
||||||
|
- **`CARGO_HOME` must be an ASCII path** (`C:\Users\Public\.cargo`). SDL3's `build-from-source` PCH
|
||||||
|
embeds the registry source path; the `ü` in the dev box's username makes MSVC fail (`MSB8084` /
|
||||||
|
`C4828`). Build under an ASCII path generally (the same `ü` triggers `LNK1201` PDB-write failures
|
||||||
|
under `~/Developer`).
|
||||||
|
- **`CMAKE_POLICY_VERSION_MINIMUM=3.5`** in the build env (CMake 4 rejects libopus's old minimum).
|
||||||
|
- **Toolchain:** `winget install NASM.NASM Kitware.CMake LLVM.LLVM` (NASM for aws-lc-rs on the quic
|
||||||
|
path; libclang/LLVM for ffmpeg-sys).
|
||||||
|
- **windows-reactor is unpublished** (`version 0.0.0`) and fast-moving — depend on it as a **git dep
|
||||||
|
pinned to a commit** (`b4129fcc`), and pin the `windows` crate to the **same commit** so the
|
||||||
|
`IDXGISwapChain1` you pass to `set_swap_chain` satisfies reactor's `windows_core::Interface`. Its
|
||||||
|
`build.rs` downloads the Windows App SDK NuGets (Foundation/Interactive/Runtime), stages the
|
||||||
|
bootstrap DLL + `resources.pri` next to the exe, and **`.unwrap()`s `CARGO_WORKSPACE_DIR`** — set
|
||||||
|
it in the build env (`CARGO_WORKSPACE_DIR=C:\Users\Public\punktfunk`). It writes `/temp` + `/winmd`
|
||||||
|
to the workspace root (gitignored). The App SDK runtime must be installed to *run*.
|
||||||
|
- **Windows clippy is stricter** than Linux CI and `cfg(windows)` code is excluded from Linux CI →
|
||||||
|
run `cargo clippy -p punktfunk-client-windows -- -D warnings` ON the Windows box before committing.
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
- **On-glass validation on the RTX box** of **D3D11VA hardware decode** + **10-bit/HDR present** +
|
||||||
|
the **WinUI GUI** — the dev VM is headless / SSH Session 0 / WARP, so the WinUI window can't show
|
||||||
|
there and there is no hardware decode. Validate over RDP or on the RTX box against a live HDR host.
|
||||||
|
- **RAWINPUT relative-mouse pointer-lock** for the stream view.
|
||||||
|
- **Per-host speed-test widget** in the UI.
|
||||||
|
|
||||||
|
## Key references
|
||||||
|
|
||||||
|
- **Full Windows plan + SudoVDA/host details:** `design/windows-host.md`.
|
||||||
|
- **Template ported from:** `clients/linux/src/*`.
|
||||||
|
- **Apple HDR present** (the pattern mirrored):
|
||||||
|
`clients/apple/Sources/PunktfunkKit/{VideoDecoder,MetalVideoPresenter,Stage2Pipeline}.swift` — in-band PQ detection, P010 decode, EDR present.
|
||||||
|
- **Core client API:** `crates/punktfunk-core/src/client.rs` (`NativeClient`).
|
||||||
|
- **Protocol:** `crates/punktfunk-core/src/quic.rs` (`Hello.video_caps`, `Welcome.bit_depth`,
|
||||||
|
`VIDEO_CAP_10BIT`/`VIDEO_CAP_HDR`).
|
||||||
|
- **Host HDR conversion (inverse math):** `crates/punktfunk-host/src/capture/dxgi.rs` (`HDR_PS`,
|
||||||
|
`HdrConverter`) + `crates/punktfunk-host/src/encode/nvenc.rs` (BT.2020/PQ VUI).
|
||||||
@@ -0,0 +1,143 @@
|
|||||||
|
# Windows virtual DualSense — game detection handoff
|
||||||
|
|
||||||
|
> **Status:** Identity fix SHIPPED (commits `6db3525`, `aa159df`, `4a73102`) —
|
||||||
|
> `crates/punktfunk-host/src/inject/windows/dualsense_windows.rs` (`create_swdevice`). This doc is trimmed
|
||||||
|
> to the root-cause analysis, the SwDeviceCreate identity rationale, the GameInput fallback design, and the
|
||||||
|
> still-open on-glass Cyberpunk verification. The implementation walkthrough, probe tooling, and the
|
||||||
|
> (now-fixed) secondary driver gaps are cut — the shipped code is the source of truth.
|
||||||
|
|
||||||
|
Goal: get the host's virtual DualSense **detected and usable in games** (Cyberpunk's native PS5 path +
|
||||||
|
others) on the Windows host. Run the decisive experiments **on the interactive desktop of the Windows host**
|
||||||
|
(`.173`) — not over SSH.
|
||||||
|
|
||||||
|
## Where it works / where it doesn't
|
||||||
|
|
||||||
|
- **Input works.** Client → host → virtual DualSense → games read input (verified in Steam's controller
|
||||||
|
test).
|
||||||
|
- **The HID is a CORRECT, COMPLETE DualSense.** SDL3 reports the live device as
|
||||||
|
`name='DualSense Wireless Controller' vid=0x054C pid=0x0CE6 isGamepad=True gamepadType=PS5`. SDL = HIDAPI =
|
||||||
|
what Steam (and many games) build on → that's why Steam works. This is **not** a descriptor/feature-report
|
||||||
|
problem.
|
||||||
|
- **Cyberpunk's native DualSense path does NOT detect it at all** (Steam Input was off — Cyberpunk was
|
||||||
|
reading the raw HID). This is the problem the identity fix targets; on-glass confirmation is still open.
|
||||||
|
|
||||||
|
## Root cause — the PnP identity, not the HID descriptor (CONFIRMED, run live in console session 3)
|
||||||
|
|
||||||
|
The break is the device's **PnP identity / device-interface path**, not the HID descriptor or feature
|
||||||
|
reports. `hidclass` derives the HID child's path token and its `HID\VID_054C&PID_0CE6` hardware-ids from the
|
||||||
|
**parent bus device's hardware-id**. Our parent is the software (SWD) devnode `SWD\PUNKTFUNK\PF_PAD_0` whose
|
||||||
|
hardware-id is `pf_dualsense` (no VID/PID), so hidclass emits only the *VendorID+usage* fallback and **no
|
||||||
|
PID**. Measured on this box (one virtual pad live + one real 8BitDo present):
|
||||||
|
|
||||||
|
HID-child hardware-ids (`DEVPKEY_Device_HardwareIds`, CompatibleIds empty):
|
||||||
|
`HID\pf_dualsense` · `HID\VID_054C&UP:0001_U:0005` · `HID_DEVICE_SYSTEM_GAME` · `HID_DEVICE_UP:0001_U:0005`
|
||||||
|
· `HID_DEVICE` — **note the absent `HID\VID_054C&PID_0CE6`.** `HIDD_ATTRIBUTES` itself is correct (VID 054C
|
||||||
|
/ PID 0CE6), which is why attribute-readers work.
|
||||||
|
|
||||||
|
Device-interface paths (from `HKLM\SYSTEM\CurrentControlSet\Control\DeviceClasses\{4d1e55b2-…}`):
|
||||||
|
|
||||||
|
| Device | HID interface path |
|
||||||
|
| --- | --- |
|
||||||
|
| **Ours (virtual)** | `\\?\HID#punktfunk#1&ca418da&0&0000#{…}` — **no `VID_/PID_` token** |
|
||||||
|
| Real DualShock 4 (USB, registry remnant) | `\\?\HID#VID_054C&PID_05C4&REV_0100#…` |
|
||||||
|
| Real DualSense (BT, registry remnant) | `\\?\HID#{00001124-…}_VID&0002054c_PID&0ce6#…` |
|
||||||
|
|
||||||
|
**Cross-API enumeration matrix (the decisive experiment — impossible over SSH, run live in the console):**
|
||||||
|
|
||||||
|
| API | Sees our virtual DS5? | Identity reported | Reads from |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| SDL3 / HIDAPI | ✅ | 054C:0CE6, type=PS5 | `HIDD_ATTRIBUTES` → Steam works |
|
||||||
|
| RawInput | ✅ | 054C:0CE6 | `HIDD_ATTRIBUTES` |
|
||||||
|
| WGI `RawGameController` | ✅ | 054C:0CE6 | `HIDD_ATTRIBUTES` |
|
||||||
|
| WGI `Gamepad` | ❌ empty | — | (empty for *all* pads on this box — no Xbox-profile pad; not DS-specific) |
|
||||||
|
| **MS GameInput** | ✅ enumerates it | **vid=0x0000 pid=0x0000** | **PnP path / hardware-ids** |
|
||||||
|
| Cyberpunk native PS5 | ❌ | — | needs the DS5 VID/PID identity |
|
||||||
|
|
||||||
|
The GameInput result is the clincher: it **does** enumerate our pad — descriptor fingerprint matches exactly
|
||||||
|
(15 buttons, 6 axes, 1 hat, usage Game Pad 0x05) — but reports **vid/pid = 0**, while it reads the real
|
||||||
|
8BitDo's `vid=0x3434` correctly. So GameInput (and, by the same logic, a native PS5 path) takes VID/PID from
|
||||||
|
the **PnP device path / hardware-ids, NOT from `HIDD_ATTRIBUTES`**. Everything that reads attributes directly
|
||||||
|
(SDL / RawInput / WGI-raw) is fine; everything that keys off the device *identity/path* (GameInput, native
|
||||||
|
DualSense detection) sees a generic, unidentified gamepad → no PS5 path.
|
||||||
|
|
||||||
|
**⇒ The fix must put `VID_054C&PID_0CE6` into the device-interface path and the `HID\VID&PID` hardware-ids**
|
||||||
|
(give the device a real-USB-like PnP identity), not merely correct `HIDD_ATTRIBUTES`.
|
||||||
|
|
||||||
|
## The fix — SwDeviceCreate identity (shipped)
|
||||||
|
|
||||||
|
`create_swdevice` sets the identity via **`SW_DEVICE_CREATE_INFO` struct fields** (NOT `pProperties` — a
|
||||||
|
`DEVPROPERTY` write of these PnP-owned identity keys is empirically *ignored*; the create-time struct fields
|
||||||
|
are the supported lever, confirmed on `.173`):
|
||||||
|
|
||||||
|
- **`pszzCompatibleIds`** = `USB\VID_054C&PID_0CE6`, `USB\Class_03&SubClass_00&Prot_00`, `USB\Class_03`
|
||||||
|
(Windows appends `SWD\Generic`). HIDAPI/SDL/libScePad walk HID-child → `CM_Get_Parent` → this parent's
|
||||||
|
CompatibleIds and string-match `"USB"` → **`bus_type` now resolves to USB** (was UNKNOWN).
|
||||||
|
- **`pszzHardwareIds`** = `pf_dualsense` **first** (so the INF still binds our UMDF driver), then
|
||||||
|
`USB\VID_054C&PID_0CE6&REV_0100`, `USB\VID_054C&PID_0CE6`. hidclass then derives the real-DS5 child ids
|
||||||
|
**`HID\VID_054C&PID_0CE6[&REV_0100]`** (previously only `HID\VID_054C&UP:0001_U:0005`).
|
||||||
|
- **`pContainerId`** = a deterministic per-pad GUID `{50464453-0000-0000-0000-00000000000<idx>}` ("PFDS")
|
||||||
|
— avoids the null-sentinel-ContainerId `xinput1_4` slot-skip bug, and groups the pad's devnodes.
|
||||||
|
|
||||||
|
**Validated live** (real shipping path, `dualsense-windows-test --index 1` alongside the running service's
|
||||||
|
pad 0): INF still binds (`Service=MsHidUmdf`), parent CompatibleIds/HardwareIds + per-pad ContainerId set,
|
||||||
|
the HID child gains `HID\VID_054C&PID_0CE6`, and the HIDAPI parent-walk reports **bus_type=USB**. SDL /
|
||||||
|
RawInput / WGI `RawGameController` identity stays correct (054C:0CE6).
|
||||||
|
|
||||||
|
**Why this may still not satisfy GameInput / a native PS5 path:** GameInput parses VID/PID from the HID
|
||||||
|
child's **instance path** (`HID\punktfunk\1&…`), which carries no `VID_…&PID_…` token; neither CompatibleIds
|
||||||
|
nor HardwareIds change the instance path. Only a real USB-bus instance path (`HID\VID_054C&PID_0CE6\…`) does —
|
||||||
|
i.e. a **ViGEm-style KMDF USB-emulating bus driver** (see fallback below). Prior art (HIDMaestro) shows pure
|
||||||
|
user-mode pads ARE accepted by WGI/GameInput, so other parity (descriptor / strings / mapping) may matter
|
||||||
|
more than a genuine USB bus.
|
||||||
|
|
||||||
|
## GameInput fallback design (rank-3, only if needed)
|
||||||
|
|
||||||
|
If a target title uses **GameInput** AND the shipped identity fix above doesn't satisfy it, the last-resort
|
||||||
|
option is a **rank-3 KMDF USB-emulating bus driver** (the way ViGEmBus presents a real-looking device)
|
||||||
|
instead of SwDeviceCreate + UMDF-HID — it produces a genuine `HID\VID_054C&PID_0CE6\…` instance path, the one
|
||||||
|
thing GameInput keys off. Pursue this only if required by a target title; it is heavier than the user-mode
|
||||||
|
path and HIDMaestro suggests user-mode pads can be made acceptable to GameInput without it.
|
||||||
|
|
||||||
|
## On-glass Cyberpunk verification procedure (open — only the user can run it)
|
||||||
|
|
||||||
|
Must run on the interactive desktop (RDP in or run locally) — WGI / RawInput / GameInput enumeration returns
|
||||||
|
**empty from a headless SSH session** (no window/message pump); only HIDAPI works headless.
|
||||||
|
|
||||||
|
1. Free the service's pad 0 so only the new-identity pad is present:
|
||||||
|
`sc stop PunktfunkHost`
|
||||||
|
2. Spawn a single virtual DS5 carrying the new identity (cycles Cross/stick so input is visible):
|
||||||
|
`target\debug\punktfunk-host.exe dualsense-windows-test --index 0 --seconds 600`
|
||||||
|
3. Launch **Cyberpunk 2077 with Steam Input OFF** (so the game reads the raw HID). Check the in-game
|
||||||
|
glyphs/prompt **switch to DualSense**.
|
||||||
|
4. Restore the service afterward: redeploy the release + restart with `scripts\windows\deploy-host.ps1`.
|
||||||
|
|
||||||
|
## Key code
|
||||||
|
|
||||||
|
| What | File |
|
||||||
|
| --- | --- |
|
||||||
|
| Host backend (`create_swdevice`, the `Global\pfds-shm-<idx>` section, write_state/service/pump) | `crates/punktfunk-host/src/inject/windows/dualsense_windows.rs` |
|
||||||
|
| UMDF driver (HID descriptor, feature reports, `on_output_report`) | `packaging/windows/drivers/pf-dualsense/src/lib.rs` |
|
||||||
|
| Shared report codec (`serialize_state` input, `parse_ds_output` feedback) | `crates/punktfunk-host/src/inject/proto/dualsense_proto.rs` |
|
||||||
|
| Pad seam (`PadBackend`, `pump` → rumble `0xCA` / hidout `0xCD`) | `crates/punktfunk-host/src/punktfunk1.rs` |
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
1. **Decisive on-glass Cyberpunk test — pending execution.** Launch Cyberpunk 2077 with Steam Input OFF
|
||||||
|
against a virtual DS5 carrying the new identity; verify the in-game glyphs switch to DualSense (procedure
|
||||||
|
above).
|
||||||
|
2. **GameInput rank-3 KMDF USB-emulating bus-driver fallback — optional.** Only if a GameInput-only title
|
||||||
|
needs the real VID/PID and the shipped SwDeviceCreate identity fix doesn't satisfy it.
|
||||||
|
|
||||||
|
## Facts proven (don't re-litigate)
|
||||||
|
|
||||||
|
- `SwDeviceCreate` requirements: enumerator must have **no underscore** (`punktfunk`); the completion
|
||||||
|
**callback is mandatory** (NULL → E_INVALIDARG). Per-session device works; auto-removed on disconnect.
|
||||||
|
- The identity keys must be set via the **`SW_DEVICE_CREATE_INFO` struct fields**, not `pProperties` — a
|
||||||
|
`DEVPROPERTY` write of the PnP-owned identity keys is ignored.
|
||||||
|
- HID descriptor + feature reports are DS5-accurate enough that **SDL identifies it as PS5**.
|
||||||
|
- The `IOCTL_HID_GET_STRING` and `DS_FEATURE_CALIBRATION` (42 → 41 bytes) driver gaps were fixed + shipped;
|
||||||
|
the driver answers `HidD_GetManufacturer/Product/SerialNumberString` with distinct strings. (Detail in
|
||||||
|
`packaging/windows/drivers/pf-dualsense/src/lib.rs`.)
|
||||||
|
- Host-side rumble works end to end (driver captures the game's `0x02`, `parse_ds_output` extracts the
|
||||||
|
motors, host forwards `0xCA`); the client (macOS) rendering of `0xCA` onto the physical pad is a separate
|
||||||
|
open bug, not part of game detection.
|
||||||
@@ -0,0 +1,157 @@
|
|||||||
|
# Windows host — virtual DualSense scoping
|
||||||
|
|
||||||
|
> **Status:** SHIPPED — M0 feasibility gate PASSED (2026-06-21), M1–M4 landed. Driver:
|
||||||
|
> `packaging/windows/drivers/pf-dualsense/` (README there); host backend
|
||||||
|
> `crates/punktfunk-host/src/inject/windows/dualsense_windows.rs` + shared contract
|
||||||
|
> `inject/proto/dualsense_proto.rs`. Commits `aa159df` (Rust UMDF driver + shm channel),
|
||||||
|
> `4a73102` (host backend), `fde438a`/`6db3525` (SwDeviceCreate per-session devnode),
|
||||||
|
> `b0c8233` (pure-user-mode DS4/Xbox 360, ViGEm dropped). This doc is trimmed to design
|
||||||
|
> rationale + open items; implementation detail lives in the code and the driver README.
|
||||||
|
|
||||||
|
## Why UMDF2, and why a real virtual DualSense (the WHY)
|
||||||
|
|
||||||
|
Apollo's backlog "DS4 ViGEm target on Windows" is the **wrong target** for *actual DualSense*.
|
||||||
|
ViGEmBus emulates only **Xbox 360 (XUSB)** and **DualShock 4** — never a DualSense. Because this
|
||||||
|
is a *host-side* virtual pad, the DualSense-defining features (adaptive triggers, the fine haptic
|
||||||
|
actuators, DS5 identity) only work end-to-end if the **game sees a real DualSense** and therefore
|
||||||
|
drives them. A DS4 virtual pad makes the game take its DS4 code path and never emit those commands,
|
||||||
|
so the client's adaptive-trigger rendering is never exercised. **ViGEm DS4 structurally cannot
|
||||||
|
deliver adaptive triggers** — that ceiling is the whole reason not to copy Apollo here (and Apollo
|
||||||
|
itself does DualSense only on Linux via `inputtino`; its Windows path is ViGEm `XUSB`/`DS4_REPORT_EX`
|
||||||
|
only — zero virtual-HID/DualSense code to vendor).
|
||||||
|
|
||||||
|
The right path is the Windows analog of the Linux host's `/dev/uhid` device: present a **real virtual
|
||||||
|
DualSense HID device** (Sony VID `054C` / PID `0CE6`, the inputtino PS5 report descriptor we already
|
||||||
|
ship) so the game/Steam/GameInput bind it as genuine.
|
||||||
|
|
||||||
|
**Mechanism = a UMDF2 (user-mode) HID minidriver**, created/torn-down per session via
|
||||||
|
`SwDeviceCreate`, as a lower filter under the OS pass-through driver `mshidumdf.sys`. This is the
|
||||||
|
**same driver tier as SudoVDA** (UMDF, not kernel), so the existing vendor → sign → Inno-installer
|
||||||
|
machinery applies almost unchanged. Two corrections drove this conclusion over the 2026-06-20 draft:
|
||||||
|
|
||||||
|
- **VHF (Virtual HID Framework) supports a HID *source* driver only in kernel mode** — it is *not*
|
||||||
|
the mechanism for a user-mode virtual pad. The user-mode mechanism is a UMDF2 HID minidriver built
|
||||||
|
from the `vhidmini2` sample. So the earlier "KMDF, a higher bar than SudoVDA" framing was wrong:
|
||||||
|
it is the *same* UMDF tier.
|
||||||
|
- **UMDF 2.0 is NOT COM-based** (COM/`IDriverEntry`/`IWDFDriver` are legacy UMDF 1.x). UMDF 2.0 uses
|
||||||
|
the same **C-style WDF object model as KMDF** — a `DriverEntry` symbol + C function pointers, no
|
||||||
|
vtable. This is precisely why a Rust FFI implementation is even conceivable.
|
||||||
|
|
||||||
|
Everything except the host backend was already platform-agnostic and DualSense-complete (protocol
|
||||||
|
planes `0xCC`/`0xCA`/`0xCD`, the `HidOutput` feedback abstraction, pad-type negotiation, clients, the
|
||||||
|
C-ABI). The DualSense HID contract (the 232-byte `DUALSENSE_RDESC`, `serialize_state` for input report
|
||||||
|
`0x01`, `parse_ds_output` for output report `0x02`, the `0x05`/`0x09`/`0x20` feature blobs, USB framing
|
||||||
|
no-CRC) was already pure transport-independent Rust — so the report bytes are identical to Linux and
|
||||||
|
only the device-framing layer is new.
|
||||||
|
|
||||||
|
## Why Rust ("Option R") despite zero precedent
|
||||||
|
|
||||||
|
The user's strong preference was a **self-authored Rust driver**, accepted as pioneering risk.
|
||||||
|
`microsoft/windows-drivers-rs` officially targets UMDF and ships a real (but *bare-stub*) UMDF sample;
|
||||||
|
because UMDF 2.0 is the C function-pointer model, the FFI maps cleanly. The honest gap going in: the
|
||||||
|
whole HID-minidriver layer (`WdfFdoInitSetFilter`, the manual inverted-call queue, `IOCTL_UMDF_HID_*`
|
||||||
|
dispatch, `HID_XFER_PACKET`) was hand-written `unsafe` FFI with no safe wrappers, and **every** other
|
||||||
|
shipping virtual-HID controller driver (`vhidmini2`, HIDMaestro, DsHidMini) is C — so symbol coverage
|
||||||
|
for the UMDF target was unproven. The de-risk plan was a C `vhidmini2` shim fallback (keeping all
|
||||||
|
DualSense logic in the Rust host either way), with forking HIDMaestro as the last resort (rejected for
|
||||||
|
real use because **HIDMaestro omits adaptive triggers** — it cannot prove the one thing that makes a
|
||||||
|
virtual DualSense worth building).
|
||||||
|
|
||||||
|
**Outcome: Option R confirmed.** The M0 spike answered both the build-symbol question and the on-glass
|
||||||
|
gate with a Rust driver — no C shim needed. The DualSense *logic* stays in Rust where it already lived.
|
||||||
|
|
||||||
|
## M0 feasibility gate — PASSED (2026-06-21), and the three bugs
|
||||||
|
|
||||||
|
The blocking gate (RTX box `192.168.1.173`; the dev VM is headless/WARP and cannot validate
|
||||||
|
game-facing HID recognition) asked two questions no prior art settled:
|
||||||
|
|
||||||
|
1. **Recognition** — is a virtual `054C:0CE6` UMDF2 device accepted as a *genuine DualSense* by
|
||||||
|
`Windows.Gaming.Input` / GameInput / Steam? **YES for Steam** (GameInput still reads VID/PID as `0x0000` — open item 2) — Steam recognized it and drove its
|
||||||
|
DualSense-specific LEDs.
|
||||||
|
2. **Adaptive-trigger fidelity** — does the game's output report `0x02` (the adaptive-trigger block)
|
||||||
|
actually reach the driver's `WriteReport`/`SetOutputReport` callback? **YES** — captured two
|
||||||
|
Steam-Input output reports (`validFlag1=0x14` = LIGHTBAR|PLAYER_INDICATOR). Adaptive-trigger bytes
|
||||||
|
ride the same `0x02` path.
|
||||||
|
|
||||||
|
> **Three M0 bugs — reference for any future UMDF-in-Rust work:**
|
||||||
|
> 1. **PE FORCE_INTEGRITY blocks self-signed load.** `wdk-build`'s `/INTEGRITYCHECK` sets the PE
|
||||||
|
> FORCE_INTEGRITY bit, which demands a Microsoft-trusted signature to load. Fix: **clear bit `0x80`
|
||||||
|
> at offset PE+`0x5e` post-build and re-sign.** This was the load wall (earlier "Secure Boot blocks
|
||||||
|
> self-signed UMDF" conclusions were wrong).
|
||||||
|
> 2. **Timer `ExecutionLevel` must be `InheritFromParent`, not zeroed.** A `mem::zeroed`
|
||||||
|
> `WDF_TIMER_CONFIG` gives ExecutionLevel 0, which the framework rejects.
|
||||||
|
> 3. **Queue `NumberOfPresentedRequests` must be `u32::MAX`, not 0.** A zeroed parallel-queue config
|
||||||
|
> caps in-flight requests at 0 → `EvtIoDeviceControl` never fires.
|
||||||
|
|
||||||
|
## Milestones
|
||||||
|
|
||||||
|
| # | Milestone | State | Commit(s) |
|
||||||
|
|---|---|---|---|
|
||||||
|
| **M0** | Feasibility spike (Rust UMDF build + on-glass recognition + `0x02` callback) | ✅ SHIPPED | driver `aa159df` |
|
||||||
|
| **M1** | Extract transport-independent contract into `inject/dualsense_proto.rs` (`DUALSENSE_RDESC`, `serialize_state`, `parse_ds_output`, feature blobs; calibration trimmed 42→41) | ✅ SHIPPED | `4a73102` |
|
||||||
|
| **M2** | UMDF2 HID minidriver + INF + signed `.cat` (authored in **Rust**) | ✅ SHIPPED | `aa159df` |
|
||||||
|
| **M3** | Rust host bridge `inject/dualsense_windows.rs` (`DualSenseWindowsManager` over `Global\pfds-shm-<idx>`; `SwDeviceCreate` per-session devnode) | ✅ SHIPPED | `4a73102`, `fde438a`, `6db3525` |
|
||||||
|
| **M4** | Un-gate the `PadBackend::DualSense` seam + `GamepadPref::DualSense` resolution on Windows; ViGEm dropped (pure user-mode DS4/Xbox 360 too) | ✅ SHIPPED | `b0c8233` |
|
||||||
|
|
||||||
|
A `SwDeviceCreate` gotcha surfaced during M3 and is worth keeping: two `E_INVALIDARG` causes were found
|
||||||
|
— (1) an **underscore in the enumerator name** (`pf_dualsense` → must be `punktfunk`), and (2) a **NULL
|
||||||
|
completion callback** (the callback is mandatory); the INF lists both `root\pf_dualsense` (devgen) and `pf_dualsense`
|
||||||
|
(SwDevice) and the host falls back to an out-of-band devnode when per-session create fails.
|
||||||
|
|
||||||
|
## Decision matrix (condensed)
|
||||||
|
|
||||||
|
| Option | Adaptive triggers / DS5 identity | Effort | When it's right |
|
||||||
|
|---|---|---|---|
|
||||||
|
| **A. UMDF2 virtual DualSense** (shipped) | ✅ full | medium — UMDF, same tier as SudoVDA | the goal — matches the Linux host |
|
||||||
|
| **B. ViGEm DS4** | ❌ never (DS4 ceiling) | small | quick PS-pad, no adaptive triggers — **rejected, ViGEm removed** |
|
||||||
|
| **C. Hybrid** | A for DS5, Xbox 360 fallback | A + small | belt-and-suspenders (Xbox 360/XInput still covers most games) |
|
||||||
|
| **D. Defer** | — | — | would have applied only if the M0 `0x02` gate had failed |
|
||||||
|
|
||||||
|
Xbox 360 (XInput) covers most Windows games regardless; Xbox One/Series fold into it on Windows.
|
||||||
|
|
||||||
|
## Risk register (condensed)
|
||||||
|
|
||||||
|
| Risk | Status |
|
||||||
|
|---|---|
|
||||||
|
| Output `0x02` never reaches the driver write callback (fatal to value prop) | **resolved** — M0 measured it directly, YES |
|
||||||
|
| `054C:0CE6` not accepted as a real DualSense | **resolved** — Steam recognizes it |
|
||||||
|
| Rust UMDF pioneering risk (no safe WDF/HID wrappers; symbol coverage) | **resolved** — Rust driver shipped, no C shim |
|
||||||
|
| `SwDeviceCreate` device lifetime tied to host process handle | accepted — hold `HSWDEVICE` for the session (matches Linux UHID fd semantics) |
|
||||||
|
| `windows-drivers-rs` transient toolchain breaks (LLVM-22 bindgen, Disc. #591) | low — pin LLVM 21.1.2 |
|
||||||
|
| EV cert + Partner Center attestation lead time / friction | **open** (see below) |
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
1. **Public-distribution signing — EV cert + Microsoft attestation.** The fleet/self-signed recipe
|
||||||
|
(bundled `.cer` → machine Root + TrustedPublisher via `certutil -addstore -f`, then `pnputil
|
||||||
|
/add-driver /install`, cloned from `install-sudovda.ps1`) works for dev/internal boxes only —
|
||||||
|
Microsoft is explicit it "should never be followed for any driver package distributed outside your
|
||||||
|
organization." For arms-length public users the minimal correct path is **Microsoft attestation
|
||||||
|
signing** via Partner Center (it re-signs the `.cat` with a Microsoft cert → silent PnP install, no
|
||||||
|
publisher prompt, no Root-store import). A bare Authenticode/OV/EV signature is **not** sufficient:
|
||||||
|
it installs but with the blocking "would you like to install this device software?" prompt
|
||||||
|
(setupapi `0x800b0109` / `0xe0000242`). Attestation needs a registered Windows Hardware Developer
|
||||||
|
Program (Partner Center) account **and an EV code-signing cert** (FIPS hardware token, ~USD
|
||||||
|
250–560/yr, 1–7 day vetting) to register and to sign the submission CAB. UMDF is exempt from
|
||||||
|
kernel-mode load enforcement so the `.dll` *loads* unsigned, but *installation* still needs a
|
||||||
|
trusted catalog. The EV key is non-exportable → CAB signing + submission is a **manual offline
|
||||||
|
step**, not a CI secret; vendor the Microsoft-resigned `.cat` like SudoVDA's. (Azure Trusted Signing
|
||||||
|
cannot substitute — it signs only user-mode PE/`/INTEGRITYCHECK`/SmartScreen, not the driver `.cat`.)
|
||||||
|
**Blocks public release; dev/fleet self-signed works today.**
|
||||||
|
|
||||||
|
2. **GameInput API detection reads VID/PID as `0x0000`.** The GameInput path does not pick up the
|
||||||
|
`054C:0CE6` identity (reads `0x0000`); may require the KMDF USB-emulating bus driver rather than the
|
||||||
|
software-enumerated (`SwDeviceCreate`, `SWD\…`) UMDF HID device. Tracked in
|
||||||
|
[`design/windows-dualsense-game-detection.md`](windows-dualsense-game-detection.md).
|
||||||
|
|
||||||
|
3. **HidHide integration** — unclear value on a usually-headless host; only relevant when a physical
|
||||||
|
pad is also attached. Decide whether to bundle/integrate at all.
|
||||||
|
|
||||||
|
4. **Minimum-OS / `UMDFVERSION` targeting decision** — which `UmdfLibraryVersion` / WDK to target for
|
||||||
|
the widest Win10/11 install base, consistent with punktfunk's existing host support matrix.
|
||||||
|
|
||||||
|
5. **Single multi-driver CAB** — can one Partner Center submission carry *both* SudoVDA and the
|
||||||
|
DualSense driver? Multi-driver CABs are supported in general; unverified for this account.
|
||||||
|
|
||||||
|
6. **DsHidMini end-user signing tier** — self-signed vs attestation in its WixSharp MSI, useful as a
|
||||||
|
second public-distribution data point.
|
||||||
@@ -0,0 +1,410 @@
|
|||||||
|
# Windows Host — Architecture, Status & Roadmap
|
||||||
|
|
||||||
|
> **Single source of truth** for the punktfunk Windows streaming host: the all-Rust **`pf-vdisplay`
|
||||||
|
> IddCx virtual-display driver** + **IDD-push zero-copy capture** + **NVENC/AMF/QSV encode**, shipped as
|
||||||
|
> a signed Inno Setup installer with a LocalSystem SCM service. Live-validated on the RTX box through
|
||||||
|
> 5120×1440@240 HDR, the secure desktop (lock/UAC), and a fullscreen game.
|
||||||
|
>
|
||||||
|
> This file is the consolidated Windows-host doc — it absorbs the rewrite design plan, the Goal-1
|
||||||
|
> staged-refactor plan, the audit + remediation tracker, the fullscreen-game capture-bug analysis, and
|
||||||
|
> the durable rationale from the original `windows-host.md` implementation plan (now a stub).
|
||||||
|
> **Last updated 2026-06-26.** All of this work is **merged to `main`** (the `windows-host-goal1`
|
||||||
|
> branch landed at `3e7c9bd`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Status at a glance
|
||||||
|
|
||||||
|
**Goals 1–3 and milestones M0–M4 are complete and merged to `main`.** The host has a clean, typed,
|
||||||
|
layered architecture (`HostConfig → SessionPlan → SessionContext`, `windows/`+`linux/` confinement, a
|
||||||
|
single `VirtualDisplayManager` ownership model, `EncoderCaps`); the all-Rust IddCx `pf-vdisplay` driver
|
||||||
|
loads self-signed under Secure Boot and does IDD-push zero-copy capture at 5K@240 HDR including the
|
||||||
|
**secure desktop** (Winlogon/UAC/lock); SudoVDA is gone (`84a3b95`) — `pf-vdisplay` is the sole
|
||||||
|
virtual-display backend; and the three UMDF drivers (`pf-vdisplay`, `pf-dualsense`, `pf-xusb`) now build
|
||||||
|
from source in one unified `packaging/windows/drivers/` workspace (M4, `92e6802`). The shipped path
|
||||||
|
(IDD-push + NVENC) is live-validated on glass; the AMF/QSV encode path is CI-green but not yet
|
||||||
|
on-hardware (no AMD/Intel Windows box in the lab).
|
||||||
|
|
||||||
|
Ground the details against the code: `crates/punktfunk-host/src/windows/`,
|
||||||
|
`crates/punktfunk-host/src/{capture,encode,inject,audio,vdisplay}/windows/`, and
|
||||||
|
`packaging/windows/drivers/`.
|
||||||
|
|
||||||
|
**What remains (all non-blocking):** the `pf-vdisplay` slot-reclaim-on-REMOVE fix needs an on-glass
|
||||||
|
reconnect-storm A/B (§4 P1.3); host-crate `unsafe` lint hygiene + old-monolith / bring-up-scaffolding
|
||||||
|
cleanup (§4 P2); and the hardware-gated items — AMF/QSV on-glass, hybrid-GPU `SET_RENDER_ADAPTER`,
|
||||||
|
the WGC/DDA fallback reshape, and true `max_concurrent>1` (§4 P3). One framing note: the host was
|
||||||
|
**not** greenfield-rebuilt — it was **refactored in place** via a staged, behavior-preserving sequence
|
||||||
|
that kept the live host working at every step; only the *driver* was rebuilt fresh.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Architecture (what is on disk)
|
||||||
|
|
||||||
|
A ~1-page map; the empirical constraints these encode are in §3, the deep reference is in §6.
|
||||||
|
|
||||||
|
### 2.1 Layering & crates
|
||||||
|
|
||||||
|
- **`crates/punktfunk-host`** — one shared host crate (Linux + Windows; not split). Platform code is
|
||||||
|
confined under per-module `windows/`+`linux/` folders behind `#[cfg]` seams (`capture/{windows,linux}/`,
|
||||||
|
`encode/…`, `inject/…`, `audio/…`, `vdisplay/…`, plus top-level `src/windows/`+`src/linux/`). Module
|
||||||
|
names stay flat (`#[path]`), so caller paths are platform-agnostic.
|
||||||
|
- **`crates/punktfunk-core`** — the one linked protocol/FEC/crypto/QUIC core (unchanged here).
|
||||||
|
- **`crates/pf-driver-proto`** — the owned, `no_std` host↔driver ABI (frame ring + control plane +
|
||||||
|
gamepad SHM), consumed by both the host crate and the driver workspace.
|
||||||
|
- **`packaging/windows/drivers/`** — the unified driver workspace on `microsoft/windows-drivers-rs`
|
||||||
|
(vendored 0.5.1 + an `iddcx` subset): `pf-vdisplay` (the IddCx display driver), `pf-dualsense` +
|
||||||
|
`pf-xusb` (the gamepad drivers, folded in by M4), `wdk-iddcx` (typed IddCx DDI wrappers), `wdk-probe`
|
||||||
|
(the CI link/surface gate), `vendor/{wdk-build,wdk-sys}`.
|
||||||
|
|
||||||
|
### 2.2 Session resolution, ownership, and seam traits (Goal-1)
|
||||||
|
|
||||||
|
The old ~40-knob `PUNKTFUNK_*` env soup (re-read and recomputed in three places) is replaced by a
|
||||||
|
**resolve-once** pipeline: `config.rs` `HostConfig` (typed, parsed once) → `session_plan.rs` `SessionPlan`
|
||||||
|
(a `Copy` plan resolved once per session — `CaptureBackend::resolve()` picks `IddPush | Dda | Wgc`,
|
||||||
|
`resolve_topology` picks `SingleProcess | TwoProcessRelay`; this killed the latent capture/encode
|
||||||
|
backend-disagreement bug) → `SessionContext` (bundles the ~13 session args + plane receivers, moved into
|
||||||
|
the stream thread).
|
||||||
|
|
||||||
|
Ownership is a single **OnceLock `VirtualDisplayManager`** (`vdisplay/windows/manager.rs`) owning a
|
||||||
|
*typed* `Arc<OwnedHandle>` control-device handle (no raw-`isize` cross-thread smuggle), a refcounted
|
||||||
|
Idle/Active/Lingering state machine, and the monitor generation; a per-session `MonitorLease`'s `Drop`
|
||||||
|
releases the refcount (a stale lease can't tear down a fresh monitor). This deleted a fistful of
|
||||||
|
`CURRENT_MON_GEN`/`MGR`/`IDD_*` globals and validated on glass at **0 leaked monitors across a reconnect
|
||||||
|
storm**, A/B-equivalent to the shipping host.
|
||||||
|
|
||||||
|
The seam traits (`VirtualDisplay`/`VirtualOutput`/`VirtualLease`, `Capturer`, `Encoder`,
|
||||||
|
`AudioCapturer`/`VirtualMic`/`InputInjector`/`PadManager`) got two tightenings: the capturer takes the
|
||||||
|
desired `OutputFormat { gpu, hdr }` **in** (killing the `capture → encode::windows_resolved_backend()`
|
||||||
|
back-reference), and `Encoder::caps() -> EncoderCaps` (§2.4) lets the session glue route loss-recovery by
|
||||||
|
query.
|
||||||
|
|
||||||
|
### 2.3 Capture — IDD-push primary (normal **and** secure desktop), WGC/DDA fallback, GB1 recovery
|
||||||
|
|
||||||
|
**IDD-push is the universal primary path.** Capture comes straight from the driver's shared keyed-mutex
|
||||||
|
texture ring (`capture/windows/idd_push.rs`) — no Desktop Duplication, no `win32u` reparenting hook. The
|
||||||
|
host creates the ring; the driver opens it (permissive `D:(A;;GA;;;WD)` SDDL). The generation-tagged
|
||||||
|
`latest = gen<<40 | seq<<8 | slot` stale-ring reject kills the HDR-flip garbage frame; a host-owned
|
||||||
|
3-slot `OUT_RING` rotated per frame is the texture-ownership contract that enables `pipeline_depth=2`
|
||||||
|
(convert/copy on the 3D engine overlapping NVENC on the ASIC). It captures the **secure desktop**
|
||||||
|
(Winlogon/UAC/lock) directly (validated 2026-06-25), so there is no separate secure capturer in the
|
||||||
|
primary path.
|
||||||
|
|
||||||
|
- **Open-time fallback:** `IddPushCapturer::open` waits a bounded ~4 s for a *first frame* (not just
|
||||||
|
`DRV_STATUS_OPENED`); on attach failure it returns the keepalive back so `capture.rs` opens **DDA** on
|
||||||
|
the same `WinCaptureTarget` — never a 20 s black bail (`ed58365`/`f98ab07`).
|
||||||
|
- **Mid-session game mode-set recovery (GB1, fixed):** the 250 ms poll follows the display's *actual*
|
||||||
|
resolution (`win_display::active_resolution`, CCD/GDI) and recreates the ring on any descriptor change
|
||||||
|
(size **or** HDR) → the driver re-attaches → frames resume at the game's mode, **no reconnect**. If a
|
||||||
|
change is unrecoverable (e.g. an exclusive flip), a `recovering_since` clock drops the session after 3 s
|
||||||
|
so the client reconnects cleanly. No protocol bump was needed — the host reads the resolution straight
|
||||||
|
from Windows (`c87bfe0`; the driver's `publish()` width/height guard + flushed log is `789ad49`).
|
||||||
|
- **WGC + DDA** stay as demoted fallbacks for non-IddCx hardware (`wgc.rs`/`dxgi.rs`). The two-process WGC
|
||||||
|
secure-desktop relay (`wgc_relay.rs`) is no longer load-bearing now that IDD-push handles the secure
|
||||||
|
desktop; it is kept recoverable but slated for M5/M6 cleanup. (Its constraint analysis is archived in
|
||||||
|
[`archive/windows-secure-desktop.md`](archive/windows-secure-desktop.md).)
|
||||||
|
|
||||||
|
### 2.4 Encode — NVENC / AMF / QSV / software; `EncoderCaps`; HDR
|
||||||
|
|
||||||
|
`encode/windows/` dispatches per DXGI adapter vendor (`open_video`): **NVENC** (NVIDIA, direct SDK,
|
||||||
|
`nvenc.rs` — caps-probe-before-configure, bitrate-clamp binary search, true RFI over the DPB, in-band
|
||||||
|
ST.2086/CLL SEI), **AMF**/**QSV** (AMD/Intel via libavcodec, `ffmpeg_win.rs` — system-readback default,
|
||||||
|
opt-in zero-copy D3D11; CI-only, no lab hardware), or **software** H.264 (`sw.rs`). HDR (10-bit) forces
|
||||||
|
HEVC Main10 + BT.2020 PQ; the client auto-detects PQ from the VUI. The encoder adapts to a mid-session
|
||||||
|
size/format/HDR change per frame (tears down + re-inits), so the GB1 capturer's resolution changes are
|
||||||
|
handled downstream with no API change.
|
||||||
|
|
||||||
|
`Encoder::caps() -> EncoderCaps { supports_rfi, supports_hdr_metadata }` lets the session glue route
|
||||||
|
loss-recovery by query (only Windows direct-NVENC overrides it; the GameStream loop gates the RFI path on
|
||||||
|
`supports_rfi` rather than hard-coding per-backend knowledge into the glue).
|
||||||
|
|
||||||
|
### 2.5 Host↔driver ABI & the `pf-vdisplay` driver
|
||||||
|
|
||||||
|
`pf-driver-proto` is one `no_std` crate in both build graphs. It owns the **frame plane** (`FrameToken`
|
||||||
|
+ `Global\pfvd-*` names), the **control plane** (a fresh interface GUID — *not* SudoVDA's `e5bcc234`;
|
||||||
|
contiguous `0x900` IOCTL ops; a `GET_INFO` version handshake the host **asserts** + bails on mismatch),
|
||||||
|
and the **gamepad SHM** (`XusbShm`/`PadShm` incl. `device_type`). `bytemuck`-`Pod` + `size_of` **and**
|
||||||
|
`offset_of!` asserts make ABI drift a **compile error**.
|
||||||
|
|
||||||
|
The driver (`packaging/windows/drivers/pf-vdisplay/src/`) is an all-Rust UMDF IddCx driver on
|
||||||
|
`windows-drivers-rs` + the `iddcx` `wdk-sys` subset; the STEP 0–8 build is the checklist in §6.3, its
|
||||||
|
internals are the invariants in §3, and it loads self-signed under Secure Boot (FORCE_INTEGRITY cleared
|
||||||
|
post-link, §6.1). **Known gaps:** ownership state is still partly process-global with
|
||||||
|
`EvtCleanupCallback` on the **WDFDEVICE** (a deliberate, sound choice — E1 in §4); and
|
||||||
|
slot-reclaim-on-REMOVE (fix landed, on-glass validation still pending — §4 P1.3).
|
||||||
|
|
||||||
|
### 2.6 Service, packaging, installer
|
||||||
|
|
||||||
|
A `LocalSystem` SCM supervisor (`windows/service.rs`) token-retargets and `CreateProcessAsUserW`s `serve`
|
||||||
|
into the console session (so `SendInput` reaches both the streamed and the secure desktop), relaunches on
|
||||||
|
session-change, and kills-on-close via a Job Object — the Sunshine/Apollo model (rationale:
|
||||||
|
[`windows-service.md`](windows-service.md)). Shipped as a **signed Inno Setup** `setup.exe`
|
||||||
|
(`packaging/windows/`, `windows-host.yml`) that builds + signs all three drivers from source, bundles
|
||||||
|
them + the FFmpeg DLLs, and delegates to `service install`. GameStream (Moonlight) is kept, but the
|
||||||
|
installer/service default to secure `serve` (GameStream opt-in).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Validated invariants — preserve, do not regress
|
||||||
|
|
||||||
|
These are expensive empirical wins; keep them intact when touching the code:
|
||||||
|
|
||||||
|
- **Frame transport:** host-creates/driver-opens keyed-mutex ring; generation-tagged stale-ring reject;
|
||||||
|
0 ms try-acquire / drop-on-full publish (never block the swap-chain thread); the `OUT_RING` rotation +
|
||||||
|
`pipeline_depth=2` overlap; `repeat_last` rotates into a fresh out-ring slot (depth-safe).
|
||||||
|
- **Driver internals:** `edid.rs` (128-byte EDID + CTA-861.3 HDR block, dual checksums); the FP16 HDR
|
||||||
|
recipe (`CAN_PROCESS_FP16` + the `*2` DDIs + gamma/HDR accept-stubs + `HIGH_COLOR_SPACE`); `DEVICE_POOL`
|
||||||
|
per render-LUID (NVIDIA UMD/VRAM leak fix); target-id stamped on the monitor context; the two swap-chain
|
||||||
|
leak fixes (borrow `IDXGIDevice` across `SetDevice` retries; check `terminate` at the loop top).
|
||||||
|
- **Monitor lifecycle:** serialized ADD/REMOVE/teardown; restore CCD topology **before** REMOVE; the
|
||||||
|
generation-stamped lease (a stale lease can't tear down a fresh monitor); 0-leak across reconnects.
|
||||||
|
- **HDR color math:** `hdr.rs` (pure, unit-tested, ST.2086 + big-endian SEI); the FP16→P010/Rgb10a2
|
||||||
|
converters + `hdr_p010_selftest`; the cursor decomposition.
|
||||||
|
- **NVENC tuning:** caps-probe-before-configure (10-bit→8-bit graceful downgrade); bitrate-clamp binary
|
||||||
|
search (each GPU's real ceiling); true RFI over the DPB; CBR / infinite-GOP / P-only / ~1-frame VBV.
|
||||||
|
- **Gamepad recipe:** the SwDeviceCreate identity (enumerator with no `_`; mandatory completion callback;
|
||||||
|
synthesized DS5 compat-ids; non-null per-pad `ContainerId`); one `pf_dualsense` serving DualSense+DS4
|
||||||
|
via a `device_type` byte; XUSB declining `WAIT_*`; per-pad index via `pszDeviceLocation`.
|
||||||
|
- **Session glue:** the trait seam + RAII keepalive teardown; host-lifetime shared services + per-session
|
||||||
|
gamepads; the encode|send split + microburst pacing; `build_pipeline_with_retry` permanent-vs-transient
|
||||||
|
classification; the GameStream `VideoPacketizer` (GF8 Cauchy, Moonlight byte-exact); the pairing/trust
|
||||||
|
handshake.
|
||||||
|
- **Core discipline:** no async on the per-frame path; `pf-driver-proto` is the single ABI source
|
||||||
|
(drift = compile error); the version handshake the host asserts.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Open work / next tasks (prioritized)
|
||||||
|
|
||||||
|
**P1 — ship-readiness / correctness**
|
||||||
|
1. **Goal-1 → `main` merge — ✅ DONE.** The `windows-host-goal1` branch is merged (tip `3e7c9bd`); the
|
||||||
|
full Windows CI matrix (incl. the `amf-qsv` encode path that local checks skip) runs on push.
|
||||||
|
2. **IDD-push default — ✅ resolved via `host.env`.** The shipped default `host.env` sets
|
||||||
|
`PUNKTFUNK_IDD_PUSH=1`, so a fresh install runs the validated IDD-push path (with the WGC/DDA fallback
|
||||||
|
in place). The bare *in-code* default (`config.rs`) is still `false` (the dev / non-pf-driver default);
|
||||||
|
flipping it to follow the deployed default is an optional tidy.
|
||||||
|
3. **pf-vdisplay slot reclaim on REMOVE** (driver robustness) — 🟡 **fix landed, on-glass-validation
|
||||||
|
pending.** Sustained ADD/REMOVE churn wedged the driver (`ADD → 0x80070490 ERROR_NOT_FOUND`) because the
|
||||||
|
monitor id (EDID serial / `ConnectorIndex` / container GUID) was a **monotonic** `NEXT_ID`, never
|
||||||
|
reclaimed → IddCx accumulated a new OS target slot per cycle until exhaustion. `monitor.rs` now allocates
|
||||||
|
the **lowest free id** (`alloc_monitor_id`), released on REMOVE, so a fresh ADD reuses the departed
|
||||||
|
monitor's target slot instead of orphaning it. CI-compile-gated; the wedge only reproduces under
|
||||||
|
sustained churn on the RTX box, so this needs an **on-glass reconnect-storm A/B** to confirm (the box is
|
||||||
|
ephemeral). Keep `packaging/windows/reset-pf-vdisplay.ps1` as the recovery until validated.
|
||||||
|
|
||||||
|
**P2 — hygiene / architecture completion**
|
||||||
|
4. **D1-host — host-crate P0 lints — deferred (low value / high churn).** A crate-wide
|
||||||
|
`#![deny(unsafe_op_in_unsafe_fn)]` produced 100+ FFI-wrap sites across the Linux modules; it *wraps*
|
||||||
|
unsafe (discipline) rather than reducing it and doesn't improve stability, so it was deprioritized vs
|
||||||
|
the `OwnedHandle`/RAII reductions (which are **complete** — `idd_push.rs`, `service.rs`, the three
|
||||||
|
gamepad backends via a shared `gamepad_raii.rs`, the SCM STOP/SESSION events as `OnceLock<OwnedHandle>`,
|
||||||
|
the hot-loop `KeyedMutexGuard`, and the driver's `pod_init!`; all box-validated, clean `sc stop` in
|
||||||
|
~1 s). The driver already has the deny. Revisit D1-host as a final discipline pass (staged per-module)
|
||||||
|
if desired.
|
||||||
|
5. **M6 scaffolding cleanup** — delete the bring-up diagnostics (`spawn_observer`/`DebugBlock` in
|
||||||
|
`idd_push.rs`) and, once full parity is proven on glass, the host monoliths.
|
||||||
|
|
||||||
|
**Explicitly NOT doing (stability decision): E1 — driver `DeviceContext` ownership + per-`IDDCX_MONITOR`
|
||||||
|
`EvtCleanupCallback`.** The current process-global design is *sound*: IddCx DDIs receive only an
|
||||||
|
`IDDCX_MONITOR` handle (never the WDFDEVICE/context), and `ProcessSharingDisabled` makes one devnode = one
|
||||||
|
host process that dies with the device. A "device-owned" variant would *add* a use-after-free window (the
|
||||||
|
watchdog races device cleanup) for no gain, and the per-monitor cleanup callback isn't reliably reachable
|
||||||
|
on this UMDF/IddCx stack. Cleanup is already deterministic (WDFDEVICE `EvtCleanupCallback` +
|
||||||
|
`cleanup_for_device_removal` + the host-gone watchdog). **Revisit only if `max_concurrent>1` on Windows is
|
||||||
|
actually needed.** (`monitor.rs` documents this rationale at the `MONITOR_MODES` static.)
|
||||||
|
|
||||||
|
**P3 — larger, mostly hardware-gated**
|
||||||
|
6. **M4 — gamepad-driver unification — ✅ substantially DONE** (`92e6802`). `pf-dualsense` (DualSense /
|
||||||
|
DualShock 4) and `pf-xusb` (Xbox 360 / XInput) now live in the unified `packaging/windows/drivers/`
|
||||||
|
workspace and build from source per release against the vendored `wdk-sys`, exactly like `pf-vdisplay`;
|
||||||
|
`build-gamepad-drivers.ps1` signs them with the shared cert. **Remaining:** point the **driver side** at
|
||||||
|
`pf_driver_proto::gamepad::{PadShm,XusbShm}` (the host side already does — the `device_type`-at-offset
|
||||||
|
hand-duplication is the last ABI-drift hazard), add WDF device contexts for true multi-pad, and confirm
|
||||||
|
the source build matches the prior shipped binaries.
|
||||||
|
7. **M5 — reshape WGC/DDA + GameStream onto `session/pipeline`**, then delete the old relay/monoliths.
|
||||||
|
AMF/QSV stays CI-only (no lab hardware).
|
||||||
|
8. **On-glass behavioral validation** of the committed-but-unexercised fixes: the watchdog reaping on
|
||||||
|
host-kill, `SET_RENDER_ADAPTER` on a **hybrid** box (the lab box is single-dGPU), the IDD-push→DDA
|
||||||
|
fallback trigger, HDR-ring sizing + out-ring repeat under real HDR/static-desktop pipelining, and the
|
||||||
|
AMF/QSV encode path on real AMD/Intel hardware.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Operations
|
||||||
|
|
||||||
|
### 5.1 RTX box on-glass recipe
|
||||||
|
|
||||||
|
The persistent on-glass validator is the **RTX box** (`ssh "Enrico Bühler"@<ip>`, ENRICOS-DESKTOP, RTX
|
||||||
|
4090, PS shell). **The IP FLOATS** (DHCP; boots to **Proxmox** on reboot → ephemeral, unreachable after a
|
||||||
|
reboot; recently `.173`/`.158` — confirm current first; **never reboot it, never depend on it surviving**).
|
||||||
|
It has WDK 26100 + LLVM 21.1.2 + the Rust toolchain; build clone at `C:\Users\Public\pf-rewrite` (the
|
||||||
|
user's active driver-dev tree — **don't clobber uncommitted WIP**; use a worktree). Username has a `ü` →
|
||||||
|
quote it; the `ü` only breaks SDL3/client builds, not the host. To validate a host branch: worktree-checkout,
|
||||||
|
build with `CARGO_TARGET_DIR=C:\t-goal1`, then stop the **PunktfunkHost** service, back up the binary +
|
||||||
|
`%ProgramData%\punktfunk\host.env`, copy your build in, restart, drive `punktfunk-probe.exe` loopback,
|
||||||
|
then restore + `git worktree remove`. Drive over ssh via `powershell -EncodedCommand <base64 UTF-16LE>`
|
||||||
|
(plain quoting gets mangled; prefer `Write-Output`/file-redirect for clean output). Driver redeploy:
|
||||||
|
`packaging/windows/redeploy-pf-vdisplay.ps1`; ghost-monitor recovery: `reset-pf-vdisplay.ps1`.
|
||||||
|
|
||||||
|
### 5.2 CI / validation
|
||||||
|
|
||||||
|
The persistent build validator is the **windows-amd64 CI runner** (no GPU — fine for builds / `iddcx`
|
||||||
|
link / `/INTEGRITYCHECK` self-sign / the surface-asserts; live NVENC encode + on-glass validation defer to the RTX
|
||||||
|
box). Workflows: `windows-host.yml` (the host installer), `windows-drivers.yml` (the driver workspace
|
||||||
|
build + FORCE_INTEGRITY clear), `windows-drivers-provision.yml` (WDK/LLVM toolchain), `windows-msix.yml`
|
||||||
|
(the client). A single Windows runner serializes the whole fleet; a `Cargo.toml` touch costs ~25 min of
|
||||||
|
queue, so driver pushes that avoid `Cargo.toml` skip the fleet serialization.
|
||||||
|
|
||||||
|
Local pre-push checks (this Linux box can't compile the Windows paths):
|
||||||
|
```sh
|
||||||
|
cargo test -p pf-driver-proto # the ABI crate (cross-platform)
|
||||||
|
cargo check -p punktfunk-host # Linux paths; win_* mods are #[cfg(windows)]
|
||||||
|
cargo clippy -p punktfunk-host --all-targets -- -D warnings
|
||||||
|
# Windows host clippy (on the box): PUNKTFUNK_NVENC_LIB_DIR=C:\t\nvenc;
|
||||||
|
# cargo clippy -p punktfunk-host --features nvenc --target x86_64-pc-windows-msvc -- -D warnings
|
||||||
|
# Driver build (on the box): cd packaging/windows/drivers; Version_Number=10.0.26100.0;
|
||||||
|
# LIBCLANG_PATH='C:\Program Files\LLVM\bin'; cargo build
|
||||||
|
```
|
||||||
|
Note: a pre-existing rustfmt-version drift exists in some Windows-only files (this box's rustfmt 1.9.0
|
||||||
|
wraps `offset_of!`/`unsafe fn` differently than the runner's) — don't reformat unrelated files to chase it.
|
||||||
|
|
||||||
|
### 5.3 Env knobs (Windows host)
|
||||||
|
|
||||||
|
`PUNKTFUNK_IDD_PUSH=1` (capture from the driver ring; shipped `host.env` default on, in-code default off),
|
||||||
|
`PUNKTFUNK_ENCODER=auto|nvenc` (auto → vendor-detect), `PUNKTFUNK_10BIT=1` + `PUNKTFUNK_HDR_SHADER_P010=1`
|
||||||
|
(HDR), `PUNKTFUNK_SECURE_DDA=1`, `PUNKTFUNK_NO_WGC=1` (pure DDA), `PUNKTFUNK_ZEROCOPY=1`,
|
||||||
|
`PUNKTFUNK_MONITOR_LINGER_MS`, `PFVD_DEBUG_LOG=1` (driver file log — release builds are silent without it).
|
||||||
|
Config lives in `%ProgramData%\punktfunk\host.env`; logs in `%ProgramData%\punktfunk\logs\host.log`.
|
||||||
|
|
||||||
|
### 5.4 Build / deploy / packaging
|
||||||
|
|
||||||
|
x64-only by design (no ARM64 NVIDIA driver). The installer is the thin-`.iss` / fat-binary model
|
||||||
|
delegating to `service install`; tag `host-win-vX.Y.Z`. The drivers are built + FORCE_INTEGRITY-cleared +
|
||||||
|
signed + `Inf2Cat`'d in CI from source. DriverVer must bump on any driver change; create the ROOT devnode
|
||||||
|
via nefcon (devgen is forbidden).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Reference (hard-won — keep)
|
||||||
|
|
||||||
|
### 6.1 The `/INTEGRITYCHECK` answer
|
||||||
|
|
||||||
|
`wdk-build` emits `cargo::rustc-cdylib-link-arg=/INTEGRITYCHECK` **unconditionally** (no cfg/env/Config
|
||||||
|
opt-out), so a self-signed driver can't load (CodeIntegrity 3004/3089). The fix: a deterministic,
|
||||||
|
idempotent post-link step `packaging/windows/clear-force-integrity.ps1` clears the PE FORCE_INTEGRITY bit
|
||||||
|
(`0x0080 @ e_lfanew+0x5e`) + verifies (CI-proven `0x01E0 → 0x0160`), **before** signing. Packaging order:
|
||||||
|
`cargo build` → clear-force-integrity → sign `.dll` → `Inf2Cat` → sign `.cat`. (A public build would use
|
||||||
|
real attestation signing, which satisfies `/INTEGRITYCHECK` legitimately.)
|
||||||
|
|
||||||
|
### 6.2 The `iddcx` binding on `wdk-sys` (the make-or-break — proven, the 6 bindgen knobs)
|
||||||
|
|
||||||
|
IddCx DDIs are **function-table dispatched** (`IddFunctions[]` indexed by `_IDDFUNCENUM::<Name>TableIndex`,
|
||||||
|
`IddDriverGlobals` implicit arg 1) — the same model `wdk-sys` already implements for WDF. The vendored
|
||||||
|
`windows-drivers-rs` 0.5.1 (`packaging/windows/drivers/vendor/`, `[patch.crates-io]`'d) gets a first-class
|
||||||
|
`ApiSubset::Iddcx` that bindgens `iddcx/1.10/IddCx.h` reusing the identical `wdk_default(config)` baseline
|
||||||
|
(so WDF/DXGI types **resolve to**, not redefine, `wdk-sys`'s — type-identity by construction). The six
|
||||||
|
knobs `generate_iddcx` needed (each a real gotcha, all CI-proven):
|
||||||
|
|
||||||
|
1. **`--language=c++`** — `wdk_default` parses C; `IddCx.h`'s `IDARG_*` typedefs need C++ (else a "must use
|
||||||
|
'struct' tag" cascade).
|
||||||
|
2. **`-DIDD_STUB`** — table-dispatch mode; skips `IddCxFuncEnum.h`'s `#error IDDCX_VERSION_MAJOR not
|
||||||
|
defined`. **Do NOT add `WDF_STUB`** (would desync the shared WDF type-identity).
|
||||||
|
3. **`allowlist_recursively(false)` + `allowlist_file("(?i).*iddcx.*")`, full codegen (no `.complement()`)**
|
||||||
|
— emit ONLY IddCx items; WDF/Win types resolve via `use crate::types::*`.
|
||||||
|
4. **`allowlist_type("_?DXGI_.*" / "IDXGI.*" / "_?OPM_.*" / "_?D3DCOLORVALUE")`** — emit the non-WDF types
|
||||||
|
`wdk-sys` doesn't bindgen, locally. The `_?` is load-bearing (`typedef struct _OPM_X {} OPM_X` needs the
|
||||||
|
tag AND the alias).
|
||||||
|
5. **`pub type UINT = ::core::ffi::c_uint;` in `src/iddcx.rs`** — `UINT` is absent from `crate::types`.
|
||||||
|
6. **`translate_enum_integer_types(true)`** — emit native `u32` reprs for the DXGI/OPM ModuleConsts enums
|
||||||
|
(nested modules can't see a parent `UINT`).
|
||||||
|
|
||||||
|
Wrapper note: table dispatch via `_IDDFUNCENUM::<Name>TableIndex as usize` (the ModuleConsts const, **not**
|
||||||
|
a NewType `.0`); NTSTATUS is plain `i32` (`wdk_sys::NT_SUCCESS`). The driver `build.rs` adds the IddCxStub
|
||||||
|
link-search (the import lib is under `iddcx\1.0\` even though headers are `1.10`) + `#[no_mangle] pub static
|
||||||
|
IddMinimumVersionRequired: ULONG = 4`. The versioned `IDD_STRUCTURE_SIZE!` path is dropped — the WDK links
|
||||||
|
the iddcx **1.0** stub (lacks the version table); we target 1.10 vs a current framework, so `size_of` is
|
||||||
|
exactly correct.
|
||||||
|
|
||||||
|
### 6.3 Driver port checklist (STEP 0–8, as landed)
|
||||||
|
|
||||||
|
0. workspace `pf-vdisplay`(cdylib)+`wdk-iddcx`; prove `std::thread`+`OwnedHandle` link under UMDF (done).
|
||||||
|
1. `wdk-iddcx`: 11 typed DDI wrappers via one dispatch macro + re-export the inbound `PFN_*` types.
|
||||||
|
2. DriverEntry + `IDD_CX_CLIENT_CONFIG` (15 callbacks) + DeviceInitConfig + WdfDeviceCreate +
|
||||||
|
CreateDeviceInterface (the owned pf GUID) + DeviceInitialize; `edid.rs` salvaged verbatim.
|
||||||
|
3. DeviceContext + `WDF_DECLARE_CONTEXT_TYPE` blob; `init_adapter` in D0Entry (caps + FP16) →
|
||||||
|
AdapterInitAsync; the `*2` mode DDIs + `query_target_info` + gamma/HDR accept-stubs. (Box gate: loads
|
||||||
|
under Secure Boot, enumerates as an IddCx adapter, Status OK.)
|
||||||
|
4. control plane (`GET_INFO` version handshake the host asserts, ADD/REMOVE/SET_RENDER_ADAPTER/PING/
|
||||||
|
CLEAR_ALL) + create_monitor + real mode DDIs + watchdog + mode bounds; host switched to
|
||||||
|
`pf_driver_proto`.
|
||||||
|
5. `Direct3DDevice` + assign/unassign + `SwapChainProcessor` (worker, `SetDevice` 60×@50 ms single-borrow
|
||||||
|
retry, top-of-loop `terminate`, `ReleaseAndAcquireBuffer2`, `from_raw_borrowed`).
|
||||||
|
6. `FramePublisher` on `pf_driver_proto::frame` + keyed-mutex RAII guard; wire into `run_core`. (Box:
|
||||||
|
full IDD-push glass-to-glass + the **secure-desktop** gate — validated 2026-06-25.)
|
||||||
|
7. HDR / FP16 ring (validated: Mac connects WITH HDR).
|
||||||
|
8. its own `.inx` + an `unsafe`-reduction pass (`deny(unsafe_op_in_unsafe_fn)`, per-site `// SAFETY:`).
|
||||||
|
|
||||||
|
**Remaining driver work** beyond STEP 8: E1 (DeviceContext-owned state + per-`IDDCX_MONITOR`
|
||||||
|
`EvtCleanupCallback` → unblock `max_concurrent>1` — see §4 for why it's deliberately deferred), the
|
||||||
|
slot-reclaim-on-REMOVE fix (§4 P1.3), and folding the gamepad-driver side onto `pf_driver_proto` (M4 tail,
|
||||||
|
§4 P3).
|
||||||
|
|
||||||
|
### 6.4 Resolved product decisions (the five forks)
|
||||||
|
|
||||||
|
**A** the host was refactored **in place** (staged, behavior-preserving), not greenfield-rebuilt — the
|
||||||
|
driver *was* rebuilt fresh. **B** IDD-push primary for everything incl. the **secure desktop** (validated);
|
||||||
|
WGC+DDA demoted to non-IddCx fallbacks. **C** all drivers on `microsoft/windows-drivers-rs` (+ the `iddcx`
|
||||||
|
subset; `/INTEGRITYCHECK` solved) — done for `pf-vdisplay` and now for the gamepad drivers (M4, `92e6802`).
|
||||||
|
**D** keep GameStream (Moonlight), default to secure `serve`. **E** concurrent sessions: the host-side
|
||||||
|
preempt dance was removed by the ownership-model work, but true `max_concurrent>1` on Windows stays blocked
|
||||||
|
on the E1 driver swap-chain-reuse work (deliberately deferred, §4). **Rejected: DeviceContext-per-monitor
|
||||||
|
ownership** — see the E1 stability decision in §4 (it would add a use-after-free window for no gain under
|
||||||
|
`ProcessSharingDisabled`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Origins & design rationale (from the original plan)
|
||||||
|
|
||||||
|
This folds in the durable rationale from the original Windows host + client plan
|
||||||
|
([`windows-host.md`](windows-host.md), now a stub; full original text in git history). The Windows host
|
||||||
|
began (2026-06-10 to 2026-06-14) as a *"add backends behind the existing traits"* job, not a parallel
|
||||||
|
port — `punktfunk-core` and the whole control plane are platform-agnostic, and the host already compiled
|
||||||
|
on non-Linux (macOS) thanks to existing `cfg(target_os)` gating. These framing decisions shaped what
|
||||||
|
shipped and still explain *why* the code is the way it is:
|
||||||
|
|
||||||
|
- **Build order: host-first.** A user preference (the research had recommended *client*-first, since the
|
||||||
|
client is not blocked by the no-GPU problem and becomes the host's test endpoint). The trade-off held —
|
||||||
|
the GPU-gated steps were the only ones that stalled GPU-less.
|
||||||
|
- **Trait-based abstraction → ~95% reuse.** `punktfunk-core` (protocol/FEC/crypto/session/transport/QUIC/
|
||||||
|
C ABI), the GameStream wire logic (mDNS, serverinfo, pairing, RTSP, ENet), the management REST API +
|
||||||
|
`native_pairing`/`discovery`, and the `punktfunk1`/`spike`/`pipeline` orchestration all carried over
|
||||||
|
unchanged — only the OS-touching backends behind `Capturer`/`Encoder`/`VirtualDisplay`/`InputInjector`/
|
||||||
|
`AudioCapturer`/`VirtualMic` are new `#[cfg(windows)]` code. Getting to MSVC needed only ~3 `cfg`-gates
|
||||||
|
(gate the `std::os::fd`/`OwnedFd` unix-isms in `main.rs`/`vdisplay.rs`).
|
||||||
|
- **The no-GPU dev strategy.** Most of the port was built + validated on a **GPU-less Windows VM**: the
|
||||||
|
MSVC compile, the virtual-display control path (WARP), the openh264 software-encode pipeline (full
|
||||||
|
capture→encode→FEC→UDP transport minus HW), SendInput injection + interactive-session/desktop-reattach,
|
||||||
|
gamepad + rumble, and the entire client (software-decode loopback). Only NVENC-D3D11 zero-copy, the
|
||||||
|
DDA-vs-WGC bake-off, split-encode/bitrate-ceiling, and *all* glass-to-glass numbers deferred to a real
|
||||||
|
NVIDIA box (no perf claim transfers from Linux).
|
||||||
|
- **Windows-specific structural issues (no Linux precedent)** — these are the gotchas that drove the
|
||||||
|
service + capture design and remain true:
|
||||||
|
- **Interactive session, not a Session-0 service.** SendInput can't reach the desktop from Session 0;
|
||||||
|
Desktop Duplication / capture need the interactive session. Hence the SYSTEM-in-interactive-session
|
||||||
|
supervisor (§2.6, [`windows-service.md`](windows-service.md)) and the `OpenInputDesktop`/
|
||||||
|
`SetThreadDesktop` re-attach to survive UAC/lock desktop switches.
|
||||||
|
- **Clock epoch.** The skew handshake assumes both ends read the same realtime epoch in ns — the Windows
|
||||||
|
host must emit timestamps from `GetSystemTimePreciseAsFileTime`→Unix-epoch-ns, or cross-machine latency
|
||||||
|
+ `ClockProbe`/`ClockEcho` break (std `SystemTime` on Windows is historically coarser).
|
||||||
|
- **No audio endpoint on a headless IDD.** WASAPI loopback needs a real/virtual render device; the
|
||||||
|
virtual *mic* (client→host) has no clean user-mode path — deferred.
|
||||||
|
- **Color/range.** All clients assume BT.709 limited-range; the BGRA→I420/NV12 path must match or colors
|
||||||
|
wash out — validated against the existing decoders.
|
||||||
|
|
||||||
|
**SudoVDA → pf-vdisplay evolution.** The original plan was built around **SudoVDA**, an off-the-shelf
|
||||||
|
indirect display driver (the same IDD Apollo ships) — chosen to avoid writing/WHQL-signing a driver and to
|
||||||
|
get arbitrary `WxH@Hz` modes on the fly. It carried the host all the way to live-validated NVENC on a real
|
||||||
|
RTX 4090. It was then replaced by the all-Rust `pf-vdisplay` IddCx driver (which solved
|
||||||
|
`/INTEGRITYCHECK` self-signing, §6.1, and gave us the IDD-**push** zero-copy capture path that captures the
|
||||||
|
secure desktop directly) and **deleted in commit `84a3b95`** — `pf-vdisplay` is now the sole
|
||||||
|
virtual-display backend. The full SudoVDA control protocol (IOCTL layout, watchdog keepalive, GDI-name
|
||||||
|
resolution) lives in git history if ever needed as a reference.
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Windows host + client — implementation plan (superseded)
|
||||||
|
|
||||||
|
> **This was the original Windows host + client plan (2026-06-10 to 2026-06-14).** All of it
|
||||||
|
> shipped; its durable design rationale has been folded into
|
||||||
|
> [`windows-host-rewrite.md`](windows-host-rewrite.md), which is now the single Windows-host
|
||||||
|
> architecture / status / reference doc.
|
||||||
|
|
||||||
|
For the current picture, see:
|
||||||
|
|
||||||
|
- **`CLAUDE.md`** ("Where the work stands" → the Windows-host paragraph) — current features and live-validation status.
|
||||||
|
- **[`windows-host-rewrite.md`](windows-host-rewrite.md)** — architecture, validated invariants, operations, and the hard-won reference notes (`/INTEGRITYCHECK`, the `iddcx` bindgen knobs, the driver-port checklist). Its "Origins & design rationale" section absorbs the keep-worthy parts of this original plan.
|
||||||
|
- **git history** — the full original plan (SudoVDA control protocol, the no-GPU dev strategy, the phased host-first build order) is recoverable there.
|
||||||
|
|
||||||
|
Note: SudoVDA — the off-the-shelf virtual display this plan was built around — was replaced by the
|
||||||
|
all-Rust `pf-vdisplay` IddCx driver and deleted in commit `84a3b95`.
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
# Windows service (deployment)
|
||||||
|
|
||||||
|
**Status: SHIPPED.** The `PunktfunkHost` LocalSystem SCM service is the end-user way to run the host
|
||||||
|
on Windows, installed by the signed Inno Setup installer. Sources / details:
|
||||||
|
|
||||||
|
- `crates/punktfunk-host/src/windows/service.rs` — the supervisor.
|
||||||
|
- [`packaging/windows/README.md`](../packaging/windows/README.md) — installer + driver packaging.
|
||||||
|
- `punktfunk-host service --help` — install / start / stop / status / uninstall.
|
||||||
|
|
||||||
|
## Why it works the way it does (the durable rationale)
|
||||||
|
|
||||||
|
The host must capture the **secure desktop** (UAC / lock / login) and inject input there. Desktop
|
||||||
|
Duplication of the secure desktop and `SendInput` into it both require **SYSTEM**, while capture and injection
|
||||||
|
require the **interactive console session** — which a plain Session-0 service is *not* in. One process
|
||||||
|
must therefore be SYSTEM *and* in the interactive session.
|
||||||
|
|
||||||
|
The service resolves this the same way Sunshine/Apollo do: it runs as **LocalSystem in Session 0** but
|
||||||
|
**never captures**. Instead it duplicates its own LocalSystem token, retargets it to the active console
|
||||||
|
session (`SetTokenInformation(TokenSessionId)`), and launches the host there with
|
||||||
|
`CreateProcessAsUserW` (`lpDesktop = winsta0\default`) — supervising it across exits and console-session
|
||||||
|
switches, with a kill-on-close Job Object so a service crash never orphans the SYSTEM host.
|
||||||
|
|
||||||
|
`service run` is the **SCM entry point only** — don't run it by hand (it errors with a hint).
|
||||||
|
|
||||||
|
## Open item — graceful stop
|
||||||
|
|
||||||
|
A service stop currently `TerminateProcess`es the host, which **skips RAII teardown**, so a stale
|
||||||
|
virtual monitor can linger until the next start. The follow-up is a cooperative-stop signal
|
||||||
|
(event/pipe) that lets the host unwind cleanly before exit.
|
||||||
@@ -0,0 +1,149 @@
|
|||||||
|
# Windows virtual display — a Rust port of SudoVDA (investigation & plan)
|
||||||
|
|
||||||
|
> **Status:** SHIPPED (P1, 2026-06-22) + P2 CLOSED as a dead end. The all-Rust IddCx driver
|
||||||
|
> `pf-vdisplay` (`packaging/windows/drivers/pf-vdisplay/`) replaced the vendored SudoVDA C++ driver
|
||||||
|
> (SudoVDA backend deleted in `84a3b95`) and is the **sole** Windows vdisplay backend; the host drives it
|
||||||
|
> via `crates/punktfunk-host/src/vdisplay/windows/pf_vdisplay.rs`. Live-validated streaming on the RTX box
|
||||||
|
> at 5120×1440@240. The current consolidated Windows-host architecture lives in
|
||||||
|
> [`windows-host-rewrite.md`](windows-host-rewrite.md). This doc is trimmed to the two things git history
|
||||||
|
> can't replace: the **on-glass driver-iteration gotchas**, and the **P2 decision record** proving
|
||||||
|
> direct-frame-push (IDD-push) is architecturally impossible for bare-metal capture — *do not re-attempt it.*
|
||||||
|
|
||||||
|
All the P1 planning/feasibility/decision content (signing tier, Rust prior art, binding-stack choice,
|
||||||
|
IOCTL contract, phased plan) executed as designed and now lives in the code + `windows-host-rewrite.md`;
|
||||||
|
it has been cut. What remains below is the durable record.
|
||||||
|
|
||||||
|
## Driver-iteration gotchas (hard-won, on-glass)
|
||||||
|
|
||||||
|
These cost real time during P1 bring-up and apply to **any** future IddCx/UMDF driver work on this box.
|
||||||
|
|
||||||
|
- **INF DriverVer gate.** Updating an installed UMDF driver only takes if the INF **DriverVer changes** —
|
||||||
|
`deploy-dev.ps1` stamps a date.time `-v` on every run; without a bump the **old binary keeps running
|
||||||
|
(silently)**.
|
||||||
|
- **Devnode hygiene — `nefconc`, never `devgen`.** Create the root devnode with
|
||||||
|
`nefconc --create-device-node` (a clean `ROOT\DISPLAY` node), **NOT** `devgen /add` — devgen makes
|
||||||
|
**persistent `SWD\DEVGEN` software devices** that survive reboot *and* registry deletion and resurrect on
|
||||||
|
every `pnputil /add-driver` (they carry `hwid root\pf_vdisplay`, so the driver install re-materializes
|
||||||
|
them). The production installer must use a single `nefconc`/INF-created node and never `devgen`.
|
||||||
|
- **Session-0 vs Session-1 observability.** Every standalone probe (`vdtest`, the host's
|
||||||
|
`live_create_drop` test) runs in **Session 0** — the services session, whose desktop is a throwaway
|
||||||
|
**1024×768** basic display. IddCx activation happens in the **console Session 1**, where the GPU drives
|
||||||
|
the real desktop. So `Screen.AllScreens`/CCD queries from Session 0 *can never* see the virtual monitor
|
||||||
|
activate — they report the wrong desktop. The only valid way to drive + observe it is the **host service**
|
||||||
|
(SYSTEM, which targets Session 1) plus the driver's own `OutputDebugString` (system-wide,
|
||||||
|
session-agnostic). (An early "monitor arrives but never gets a swap-chain / no DXGI output" symptom was
|
||||||
|
this measurement artifact, not a driver bug.)
|
||||||
|
- **Accumulated device-state damage.** Repeated reinstalls + `Disable`/`Enable-PnpDevice` cycles + a control
|
||||||
|
handle the host **cached across all of it** wedge the device tree (stale handle → the host's PINGs fail →
|
||||||
|
the 3 s watchdog tears the monitor down mid-session → capture opens a dying display → "no DXGI output").
|
||||||
|
A **reboot** clears it and it works on the first connect. Lesson: after device churn, restart the host
|
||||||
|
service (fresh handle) — and when in doubt, reboot.
|
||||||
|
- **Hot-reload is unreliable; deploy = install + reboot.** `pnputil /restart-device` does **NOT** restart
|
||||||
|
WUDFHost (old image stays mapped), `Disable/Enable-PnpDevice` errors on the root-enumerated IDD, and
|
||||||
|
**killing WUDFHost invalidates the host's cached `{e5bcc234}` control handle** (every ADD then fails
|
||||||
|
`0x80070006`, and the device can wedge to `FAILED_POST_START`). A **reboot** loads a freshly-installed
|
||||||
|
build cleanly. **Recovery** from a broken build is clean and reboot-free:
|
||||||
|
`pnputil /delete-driver <oemNN>.inf /uninstall` removes the bad package and the device rebinds the
|
||||||
|
previous (validated) package in the DriverStore.
|
||||||
|
- **`FAILED_POST_START` is usually churn, not the binary.** Comparing a working vs. a suspect DLL's import
|
||||||
|
tables came out **identical** (same DLLs; the size/hash delta is just the Authenticode signature). A clean
|
||||||
|
install **+ reboot** (no `restart-device`/`disable-enable`/kill in between) loads to `OK`.
|
||||||
|
- **The swap-chain drain is required.** The swap-chain processor is a faithful port of
|
||||||
|
virtual-display-rs's — it drains correctly via `ReleaseAndAcquireBuffer` + `FinishedProcessingFrame`. The
|
||||||
|
drain is *required*; a true no-op stalls DWM and freezes the captured image.
|
||||||
|
- **`pf-vdisplay` can't coexist with SudoVDA.** They register the same control-interface GUID, so two IddCx
|
||||||
|
adapters claiming `{e5bcc234}` → `FAILED_POST_START`. pf-vdisplay *replaces* SudoVDA (now moot — SudoVDA
|
||||||
|
is deleted — but the same rule binds any second IDD that claims the GUID).
|
||||||
|
|
||||||
|
## P2 — direct frame push (kill DDA): decision record — DEAD END, DO NOT PURSUE
|
||||||
|
|
||||||
|
P2 wanted the driver to *publish* each swap-chain frame to the host directly (Looking-Glass style), to
|
||||||
|
retire DXGI Desktop Duplication and its multi-GPU survival code (`capture/dxgi.rs`'s
|
||||||
|
`DXGI_ERROR_ACCESS_LOST`/`MODE_CHANGE_IN_PROGRESS` re-duplication churn and the `win32u.dll`
|
||||||
|
`install_gpu_pref_hook()` patch). **It cannot work for bare-metal console-desktop capture.** All the
|
||||||
|
IDD-push code stays in-tree, compiles, and is gated **off** behind `PUNKTFUNK_IDD_PUSH` — dormant and
|
||||||
|
harmless — as the documented record so it isn't re-tried.
|
||||||
|
|
||||||
|
### What was proven sound (so the failure is *not* a transport bug)
|
||||||
|
|
||||||
|
- **Producer and consumer are both in Session 0.** The pf-vdisplay host process is `WUDFHost.exe`
|
||||||
|
(`-DeviceGroupId:pfVDisplayGroup`) and the punktfunk host service is `LocalSystem` — **both Session 0**.
|
||||||
|
So a D3D11 **shared keyed-mutex texture** created in the driver can be opened by name in the host
|
||||||
|
(`ID3D11Device1::OpenSharedResourceByName`) with both devices on the **same render-adapter LUID** (the
|
||||||
|
driver reports it out of the `ADD` IOCTL via `OsAdapterLuid`). Named kernel objects resolve through
|
||||||
|
Session 0's shared `\BaseNamedObjects`, so no `Global\` prefix / `SeCreateGlobalPrivilege` gymnastics are
|
||||||
|
needed for same-session use. The Looking-Glass cross-*VM* shared-memory device is unnecessary — this is
|
||||||
|
cross-*process*, same-session, one GPU.
|
||||||
|
- **Transport shape (built):** a **ring** of N (default 3) shared keyed-mutex textures (newest-wins, so the
|
||||||
|
swap-chain thread never blocks — a stalled `IddCxSwapChainReleaseAndAcquireBuffer` loop freezes DWM compositing
|
||||||
|
system-wide) + a named metadata header (`{magic, version, generation, width, height, dxgi_format,
|
||||||
|
ring_len, latest}`) + a frame-ready auto-reset event. A **generation** counter bumps on a mode change so
|
||||||
|
the host re-opens the ring.
|
||||||
|
- **The inversion (required) — host creates, driver opens.** **WUDFHost runs with a restricted token: it
|
||||||
|
can neither write to the filesystem nor create named kernel objects** (`CreateFileMappingW`/`CreateEventW`/
|
||||||
|
`CreateSharedHandle` all fail silently), which a file-logging driver build confirmed (it wrote no file at
|
||||||
|
all even though `init()` runs in `DriverEntry` and the device is `OK`). This is exactly why the gamepad
|
||||||
|
UMDF drivers invert it (`inject/dualsense_windows.rs`): **the HOST creates the section** (privileged → a
|
||||||
|
permissive `Global\` name + SDDL `D:(A;;GA;;;WD)`) and **the DRIVER only OPENS it**. The host-created-ring
|
||||||
|
/ restricted-open split was implemented and **works every time** (`created shared ring … render_luid=…`,
|
||||||
|
no name collisions after the per-attempt generation fix). The gamepad drivers independently prove a UMDF
|
||||||
|
driver *can* open + write a host-created `Global\` section on this box — so the driver writing nothing is
|
||||||
|
**not** an access problem.
|
||||||
|
|
||||||
|
### Root cause — the swap-chain is never assigned (fundamental, not fixable)
|
||||||
|
|
||||||
|
Across **every** configuration tested, the driver's `run_core` swap-chain processor is **never entered**
|
||||||
|
(`run_core_entries=0`):
|
||||||
|
|
||||||
|
- in-process (Session 0) and WGC-triggered (Session 1 helper) sessions,
|
||||||
|
- a user-created ring AND a host-created (LocalSystem) ring with the permissive `D:(A;;GA;;;WD)` SDDL,
|
||||||
|
- with and without a Low-IL (`S:(ML;;NW;;;LW)`) mandatory label,
|
||||||
|
- with WUDFHost confirmed **not** an AppContainer (`IsAppContainer=0`),
|
||||||
|
|
||||||
|
— even while WGC simultaneously captured the same virtual monitor's composition and streamed multiple megabytes of
|
||||||
|
HEVC.
|
||||||
|
|
||||||
|
**An IddCx virtual monitor only receives a swap-chain (`EVT_IDD_CX_MONITOR_ASSIGN_SWAPCHAIN`) when the OS
|
||||||
|
presents/scans-out to it, which requires a real presentation consumer. WGC/DDA capture of the composed
|
||||||
|
desktop does NOT count** — it reads DWM's composition, bypassing the driver's swap-chain. With no physical
|
||||||
|
scanout and no consumer that routes *through the driver*, the path stays inactive (`IDDCX_PATH_FLAGS=0`) and
|
||||||
|
`ASSIGN_SWAPCHAIN` never fires. Session 0 additionally has no DWM/compositor at all.
|
||||||
|
|
||||||
|
Ecosystem + first-party confirmation:
|
||||||
|
|
||||||
|
- **Every bare-metal virtual-display capture project uses WGC/DDA, not the driver swap-chain:** SudoVDA
|
||||||
|
(its swap-chain loop acquires-and-discards), Apollo/Sunshine (DDA + WGC backends), virtual-display-rs
|
||||||
|
(discards), parsec-vdd (no frame path). Only **Looking Glass** consumes the driver swap-chain — and only
|
||||||
|
because a **VM guest scans out** the display (the consumer). Bare metal has no equivalent.
|
||||||
|
- Microsoft's own unanswered Q&A (learn.microsoft.com/answers 4096179) reports the identical symptom for
|
||||||
|
the IddSampleDriver: virtual display "always inactive," `ASSIGN_SWAPCHAIN` never runs.
|
||||||
|
|
||||||
|
### Both remaining escape hatches tested and closed
|
||||||
|
|
||||||
|
- **Option 3 — a present *source* on the display — TESTED, failed.** A present-trigger added to the
|
||||||
|
Session-1 WGC helper successfully created a D3D11 swapchain on the virtual display and presented
|
||||||
|
continuously (WGC even captured the flashing window). The driver stayed `run_core_entries=0` /
|
||||||
|
`frames_acquired=0`. So an active present *source* does NOT make the OS assign the driver's swap-chain —
|
||||||
|
DWM composes the present onto the display (capturable) without routing it through the driver.
|
||||||
|
- **Option 2 — a driver flag — closed by analysis.** The present-trigger succeeding proves the **path is
|
||||||
|
already active**; the missing piece is **scanout routed through the driver**, which the OS does only for a
|
||||||
|
real consumer (physical display / VM guest / RDP). The one IddCx flag for that —
|
||||||
|
`IDDCX_ADAPTER_FLAGS_REMOTE_SESSION_DRIVER` — requires the **RDP protocol stack** as the consumer, which
|
||||||
|
bare-metal console capture has no equivalent of.
|
||||||
|
|
||||||
|
### Verdict (final)
|
||||||
|
|
||||||
|
IDD-push needs a presentation consumer (scanout / VM guest / RDP) that bare-metal console desktop-capture
|
||||||
|
fundamentally cannot provide. No host-side capture, no in-process path, no present source, and no available
|
||||||
|
driver flag overcomes it. **WGC (normal desktop) + DDA (secure desktop) is the only viable Windows capture
|
||||||
|
path — as the entire ecosystem already does.** Any future "lower overhead" must come from optimizing the
|
||||||
|
WGC/DDA path (trimming the Session-0↔Session-1 relay, zero-copy encode), **not** from IDD-push. The
|
||||||
|
remaining gaps a hypothetical IDD-push would also have had (cursor delivered separately via
|
||||||
|
`IddCxMonitorSetupHardwareCursor`/`QueryHardwareCursor`; HDR needing the IddCx **1.11 D3D12 acquire path**
|
||||||
|
`SetDevice2`/`ReleaseAndAcquireBuffer2` → `ID3D12Resource`, since the default swap-chain surface is 8-bit)
|
||||||
|
are moot for the same reason.
|
||||||
|
|
||||||
|
## Open items
|
||||||
|
|
||||||
|
**None.** P1 shipped; P2 is a permanent *do-not-pursue* record (no pending work). WGC/DDA is the shipping
|
||||||
|
capture path.
|
||||||
+2
-2
@@ -16,8 +16,8 @@ sidebar, and the landing page). It reads [`public/openapi.json`](public/openapi.
|
|||||||
|
|
||||||
```sh
|
```sh
|
||||||
# from the repo root — regenerate the spec, then copy the snapshot in:
|
# from the repo root — regenerate the spec, then copy the snapshot in:
|
||||||
cargo run -p punktfunk-host -- openapi > docs/api/openapi.json
|
cargo run -p punktfunk-host -- openapi > api/openapi.json
|
||||||
cp docs/api/openapi.json docs-site/public/openapi.json
|
cp api/openapi.json docs-site/public/openapi.json
|
||||||
```
|
```
|
||||||
|
|
||||||
## Develop
|
## Develop
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user