Full project rename, decided 2026-06-10: - Crates/binaries: punktfunk-core / punktfunk-host / punktfunk-client-rs. - C ABI: punktfunk_* symbols, Punktfunk* types, include/punktfunk_core.h, PUNKTFUNK_FEATURE_QUIC guard (header regenerated; cbindgen renames updated, incl. PUNKTFUNK_BTN_*/PUNKTFUNK_AXIS_* wire constants). - Protocol: punktfunk/1 — control-plane magic LMN1 → PKF1, nonce salt lmn1 → pkf1. WIRE BREAK: clients must be rebuilt from this revision. - Env knobs: PUNKTFUNK_VIDEO_SOURCE / PUNKTFUNK_COMPOSITOR / PUNKTFUNK_ZEROCOPY / …. - Host config dir: ~/.config/punktfunk (the box's dir was migrated in place — the persistent identity is unchanged, pinned fingerprints stay valid). - Swift package: PunktfunkKit + PunktfunkCore.xcframework + PunktfunkConnection (Sources/PunktfunkClient app + tests renamed with it); build-xcframework.sh updated. - scripts/: 60-punktfunk.rules, punktfunk-host.service; OpenAPI doc regenerated. Also: scripts/headless/run-headless-kde.sh — full headless Plasma bringup. Root cause of "desktop but no apps/settings" over the stream: plasmashell launched without XDG_MENU_PREFIX=plasma-, so the launcher resolved a nonexistent applications.menu and rendered an empty menu. The script sets the complete KDE session env (menu prefix, KDE_FULL_SESSION, session version) and rebuilds ksycoca before starting plasmashell. Gate: 97/97 tests, clippy -D warnings (both feature sets), fmt, C-ABI harness PASS, zero lumen references left outside .git. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
[package]
|
||||
name = "punktfunk-host"
|
||||
description = "punktfunk Linux streaming host: virtual display, capture, encode, input injection"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
license.workspace = true
|
||||
authors.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
punktfunk-core = { path = "../punktfunk-core", features = ["quic"] }
|
||||
# M3 native control plane (the `punktfunk/1` QUIC handshake; data plane stays native-thread UDP).
|
||||
quinn = "0.11"
|
||||
anyhow = "1"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
axum = "0.8"
|
||||
mdns-sd = "0.20"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
rsa = "0.9"
|
||||
sha2 = { version = "0.10", features = ["oid"] }
|
||||
aes = "0.8"
|
||||
aes-gcm = "0.10"
|
||||
cbc = { version = "0.1", features = ["alloc"] }
|
||||
rand = "0.8"
|
||||
hex = "0.4"
|
||||
rcgen = { version = "0.13", default-features = false, features = ["aws_lc_rs", "pem"] }
|
||||
x509-parser = "0.16"
|
||||
axum-server = { version = "0.7", features = ["tls-rustls"] }
|
||||
rustls = "0.23"
|
||||
rustls-pemfile = "2"
|
||||
rusty_enet = "0.4"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
# Management/control-plane REST API + OpenAPI (control pane, M2). `axum_extras` wires
|
||||
# utoipa into axum 0.8 extractors; utoipa-axum collects `#[utoipa::path]` routes into the
|
||||
# spec; utoipa-scalar serves the interactive docs. Codegen-friendly: the spec is emitted
|
||||
# verbatim by the `openapi` subcommand. Control plane only — never the per-frame path.
|
||||
utoipa = { version = "5", features = ["axum_extras"] }
|
||||
utoipa-axum = "0.2"
|
||||
utoipa-scalar = { version = "0.3", features = ["axum"] }
|
||||
|
||||
[dev-dependencies]
|
||||
# Drive the management API router in-process (no socket) in the handler tests.
|
||||
tower = { version = "0.5", features = ["util"] }
|
||||
http-body-util = "0.1"
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
# `screencast` gates the ScreenCast portal module; `remote_desktop` adds the RemoteDesktop
|
||||
# portal we use for libei input on KWin/GNOME; `tokio` is the default runtime.
|
||||
# `open_pipe_wire_remote` is unconditional, so ashpd's own `pipewire` feature is not
|
||||
# needed — we drive PipeWire with the `pipewire` crate below.
|
||||
ashpd = { version = "0.13", features = ["screencast", "remote_desktop"] }
|
||||
ffmpeg-next = "8"
|
||||
libc = "0.2"
|
||||
# Must match the pipewire crate ashpd 0.13 links (libspa/pipewire-sys `links` key is
|
||||
# unique per build), i.e. 0.9 — NOT the 0.10 the setup doc mentions.
|
||||
pipewire = "0.9"
|
||||
# ashpd 0.13 uses the tokio runtime; a current-thread runtime drives the one-time
|
||||
# portal handshake (control plane — never the per-frame path).
|
||||
tokio = { version = "1", features = ["rt", "rt-multi-thread", "net", "time"] }
|
||||
# Input injection into headless Sway via the wlroots virtual-input Wayland protocols
|
||||
# (uinput won't reach a compositor running with WLR_LIBINPUT_NO_DEVICES=1).
|
||||
wayland-client = "0.31"
|
||||
wayland-protocols-wlr = { version = "0.3", features = ["client"] }
|
||||
wayland-protocols-misc = { version = "0.3", features = ["client"] }
|
||||
# Codegen for KDE's `zkde_screencast_unstable_v1` (vendored in `protocols/`): create a KWin
|
||||
# virtual output sized to the client's resolution and get its PipeWire node (KRdp's path).
|
||||
# `wayland-backend` is referenced by the generated interface tables.
|
||||
wayland-scanner = "0.31"
|
||||
wayland-backend = "0.3"
|
||||
# Parse `pw-dump` JSON to find gamescope's PipeWire node (gamescope backend).
|
||||
serde_json = "1"
|
||||
# Builds/validates the xkb keymap uploaded to the virtual keyboard + tracks modifier state.
|
||||
xkbcommon = "0.8"
|
||||
# Opus encode for the GameStream audio stream (links system libopus).
|
||||
opus = "0.3"
|
||||
# libei (EI sender) for the portable input path on KWin/GNOME (RemoteDesktop portal).
|
||||
# The `tokio` feature wires reis's event stream into tokio's reactor.
|
||||
reis = { version = "0.6.1", features = ["tokio"] }
|
||||
# `StreamExt::next` on reis's tokio event stream in the libei worker loop.
|
||||
futures-util = "0.3"
|
||||
# Zero-copy capture (plan §9): EGL imports the PipeWire dmabuf, CUDA maps it, NVENC encodes
|
||||
# it with no CPU roundtrip. `khronos-egl` (dynamic = load the NVIDIA libEGL at runtime) gives
|
||||
# eglCreateImage + the dma_buf import; the CUDA driver API (EGL interop) and libgbm are linked
|
||||
# via hand-rolled FFI in `src/zerocopy/` (no Rust crate exposes the EGL-interop driver calls).
|
||||
khronos-egl = { version = "6", features = ["dynamic"] }
|
||||
# Vulkan bridge for LINEAR dmabufs (gamescope): import via VK_EXT_external_memory_dma_buf,
|
||||
# GPU-copy into an exportable allocation, export OPAQUE_FD → cuImportExternalMemory (the
|
||||
# officially-supported CUDA pairing; raw dmabuf fds are rejected by the desktop driver).
|
||||
ash = "0.38"
|
||||
@@ -0,0 +1,105 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<protocol name="zkde_screencast_unstable_v1">
|
||||
<copyright><![CDATA[
|
||||
SPDX-FileCopyrightText: 2020-2021 Aleix Pol Gonzalez <aleixpol@kde.org>
|
||||
|
||||
SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
]]></copyright>
|
||||
<interface name="zkde_screencast_unstable_v1" version="6">
|
||||
<description summary="Protocol for managing PipeWire feeds of the different displays and windows">
|
||||
Warning! The protocol described in this file is a desktop environment
|
||||
implementation detail. Regular clients must not use this protocol.
|
||||
Backward incompatible changes may be added without bumping the major
|
||||
version of the extension.
|
||||
</description>
|
||||
|
||||
<enum name="pointer">
|
||||
<description summary="Stream consumer attachment attributes" />
|
||||
<entry name="hidden" value="1" summary="No cursor"/>
|
||||
<entry name="embedded" value="2" summary="Render the cursor on the stream"/>
|
||||
<entry name="metadata" value="4" summary="Send metadata about where the cursor is through PipeWire"/>
|
||||
</enum>
|
||||
|
||||
<request name="stream_output">
|
||||
<description summary="requests a feed from a given source"/>
|
||||
<arg name="stream" type="new_id" interface="zkde_screencast_stream_unstable_v1"/>
|
||||
<arg name="output" type="object" interface="wl_output"/>
|
||||
<arg name="pointer" type="uint" summary="Requested pointer mode"/>
|
||||
</request>
|
||||
<request name="stream_window">
|
||||
<description summary="requests a feed from a given source"/>
|
||||
<arg name="stream" type="new_id" interface="zkde_screencast_stream_unstable_v1"/>
|
||||
<arg name="window_uuid" type="string" summary="window Identifier"/>
|
||||
<arg name="pointer" type="uint" summary="Requested pointer mode"/>
|
||||
</request>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="Destroy the zkde_screencast_unstable_v1">
|
||||
Destroy the zkde_screencast_unstable_v1 object.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<request name="stream_virtual_output" since="2">
|
||||
<description summary="requests a feed from a new virtual output"/>
|
||||
<arg name="stream" type="new_id" interface="zkde_screencast_stream_unstable_v1"/>
|
||||
<arg name="name" type="string" summary="name of the created output"/>
|
||||
<arg name="width" type="int" summary="Logical width resolution"/>
|
||||
<arg name="height" type="int" summary="Logical height resolution"/>
|
||||
<arg name="scale" type="fixed" summary="Scaling factor of the display where it's to be displayed"/>
|
||||
<arg name="pointer" type="uint" summary="Requested pointer mode"/>
|
||||
</request>
|
||||
|
||||
<request name="stream_region" since="3">
|
||||
<description summary="requests a feed from region in the workspace">
|
||||
Since version 5, the compositor will choose the highest scale
|
||||
factor for the region if the given scale is 0.0.
|
||||
</description>
|
||||
<arg name="stream" type="new_id" interface="zkde_screencast_stream_unstable_v1"/>
|
||||
|
||||
<arg name="x" type="int" summary="Logical left position"/>
|
||||
<arg name="y" type="int" summary="Logical top position"/>
|
||||
<arg name="width" type="uint" summary="Logical width resolution"/>
|
||||
<arg name="height" type="uint" summary="Logical height resolution"/>
|
||||
<arg name="scale" type="fixed" summary="Scaling factor of the output recording"/>
|
||||
<arg name="pointer" type="uint" summary="Requested pointer mode"/>
|
||||
</request>
|
||||
|
||||
<request name="stream_virtual_output_with_description" since="4">
|
||||
<description summary="requests a feed from a new virtual output"/>
|
||||
<arg name="stream" type="new_id" interface="zkde_screencast_stream_unstable_v1"/>
|
||||
<arg name="name" type="string" summary="name of the created output"/>
|
||||
<arg name="description" type="string" summary="user visible description of the created output"/>
|
||||
<arg name="width" type="int" summary="Logical width resolution"/>
|
||||
<arg name="height" type="int" summary="Logical height resolution"/>
|
||||
<arg name="scale" type="fixed" summary="Scaling factor of the display where it's to be displayed"/>
|
||||
<arg name="pointer" type="uint" summary="Requested pointer mode"/>
|
||||
</request>
|
||||
</interface>
|
||||
|
||||
<interface name="zkde_screencast_stream_unstable_v1" version="6">
|
||||
<request name="close" type="destructor">
|
||||
<description summary="Indicates we are done with the stream and the communication is over."/>
|
||||
</request>
|
||||
<event name="closed">
|
||||
<description summary="Notifies that the server has stopped the stream. Clients should now call close."/>
|
||||
</event>
|
||||
<event name="created" deprecated-since="6">
|
||||
<description summary="Notifies about a pipewire feed being created">
|
||||
Deprecated since version 6, use the object serial from the serial event instead
|
||||
</description>
|
||||
<arg name="node" type="uint" summary="node of the pipewire buffer"/>
|
||||
</event>
|
||||
<event name="failed">
|
||||
<description summary="Offers an error message so the client knows the created event will not arrive, and the client should close the resource."/>
|
||||
<arg name="error" type="string" summary="A human readable translated error message."/>
|
||||
</event>
|
||||
<event name="serial" since="6">
|
||||
<description summary="the pipewire object serial">
|
||||
The pipewire object serial of the stream. Should be preferred over the node id which is prone to id reuse.
|
||||
Will be sent before the created event.
|
||||
</description>
|
||||
<arg name="object_serial_hi" type="uint" summary="high bits of the pipewire object serial"/>
|
||||
<arg name="object_serial_low" type="uint" summary="low bits of the pipewire object serial"/>
|
||||
</event>
|
||||
</interface>
|
||||
</protocol>
|
||||
@@ -0,0 +1,36 @@
|
||||
//! Desktop audio capture for the GameStream audio stream. On Linux: a PipeWire stream that
|
||||
//! records the default sink's monitor (i.e. everything playing out of the system), delivered
|
||||
//! as interleaved `f32` stereo PCM at 48 kHz. The audio data plane (`gamestream::audio`)
|
||||
//! reframes this into fixed Opus frames, encodes, and sends it.
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
/// Sample rate of the captured PCM in Hz. Opus/GameStream audio is 48 kHz stereo.
pub const SAMPLE_RATE: u32 = 48_000;
/// Number of interleaved channels in the captured PCM (stereo: left, right).
pub const CHANNELS: usize = 2;
|
||||
|
||||
/// Produces interleaved `f32` stereo PCM (L,R,L,R,…) at [`SAMPLE_RATE`]. Lives on its own
/// thread; never blocks the capture loop (drops if the consumer falls behind).
///
/// The `Send` bound allows a boxed capturer to be handed to another thread.
pub trait AudioCapturer: Send {
    /// Block until the next chunk of interleaved samples is available (variable size). The
    /// caller reframes into fixed Opus frames.
    ///
    /// # Errors
    ///
    /// Returns an error when the implementation cannot deliver a chunk.
    fn next_chunk(&mut self) -> Result<Vec<f32>>;

    /// Discard any buffered chunks (called when a persistent capturer is reused for a new
    /// stream, so the client doesn't hear stale audio captured while idle). Default: no-op.
    fn drain(&mut self) {}
}
|
||||
|
||||
/// Open a live capturer for the default sink monitor (system output) via PipeWire.
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn open_audio_capture() -> Result<Box<dyn AudioCapturer>> {
|
||||
linux::PwAudioCapturer::open().map(|c| Box::new(c) as Box<dyn AudioCapturer>)
|
||||
}
|
||||
|
||||
/// Non-Linux stand-in for the PipeWire audio capturer: there is no backend on this
/// platform, so the call always fails.
///
/// # Errors
///
/// Always returns an error.
#[cfg(not(target_os = "linux"))]
pub fn open_audio_capture() -> Result<Box<dyn AudioCapturer>> {
    Err(anyhow::anyhow!("audio capture requires Linux + PipeWire"))
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
mod linux;
|
||||
@@ -0,0 +1,172 @@
|
||||
//! PipeWire audio capture of the default sink's monitor (system output).
|
||||
//!
|
||||
//! Connects to the user's PipeWire daemon (via `XDG_RUNTIME_DIR`, inherited from the Sway
|
||||
//! session) and opens an input stream with `stream.capture.sink=true`, which routes the
|
||||
//! default sink's monitor into us — no portal needed (unlike screen capture). The (`!Send`)
|
||||
//! MainLoop/Stream live on a dedicated thread; interleaved `f32` chunks leave over a bounded
|
||||
//! channel (dropped if the encoder falls behind, never blocking the PipeWire loop).
|
||||
|
||||
use super::{AudioCapturer, CHANNELS, SAMPLE_RATE};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use std::sync::mpsc::{sync_channel, Receiver, RecvTimeoutError};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
pub struct PwAudioCapturer {
|
||||
chunks: Receiver<Vec<f32>>,
|
||||
}
|
||||
|
||||
impl PwAudioCapturer {
|
||||
pub fn open() -> Result<PwAudioCapturer> {
|
||||
let (tx, rx) = sync_channel::<Vec<f32>>(64);
|
||||
thread::Builder::new()
|
||||
.name("punktfunk-pw-audio".into())
|
||||
.spawn(move || {
|
||||
if let Err(e) = pw_thread(tx) {
|
||||
tracing::error!(error = %format!("{e:#}"), "pipewire audio thread failed");
|
||||
}
|
||||
})
|
||||
.context("spawn pipewire audio thread")?;
|
||||
Ok(PwAudioCapturer { chunks: rx })
|
||||
}
|
||||
}
|
||||
|
||||
impl AudioCapturer for PwAudioCapturer {
|
||||
fn next_chunk(&mut self) -> Result<Vec<f32>> {
|
||||
match self.chunks.recv_timeout(Duration::from_secs(5)) {
|
||||
Ok(c) => Ok(c),
|
||||
Err(RecvTimeoutError::Timeout) => Err(anyhow!("no PipeWire audio within 5s")),
|
||||
Err(RecvTimeoutError::Disconnected) => Err(anyhow!("pipewire audio thread ended")),
|
||||
}
|
||||
}
|
||||
|
||||
fn drain(&mut self) {
|
||||
while self.chunks.try_recv().is_ok() {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Body of the PipeWire audio thread.
///
/// Connects to PipeWire, creates a capture stream that requests F32LE at [`SAMPLE_RATE`] with
/// [`CHANNELS`] channels, and forwards every dequeued buffer to `tx` as a `Vec<f32>`. Chunks
/// are offered with `try_send`, so a chunk is discarded when the channel is full.
///
/// # Errors
///
/// Returns an error when any setup step fails (main loop, context, connection, stream,
/// listener registration, format pod, stream connect). Otherwise it returns `Ok(())` once
/// `mainloop.run()` returns.
fn pw_thread(tx: std::sync::mpsc::SyncSender<Vec<f32>>) -> Result<()> {
    use pipewire as pw;
    use pw::{properties::properties, spa};
    use spa::param::audio::{AudioFormat, AudioInfoRaw};
    use spa::pod::Pod;

    crate::pwinit::ensure_init();
    let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw audio MainLoop")?;
    let context = pw::context::ContextRc::new(&mainloop, None).context("pw audio Context")?;
    let core = context
        .connect_rc(None)
        .context("pw audio connect (is PipeWire running in this session?)")?;

    let stream = pw::stream::StreamBox::new(
        &core,
        "punktfunk-audio",
        properties! {
            *pw::keys::MEDIA_TYPE => "Audio",
            *pw::keys::MEDIA_CATEGORY => "Capture",
            *pw::keys::MEDIA_ROLE => "Music",
            // Capture the default sink's monitor (system output), not a microphone.
            *pw::keys::STREAM_CAPTURE_SINK => "true",
            // Ask for a ~5ms quantum (= one Opus frame) so buffers arrive smoothly rather than
            // in large bursts the client's low-latency jitter buffer would hear as glitching.
            *pw::keys::NODE_LATENCY => "240/48000",
        },
    )
    .context("pw audio Stream")?;

    // Bound to a name (not `let _`) so the listener lives until this function returns.
    // NOTE(review): presumably dropping it would unregister the callbacks — confirm.
    // `tx` becomes the listener's user data and is handed back to each callback.
    let _listener = stream
        .add_local_listener_with_user_data(tx)
        .state_changed(|_s, _ud, old, new| {
            tracing::info!(?old, ?new, "pipewire audio stream state");
        })
        .param_changed(|_stream, _tx, id, param| {
            // Only the Format param is of interest; it is logged, nothing else reacts to it.
            let Some(param) = param else { return };
            if id != pw::spa::param::ParamType::Format.as_raw() {
                return;
            }
            let mut info = AudioInfoRaw::default();
            if info.parse(param).is_ok() {
                tracing::info!(
                    format = ?info.format(),
                    rate = info.rate(),
                    channels = info.channels(),
                    "audio format negotiated"
                );
            }
        })
        .process(|stream, tx| {
            // A panic in the per-buffer work is caught and logged rather than unwinding out
            // of the callback. NOTE(review): presumably because the caller is PipeWire's C
            // loop — confirm.
            let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
                let Some(mut buffer) = stream.dequeue_buffer() else {
                    return;
                };
                let datas = buffer.datas_mut();
                if datas.is_empty() {
                    return;
                }
                // Only the first data plane is read.
                let d = &mut datas[0];
                let (offset, size) = {
                    let c = d.chunk();
                    (c.offset() as usize, c.size() as usize)
                };
                let Some(buf) = d.data() else { return };
                // Skip the buffer if the chunk starts beyond the mapped data.
                if offset > buf.len() {
                    return;
                }
                // Clamp the chunk's end to the mapped data so the slice cannot go out of range.
                let region = &buf[offset..(offset + size).min(buf.len())];
                // Negotiated as F32LE; reinterpret the byte region as interleaved f32.
                // Trailing bytes that do not form a whole 4-byte sample are ignored.
                let n = region.len() / 4;
                // Function-local `static`: one flag for the whole process, so the line below
                // is logged once per process rather than once per stream.
                static FIRST: std::sync::atomic::AtomicBool =
                    std::sync::atomic::AtomicBool::new(true);
                if FIRST.swap(false, std::sync::atomic::Ordering::Relaxed) {
                    // `n / 2` assumes two interleaved channels.
                    tracing::info!(samples = n, frames = n / 2, "audio first capture buffer");
                }
                let mut samples = Vec::with_capacity(n);
                for i in 0..n {
                    let b = [
                        region[i * 4],
                        region[i * 4 + 1],
                        region[i * 4 + 2],
                        region[i * 4 + 3],
                    ];
                    samples.push(f32::from_le_bytes(b));
                }
                let _ = tx.try_send(samples); // drop if the encoder is behind
            }));
            if outcome.is_err() {
                tracing::error!("panic in pipewire audio callback — chunk dropped");
            }
        })
        .register()
        .context("register audio stream listener")?;

    // Request F32LE, 48 kHz, stereo.
    let mut info = AudioInfoRaw::new();
    info.set_format(AudioFormat::F32LE);
    info.set_rate(SAMPLE_RATE);
    info.set_channels(CHANNELS as u32);
    let obj = pw::spa::pod::Object {
        type_: pw::spa::utils::SpaTypes::ObjectParamFormat.as_raw(),
        id: pw::spa::param::ParamType::EnumFormat.as_raw(),
        properties: info.into(),
    };
    // Serialize the format object into an in-memory byte buffer. `values` must outlive
    // `params`, which borrows from it.
    let values: Vec<u8> = pw::spa::pod::serialize::PodSerializer::serialize(
        std::io::Cursor::new(Vec::new()),
        &pw::spa::pod::Value::Object(obj),
    )
    .context("serialize audio format pod")?
    .0
    .into_inner();
    let mut params = [Pod::from_bytes(&values).context("audio pod from bytes")?];

    stream
        .connect(
            spa::utils::Direction::Input,
            None, // PW_ID_ANY — autoconnect to the default sink monitor
            pw::stream::StreamFlags::AUTOCONNECT | pw::stream::StreamFlags::MAP_BUFFERS,
            &mut params,
        )
        .context("pw audio stream connect")?;

    // Drives the callbacks registered above; the function returns once the loop stops.
    mainloop.run();
    Ok(())
}
|
||||
@@ -0,0 +1,236 @@
|
||||
//! Frame capture (plan §7). On Linux: a PipeWire ScreenCast portal stream. M0 used the
//! CPU-copy fallback (the portal delivers a CPU buffer; the encoder uploads it to the GPU
//! internally), and that path remains the fallback. The zero-copy dmabuf→NVENC path (plan §9)
//! delivers frames as `FramePayload::Cuda` instead.
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
/// Packed pixel layout of a [`CapturedFrame`]. The ScreenCast portal negotiates the
/// format; on wlroots it is commonly packed `RGB` (3 bytes/pixel). The encoder maps these
/// to an NVENC-accepted input format (`rgb0`/`bgr0`/`rgba`/`bgra`), expanding 3→4 bytes
/// where needed — no host-side colour conversion.
///
/// Byte order in each variant's doc is memory order, first byte first.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PixelFormat {
    /// `[B,G,R,x]`, 4 bytes per pixel (`x` is padding).
    Bgrx,
    /// `[R,G,B,x]`, 4 bytes per pixel (`x` is padding).
    Rgbx,
    /// `[B,G,R,A]`, 4 bytes per pixel.
    Bgra,
    /// `[R,G,B,A]`, 4 bytes per pixel.
    Rgba,
    /// `[R,G,B]`, 3 bytes per pixel.
    Rgb,
    /// `[B,G,R]`, 3 bytes per pixel.
    Bgr,
}

impl PixelFormat {
    /// Size of one packed pixel in bytes: 3 for the packed RGB/BGR layouts, 4 for the
    /// layouts that carry a padding or alpha byte.
    pub fn bytes_per_pixel(self) -> usize {
        // Deliberately exhaustive (no `_` arm): adding a variant must force a decision here
        // instead of silently inheriting 4.
        match self {
            PixelFormat::Rgb | PixelFormat::Bgr => 3,
            PixelFormat::Bgrx | PixelFormat::Rgbx | PixelFormat::Bgra | PixelFormat::Rgba => 4,
        }
    }
}
|
||||
|
||||
/// A captured frame. [`format`](Self::format)/dimensions describe the pixels regardless of
/// where they live — [`payload`](Self::payload) is either a CPU buffer (the M0/fallback path)
/// or a GPU buffer already on the device (the zero-copy path, plan §9).
pub struct CapturedFrame {
    /// Frame width in pixels.
    pub width: u32,
    /// Frame height in pixels.
    pub height: u32,
    /// Timestamp in nanoseconds. NOTE(review): the synthetic sources in this module count
    /// from 0 or leave it at 0; confirm which clock the live capture path uses.
    pub pts_ns: u64,
    /// Pixel layout of the payload.
    pub format: PixelFormat,
    /// The pixel data itself, on the CPU or on the GPU.
    pub payload: FramePayload,
}
|
||||
|
||||
/// Where a captured frame's pixels live.
///
/// The set of variants depends on the target OS, so a `match` on this type must gate the
/// `Cuda` arm with the same `cfg`.
pub enum FramePayload {
    /// Tightly-packed CPU pixels in `format`, `width*height*bytes_per_pixel` (no row padding).
    Cpu(Vec<u8>),
    /// A pitched GPU buffer (BGRA-order, on the shared CUDA context) — the zero-copy path. The
    /// dmabuf has already been imported + copied into this owned device buffer.
    ///
    /// Only present on Linux builds.
    #[cfg(target_os = "linux")]
    Cuda(crate::zerocopy::DeviceBuffer),
}
|
||||
|
||||
impl CapturedFrame {
|
||||
/// True if the frame's pixels are a GPU/CUDA buffer (the zero-copy path).
|
||||
pub fn is_cuda(&self) -> bool {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
matches!(self.payload, FramePayload::Cuda(_))
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Produces frames from a captured output. Lives on its own thread, feeding the encoder
|
||||
/// over a bounded drop-oldest channel (never block the compositor).
|
||||
pub trait Capturer: Send {
|
||||
fn next_frame(&mut self) -> Result<CapturedFrame>;
|
||||
|
||||
/// Non-blocking: the freshest frame available since the last call, or `None` if none has
|
||||
/// arrived (the caller reuses its last frame to hold a steady output rate). The default
|
||||
/// just produces a frame each call — fine for instant synthetic sources; the portal
|
||||
/// overrides it to drain its channel without blocking.
|
||||
fn try_latest(&mut self) -> Result<Option<CapturedFrame>> {
|
||||
self.next_frame().map(Some)
|
||||
}
|
||||
|
||||
/// Gate expensive per-frame work so the capturer can be kept alive (reused) between
|
||||
/// streams without burning CPU. The portal capturer skips the de-pad copy while inactive;
|
||||
/// the default is a no-op (synthetic sources are produced on demand). Set `true` for the
|
||||
/// duration of a stream, `false` when it ends.
|
||||
fn set_active(&self, _active: bool) {}
|
||||
}
|
||||
|
||||
/// Deterministic animated test pattern in BGRx. It lets M0 drive the encode → file →
/// `punktfunk_core` path without a live capture session. The picture is a sweeping bar over
/// an animated gradient, so it is easy to verify that the encoded output is not static.
pub struct SyntheticCapturer {
    /// Frame width in pixels.
    width: u32,
    /// Frame height in pixels.
    height: u32,
    /// Nominal frame rate; paces the bar sweep and the frame timestamps.
    fps: u32,
    /// Number of frames produced so far.
    frame_idx: u64,
    /// Pixel buffer of `width * height * BPP` bytes, rewritten for every frame.
    buf: Vec<u8>,
}

impl SyntheticCapturer {
    const BPP: usize = 4; // emits BGRx

    /// Creates a capturer for `width`×`height` frames at `fps`, starting at frame 0 with a
    /// zero-filled buffer.
    ///
    /// # Panics
    ///
    /// Panics when `width`, `height` or `fps` is zero.
    pub fn new(width: u32, height: u32, fps: u32) -> Self {
        assert!(width > 0 && height > 0 && fps > 0);
        let pixel_count = width as usize * height as usize;
        Self {
            width,
            height,
            fps,
            frame_idx: 0,
            buf: vec![0; pixel_count * Self::BPP],
        }
    }
}
|
||||
|
||||
impl Capturer for SyntheticCapturer {
|
||||
fn next_frame(&mut self) -> Result<CapturedFrame> {
|
||||
let w = self.width as usize;
|
||||
let h = self.height as usize;
|
||||
let bpp = Self::BPP;
|
||||
let t = self.frame_idx;
|
||||
// A vertical bar sweeps left→right once every ~2s; the background is a gradient
|
||||
// whose phase advances each frame, so every pixel changes frame-to-frame.
|
||||
let bar_x = ((t * w as u64) / (self.fps as u64 * 2)) % w as u64;
|
||||
let phase = (t % 256) as usize;
|
||||
for y in 0..h {
|
||||
let row = y * w * bpp;
|
||||
for x in 0..w {
|
||||
let i = row + x * bpp;
|
||||
let on_bar = (x as u64).abs_diff(bar_x) < 8;
|
||||
// BGRx byte order: [B, G, R, x]
|
||||
self.buf[i] = if on_bar {
|
||||
255
|
||||
} else {
|
||||
((x + phase) & 0xff) as u8
|
||||
};
|
||||
self.buf[i + 1] = if on_bar {
|
||||
255
|
||||
} else {
|
||||
((y + phase) & 0xff) as u8
|
||||
};
|
||||
self.buf[i + 2] = if on_bar { 255 } else { ((x + y) & 0xff) as u8 };
|
||||
self.buf[i + 3] = 0;
|
||||
}
|
||||
}
|
||||
let pts_ns = self.frame_idx * 1_000_000_000 / self.fps as u64;
|
||||
self.frame_idx += 1;
|
||||
Ok(CapturedFrame {
|
||||
width: self.width,
|
||||
height: self.height,
|
||||
pts_ns,
|
||||
format: PixelFormat::Bgrx,
|
||||
payload: FramePayload::Cpu(self.buf.clone()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Inexpensive animated BGRx test pattern for the streaming path: a pulsing flat field with
/// a white band sweeping downwards. It is drawn with bulk `fill`s rather than per-pixel
/// writes, so it keeps up in real time even at 5K.
pub struct FastSyntheticCapturer {
    /// Frame width in pixels.
    width: u32,
    /// Frame height in pixels.
    height: u32,
    /// Number of frames produced so far.
    frame_idx: u64,
    /// Pixel buffer of `width * height * 4` bytes, rewritten for every frame.
    buf: Vec<u8>,
}

impl FastSyntheticCapturer {
    /// Creates a capturer for `width`×`height` frames, starting at frame 0 with a
    /// zero-filled buffer.
    ///
    /// # Panics
    ///
    /// Panics when `width` or `height` is zero.
    pub fn new(width: u32, height: u32) -> Self {
        assert!(width > 0 && height > 0);
        let byte_len = width as usize * height as usize * 4;
        Self {
            width,
            height,
            frame_idx: 0,
            buf: vec![0; byte_len],
        }
    }
}
|
||||
|
||||
impl Capturer for FastSyntheticCapturer {
|
||||
fn next_frame(&mut self) -> Result<CapturedFrame> {
|
||||
let (w, h) = (self.width as usize, self.height as usize);
|
||||
let row = w * 4;
|
||||
let shade = (self.frame_idx % 256) as u8;
|
||||
self.buf.fill(shade);
|
||||
let band_h = (h / 20).max(1);
|
||||
let band_y = (self.frame_idx as usize * 6) % h;
|
||||
for y in band_y..(band_y + band_h).min(h) {
|
||||
self.buf[y * row..(y + 1) * row].fill(0xff);
|
||||
}
|
||||
self.frame_idx += 1;
|
||||
Ok(CapturedFrame {
|
||||
width: self.width,
|
||||
height: self.height,
|
||||
pts_ns: 0,
|
||||
format: PixelFormat::Bgrx,
|
||||
payload: FramePayload::Cpu(self.buf.clone()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Open a live capturer for a client-sized monitor via the xdg ScreenCast portal
|
||||
/// (`ashpd`) → PipeWire (`pipewire`). Implemented in the `linux` submodule.
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn open_portal_monitor() -> Result<Box<dyn Capturer>> {
|
||||
// On RemoteDesktop-capable desktops (KWin/GNOME) anchor ScreenCast to a RemoteDesktop
|
||||
// session so it inherits that grant headlessly; wlroots/Sway has no RemoteDesktop portal,
|
||||
// so use a plain ScreenCast session there.
|
||||
let anchored = crate::inject::default_backend() == crate::inject::Backend::Libei;
|
||||
linux::PortalCapturer::open(anchored).map(|c| Box::new(c) as Box<dyn Capturer>)
|
||||
}
|
||||
|
||||
/// Non-Linux stand-in for portal capture: there is no backend on this platform, so the call
/// always fails.
///
/// # Errors
///
/// Always returns an error.
#[cfg(not(target_os = "linux"))]
pub fn open_portal_monitor() -> Result<Box<dyn Capturer>> {
    Err(anyhow::anyhow!(
        "portal capture requires Linux (xdg-desktop-portal + PipeWire)"
    ))
}
|
||||
|
||||
/// Build a capturer from an already-created virtual output (see [`crate::vdisplay`]). Consumes
|
||||
/// the output's PipeWire node + optional remote fd + keepalive — the capturer owns the keepalive,
|
||||
/// so dropping the capturer releases the virtual output. Compositor-agnostic: works for any
|
||||
/// [`crate::vdisplay::VirtualDisplay`] backend. The captured size is the size the output was
|
||||
/// created at — native, no scaling.
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result<Box<dyn Capturer>> {
|
||||
linux::PortalCapturer::from_virtual_output(vout).map(|c| Box::new(c) as Box<dyn Capturer>)
|
||||
}
|
||||
|
||||
/// Non-Linux stand-in for virtual-output capture: there is no backend on this platform, so
/// the call always fails and the output is ignored.
///
/// # Errors
///
/// Always returns an error.
#[cfg(not(target_os = "linux"))]
pub fn capture_virtual_output(_vout: crate::vdisplay::VirtualOutput) -> Result<Box<dyn Capturer>> {
    Err(anyhow::anyhow!("virtual-output capture requires Linux"))
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
mod linux;
|
||||
@@ -0,0 +1,963 @@
|
||||
//! Live capture: xdg ScreenCast portal (`ashpd`) → PipeWire (`pipewire`), CPU-copy path.
|
||||
//!
|
||||
//! Two dedicated threads, because both stacks are tied to their thread:
|
||||
//! * **portal thread** drives the async ashpd handshake on a multi-thread tokio runtime
|
||||
//! (control plane — never the per-frame path), then parks on a pending future so the
|
||||
//! `proxy` + its zbus connection stay alive (the cast is torn down when that connection
|
||||
//! drops; ashpd's `Session` has no `Drop`);
|
||||
//! * **pipewire thread** owns the (`!Send`) MainLoop/Stream and pumps frames.
|
||||
//!
|
||||
//! The portal hands the PipeWire remote fd + node id to the pipewire thread; decoded BGRx
|
||||
//! frames leave the pipewire thread over a bounded channel. The authoritative frame size
|
||||
//! comes from the negotiated PipeWire format, not the portal's size hint.
|
||||
//!
|
||||
//! Cleanup note (M0): the two threads are detached and torn down at process exit. A
|
||||
//! graceful stop (pipewire `channel` quit + Session close) belongs with the M2 session
|
||||
//! lifecycle.
|
||||
|
||||
use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use std::os::fd::OwnedFd;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::mpsc::{sync_channel, Receiver, RecvTimeoutError, TryRecvError};
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Live monitor capturer backed by the portal + PipeWire threads. Kept alive (reused) across
/// streams — [`set_active`](Capturer::set_active) gates the per-frame de-pad copy so it costs
/// almost nothing between streams while the screencast session stays up (instant reconnect,
/// and no second session to conflict with).
pub struct PortalCapturer {
    /// Receiving end of the channel on which captured frames arrive.
    frames: Receiver<CapturedFrame>,
    /// Shared flag returned together with `frames` when the PipeWire side is spawned.
    /// NOTE(review): presumably the flag that `set_active` toggles — confirm against the
    /// `Capturer` impl, which is not visible here.
    active: Arc<AtomicBool>,
    /// Owns the virtual output (if this capturer was built from one) — dropped when the capturer
    /// is, releasing the compositor-side output via the keepalive's own `Drop`. `None` for the
    /// portal source (its session ends with the portal thread's zbus connection).
    _keepalive: Option<Box<dyn Send>>,
}
|
||||
|
||||
impl PortalCapturer {
|
||||
/// `anchored` drives ScreenCast off a RemoteDesktop session (KWin/GNOME) so it inherits the
|
||||
/// RemoteDesktop grant and never raises a separate ScreenCast dialog; `false` uses a plain
|
||||
/// ScreenCast session (wlroots, which has no RemoteDesktop portal).
|
||||
pub fn open(anchored: bool) -> Result<PortalCapturer> {
|
||||
// Portal handshake (async) on its own thread; hands back the PW fd + node id.
|
||||
let (setup_tx, setup_rx) = std::sync::mpsc::channel::<Result<(OwnedFd, u32), String>>();
|
||||
thread::Builder::new()
|
||||
.name("punktfunk-portal".into())
|
||||
.spawn(move || {
|
||||
if anchored {
|
||||
portal_thread_remote_desktop(setup_tx)
|
||||
} else {
|
||||
portal_thread(setup_tx)
|
||||
}
|
||||
})
|
||||
.context("spawn portal thread")?;
|
||||
|
||||
let (fd, node_id) = match setup_rx.recv_timeout(Duration::from_secs(20)) {
|
||||
Ok(Ok(v)) => v,
|
||||
Ok(Err(e)) => return Err(anyhow!("ScreenCast portal setup failed: {e}")),
|
||||
Err(_) => return Err(anyhow!("timed out waiting for the ScreenCast portal")),
|
||||
};
|
||||
tracing::info!(
|
||||
node_id,
|
||||
"ScreenCast portal session started; connecting PipeWire"
|
||||
);
|
||||
let (frames, active) = spawn_pipewire(Some(fd), node_id, None)?;
|
||||
Ok(PortalCapturer {
|
||||
frames,
|
||||
active,
|
||||
_keepalive: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a capturer from an already-created virtual output ([`crate::vdisplay::VirtualOutput`]):
|
||||
/// connect PipeWire to its node (`remote_fd` selects portal-remote vs. default-daemon) and
|
||||
/// take ownership of its keepalive so the output lives exactly as long as this capturer. This
|
||||
/// is how the client's requested resolution becomes the captured resolution without scaling.
|
||||
pub fn from_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result<PortalCapturer> {
|
||||
tracing::info!(
|
||||
node_id = vout.node_id,
|
||||
"connecting PipeWire to virtual output"
|
||||
);
|
||||
let (frames, active) = spawn_pipewire(vout.remote_fd, vout.node_id, vout.preferred_mode)?;
|
||||
Ok(PortalCapturer {
|
||||
frames,
|
||||
active,
|
||||
_keepalive: Some(vout.keepalive),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn the PipeWire consumer thread for `node_id` (fd `Some` = portal remote, `None` =
|
||||
/// default daemon) and return the frame channel + the activation flag it gates on.
|
||||
/// `preferred` seeds the format negotiation's default size/framerate — for Mutter virtual
|
||||
/// monitors this is what actually sizes the monitor.
|
||||
fn spawn_pipewire(
|
||||
fd: Option<OwnedFd>,
|
||||
node_id: u32,
|
||||
preferred: Option<(u32, u32, u32)>,
|
||||
) -> Result<(Receiver<CapturedFrame>, Arc<AtomicBool>)> {
|
||||
// Frames flow from the pipewire thread over a small bounded channel.
|
||||
let (frame_tx, frame_rx) = sync_channel::<CapturedFrame>(8);
|
||||
let active = Arc::new(AtomicBool::new(false));
|
||||
let active_cb = active.clone();
|
||||
let zerocopy = crate::zerocopy::enabled();
|
||||
thread::Builder::new()
|
||||
.name("punktfunk-pipewire".into())
|
||||
.spawn(move || {
|
||||
if let Err(e) =
|
||||
pipewire::pipewire_thread(fd, node_id, frame_tx, active_cb, zerocopy, preferred)
|
||||
{
|
||||
tracing::error!(error = %format!("{e:#}"), "pipewire capture thread failed");
|
||||
}
|
||||
})
|
||||
.context("spawn pipewire thread")?;
|
||||
Ok((frame_rx, active))
|
||||
}
|
||||
|
||||
impl Capturer for PortalCapturer {
|
||||
fn next_frame(&mut self) -> Result<CapturedFrame> {
|
||||
// First frame can lag behind format negotiation; later frames arrive at ~fps.
|
||||
match self.frames.recv_timeout(Duration::from_secs(10)) {
|
||||
Ok(frame) => Ok(frame),
|
||||
Err(RecvTimeoutError::Timeout) => Err(anyhow!("no PipeWire frame within 10s")),
|
||||
Err(RecvTimeoutError::Disconnected) => Err(anyhow!("PipeWire capture thread ended")),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_latest(&mut self) -> Result<Option<CapturedFrame>> {
|
||||
// Drain to the newest queued frame without blocking; `None` means the compositor
|
||||
// hasn't produced a new frame since last call (static/idle desktop).
|
||||
let mut latest = None;
|
||||
loop {
|
||||
match self.frames.try_recv() {
|
||||
Ok(frame) => latest = Some(frame),
|
||||
Err(TryRecvError::Empty) => break,
|
||||
Err(TryRecvError::Disconnected) => {
|
||||
return Err(anyhow!("PipeWire capture thread ended"))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(latest)
|
||||
}
|
||||
|
||||
fn set_active(&self, active: bool) {
|
||||
self.active.store(active, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// The portal handshake: connect ScreenCast, select a single monitor, start, open the
|
||||
/// PipeWire remote, hand the fd + node id back, then keep the session alive.
|
||||
fn portal_thread(setup_tx: std::sync::mpsc::Sender<Result<(OwnedFd, u32), String>>) {
|
||||
use ashpd::desktop::screencast::{CursorMode, Screencast, SelectSourcesOptions, SourceType};
|
||||
use ashpd::desktop::PersistMode;
|
||||
use ashpd::enumflags2::BitFlags;
|
||||
|
||||
// Multi-thread runtime: the zbus connection's background reader must be pumped
|
||||
// continuously across the create_session → select_sources → start handshake, or the
|
||||
// portal reports "Invalid session". (A current-thread runtime starves it.)
|
||||
let rt = match tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(2)
|
||||
.enable_all()
|
||||
.build()
|
||||
{
|
||||
Ok(rt) => rt,
|
||||
Err(e) => {
|
||||
let _ = setup_tx.send(Err(format!("build tokio runtime: {e}")));
|
||||
return;
|
||||
}
|
||||
};
|
||||
let err_tx = setup_tx.clone();
|
||||
|
||||
rt.block_on(async move {
|
||||
let result: Result<()> = async {
|
||||
let proxy = Screencast::new()
|
||||
.await
|
||||
.context("connect ScreenCast portal")?;
|
||||
let session = proxy
|
||||
.create_session(Default::default())
|
||||
.await
|
||||
.context("create_session")?;
|
||||
proxy
|
||||
.select_sources(
|
||||
&session,
|
||||
SelectSourcesOptions::default()
|
||||
.set_cursor_mode(CursorMode::Embedded)
|
||||
// Only MONITOR is offered by the wlroots backend
|
||||
// (AvailableSourceTypes=1); requesting unsupported types
|
||||
// invalidates the session.
|
||||
.set_sources(BitFlags::from_flag(SourceType::Monitor))
|
||||
.set_multiple(false)
|
||||
.set_persist_mode(PersistMode::DoNot),
|
||||
)
|
||||
.await
|
||||
.context("select_sources")?
|
||||
.response()
|
||||
.context("select_sources rejected (unsupported source type / cursor mode?)")?;
|
||||
let streams = proxy
|
||||
.start(&session, None, Default::default())
|
||||
.await
|
||||
.context("start cast")?
|
||||
.response()
|
||||
.context("start response (chooser cancelled? portal misconfigured?)")?;
|
||||
let stream = streams
|
||||
.streams()
|
||||
.first()
|
||||
.context("portal returned no streams")?
|
||||
.clone();
|
||||
let node_id = stream.pipe_wire_node_id();
|
||||
let fd = proxy
|
||||
.open_pipe_wire_remote(&session, Default::default())
|
||||
.await
|
||||
.context("open_pipe_wire_remote")?;
|
||||
|
||||
setup_tx
|
||||
.send(Ok((fd, node_id)))
|
||||
.map_err(|_| anyhow!("capturer dropped before setup completed"))?;
|
||||
|
||||
// Keep `proxy` + `session` (and the underlying zbus connection) alive for the
|
||||
// capture; the cast is torn down when the connection drops (ashpd's `Session`
|
||||
// has no `Drop`), which here happens at process exit.
|
||||
let _keep_alive = (&proxy, &session);
|
||||
std::future::pending::<()>().await;
|
||||
Ok(())
|
||||
}
|
||||
.await;
|
||||
|
||||
if let Err(e) = result {
|
||||
let _ = err_tx.send(Err(format!("{e:#}")));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Combined RemoteDesktop+ScreenCast portal setup (KWin/GNOME). ScreenCast sources are selected
|
||||
/// on a session created via RemoteDesktop, so a single RemoteDesktop `start` grant —
|
||||
/// pre-authorized headlessly via the `kde-authorized` permission, exactly like the libei input
|
||||
/// path — also covers screen capture, with no separate ScreenCast dialog (which has no such
|
||||
/// bypass). Yields the same PipeWire fd + node id as the standalone path; the consumer is
|
||||
/// identical.
|
||||
fn portal_thread_remote_desktop(setup_tx: std::sync::mpsc::Sender<Result<(OwnedFd, u32), String>>) {
|
||||
use ashpd::desktop::remote_desktop::{DeviceType, RemoteDesktop, SelectDevicesOptions};
|
||||
use ashpd::desktop::screencast::{CursorMode, Screencast, SelectSourcesOptions, SourceType};
|
||||
use ashpd::desktop::PersistMode;
|
||||
use ashpd::enumflags2::BitFlags;
|
||||
|
||||
let rt = match tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(2)
|
||||
.enable_all()
|
||||
.build()
|
||||
{
|
||||
Ok(rt) => rt,
|
||||
Err(e) => {
|
||||
let _ = setup_tx.send(Err(format!("build tokio runtime: {e}")));
|
||||
return;
|
||||
}
|
||||
};
|
||||
let err_tx = setup_tx.clone();
|
||||
|
||||
rt.block_on(async move {
|
||||
let result: Result<()> = async {
|
||||
let remote = RemoteDesktop::new()
|
||||
.await
|
||||
.context("connect RemoteDesktop portal")?;
|
||||
let screencast = Screencast::new()
|
||||
.await
|
||||
.context("connect ScreenCast portal")?;
|
||||
let session = remote
|
||||
.create_session(Default::default())
|
||||
.await
|
||||
.context("create RemoteDesktop session")?;
|
||||
// RemoteDesktop requires a device selection; we never connect_to_eis on this session
|
||||
// (input injection runs its own), but selecting devices is what makes `start` the
|
||||
// RemoteDesktop grant the kde-authorized bypass covers.
|
||||
remote
|
||||
.select_devices(
|
||||
&session,
|
||||
SelectDevicesOptions::default()
|
||||
.set_devices(DeviceType::Keyboard | DeviceType::Pointer)
|
||||
.set_persist_mode(PersistMode::DoNot),
|
||||
)
|
||||
.await
|
||||
.context("select_devices")?
|
||||
.response()
|
||||
.context("select_devices rejected")?;
|
||||
screencast
|
||||
.select_sources(
|
||||
&session,
|
||||
SelectSourcesOptions::default()
|
||||
.set_cursor_mode(CursorMode::Embedded)
|
||||
.set_sources(BitFlags::from_flag(SourceType::Monitor))
|
||||
.set_multiple(false)
|
||||
.set_persist_mode(PersistMode::DoNot),
|
||||
)
|
||||
.await
|
||||
.context("select_sources")?
|
||||
.response()
|
||||
.context("select_sources rejected (unsupported source type?)")?;
|
||||
let streams = remote
|
||||
.start(&session, None, Default::default())
|
||||
.await
|
||||
.context("start RemoteDesktop+ScreenCast")?
|
||||
.response()
|
||||
.context("start response (grant not pre-authorized / headless dialog?)")?;
|
||||
let stream = streams
|
||||
.streams()
|
||||
.first()
|
||||
.context("portal returned no screencast streams")?
|
||||
.clone();
|
||||
let node_id = stream.pipe_wire_node_id();
|
||||
let fd = screencast
|
||||
.open_pipe_wire_remote(&session, Default::default())
|
||||
.await
|
||||
.context("open_pipe_wire_remote")?;
|
||||
|
||||
setup_tx
|
||||
.send(Ok((fd, node_id)))
|
||||
.map_err(|_| anyhow!("capturer dropped before setup completed"))?;
|
||||
|
||||
// Keep the proxies + session (and their zbus connection) alive for the capture.
|
||||
let _keep_alive = (&remote, &screencast, &session);
|
||||
std::future::pending::<()>().await;
|
||||
Ok(())
|
||||
}
|
||||
.await;
|
||||
|
||||
if let Err(e) = result {
|
||||
let _ = err_tx.send(Err(format!("{e:#}")));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
mod pipewire {
|
||||
//! The PipeWire consumer, confined to its own thread (the PW types are `!Send`).
|
||||
|
||||
use super::{CapturedFrame, FramePayload, PixelFormat};
|
||||
use anyhow::{Context, Result};
|
||||
use pipewire as pw;
|
||||
use pw::{properties::properties, spa};
|
||||
use std::os::fd::OwnedFd;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::mpsc::SyncSender;
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use spa::param::video::{VideoFormat, VideoInfoRaw};
|
||||
use spa::pod::Pod;
|
||||
|
||||
/// Map a negotiated SPA video format to a layout the encoder can consume. Returns
|
||||
/// `None` for formats we don't handle (the frame is then skipped).
|
||||
fn map_format(f: VideoFormat) -> Option<PixelFormat> {
|
||||
Some(match f {
|
||||
VideoFormat::BGRx => PixelFormat::Bgrx,
|
||||
VideoFormat::RGBx => PixelFormat::Rgbx,
|
||||
VideoFormat::BGRA => PixelFormat::Bgra,
|
||||
VideoFormat::RGBA => PixelFormat::Rgba,
|
||||
VideoFormat::RGB => PixelFormat::Rgb,
|
||||
VideoFormat::BGR => PixelFormat::Bgr,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
struct UserData {
|
||||
info: VideoInfoRaw,
|
||||
/// Negotiated layout (`None` until param_changed, or if unsupported).
|
||||
format: Option<PixelFormat>,
|
||||
/// Negotiated DRM format modifier (for dmabuf import); 0 = LINEAR.
|
||||
modifier: u64,
|
||||
tx: SyncSender<CapturedFrame>,
|
||||
/// When false (no active stream), skip the de-pad copy — the buffer is just released.
|
||||
active: Arc<AtomicBool>,
|
||||
/// Present when zero-copy is enabled: imports a dmabuf → CUDA device buffer.
|
||||
importer: Option<crate::zerocopy::EglImporter>,
|
||||
}
|
||||
|
||||
/// Log a frame-drop reason once per process (the process callback runs per frame; a stuck
|
||||
/// pipeline must say why without flooding).
|
||||
fn warn_once(msg: &'static str) {
|
||||
use std::sync::Mutex;
|
||||
static SEEN: Mutex<Vec<&'static str>> = Mutex::new(Vec::new());
|
||||
let mut seen = SEEN.lock().unwrap();
|
||||
if !seen.contains(&msg) {
|
||||
seen.push(msg);
|
||||
tracing::warn!("{msg}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Read-only memory mapping of a dmabuf fd; the mapping is released on drop.
///
/// Needed when MAP_BUFFERS left the buffer unmapped — producers don't always flag dmabufs as
/// mappable (e.g. gamescope's Vulkan exports).
struct DmabufMap {
    /// Start address returned by `mmap`.
    ptr: *mut std::ffi::c_void,
    /// Length of the mapping in bytes, as passed to `mmap` and later to `munmap`.
    len: usize,
}
|
||||
|
||||
impl DmabufMap {
|
||||
fn new(fd: i32, len: usize) -> Option<DmabufMap> {
|
||||
let ptr = unsafe {
|
||||
libc::mmap(
|
||||
std::ptr::null_mut(),
|
||||
len,
|
||||
libc::PROT_READ,
|
||||
libc::MAP_SHARED,
|
||||
fd,
|
||||
0,
|
||||
)
|
||||
};
|
||||
(ptr != libc::MAP_FAILED).then_some(DmabufMap { ptr, len })
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DmabufMap {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
libc::munmap(self.ptr, self.len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Serialize a pod object into a freshly allocated byte buffer.
///
/// Serializer failures are returned with the context "serialize pod".
fn serialize_pod(obj: pw::spa::pod::Object) -> Result<Vec<u8>> {
    let sink = std::io::Cursor::new(Vec::new());
    let value = pw::spa::pod::Value::Object(obj);
    // The serializer returns a tuple whose first element is the cursor it wrote into.
    let written = pw::spa::pod::serialize::PodSerializer::serialize(sink, &value)
        .context("serialize pod")?;
    Ok(written.0.into_inner())
}
|
||||
|
||||
/// Build a BGRx dmabuf `EnumFormat` pod advertising the EGL-importable `modifiers` as a
|
||||
/// mandatory enum Choice; the compositor fixates to one of them that it can allocate, which
|
||||
/// we read back in `param_changed`.
|
||||
fn build_dmabuf_format(
|
||||
modifiers: &[u64],
|
||||
preferred: Option<(u32, u32, u32)>,
|
||||
) -> Result<Vec<u8>> {
|
||||
let (dw, dh, dhz) = preferred.unwrap_or((1920, 1080, 60));
|
||||
use pw::spa::param::format::{FormatProperties, MediaSubtype, MediaType};
|
||||
let mut obj = pw::spa::pod::object!(
|
||||
pw::spa::utils::SpaTypes::ObjectParamFormat,
|
||||
pw::spa::param::ParamType::EnumFormat,
|
||||
pw::spa::pod::property!(FormatProperties::MediaType, Id, MediaType::Video),
|
||||
pw::spa::pod::property!(FormatProperties::MediaSubtype, Id, MediaSubtype::Raw),
|
||||
pw::spa::pod::property!(FormatProperties::VideoFormat, Id, VideoFormat::BGRx),
|
||||
pw::spa::pod::property!(
|
||||
FormatProperties::VideoSize,
|
||||
Choice,
|
||||
Range,
|
||||
Rectangle,
|
||||
pw::spa::utils::Rectangle {
|
||||
width: dw,
|
||||
height: dh
|
||||
},
|
||||
pw::spa::utils::Rectangle {
|
||||
width: 1,
|
||||
height: 1
|
||||
},
|
||||
pw::spa::utils::Rectangle {
|
||||
width: 8192,
|
||||
height: 8192
|
||||
}
|
||||
),
|
||||
pw::spa::pod::property!(
|
||||
FormatProperties::VideoFramerate,
|
||||
Choice,
|
||||
Range,
|
||||
Fraction,
|
||||
pw::spa::utils::Fraction { num: dhz, denom: 1 },
|
||||
pw::spa::utils::Fraction { num: 0, denom: 1 },
|
||||
pw::spa::utils::Fraction { num: 240, denom: 1 }
|
||||
),
|
||||
);
|
||||
obj.properties.push(pw::spa::pod::Property {
|
||||
key: pw::spa::sys::SPA_FORMAT_VIDEO_modifier,
|
||||
flags: pw::spa::pod::PropertyFlags::MANDATORY,
|
||||
value: pw::spa::pod::Value::Choice(pw::spa::pod::ChoiceValue::Long(
|
||||
pw::spa::utils::Choice(
|
||||
pw::spa::utils::ChoiceFlags::empty(),
|
||||
pw::spa::utils::ChoiceEnum::Enum {
|
||||
default: modifiers[0] as i64,
|
||||
alternatives: modifiers.iter().map(|&m| m as i64).collect(),
|
||||
},
|
||||
),
|
||||
)),
|
||||
});
|
||||
serialize_pod(obj)
|
||||
}
|
||||
|
||||
/// The default (shm/CPU-path) format offer: raw video in any encoder-mappable layout, any
|
||||
/// size, any framerate (0/1 = variable allowed — gamescope fixates exactly that).
|
||||
fn build_default_format_obj(preferred: Option<(u32, u32, u32)>) -> pw::spa::pod::Object {
|
||||
let (dw, dh, dhz) = preferred.unwrap_or((1920, 1080, 60));
|
||||
pw::spa::pod::object!(
|
||||
pw::spa::utils::SpaTypes::ObjectParamFormat,
|
||||
pw::spa::param::ParamType::EnumFormat,
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::MediaType,
|
||||
Id,
|
||||
pw::spa::param::format::MediaType::Video
|
||||
),
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::MediaSubtype,
|
||||
Id,
|
||||
pw::spa::param::format::MediaSubtype::Raw
|
||||
),
|
||||
// Offer the layouts the encoder can map to an NVENC input format. wlroots
|
||||
// commonly fixates packed RGB (3 bpp); other compositors offer 4 bpp. Only
|
||||
// these are requested, so negotiation fails loudly rather than handing us a
|
||||
// format we'd misinterpret.
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::VideoFormat,
|
||||
Choice,
|
||||
Enum,
|
||||
Id,
|
||||
VideoFormat::RGB,
|
||||
VideoFormat::RGB,
|
||||
VideoFormat::BGR,
|
||||
VideoFormat::RGBx,
|
||||
VideoFormat::BGRx,
|
||||
VideoFormat::RGBA,
|
||||
VideoFormat::BGRA,
|
||||
),
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::VideoSize,
|
||||
Choice,
|
||||
Range,
|
||||
Rectangle,
|
||||
pw::spa::utils::Rectangle {
|
||||
width: dw,
|
||||
height: dh
|
||||
},
|
||||
pw::spa::utils::Rectangle {
|
||||
width: 1,
|
||||
height: 1
|
||||
},
|
||||
pw::spa::utils::Rectangle {
|
||||
width: 8192,
|
||||
height: 8192
|
||||
}
|
||||
),
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::VideoFramerate,
|
||||
Choice,
|
||||
Range,
|
||||
Fraction,
|
||||
pw::spa::utils::Fraction { num: dhz, denom: 1 },
|
||||
pw::spa::utils::Fraction { num: 0, denom: 1 },
|
||||
pw::spa::utils::Fraction { num: 240, denom: 1 }
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
/// Build a Buffers param for the CPU path accepting anything mappable: MemPtr, MemFd, and
|
||||
/// DmaBuf. The DmaBuf bit matters for producers like gamescope whose format intersection
|
||||
/// lands on their modifier-bearing (LINEAR) pod: they then offer *only* DmaBuf buffers, and
|
||||
/// without this bit the buffer-type intersection is empty and the link silently stalls in
|
||||
/// "negotiating". A LINEAR dmabuf is mmap-able by MAP_BUFFERS, so the CPU de-pad copy works.
|
||||
fn build_mappable_buffers() -> Result<Vec<u8>> {
|
||||
serialize_pod(pw::spa::pod::Object {
|
||||
type_: pw::spa::utils::SpaTypes::ObjectParamBuffers.as_raw(),
|
||||
id: pw::spa::param::ParamType::Buffers.as_raw(),
|
||||
properties: vec![pw::spa::pod::Property {
|
||||
key: pw::spa::sys::SPA_PARAM_BUFFERS_dataType,
|
||||
flags: pw::spa::pod::PropertyFlags::empty(),
|
||||
value: pw::spa::pod::Value::Int(
|
||||
(1i32 << pw::spa::sys::SPA_DATA_MemPtr)
|
||||
| (1i32 << pw::spa::sys::SPA_DATA_MemFd)
|
||||
| (1i32 << pw::spa::sys::SPA_DATA_DmaBuf),
|
||||
),
|
||||
}],
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a Buffers param requesting dmabuf-only buffers.
|
||||
fn build_dmabuf_buffers() -> Result<Vec<u8>> {
|
||||
serialize_pod(pw::spa::pod::Object {
|
||||
type_: pw::spa::utils::SpaTypes::ObjectParamBuffers.as_raw(),
|
||||
id: pw::spa::param::ParamType::Buffers.as_raw(),
|
||||
properties: vec![pw::spa::pod::Property {
|
||||
key: pw::spa::sys::SPA_PARAM_BUFFERS_dataType,
|
||||
flags: pw::spa::pod::PropertyFlags::empty(),
|
||||
value: pw::spa::pod::Value::Int(1i32 << pw::spa::sys::SPA_DATA_DmaBuf),
|
||||
}],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn pipewire_thread(
|
||||
fd: Option<OwnedFd>,
|
||||
node_id: u32,
|
||||
tx: SyncSender<CapturedFrame>,
|
||||
active: Arc<AtomicBool>,
|
||||
zerocopy: bool,
|
||||
preferred: Option<(u32, u32, u32)>,
|
||||
) -> Result<()> {
|
||||
crate::pwinit::ensure_init();
|
||||
|
||||
let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw MainLoop")?;
|
||||
let context = pw::context::ContextRc::new(&mainloop, None).context("pw Context")?;
|
||||
// A portal source hands us an fd to a (sandboxed) PipeWire remote; the KWin
|
||||
// virtual-output source has no fd — its node lives on the user's default daemon.
|
||||
let core = match fd {
|
||||
Some(fd) => context
|
||||
.connect_fd_rc(fd, None)
|
||||
.context("pw connect_fd (portal remote)")?,
|
||||
None => context
|
||||
.connect_rc(None)
|
||||
.context("pw connect (default daemon)")?,
|
||||
};
|
||||
|
||||
// Build the EGL→CUDA importer up front; if it fails, log and fall back to the CPU path
|
||||
// (we simply won't request dmabuf below).
|
||||
let importer = if zerocopy {
|
||||
match crate::zerocopy::EglImporter::new() {
|
||||
Ok(i) => Some(i),
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %format!("{e:#}"), "zero-copy import unavailable — using CPU path");
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
// Modifiers our import stack handles for BGRx: the EGL-importable (tiled) set, plus
|
||||
// LINEAR (0) — NVIDIA's EGL won't list it, but LINEAR dmabufs (gamescope's only offer)
|
||||
// import via CUDA external memory instead. Tiled stays first so allocators that can do
|
||||
// both (KWin) prefer it. If none, we can't negotiate dmabuf → shm path.
|
||||
let mut modifiers = importer
|
||||
.as_ref()
|
||||
.map(|i| i.supported_modifiers(crate::zerocopy::drm_fourcc(PixelFormat::Bgrx).unwrap()))
|
||||
.unwrap_or_default();
|
||||
if importer.is_some() && !modifiers.contains(&0) {
|
||||
modifiers.push(0); // DRM_FORMAT_MOD_LINEAR
|
||||
}
|
||||
let want_dmabuf = importer.is_some() && !modifiers.is_empty();
|
||||
if zerocopy && !want_dmabuf {
|
||||
tracing::warn!("zero-copy: no EGL-importable dmabuf modifiers — using CPU path");
|
||||
} else if want_dmabuf {
|
||||
tracing::info!(
|
||||
count = modifiers.len(),
|
||||
sample = ?&modifiers[..modifiers.len().min(6)],
|
||||
"zero-copy: advertising EGL-importable dmabuf modifiers"
|
||||
);
|
||||
}
|
||||
|
||||
let data = UserData {
|
||||
info: VideoInfoRaw::default(),
|
||||
format: None,
|
||||
modifier: 0,
|
||||
tx,
|
||||
active,
|
||||
importer,
|
||||
};
|
||||
|
||||
let stream = pw::stream::StreamBox::new(
|
||||
&core,
|
||||
"punktfunk-screencast",
|
||||
properties! {
|
||||
*pw::keys::MEDIA_TYPE => "Video",
|
||||
*pw::keys::MEDIA_CATEGORY => "Capture",
|
||||
*pw::keys::MEDIA_ROLE => "Screen",
|
||||
// Never let the session manager re-target this stream to a different node when
|
||||
// its target goes away: an orphaned stream auto-linked to a fresh Video/Source
|
||||
// wedges that node — and a stuck link head-blocks the PipeWire daemon's shared
|
||||
// work queue, stalling ALL new link negotiation system-wide.
|
||||
"node.dont-reconnect" => "true",
|
||||
},
|
||||
)
|
||||
.context("pw Stream")?;
|
||||
|
||||
let _listener = stream
|
||||
.add_local_listener_with_user_data(data)
|
||||
.state_changed(|_stream, _ud, old, new| {
|
||||
tracing::info!(?old, ?new, "pipewire stream state");
|
||||
})
|
||||
.param_changed(|_stream, ud, id, param| {
|
||||
let Some(param) = param else { return };
|
||||
if id != pw::spa::param::ParamType::Format.as_raw() {
|
||||
return;
|
||||
}
|
||||
let Ok((media_type, media_subtype)) =
|
||||
pw::spa::param::format_utils::parse_format(param)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
if media_type != pw::spa::param::format::MediaType::Video
|
||||
|| media_subtype != pw::spa::param::format::MediaSubtype::Raw
|
||||
{
|
||||
return;
|
||||
}
|
||||
if ud.info.parse(param).is_ok() {
|
||||
let sz = ud.info.size();
|
||||
ud.format = map_format(ud.info.format());
|
||||
ud.modifier = ud.info.modifier();
|
||||
tracing::info!(
|
||||
width = sz.width,
|
||||
height = sz.height,
|
||||
spa_format = ?ud.info.format(),
|
||||
mapped = ?ud.format,
|
||||
modifier = ud.modifier,
|
||||
"pipewire format negotiated"
|
||||
);
|
||||
if ud.format.is_none() {
|
||||
tracing::error!(
|
||||
spa_format = ?ud.info.format(),
|
||||
"negotiated a pixel format the encoder cannot consume — frames will be skipped"
|
||||
);
|
||||
}
|
||||
}
|
||||
})
|
||||
.process(|stream, ud| {
|
||||
// PipeWire dispatches this from a C trampoline with no catch_unwind; a
|
||||
// panic crossing that FFI boundary would abort the whole host. Contain it.
|
||||
let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||
let Some(mut buffer) = stream.dequeue_buffer() else {
|
||||
return;
|
||||
};
|
||||
// No active stream: release the buffer without the (expensive at 5K) de-pad.
|
||||
if !ud.active.load(Ordering::Relaxed) {
|
||||
return;
|
||||
}
|
||||
let datas = buffer.datas_mut();
|
||||
if datas.is_empty() {
|
||||
return;
|
||||
}
|
||||
let sz = ud.info.size();
|
||||
let (w, h) = (sz.width as usize, sz.height as usize);
|
||||
if w == 0 || h == 0 {
|
||||
return; // format not negotiated yet
|
||||
}
|
||||
|
||||
// Zero-copy path: if the buffer is a dmabuf and we have an importer, import it
|
||||
// into a CUDA device buffer (no CPU touch) and deliver that. Otherwise fall
|
||||
// through to the shm de-pad copy below.
|
||||
let mut gpu_import_broken = false;
|
||||
if let (Some(importer), Some(fmt)) = (ud.importer.as_mut(), ud.format) {
|
||||
if datas[0].type_() == pw::spa::buffer::DataType::DmaBuf {
|
||||
let plane = crate::zerocopy::DmabufPlane {
|
||||
fd: datas[0].fd(),
|
||||
offset: datas[0].chunk().offset(),
|
||||
stride: datas[0].chunk().stride().max(0) as u32,
|
||||
};
|
||||
// Tiled modifier → EGL/GL de-tile import; LINEAR (0/unset, e.g.
|
||||
// gamescope) → direct CUDA external-memory import (NVIDIA EGL can't
|
||||
// sample LINEAR).
|
||||
let modifier = (ud.modifier != 0).then_some(ud.modifier);
|
||||
if let Some(fourcc) = crate::zerocopy::drm_fourcc(fmt) {
|
||||
let imported = if modifier.is_some() {
|
||||
importer.import(&plane, w as u32, h as u32, fourcc, modifier)
|
||||
} else {
|
||||
importer.import_linear(&plane, w as u32, h as u32)
|
||||
};
|
||||
match imported {
|
||||
Ok(devbuf) => {
|
||||
static ONCE: std::sync::atomic::AtomicBool =
|
||||
std::sync::atomic::AtomicBool::new(true);
|
||||
if ONCE.swap(false, Ordering::Relaxed) {
|
||||
tracing::info!(w, h, modifier = ud.modifier,
|
||||
"zero-copy: dmabuf imported to CUDA (no CPU copy)");
|
||||
}
|
||||
let pts_ns = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos() as u64)
|
||||
.unwrap_or(0);
|
||||
let _ = ud.tx.try_send(CapturedFrame {
|
||||
width: w as u32,
|
||||
height: h as u32,
|
||||
pts_ns,
|
||||
format: fmt,
|
||||
payload: FramePayload::Cuda(devbuf),
|
||||
});
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
// GPU import unavailable for this buffer kind (e.g. the
|
||||
// driver rejects LINEAR external-memory import). Disable
|
||||
// the importer and fall through to the CPU mmap path —
|
||||
// degraded, not dead.
|
||||
tracing::warn!(error = %format!("{e:#}"),
|
||||
"dmabuf GPU import failed — falling back to the CPU copy path");
|
||||
gpu_import_broken = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return; // format has no DRM fourcc mapping — skip the frame
|
||||
}
|
||||
}
|
||||
}
|
||||
if gpu_import_broken {
|
||||
ud.importer = None;
|
||||
}
|
||||
|
||||
let d = &mut datas[0];
|
||||
// CPU path may also receive LINEAR dmabufs (gamescope offers only those once its
|
||||
// modifier-bearing format pod wins); capture the fd before `data()` borrows `d`.
|
||||
let dmabuf_fd =
|
||||
(d.type_() == pw::spa::buffer::DataType::DmaBuf).then(|| d.fd());
|
||||
let (size, offset, stride) = {
|
||||
let c = d.chunk();
|
||||
(
|
||||
c.size() as usize,
|
||||
c.offset() as usize,
|
||||
c.stride().max(0) as usize,
|
||||
)
|
||||
};
|
||||
let Some(fmt) = ud.format else { return }; // unsupported/not negotiated
|
||||
let bpp = fmt.bytes_per_pixel();
|
||||
let row = w * bpp;
|
||||
let stride = if stride == 0 { row } else { stride };
|
||||
if stride < row {
|
||||
warn_once("chunk stride < row — frames dropped");
|
||||
return;
|
||||
}
|
||||
let needed = stride * (h - 1) + row;
|
||||
// dmabuf chunks commonly report size 0; fall back to the computed span.
|
||||
let size = if size == 0 { needed } else { size };
|
||||
// MAP_BUFFERS only maps buffers flagged mappable; Vulkan-exported dmabufs
|
||||
// (gamescope) usually aren't, so mmap the fd ourselves for the de-pad read.
|
||||
let _mapping; // keeps a manual mmap alive for the copy below
|
||||
let buf: &[u8] = if let Some(data) = d.data() {
|
||||
data
|
||||
} else if let Some(fd) = dmabuf_fd.filter(|&fd| fd > 0) {
|
||||
match DmabufMap::new(fd, offset + needed) {
|
||||
Some(m) => {
|
||||
_mapping = m;
|
||||
unsafe {
|
||||
std::slice::from_raw_parts(_mapping.ptr as *const u8, _mapping.len)
|
||||
}
|
||||
}
|
||||
None => {
|
||||
warn_once("mmap(dmabuf) failed — frames dropped");
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn_once("buffer has no mappable data — frames dropped");
|
||||
return;
|
||||
};
|
||||
// Need stride*(h-1)+row valid bytes within [offset, offset+size).
|
||||
if offset > buf.len() {
|
||||
return;
|
||||
}
|
||||
let avail = buf.len() - offset;
|
||||
if needed > avail || needed > size {
|
||||
warn_once("buffer smaller than frame span — frames dropped");
|
||||
return;
|
||||
}
|
||||
let region = &buf[offset..offset + size.min(avail)];
|
||||
// De-pad into a tightly-packed buffer (chunk stride may exceed w*bpp).
|
||||
let mut tight = vec![0u8; row * h];
|
||||
for y in 0..h {
|
||||
tight[y * row..y * row + row]
|
||||
.copy_from_slice(®ion[y * stride..y * stride + row]);
|
||||
}
|
||||
let pts_ns = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos() as u64)
|
||||
.unwrap_or(0);
|
||||
let frame = CapturedFrame {
|
||||
width: w as u32,
|
||||
height: h as u32,
|
||||
pts_ns,
|
||||
format: fmt,
|
||||
payload: FramePayload::Cpu(tight),
|
||||
};
|
||||
// Drop if the encoder is behind — never block the pipewire loop.
|
||||
let _ = ud.tx.try_send(frame);
|
||||
}));
|
||||
if outcome.is_err() {
|
||||
tracing::error!("panic in pipewire process callback — frame dropped");
|
||||
}
|
||||
})
|
||||
.register()
|
||||
.context("register stream listener")?;
|
||||
|
||||
// Debug knob: offer a single fixed format (PUNKTFUNK_PW_FIXED_POD="WxH") to bisect
|
||||
// negotiation failures against a producer's exact EnumFormat (e.g. gamescope).
|
||||
let fixed_pod: Option<(u32, u32)> = std::env::var("PUNKTFUNK_PW_FIXED_POD")
|
||||
.ok()
|
||||
.and_then(|v| v.split_once('x').map(|(w, h)| (w.parse(), h.parse())))
|
||||
.and_then(|(w, h)| Some((w.ok()?, h.ok()?)));
|
||||
|
||||
// Request raw video in any encoder-mappable layout, any size/framerate.
|
||||
let obj = if let Some((fw, fh)) = fixed_pod {
|
||||
tracing::info!(fw, fh, "PW DEBUG: offering fixed BGRx pod");
|
||||
pw::spa::pod::object!(
|
||||
pw::spa::utils::SpaTypes::ObjectParamFormat,
|
||||
pw::spa::param::ParamType::EnumFormat,
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::MediaType,
|
||||
Id,
|
||||
pw::spa::param::format::MediaType::Video
|
||||
),
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::MediaSubtype,
|
||||
Id,
|
||||
pw::spa::param::format::MediaSubtype::Raw
|
||||
),
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::VideoFormat,
|
||||
Id,
|
||||
VideoFormat::BGRx
|
||||
),
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::VideoSize,
|
||||
Rectangle,
|
||||
pw::spa::utils::Rectangle {
|
||||
width: fw,
|
||||
height: fh
|
||||
}
|
||||
),
|
||||
pw::spa::pod::property!(
|
||||
pw::spa::param::format::FormatProperties::VideoFramerate,
|
||||
Fraction,
|
||||
pw::spa::utils::Fraction { num: 0, denom: 1 }
|
||||
),
|
||||
)
|
||||
} else {
|
||||
build_default_format_obj(preferred)
|
||||
};
|
||||
|
||||
// When zero-copy is on, offer ONLY a BGRx dmabuf format with our EGL-importable modifiers
|
||||
// (offering shm too makes the compositor pick shm). The modifier list is advertised with
|
||||
// DONT_FIXATE so the compositor's allocator chooses one; we re-emit the fixated format in
|
||||
// `param_changed` (the two-step DMA-BUF handshake). Otherwise offer the multi-format shm
|
||||
// pod and let MAP_BUFFERS map it.
|
||||
let shm_values = serialize_pod(obj)?;
|
||||
let (dmabuf_values, buffers_values) = if want_dmabuf {
|
||||
(
|
||||
Some(build_dmabuf_format(&modifiers, preferred)?),
|
||||
Some(build_dmabuf_buffers()?),
|
||||
)
|
||||
} else {
|
||||
// CPU path still accepts mappable dmabufs (gamescope offers only those once its
|
||||
// modifier-bearing format pod wins the intersection).
|
||||
(None, Some(build_mappable_buffers()?))
|
||||
};
|
||||
|
||||
let mut byte_slices: Vec<&[u8]> = Vec::new();
|
||||
match &dmabuf_values {
|
||||
Some(d) => byte_slices.push(d),
|
||||
None => byte_slices.push(&shm_values),
|
||||
}
|
||||
if let Some(b) = &buffers_values {
|
||||
byte_slices.push(b);
|
||||
}
|
||||
let mut params: Vec<&Pod> = byte_slices
|
||||
.iter()
|
||||
.map(|&b| Pod::from_bytes(b).context("pod from bytes"))
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
stream
|
||||
.connect(
|
||||
spa::utils::Direction::Input,
|
||||
Some(node_id),
|
||||
pw::stream::StreamFlags::AUTOCONNECT | pw::stream::StreamFlags::MAP_BUFFERS,
|
||||
&mut params,
|
||||
)
|
||||
.context("pw stream connect")?;
|
||||
|
||||
// Blocks this thread, pumping frame callbacks until process exit.
|
||||
mainloop.run();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
//! Hardware video encode (plan §7). Binds FFmpeg (NVENC); never rewrites codecs.
|
||||
//! Low-latency preset, B-frames off. M0 feeds BGRx CPU frames directly — `*_nvenc`
|
||||
//! accepts `bgr0` input and converts to YUV on the GPU, so no host-side swscale is
|
||||
//! needed (dmabuf zero-copy import is deferred; plan §9).
|
||||
|
||||
use crate::capture::{CapturedFrame, PixelFormat};
|
||||
use anyhow::Result;
|
||||
|
||||
/// One encoded access unit (AU), ready to hand to `punktfunk_core` for FEC + packetization.
///
/// The encoder is opened without a global header, so `data` is in-band Annex-B and every
/// keyframe carries its own VPS/SPS/PPS: the bytes are both a playable elementary stream and
/// a self-contained AU for the wire.
pub struct EncodedFrame {
    /// Encoded bytes of this access unit.
    pub data: Vec<u8>,
    /// Presentation timestamp, in nanoseconds.
    pub pts_ns: u64,
    /// Set for IDR/keyframes; drives the SOF/keyframe wire flags.
    pub keyframe: bool,
}
|
||||
|
||||
/// The video codec agreed on with the client during negotiation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Codec {
    /// H.264.
    H264,
    /// H.265 (HEVC).
    H265,
    /// AV1.
    Av1,
}
|
||||
|
||||
impl Codec {
|
||||
/// The FFmpeg NVENC encoder name (selected by name, not codec id — the latter would
|
||||
/// pick the software encoder).
|
||||
pub fn nvenc_name(self) -> &'static str {
|
||||
match self {
|
||||
Codec::H264 => "h264_nvenc",
|
||||
Codec::H265 => "hevc_nvenc",
|
||||
Codec::Av1 => "av1_nvenc",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A hardware encoder. One per session; runs on the encode thread.
|
||||
pub trait Encoder: Send {
|
||||
fn submit(&mut self, frame: &CapturedFrame) -> Result<()>;
|
||||
/// Force the next submitted frame to be an IDR keyframe (e.g. after a client
|
||||
/// reference-frame-invalidation request). Default: no-op.
|
||||
fn request_keyframe(&mut self) {}
|
||||
/// Pull the next encoded AU if one is ready.
|
||||
fn poll(&mut self) -> Result<Option<EncodedFrame>>;
|
||||
/// Signal end-of-stream. After this, drain the remaining AUs with [`poll`](Self::poll)
|
||||
/// until it returns `None` — NVENC buffers frames internally even at `delay=0`.
|
||||
fn flush(&mut self) -> Result<()>;
|
||||
}
|
||||
|
||||
impl Codec {
|
||||
/// Maximum encodable dimension (px) per side for this codec on NVENC. H.264 tops out at
|
||||
/// 4096 (level constraint); HEVC and AV1 allow 8192. Used to reject out-of-range client
|
||||
/// modes up front (see [`validate_dimensions`]).
|
||||
pub fn max_dimension(self) -> u32 {
|
||||
match self {
|
||||
Codec::H264 => 4096,
|
||||
Codec::H265 | Codec::Av1 => 8192,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate a requested encode resolution before we allocate buffers or open NVENC. Rejects
|
||||
/// zero/odd-sized and out-of-range modes with a clear error instead of letting buffer math
|
||||
/// overflow or the encoder open fail with an opaque NVENC code. A client can request any
|
||||
/// `mode=WxHxFPS`, so this is the gate on attacker/typo-controlled dimensions.
|
||||
pub fn validate_dimensions(codec: Codec, width: u32, height: u32) -> Result<()> {
|
||||
if width == 0 || height == 0 {
|
||||
anyhow::bail!("invalid encode resolution {width}x{height}: dimensions must be non-zero");
|
||||
}
|
||||
// NVENC requires even dimensions for the chroma subsampling it does internally.
|
||||
if width % 2 != 0 || height % 2 != 0 {
|
||||
anyhow::bail!("invalid encode resolution {width}x{height}: dimensions must be even");
|
||||
}
|
||||
let max = codec.max_dimension();
|
||||
if width > max || height > max {
|
||||
anyhow::bail!(
|
||||
"{codec:?} max dimension is {max}px; requested {width}x{height} \
|
||||
(use HEVC/AV1 above 4096, or lower the client resolution)"
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Open an NVENC encoder for frames of the given `format` and mode. When `cuda` is true the
|
||||
/// encoder takes GPU frames (`AV_PIX_FMT_CUDA`) from the zero-copy path; otherwise it takes
|
||||
/// packed RGB/BGR CPU frames. `format`/`bitrate_bps`/`codec`/mode come from session
|
||||
/// negotiation; the caller derives `cuda` from the first captured frame's payload.
|
||||
pub fn open_video(
|
||||
codec: Codec,
|
||||
format: PixelFormat,
|
||||
width: u32,
|
||||
height: u32,
|
||||
fps: u32,
|
||||
bitrate_bps: u64,
|
||||
cuda: bool,
|
||||
) -> Result<Box<dyn Encoder>> {
|
||||
validate_dimensions(codec, width, height)?;
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
let enc = linux::NvencEncoder::open(codec, format, width, height, fps, bitrate_bps, cuda)?;
|
||||
Ok(Box::new(enc) as Box<dyn Encoder>)
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
let _ = (codec, format, width, height, fps, bitrate_bps, cuda);
|
||||
anyhow::bail!("NVENC encode requires Linux (FFmpeg + NVIDIA driver)")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
mod linux;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rejects_zero_and_odd_dimensions() {
        // In order: zero width, zero height, odd width, odd height.
        for (w, h) in [(0, 1080), (1920, 0), (1921, 1080), (1920, 1081)] {
            assert!(validate_dimensions(Codec::H265, w, h).is_err());
        }
    }

    #[test]
    fn h264_capped_at_4096() {
        // 4K fits (width < 4096), and so does a mode exactly at the limit.
        for (w, h) in [(3840, 2160), (4096, 4096)] {
            assert!(validate_dimensions(Codec::H264, w, h).is_ok());
        }
        // One even step past the limit, on either axis, is rejected.
        for (w, h) in [(4098, 2160), (3840, 4098)] {
            assert!(validate_dimensions(Codec::H264, w, h).is_err());
        }
    }

    #[test]
    fn hevc_and_av1_allow_up_to_8192() {
        for codec in [Codec::H265, Codec::Av1] {
            // 4K, 8K, and exactly at the limit.
            for (w, h) in [(3840, 2160), (7680, 4320), (8192, 8192)] {
                assert!(validate_dimensions(codec, w, h).is_ok());
            }
            assert!(validate_dimensions(codec, 8194, 4320).is_err());
        }
    }

    #[test]
    fn common_modes_accepted() {
        const MODES: [(u32, u32); 3] = [(1280, 720), (1920, 1080), (2560, 1440)];
        for c in [Codec::H264, Codec::H265, Codec::Av1] {
            for (w, h) in MODES {
                assert!(validate_dimensions(c, w, h).is_ok(), "{c:?} {w}x{h}");
            }
        }
    }
}
|
||||
@@ -0,0 +1,406 @@
|
||||
//! NVENC encoder via `ffmpeg-next` (binds the system FFmpeg 8.x / libavcodec 62).
|
||||
//!
|
||||
//! Input is a packed RGB/BGR CPU frame; `*_nvenc` accepts `rgb0`/`bgr0`/`rgba`/`bgra`
|
||||
//! directly and does the RGB→YUV conversion on the GPU, so the host stays off the
|
||||
//! colour-conversion path. The portal commonly negotiates packed 24-bit `RGB`, which NVENC
|
||||
//! does *not* accept — we expand it to `rgb0` (one padding byte/pixel, no colour math).
|
||||
//! The encoder is opened *without* a global header so VPS/SPS/PPS are emitted in-band on
|
||||
//! every IDR — the output is both a playable raw Annex-B stream and self-contained AUs.
|
||||
|
||||
use super::{Codec, EncodedFrame, Encoder};
|
||||
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use ffmpeg::format::Pixel;
|
||||
use ffmpeg::util::frame::Video as VideoFrame;
|
||||
use ffmpeg::{codec, encoder, Dictionary, Packet, Rational};
|
||||
use ffmpeg_next as ffmpeg;
|
||||
use std::os::raw::c_int;
|
||||
|
||||
use ffmpeg::ffi; // = ffmpeg_sys_next
|
||||
|
||||
/// Mirror of `AVCUDADeviceContext` from libavutil/hwcontext_cuda.h.
///
/// The ffmpeg-sys bindings do not expose it (the crate doesn't allowlist that header), so its
/// stable 3-pointer layout is reproduced here. The first field is set to *our* `CUcontext` so
/// that NVENC shares the context the EGL importer maps into.
#[repr(C)]
struct AVCUDADeviceContext {
    /// `CUcontext`.
    cuda_ctx: *mut std::ffi::c_void,
    /// `CUstream`; null means the default stream.
    stream: *mut std::ffi::c_void,
    /// Filled by ctx_init.
    internal: *mut std::ffi::c_void,
}
|
||||
|
||||
/// CUDA hardware-frame contexts that wrap our shared `CUcontext`, so `hevc_nvenc` reads the
|
||||
/// imported device buffer directly. Owns two `AVBufferRef`s, unref'd on drop.
|
||||
struct CudaHw {
|
||||
device_ref: *mut ffi::AVBufferRef,
|
||||
frames_ref: *mut ffi::AVBufferRef,
|
||||
}
|
||||
|
||||
impl CudaHw {
|
||||
/// Build a CUDA hwdevice wrapping `cu_ctx` and a frames pool (`sw_format` = `pixel`).
|
||||
unsafe fn new(cu_ctx: *mut std::ffi::c_void, sw_format: Pixel, w: u32, h: u32) -> Result<Self> {
|
||||
let mut device_ref = ffi::av_hwdevice_ctx_alloc(ffi::AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA);
|
||||
if device_ref.is_null() {
|
||||
bail!("av_hwdevice_ctx_alloc(CUDA) failed");
|
||||
}
|
||||
let dev_ctx = (*device_ref).data as *mut ffi::AVHWDeviceContext;
|
||||
let cu = (*dev_ctx).hwctx as *mut AVCUDADeviceContext;
|
||||
(*cu).cuda_ctx = cu_ctx; // share the importer's context
|
||||
let r = ffi::av_hwdevice_ctx_init(device_ref);
|
||||
if r < 0 {
|
||||
ffi::av_buffer_unref(&mut device_ref);
|
||||
bail!("av_hwdevice_ctx_init failed ({r})");
|
||||
}
|
||||
|
||||
let mut frames_ref = ffi::av_hwframe_ctx_alloc(device_ref);
|
||||
if frames_ref.is_null() {
|
||||
ffi::av_buffer_unref(&mut device_ref);
|
||||
bail!("av_hwframe_ctx_alloc failed");
|
||||
}
|
||||
let fc = (*frames_ref).data as *mut ffi::AVHWFramesContext;
|
||||
(*fc).format = ffi::AVPixelFormat::AV_PIX_FMT_CUDA;
|
||||
(*fc).sw_format = pixel_to_av(sw_format);
|
||||
(*fc).width = w as c_int;
|
||||
(*fc).height = h as c_int;
|
||||
(*fc).initial_pool_size = 0; // we supply the device pointers
|
||||
let r = ffi::av_hwframe_ctx_init(frames_ref);
|
||||
if r < 0 {
|
||||
ffi::av_buffer_unref(&mut frames_ref);
|
||||
ffi::av_buffer_unref(&mut device_ref);
|
||||
bail!("av_hwframe_ctx_init failed ({r})");
|
||||
}
|
||||
Ok(CudaHw {
|
||||
device_ref,
|
||||
frames_ref,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for CudaHw {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
ffi::av_buffer_unref(&mut self.frames_ref);
|
||||
ffi::av_buffer_unref(&mut self.device_ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `ffmpeg::format::Pixel` → raw `AVPixelFormat`.
|
||||
fn pixel_to_av(p: Pixel) -> ffi::AVPixelFormat {
|
||||
// `Pixel` is `#[repr(i32)]`-compatible with `AVPixelFormat` (the bindgen enum) via this
|
||||
// documented conversion in ffmpeg-next.
|
||||
ffi::AVPixelFormat::from(p)
|
||||
}
|
||||
|
||||
/// Map a captured layout to the NVENC input pixel format, and whether a 3→4 byte expand is
|
||||
/// needed (packed RGB/BGR have no padding byte; the NVENC `*0` formats do).
|
||||
fn nvenc_input(format: PixelFormat) -> (Pixel, bool) {
|
||||
match format {
|
||||
PixelFormat::Bgrx => (Pixel::BGRZ, false), // bgr0
|
||||
PixelFormat::Rgbx => (Pixel::RGBZ, false), // rgb0
|
||||
PixelFormat::Bgra => (Pixel::BGRA, false),
|
||||
PixelFormat::Rgba => (Pixel::RGBA, false),
|
||||
PixelFormat::Rgb => (Pixel::RGBZ, true), // RGB -> rgb0
|
||||
PixelFormat::Bgr => (Pixel::BGRZ, true), // BGR -> bgr0
|
||||
}
|
||||
}
|
||||
|
||||
pub struct NvencEncoder {
|
||||
enc: encoder::video::Encoder,
|
||||
/// Reusable 4-bpp CPU input frame (CPU path only; `None` for the zero-copy/CUDA path).
|
||||
/// Mutating it in place across frames is sound only because the encoder is opened with
|
||||
/// `delay=0`/`bf=0`/`max_b_frames=0` and the caller drains `poll()` after each `submit`,
|
||||
/// so libavcodec holds no reference to the previous frame's buffer when we overwrite it.
|
||||
frame: Option<VideoFrame>,
|
||||
/// Zero-copy path: CUDA hwdevice/hwframes contexts (the encoder takes `AV_PIX_FMT_CUDA`).
|
||||
cuda: Option<CudaHw>,
|
||||
src_format: PixelFormat,
|
||||
expand: bool,
|
||||
width: u32,
|
||||
height: u32,
|
||||
fps: u32,
|
||||
/// Monotonic presentation index, in `1/fps` time-base units.
|
||||
frame_idx: i64,
|
||||
/// Force the next submitted frame to be an IDR (set by [`request_keyframe`]).
|
||||
force_kf: bool,
|
||||
}
|
||||
|
||||
// `CudaHw` holds raw `AVBufferRef`s; the encoder lives on a single thread. The CPU encoder is
|
||||
// already `Send` via ffmpeg-next; assert it for the CUDA fields too.
|
||||
unsafe impl Send for NvencEncoder {}
|
||||
|
||||
impl NvencEncoder {
|
||||
pub fn open(
|
||||
codec: Codec,
|
||||
format: PixelFormat,
|
||||
width: u32,
|
||||
height: u32,
|
||||
fps: u32,
|
||||
bitrate_bps: u64,
|
||||
cuda: bool,
|
||||
) -> Result<Self> {
|
||||
ffmpeg::init().context("ffmpeg init")?;
|
||||
if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() {
|
||||
unsafe { ffi::av_log_set_level(48) }; // AV_LOG_DEBUG — surface NVENC hw-frame rejects
|
||||
}
|
||||
let name = codec.nvenc_name();
|
||||
let av_codec = encoder::find_by_name(name)
|
||||
.ok_or_else(|| anyhow!("{name} not built into libavcodec"))?;
|
||||
let (nvenc_pixel, expand) = nvenc_input(format);
|
||||
|
||||
let mut video = codec::context::Context::new_with_codec(av_codec)
|
||||
.encoder()
|
||||
.video()
|
||||
.context("alloc video encoder")?;
|
||||
video.set_width(width);
|
||||
video.set_height(height);
|
||||
video.set_format(nvenc_pixel); // NVENC converts RGB→YUV internally
|
||||
video.set_time_base(Rational(1, fps as i32));
|
||||
video.set_frame_rate(Some(Rational(fps as i32, 1)));
|
||||
video.set_bit_rate(bitrate_bps as usize);
|
||||
video.set_max_bit_rate(bitrate_bps as usize);
|
||||
video.set_max_b_frames(0);
|
||||
// Infinite GOP — NO periodic IDR. A keyframe at 5120x1440 is ~20-40x a P-frame, so a
|
||||
// periodic IDR is a recurring multi-millisecond encode+packetize+send spike — the ~2s
|
||||
// "freeze". NVENC emits one IDR at stream start, then P-frames only; `forced-idr` (below)
|
||||
// turns a client recovery request (RFI, via `request_keyframe`) into an IDR on demand.
|
||||
// This is the Moonlight/Sunshine low-latency model.
|
||||
unsafe {
|
||||
(*video.as_mut_ptr()).gop_size = -1;
|
||||
}
|
||||
|
||||
// For the zero-copy path, take CUDA surfaces: wrap the shared CUcontext in CUDA
|
||||
// hwdevice/hwframes contexts and set `pix_fmt = CUDA` on the raw encoder context
|
||||
// *before* open (NVENC derives the device from `hw_frames_ctx`).
|
||||
let cuda_hw = if cuda {
|
||||
let cu_ctx = crate::zerocopy::cuda::context().context("shared CUDA context")?;
|
||||
let hw = unsafe { CudaHw::new(cu_ctx, nvenc_pixel, width, height)? };
|
||||
unsafe {
|
||||
let raw = video.as_mut_ptr();
|
||||
(*raw).pix_fmt = ffi::AVPixelFormat::AV_PIX_FMT_CUDA;
|
||||
(*raw).hw_device_ctx = ffi::av_buffer_ref(hw.device_ref);
|
||||
(*raw).hw_frames_ctx = ffi::av_buffer_ref(hw.frames_ref);
|
||||
}
|
||||
Some(hw)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Low-latency NVENC tuning (plan §7 / linux-setup doc).
|
||||
let mut opts = Dictionary::new();
|
||||
opts.set("preset", "p1"); // fastest
|
||||
opts.set("tune", "ull"); // ultra-low-latency
|
||||
opts.set("rc", "cbr");
|
||||
opts.set("bf", "0");
|
||||
opts.set("delay", "0");
|
||||
opts.set("forced-idr", "1"); // RFI/request_keyframe → real IDR under the infinite GOP
|
||||
|
||||
// Split-frame encode across both NVENC engines (GB203 has 2) when the pixel rate exceeds
|
||||
// a single engine's HEVC capacity (~1 Gpix/s); e.g. 5120x1440@240 = 1.77 Gpix/s needs it,
|
||||
// @120 = 0.88 Gpix/s does not. HEVC/AV1 only (not H.264). AUTO won't engage below ~2112px
|
||||
// height, so we force `2`; below the threshold we leave it AUTO (split costs ~2% BD-rate).
|
||||
// Output is standard HEVC — transparent to the client. Override with PUNKTFUNK_SPLIT_ENCODE.
|
||||
let pix_rate = width as u64 * height as u64 * fps as u64;
|
||||
let split = std::env::var("PUNKTFUNK_SPLIT_ENCODE").ok();
|
||||
match split.as_deref() {
|
||||
Some(mode) => opts.set("split_encode_mode", mode),
|
||||
None if matches!(codec, Codec::H265 | Codec::Av1) && pix_rate > 1_000_000_000 => {
|
||||
opts.set("split_encode_mode", "2");
|
||||
tracing::info!(
|
||||
pix_rate,
|
||||
"NVENC: forcing 2-way split encode (high pixel rate)"
|
||||
);
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
|
||||
let enc = video
|
||||
.open_with(opts)
|
||||
.with_context(|| format!("open {name} ({width}x{height}@{fps}, {bitrate_bps} bps)"))?;
|
||||
|
||||
let frame = if cuda {
|
||||
None
|
||||
} else {
|
||||
Some(VideoFrame::new(nvenc_pixel, width, height))
|
||||
};
|
||||
Ok(NvencEncoder {
|
||||
enc,
|
||||
frame,
|
||||
cuda: cuda_hw,
|
||||
src_format: format,
|
||||
expand,
|
||||
width,
|
||||
height,
|
||||
fps,
|
||||
frame_idx: 0,
|
||||
force_kf: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Encoder for NvencEncoder {
|
||||
fn submit(&mut self, captured: &CapturedFrame) -> Result<()> {
|
||||
anyhow::ensure!(
|
||||
captured.width == self.width && captured.height == self.height,
|
||||
"captured frame {}x{} != encoder {}x{}",
|
||||
captured.width,
|
||||
captured.height,
|
||||
self.width,
|
||||
self.height
|
||||
);
|
||||
let pts = self.frame_idx;
|
||||
self.frame_idx += 1;
|
||||
// Force an IDR when requested (client RFI); otherwise let NVENC pick (GOP/P-frame).
|
||||
let idr = self.force_kf;
|
||||
self.force_kf = false;
|
||||
match &captured.payload {
|
||||
FramePayload::Cuda(buf) => self.submit_cuda(buf, pts, idr),
|
||||
FramePayload::Cpu(bytes) => self.submit_cpu(bytes, captured.format, pts, idr),
|
||||
}
|
||||
}
|
||||
|
||||
fn request_keyframe(&mut self) {
|
||||
self.force_kf = true;
|
||||
}
|
||||
|
||||
fn poll(&mut self) -> Result<Option<EncodedFrame>> {
|
||||
let mut pkt = Packet::empty();
|
||||
match self.enc.receive_packet(&mut pkt) {
|
||||
Ok(()) => {
|
||||
let data = pkt.data().map(|d| d.to_vec()).unwrap_or_default();
|
||||
let pts = pkt.pts().unwrap_or(0).max(0) as u64;
|
||||
let pts_ns = pts * 1_000_000_000 / self.fps as u64;
|
||||
Ok(Some(EncodedFrame {
|
||||
data,
|
||||
pts_ns,
|
||||
keyframe: pkt.is_key(),
|
||||
}))
|
||||
}
|
||||
// No packet ready yet (need another input frame).
|
||||
Err(ffmpeg::Error::Other { errno })
|
||||
if errno == ffmpeg::util::error::EAGAIN
|
||||
|| errno == ffmpeg::util::error::EWOULDBLOCK =>
|
||||
{
|
||||
Ok(None)
|
||||
}
|
||||
// Fully drained after flush().
|
||||
Err(ffmpeg::Error::Eof) => Ok(None),
|
||||
Err(e) => Err(e).context("receive_packet"),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> Result<()> {
|
||||
self.enc.send_eof().context("send_eof")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl NvencEncoder {
|
||||
/// CPU path: expand/copy the packed RGB/BGR bytes into the reusable 4-bpp frame, then send.
|
||||
fn submit_cpu(&mut self, bytes: &[u8], format: PixelFormat, pts: i64, idr: bool) -> Result<()> {
|
||||
anyhow::ensure!(
|
||||
format == self.src_format,
|
||||
"captured format {:?} != encoder source {:?}",
|
||||
format,
|
||||
self.src_format
|
||||
);
|
||||
let w = self.width as usize;
|
||||
let h = self.height as usize;
|
||||
let src_bpp = self.src_format.bytes_per_pixel();
|
||||
let src_row = w * src_bpp;
|
||||
anyhow::ensure!(
|
||||
bytes.len() >= src_row * h,
|
||||
"captured buffer {} bytes < required {}",
|
||||
bytes.len(),
|
||||
src_row * h
|
||||
);
|
||||
let frame = self
|
||||
.frame
|
||||
.as_mut()
|
||||
.context("CPU frame missing (encoder opened in CUDA mode)")?;
|
||||
let stride = frame.stride(0); // dst is 4-bpp, aligned
|
||||
let dst = frame.data_mut(0);
|
||||
if self.expand {
|
||||
// packed 3-bpp RGB/BGR → 4-bpp *0 (copy 3 bytes, zero the pad byte)
|
||||
for y in 0..h {
|
||||
let s = &bytes[y * src_row..y * src_row + src_row];
|
||||
let drow = &mut dst[y * stride..y * stride + w * 4];
|
||||
for x in 0..w {
|
||||
drow[x * 4..x * 4 + 3].copy_from_slice(&s[x * 3..x * 3 + 3]);
|
||||
drow[x * 4 + 3] = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// 4-bpp → 4-bpp, honoring the (possibly larger) dst stride
|
||||
for y in 0..h {
|
||||
dst[y * stride..y * stride + src_row]
|
||||
.copy_from_slice(&bytes[y * src_row..y * src_row + src_row]);
|
||||
}
|
||||
}
|
||||
frame.set_pts(Some(pts));
|
||||
frame.set_kind(if idr {
|
||||
ffmpeg::picture::Type::I
|
||||
} else {
|
||||
ffmpeg::picture::Type::None
|
||||
});
|
||||
self.enc.send_frame(frame).context("send_frame")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Zero-copy path: hand the imported CUDA device buffer to NVENC with no CPU touch.
|
||||
///
|
||||
/// We take a *pooled* surface from the CUDA hwframes context (`av_hwframe_get_buffer`) and
|
||||
/// device→device-copy our imported buffer into it, rather than wrapping our own pointer in a
|
||||
/// bare frame. Two reasons: (1) NVENC's `nvenc_send_frame` ignores frames whose `buf[0]` is
|
||||
/// null and the generic encode path's `av_frame_ref` needs a refcounted buffer — a bare
|
||||
/// frame is rejected with `EINVAL`; (2) NVENC caches CUDA-resource *registrations* keyed by
|
||||
/// device pointer with a bounded table, so a fresh pointer every frame would thrash/overflow
|
||||
/// it — the pool recycles a small set of pointers. The extra copy is device-local (~8 MB at
|
||||
/// 1080p, sub-millisecond on the GPU) and keeps the host fully off the pixel path.
|
||||
fn submit_cuda(
|
||||
&mut self,
|
||||
buf: &crate::zerocopy::DeviceBuffer,
|
||||
pts: i64,
|
||||
idr: bool,
|
||||
) -> Result<()> {
|
||||
let frames_ref = self
|
||||
.cuda
|
||||
.as_ref()
|
||||
.context("CUDA hw context missing (encoder opened in CPU mode)")?
|
||||
.frames_ref;
|
||||
// The device→device copy below uses our shared context directly; make it current on the
|
||||
// encode thread (ffmpeg pushes its own around the pool alloc, so order is fine).
|
||||
crate::zerocopy::cuda::make_current().context("CUDA context current (encode thread)")?;
|
||||
unsafe {
|
||||
let mut f = ffi::av_frame_alloc();
|
||||
if f.is_null() {
|
||||
bail!("av_frame_alloc failed");
|
||||
}
|
||||
// Pooled CUDA surface: sets format, width/height, data[0]/linesize[0], buf[0] and
|
||||
// hw_frames_ctx. Reused across frames (the pool recycles), keeping NVENC's
|
||||
// registration cache warm.
|
||||
let r = ffi::av_hwframe_get_buffer(frames_ref, f, 0);
|
||||
if r < 0 {
|
||||
ffi::av_frame_free(&mut f);
|
||||
bail!("av_hwframe_get_buffer(CUDA) failed ({r})");
|
||||
}
|
||||
let dst_ptr = (*f).data[0] as crate::zerocopy::cuda::CUdeviceptr;
|
||||
let dst_pitch = (*f).linesize[0] as usize;
|
||||
if let Err(e) = crate::zerocopy::cuda::copy_device_to_device(buf, dst_ptr, dst_pitch) {
|
||||
ffi::av_frame_free(&mut f);
|
||||
return Err(e).context("copy imported buffer into NVENC surface");
|
||||
}
|
||||
(*f).pts = pts;
|
||||
(*f).pict_type = if idr {
|
||||
ffi::AVPictureType::AV_PICTURE_TYPE_I
|
||||
} else {
|
||||
ffi::AVPictureType::AV_PICTURE_TYPE_NONE
|
||||
};
|
||||
let r = ffi::avcodec_send_frame(self.enc.as_mut_ptr(), f);
|
||||
ffi::av_frame_free(&mut f);
|
||||
if r < 0 {
|
||||
bail!("avcodec_send_frame(CUDA) failed ({r})");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
//! The app catalog: what `/applist` advertises and what `/launch?appid=N` selects. Each entry
|
||||
//! maps to a session recipe — which compositor backend hosts it and (for gamescope) which
|
||||
//! command runs nested. Loaded from `~/.config/punktfunk/apps.json`; sensible defaults otherwise.
|
||||
//!
|
||||
//! ```json
|
||||
//! [ {"id":1,"title":"Desktop"},
|
||||
//! {"id":2,"title":"Steam","compositor":"gamescope","cmd":"steam -gamepadui"} ]
|
||||
//! ```
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AppEntry {
|
||||
pub id: u32,
|
||||
pub title: String,
|
||||
/// `None` = auto-detect (the desktop session's compositor).
|
||||
pub compositor: Option<crate::vdisplay::Compositor>,
|
||||
/// Command gamescope runs nested (gamescope entries only).
|
||||
pub cmd: Option<String>,
|
||||
}
|
||||
|
||||
/// Location of the user's catalog file, `$HOME/.config/punktfunk/apps.json`.
///
/// Returns `None` when `HOME` is unset or not valid Unicode.
fn config_path() -> Option<std::path::PathBuf> {
    let home = std::env::var("HOME").ok()?;
    let mut path = std::path::PathBuf::from(home);
    path.push(".config/punktfunk/apps.json");
    Some(path)
}
|
||||
|
||||
fn parse_compositor(s: &str) -> Option<crate::vdisplay::Compositor> {
|
||||
use crate::vdisplay::Compositor::*;
|
||||
match s.to_ascii_lowercase().as_str() {
|
||||
"kwin" | "kde" => Some(Kwin),
|
||||
"mutter" | "gnome" => Some(Mutter),
|
||||
"gamescope" => Some(Gamescope),
|
||||
"wlroots" | "sway" => Some(Wlroots),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The catalog: the user's `apps.json` if present, else defaults (Desktop, plus gamescope
|
||||
/// entries when gamescope is installed).
|
||||
pub fn catalog() -> Vec<AppEntry> {
|
||||
if let Some(path) = config_path() {
|
||||
if let Ok(raw) = std::fs::read_to_string(&path) {
|
||||
match serde_json::from_str::<Value>(&raw) {
|
||||
Ok(Value::Array(items)) => {
|
||||
let apps: Vec<AppEntry> = items
|
||||
.iter()
|
||||
.filter_map(|it| {
|
||||
Some(AppEntry {
|
||||
id: it.get("id")?.as_u64()? as u32,
|
||||
title: it.get("title")?.as_str()?.to_string(),
|
||||
compositor: it
|
||||
.get("compositor")
|
||||
.and_then(|c| c.as_str())
|
||||
.and_then(parse_compositor),
|
||||
cmd: it.get("cmd").and_then(|c| c.as_str()).map(String::from),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
if !apps.is_empty() {
|
||||
return apps;
|
||||
}
|
||||
tracing::warn!(path = %path.display(), "apps.json parsed to zero entries — using defaults");
|
||||
}
|
||||
_ => {
|
||||
tracing::warn!(path = %path.display(), "apps.json malformed — using defaults")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut apps = vec![AppEntry {
|
||||
id: 1,
|
||||
title: "Desktop".into(),
|
||||
compositor: None,
|
||||
cmd: None,
|
||||
}];
|
||||
if which("gamescope") {
|
||||
if which("steam") {
|
||||
apps.push(AppEntry {
|
||||
id: 2,
|
||||
title: "Steam".into(),
|
||||
compositor: Some(crate::vdisplay::Compositor::Gamescope),
|
||||
cmd: Some("steam -gamepadui".into()),
|
||||
});
|
||||
}
|
||||
if which("vkcube") {
|
||||
apps.push(AppEntry {
|
||||
id: 3,
|
||||
title: "vkcube (test)".into(),
|
||||
compositor: Some(crate::vdisplay::Compositor::Gamescope),
|
||||
cmd: Some("vkcube".into()),
|
||||
});
|
||||
}
|
||||
}
|
||||
apps
|
||||
}
|
||||
|
||||
pub fn by_id(id: u32) -> Option<AppEntry> {
|
||||
catalog().into_iter().find(|a| a.id == id)
|
||||
}
|
||||
|
||||
/// Render the GameStream `/applist` XML.
|
||||
pub fn applist_xml() -> String {
|
||||
let mut xml =
|
||||
String::from("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\">\n");
|
||||
for app in catalog() {
|
||||
xml.push_str(&format!(
|
||||
"<App>\n<IsHdrSupported>0</IsHdrSupported>\n<AppTitle>{}</AppTitle>\n<ID>{}</ID>\n</App>\n",
|
||||
xml_escape(&app.title),
|
||||
app.id
|
||||
));
|
||||
}
|
||||
xml.push_str("</root>\n");
|
||||
xml
|
||||
}
|
||||
|
||||
/// Escape `s` for use as XML element content.
///
/// Replaces `&`, `<` and `>` with their predefined entities. `&` is handled first so the
/// ampersands introduced by the other two replacements are not escaped a second time.
fn xml_escape(s: &str) -> String {
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
|
||||
|
||||
/// Report whether some directory on `PATH` contains a regular file named `bin`.
///
/// Returns `false` when `PATH` is unset.
fn which(bin: &str) -> bool {
    match std::env::var_os("PATH") {
        Some(paths) => std::env::split_paths(&paths).any(|dir| dir.join(bin).is_file()),
        None => false,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_catalog_has_desktop() {
        let desktop = catalog()
            .into_iter()
            .find(|a| a.id == 1 && a.title == "Desktop");
        assert!(desktop.is_some());
    }

    #[test]
    fn applist_xml_is_wellformed_ish() {
        let xml = applist_xml();
        assert!(xml.starts_with("<?xml"));
        assert!(xml.contains("<AppTitle>Desktop</AppTitle>"));
        // Every opened <App> element must be closed again.
        let opened = xml.matches("<App>").count();
        let closed = xml.matches("</App>").count();
        assert_eq!(opened, closed);
    }
}
|
||||
@@ -0,0 +1,188 @@
|
||||
//! The audio data plane (UDP 48000). On RTSP PLAY we learn the client's audio endpoint from
|
||||
//! its port-learning ping, capture the default-sink monitor, Opus-encode 5 ms stereo frames,
|
||||
//! and send each as a GameStream RTP audio packet.
|
||||
//!
|
||||
//! Wire format (moonlight-common-c `AudioStream.c`): a 12-byte big-endian `RTP_PACKET`
|
||||
//! (`packetType = 97`, `sequenceNumber++`, `timestamp += packetDuration`, `ssrc = 0`)
|
||||
//! followed by the AES-128-CBC-encrypted Opus payload. Stereo Opus is a single coupled
|
||||
//! multistream, so a plain `opus_encode` bitstream is what the client's multistream decoder
|
||||
//! expects. Like the control stream, modern Moonlight always AES-CBC-decrypts audio (it
|
||||
//! reports "Failed to decrypt audio packet" on plaintext), so we encrypt the payload under the
|
||||
//! `/launch` `rikey` with a per-packet IV `BE32(rikeyid + seq)` (PKCS7 padding, RTP header
|
||||
//! left in the clear). Reed-Solomon audio FEC is layered on top in P1.5.
|
||||
|
||||
use super::AUDIO_PORT;
|
||||
use crate::audio::{self, AudioCapturer, CHANNELS, SAMPLE_RATE};
|
||||
use anyhow::{Context, Result};
|
||||
use cbc::cipher::{block_padding::Pkcs7, BlockEncryptMut, KeyIvInit};
|
||||
use opus::{Application, Bitrate, Channels, Encoder};
|
||||
use std::net::UdpSocket;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
type Aes128CbcEnc = cbc::Encryptor<aes::Aes128>;
|
||||
|
||||
/// Opus frame duration; 5 ms is moonlight's default (`x-nv-aqos.packetDuration`).
|
||||
const FRAME_MS: usize = 5;
|
||||
/// Samples per channel per Opus frame (48 kHz · 5 ms = 240).
|
||||
const SAMPLES_PER_FRAME: usize = SAMPLE_RATE as usize * FRAME_MS / 1000;
|
||||
/// RTP payload type for audio (moonlight `AudioStream.c` checks `packetType == 97`).
|
||||
const AUDIO_PACKET_TYPE: u8 = 97;
|
||||
/// Target Opus bitrate in bits per second (handed to the encoder as `Bitrate::Bits`).
const OPUS_BITRATE: i32 = 128_000;
|
||||
|
||||
/// Slot for the persistent audio capturer, reused across streams (no leaked PipeWire thread).
|
||||
pub type AudioCapSlot = Arc<std::sync::Mutex<Option<Box<dyn AudioCapturer>>>>;
|
||||
|
||||
/// Spawn the audio stream thread (idempotent via `running`). Stops when `running` clears.
|
||||
/// `gcm_key`/`rikeyid` come from `/launch` and key the AES-CBC payload encryption.
|
||||
pub fn start(running: Arc<AtomicBool>, gcm_key: [u8; 16], rikeyid: i32, audio_cap: AudioCapSlot) {
|
||||
let _ = std::thread::Builder::new()
|
||||
.name("punktfunk-audio".into())
|
||||
.spawn(move || {
|
||||
tracing::info!("audio stream starting");
|
||||
if let Err(e) = run(&running, &gcm_key, rikeyid, &audio_cap) {
|
||||
tracing::error!(error = %format!("{e:#}"), "audio stream failed");
|
||||
}
|
||||
running.store(false, Ordering::SeqCst);
|
||||
tracing::info!("audio stream stopped");
|
||||
});
|
||||
}
|
||||
|
||||
fn run(
|
||||
running: &AtomicBool,
|
||||
gcm_key: &[u8; 16],
|
||||
rikeyid: i32,
|
||||
audio_cap: &std::sync::Mutex<Option<Box<dyn AudioCapturer>>>,
|
||||
) -> Result<()> {
|
||||
let sock = UdpSocket::bind(("0.0.0.0", AUDIO_PORT)).context("bind audio UDP")?;
|
||||
// The client pings the audio port (~every 500ms) so we learn where to send.
|
||||
sock.set_read_timeout(Some(Duration::from_secs(10)))?;
|
||||
tracing::info!(port = AUDIO_PORT, "audio: awaiting client ping");
|
||||
let mut probe = [0u8; 256];
|
||||
let (_, client) = sock
|
||||
.recv_from(&mut probe)
|
||||
.context("audio: no client ping within 10s")?;
|
||||
sock.connect(client)
|
||||
.context("connect client audio endpoint")?;
|
||||
tracing::info!(%client, "audio: client endpoint learned");
|
||||
|
||||
// Reuse the persistent capturer (create on first stream); drain stale buffered audio.
|
||||
let mut cap = match audio_cap.lock().unwrap().take() {
|
||||
Some(mut c) => {
|
||||
c.drain();
|
||||
c
|
||||
}
|
||||
None => audio::open_audio_capture().context("open audio capture")?,
|
||||
};
|
||||
let result = audio_body(&mut *cap, &sock, gcm_key, rikeyid, running);
|
||||
*audio_cap.lock().unwrap() = Some(cap);
|
||||
result
|
||||
}
|
||||
|
||||
fn audio_body(
|
||||
cap: &mut dyn AudioCapturer,
|
||||
sock: &UdpSocket,
|
||||
gcm_key: &[u8; 16],
|
||||
rikeyid: i32,
|
||||
running: &AtomicBool,
|
||||
) -> Result<()> {
|
||||
// RESTRICTED_LOWDELAY + CBR, matching Sunshine — CBR keeps the Opus TOC byte constant,
|
||||
// which the client asserts per stream.
|
||||
let mut enc = Encoder::new(SAMPLE_RATE, Channels::Stereo, Application::LowDelay)
|
||||
.context("create Opus encoder")?;
|
||||
enc.set_bitrate(Bitrate::Bits(OPUS_BITRATE)).ok();
|
||||
enc.set_vbr(false).ok();
|
||||
|
||||
let frame_len = SAMPLES_PER_FRAME * CHANNELS; // interleaved samples per Opus frame
|
||||
let mut acc: Vec<f32> = Vec::with_capacity(frame_len * 4);
|
||||
let mut out = vec![0u8; 1400];
|
||||
let mut seq: u16 = 0;
|
||||
let mut timestamp: u32 = 0;
|
||||
let mut sent: u64 = 0;
|
||||
// Pacing anchor: PipeWire hands us large capture buffers (~1024 frames), so we'd otherwise
|
||||
// emit packets in bursts the client's low-latency jitter buffer hears as glitching. Emit
|
||||
// each frame at its 5 ms slot instead. Production is real-time, so the backlog stays small.
|
||||
let start = Instant::now();
|
||||
let mut frame_no: u64 = 0;
|
||||
// Optional linear gain for quiet capture sources (PUNKTFUNK_AUDIO_GAIN, default 1.0).
|
||||
let gain: f32 = std::env::var("PUNKTFUNK_AUDIO_GAIN")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(1.0);
|
||||
|
||||
while running.load(Ordering::SeqCst) {
|
||||
let chunk = cap.next_chunk().context("capture audio chunk")?;
|
||||
acc.extend_from_slice(&chunk);
|
||||
while acc.len() >= frame_len {
|
||||
let mut frame: Vec<f32> = acc.drain(..frame_len).collect();
|
||||
if gain != 1.0 {
|
||||
for s in &mut frame {
|
||||
*s = (*s * gain).clamp(-1.0, 1.0);
|
||||
}
|
||||
}
|
||||
let n = enc.encode_float(&frame, &mut out).context("opus encode")?;
|
||||
// AES-128-CBC the Opus payload (RTP header stays plaintext). Per-packet IV =
|
||||
// BE32(rikeyid + seq) in [0..4], zero elsewhere; PKCS7 padding.
|
||||
let iv_seq = (rikeyid as u32).wrapping_add(seq as u32);
|
||||
let mut iv = [0u8; 16];
|
||||
iv[0..4].copy_from_slice(&iv_seq.to_be_bytes());
|
||||
let ct = Aes128CbcEnc::new(gcm_key.into(), (&iv).into())
|
||||
.encrypt_padded_vec_mut::<Pkcs7>(&out[..n]);
|
||||
let pkt = build_rtp(seq, timestamp, &ct);
|
||||
if sock.send(&pkt).is_err() {
|
||||
tracing::info!(sent, "audio: client unreachable — stopping");
|
||||
return Ok(());
|
||||
}
|
||||
seq = seq.wrapping_add(1);
|
||||
// GameStream's audio RTP timestamp ticks by packetDuration (ms), not by samples.
|
||||
timestamp = timestamp.wrapping_add(FRAME_MS as u32);
|
||||
sent += 1;
|
||||
if sent % 400 == 0 {
|
||||
tracing::info!(sent, "audio: streaming");
|
||||
}
|
||||
|
||||
// Hold each frame to its 5 ms slot (skip if we've fallen behind a burst).
|
||||
frame_no += 1;
|
||||
let scheduled = start + Duration::from_millis(5 * frame_no);
|
||||
let now = Instant::now();
|
||||
if scheduled > now {
|
||||
std::thread::sleep((scheduled - now).min(Duration::from_millis(20)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Build a GameStream RTP audio packet: 12-byte BE `RTP_PACKET` header + Opus payload.
|
||||
fn build_rtp(seq: u16, timestamp: u32, opus: &[u8]) -> Vec<u8> {
|
||||
let mut p = Vec::with_capacity(12 + opus.len());
|
||||
p.push(0x80); // RTP version 2, no padding/extension/CSRC
|
||||
p.push(AUDIO_PACKET_TYPE);
|
||||
p.extend_from_slice(&seq.to_be_bytes());
|
||||
p.extend_from_slice(×tamp.to_be_bytes());
|
||||
p.extend_from_slice(&0u32.to_be_bytes()); // ssrc
|
||||
p.extend_from_slice(opus);
|
||||
p
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Every header field lands at its documented offset, big-endian, followed by the
    /// payload.
    #[test]
    fn rtp_header_layout() {
        let pkt = build_rtp(0x0102, 0x03040506, &[0xaa, 0xbb]);
        let (header, payload) = pkt.split_at(12);
        assert_eq!(header[0], 0x80);
        assert_eq!(header[1], 97);
        assert_eq!(&header[2..4], &[0x01, 0x02]); // seq BE
        assert_eq!(&header[4..8], &[0x03, 0x04, 0x05, 0x06]); // timestamp BE
        assert_eq!(&header[8..12], &[0, 0, 0, 0]); // ssrc
        assert_eq!(payload, &[0xaa, 0xbb]); // opus payload
    }

    /// 48 kHz at 5 ms per frame is 240 samples per channel.
    #[test]
    fn frame_sizing() {
        assert_eq!(SAMPLES_PER_FRAME, 240);
    }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
//! The host's self-signed RSA-2048 identity: the cert returned to clients as `plaincert`
|
||||
//! during pairing AND presented as the TLS server cert on 47984 (Moonlight pins it). The
|
||||
//! cert's own X.509 signature bytes are an input to the pairing hashes, so we extract them.
|
||||
|
||||
use super::config_dir;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use rsa::pkcs1v15::SigningKey;
|
||||
use rsa::pkcs8::DecodePrivateKey;
|
||||
use rsa::RsaPrivateKey;
|
||||
use sha2::Sha256;
|
||||
use std::fs;
|
||||
|
||||
/// The host's identity material: the certificate and private key as PEM text, plus the two
/// values derived from them (certificate signature bytes and a signing key).
pub struct ServerIdentity {
|
||||
/// PEM of the cert (returned hex-encoded as `plaincert`; also the TLS server cert).
|
||||
pub cert_pem: String,
|
||||
/// PKCS#8 PEM of the private key (TLS server key).
|
||||
pub key_pem: String,
|
||||
/// The cert's X.509 `signatureValue` bytes — bound into the pairing challenge hashes.
|
||||
pub signature: Vec<u8>,
|
||||
/// RSA-PKCS1v15-SHA256 signer over the host key (the pairing `sign256`).
|
||||
pub signing_key: SigningKey<Sha256>,
|
||||
}
|
||||
|
||||
impl ServerIdentity {
|
||||
pub fn load_or_create() -> Result<ServerIdentity> {
|
||||
let dir = config_dir();
|
||||
let cert_path = dir.join("cert.pem");
|
||||
let key_path = dir.join("key.pem");
|
||||
let (cert_pem, key_pem) = match (
|
||||
fs::read_to_string(&cert_path),
|
||||
fs::read_to_string(&key_path),
|
||||
) {
|
||||
(Ok(c), Ok(k)) if !c.trim().is_empty() && !k.trim().is_empty() => (c, k),
|
||||
_ => {
|
||||
let (c, k) = generate()?;
|
||||
fs::create_dir_all(&dir).ok();
|
||||
fs::write(&cert_path, &c)
|
||||
.with_context(|| format!("write {}", cert_path.display()))?;
|
||||
fs::write(&key_path, &k)
|
||||
.with_context(|| format!("write {}", key_path.display()))?;
|
||||
tracing::info!(path = %cert_path.display(), "generated punktfunk host certificate (RSA-2048)");
|
||||
(c, k)
|
||||
}
|
||||
};
|
||||
Self::from_pems(cert_pem, key_pem)
|
||||
}
|
||||
|
||||
/// Build an identity from PEMs (no I/O).
|
||||
pub fn from_pems(cert_pem: String, key_pem: String) -> Result<ServerIdentity> {
|
||||
let priv_key = RsaPrivateKey::from_pkcs8_pem(&key_pem).context("parse host private key")?;
|
||||
let signing_key = SigningKey::<Sha256>::new(priv_key);
|
||||
let signature = cert_signature(&cert_pem)?;
|
||||
Ok(ServerIdentity {
|
||||
cert_pem,
|
||||
key_pem,
|
||||
signature,
|
||||
signing_key,
|
||||
})
|
||||
}
|
||||
|
||||
/// Throwaway in-memory identity — nothing touches the config dir (used by tests).
|
||||
pub fn ephemeral() -> Result<ServerIdentity> {
|
||||
let (cert_pem, key_pem) = generate()?;
|
||||
Self::from_pems(cert_pem, key_pem)
|
||||
}
|
||||
}
|
||||
|
||||
fn generate() -> Result<(String, String)> {
|
||||
let key = rcgen::KeyPair::generate_for(&rcgen::PKCS_RSA_SHA256).context("rcgen RSA keygen")?;
|
||||
let mut params = rcgen::CertificateParams::new(Vec::<String>::new()).context("cert params")?;
|
||||
params
|
||||
.distinguished_name
|
||||
.push(rcgen::DnType::CommonName, "punktfunk");
|
||||
params.not_before = rcgen::date_time_ymd(2020, 1, 1);
|
||||
params.not_after = rcgen::date_time_ymd(2040, 1, 1);
|
||||
let cert = params.self_signed(&key).context("self-sign cert")?;
|
||||
Ok((cert.pem(), key.serialize_pem()))
|
||||
}
|
||||
|
||||
/// Extract the X.509 `signatureValue` bytes from a cert PEM.
|
||||
fn cert_signature(cert_pem: &str) -> Result<Vec<u8>> {
|
||||
let (_, pem) = x509_parser::pem::parse_x509_pem(cert_pem.as_bytes())
|
||||
.map_err(|e| anyhow!("parse cert pem: {e}"))?;
|
||||
let x509 = pem.parse_x509().context("parse x509")?;
|
||||
Ok(x509.signature_value.data.to_vec())
|
||||
}
|
||||
@@ -0,0 +1,428 @@
|
||||
//! The GameStream control stream: an ENet host on UDP 47999. Moonlight connects this
|
||||
//! BEFORE the video stream starts (`STAGE_CONTROL_STREAM_START` precedes
|
||||
//! `STAGE_VIDEO_STREAM_START`), so it must be up or the whole connection aborts. It carries
|
||||
//! input (mouse/keyboard/gamepad), keepalives, and QoS feedback.
|
||||
//!
|
||||
//! Sunshine-mode hosts (we advertise `state=SUNSHINE_SERVER_FREE`) make Moonlight encrypt the
|
||||
//! control stream with AES-128-GCM under the `/launch` `rikey`, even though we negotiate no
|
||||
//! media encryption. Wire framing (all little-endian):
|
||||
//!
|
||||
//! ```text
|
||||
//! u16 encType = 0x0001 | u16 length | u32 seq | [16-byte GCM tag] | ciphertext
|
||||
//! length = sizeof(seq) + 16 (tag) + plaintext
|
||||
//! ```
|
||||
//!
|
||||
//! The GCM nonce depends on what Moonlight negotiated (`encryptControlMessage` in
|
||||
//! moonlight-common-c). For `SS_ENC_CONTROL_V2` it is a 12-byte nonce with `seq` (LE) in bytes
|
||||
//! [0..4] and `b"CC"` (client→host) at [10..12]. For the legacy path — which we hit, since we
|
||||
//! advertise no encryption — it is a 16-byte nonce with only `iv[0] = seq & 0xff` and the rest
|
||||
//! zero. The tag is prepended to the ciphertext; there is no AAD; the key is the forward
|
||||
//! `hex::decode(rikey)`. We auto-detect the exact scheme via [`decrypt_control`] on the first
|
||||
//! packet that authenticates, since GCM gives no partial credit.
|
||||
//!
|
||||
//! Runs on its own native thread for the host's lifetime.
|
||||
|
||||
use super::{AppState, CONTROL_PORT};
|
||||
use crate::inject::gamepad::GamepadManager;
|
||||
use crate::inject::InputInjector;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use rusty_enet::{Event, Host, HostSettings, Packet, PeerID};
|
||||
use std::net::UdpSocket;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Bind the ENet control host on 47999 and service it forever on a dedicated thread.
|
||||
pub fn spawn(state: Arc<AppState>) -> Result<()> {
|
||||
let socket = UdpSocket::bind(("0.0.0.0", CONTROL_PORT)).context("bind control UDP")?;
|
||||
socket
|
||||
.set_nonblocking(true)
|
||||
.context("control socket nonblocking")?;
|
||||
let mut host = Host::new(
|
||||
socket,
|
||||
HostSettings {
|
||||
peer_limit: 4,
|
||||
// Moonlight connects with CTRL_CHANNEL_COUNT (0x30) channels and sends gamepad
|
||||
// input on channel 0x10+n — a smaller limit silently discards controller input.
|
||||
channel_limit: 0x30,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.map_err(|e| anyhow!("ENet host init: {e:?}"))?;
|
||||
tracing::info!(port = CONTROL_PORT, "ENet control listening");
|
||||
|
||||
std::thread::Builder::new()
|
||||
.name("punktfunk-control".into())
|
||||
.spawn(move || {
|
||||
// Thread-local (the injector owns non-Send Wayland/xkb state, so it must be
|
||||
// created and live here rather than be captured into the closure).
|
||||
// GCM scheme detected from the first authenticating packet; reused thereafter.
|
||||
let mut detected: Option<Scheme> = None;
|
||||
// Lazily opened on the first input event (Sway's Wayland socket is up by then).
|
||||
let mut injector: Option<Box<dyn InputInjector>> = None;
|
||||
// Virtual gamepads (uinput) + the host→client rumble sequence counter.
|
||||
let mut pads = GamepadManager::new();
|
||||
let mut rumble_seq: u32 = 0;
|
||||
let mut peer: Option<PeerID> = None;
|
||||
loop {
|
||||
loop {
|
||||
match host.service() {
|
||||
Ok(Some(event)) => match event {
|
||||
Event::Connect { peer: p, .. } => {
|
||||
tracing::info!("control: client connected");
|
||||
peer = Some(p.id());
|
||||
}
|
||||
Event::Disconnect { .. } => {
|
||||
tracing::info!("control: client disconnected");
|
||||
detected = None;
|
||||
peer = None;
|
||||
// Unplug the session's virtual pads.
|
||||
pads = GamepadManager::new();
|
||||
}
|
||||
Event::Receive {
|
||||
channel_id, packet, ..
|
||||
} => {
|
||||
on_receive(
|
||||
&state,
|
||||
channel_id,
|
||||
packet.data(),
|
||||
&mut detected,
|
||||
&mut injector,
|
||||
&mut pads,
|
||||
);
|
||||
}
|
||||
},
|
||||
Ok(None) => break,
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %format!("{e:?}"), "control: service error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Service the pads' force-feedback protocol every tick (games block inside
|
||||
// EVIOCSFF until answered) and relay mixed rumble levels to the client.
|
||||
if let (Some(pid), Some(scheme)) = (peer, detected) {
|
||||
let key = state.launch.lock().unwrap().map(|s| s.gcm_key);
|
||||
if let Some(key) = key {
|
||||
let mut out: Vec<Vec<u8>> = Vec::new();
|
||||
pads.pump_rumble(|index, low, high| {
|
||||
let pt = super::gamepad::rumble_plaintext(index, low, high);
|
||||
out.push(encrypt_control(&key, &scheme, rumble_seq, &pt));
|
||||
rumble_seq = rumble_seq.wrapping_add(1);
|
||||
});
|
||||
for wire in out {
|
||||
if let Err(e) = host.peer_mut(pid).send(0, &Packet::reliable(&wire[..]))
|
||||
{
|
||||
tracing::warn!(error = %format!("{e:?}"), "rumble send failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No client/scheme yet: still answer FF uploads so games don't block.
|
||||
pads.pump_rumble(|_, _, _| {});
|
||||
}
|
||||
// ENet needs frequent servicing for handshake/keepalive/retransmit.
|
||||
std::thread::sleep(Duration::from_millis(2));
|
||||
}
|
||||
})
|
||||
.context("spawn control thread")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle one received control packet: decrypt it (learning the GCM scheme on the first one),
|
||||
/// decode any input event, and inject it into the host session.
|
||||
fn on_receive(
|
||||
state: &AppState,
|
||||
_channel_id: u8,
|
||||
d: &[u8],
|
||||
detected: &mut Option<Scheme>,
|
||||
injector: &mut Option<Box<dyn InputInjector>>,
|
||||
pads: &mut GamepadManager,
|
||||
) {
|
||||
let Some(key) = state.launch.lock().unwrap().map(|s| s.gcm_key) else {
|
||||
return; // control traffic before /launch — no key yet
|
||||
};
|
||||
// Encrypted control packets begin with u16 LE encType = 0x0001 and an 8-byte header.
|
||||
if d.len() < 8 || d[0] != 0x01 || d[1] != 0x00 {
|
||||
return;
|
||||
}
|
||||
|
||||
let pt = match decrypt_control(&key, d, detected) {
|
||||
Some((scheme, pt)) => {
|
||||
if detected.is_none() {
|
||||
tracing::info!(?scheme, "control: GCM scheme locked in");
|
||||
}
|
||||
*detected = Some(scheme);
|
||||
pt
|
||||
}
|
||||
None => {
|
||||
tracing::warn!(len = d.len(), "control: GCM decrypt failed");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Recovery requests after loss: invalidate-reference-frames (0x0301, Gen7) or request-IDR
|
||||
// (0x0302, Gen7Enc). Force a keyframe so the client can resync without a multi-second stall.
|
||||
if pt.len() >= 2 {
|
||||
let inner = u16::from_le_bytes([pt[0], pt[1]]);
|
||||
if matches!(inner, 0x0301 | 0x0302 | 0x0305) {
|
||||
state
|
||||
.force_idr
|
||||
.store(true, std::sync::atomic::Ordering::SeqCst);
|
||||
tracing::info!(
|
||||
ty = format!("{inner:#06x}"),
|
||||
"control: IDR/RFI request → keyframe"
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Controller events go to the uinput virtual pads (created on demand per the mask).
|
||||
if let Some(gp) = super::gamepad::decode(&pt) {
|
||||
pads.handle(&gp);
|
||||
return;
|
||||
}
|
||||
|
||||
let events = super::input::decode(&pt);
|
||||
if events.is_empty() {
|
||||
return; // keepalive / QoS / unhandled input kind
|
||||
}
|
||||
|
||||
// Open the injector on demand — by the first input event the compositor session is up.
|
||||
// Backend auto-selects per desktop (wlr on Sway, libei on KWin/GNOME); override with
|
||||
// PUNKTFUNK_INPUT_BACKEND.
|
||||
if injector.is_none() {
|
||||
let backend = crate::inject::default_backend();
|
||||
match crate::inject::open(backend) {
|
||||
Ok(i) => {
|
||||
tracing::info!(?backend, "input injection backend opened");
|
||||
*injector = Some(i);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(error = %format!("{e:#}"), "input injection unavailable");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
let inj = injector.as_mut().unwrap();
|
||||
for ev in events {
|
||||
if let Err(e) = inj.inject(&ev) {
|
||||
tracing::warn!(error = %format!("{e:#}"), "inject failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The ways a control packet's GCM nonce can be laid out; which one applies depends on the
/// flags Moonlight negotiated.
#[derive(Clone, Copy, Debug)]
enum NonceKind {
    /// `SS_ENC_CONTROL_V2`: 12 bytes — `seq` in [0..4], zeros, marker bytes at [10..12].
    V2 { seq_be: bool, marker: [u8; 2] },
    /// Legacy: 16 bytes — only `iv[0] = seq & 0xff`, everything else zero.
    LegacyLowByte,
    /// Legacy variant: 16 bytes — the full `seq` in [0..4], everything else zero.
    Legacy16Seq { seq_be: bool },
}

impl NonceKind {
    /// Build the nonce bytes for sequence number `seq` (12 bytes for `V2`, 16 otherwise).
    fn nonce(&self, seq: u32) -> Vec<u8> {
        /// Serialise `seq` in the requested byte order.
        fn encode_seq(seq: u32, big_endian: bool) -> [u8; 4] {
            if big_endian {
                seq.to_be_bytes()
            } else {
                seq.to_le_bytes()
            }
        }

        match *self {
            NonceKind::V2 { seq_be, marker } => {
                let mut iv = Vec::with_capacity(12);
                iv.extend_from_slice(&encode_seq(seq, seq_be));
                iv.extend_from_slice(&[0u8; 6]);
                iv.extend_from_slice(&marker);
                iv
            }
            NonceKind::LegacyLowByte => {
                let mut iv = vec![0u8; 16];
                // Least-significant byte of `seq`.
                iv[0] = seq.to_le_bytes()[0];
                iv
            }
            NonceKind::Legacy16Seq { seq_be } => {
                let mut iv = encode_seq(seq, seq_be).to_vec();
                iv.resize(16, 0);
                iv
            }
        }
    }
}
|
||||
|
||||
/// The byte-exact GCM scheme that opened a control packet. Determined empirically once per
|
||||
/// connection (AES-GCM gives no partial credit, so an authenticating combination is proof).
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
struct Scheme {
|
||||
/// `gcm_key` is byte-reversed before use (defensive; Sunshine's net effect is forward).
|
||||
key_rev: bool,
|
||||
nonce: NonceKind,
|
||||
/// GCM tag sits before the ciphertext (vs after).
|
||||
tag_first: bool,
|
||||
aad: Aad,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
enum Aad {
|
||||
None,
|
||||
/// The 4-byte cleartext header prefix (encType + length), `d[0..4]`.
|
||||
Header4,
|
||||
}
|
||||
|
||||
impl Scheme {
|
||||
fn key(&self, base: &[u8; 16]) -> [u8; 16] {
|
||||
let mut k = *base;
|
||||
if self.key_rev {
|
||||
k.reverse();
|
||||
}
|
||||
k
|
||||
}
|
||||
}
|
||||
|
||||
/// Open an encrypted control packet `d` (8-byte cleartext header + `[tag?][ciphertext]`). If
|
||||
/// `detected` is set only that scheme is tried (fast path); otherwise the full cross-product
|
||||
/// of plausible schemes (nonce construction × key byte-order × tag position × AAD) is swept
|
||||
/// and the combination whose GCM tag authenticates is returned.
|
||||
fn decrypt_control(
|
||||
key: &[u8; 16],
|
||||
d: &[u8],
|
||||
detected: &Option<Scheme>,
|
||||
) -> Option<(Scheme, Vec<u8>)> {
|
||||
let seq = u32::from_le_bytes([d[4], d[5], d[6], d[7]]);
|
||||
let payload = &d[8..];
|
||||
if payload.len() < 16 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let attempt = |s: Scheme| -> Option<Vec<u8>> {
|
||||
// aes-gcm wants `ciphertext || tag`; reassemble from whichever wire order this is.
|
||||
let (ct, tag) = if s.tag_first {
|
||||
(&payload[16..], &payload[..16])
|
||||
} else {
|
||||
(
|
||||
&payload[..payload.len() - 16],
|
||||
&payload[payload.len() - 16..],
|
||||
)
|
||||
};
|
||||
let mut ct_tag = Vec::with_capacity(ct.len() + 16);
|
||||
ct_tag.extend_from_slice(ct);
|
||||
ct_tag.extend_from_slice(tag);
|
||||
let aad: &[u8] = match s.aad {
|
||||
Aad::None => &[],
|
||||
Aad::Header4 => &d[0..4],
|
||||
};
|
||||
gcm_open(&s.key(key), &s.nonce.nonce(seq), &ct_tag, aad)
|
||||
};
|
||||
|
||||
if let Some(s) = *detected {
|
||||
return attempt(s).map(|pt| (s, pt));
|
||||
}
|
||||
|
||||
// Candidate nonce constructions, most-likely first.
|
||||
const MARKERS: [[u8; 2]; 3] = [*b"CC", *b"HC", *b"CH"];
|
||||
let mut kinds: Vec<NonceKind> = vec![NonceKind::LegacyLowByte];
|
||||
for seq_be in [false, true] {
|
||||
for marker in MARKERS {
|
||||
kinds.push(NonceKind::V2 { seq_be, marker });
|
||||
}
|
||||
kinds.push(NonceKind::Legacy16Seq { seq_be });
|
||||
}
|
||||
|
||||
for &nonce in &kinds {
|
||||
for key_rev in [false, true] {
|
||||
for tag_first in [true, false] {
|
||||
for aad in [Aad::None, Aad::Header4] {
|
||||
let s = Scheme {
|
||||
key_rev,
|
||||
nonce,
|
||||
tag_first,
|
||||
aad,
|
||||
};
|
||||
if let Some(pt) = attempt(s) {
|
||||
return Some((s, pt));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Seal a host→client control message, mirroring the client's `detected` scheme with the
|
||||
/// direction flipped: V2 nonces use marker `H?` (host-originated) instead of `C?`; legacy
|
||||
/// nonces keep their construction with our own independent `seq` counter. Wire layout matches
|
||||
/// what the client sends us: `[0x0001][length][seq][tag|ct per scheme.tag_first]`.
|
||||
fn encrypt_control(key: &[u8; 16], scheme: &Scheme, seq: u32, pt: &[u8]) -> Vec<u8> {
|
||||
let nonce_kind = match scheme.nonce {
|
||||
NonceKind::V2 { seq_be, marker } => NonceKind::V2 {
|
||||
seq_be,
|
||||
marker: [b'H', marker[1]],
|
||||
},
|
||||
other => other,
|
||||
};
|
||||
let length = (4 + 16 + pt.len()) as u16;
|
||||
let mut wire = Vec::with_capacity(8 + 16 + pt.len());
|
||||
wire.extend_from_slice(&0x0001u16.to_le_bytes());
|
||||
wire.extend_from_slice(&length.to_le_bytes());
|
||||
wire.extend_from_slice(&seq.to_le_bytes());
|
||||
let aad: Vec<u8> = match scheme.aad {
|
||||
Aad::None => Vec::new(),
|
||||
Aad::Header4 => wire[0..4].to_vec(),
|
||||
};
|
||||
let ct_tag = gcm_seal(&scheme.key(key), &nonce_kind.nonce(seq), pt, &aad);
|
||||
let (ct, tag) = ct_tag.split_at(ct_tag.len() - 16);
|
||||
if scheme.tag_first {
|
||||
wire.extend_from_slice(tag);
|
||||
wire.extend_from_slice(ct);
|
||||
} else {
|
||||
wire.extend_from_slice(ct);
|
||||
wire.extend_from_slice(tag);
|
||||
}
|
||||
wire
|
||||
}
|
||||
|
||||
/// AES-128-GCM seal (companion to [`gcm_open`]); returns `ciphertext || tag`.
|
||||
fn gcm_seal(key: &[u8; 16], nonce: &[u8], pt: &[u8], aad: &[u8]) -> Vec<u8> {
|
||||
use aes_gcm::aead::consts::{U12, U16};
|
||||
use aes_gcm::aead::generic_array::GenericArray;
|
||||
use aes_gcm::aead::{Aead, KeyInit, Payload};
|
||||
use aes_gcm::{aes::Aes128, AesGcm};
|
||||
|
||||
let p = Payload { msg: pt, aad };
|
||||
match nonce.len() {
|
||||
12 => AesGcm::<Aes128, U12>::new_from_slice(key)
|
||||
.unwrap()
|
||||
.encrypt(GenericArray::from_slice(nonce), p)
|
||||
.expect("GCM seal"),
|
||||
16 => AesGcm::<Aes128, U16>::new_from_slice(key)
|
||||
.unwrap()
|
||||
.encrypt(GenericArray::from_slice(nonce), p)
|
||||
.expect("GCM seal"),
|
||||
_ => unreachable!("nonce length"),
|
||||
}
|
||||
}
|
||||
|
||||
/// AES-128-GCM open with a 12- or 16-byte nonce and explicit AAD. Returns the plaintext iff
|
||||
/// the tag authenticates. `ct_tag` is `ciphertext || tag` (aes-gcm's expected order).
|
||||
fn gcm_open(key: &[u8; 16], nonce: &[u8], ct_tag: &[u8], aad: &[u8]) -> Option<Vec<u8>> {
|
||||
use aes_gcm::aead::consts::{U12, U16};
|
||||
use aes_gcm::aead::generic_array::GenericArray;
|
||||
use aes_gcm::aead::{Aead, KeyInit, Payload};
|
||||
use aes_gcm::{aes::Aes128, AesGcm};
|
||||
|
||||
let p = Payload { msg: ct_tag, aad };
|
||||
match nonce.len() {
|
||||
12 => AesGcm::<Aes128, U12>::new_from_slice(key)
|
||||
.ok()?
|
||||
.decrypt(GenericArray::from_slice(nonce), p)
|
||||
.ok(),
|
||||
16 => AesGcm::<Aes128, U16>::new_from_slice(key)
|
||||
.ok()?
|
||||
.decrypt(GenericArray::from_slice(nonce), p)
|
||||
.ok(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
//! Pairing crypto primitives (control plane only — distinct from `punktfunk_core`'s AES-GCM
|
||||
//! data-plane sealing). GameStream pairing uses: AES-128-**ECB** with **no padding**,
|
||||
//! SHA-256 (host appversion major ≥ 7), and RSA-PKCS1v15-SHA256 signatures. See the
|
||||
//! `serverinfo + pairing` section of `docs/research/gamestream-protocol-research.json`.
|
||||
|
||||
use aes::cipher::generic_array::GenericArray;
|
||||
use aes::cipher::{BlockDecrypt, BlockEncrypt, KeyInit};
|
||||
use aes::Aes128;
|
||||
use rand::RngCore;
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
/// `n` cryptographically-random bytes.
|
||||
pub fn random<const N: usize>() -> [u8; N] {
|
||||
let mut b = [0u8; N];
|
||||
rand::thread_rng().fill_bytes(&mut b);
|
||||
b
|
||||
}
|
||||
|
||||
/// SHA-256 over the concatenation of `parts`.
|
||||
pub fn sha256(parts: &[&[u8]]) -> [u8; 32] {
|
||||
let mut h = Sha256::new();
|
||||
for p in parts {
|
||||
h.update(p);
|
||||
}
|
||||
h.finalize().into()
|
||||
}
|
||||
|
||||
/// The PIN-derived AES-128 key: `SHA-256(salt || pin)[..16]` (salt first, PIN as ASCII).
|
||||
pub fn pin_key(salt: &[u8; 16], pin: &str) -> [u8; 16] {
|
||||
let d = sha256(&[salt, pin.as_bytes()]);
|
||||
let mut k = [0u8; 16];
|
||||
k.copy_from_slice(&d[..16]);
|
||||
k
|
||||
}
|
||||
|
||||
/// AES-128-ECB encrypt, no padding: input is zero-extended to a 16-byte multiple.
|
||||
pub fn ecb_encrypt(key: &[u8; 16], data: &[u8]) -> Vec<u8> {
|
||||
let cipher = Aes128::new(GenericArray::from_slice(key));
|
||||
let mut out = data.to_vec();
|
||||
let rem = out.len() % 16;
|
||||
if rem != 0 {
|
||||
out.resize(out.len() + (16 - rem), 0);
|
||||
}
|
||||
for chunk in out.chunks_mut(16) {
|
||||
cipher.encrypt_block(GenericArray::from_mut_slice(chunk));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// AES-128-ECB decrypt, no padding: trailing bytes past the last whole block are ignored.
|
||||
pub fn ecb_decrypt(key: &[u8; 16], data: &[u8]) -> Vec<u8> {
|
||||
let cipher = Aes128::new(GenericArray::from_slice(key));
|
||||
let mut out = Vec::with_capacity(data.len());
|
||||
for chunk in data.chunks_exact(16) {
|
||||
let mut block = *GenericArray::from_slice(chunk);
|
||||
cipher.decrypt_block(&mut block);
|
||||
out.extend_from_slice(&block);
|
||||
}
|
||||
out
|
||||
}
|
||||
@@ -0,0 +1,203 @@
|
||||
//! Decode GameStream controller packets (carried on the same encrypted control stream as
|
||||
//! mouse/keyboard — see [`super::input`]) into [`GamepadFrame`]s for the uinput virtual pads.
|
||||
//!
|
||||
//! Layouts mirror moonlight-common-c `Input.h` (all `#pragma pack(1)`; the `size` header field
|
||||
//! is big-endian, everything else little-endian). We implement the Gen5+ `MULTI_CONTROLLER`
|
||||
//! event (magic `0x0C`) — the only controller event Sunshine-class hosts receive — plus the
|
||||
//! Sunshine-extension `CONTROLLER_ARRIVAL` (`0x55000004`). Because our serverinfo advertises a
|
||||
//! Sunshine appversion (4th component negative), clients also send `buttonFlags2` (paddles /
|
||||
//! touchpad-click / Share) inside the MC packet.
|
||||
|
||||
/// Inner control-message type for input (same as [`super::input`]).
|
||||
const INPUT_DATA_TYPE: u16 = 0x0206;
|
||||
|
||||
/// `NV_INPUT_HEADER.magic` for the Gen5+ multi-controller event.
|
||||
const MAGIC_MULTI_CONTROLLER: u32 = 0x0C;
|
||||
/// Sunshine extension: controller arrival metadata (type/capabilities).
|
||||
const MAGIC_CONTROLLER_ARRIVAL: u32 = 0x5500_0004;
|
||||
|
||||
/// Most controllers a session tracks (Sunshine's MAX_GAMEPADS).
|
||||
pub const MAX_PADS: usize = 16;
|
||||
|
||||
/// One decoded controller event.
///
/// This is the value `decode` returns for a control plaintext that carries controller
/// data.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum GamepadEvent {
|
||||
/// Full state of one controller + the set of attached controllers.
|
||||
State(GamepadFrame),
|
||||
/// Sunshine arrival metadata (precedes the first State for that pad).
|
||||
Arrival {
|
||||
// NOTE(review): presumably the controller number the metadata refers to — confirm
// against the arrival decoder.
index: u8,
|
||||
/// 0 unknown, 1 xbox, 2 ps, 3 nintendo.
|
||||
kind: u8,
|
||||
/// LI_CCAP_* bits (0x02 = rumble).
|
||||
capabilities: u16,
|
||||
},
|
||||
}
|
||||
|
||||
/// Snapshot of one controller's inputs (Moonlight conventions: sticks −32768..32767 with +Y
|
||||
/// up, triggers 0..255, buttons = `buttonFlags | buttonFlags2 << 16`).
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
pub struct GamepadFrame {
|
||||
/// Controller number this snapshot describes (the packet's `controllerNumber` field).
pub index: i16,
|
||||
/// Bit n set = controller n attached; a clear bit for an allocated pad means unplug.
|
||||
pub active_mask: u16,
|
||||
/// Button bitfield: `buttonFlags` in the low 16 bits, `buttonFlags2` in the high 16.
pub buttons: u32,
|
||||
/// Left trigger, 0..255.
pub left_trigger: u8,
|
||||
/// Right trigger, 0..255.
pub right_trigger: u8,
|
||||
/// Left stick X axis.
pub ls_x: i16,
|
||||
/// Left stick Y axis (+Y is up).
pub ls_y: i16,
|
||||
/// Right stick X axis.
pub rs_x: i16,
|
||||
/// Right stick Y axis (+Y is up).
pub rs_y: i16,
|
||||
}
|
||||
|
||||
// Bit positions of the `buttonFlags` wire field (Limelight.h). Bit 11 is not assigned here.
pub const BTN_DPAD_UP: u32 = 1 << 0;
pub const BTN_DPAD_DOWN: u32 = 1 << 1;
pub const BTN_DPAD_LEFT: u32 = 1 << 2;
pub const BTN_DPAD_RIGHT: u32 = 1 << 3;
pub const BTN_START: u32 = 1 << 4;
pub const BTN_BACK: u32 = 1 << 5;
pub const BTN_LS_CLK: u32 = 1 << 6;
pub const BTN_RS_CLK: u32 = 1 << 7;
pub const BTN_LB: u32 = 1 << 8;
pub const BTN_RB: u32 = 1 << 9;
pub const BTN_GUIDE: u32 = 1 << 10;
pub const BTN_A: u32 = 1 << 12;
pub const BTN_B: u32 = 1 << 13;
pub const BTN_X: u32 = 1 << 14;
pub const BTN_Y: u32 = 1 << 15;
|
||||
|
||||
/// Decode one decrypted control plaintext into a controller event, if it is one. Mouse,
|
||||
/// keyboard, keepalives etc. yield `None` (they're handled by [`super::input::decode`]).
|
||||
pub fn decode(plaintext: &[u8]) -> Option<GamepadEvent> {
|
||||
if plaintext.len() < 4 || u16::from_le_bytes([plaintext[0], plaintext[1]]) != INPUT_DATA_TYPE {
|
||||
return None;
|
||||
}
|
||||
let p = &plaintext[4..];
|
||||
if p.len() < 8 {
|
||||
return None;
|
||||
}
|
||||
let magic = u32::from_le_bytes([p[4], p[5], p[6], p[7]]);
|
||||
let b = &p[8..]; // body after NV_INPUT_HEADER
|
||||
let le16 = |o: usize| -> Option<i16> { Some(i16::from_le_bytes([*b.get(o)?, *b.get(o + 1)?])) };
|
||||
|
||||
match magic {
|
||||
MAGIC_MULTI_CONTROLLER => {
|
||||
// Body: headerB@0, controllerNumber@2, activeGamepadMask@4, midB@6, buttonFlags@8,
|
||||
// LT@10, RT@11, lsX@12, lsY@14, rsX@16, rsY@18, tailA@20, buttonFlags2@22, tailB@24.
|
||||
// The constants (headerB/midB/tail*) are never validated, mirroring Sunshine.
|
||||
let buttons_lo = le16(8)? as u16 as u32;
|
||||
// buttonFlags2 is absent on pre-extension clients (shorter packet) — treat as 0.
|
||||
let buttons_hi = le16(22).map(|v| v as u16 as u32).unwrap_or(0);
|
||||
Some(GamepadEvent::State(GamepadFrame {
|
||||
index: le16(2)?,
|
||||
active_mask: le16(4)? as u16,
|
||||
buttons: buttons_lo | (buttons_hi << 16),
|
||||
left_trigger: *b.get(10)?,
|
||||
right_trigger: *b.get(11)?,
|
||||
ls_x: le16(12)?,
|
||||
ls_y: le16(14)?,
|
||||
rs_x: le16(16)?,
|
||||
rs_y: le16(18)?,
|
||||
}))
|
||||
}
|
||||
MAGIC_CONTROLLER_ARRIVAL => Some(GamepadEvent::Arrival {
|
||||
index: *b.first()?,
|
||||
kind: *b.get(1)?,
|
||||
capabilities: le16(2)? as u16,
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Assembles the 14-byte host→client rumble plaintext (type `0x010B`):
/// `[type][len=10][u32 filler][controllerNumber][lowFreqMotor][highFreqMotor]`, every field
/// little-endian, motor values in 0..0xFFFF.
///
/// The result is unencrypted; the caller seals it with the host-direction GCM scheme and
/// sends it on the ENet control peer.
pub fn rumble_plaintext(index: u16, low: u16, high: u16) -> Vec<u8> {
    const RUMBLE_TYPE: u16 = 0x010B;
    const PAYLOAD_LEN: u16 = 10;
    // Filler word: must be present on the wire but its value is ignored.
    const FILLER: u32 = 0x00C0_FFEE;

    let mut plaintext = Vec::with_capacity(14);
    plaintext.extend(RUMBLE_TYPE.to_le_bytes());
    plaintext.extend(PAYLOAD_LEN.to_le_bytes());
    plaintext.extend(FILLER.to_le_bytes());
    for field in [index, low, high] {
        plaintext.extend(field.to_le_bytes());
    }
    plaintext
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `body` in an NV_INPUT_HEADER (`size` BE, `magic` LE) and the inner control
    /// header (`type` LE, `length` LE) so the result is a plaintext `decode` accepts.
    fn wrap(magic: u32, body: &[u8]) -> Vec<u8> {
        let size = (4 + body.len()) as u32;
        let packet = [&size.to_be_bytes()[..], &magic.to_le_bytes()[..], body].concat();
        let mut plaintext = INPUT_DATA_TYPE.to_le_bytes().to_vec();
        plaintext.extend_from_slice(&(packet.len() as u16).to_le_bytes());
        plaintext.extend_from_slice(&packet);
        plaintext
    }

    #[test]
    fn decodes_multi_controller() {
        // Pad 1 attached (mask 0b10), A+RB held, LT=10 RT=200, LS=(1000,-2000),
        // RS=(-1,32767), paddle1 reported through buttonFlags2.
        let body = [
            &0x001Ai16.to_le_bytes()[..],                 // headerB
            &1i16.to_le_bytes()[..],                      // controllerNumber
            &0b10i16.to_le_bytes()[..],                   // activeGamepadMask
            &0x0014i16.to_le_bytes()[..],                 // midB
            &((BTN_A | BTN_RB) as u16).to_le_bytes()[..], // buttonFlags
            &[10u8, 200][..],                             // LT, RT
            &1000i16.to_le_bytes()[..],                   // lsX
            &(-2000i16).to_le_bytes()[..],                // lsY
            &(-1i16).to_le_bytes()[..],                   // rsX
            &32767i16.to_le_bytes()[..],                  // rsY
            &0x009Ci16.to_le_bytes()[..],                 // tailA
            &0x0001u16.to_le_bytes()[..],                 // buttonFlags2 (paddle1)
            &0x0055i16.to_le_bytes()[..],                 // tailB
        ]
        .concat();

        let frame = match decode(&wrap(MAGIC_MULTI_CONTROLLER, &body)) {
            Some(GamepadEvent::State(frame)) => frame,
            _ => panic!("expected State"),
        };
        let expected = GamepadFrame {
            index: 1,
            active_mask: 0b10,
            buttons: BTN_A | BTN_RB | 0x0001_0000,
            left_trigger: 10,
            right_trigger: 200,
            ls_x: 1000,
            ls_y: -2000,
            rs_x: -1,
            rs_y: 32767,
        };
        assert_eq!(frame, expected);
    }

    #[test]
    fn decodes_arrival() {
        // pad 0, xbox, rumble capability; the trailing bytes are not read by `decode`.
        let body = [0u8, 1, 0x02, 0x00, 0xFF, 0xFF, 0x0F, 0x00];
        match decode(&wrap(MAGIC_CONTROLLER_ARRIVAL, &body)) {
            Some(GamepadEvent::Arrival {
                index,
                kind,
                capabilities,
            }) => assert_eq!((index, kind, capabilities), (0, 1, 0x0002)),
            _ => panic!("expected Arrival"),
        }
    }

    #[test]
    fn ignores_mouse_and_short_packets() {
        let relative_mouse = wrap(0x07, &[0, 1, 0, 2]);
        assert!(decode(&relative_mouse).is_none());
        let truncated = [0u8; 3];
        assert!(decode(&truncated).is_none());
    }

    #[test]
    fn rumble_layout() {
        let pt = rumble_plaintext(2, 0x1234, 0xBEEF);
        assert_eq!(pt.len(), 14);
        let field = |off: usize| u16::from_le_bytes([pt[off], pt[off + 1]]);
        assert_eq!(field(0), 0x010B);
        assert_eq!(field(2), 10);
        assert_eq!(field(8), 2);
        assert_eq!(field(10), 0x1234);
        assert_eq!(field(12), 0xBEEF);
    }
}
|
||||
@@ -0,0 +1,143 @@
|
||||
//! Decode the GameStream input wire format (carried AES-GCM-encrypted on the ENet control
|
||||
//! stream — see [`super::control`]) into platform-agnostic
|
||||
//! [`punktfunk_core::input::InputEvent`]s for injection.
|
||||
//!
|
||||
//! A decrypted control message is `[u16 type LE][u16 length LE][NV_INPUT packet]`. We only
|
||||
//! handle the input type (`0x0206`); the packet is an 8-byte `NV_INPUT_HEADER` (`size` BE,
|
||||
//! `magic` LE) followed by a magic-specific body. Multi-byte body fields are big-endian
|
||||
//! (network order) except `magic` and the keyboard `keyCode` (little-endian). Struct layouts
|
||||
//! mirror moonlight-common-c `Input.h`; the magic dispatch matches Sunshine `input.cpp`
|
||||
//! (Gen5+, where scroll is `0x0A` and controllers are `0x0C`, so there's no ambiguity).
|
||||
|
||||
use punktfunk_core::input::{InputEvent, InputKind};
|
||||
|
||||
/// Type tag of the inner control message that carries input
/// (moonlight `packetTypesGen7[IDX_INPUT_DATA]`).
const INPUT_DATA_TYPE: u16 = 0x0206;

// Values of NV_INPUT_HEADER.magic (Input.h); the `_GEN5` names are the Gen5+ variants of
// magics that changed between protocol generations.
const MAGIC_KEY_DOWN: u32 = 0x03;
const MAGIC_KEY_UP: u32 = 0x04;
const MAGIC_MOUSE_ABS: u32 = 0x05;
const MAGIC_MOUSE_REL: u32 = 0x06;
const MAGIC_MOUSE_REL_GEN5: u32 = 0x07;
const MAGIC_MOUSE_BTN_DOWN: u32 = 0x08;
const MAGIC_MOUSE_BTN_UP: u32 = 0x09;
const MAGIC_SCROLL_GEN5: u32 = 0x0A;
const MAGIC_UTF8: u32 = 0x17;
const MAGIC_HSCROLL: u32 = 0x5500_0001;

/// Value placed in `code` to flag an [`InputKind::MouseScroll`] as horizontal; vertical
/// scrolls carry `0`.
pub const SCROLL_HORIZONTAL: u32 = 1;
|
||||
|
||||
/// Decode one decrypted control plaintext into zero or more input events. Non-input control
|
||||
/// messages (keepalives, QoS) and unhandled input kinds (gamepad/pen/touch) yield nothing.
|
||||
pub fn decode(plaintext: &[u8]) -> Vec<InputEvent> {
|
||||
if plaintext.len() < 4 || u16::from_le_bytes([plaintext[0], plaintext[1]]) != INPUT_DATA_TYPE {
|
||||
return Vec::new();
|
||||
}
|
||||
decode_input_packet(&plaintext[4..]).into_iter().collect()
|
||||
}
|
||||
|
||||
fn decode_input_packet(p: &[u8]) -> Option<InputEvent> {
|
||||
if p.len() < 8 {
|
||||
return None;
|
||||
}
|
||||
// NV_INPUT_HEADER: size (BE u32, excludes itself) + magic (LE u32). Body follows.
|
||||
let magic = u32::from_le_bytes([p[4], p[5], p[6], p[7]]);
|
||||
let b = &p[8..];
|
||||
let be16 = |o: usize| -> Option<i16> { Some(i16::from_be_bytes([*b.get(o)?, *b.get(o + 1)?])) };
|
||||
|
||||
Some(match magic {
|
||||
MAGIC_MOUSE_REL | MAGIC_MOUSE_REL_GEN5 => {
|
||||
ev(InputKind::MouseMove, 0, be16(0)? as i32, be16(2)? as i32, 0)
|
||||
}
|
||||
MAGIC_MOUSE_ABS => {
|
||||
// short x, y, unused, width, height (all BE). Carry the client's reference extent
|
||||
// (width<<16 | height) in `flags` so the injector can scale to its output.
|
||||
let (x, y) = (be16(0)? as i32, be16(2)? as i32);
|
||||
let flags = ((be16(6)? as u16 as u32) << 16) | (be16(8)? as u16 as u32);
|
||||
ev(InputKind::MouseMoveAbs, 0, x, y, flags)
|
||||
}
|
||||
MAGIC_MOUSE_BTN_DOWN => ev(InputKind::MouseButtonDown, *b.first()? as u32, 0, 0, 0),
|
||||
MAGIC_MOUSE_BTN_UP => ev(InputKind::MouseButtonUp, *b.first()? as u32, 0, 0, 0),
|
||||
MAGIC_SCROLL_GEN5 => ev(InputKind::MouseScroll, 0, be16(0)? as i32, 0, 0),
|
||||
MAGIC_HSCROLL => ev(
|
||||
InputKind::MouseScroll,
|
||||
SCROLL_HORIZONTAL,
|
||||
be16(0)? as i32,
|
||||
0,
|
||||
0,
|
||||
),
|
||||
MAGIC_KEY_DOWN | MAGIC_KEY_UP => {
|
||||
// char flags, short keyCode (LE), char modifiers, short zero2. The client stuffs a
|
||||
// 0x80 high byte on key-down; Sunshine masks to the low-byte VK (`& 0xFF`).
|
||||
let key_code = (u16::from_le_bytes([*b.get(1)?, *b.get(2)?]) & 0x00FF) as u32;
|
||||
let modifiers = *b.get(3)? as u32;
|
||||
let kind = if magic == MAGIC_KEY_DOWN {
|
||||
InputKind::KeyDown
|
||||
} else {
|
||||
InputKind::KeyUp
|
||||
};
|
||||
ev(kind, key_code, 0, 0, modifiers)
|
||||
}
|
||||
// UTF-8 text, gamepad, pen, touch, haptics — not yet injected.
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
fn ev(kind: InputKind, code: u32, x: i32, y: i32, flags: u32) -> InputEvent {
|
||||
InputEvent {
|
||||
kind,
|
||||
_pad: [0; 3],
|
||||
code,
|
||||
x,
|
||||
y,
|
||||
flags,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Produces a complete control plaintext: inner header, NV_INPUT_HEADER, then `body`.
    fn wrap(magic: u32, body: &[u8]) -> Vec<u8> {
        let size = (4 + body.len()) as u32; // `size` does not count its own four bytes
        let packet = [&size.to_be_bytes()[..], &magic.to_le_bytes()[..], body].concat();
        let mut plaintext = INPUT_DATA_TYPE.to_le_bytes().to_vec();
        plaintext.extend_from_slice(&(packet.len() as u16).to_le_bytes());
        plaintext.extend_from_slice(&packet);
        plaintext
    }

    #[test]
    fn decodes_relative_mouse() {
        // deltaX = -1 (ffff BE), deltaY = +2 (0002 BE) — matches a real captured packet.
        let events = decode(&wrap(MAGIC_MOUSE_REL_GEN5, &[0xff, 0xff, 0x00, 0x02]));
        assert_eq!(events.len(), 1);
        let motion = &events[0];
        assert_eq!(motion.kind, InputKind::MouseMove);
        assert_eq!((motion.x, motion.y), (-1, 2));
    }

    #[test]
    fn decodes_key_down_masking_high_byte() {
        // keyCode 0x80A4 (LE a4 80) → VK 0xA4 (VK_LMENU); modifiers 0x04 (Alt).
        let events = decode(&wrap(MAGIC_KEY_DOWN, &[0x00, 0xa4, 0x80, 0x04, 0x00, 0x00]));
        assert_eq!(events.len(), 1);
        let key = &events[0];
        assert_eq!(key.kind, InputKind::KeyDown);
        assert_eq!(key.code, 0xA4);
        assert_eq!(key.flags, 0x04);
    }

    #[test]
    fn ignores_non_input_type() {
        // Leading bytes 00 02 are type 0x0200 (keepalive); the rest is arbitrary payload.
        let keepalive = [0x00u8, 0x02, 0x08, 0x00, 0x04, 0, 0, 0, 0, 0, 0, 0];
        assert!(decode(&keepalive).is_empty());
    }
}
|
||||
@@ -0,0 +1,37 @@
|
||||
//! mDNS advertisement of `_nvstream._tcp.local.` so Moonlight auto-discovers the host.
|
||||
//! (Manual "add host by IP" also works as a fallback, which is what we test with first.)
|
||||
|
||||
use super::Host;
|
||||
use anyhow::{Context, Result};
|
||||
use mdns_sd::{ServiceDaemon, ServiceInfo};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Holds the mDNS daemon; dropping it unregisters the service.
|
||||
pub struct Advert {
|
||||
_daemon: ServiceDaemon,
|
||||
}
|
||||
|
||||
pub fn advertise(host: &Host) -> Result<Advert> {
|
||||
let daemon = ServiceDaemon::new().context("create mDNS daemon")?;
|
||||
let host_name = format!("{}.local.", host.hostname);
|
||||
// No TXT records are required for Moonlight discovery; it resolves the A record and then
|
||||
// GETs /serverinfo for capabilities.
|
||||
let props: HashMap<String, String> = HashMap::new();
|
||||
let service = ServiceInfo::new(
|
||||
"_nvstream._tcp.local.",
|
||||
&host.hostname,
|
||||
&host_name,
|
||||
host.local_ip,
|
||||
host.http_port,
|
||||
props,
|
||||
)
|
||||
.context("build mDNS ServiceInfo")?;
|
||||
daemon.register(service).context("register mDNS service")?;
|
||||
tracing::info!(
|
||||
service = "_nvstream._tcp",
|
||||
port = host.http_port,
|
||||
host = %host_name,
|
||||
"mDNS advertising"
|
||||
);
|
||||
Ok(Advert { _daemon: daemon })
|
||||
}
|
||||
@@ -0,0 +1,252 @@
|
||||
//! GameStream (P1) control plane — what a stock Moonlight/Artemis client talks to around
|
||||
//! the media streams: mDNS discovery, the nvhttp serverinfo + pairing HTTP(S) API, RTSP,
|
||||
//! and the ENet control stream. `tokio`/`axum` live here (control plane, I/O-bound — never
|
||||
//! the per-frame hot path; that is `punktfunk_core`'s P1 wire codec). See `docs/m2-plan.md`.
|
||||
//!
|
||||
//! Status: P1.1 — mDNS `_nvstream._tcp` advertisement + `/serverinfo`. Pairing, RTSP, and
|
||||
//! the media streams follow (see the M2 task list / plan).
|
||||
|
||||
pub mod apps;
|
||||
#[cfg(target_os = "linux")]
|
||||
mod audio;
|
||||
/// Non-Linux stand-in for the audio plane, which needs Linux (PipeWire capture + libopus).
/// It exists so dev builds on other platforms keep compiling (crate doc: "the crate
/// compiles everywhere") and reports failure the way the real stream thread does: by
/// clearing `running`.
#[cfg(not(target_os = "linux"))]
mod audio {
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::{Arc, Mutex};

    /// Logs that audio is unavailable on this platform and clears `running`; the key
    /// material and the capturer slot are accepted only to match the Linux signature.
    pub fn start(
        running: Arc<AtomicBool>,
        _gcm_key: [u8; 16],
        _rikeyid: i32,
        _audio_cap: Arc<Mutex<Option<Box<dyn crate::audio::AudioCapturer>>>>,
    ) {
        tracing::error!("GameStream audio requires Linux (PipeWire + libopus)");
        running.store(false, Ordering::SeqCst);
    }
}
|
||||
pub(crate) mod cert;
|
||||
mod control;
|
||||
mod crypto;
|
||||
pub mod gamepad;
|
||||
mod input;
|
||||
mod mdns;
|
||||
mod nvhttp;
|
||||
mod pairing;
|
||||
mod rtsp;
|
||||
mod serverinfo;
|
||||
mod stream;
|
||||
mod tls;
|
||||
mod video;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use std::net::{IpAddr, Ipv4Addr, UdpSocket};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Base nvhttp port. Moonlight derives every other stream port by a fixed offset from this
/// HTTP base, so the remaining ports are written as offsets from it.
pub const HTTP_PORT: u16 = 47989;
/// Mutual-TLS nvhttp port (47984).
pub const HTTPS_PORT: u16 = HTTP_PORT - 5;
/// RTSP port (48010).
pub const RTSP_PORT: u16 = HTTP_PORT + 21;
/// Video stream port (47998).
pub const VIDEO_PORT: u16 = HTTP_PORT + 9;
/// ENet control stream port (47999).
pub const CONTROL_PORT: u16 = HTTP_PORT + 10;
/// Audio stream port (48000).
pub const AUDIO_PORT: u16 = HTTP_PORT + 11;

/// Host version advertised to clients. A major version ≥ 7 tells Moonlight to use SHA-256
/// for pairing.
pub const APP_VERSION: &str = "7.1.431.-1";
/// GFE version advertised to clients.
pub const GFE_VERSION: &str = "3.23.0.74";
/// Codec support bitmask: 3 = H264, 259 = +HEVC, 3843 = +AV1 (HEVC/H264/AV1 are encoded
/// via NVENC).
pub const SERVER_CODEC_MODE_SUPPORT: u32 = 3843;
|
||||
|
||||
/// The host's stable identity and the capabilities it advertises; shared by all
/// control-plane handlers.
pub struct Host {
    /// Name used for the mDNS instance and `<hostname>.local.` record.
    pub hostname: String,
    /// Persisted per-host id; echoed in serverinfo and matched on pairing.
    pub uniqueid: String,
    /// Address advertised to clients (mDNS record, RTSP session URL).
    pub local_ip: IpAddr,
    /// Plain-HTTP nvhttp port.
    pub http_port: u16,
    /// Mutual-TLS nvhttp port.
    pub https_port: u16,
    // Pairing state (server certificate, paired client certificates) is kept in
    // `AppState`, not here.
}
|
||||
|
||||
impl Host {
|
||||
pub fn detect() -> Result<Host> {
|
||||
Ok(Host {
|
||||
hostname: hostname_string(),
|
||||
uniqueid: load_or_create_uniqueid()?,
|
||||
local_ip: primary_local_ip().unwrap_or(IpAddr::V4(Ipv4Addr::LOCALHOST)),
|
||||
http_port: HTTP_PORT,
|
||||
https_port: HTTPS_PORT,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Stream parameters supplied by the client at `/launch`; the RTSP and media stages read
/// them from here.
#[derive(Clone, Copy, Debug)]
pub struct LaunchSession {
    /// AES-128 key (from `rikey`) for the RTSP/control/video/audio planes.
    pub gcm_key: [u8; 16],
    /// `rikeyid`, the seed of the per-stream GCM IVs.
    pub rikeyid: i32,
    /// Requested stream width.
    pub width: u32,
    /// Requested stream height.
    pub height: u32,
    /// Requested frame rate.
    pub fps: u32,
    /// `/launch?appid=N`: picks the app-catalog entry (session recipe).
    pub appid: u32,
}
|
||||
|
||||
/// Shared control-plane state used as the axum app state.
|
||||
pub struct AppState {
|
||||
pub host: Host,
|
||||
pub identity: cert::ServerIdentity,
|
||||
pub pairing: pairing::Pairing,
|
||||
/// Pinned (paired) client certificate DERs — the post-pair allow-list.
|
||||
pub paired: std::sync::Mutex<Vec<Vec<u8>>>,
|
||||
/// The active launch session (set by `/launch`, consumed by RTSP/media).
|
||||
pub launch: std::sync::Mutex<Option<LaunchSession>>,
|
||||
/// Negotiated video config from RTSP ANNOUNCE (consumed by the stream on PLAY).
|
||||
pub stream: std::sync::Mutex<Option<stream::StreamConfig>>,
|
||||
/// True while the video stream thread is running (also its keep-running flag).
|
||||
pub streaming: std::sync::Arc<std::sync::atomic::AtomicBool>,
|
||||
/// True while the audio stream thread is running (also its keep-running flag).
|
||||
pub audio_streaming: std::sync::Arc<std::sync::atomic::AtomicBool>,
|
||||
/// Set by the control stream when the client requests an IDR / invalidates reference
|
||||
/// frames (recovery after loss); the video thread forces a keyframe and clears it.
|
||||
pub force_idr: std::sync::Arc<std::sync::atomic::AtomicBool>,
|
||||
/// Persistent screen capturer, reused across streams so reconnects don't spawn a second
|
||||
/// (conflicting) screencast session. The video thread borrows it for the stream's duration
|
||||
/// and returns it; `set_active` gates its cost while idle.
|
||||
pub video_cap: std::sync::Arc<std::sync::Mutex<Option<Box<dyn crate::capture::Capturer>>>>,
|
||||
/// Persistent audio capturer, reused across streams (avoids leaking a PipeWire capture
|
||||
/// thread per reconnect); drained on reuse so no stale audio is sent.
|
||||
pub audio_cap: std::sync::Arc<std::sync::Mutex<Option<Box<dyn crate::audio::AudioCapturer>>>>,
|
||||
}
|
||||
|
||||
impl AppState {
|
||||
/// Fresh control-plane state: no active session; the pairing allow-list is loaded from
|
||||
/// disk (pairings persist across restarts).
|
||||
pub fn new(host: Host, identity: cert::ServerIdentity) -> AppState {
|
||||
AppState {
|
||||
host,
|
||||
identity,
|
||||
pairing: pairing::Pairing::new(),
|
||||
paired: std::sync::Mutex::new(load_paired()),
|
||||
launch: std::sync::Mutex::new(None),
|
||||
stream: std::sync::Mutex::new(None),
|
||||
streaming: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
|
||||
audio_streaming: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
|
||||
force_idr: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
|
||||
video_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
|
||||
audio_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the GameStream control plane (blocks): mDNS advertisement, the nvhttp servers, and
|
||||
/// the management REST API.
|
||||
pub fn serve(mgmt: crate::mgmt::Options) -> Result<()> {
|
||||
let host = Host::detect()?;
|
||||
let identity = cert::ServerIdentity::load_or_create().context("host certificate")?;
|
||||
let state = Arc::new(AppState::new(host, identity));
|
||||
tracing::info!(
|
||||
hostname = %state.host.hostname,
|
||||
uniqueid = %state.host.uniqueid,
|
||||
ip = %state.host.local_ip,
|
||||
"punktfunk GameStream host (P1.1: serverinfo + pairing + mDNS)"
|
||||
);
|
||||
let rt = tokio::runtime::Runtime::new().context("build tokio runtime")?;
|
||||
rt.block_on(async move {
|
||||
// rustls needs a process-wide crypto provider before any TLS config is built.
|
||||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||||
let _advert = mdns::advertise(&state.host).context("mDNS advertise")?;
|
||||
rtsp::spawn(state.clone()).context("start RTSP server")?;
|
||||
control::spawn(state.clone()).context("start ENet control server")?;
|
||||
tokio::try_join!(nvhttp::run(state.clone()), crate::mgmt::run(state, mgmt))?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
/// Path of the punktfunk configuration directory, where the host identity and pairing
/// state live. The directory is NOT created here; callers create it before writing.
///
/// Resolution follows the XDG Base Directory rules:
/// 1. `$XDG_CONFIG_HOME/punktfunk` when the variable holds an absolute path. An empty or
///    relative value is ignored, as the specification requires — otherwise the directory
///    would silently depend on the process's working directory.
/// 2. `$HOME/.config/punktfunk` when `$HOME` is set.
/// 3. `./punktfunk` as a last resort.
fn config_dir() -> PathBuf {
    let xdg_config_home = std::env::var_os("XDG_CONFIG_HOME")
        .map(PathBuf::from)
        // `is_absolute` is false for the empty path too, so this covers "set but empty".
        .filter(|dir| dir.is_absolute());
    let base = xdg_config_home
        .or_else(|| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(".config")))
        .unwrap_or_else(|| PathBuf::from("."));
    base.join("punktfunk")
}
|
||||
|
||||
/// The machine's hostname as reported by `/proc/sys/kernel/hostname`, trimmed. Falls back
/// to `"punktfunk-host"` when the file cannot be read or holds only whitespace.
fn hostname_string() -> String {
    let kernel_name = std::fs::read_to_string("/proc/sys/kernel/hostname").unwrap_or_default();
    match kernel_name.trim() {
        "" => "punktfunk-host".to_string(),
        name => name.to_string(),
    }
}
|
||||
|
||||
/// Load the persisted host uniqueid, or mint one (from the kernel UUID source) and store it.
|
||||
fn load_or_create_uniqueid() -> Result<String> {
|
||||
let path = config_dir().join("uniqueid");
|
||||
if let Ok(s) = std::fs::read_to_string(&path) {
|
||||
let t = s.trim();
|
||||
if !t.is_empty() {
|
||||
return Ok(t.to_string());
|
||||
}
|
||||
}
|
||||
let id = std::fs::read_to_string("/proc/sys/kernel/random/uuid")
|
||||
.map(|u| u.trim().replace('-', ""))
|
||||
.unwrap_or_else(|_| format!("{:016x}{:016x}", std::process::id(), HTTP_PORT));
|
||||
std::fs::create_dir_all(config_dir()).ok();
|
||||
std::fs::write(&path, &id).with_context(|| format!("write {}", path.display()))?;
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Best-effort guess at the primary LAN address: connect a UDP socket "toward" a public
/// address and ask which local address the OS picked for that route. Connecting a UDP
/// socket only fixes the peer, so no packets are actually sent.
///
/// Returns `None` when the socket cannot be bound or connected, or its local address
/// cannot be read.
fn primary_local_ip() -> Option<IpAddr> {
    let probe = UdpSocket::bind("0.0.0.0:0").ok()?;
    probe.connect("8.8.8.8:80").ok()?;
    let local = probe.local_addr().ok()?;
    Some(local.ip())
}
|
||||
|
||||
/// Where the paired-client allow-list persists (survives host restarts, like Sunshine).
|
||||
fn paired_path() -> Option<std::path::PathBuf> {
|
||||
Some(std::path::Path::new(&std::env::var("HOME").ok()?).join(".config/punktfunk/paired.json"))
|
||||
}
|
||||
|
||||
/// Load the persisted paired-client certificate DERs (empty on first run / parse failure).
|
||||
fn load_paired() -> Vec<Vec<u8>> {
|
||||
let Some(path) = paired_path() else {
|
||||
return Vec::new();
|
||||
};
|
||||
let Ok(raw) = std::fs::read(&path) else {
|
||||
return Vec::new();
|
||||
};
|
||||
match serde_json::from_slice::<Vec<Vec<u8>>>(&raw) {
|
||||
Ok(v) => {
|
||||
tracing::info!(clients = v.len(), "loaded persisted pairings");
|
||||
v
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "paired.json unreadable — starting unpaired");
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Persist the paired-client allow-list (called after each successful pairing).
|
||||
pub(crate) fn save_paired(paired: &[Vec<u8>]) {
|
||||
let Some(path) = paired_path() else { return };
|
||||
if let Some(dir) = path.parent() {
|
||||
let _ = std::fs::create_dir_all(dir);
|
||||
}
|
||||
match serde_json::to_vec(paired) {
|
||||
Ok(bytes) => {
|
||||
if let Err(e) = std::fs::write(&path, bytes) {
|
||||
tracing::warn!(error = %e, "persisting pairings failed");
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::warn!(error = %e, "serializing pairings failed"),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,236 @@
|
||||
//! The nvhttp servers: plain HTTP on 47989 and mutual-TLS on 47984. Serves `/serverinfo`,
|
||||
//! the `/pair` flow, `/applist`, and `/launch`/`/resume`/`/cancel`, plus a punktfunk-only
|
||||
//! `/pin` endpoint to deliver the Moonlight-displayed PIN. Over HTTPS the client is
|
||||
//! mutual-TLS-authenticated, so `/serverinfo` reports `PairStatus=1` there.
|
||||
|
||||
use super::{serverinfo, AppState, LaunchSession, HTTPS_PORT, HTTP_PORT, RTSP_PORT};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use axum::{
|
||||
extract::{Query, State},
|
||||
http::header,
|
||||
response::IntoResponse,
|
||||
routing::get,
|
||||
Extension, Router,
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Marks the listener a request came in on: `true` for the HTTPS listener, whose clients
/// are mutual-TLS-authenticated, `false` for plain HTTP.
#[derive(Clone, Copy)]
struct Https(bool);
|
||||
|
||||
pub async fn run(state: Arc<AppState>) -> Result<()> {
|
||||
// Mutual-TLS: request + verify the client cert (Moonlight presents one for the
|
||||
// post-pairing pairchallenge + all post-pair endpoints).
|
||||
let tls = axum_server::tls_rustls::RustlsConfig::from_config(super::tls::server_config(
|
||||
&state.identity.cert_pem,
|
||||
&state.identity.key_pem,
|
||||
)?);
|
||||
|
||||
let http_addr = SocketAddr::from(([0, 0, 0, 0], HTTP_PORT));
|
||||
let https_addr = SocketAddr::from(([0, 0, 0, 0], HTTPS_PORT));
|
||||
tracing::info!(%http_addr, %https_addr, "nvhttp listening (serverinfo + pair + launch)");
|
||||
|
||||
let http = axum_server::bind(http_addr).serve(router(state.clone(), false).into_make_service());
|
||||
let https =
|
||||
axum_server::bind_rustls(https_addr, tls).serve(router(state, true).into_make_service());
|
||||
tokio::try_join!(async { http.await.context("nvhttp HTTP server") }, async {
|
||||
https.await.context("nvhttp HTTPS server")
|
||||
},)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn router(state: Arc<AppState>, https: bool) -> Router {
|
||||
Router::new()
|
||||
.route("/serverinfo", get(h_serverinfo))
|
||||
.route("/pair", get(h_pair))
|
||||
.route("/pin", get(h_pin))
|
||||
.route("/applist", get(h_applist))
|
||||
.route("/launch", get(h_launch))
|
||||
.route("/resume", get(h_resume))
|
||||
.route("/cancel", get(h_cancel))
|
||||
.layer(Extension(Https(https)))
|
||||
.with_state(state)
|
||||
}
|
||||
|
||||
fn xml(body: String) -> impl IntoResponse {
|
||||
([(header::CONTENT_TYPE, "application/xml")], body)
|
||||
}
|
||||
|
||||
async fn h_serverinfo(
|
||||
State(st): State<Arc<AppState>>,
|
||||
Extension(Https(https)): Extension<Https>,
|
||||
) -> impl IntoResponse {
|
||||
// Over the mutual-TLS port the peer is an authenticated (paired) client → PairStatus=1.
|
||||
xml(serverinfo::serverinfo_xml(&st.host, https))
|
||||
}
|
||||
|
||||
async fn h_pin(
|
||||
State(st): State<Arc<AppState>>,
|
||||
Query(q): Query<HashMap<String, String>>,
|
||||
) -> impl IntoResponse {
|
||||
match q.get("pin").filter(|p| !p.is_empty()) {
|
||||
Some(pin) => {
|
||||
st.pairing.pin.submit(pin.clone());
|
||||
"PIN accepted\n".to_string()
|
||||
}
|
||||
None => "usage: GET /pin?pin=NNNN\n".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn h_applist(State(_st): State<Arc<AppState>>) -> impl IntoResponse {
|
||||
// One app for now: the headless desktop (the wlroots virtual output).
|
||||
xml(super::apps::applist_xml())
|
||||
}
|
||||
|
||||
async fn h_launch(
|
||||
State(st): State<Arc<AppState>>,
|
||||
Query(q): Query<HashMap<String, String>>,
|
||||
) -> impl IntoResponse {
|
||||
match launch(&st, &q) {
|
||||
Ok(session) => {
|
||||
*st.launch.lock().unwrap() = Some(session);
|
||||
tracing::info!(
|
||||
w = session.width,
|
||||
h = session.height,
|
||||
fps = session.fps,
|
||||
rikeyid = session.rikeyid,
|
||||
"launch — session created; RTSP at rtsp://{}:{RTSP_PORT}",
|
||||
st.host.local_ip
|
||||
);
|
||||
xml(session_url_xml(&st, "gamesession"))
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %format!("{e:#}"), "launch failed");
|
||||
xml(error_xml())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn h_resume(State(st): State<Arc<AppState>>) -> impl IntoResponse {
|
||||
if st.launch.lock().unwrap().is_some() {
|
||||
xml(session_url_xml(&st, "resume"))
|
||||
} else {
|
||||
xml(error_xml())
|
||||
}
|
||||
}
|
||||
|
||||
async fn h_cancel(State(st): State<Arc<AppState>>) -> impl IntoResponse {
|
||||
*st.launch.lock().unwrap() = None;
|
||||
// Quit semantics: stop the running media threads (they observe these flags) so the session
|
||||
// actually ends — the virtual output/gamescope teardown follows via the capturer's RAII.
|
||||
st.streaming
|
||||
.store(false, std::sync::atomic::Ordering::SeqCst);
|
||||
st.audio_streaming
|
||||
.store(false, std::sync::atomic::Ordering::SeqCst);
|
||||
tracing::info!("cancel — launch session cleared, streams stopping");
|
||||
xml("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\"><cancel>1</cancel></root>\n".to_string())
|
||||
}
|
||||
|
||||
/// Parse the `/launch` query (rikey/rikeyid/mode) into a [`LaunchSession`].
|
||||
fn launch(_st: &AppState, q: &HashMap<String, String>) -> Result<LaunchSession> {
|
||||
let rikey = q.get("rikey").ok_or_else(|| anyhow!("missing rikey"))?;
|
||||
let key_bytes = hex::decode(rikey).context("rikey hex")?;
|
||||
if key_bytes.len() < 16 {
|
||||
return Err(anyhow!("rikey too short"));
|
||||
}
|
||||
let mut gcm_key = [0u8; 16];
|
||||
gcm_key.copy_from_slice(&key_bytes[..16]);
|
||||
// rikeyid is a signed 32-bit int (negative values wrap to a big-endian u32 IV later).
|
||||
let rikeyid: i32 = q.get("rikeyid").and_then(|s| s.parse().ok()).unwrap_or(0);
|
||||
let (width, height, fps) = q
|
||||
.get("mode")
|
||||
.and_then(|m| parse_mode(m))
|
||||
.unwrap_or((1920, 1080, 60));
|
||||
let appid = q.get("appid").and_then(|s| s.parse().ok()).unwrap_or(1);
|
||||
Ok(LaunchSession {
|
||||
gcm_key,
|
||||
rikeyid,
|
||||
width,
|
||||
height,
|
||||
fps,
|
||||
appid,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a `/launch` mode string: `"1920x1080x60"` → `(1920, 1080, 60)`.
///
/// Returns `None` when fewer than three `x`-separated components are present, when one of
/// the first three is not a decimal `u32`, or when one of them is zero — a zero width,
/// height or frame rate is never a usable stream mode, so it is rejected here and the
/// caller falls back to its default mode. Components after the third are ignored.
fn parse_mode(mode: &str) -> Option<(u32, u32, u32)> {
    let mut fields = mode
        .split('x')
        .map(|field| field.parse::<u32>().ok().filter(|&value| value != 0));
    // Outer `?`: the component exists; inner `?`: it parsed to a non-zero number.
    let width = fields.next()??;
    let height = fields.next()??;
    let fps = fields.next()??;
    Some((width, height, fps))
}
|
||||
|
||||
fn session_url_xml(st: &AppState, tag: &str) -> String {
|
||||
format!(
|
||||
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\">\n<sessionUrl0>rtsp://{}:{RTSP_PORT}</sessionUrl0>\n<{tag}>1</{tag}>\n</root>\n",
|
||||
st.host.local_ip
|
||||
)
|
||||
}
|
||||
|
||||
async fn h_pair(
|
||||
State(st): State<Arc<AppState>>,
|
||||
Query(q): Query<HashMap<String, String>>,
|
||||
) -> impl IntoResponse {
|
||||
let uniqueid = q.get("uniqueid").cloned().unwrap_or_default();
|
||||
let phrase = q.get("phrase").map(String::as_str);
|
||||
|
||||
let step = phrase
|
||||
.filter(|p| *p == "getservercert" || *p == "pairchallenge")
|
||||
.or_else(|| {
|
||||
[
|
||||
"clientchallenge",
|
||||
"serverchallengeresp",
|
||||
"clientpairingsecret",
|
||||
]
|
||||
.into_iter()
|
||||
.find(|k| q.contains_key(*k))
|
||||
})
|
||||
.unwrap_or("?");
|
||||
tracing::info!(uniqueid, step, "pair request");
|
||||
|
||||
let result = if phrase == Some("getservercert") {
|
||||
match (q.get("salt"), q.get("clientcert")) {
|
||||
(Some(salt), Some(cc)) => {
|
||||
st.pairing
|
||||
.getservercert(&st.identity, &uniqueid, salt, cc)
|
||||
.await
|
||||
}
|
||||
_ => Ok(pair_error_xml()),
|
||||
}
|
||||
} else if phrase == Some("pairchallenge") {
|
||||
// Reached only over the TLS port with the pinned host cert; the handshake is the
|
||||
// proof, so acknowledge success.
|
||||
Ok(paired_ok_xml())
|
||||
} else if let Some(v) = q.get("clientchallenge") {
|
||||
st.pairing.clientchallenge(&st.identity, &uniqueid, v)
|
||||
} else if let Some(v) = q.get("serverchallengeresp") {
|
||||
st.pairing.serverchallengeresp(&st.identity, &uniqueid, v)
|
||||
} else if let Some(v) = q.get("clientpairingsecret") {
|
||||
st.pairing.clientpairingsecret(&uniqueid, v, &st.paired)
|
||||
} else {
|
||||
Ok(pair_error_xml())
|
||||
};
|
||||
|
||||
let body = result.unwrap_or_else(|e| {
|
||||
tracing::warn!(error = %format!("{e:#}"), uniqueid, "pair handler error");
|
||||
pair_error_xml()
|
||||
});
|
||||
xml(body)
|
||||
}
|
||||
|
||||
/// XML body reporting a successful pairing step (`<paired>1</paired>`).
fn paired_ok_xml() -> String {
    String::from(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\"><paired>1</paired></root>\n",
    )
}
|
||||
|
||||
/// XML body reporting a failed pairing step (`<paired>0</paired>`, still status 200).
fn pair_error_xml() -> String {
    String::from(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\"><paired>0</paired></root>\n",
    )
}
|
||||
|
||||
/// Empty XML document carrying `status_code="400"`.
fn error_xml() -> String {
    String::from("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"400\"></root>\n")
}
|
||||
@@ -0,0 +1,306 @@
|
||||
//! The 4-phase GameStream pairing state machine (over HTTP), keyed by `uniqueid`. Proves
|
||||
//! both sides know the PIN (via the SHA-256(salt||pin) AES-ECB key) and own their certs
|
||||
//! (RSA signatures), then pins the client cert. The final `pairchallenge` happens over
|
||||
//! HTTPS (handled in `nvhttp`). Byte-exact spec: `docs/research/…-research.json`.
|
||||
|
||||
use super::cert::ServerIdentity;
|
||||
use super::crypto;
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use rsa::pkcs1v15::{Signature, VerifyingKey};
|
||||
use rsa::pkcs8::DecodePublicKey;
|
||||
use rsa::signature::{SignatureEncoding, Signer, Verifier};
|
||||
use rsa::RsaPublicKey;
|
||||
use sha2::Sha256;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::Notify;
|
||||
|
||||
/// Out-of-band PIN delivery. Moonlight generates + displays a PIN; the user submits it
|
||||
/// (via the management API's `POST /api/v1/pair/pin` or nvhttp's `GET /pin?pin=NNNN`).
|
||||
/// `getservercert` parks until a PIN arrives.
|
||||
pub struct PinGate {
|
||||
pin: Mutex<Option<String>>,
|
||||
notify: Notify,
|
||||
/// Handshakes currently parked in [`take`](Self::take) — drives the management API's
|
||||
/// `pin_pending` so a control pane knows when to prompt for the PIN.
|
||||
waiters: AtomicUsize,
|
||||
}
|
||||
|
||||
impl PinGate {
|
||||
fn new() -> Self {
|
||||
PinGate {
|
||||
pin: Mutex::new(None),
|
||||
notify: Notify::new(),
|
||||
waiters: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn submit(&self, pin: String) {
|
||||
*self.pin.lock().unwrap() = Some(pin);
|
||||
self.notify.notify_waiters();
|
||||
}
|
||||
|
||||
/// True while a pairing handshake is parked waiting for the user's PIN.
|
||||
pub fn awaiting_pin(&self) -> bool {
|
||||
self.waiters.load(Ordering::SeqCst) > 0
|
||||
}
|
||||
|
||||
async fn take(&self, timeout: Duration) -> Option<String> {
|
||||
self.waiters.fetch_add(1, Ordering::SeqCst);
|
||||
// Decrement on every exit path (PIN delivered, timeout, or future cancellation).
|
||||
struct WaiterGuard<'a>(&'a AtomicUsize);
|
||||
impl Drop for WaiterGuard<'_> {
|
||||
fn drop(&mut self) {
|
||||
self.0.fetch_sub(1, Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
let _guard = WaiterGuard(&self.waiters);
|
||||
|
||||
let deadline = tokio::time::Instant::now() + timeout;
|
||||
loop {
|
||||
if let Some(p) = self.pin.lock().unwrap().take() {
|
||||
return Some(p);
|
||||
}
|
||||
if tokio::time::timeout_at(deadline, self.notify.notified())
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-client pairing session carried across the 4 separate HTTP GETs.
|
||||
struct Session {
|
||||
aes_key: [u8; 16],
|
||||
client_cert_der: Vec<u8>,
|
||||
client_cert_sig: Vec<u8>,
|
||||
client_pubkey: RsaPublicKey,
|
||||
serversecret: [u8; 16],
|
||||
server_challenge: [u8; 16],
|
||||
/// The client's phase-3 hash, recomputed + checked in phase 4.
|
||||
client_hash: Vec<u8>,
|
||||
}
|
||||
|
||||
pub struct Pairing {
|
||||
sessions: Mutex<HashMap<String, Session>>,
|
||||
pub pin: PinGate,
|
||||
}
|
||||
|
||||
impl Pairing {
|
||||
pub fn new() -> Self {
|
||||
Pairing {
|
||||
sessions: Mutex::new(HashMap::new()),
|
||||
pin: PinGate::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 1: store the client cert, await the PIN, derive the AES key, return our cert.
|
||||
pub async fn getservercert(
|
||||
&self,
|
||||
id: &ServerIdentity,
|
||||
uniqueid: &str,
|
||||
salt_hex: &str,
|
||||
clientcert_hex: &str,
|
||||
) -> Result<String> {
|
||||
let salt_bytes = hex::decode(salt_hex).context("salt hex")?;
|
||||
if salt_bytes.len() < 16 {
|
||||
bail!("salt too short");
|
||||
}
|
||||
let mut salt = [0u8; 16];
|
||||
salt.copy_from_slice(&salt_bytes[..16]);
|
||||
let pem_bytes = hex::decode(clientcert_hex).context("clientcert hex")?;
|
||||
let (der, sig, pubkey) = parse_client_cert(&pem_bytes)?;
|
||||
|
||||
tracing::info!(
|
||||
uniqueid,
|
||||
"pairing phase 1 (getservercert) — awaiting PIN: submit `GET /pin?pin=NNNN`"
|
||||
);
|
||||
let pin = self
|
||||
.pin
|
||||
.take(Duration::from_secs(300))
|
||||
.await
|
||||
.ok_or_else(|| anyhow!("no PIN submitted within 300s"))?;
|
||||
let aes_key = crypto::pin_key(&salt, &pin);
|
||||
|
||||
self.sessions.lock().unwrap().insert(
|
||||
uniqueid.to_string(),
|
||||
Session {
|
||||
aes_key,
|
||||
client_cert_der: der,
|
||||
client_cert_sig: sig,
|
||||
client_pubkey: pubkey,
|
||||
serversecret: [0; 16],
|
||||
server_challenge: [0; 16],
|
||||
client_hash: Vec::new(),
|
||||
},
|
||||
);
|
||||
tracing::info!(
|
||||
uniqueid,
|
||||
"pairing phase 1 — PIN accepted, returning host cert"
|
||||
);
|
||||
let inner = format!(
|
||||
"<plaincert>{}</plaincert>",
|
||||
hex::encode(id.cert_pem.as_bytes())
|
||||
);
|
||||
Ok(paired_xml(&inner, true))
|
||||
}
|
||||
|
||||
/// Phase 2: decrypt the client challenge, return our hash + server challenge.
|
||||
pub fn clientchallenge(
|
||||
&self,
|
||||
id: &ServerIdentity,
|
||||
uniqueid: &str,
|
||||
hexv: &str,
|
||||
) -> Result<String> {
|
||||
let mut map = self.sessions.lock().unwrap();
|
||||
let s = map
|
||||
.get_mut(uniqueid)
|
||||
.ok_or_else(|| anyhow!("no pairing session"))?;
|
||||
let enc = hex::decode(hexv).context("clientchallenge hex")?;
|
||||
let client_challenge = crypto::ecb_decrypt(&s.aes_key, &enc);
|
||||
if client_challenge.len() < 16 {
|
||||
bail!("short client challenge");
|
||||
}
|
||||
s.serversecret = crypto::random();
|
||||
s.server_challenge = crypto::random();
|
||||
let server_hash =
|
||||
crypto::sha256(&[&client_challenge[..16], &id.signature, &s.serversecret]);
|
||||
let mut plain = Vec::with_capacity(48);
|
||||
plain.extend_from_slice(&server_hash);
|
||||
plain.extend_from_slice(&s.server_challenge);
|
||||
let resp = crypto::ecb_encrypt(&s.aes_key, &plain);
|
||||
let inner = format!(
|
||||
"<challengeresponse>{}</challengeresponse>",
|
||||
hex::encode(resp)
|
||||
);
|
||||
Ok(paired_xml(&inner, true))
|
||||
}
|
||||
|
||||
/// Phase 3: store the client's hash, return our RSA-signed serversecret.
|
||||
pub fn serverchallengeresp(
|
||||
&self,
|
||||
id: &ServerIdentity,
|
||||
uniqueid: &str,
|
||||
hexv: &str,
|
||||
) -> Result<String> {
|
||||
let mut map = self.sessions.lock().unwrap();
|
||||
let s = map
|
||||
.get_mut(uniqueid)
|
||||
.ok_or_else(|| anyhow!("no pairing session"))?;
|
||||
let enc = hex::decode(hexv).context("serverchallengeresp hex")?;
|
||||
let client_hash = crypto::ecb_decrypt(&s.aes_key, &enc);
|
||||
if client_hash.len() < 32 {
|
||||
bail!("short challenge response");
|
||||
}
|
||||
s.client_hash = client_hash[..32].to_vec();
|
||||
let sig: Signature = id.signing_key.sign(&s.serversecret);
|
||||
let mut secret = Vec::with_capacity(16 + 256);
|
||||
secret.extend_from_slice(&s.serversecret);
|
||||
secret.extend_from_slice(&sig.to_vec());
|
||||
let inner = format!("<pairingsecret>{}</pairingsecret>", hex::encode(secret));
|
||||
Ok(paired_xml(&inner, true))
|
||||
}
|
||||
|
||||
/// Phase 4: verify the client knew the PIN (hash match) and owns its cert (RSA verify);
|
||||
/// on success, pin the client cert.
|
||||
pub fn clientpairingsecret(
|
||||
&self,
|
||||
uniqueid: &str,
|
||||
hexv: &str,
|
||||
paired_store: &Mutex<Vec<Vec<u8>>>,
|
||||
) -> Result<String> {
|
||||
let mut map = self.sessions.lock().unwrap();
|
||||
let s = map
|
||||
.get_mut(uniqueid)
|
||||
.ok_or_else(|| anyhow!("no pairing session"))?;
|
||||
let data = hex::decode(hexv).context("clientpairingsecret hex")?;
|
||||
if data.len() < 16 {
|
||||
bail!("short pairing secret");
|
||||
}
|
||||
let client_secret = &data[..16];
|
||||
let client_sig = &data[16..];
|
||||
let expected = crypto::sha256(&[&s.server_challenge, &s.client_cert_sig, client_secret]);
|
||||
let hash_ok = expected[..] == s.client_hash[..];
|
||||
let sig_ok = verify256(&s.client_pubkey, client_secret, client_sig).is_ok();
|
||||
if hash_ok && sig_ok {
|
||||
{
|
||||
let mut store = paired_store.lock().unwrap();
|
||||
store.push(s.client_cert_der.clone());
|
||||
super::save_paired(&store);
|
||||
}
|
||||
tracing::info!(uniqueid, "pairing phase 4 — SUCCESS, client cert pinned");
|
||||
Ok(paired_xml("", true))
|
||||
} else {
|
||||
tracing::warn!(
|
||||
uniqueid,
|
||||
hash_ok,
|
||||
sig_ok,
|
||||
"pairing phase 4 — FAILED (PIN/cert)"
|
||||
);
|
||||
map.remove(uniqueid);
|
||||
Ok(paired_xml("", false))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn verify256(pubkey: &RsaPublicKey, msg: &[u8], sig: &[u8]) -> Result<()> {
|
||||
let vk = VerifyingKey::<Sha256>::new(pubkey.clone());
|
||||
let signature = Signature::try_from(sig).context("parse client signature")?;
|
||||
vk.verify(msg, &signature)
|
||||
.context("verify client signature")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_client_cert(pem_bytes: &[u8]) -> Result<(Vec<u8>, Vec<u8>, RsaPublicKey)> {
|
||||
let (_, pem) =
|
||||
x509_parser::pem::parse_x509_pem(pem_bytes).map_err(|e| anyhow!("client cert pem: {e}"))?;
|
||||
let der = pem.contents.clone();
|
||||
let x509 = pem.parse_x509().context("parse client x509")?;
|
||||
let sig = x509.signature_value.data.to_vec();
|
||||
let pubkey =
|
||||
RsaPublicKey::from_public_key_der(x509.public_key().raw).context("client rsa pubkey")?;
|
||||
Ok((der, sig, pubkey))
|
||||
}
|
||||
|
||||
/// Render `<root status_code="200"><paired>0|1</paired> inner </root>`.
///
/// `inner` is inserted verbatim after the `<paired>` element; pass `""` for none.
fn paired_xml(inner: &str, paired: bool) -> String {
    let flag = if paired { '1' } else { '0' };
    let mut doc = String::from(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\">\n<paired>",
    );
    doc.push(flag);
    doc.push_str("</paired>\n");
    doc.push_str(inner);
    doc.push_str("</root>\n");
    doc
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    /// `awaiting_pin` must read true while a `take` call is parked and false again after
    /// it returns, both when a PIN is delivered and when the wait times out — the
    /// management API's pairing UX depends on it.
    #[tokio::test]
    async fn pin_gate_reports_waiting() {
        let pairing = Arc::new(Pairing::new());
        assert!(!pairing.pin.awaiting_pin());

        let parked = pairing.clone();
        let waiter = tokio::spawn(async move { parked.pin.take(Duration::from_secs(5)).await });
        // Spin until the spawned task has registered itself as a waiter.
        loop {
            if pairing.pin.awaiting_pin() {
                break;
            }
            tokio::time::sleep(Duration::from_millis(2)).await;
        }

        pairing.pin.submit("1234".into());
        let delivered = waiter.await.unwrap();
        assert_eq!(delivered.as_deref(), Some("1234"));
        assert!(!pairing.pin.awaiting_pin());

        // Timeout path also clears the flag.
        let timed_out = pairing.pin.take(Duration::from_millis(10)).await;
        assert_eq!(timed_out, None);
        assert!(!pairing.pin.awaiting_pin());
    }
}
|
||||
@@ -0,0 +1,318 @@
|
||||
//! The GameStream RTSP handshake (TCP 48010). Hand-rolled because GameStream's RTSP is
|
||||
//! non-standard (streamid= targets, the literal `DEADBEEFCAFE` session, the X-SS-* headers)
|
||||
//! and off-the-shelf RTSP crates assume standard semantics. Sequence Moonlight drives:
|
||||
//! OPTIONS → DESCRIBE → SETUP(audio/video/control) → ANNOUNCE → PLAY. ANNOUNCE carries the
|
||||
//! negotiated stream config; PLAY is where the media stages start (P1.3+).
|
||||
//!
|
||||
//! Runs on its own native thread (control-plane setup, not the per-frame hot path), one
|
||||
//! thread per connection. Plaintext only for now (encryption is negotiated; P1.5).
|
||||
|
||||
use super::audio;
|
||||
use super::stream::{self, StreamConfig};
|
||||
use super::{AppState, AUDIO_PORT, CONTROL_PORT, RTSP_PORT, VIDEO_PORT};
|
||||
use crate::encode::Codec;
|
||||
use anyhow::{Context, Result};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{Read, Write};
|
||||
use std::net::{TcpListener, TcpStream};
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Opaque payload handed to the client in the SETUP response, which the client echoes as
/// its first UDP datagram (port-learning). Currently a fixed value shared by all sessions.
const PING_PAYLOAD: &str = "0011223344556677";
|
||||
|
||||
/// Bind 48010 and accept RTSP connections on a dedicated thread.
|
||||
pub fn spawn(state: Arc<AppState>) -> Result<()> {
|
||||
let listener = TcpListener::bind(("0.0.0.0", RTSP_PORT))
|
||||
.with_context(|| format!("bind RTSP {RTSP_PORT}"))?;
|
||||
tracing::info!(port = RTSP_PORT, "RTSP listening");
|
||||
std::thread::Builder::new()
|
||||
.name("punktfunk-rtsp".into())
|
||||
.spawn(move || {
|
||||
for conn in listener.incoming() {
|
||||
match conn {
|
||||
Ok(stream) => {
|
||||
let st = state.clone();
|
||||
std::thread::spawn(move || {
|
||||
if let Err(e) = handle_conn(stream, st) {
|
||||
tracing::warn!(error = %format!("{e:#}"), "RTSP connection ended");
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(e) => tracing::warn!(error = %e, "RTSP accept failed"),
|
||||
}
|
||||
}
|
||||
})
|
||||
.context("spawn RTSP thread")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// One parsed RTSP request.
struct Request {
    /// Method token from the request line (e.g. `OPTIONS`, `SETUP`); empty if absent.
    method: String,
    /// Request URI from the request line; empty if absent.
    uri: String,
    /// Trimmed `CSeq` header value, `"0"` when the header is missing.
    cseq: String,
    /// The complete header section (request line + headers), without the blank line.
    head: String,
    /// Message body (lossily decoded as UTF-8); empty when there is none.
    body: String,
}
|
||||
|
||||
fn handle_conn(mut stream: TcpStream, state: Arc<AppState>) -> Result<()> {
|
||||
let peer = stream.peer_addr().ok();
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
// GameStream RTSP is one request per TCP connection: moonlight-common-c reads the
|
||||
// response until EOF, so we answer one message and close the connection (which signals
|
||||
// the end of the response). Session state lives in `AppState`, not the connection.
|
||||
if let Some(req) = read_message(&mut stream, &mut buf)? {
|
||||
tracing::info!(
|
||||
method = %req.method, cseq = %req.cseq,
|
||||
"RTSP {} | {}", req.head.replace("\r\n", " | "),
|
||||
if req.body.is_empty() { String::new() } else { format!("body: {}", req.body.replace("\r\n", " | ")) }
|
||||
);
|
||||
let resp = handle_request(&req, &state);
|
||||
stream.write_all(resp.as_bytes()).context("RTSP write")?;
|
||||
stream.flush().ok();
|
||||
// Close (FIN after the flushed response) so the client detects end-of-response.
|
||||
let _ = stream.shutdown(std::net::Shutdown::Both);
|
||||
}
|
||||
let _ = peer;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read one complete RTSP message (headers + any Content-Length body) from the stream,
|
||||
/// buffering across reads and leaving any pipelined remainder in `buf`.
|
||||
fn read_message(stream: &mut TcpStream, buf: &mut Vec<u8>) -> Result<Option<Request>> {
|
||||
loop {
|
||||
if let Some(end) = find_subslice(buf, b"\r\n\r\n") {
|
||||
let head = std::str::from_utf8(&buf[..end]).context("RTSP header utf8")?;
|
||||
let content_len = header_value(head, "content-length")
|
||||
.and_then(|v| v.trim().parse::<usize>().ok())
|
||||
.unwrap_or(0);
|
||||
let total = end + 4 + content_len;
|
||||
if buf.len() < total {
|
||||
// headers complete but body still arriving — read more
|
||||
} else {
|
||||
let head = head.to_string();
|
||||
let body = String::from_utf8_lossy(&buf[end + 4..total]).into_owned();
|
||||
buf.drain(..total);
|
||||
return Ok(Some(parse_request(&head, body)));
|
||||
}
|
||||
}
|
||||
let mut tmp = [0u8; 8192];
|
||||
let n = stream.read(&mut tmp).context("RTSP read")?;
|
||||
if n == 0 {
|
||||
return Ok(None); // peer closed
|
||||
}
|
||||
buf.extend_from_slice(&tmp[..n]);
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_request(head: &str, body: String) -> Request {
|
||||
let mut lines = head.split("\r\n");
|
||||
let request_line = lines.next().unwrap_or("");
|
||||
let mut parts = request_line.split_whitespace();
|
||||
let method = parts.next().unwrap_or("").to_string();
|
||||
let uri = parts.next().unwrap_or("").to_string();
|
||||
let cseq = header_value(head, "cseq").unwrap_or("0").trim().to_string();
|
||||
Request {
|
||||
method,
|
||||
uri,
|
||||
cseq,
|
||||
head: head.to_string(),
|
||||
body,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_request(req: &Request, state: &AppState) -> String {
|
||||
match req.method.as_str() {
|
||||
"OPTIONS" => response(
|
||||
&req.cseq,
|
||||
&[("Public", "OPTIONS DESCRIBE SETUP ANNOUNCE PLAY TEARDOWN")],
|
||||
None,
|
||||
),
|
||||
"DESCRIBE" => response(
|
||||
&req.cseq,
|
||||
&[("Content-Type", "application/sdp")],
|
||||
Some(&describe_sdp()),
|
||||
),
|
||||
"SETUP" => {
|
||||
let (port, extra_key) = match stream_type(&req.uri) {
|
||||
Some("audio") => (AUDIO_PORT, "X-SS-Ping-Payload"),
|
||||
Some("video") => (VIDEO_PORT, "X-SS-Ping-Payload"),
|
||||
Some("control") => (CONTROL_PORT, "X-SS-Connect-Data"),
|
||||
_ => return response_status("404 Not Found", &req.cseq, &[], None),
|
||||
};
|
||||
let transport = format!("server_port={port}");
|
||||
response(
|
||||
&req.cseq,
|
||||
&[
|
||||
("Session", "DEADBEEFCAFE;timeout = 90"),
|
||||
("Transport", &transport),
|
||||
(extra_key, PING_PAYLOAD),
|
||||
],
|
||||
None,
|
||||
)
|
||||
}
|
||||
"ANNOUNCE" => {
|
||||
let map = parse_announce(&req.body);
|
||||
match stream_config(&map) {
|
||||
Some(cfg) => {
|
||||
tracing::info!(?cfg, "RTSP ANNOUNCE — negotiated stream config");
|
||||
*state.stream.lock().unwrap() = Some(cfg);
|
||||
}
|
||||
None => tracing::warn!("RTSP ANNOUNCE — missing required video config keys"),
|
||||
}
|
||||
response(&req.cseq, &[], None)
|
||||
}
|
||||
"PLAY" => {
|
||||
let cfg = *state.stream.lock().unwrap();
|
||||
match cfg {
|
||||
Some(cfg) if !state.streaming.swap(true, Ordering::SeqCst) => {
|
||||
// Resolve the launched catalog entry (session recipe) for the stream.
|
||||
let app = state
|
||||
.launch
|
||||
.lock()
|
||||
.unwrap()
|
||||
.map(|l| l.appid)
|
||||
.and_then(super::apps::by_id);
|
||||
tracing::info!(app = ?app.as_ref().map(|a| &a.title), "RTSP PLAY — starting video stream");
|
||||
stream::start(
|
||||
cfg,
|
||||
app,
|
||||
state.streaming.clone(),
|
||||
state.force_idr.clone(),
|
||||
state.video_cap.clone(),
|
||||
);
|
||||
}
|
||||
Some(_) => tracing::info!("RTSP PLAY — stream already running"),
|
||||
None => tracing::warn!("RTSP PLAY — no negotiated config (ANNOUNCE missing)"),
|
||||
}
|
||||
// Audio runs independently (stereo Opus on UDP 48000); it needs the launch key for
|
||||
// the AES-CBC payload encryption the client expects.
|
||||
let launch = *state.launch.lock().unwrap();
|
||||
if let Some(ls) = launch {
|
||||
if !state.audio_streaming.swap(true, Ordering::SeqCst) {
|
||||
tracing::info!("RTSP PLAY — starting audio stream");
|
||||
audio::start(
|
||||
state.audio_streaming.clone(),
|
||||
ls.gcm_key,
|
||||
ls.rikeyid,
|
||||
state.audio_cap.clone(),
|
||||
);
|
||||
}
|
||||
}
|
||||
response(&req.cseq, &[("Session", "DEADBEEFCAFE;timeout = 90")], None)
|
||||
}
|
||||
"TEARDOWN" => {
|
||||
// Signal both stream threads to stop.
|
||||
state.streaming.store(false, Ordering::SeqCst);
|
||||
state.audio_streaming.store(false, Ordering::SeqCst);
|
||||
response(&req.cseq, &[], None)
|
||||
}
|
||||
other => {
|
||||
tracing::warn!(method = other, "RTSP unsupported method");
|
||||
response_status("501 Not Implemented", &req.cseq, &[], None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Host capability SDP returned by DESCRIBE. Advertises HEVC + AV1 and no encryption
/// (plaintext streams for now; P1.5 adds the negotiated AES paths).
///
/// Every line, including the last, is terminated with `\r\n`.
fn describe_sdp() -> String {
    // Line-oriented a=key:value, matching what moonlight-common-c scans for.
    const LINES: [&str; 6] = [
        "a=x-ss-general.featureFlags:0",
        "a=x-ss-general.encryptionSupported:0",
        "a=x-ss-general.encryptionRequested:0",
        "sprop-parameter-sets=AAAAAU", // HEVC capability indicator
        "a=rtpmap:98 AV1/90000",       // AV1 capability indicator
        // Opus config the client matches by channel count (Sunshine emits one per config):
        // surround-params = channelCount, streams, coupledStreams, then the channel mapping.
        // The client negotiated stereo, so advertise just that.
        "a=fmtp:97 surround-params=21101", // stereo: 2ch, 1 stream, 1 coupled, mapping [0,1]
    ];
    let mut sdp = String::new();
    for line in LINES {
        sdp.push_str(line);
        sdp.push_str("\r\n");
    }
    sdp
}
|
||||
|
||||
/// Collect the `a=key:value` lines of an ANNOUNCE SDP body into a map.
///
/// The key is everything between `a=` and the first `:`; the value is the rest of the
/// line, untrimmed. Lines without the `a=` prefix or without a `:` are skipped, and a
/// repeated key keeps its last value.
fn parse_announce(body: &str) -> HashMap<String, String> {
    body.lines()
        .filter_map(|line| line.strip_prefix("a=")?.split_once(':'))
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect()
}
|
||||
|
||||
/// Map the negotiated ANNOUNCE keys to a [`StreamConfig`] (resolution/packetSize required).
|
||||
fn stream_config(map: &HashMap<String, String>) -> Option<StreamConfig> {
|
||||
let parse_u = |k: &str| map.get(k).and_then(|s| s.trim().parse::<u32>().ok());
|
||||
let width = parse_u("x-nv-video[0].clientViewportWd")?;
|
||||
let height = parse_u("x-nv-video[0].clientViewportHt")?;
|
||||
let packet_size = parse_u("x-nv-video[0].packetSize")? as usize;
|
||||
let fps = parse_u("x-nv-video[0].maxFPS")
|
||||
.filter(|&f| f > 0)
|
||||
.unwrap_or(60);
|
||||
let bitrate_kbps = parse_u("x-nv-vqos[0].bw.maximumBitrateKbps").unwrap_or(20_000);
|
||||
let codec = match map.get("x-nv-vqos[0].bitStreamFormat").map(|s| s.trim()) {
|
||||
Some("1") => Codec::H265,
|
||||
Some("2") => Codec::Av1,
|
||||
_ => Codec::H264,
|
||||
};
|
||||
// Parity floor the client asks for (protects small frames); clamp to a sane max.
|
||||
let min_fec = parse_u("x-nv-vqos[0].fec.minRequiredFecPackets")
|
||||
.unwrap_or(2)
|
||||
.min(16) as u8;
|
||||
Some(StreamConfig {
|
||||
width,
|
||||
height,
|
||||
fps,
|
||||
packet_size,
|
||||
bitrate_kbps,
|
||||
codec,
|
||||
min_fec,
|
||||
})
|
||||
}
|
||||
|
||||
/// Pull the stream type out of a SETUP URI such as `…/streamid=video/0/0`.
///
/// Returns the token that follows `streamid=` (up to the next `/`) when it is `audio`,
/// `video` or `control`, and `None` otherwise.
fn stream_type(uri: &str) -> Option<&str> {
    let token = uri.split("streamid=").nth(1)?.split('/').next()?;
    matches!(token, "audio" | "video" | "control").then_some(token)
}
|
||||
|
||||
fn response(cseq: &str, headers: &[(&str, &str)], body: Option<&str>) -> String {
|
||||
response_status("200 OK", cseq, headers, body)
|
||||
}
|
||||
|
||||
/// Serialize an RTSP/1.0 response.
///
/// Layout: status line, `CSeq`, the given `headers` in order, `Content-Length` (byte
/// length of the body, `0` when `body` is `None`), a blank line, then the body.
fn response_status(
    status: &str,
    cseq: &str,
    headers: &[(&str, &str)],
    body: Option<&str>,
) -> String {
    let payload = body.unwrap_or_default();
    let extra: String = headers
        .iter()
        .map(|(name, value)| format!("{name}: {value}\r\n"))
        .collect();
    format!(
        "RTSP/1.0 {status}\r\nCSeq: {cseq}\r\n{extra}Content-Length: {}\r\n\r\n{payload}",
        payload.len()
    )
}
|
||||
|
||||
/// Return the index of the first occurrence of `needle` in `hay`, if any.
///
/// An empty `needle` matches at index 0 (as `str::find("")` does); without this guard
/// `slice::windows(0)` would panic.
fn find_subslice(hay: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    hay.windows(needle.len()).position(|w| w == needle)
}
|
||||
|
||||
/// Look up a header in a CRLF-separated header section.
///
/// The name comparison is ASCII case-insensitive. Returns the value of the first matching
/// line with leading whitespace removed (trailing whitespace is kept), or `None` when no
/// line matches.
fn header_value<'a>(head: &'a str, key_lower: &str) -> Option<&'a str> {
    for line in head.split("\r\n") {
        if let Some((name, value)) = line.split_once(':') {
            if name.trim().eq_ignore_ascii_case(key_lower) {
                return Some(value.trim_start());
            }
        }
    }
    None
}
|
||||
@@ -0,0 +1,42 @@
|
||||
//! The `/serverinfo` capability/status XML Moonlight GETs before pairing and each launch.
|
||||
|
||||
use super::{Host, APP_VERSION, GFE_VERSION, SERVER_CODEC_MODE_SUPPORT};
|
||||
|
||||
/// Build the `<root status_code="200">…</root>` serverinfo document. `https` selects the
|
||||
/// paired-HTTPS variant (real MAC). Element names are case-sensitive and match what
|
||||
/// moonlight-common-c parses.
|
||||
pub fn serverinfo_xml(host: &Host, https: bool) -> String {
|
||||
// MAC is hidden over plain HTTP; PairStatus reflects the pairing store once the HTTPS
|
||||
// path carries per-client identity (a hardening follow-up — 0 for now).
|
||||
let mac = if https {
|
||||
"01:02:03:04:05:06"
|
||||
} else {
|
||||
"00:00:00:00:00:00"
|
||||
};
|
||||
// Over the mutual-TLS HTTPS port the peer is an authenticated (paired) client.
|
||||
let pair_status = u8::from(https);
|
||||
format!(
|
||||
r#"<?xml version="1.0" encoding="utf-8"?>
|
||||
<root status_code="200">
|
||||
<hostname>{hostname}</hostname>
|
||||
<appversion>{APP_VERSION}</appversion>
|
||||
<GfeVersion>{GFE_VERSION}</GfeVersion>
|
||||
<uniqueid>{uniqueid}</uniqueid>
|
||||
<HttpsPort>{https_port}</HttpsPort>
|
||||
<ExternalPort>{http_port}</ExternalPort>
|
||||
<MaxLumaPixelsHEVC>1869449984</MaxLumaPixelsHEVC>
|
||||
<mac>{mac}</mac>
|
||||
<LocalIP>{local_ip}</LocalIP>
|
||||
<ServerCodecModeSupport>{SERVER_CODEC_MODE_SUPPORT}</ServerCodecModeSupport>
|
||||
<PairStatus>{pair_status}</PairStatus>
|
||||
<currentgame>0</currentgame>
|
||||
<state>SUNSHINE_SERVER_FREE</state>
|
||||
</root>
|
||||
"#,
|
||||
hostname = host.hostname,
|
||||
uniqueid = host.uniqueid,
|
||||
https_port = host.https_port,
|
||||
http_port = host.http_port,
|
||||
local_ip = host.local_ip,
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,478 @@
|
||||
//! The video data plane: on RTSP PLAY, learn the client's UDP endpoint (it pings the video
|
||||
//! port), then run capture → NVENC encode → [`VideoPacketizer`] → UDP send. The source is
|
||||
//! either real portal desktop capture (`PUNKTFUNK_VIDEO_SOURCE=portal`, the M0 PipeWire path) or
|
||||
//! a synthetic test pattern (default). Runs on its own native thread.
|
||||
|
||||
use super::video::{FrameType, VideoPacketizer};
|
||||
use super::VIDEO_PORT;
|
||||
use crate::capture::{self, Capturer, FastSyntheticCapturer};
|
||||
use crate::encode::{self, Codec};
|
||||
use anyhow::{Context, Result};
|
||||
use rand::Rng;
|
||||
use std::net::UdpSocket;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Negotiated video parameters from the RTSP ANNOUNCE.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct StreamConfig {
|
||||
pub width: u32,
|
||||
pub height: u32,
|
||||
pub fps: u32,
|
||||
pub packet_size: usize,
|
||||
pub bitrate_kbps: u32,
|
||||
pub codec: Codec,
|
||||
/// Client's `x-nv-vqos[0].fec.minRequiredFecPackets` — parity floor per FEC block.
|
||||
pub min_fec: u8,
|
||||
}
|
||||
|
||||
/// Slot for the persistent screen capturer, shared with the control plane and reused across
|
||||
/// streams so a reconnect doesn't open a second (conflicting) screencast session.
|
||||
pub type CapturerSlot = Arc<std::sync::Mutex<Option<Box<dyn Capturer>>>>;
|
||||
|
||||
/// Spawn the video stream thread (idempotent via `running`). Stops when `running` clears.
|
||||
/// `force_idr` is set by the control stream on a client recovery request; `video_cap` holds
|
||||
/// the persistent capturer the thread borrows for the stream's duration.
|
||||
pub fn start(
|
||||
cfg: StreamConfig,
|
||||
app: Option<super::apps::AppEntry>,
|
||||
running: Arc<AtomicBool>,
|
||||
force_idr: Arc<AtomicBool>,
|
||||
video_cap: CapturerSlot,
|
||||
) {
|
||||
let _ = std::thread::Builder::new()
|
||||
.name("punktfunk-video".into())
|
||||
.spawn(move || {
|
||||
tracing::info!(?cfg, "video stream starting");
|
||||
if let Err(e) = run(cfg, app.as_ref(), &running, &force_idr, &video_cap) {
|
||||
tracing::error!(error = %format!("{e:#}"), "video stream failed");
|
||||
}
|
||||
running.store(false, Ordering::SeqCst);
|
||||
tracing::info!("video stream stopped");
|
||||
});
|
||||
}
|
||||
|
||||
fn run(
|
||||
cfg: StreamConfig,
|
||||
app: Option<&super::apps::AppEntry>,
|
||||
running: &Arc<AtomicBool>,
|
||||
force_idr: &AtomicBool,
|
||||
video_cap: &std::sync::Mutex<Option<Box<dyn Capturer>>>,
|
||||
) -> Result<()> {
|
||||
// Reject an out-of-range client mode before allocating capture/encode buffers.
|
||||
encode::validate_dimensions(cfg.codec, cfg.width, cfg.height)
|
||||
.context("client-requested video mode")?;
|
||||
let sock = UdpSocket::bind(("0.0.0.0", VIDEO_PORT)).context("bind video UDP")?;
|
||||
// The client pings the video port so we learn where to send; it re-pings until video
|
||||
// flows, so a missed early ping is fine.
|
||||
sock.set_read_timeout(Some(Duration::from_secs(10)))?;
|
||||
tracing::info!(
|
||||
port = VIDEO_PORT,
|
||||
"video: awaiting client ping to learn endpoint"
|
||||
);
|
||||
let mut probe = [0u8; 256];
|
||||
let (_, client) = sock
|
||||
.recv_from(&mut probe)
|
||||
.context("video: no client ping within 10s")?;
|
||||
sock.connect(client)
|
||||
.context("connect client video endpoint")?;
|
||||
tracing::info!(%client, "video: client endpoint learned");
|
||||
|
||||
// Native client-resolution source: create a compositor virtual output sized to the client's
|
||||
// request and capture it (no scaling). Self-contained — deliberately NOT pooled in
|
||||
// `video_cap`, since a reconnect at a different resolution needs a freshly-sized output; the
|
||||
// output is released when this capturer drops at stream end (RAII via its keepalive).
|
||||
if std::env::var("PUNKTFUNK_VIDEO_SOURCE").as_deref() == Ok("virtual") {
|
||||
// The launched app picks the compositor (e.g. gamescope for game entries) and the
|
||||
// nested command; env vars remain manual overrides / fallbacks.
|
||||
let compositor = app
|
||||
.and_then(|a| a.compositor)
|
||||
.map(Ok)
|
||||
.unwrap_or_else(|| crate::vdisplay::detect().context("detect compositor"))?;
|
||||
if let Some(cmd) = app.and_then(|a| a.cmd.as_deref()) {
|
||||
// The gamescope backend reads the nested command from this env var; setting it
|
||||
// per-launch is safe (one stream session at a time).
|
||||
std::env::set_var("PUNKTFUNK_GAMESCOPE_APP", cmd);
|
||||
}
|
||||
tracing::info!(
|
||||
?compositor,
|
||||
app = ?app.map(|a| &a.title),
|
||||
w = cfg.width,
|
||||
h = cfg.height,
|
||||
"video source: virtual display (native client resolution)"
|
||||
);
|
||||
let mut vd = crate::vdisplay::open(compositor).context("open virtual display")?;
|
||||
let vout = vd
|
||||
.create(punktfunk_core::Mode {
|
||||
width: cfg.width,
|
||||
height: cfg.height,
|
||||
refresh_hz: cfg.fps,
|
||||
})
|
||||
.context("create virtual output at client resolution")?;
|
||||
let mut capturer =
|
||||
capture::capture_virtual_output(vout).context("capture virtual output")?;
|
||||
capturer.set_active(true);
|
||||
return stream_body(&mut *capturer, &sock, cfg, running, force_idr);
|
||||
}
|
||||
|
||||
// Reuse the persistent capturer (one screencast session → clean reconnect); create it on
|
||||
// the first stream. Borrow it for this stream and return it on exit.
|
||||
let mut capturer: Box<dyn Capturer> = match video_cap.lock().unwrap().take() {
|
||||
Some(c) => {
|
||||
tracing::info!("video source: reusing capturer");
|
||||
c
|
||||
}
|
||||
None if std::env::var("PUNKTFUNK_VIDEO_SOURCE").is_ok_and(|v| v == "portal") => {
|
||||
tracing::info!("video source: portal desktop capture");
|
||||
capture::open_portal_monitor().context("open portal capturer")?
|
||||
}
|
||||
None => {
|
||||
tracing::info!("video source: synthetic test pattern");
|
||||
Box::new(FastSyntheticCapturer::new(cfg.width, cfg.height))
|
||||
}
|
||||
};
|
||||
capturer.set_active(true);
|
||||
let result = stream_body(&mut *capturer, &sock, cfg, running, force_idr);
|
||||
capturer.set_active(false);
|
||||
*video_cap.lock().unwrap() = Some(capturer);
|
||||
result
|
||||
}
|
||||
|
||||
/// All wire datagrams belonging to a single frame, in send order. The encode thread builds
/// one batch per frame and hands it to the send thread over a channel.
type PacketBatch = Vec<Vec<u8>>;
|
||||
|
||||
/// Send `pkts` with as few syscalls as possible (`sendmmsg`, up to 64 per call). The socket is
|
||||
/// connected, so no per-message address. Returns an error on the first send failure.
|
||||
#[cfg(target_os = "linux")]
|
||||
fn sendmmsg_all(sock: &UdpSocket, pkts: &[Vec<u8>]) -> std::io::Result<()> {
|
||||
use std::os::fd::AsRawFd;
|
||||
const CHUNK: usize = 64;
|
||||
let fd = sock.as_raw_fd();
|
||||
for chunk in pkts.chunks(CHUNK) {
|
||||
let mut iovs: Vec<libc::iovec> = chunk
|
||||
.iter()
|
||||
.map(|p| libc::iovec {
|
||||
iov_base: p.as_ptr() as *mut libc::c_void,
|
||||
iov_len: p.len(),
|
||||
})
|
||||
.collect();
|
||||
let mut hdrs: Vec<libc::mmsghdr> = iovs
|
||||
.iter_mut()
|
||||
.map(|iov| {
|
||||
let mut h: libc::mmsghdr = unsafe { std::mem::zeroed() };
|
||||
h.msg_hdr.msg_iov = iov;
|
||||
h.msg_hdr.msg_iovlen = 1;
|
||||
h
|
||||
})
|
||||
.collect();
|
||||
let mut off = 0usize;
|
||||
while off < hdrs.len() {
|
||||
let n = unsafe {
|
||||
libc::sendmmsg(fd, hdrs[off..].as_mut_ptr(), (hdrs.len() - off) as u32, 0)
|
||||
};
|
||||
if n < 0 {
|
||||
return Err(std::io::Error::last_os_error());
|
||||
}
|
||||
off += n as usize;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Portable stand-in for the Linux `sendmmsg` path (non-Linux dev builds — GameStream hosting
/// never ships there): issues one `send` syscall per packet on the connected socket and stops
/// at the first failure.
#[cfg(not(target_os = "linux"))]
fn sendmmsg_all(sock: &UdpSocket, pkts: &[Vec<u8>]) -> std::io::Result<()> {
    pkts.iter()
        .try_for_each(|pkt| sock.send(pkt).map(drop))
}
|
||||
|
||||
/// Dedicated send thread: one [`PacketBatch`] per frame arrives on `rx`; its packets go out in
|
||||
/// `sendmmsg` chunks, paced so the frame's data spreads over ~3/4 of the frame interval
|
||||
/// (microburst shaping at chunk granularity — a real link drops line-rate bursts; the encode
|
||||
/// thread is never blocked by this). On send failure (client gone) it clears `running`.
|
||||
fn spawn_sender(
|
||||
sock: UdpSocket,
|
||||
rx: std::sync::mpsc::Receiver<PacketBatch>,
|
||||
frame_interval: Duration,
|
||||
running: Arc<AtomicBool>,
|
||||
drop_pct: u32,
|
||||
) -> Result<()> {
|
||||
std::thread::Builder::new()
|
||||
.name("punktfunk-send".into())
|
||||
.spawn(move || {
|
||||
// Chunk pacing: 16 packets per burst, bursts spread across the send budget.
|
||||
const PACE_CHUNK: usize = 16;
|
||||
let budget = frame_interval.mul_f32(0.75);
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut sent: u64 = 0;
|
||||
let mut dropped: u64 = 0;
|
||||
while let Ok(mut batch) = rx.recv() {
|
||||
if drop_pct > 0 {
|
||||
batch.retain(|_| {
|
||||
let keep = rng.gen_range(0..100) >= drop_pct;
|
||||
if !keep {
|
||||
dropped += 1;
|
||||
}
|
||||
keep
|
||||
});
|
||||
}
|
||||
let n = batch.len();
|
||||
if n == 0 {
|
||||
continue;
|
||||
}
|
||||
let per_chunk = budget.mul_f64((PACE_CHUNK as f64 / n as f64).min(1.0));
|
||||
let start = Instant::now();
|
||||
for (i, chunk) in batch.chunks(PACE_CHUNK).enumerate() {
|
||||
if let Err(e) = sendmmsg_all(&sock, chunk) {
|
||||
tracing::info!(error = %e, sent, "video: client unreachable — stopping stream");
|
||||
running.store(false, Ordering::SeqCst);
|
||||
return;
|
||||
}
|
||||
sent += chunk.len() as u64;
|
||||
// Sleep toward the next chunk's deadline; skip sub-500µs sleeps (jitter).
|
||||
let target = start + per_chunk.mul_f64((i + 1) as f64);
|
||||
if let Some(ahead) = target.checked_duration_since(Instant::now()) {
|
||||
if ahead >= Duration::from_micros(500) {
|
||||
std::thread::sleep(ahead);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::debug!(sent, dropped, "video sender exiting");
|
||||
})
|
||||
.context("spawn send thread")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// The encode → packetize loop, over a borrowed capturer. Sending runs on a dedicated thread
|
||||
/// (see [`spawn_sender`]) so a send spike can never stall capture/encode.
|
||||
fn stream_body(
|
||||
capturer: &mut dyn Capturer,
|
||||
sock: &UdpSocket,
|
||||
cfg: StreamConfig,
|
||||
running: &Arc<AtomicBool>,
|
||||
force_idr: &AtomicBool,
|
||||
) -> Result<()> {
|
||||
// The first frame establishes the authoritative size/format for the encoder.
|
||||
let mut frame = capturer.next_frame().context("capture first frame")?;
|
||||
if frame.width != cfg.width || frame.height != cfg.height {
|
||||
tracing::warn!(
|
||||
captured = ?(frame.width, frame.height),
|
||||
negotiated = ?(cfg.width, cfg.height),
|
||||
"captured size != negotiated size — Moonlight expects the negotiated size; resize the output"
|
||||
);
|
||||
}
|
||||
let mut enc = encode::open_video(
|
||||
cfg.codec,
|
||||
frame.format,
|
||||
frame.width,
|
||||
frame.height,
|
||||
cfg.fps,
|
||||
cfg.bitrate_kbps as u64 * 1000,
|
||||
frame.is_cuda(),
|
||||
)
|
||||
.context("open NVENC for stream")?;
|
||||
// FEC overhead percent (Sunshine default 20). Override with PUNKTFUNK_FEC_PCT (0 = data-only).
|
||||
let fec_pct: u8 = std::env::var("PUNKTFUNK_FEC_PCT")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(20);
|
||||
let mut pk = VideoPacketizer::new(cfg.packet_size, fec_pct, cfg.min_fec);
|
||||
|
||||
// Pace at the client's negotiated frame rate, re-encoding the last captured frame when the
|
||||
// compositor produced no new one. Compositors only emit frames on damage, so a static or
|
||||
// slow-updating desktop would otherwise starve the client into a "network too slow" abort.
|
||||
// Re-encoding an unchanged frame is cheap — NVENC emits a near-empty P-frame. The upper
|
||||
// bound just guards against an absurd client request (the encoder is opened at `cfg.fps`).
|
||||
let target_fps = cfg.fps.clamp(1, 240);
|
||||
let frame_interval = Duration::from_secs_f64(1.0 / target_fps as f64);
|
||||
let mut fps_count: u32 = 0;
|
||||
let mut fps_t = Instant::now();
|
||||
let stream_start = Instant::now();
|
||||
// Test knob: drop this % of outbound packets to exercise FEC recovery (0 = off).
|
||||
let drop_pct: u32 = std::env::var("PUNKTFUNK_VIDEO_DROP")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(0);
|
||||
let mut sent_batches: u64 = 0;
|
||||
let mut dropped_batches: u64 = 0;
|
||||
|
||||
// The send thread: one frame's batch at a time over a small bounded queue. Depth 2 means a
|
||||
// slow send can buffer one frame while the next encodes; beyond that the NEWEST batch is
|
||||
// dropped (the client recovers via FEC/RFI) rather than ever stalling the encode loop.
|
||||
let (batch_tx, batch_rx) = std::sync::mpsc::sync_channel::<PacketBatch>(2);
|
||||
spawn_sender(
|
||||
sock.try_clone().context("clone video socket")?,
|
||||
batch_rx,
|
||||
Duration::from_secs_f64(1.0 / target_fps as f64),
|
||||
running.clone(),
|
||||
drop_pct,
|
||||
)?;
|
||||
|
||||
// Per-stage timing (PUNKTFUNK_PERF=1): max µs/stage per second + unique vs re-encoded frames,
|
||||
// to pinpoint stalls. `unique` counts genuinely-new captured frames (vs re-encoded holds).
|
||||
let perf = std::env::var_os("PUNKTFUNK_PERF").is_some();
|
||||
let (mut mx_cap, mut mx_enc, mut mx_pkt, mut mx_send, mut mx_pkts, mut uniq) =
|
||||
(0u128, 0u128, 0u128, 0u128, 0usize, 0u32);
|
||||
// Absolute next-frame deadline — the single pacing clock for the loop.
|
||||
let mut next_frame = Instant::now();
|
||||
|
||||
while running.load(Ordering::SeqCst) {
|
||||
let tick = Instant::now();
|
||||
// Advance to the freshest captured frame if one arrived; otherwise reuse the last.
|
||||
if let Some(f) = capturer.try_latest().context("capture frame")? {
|
||||
frame = f;
|
||||
uniq += 1;
|
||||
}
|
||||
let t_cap = tick.elapsed();
|
||||
// Honor a client recovery request (RFI / request-IDR): force a keyframe so the client
|
||||
// resyncs immediately instead of waiting for the next GOP boundary.
|
||||
if force_idr.swap(false, Ordering::SeqCst) {
|
||||
enc.request_keyframe();
|
||||
}
|
||||
enc.submit(&frame).context("encoder submit")?;
|
||||
let t_enc = tick.elapsed();
|
||||
|
||||
// 90 kHz RTP timestamp from wall-clock, so a variable capture rate stays correct.
|
||||
let ts = (stream_start.elapsed().as_secs_f64() * 90_000.0) as u32;
|
||||
let mut batch: Vec<Vec<u8>> = Vec::new();
|
||||
while let Some(au) = enc.poll().context("encoder poll")? {
|
||||
let ft = if au.keyframe {
|
||||
FrameType::Idr
|
||||
} else {
|
||||
FrameType::P
|
||||
};
|
||||
batch.extend(pk.packetize(&au.data, ft, ts));
|
||||
}
|
||||
let t_pkt = tick.elapsed();
|
||||
|
||||
// Hand the frame's packets to the send thread; never block here. A full queue means
|
||||
// the sender is behind — drop this batch (FEC/RFI covers the client) and keep encoding.
|
||||
let n = batch.len();
|
||||
if n > 0 {
|
||||
match batch_tx.try_send(batch) {
|
||||
Ok(()) => sent_batches += 1,
|
||||
Err(std::sync::mpsc::TrySendError::Full(_)) => {
|
||||
dropped_batches += 1;
|
||||
if dropped_batches.is_power_of_two() {
|
||||
tracing::warn!(dropped_batches, "video: send queue full — frame dropped");
|
||||
}
|
||||
}
|
||||
Err(std::sync::mpsc::TrySendError::Disconnected(_)) => {
|
||||
break; // sender exited (client gone)
|
||||
}
|
||||
}
|
||||
}
|
||||
if perf {
|
||||
let t_send = tick.elapsed();
|
||||
mx_cap = mx_cap.max(t_cap.as_micros());
|
||||
mx_enc = mx_enc.max((t_enc - t_cap).as_micros());
|
||||
mx_pkt = mx_pkt.max((t_pkt - t_enc).as_micros());
|
||||
mx_send = mx_send.max((t_send - t_pkt).as_micros());
|
||||
mx_pkts = mx_pkts.max(n);
|
||||
}
|
||||
|
||||
fps_count += 1;
|
||||
if fps_t.elapsed() >= Duration::from_secs(1) {
|
||||
if perf {
|
||||
// Max µs/stage this second: cap=drain channel, enc=submit (zero-copy device
|
||||
// copy + NVENC), pkt=poll+FEC+packetize, send=paced packet send. `uniq`=new
|
||||
// captured frames (vs re-encoded). `pkts`=max packets in one frame (IDR spike).
|
||||
tracing::info!(
|
||||
fps = fps_count,
|
||||
uniq,
|
||||
enc_us = mx_enc,
|
||||
pkt_us = mx_pkt,
|
||||
send_us = mx_send,
|
||||
cap_us = mx_cap,
|
||||
max_pkts = mx_pkts,
|
||||
"video: streaming (perf)"
|
||||
);
|
||||
mx_cap = 0;
|
||||
mx_enc = 0;
|
||||
mx_pkt = 0;
|
||||
mx_send = 0;
|
||||
mx_pkts = 0;
|
||||
uniq = 0;
|
||||
} else {
|
||||
tracing::info!(
|
||||
fps = fps_count,
|
||||
sent_batches,
|
||||
dropped_batches,
|
||||
"video: streaming"
|
||||
);
|
||||
}
|
||||
fps_count = 0;
|
||||
fps_t = Instant::now();
|
||||
}
|
||||
// Single pacing authority: hold a steady cadence at the target rate from an absolute
|
||||
// clock. No double-sleep. If a slow frame put us behind, resync to now rather than
|
||||
// bursting to catch up.
|
||||
next_frame += frame_interval;
|
||||
match next_frame.checked_duration_since(Instant::now()) {
|
||||
Some(d) => std::thread::sleep(d),
|
||||
None => next_frame = Instant::now(),
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Drives the real send thread over loopback: every packet of every queued batch must
    /// reach the peer socket, full-length and byte-identical, via the paced sendmmsg path.
    #[test]
    fn sender_delivers_batches() {
        // Receiving peer, with a timeout so a lost packet fails the test instead of hanging.
        let rx_sock = UdpSocket::bind("127.0.0.1:0").unwrap();
        rx_sock
            .set_read_timeout(Some(Duration::from_secs(3)))
            .unwrap();
        let peer = rx_sock.local_addr().unwrap();

        let tx_sock = UdpSocket::bind("127.0.0.1:0").unwrap();
        tx_sock.connect(peer).unwrap();

        let running = Arc::new(AtomicBool::new(true));
        let (tx, rx) = std::sync::mpsc::sync_channel::<PacketBatch>(2);
        spawn_sender(
            tx_sock,
            rx,
            Duration::from_millis(8), // ~120fps frame interval
            running.clone(),
            0,
        )
        .unwrap();

        // 3 frames of 100 packets; byte 0 tags the frame, byte 1 the packet within it.
        let mut expected: Vec<Vec<u8>> = Vec::new();
        for frame_no in 0..3u8 {
            let mut batch = PacketBatch::with_capacity(100);
            for pkt_no in 0..100u8 {
                let mut pkt = vec![0u8; 1200];
                pkt[0] = frame_no;
                pkt[1] = pkt_no;
                batch.push(pkt);
            }
            expected.extend_from_slice(&batch);
            tx.send(batch).unwrap();
        }
        drop(tx); // sender drains then exits

        let mut received = 0usize;
        let mut buf = [0u8; 2048];
        for _ in 0..expected.len() {
            let len = rx_sock.recv(&mut buf).expect("packet within timeout");
            assert_eq!(len, 1200);
            let frame_no = buf[0] as usize;
            let pkt_no = buf[1] as usize;
            assert_eq!(
                &buf[..len],
                &expected[frame_no * 100 + pkt_no][..],
                "payload intact"
            );
            received += 1;
        }
        assert_eq!(received, 300);
        assert!(running.load(Ordering::SeqCst), "no spurious client-gone");
    }
}
|
||||
@@ -0,0 +1,99 @@
|
||||
//! TLS for the HTTPS nvhttp port (47984). Moonlight does **mutual TLS** — it presents its
|
||||
//! client cert and expects the server to request one — so a plain server-auth config makes
|
||||
//! the post-pairing `pairchallenge` fail. This config requests the client cert and verifies
|
||||
//! the client owns its key, but (for now) accepts any well-formed cert; enforcing the
|
||||
//! paired allow-list (rejecting unpaired clients on /launch) is a follow-up hardening step.
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use rustls::client::danger::HandshakeSignatureValid;
|
||||
use rustls::crypto::{verify_tls12_signature, verify_tls13_signature, CryptoProvider};
|
||||
use rustls::pki_types::{CertificateDer, UnixTime};
|
||||
use rustls::server::danger::{ClientCertVerified, ClientCertVerifier};
|
||||
use rustls::{DigitallySignedStruct, DistinguishedName, ServerConfig, SignatureScheme};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Requests + signature-checks the client cert but accepts any (the pairing handshake is
|
||||
/// the real proof). Pinning to the paired set is a hardening follow-up.
|
||||
#[derive(Debug)]
|
||||
struct AcceptAnyClientCert {
|
||||
provider: Arc<CryptoProvider>,
|
||||
}
|
||||
|
||||
impl ClientCertVerifier for AcceptAnyClientCert {
|
||||
fn offer_client_auth(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn client_auth_mandatory(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn root_hint_subjects(&self) -> &[DistinguishedName] {
|
||||
&[]
|
||||
}
|
||||
|
||||
fn verify_client_cert(
|
||||
&self,
|
||||
_end_entity: &CertificateDer,
|
||||
_intermediates: &[CertificateDer],
|
||||
_now: UnixTime,
|
||||
) -> Result<ClientCertVerified, rustls::Error> {
|
||||
Ok(ClientCertVerified::assertion())
|
||||
}
|
||||
|
||||
fn verify_tls12_signature(
|
||||
&self,
|
||||
message: &[u8],
|
||||
cert: &CertificateDer,
|
||||
dss: &DigitallySignedStruct,
|
||||
) -> Result<HandshakeSignatureValid, rustls::Error> {
|
||||
verify_tls12_signature(
|
||||
message,
|
||||
cert,
|
||||
dss,
|
||||
&self.provider.signature_verification_algorithms,
|
||||
)
|
||||
}
|
||||
|
||||
fn verify_tls13_signature(
|
||||
&self,
|
||||
message: &[u8],
|
||||
cert: &CertificateDer,
|
||||
dss: &DigitallySignedStruct,
|
||||
) -> Result<HandshakeSignatureValid, rustls::Error> {
|
||||
verify_tls13_signature(
|
||||
message,
|
||||
cert,
|
||||
dss,
|
||||
&self.provider.signature_verification_algorithms,
|
||||
)
|
||||
}
|
||||
|
||||
fn supported_verify_schemes(&self) -> Vec<SignatureScheme> {
|
||||
self.provider
|
||||
.signature_verification_algorithms
|
||||
.supported_schemes()
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a mutual-TLS `ServerConfig` presenting the host cert/key.
|
||||
pub fn server_config(cert_pem: &str, key_pem: &str) -> Result<Arc<ServerConfig>> {
|
||||
let provider = Arc::new(rustls::crypto::aws_lc_rs::default_provider());
|
||||
let certs = rustls_pemfile::certs(&mut cert_pem.as_bytes())
|
||||
.collect::<std::result::Result<Vec<_>, _>>()
|
||||
.context("parse host cert PEM")?;
|
||||
let key = rustls_pemfile::private_key(&mut key_pem.as_bytes())
|
||||
.context("parse host key PEM")?
|
||||
.ok_or_else(|| anyhow!("no private key in host key PEM"))?;
|
||||
|
||||
let verifier = Arc::new(AcceptAnyClientCert {
|
||||
provider: provider.clone(),
|
||||
});
|
||||
let config = ServerConfig::builder_with_provider(provider)
|
||||
.with_safe_default_protocol_versions()
|
||||
.context("rustls protocol versions")?
|
||||
.with_client_cert_verifier(verifier)
|
||||
.with_single_cert(certs, key)
|
||||
.context("rustls server cert")?;
|
||||
Ok(Arc::new(config))
|
||||
}
|
||||
@@ -0,0 +1,312 @@
|
||||
//! GameStream video wire packetization: an encoded access unit → UDP datagrams a stock
|
||||
//! Moonlight client decodes (and recovers under loss). Each datagram is
|
||||
//! `RTP_PACKET(12, big-endian) + reserved[4] + NV_VIDEO_PACKET(16, little-endian) + payload`
|
||||
//! and the frame's bitstream is prefixed with an 8-byte `video_short_frame_header_t`, then
|
||||
//! striped into ≤4 FEC blocks of ≤255 shards. Byte-exact spec:
|
||||
//! `docs/research/gamestream-protocol-research.json` (video plane).
|
||||
//!
|
||||
//! FEC (P1.5): each block carries `m = ⌈k·pct/100⌉` Reed–Solomon parity shards generated by
|
||||
//! `punktfunk_core::fec::Gf8Coder` (the nanors-compatible Cauchy GF(2⁸) coder). Crucially, RS runs
|
||||
//! over the **whole `blocksize` shard** — Moonlight decodes over `packetSize + 16` bytes from
|
||||
//! the datagram start (`RtpVideoQueue.c`), and rejects a recovered shard whose reconstructed
|
||||
//! `flags` byte isn't valid — so the NV header fields RS must reproduce (streamPacketIndex,
|
||||
//! frameIndex, flags, multiFec*) are written into the data shards **before** encoding, and only
|
||||
//! the transport fields (RTP header/seq/timestamp + fecInfo) are stamped **after**, matching
|
||||
//! Sunshine `stream.cpp`. `pct = 0` falls back to data-shards-only. Plaintext (AES-GCM video
|
||||
//! encryption is negotiated off for now).
|
||||
|
||||
use punktfunk_core::fec::{ErasureCoder, Gf8Coder};
|
||||
|
||||
/// RTP `header` byte: version 2 (0x80) | extension (0x10) — Moonlight keys on the extension.
const RTP_HEADER_BYTE: u8 = 0x80 | 0x10;
/// NV `flags` bit set on every data shard.
const FLAG_PIC: u8 = 1 << 0;
/// NV `flags` bit set on the last data shard of a frame.
const FLAG_EOF: u8 = 1 << 1;
/// NV `flags` bit set on the first data shard of a frame.
const FLAG_SOF: u8 = 1 << 2;
/// Value written to the `multiFecFlags` byte of every data shard.
const MULTI_FEC_FLAGS: u8 = 0x10;
/// Data shards allowed in one FEC block when no parity is generated.
const MAX_DATA_SHARDS_PER_BLOCK: usize = 255;
/// A frame is striped into at most this many FEC blocks.
const MAX_FEC_BLOCKS: usize = 4;
/// Per-shard header: RTP(12) + reserved(4) + NV_VIDEO_PACKET(16).
const SHARD_HEADER: usize = 12 + 4 + 16;
|
||||
|
||||
/// Kind of encoded frame handed to the packetizer; it selects the frame-type byte written
/// into the short frame header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FrameType {
    /// Keyframe (IDR).
    Idr,
    /// Predicted frame.
    P,
}
|
||||
|
||||
/// Turns encoded access units into GameStream video datagrams: data shards plus FEC parity
/// shards. Carries the per-stream counters (frame index, packet sequence) across frames.
pub struct VideoPacketizer {
    /// Negotiated `packetSize` (ANNOUNCE `x-nv-video[0].packetSize`).
    packet_size: usize,
    /// Payload bytes carried per shard: `blocksize - SHARD_HEADER`, where
    /// `blocksize = packetSize + 16`.
    payload_per_shard: usize,
    /// Requested FEC overhead percent (0 = data shards only). The wire carries the recomputed
    /// per-block `(100·m)/k` so Moonlight derives the same parity count.
    fec_percentage: usize,
    /// Minimum parity shards per block (the client's `fec.minRequiredFecPackets`) — protects
    /// small frames whose `⌈k·pct/100⌉` would otherwise be just 1.
    min_fec: usize,
    /// Index of the next frame to packetize; advances by one per `packetize` call.
    frame_index: u32,
    /// Monotonic per-stream packet counter (the RTP sequence / streamPacketIndex source).
    seq: u32,
}
|
||||
|
||||
impl VideoPacketizer {
|
||||
pub fn new(packet_size: usize, fec_percentage: u8, min_fec: u8) -> Self {
|
||||
VideoPacketizer {
|
||||
packet_size,
|
||||
payload_per_shard: packet_size + 16 - SHARD_HEADER,
|
||||
fec_percentage: fec_percentage as usize,
|
||||
min_fec: min_fec as usize,
|
||||
frame_index: 0,
|
||||
seq: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Packetize one encoded AU into wire datagrams (data shards + Cauchy RS parity shards).
|
||||
pub fn packetize(
|
||||
&mut self,
|
||||
au: &[u8],
|
||||
frame_type: FrameType,
|
||||
timestamp_90k: u32,
|
||||
) -> Vec<Vec<u8>> {
|
||||
let frame_index = self.frame_index;
|
||||
self.frame_index = self.frame_index.wrapping_add(1);
|
||||
let pps = self.payload_per_shard;
|
||||
let blocksize = SHARD_HEADER + pps; // = packet_size + 16
|
||||
let pct = self.fec_percentage;
|
||||
|
||||
// frame payload = 8-byte short frame header + the AU bitstream.
|
||||
let total_len = 8 + au.len();
|
||||
let last_payload_len = match total_len % pps {
|
||||
0 => pps,
|
||||
r => r,
|
||||
};
|
||||
let mut fp = Vec::with_capacity(total_len);
|
||||
fp.extend_from_slice(&short_frame_header(frame_type, last_payload_len as u16));
|
||||
fp.extend_from_slice(au);
|
||||
|
||||
let total_data = total_len.div_ceil(pps).max(1);
|
||||
// With parity, cap per-block data so k + m ≤ 255 (the GF(2⁸) ceiling): parity for k
|
||||
// data shards is ⌈k·pct/100⌉, so k ≤ 255·100/(100+pct).
|
||||
let max_data = if pct > 0 {
|
||||
(255 * 100) / (100 + pct)
|
||||
} else {
|
||||
MAX_DATA_SHARDS_PER_BLOCK
|
||||
};
|
||||
let n_blocks = total_data.div_ceil(max_data).clamp(1, MAX_FEC_BLOCKS);
|
||||
let per_block = total_data.div_ceil(n_blocks);
|
||||
|
||||
let mut packets = Vec::with_capacity(total_data + total_data * pct / 100 + n_blocks);
|
||||
for b in 0..n_blocks {
|
||||
let first = b * per_block;
|
||||
let last = ((b + 1) * per_block).min(total_data);
|
||||
if first >= last {
|
||||
break;
|
||||
}
|
||||
let k = last - first;
|
||||
let block_seq_base = self.seq;
|
||||
let multi_fec_blocks = ((b as u8) << 4) | (((n_blocks - 1) as u8) << 6);
|
||||
|
||||
// 1. Build this block's k data-shard datagrams (full `blocksize`), writing the NV
|
||||
// header fields RS must reproduce on recovery (streamPacketIndex, frameIndex,
|
||||
// flags, multiFec*). The RTP header + fecInfo are left zero (stamped post-RS).
|
||||
let mut shards: Vec<Vec<u8>> = Vec::with_capacity(k);
|
||||
for i in 0..k {
|
||||
let global = first + i;
|
||||
let seq = block_seq_base + i as u32;
|
||||
let mut buf = vec![0u8; blocksize];
|
||||
let mut flags = FLAG_PIC;
|
||||
if global == 0 {
|
||||
flags |= FLAG_SOF;
|
||||
}
|
||||
if global == total_data - 1 {
|
||||
flags |= FLAG_EOF;
|
||||
}
|
||||
buf[16..20].copy_from_slice(&(seq << 8).to_le_bytes()); // streamPacketIndex
|
||||
buf[20..24].copy_from_slice(&frame_index.to_le_bytes()); // frameIndex
|
||||
buf[24] = flags;
|
||||
buf[26] = MULTI_FEC_FLAGS;
|
||||
buf[27] = multi_fec_blocks;
|
||||
let ps = global * pps;
|
||||
let pe = (ps + pps).min(fp.len());
|
||||
buf[SHARD_HEADER..SHARD_HEADER + (pe - ps)].copy_from_slice(&fp[ps..pe]);
|
||||
shards.push(buf);
|
||||
}
|
||||
|
||||
// 2. m = ⌈k·pct/100⌉ parity shards (floored at the client's min, capped so k+m≤255)
|
||||
// over the full datagrams. The wire percentage is recomputed from m so the client
|
||||
// derives the same count.
|
||||
let m = if pct > 0 {
|
||||
(k * pct).div_ceil(100).max(self.min_fec).min(255 - k)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let wire_pct = if m > 0 { (100 * m) / k } else { 0 };
|
||||
let parity = if m > 0 {
|
||||
Gf8Coder.encode(&shards, m).unwrap_or_default()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
// 3. Stamp transport headers (RTP + fecInfo) on every shard. We do NOT touch the
|
||||
// flags/streamPacketIndex bytes, so a recovered data shard's RS-reconstructed
|
||||
// NV header stays valid.
|
||||
self.seq = block_seq_base + k as u32;
|
||||
for (i, mut buf) in shards.into_iter().enumerate() {
|
||||
let seq = block_seq_base + i as u32;
|
||||
finalize(
|
||||
&mut buf,
|
||||
seq,
|
||||
timestamp_90k,
|
||||
frame_index,
|
||||
multi_fec_blocks,
|
||||
fec_info(k, i, wire_pct),
|
||||
);
|
||||
packets.push(buf);
|
||||
}
|
||||
for (j, mut buf) in parity.into_iter().enumerate() {
|
||||
let seq = self.seq;
|
||||
self.seq = self.seq.wrapping_add(1);
|
||||
finalize(
|
||||
&mut buf,
|
||||
seq,
|
||||
timestamp_90k,
|
||||
frame_index,
|
||||
multi_fec_blocks,
|
||||
fec_info(k, k + j, wire_pct),
|
||||
);
|
||||
packets.push(buf);
|
||||
}
|
||||
}
|
||||
packets
|
||||
}
|
||||
}
|
||||
|
||||
/// Pack the `fecInfo` word (stored little-endian on the wire):
/// `dataShards<<22 | fecIndex<<12 | fecPercentage<<4`.
///
/// * `k` — number of data shards in the block.
/// * `fec_index` — this shard's index within the block.
/// * `pct` — the FEC percentage to advertise.
fn fec_info(k: usize, fec_index: usize, pct: usize) -> u32 {
    let data_shards = (k as u32) << 22;
    let index = (fec_index as u32) << 12;
    let percentage = (pct as u32) << 4;
    data_shards | index | percentage
}
|
||||
|
||||
/// Stamp the post-RS transport fields into a shard datagram (in place). Leaves the NV
|
||||
/// `flags`/`streamPacketIndex`/`multiFecFlags` bytes untouched (RS-covered).
|
||||
fn finalize(
|
||||
buf: &mut [u8],
|
||||
seq: u32,
|
||||
ts_90k: u32,
|
||||
frame_index: u32,
|
||||
multi_fec_blocks: u8,
|
||||
fec_info: u32,
|
||||
) {
|
||||
buf[0] = RTP_HEADER_BYTE; // header (version 2 + extension)
|
||||
buf[2..4].copy_from_slice(&(seq as u16).to_be_bytes()); // sequenceNumber (BE)
|
||||
buf[4..8].copy_from_slice(&ts_90k.to_be_bytes()); // timestamp (90 kHz, BE)
|
||||
buf[20..24].copy_from_slice(&frame_index.to_le_bytes()); // frameIndex (re-affirm for parity)
|
||||
buf[27] = multi_fec_blocks; // re-affirm for parity
|
||||
buf[28..32].copy_from_slice(&fec_info.to_le_bytes()); // fecInfo (LE)
|
||||
}
|
||||
|
||||
/// 8-byte `video_short_frame_header_t` (little-endian), prefixed to the AU bitstream.
|
||||
fn short_frame_header(frame_type: FrameType, last_payload_len: u16) -> [u8; 8] {
|
||||
let mut h = [0u8; 8];
|
||||
h[0] = 0x01; // headerType
|
||||
h[1..3].copy_from_slice(&0u16.to_le_bytes()); // frame_processing_latency
|
||||
h[3] = match frame_type {
|
||||
FrameType::Idr => 2,
|
||||
FrameType::P => 1,
|
||||
};
|
||||
h[4..6].copy_from_slice(&last_payload_len.to_le_bytes());
|
||||
// h[6..8] unknown = 0
|
||||
h
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Decode the little-endian `fecInfo` word stored at bytes 28..32 of a datagram.
    fn fec_info_of(pkt: &[u8]) -> u32 {
        u32::from_le_bytes(pkt[28..32].try_into().unwrap())
    }

    /// Decode the big-endian RTP sequence number stored at bytes 2..4 of a datagram.
    fn rtp_seq_of(pkt: &[u8]) -> u16 {
        u16::from_be_bytes(pkt[2..4].try_into().unwrap())
    }

    /// Data-only packetization of a small frame: shard count, sizes, flags, fecInfo, sequence.
    #[test]
    fn single_block_layout() {
        let mut pk = VideoPacketizer::new(1392, 0, 0); // data-only; pps = 1392+16-32 = 1376
        assert_eq!(pk.payload_per_shard, 1376);
        let au = vec![0xABu8; 4000]; // 8+4000 = 4008 → ceil(4008/1376) = 3 data shards
        let pkts = pk.packetize(&au, FrameType::Idr, 90_000);
        assert_eq!(pkts.len(), 3);
        for p in &pkts {
            assert_eq!(p.len(), SHARD_HEADER + 1376);
            assert_eq!(p[0], 0x90); // RTP header byte
        }

        let first = &pkts[0];
        assert_eq!(first[24] & FLAG_SOF, FLAG_SOF);
        assert_eq!(first[24] & FLAG_PIC, FLAG_PIC);
        let frame_index = u32::from_le_bytes(first[20..24].try_into().unwrap());
        assert_eq!(frame_index, 0);
        let first_info = fec_info_of(first);
        assert_eq!(first_info >> 22, 3); // dataShards = 3
        assert_eq!((first_info >> 12) & 0x3ff, 0); // fecIndex 0

        let last = &pkts[2];
        assert_eq!(last[24] & FLAG_EOF, FLAG_EOF);
        assert_eq!((fec_info_of(last) >> 12) & 0x3ff, 2);

        for (i, p) in pkts.iter().enumerate() {
            assert_eq!(rtp_seq_of(p), i as u16);
        }
    }

    /// A large data-only frame is split into several blocks; the last datagram reports the
    /// expected last-block index.
    #[test]
    fn multi_block_split() {
        let mut pk = VideoPacketizer::new(1392, 0, 0); // data-only
        let au = vec![0u8; 600_000];
        let pkts = pk.packetize(&au, FrameType::P, 0);
        let total = (8 + au.len()).div_ceil(1376);
        assert_eq!(pkts.len(), total);
        let n_blocks = total.div_ceil(255).clamp(1, 4);
        let tail = pkts.last().unwrap();
        let last_block = (tail[27] >> 6) & 0x3;
        assert_eq!(last_block as usize, n_blocks - 1);
    }

    /// With 20% FEC a 3-shard frame gains one parity shard, advertised consistently.
    #[test]
    fn emits_parity_shards() {
        let mut pk = VideoPacketizer::new(1392, 20, 0); // pps = 1376, 20% FEC
        let au = vec![0xABu8; 4000]; // 8+4000 = 4008 → 3 data shards (k=3)
        let pkts = pk.packetize(&au, FrameType::Idr, 0);
        // m = ceil(3*20/100) = 1 parity shard → 4 packets; wire_pct = 100*1/3 = 33.
        assert_eq!(pkts.len(), 4);
        for p in &pkts {
            let info = fec_info_of(p);
            assert_eq!(info >> 22, 3); // dataShards = k = 3
            assert_eq!((info >> 4) & 0xff, 33); // wire fecPercentage
        }
        // The parity shard is last: fecIndex = k = 3.
        assert_eq!((fec_info_of(&pkts[3]) >> 12) & 0x3ff, 3);
        // Data shards keep SOF (first) / EOF (last data shard) / PIC.
        assert_eq!(pkts[0][24] & FLAG_SOF, FLAG_SOF);
        assert_eq!(pkts[2][24] & FLAG_EOF, FLAG_EOF);
        // RTP sequence numbers are contiguous across data + parity (0,1,2,3).
        for (i, p) in pkts.iter().enumerate() {
            assert_eq!(rtp_seq_of(p), i as u16);
        }
    }

    /// End-to-end recovery: parity over the full datagram reconstructs a dropped data shard's
    /// payload AND its NV `flags` byte (the byte Moonlight validates), proving the layout.
    #[test]
    fn parity_recovers_full_datagram_incl_flags() {
        let mut pk = VideoPacketizer::new(1392, 50, 0); // high pct → plenty of parity
        let au = vec![0x5Au8; 4000]; // k = 3
        let pkts = pk.packetize(&au, FrameType::Idr, 0);
        let k = 3usize;
        let m = pkts.len() - k;
        assert!(m >= 1);

        // Drop data shard 1; reconstruct from the rest via the same Cauchy coder.
        let mut received: Vec<Option<Vec<u8>>> = pkts.iter().cloned().map(Some).collect();
        received[1] = None;
        let recovered = Gf8Coder.reconstruct(k, m, &mut received).unwrap();

        // The recovered shard equals the original data shard's RS-covered bytes: its flags
        // byte (offset 24) is PIC (middle shard), proving the NV header recovers correctly.
        assert_eq!(recovered[1][24], FLAG_PIC);
        // ...and the payload region matches the original.
        assert_eq!(recovered[1][SHARD_HEADER..], pkts[1][SHARD_HEADER..]);
    }
}
|
||||
@@ -0,0 +1,274 @@
|
||||
//! Input injection (plan §4): turn client [`punktfunk_core::input::InputEvent`]s into host input.
|
||||
//!
|
||||
//! The headless Sway compositor runs with `WLR_LIBINPUT_NO_DEVICES=1`, so kernel `uinput`
|
||||
//! devices are never picked up. Instead we inject through the wlroots virtual-input Wayland
|
||||
//! protocols — `zwlr_virtual_pointer_manager_v1` + `zwp_virtual_keyboard_manager_v1` — which
|
||||
//! Sway always advertises. We connect as an ordinary Wayland client (the host process
|
||||
//! inherits Sway's `WAYLAND_DISPLAY`/`XDG_RUNTIME_DIR`), bind the two managers, and translate
|
||||
//! events into virtual pointer/keyboard requests. Keyboard codes are Linux evdev; we upload a
|
||||
//! standard evdev/US xkb keymap and track modifier state so the compositor resolves shifted
|
||||
//! keysyms correctly.
|
||||
|
||||
use anyhow::Result;
|
||||
use punktfunk_core::input::InputEvent;
|
||||
|
||||
/// Injects input events into the host session. Not `Send`: an injector owns compositor
|
||||
/// resources (a Wayland connection, an xkb state) and lives entirely on the control thread
|
||||
/// that creates it.
|
||||
pub trait InputInjector {
|
||||
fn inject(&mut self, event: &InputEvent) -> Result<()>;
|
||||
}
|
||||
|
||||
/// Preferred injection backend.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Backend {
    /// wlroots virtual pointer + keyboard Wayland protocols — the headless-Sway path.
    WlrVirtual,
    /// libei via `reis` — Wayland-native, reached through the RemoteDesktop portal
    /// (the path used for KWin and GNOME sessions).
    Libei,
    /// libei directly against gamescope's own EIS socket (no portal): input lands in the
    /// nested game — the SteamOS-like session.
    GamescopeEi,
    /// `/dev/uinput` — universal fallback (but invisible to `WLR_LIBINPUT_NO_DEVICES=1`).
    /// Not yet implemented: `open` rejects this variant.
    Uinput,
}
|
||||
|
||||
pub fn open(backend: Backend) -> Result<Box<dyn InputInjector>> {
|
||||
match backend {
|
||||
Backend::WlrVirtual => {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
Ok(Box::new(wlr::WlrootsInjector::open()?))
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
anyhow::bail!("wlroots virtual input requires Linux + a Wayland compositor")
|
||||
}
|
||||
}
|
||||
Backend::Libei => {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
Ok(Box::new(libei::LibeiInjector::open()?))
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
anyhow::bail!("libei input requires Linux + a RemoteDesktop portal")
|
||||
}
|
||||
}
|
||||
Backend::GamescopeEi => {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
Ok(Box::new(libei::LibeiInjector::open_with(
|
||||
libei::EiSource::SocketPathFile(
|
||||
crate::vdisplay::gamescope_ei_socket_file().into(),
|
||||
),
|
||||
)?))
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
anyhow::bail!("gamescope EIS input requires Linux")
|
||||
}
|
||||
}
|
||||
other => anyhow::bail!("injection backend {other:?} not implemented"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Pick the injection backend for the current session. gamescope hosts its own EIS server (no
|
||||
/// portal), so a gamescope session injects directly into it. wlroots/Sway only implements the
|
||||
/// ScreenCast portal (no RemoteDesktop), so libei can't run there — use the wlr virtual-input
|
||||
/// protocols. KWin and GNOME implement RemoteDesktop but not the wlr protocols, so use libei.
|
||||
/// `PUNKTFUNK_INPUT_BACKEND=wlr|libei|gamescope|uinput` overrides the auto-detection.
|
||||
pub fn default_backend() -> Backend {
|
||||
if let Ok(v) = std::env::var("PUNKTFUNK_INPUT_BACKEND") {
|
||||
match v.trim().to_ascii_lowercase().as_str() {
|
||||
"wlr" | "wlroots" | "wlrvirtual" => return Backend::WlrVirtual,
|
||||
"libei" | "ei" | "portal" => return Backend::Libei,
|
||||
"gamescope" | "gamescope-ei" => return Backend::GamescopeEi,
|
||||
"uinput" => return Backend::Uinput,
|
||||
other => tracing::warn!(
|
||||
value = other,
|
||||
"unknown PUNKTFUNK_INPUT_BACKEND — auto-detecting"
|
||||
),
|
||||
}
|
||||
}
|
||||
if std::env::var("PUNKTFUNK_COMPOSITOR")
|
||||
.is_ok_and(|v| v.trim().eq_ignore_ascii_case("gamescope"))
|
||||
{
|
||||
return Backend::GamescopeEi;
|
||||
}
|
||||
let desktop = std::env::var("XDG_CURRENT_DESKTOP").unwrap_or_default();
|
||||
let d = desktop.to_ascii_uppercase();
|
||||
if d.contains("KDE") || d.contains("GNOME") {
|
||||
Backend::Libei
|
||||
} else {
|
||||
Backend::WlrVirtual
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate a Windows Virtual-Key code (as sent by Moonlight/GameStream) into the matching
/// Linux evdev key code, or `None` when the key has no mapping in this table.
pub fn vk_to_evdev(vk: u8) -> Option<u16> {
    /// evdev codes for VK_A..=VK_Z (0x41..=0x5A); evdev follows keyboard rows, not the alphabet.
    const LETTERS: [u16; 26] = [
        30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38, 50, // A..M
        49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, // N..Z
    ];
    /// evdev codes for VK_NUMPAD0..=VK_NUMPAD9 (0x60..=0x69).
    const NUMPAD: [u16; 10] = [82, 79, 80, 81, 75, 76, 77, 71, 72, 73];

    let code = match vk {
        // --- Navigation / editing / whitespace ---
        0x08 => 14,  // VK_BACK -> KEY_BACKSPACE
        0x09 => 15,  // VK_TAB -> KEY_TAB
        0x0D => 28,  // VK_RETURN -> KEY_ENTER
        0x13 => 119, // VK_PAUSE -> KEY_PAUSE
        0x14 => 58,  // VK_CAPITAL -> KEY_CAPSLOCK
        0x1B => 1,   // VK_ESCAPE -> KEY_ESC
        0x20 => 57,  // VK_SPACE -> KEY_SPACE
        0x21 => 104, // VK_PRIOR -> KEY_PAGEUP
        0x22 => 109, // VK_NEXT -> KEY_PAGEDOWN
        0x23 => 107, // VK_END -> KEY_END
        0x24 => 102, // VK_HOME -> KEY_HOME
        0x25 => 105, // VK_LEFT -> KEY_LEFT
        0x26 => 103, // VK_UP -> KEY_UP
        0x27 => 106, // VK_RIGHT -> KEY_RIGHT
        0x28 => 108, // VK_DOWN -> KEY_DOWN
        0x2C => 99,  // VK_SNAPSHOT -> KEY_SYSRQ
        0x2D => 110, // VK_INSERT -> KEY_INSERT
        0x2E => 111, // VK_DELETE -> KEY_DELETE

        // --- Generic (side-less) modifiers resolve to the left-hand key ---
        0x10 | 0xA0 => 42, // VK_SHIFT / VK_LSHIFT -> KEY_LEFTSHIFT
        0x11 | 0xA2 => 29, // VK_CONTROL / VK_LCONTROL -> KEY_LEFTCTRL
        0x12 | 0xA4 => 56, // VK_MENU / VK_LMENU -> KEY_LEFTALT
        0xA1 => 54,        // VK_RSHIFT -> KEY_RIGHTSHIFT
        0xA3 => 97,        // VK_RCONTROL -> KEY_RIGHTCTRL
        0xA5 => 100,       // VK_RMENU -> KEY_RIGHTALT

        // --- Digit row: KEY_1..KEY_9 are 2..10, KEY_0 comes after them at 11 ---
        0x30 => 11,
        0x31..=0x39 => u16::from(vk - 0x31) + 2,

        // --- Letters ---
        0x41..=0x5A => LETTERS[usize::from(vk - 0x41)],

        // --- Meta / context-menu ---
        0x5B => 125, // VK_LWIN -> KEY_LEFTMETA
        0x5C => 126, // VK_RWIN -> KEY_RIGHTMETA
        0x5D => 127, // VK_APPS -> KEY_COMPOSE

        // --- Numpad ---
        0x60..=0x69 => NUMPAD[usize::from(vk - 0x60)],
        0x6A => 55, // VK_MULTIPLY -> KEY_KPASTERISK
        0x6B => 78, // VK_ADD -> KEY_KPPLUS
        0x6C => 96, // VK_SEPARATOR -> KEY_KPENTER
        0x6D => 74, // VK_SUBTRACT -> KEY_KPMINUS
        0x6E => 83, // VK_DECIMAL -> KEY_KPDOT
        0x6F => 98, // VK_DIVIDE -> KEY_KPSLASH

        // --- Function keys: F1..F10 are contiguous from 59, F11/F12 sit apart at 87/88 ---
        0x70..=0x79 => u16::from(vk - 0x70) + 59,
        0x7A => 87,
        0x7B => 88,

        // --- Locks ---
        0x90 => 69, // VK_NUMLOCK -> KEY_NUMLOCK
        0x91 => 70, // VK_SCROLL -> KEY_SCROLLLOCK

        // --- OEM punctuation (US layout) ---
        0xBA => 39, // VK_OEM_1 -> KEY_SEMICOLON
        0xBB => 13, // VK_OEM_PLUS -> KEY_EQUAL
        0xBC => 51, // VK_OEM_COMMA -> KEY_COMMA
        0xBD => 12, // VK_OEM_MINUS -> KEY_MINUS
        0xBE => 52, // VK_OEM_PERIOD -> KEY_DOT
        0xBF => 53, // VK_OEM_2 -> KEY_SLASH
        0xC0 => 41, // VK_OEM_3 -> KEY_GRAVE
        0xDB => 26, // VK_OEM_4 -> KEY_LEFTBRACE
        0xDC => 43, // VK_OEM_5 -> KEY_BACKSLASH
        0xDD => 27, // VK_OEM_6 -> KEY_RIGHTBRACE
        0xDE => 40, // VK_OEM_7 -> KEY_APOSTROPHE
        0xE2 => 86, // VK_OEM_102 -> KEY_102ND

        _ => return None,
    };
    Some(code)
}
|
||||
|
||||
/// Translate a GameStream mouse button id (1=left … 5=X2) into the Linux evdev `BTN_*` code;
/// any other id yields `None`.
#[cfg(target_os = "linux")]
fn gs_button_to_evdev(b: u32) -> Option<u32> {
    match b {
        1 => Some(0x110), // BTN_LEFT
        2 => Some(0x112), // BTN_MIDDLE
        3 => Some(0x111), // BTN_RIGHT
        4 => Some(0x113), // BTN_SIDE (X1)
        5 => Some(0x114), // BTN_EXTRA (X2)
        _ => None,
    }
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub mod gamepad;
|
||||
/// Non-Linux stand-in: virtual gamepads need Linux uinput, so this manager exposes the
/// same methods but does nothing (events are dropped elsewhere).
#[cfg(not(target_os = "linux"))]
pub mod gamepad {
    /// No-op gamepad manager.
    #[derive(Default)]
    pub struct GamepadManager;

    impl GamepadManager {
        /// Build the (stateless) stub manager.
        pub fn new() -> Self {
            Self
        }

        /// Accepts and ignores a controller event.
        pub fn handle(&mut self, _ev: &crate::gamestream::gamepad::GamepadEvent) {}

        /// Never invokes `_send`: there are no pads to report rumble for.
        pub fn pump_rumble(&mut self, _send: impl FnMut(u16, u16, u16)) {}
    }
}
|
||||
#[cfg(target_os = "linux")]
|
||||
mod libei;
|
||||
#[cfg(target_os = "linux")]
|
||||
mod wlr;
|
||||
@@ -0,0 +1,515 @@
|
||||
//! Virtual gamepads via `/dev/uinput`, cloning the kernel `xpad` identity ("Microsoft X-Box
|
||||
//! 360 pad", `045e:028e`) so SDL/Steam/Proton match their built-in mapping with zero
|
||||
//! configuration — exactly what Sunshine emulates. One [`VirtualPad`] per attached client
|
||||
//! controller, managed by [`GamepadManager`] from decoded
|
||||
//! [`GamepadFrame`](crate::gamestream::gamepad::GamepadFrame)s.
|
||||
//!
|
||||
//! Rumble flows the *other* way on the same fd: games upload force-feedback effects
|
||||
//! (`EV_UINPUT`/`UI_FF_UPLOAD` → `UI_BEGIN/END_FF_UPLOAD` ioctls) and trigger them with
|
||||
//! `EV_FF` writes; [`GamepadManager::pump_rumble`] services that protocol non-blockingly
|
||||
//! (the control thread calls it every tick) and reports mixed `(low, high)` motor levels for
|
||||
//! the host to send to the client. Note: a game's `EVIOCSFF` ioctl BLOCKS until we answer
|
||||
//! `UI_END_FF_UPLOAD`, so the pump must run regularly.
|
||||
//!
|
||||
//! All ioctl numbers/struct layouts below were verified against this generation's
|
||||
//! `<linux/uinput.h>` on x86_64. `/dev/uinput` needs a udev rule + `input` group membership
|
||||
//! (see `scripts/60-punktfunk.rules`); creation fails with a clear error otherwise.
|
||||
|
||||
use crate::gamestream::gamepad::{self, GamepadFrame, MAX_PADS};
|
||||
use anyhow::{bail, Result};
|
||||
use std::collections::HashMap;
|
||||
use std::os::fd::{AsRawFd, OwnedFd};
|
||||
use std::time::Instant;
|
||||
|
||||
// ioctls (x86_64).
|
||||
const UI_DEV_CREATE: libc::c_ulong = 0x5501;
|
||||
const UI_DEV_DESTROY: libc::c_ulong = 0x5502;
|
||||
const UI_DEV_SETUP: libc::c_ulong = 0x405c_5503;
|
||||
const UI_ABS_SETUP: libc::c_ulong = 0x401c_5504;
|
||||
const UI_SET_EVBIT: libc::c_ulong = 0x4004_5564;
|
||||
const UI_SET_KEYBIT: libc::c_ulong = 0x4004_5565;
|
||||
const UI_SET_FFBIT: libc::c_ulong = 0x4004_556b;
|
||||
const UI_BEGIN_FF_UPLOAD: libc::c_ulong = 0xc068_55c8;
|
||||
const UI_END_FF_UPLOAD: libc::c_ulong = 0x4068_55c9;
|
||||
const UI_BEGIN_FF_ERASE: libc::c_ulong = 0xc00c_55ca;
|
||||
const UI_END_FF_ERASE: libc::c_ulong = 0x400c_55cb;
|
||||
|
||||
// evdev event types.
const EV_SYN: u16 = 0x00;
const EV_KEY: u16 = 0x01;
const EV_ABS: u16 = 0x03;
const EV_FF: u16 = 0x15;
const EV_UINPUT: u16 = 0x0101;

// Event codes used with the types above.
const SYN_REPORT: u16 = 0; // EV_SYN
const UI_FF_UPLOAD: u16 = 1; // EV_UINPUT
const UI_FF_ERASE: u16 = 2; // EV_UINPUT
const FF_RUMBLE: u16 = 0x50; // force-feedback effect type
const FF_GAIN: u16 = 0x60; // EV_FF

// Absolute axes: two sticks, two triggers, and the D-pad hat.
const ABS_X: u16 = 0x00;
const ABS_Y: u16 = 0x01;
const ABS_Z: u16 = 0x02;
const ABS_RX: u16 = 0x03;
const ABS_RY: u16 = 0x04;
const ABS_RZ: u16 = 0x05;
const ABS_HAT0X: u16 = 0x10;
const ABS_HAT0Y: u16 = 0x11;

// Gamepad buttons.
const BTN_SOUTH: u16 = 0x130; // A
const BTN_EAST: u16 = 0x131; // B
const BTN_NORTH: u16 = 0x133; // X (kernel calls it BTN_NORTH/BTN_X)
const BTN_WEST: u16 = 0x134; // Y
const BTN_TL: u16 = 0x136;
const BTN_TR: u16 = 0x137;
const BTN_SELECT: u16 = 0x13a;
const BTN_START: u16 = 0x13b;
const BTN_MODE: u16 = 0x13c;
const BTN_THUMBL: u16 = 0x13d;
const BTN_THUMBR: u16 = 0x13e;
|
||||
|
||||
/// `(GameStream button bit, evdev key code)` — D-pad is emitted as HAT axes instead.
|
||||
const BUTTON_MAP: [(u32, u16); 11] = [
|
||||
(gamepad::BTN_A, BTN_SOUTH),
|
||||
(gamepad::BTN_B, BTN_EAST),
|
||||
(gamepad::BTN_X, BTN_NORTH),
|
||||
(gamepad::BTN_Y, BTN_WEST),
|
||||
(gamepad::BTN_LB, BTN_TL),
|
||||
(gamepad::BTN_RB, BTN_TR),
|
||||
(gamepad::BTN_BACK, BTN_SELECT),
|
||||
(gamepad::BTN_START, BTN_START),
|
||||
(gamepad::BTN_GUIDE, BTN_MODE),
|
||||
(gamepad::BTN_LS_CLK, BTN_THUMBL),
|
||||
(gamepad::BTN_RS_CLK, BTN_THUMBR),
|
||||
];
|
||||
|
||||
/// Device identity block embedded in [`UinputSetup`]: four consecutive `u16`s in C layout.
#[repr(C)]
struct InputId {
    /// Bus the device claims to sit on.
    bustype: u16,
    /// Vendor id.
    vendor: u16,
    /// Product id.
    product: u16,
    /// Device version.
    version: u16,
}
|
||||
|
||||
#[repr(C)]
|
||||
struct UinputSetup {
|
||||
id: InputId,
|
||||
name: [u8; 80],
|
||||
ff_effects_max: u32,
|
||||
}
|
||||
|
||||
/// Range description of one absolute axis: six `i32`s in C layout. `Default` zeroes every
/// field, so callers spell out only the ones they need.
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct AbsInfo {
    /// Current axis value.
    value: i32,
    /// Smallest value the axis reports.
    minimum: i32,
    /// Largest value the axis reports.
    maximum: i32,
    /// Noise-filter amount.
    fuzz: i32,
    /// Dead-zone size.
    flat: i32,
    /// Axis resolution.
    resolution: i32,
}
|
||||
|
||||
#[repr(C)]
|
||||
struct UinputAbsSetup {
|
||||
code: u16,
|
||||
_pad: u16,
|
||||
absinfo: AbsInfo,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
struct InputEventRaw {
|
||||
time: libc::timeval,
|
||||
type_: u16,
|
||||
code: u16,
|
||||
value: i32,
|
||||
}
|
||||
|
||||
/// Mirror of the kernel's `struct ff_effect`: 48 bytes, with the per-type union kept as an
/// opaque 32-byte blob starting at offset 16 (where the C union sits, 8-aligned).
#[repr(C)]
#[derive(Clone, Copy)]
struct FfEffect {
    /// Effect type (e.g. `FF_RUMBLE`).
    type_: u16,
    /// Effect slot id; `-1` asks for a new slot.
    id: i16,
    direction: u16,
    trigger_button: u16,
    trigger_interval: u16,
    /// Playback length; stored by the pump as milliseconds, `0` meaning "until stopped".
    replay_length: u16,
    replay_delay: u16,
    /// Filler that pushes the union to offset 16.
    _pad: u16,
    /// Union; for `FF_RUMBLE`: `u16 strong_magnitude` at [0..2], `u16 weak_magnitude` at [2..4].
    u: [u8; 32],
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
struct UinputFfUpload {
|
||||
request_id: u32,
|
||||
retval: i32,
|
||||
effect: FfEffect,
|
||||
old: FfEffect,
|
||||
}
|
||||
|
||||
/// Argument of the `UI_BEGIN_FF_ERASE` / `UI_END_FF_ERASE` ioctls (12 bytes, checked by the
/// layout assertions).
#[repr(C)]
#[derive(Clone, Copy)]
struct UinputFfErase {
    /// Request id taken from the `EV_UINPUT`/`UI_FF_ERASE` event's value.
    request_id: u32,
    /// Result reported back to the kernel (`0` = success).
    retval: i32,
    /// Id of the effect to erase.
    effect_id: u32,
}
|
||||
|
||||
// Layouts verified by compiling a probe against this generation's <linux/uinput.h> (x86_64).
|
||||
const _: () = {
|
||||
assert!(std::mem::size_of::<UinputSetup>() == 92);
|
||||
assert!(std::mem::size_of::<UinputAbsSetup>() == 28);
|
||||
assert!(std::mem::size_of::<InputEventRaw>() == 24);
|
||||
assert!(std::mem::size_of::<FfEffect>() == 48);
|
||||
assert!(std::mem::size_of::<UinputFfUpload>() == 104);
|
||||
assert!(std::mem::size_of::<UinputFfErase>() == 12);
|
||||
};
|
||||
|
||||
fn ioctl_int(fd: i32, req: libc::c_ulong, arg: libc::c_int, what: &str) -> Result<()> {
|
||||
if unsafe { libc::ioctl(fd, req, arg) } < 0 {
|
||||
bail!("{what}: {}", std::io::Error::last_os_error());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn ioctl_ptr<T>(fd: i32, req: libc::c_ulong, arg: *mut T, what: &str) -> Result<()> {
|
||||
if unsafe { libc::ioctl(fd, req, arg) } < 0 {
|
||||
bail!("{what}: {}", std::io::Error::last_os_error());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// A force-feedback effect uploaded by a game: its rumble magnitudes plus playback state.
struct Effect {
    /// Strong-motor magnitude.
    strong: u16,
    /// Weak-motor magnitude.
    weak: u16,
    /// Playback state: `None` = stopped; `Some(None)` = playing until stopped (replay
    /// length 0); `Some(Some(deadline))` = playing until `deadline`.
    playing: Option<Option<Instant>>,
    /// Replay length in milliseconds (`0` = no deadline).
    replay_ms: u16,
}
|
||||
|
||||
/// One virtual X-Box-360 pad backed by a uinput device.
|
||||
pub struct VirtualPad {
|
||||
fd: OwnedFd,
|
||||
prev_buttons: u32,
|
||||
effects: HashMap<i16, Effect>,
|
||||
next_effect_id: i16,
|
||||
gain: u32,
|
||||
/// Last `(low, high)` reported, to dedup.
|
||||
last_mix: (u16, u16),
|
||||
}
|
||||
|
||||
impl VirtualPad {
|
||||
pub fn create(index: usize) -> Result<VirtualPad> {
|
||||
use std::os::fd::FromRawFd;
|
||||
let raw = unsafe {
|
||||
libc::open(
|
||||
c"/dev/uinput".as_ptr(),
|
||||
libc::O_RDWR | libc::O_NONBLOCK | libc::O_CLOEXEC,
|
||||
)
|
||||
};
|
||||
if raw < 0 {
|
||||
bail!(
|
||||
"open /dev/uinput: {} (install the udev rule granting the 'input' group access \
|
||||
— see scripts/60-punktfunk.rules — and add the user to the 'input' group)",
|
||||
std::io::Error::last_os_error()
|
||||
);
|
||||
}
|
||||
let fd = unsafe { OwnedFd::from_raw_fd(raw) };
|
||||
|
||||
ioctl_int(raw, UI_SET_EVBIT, EV_KEY as i32, "UI_SET_EVBIT(EV_KEY)")?;
|
||||
ioctl_int(raw, UI_SET_EVBIT, EV_ABS as i32, "UI_SET_EVBIT(EV_ABS)")?;
|
||||
ioctl_int(raw, UI_SET_EVBIT, EV_FF as i32, "UI_SET_EVBIT(EV_FF)")?;
|
||||
for (_, key) in BUTTON_MAP {
|
||||
ioctl_int(raw, UI_SET_KEYBIT, key as i32, "UI_SET_KEYBIT")?;
|
||||
}
|
||||
ioctl_int(
|
||||
raw,
|
||||
UI_SET_FFBIT,
|
||||
FF_RUMBLE as i32,
|
||||
"UI_SET_FFBIT(FF_RUMBLE)",
|
||||
)?;
|
||||
ioctl_int(raw, UI_SET_FFBIT, FF_GAIN as i32, "UI_SET_FFBIT(FF_GAIN)")?;
|
||||
|
||||
let stick = AbsInfo {
|
||||
minimum: -32768,
|
||||
maximum: 32767,
|
||||
fuzz: 16,
|
||||
flat: 128,
|
||||
..Default::default()
|
||||
};
|
||||
let trigger = AbsInfo {
|
||||
minimum: 0,
|
||||
maximum: 255,
|
||||
..Default::default()
|
||||
};
|
||||
let hat = AbsInfo {
|
||||
minimum: -1,
|
||||
maximum: 1,
|
||||
..Default::default()
|
||||
};
|
||||
for (code, info) in [
|
||||
(ABS_X, stick),
|
||||
(ABS_Y, stick),
|
||||
(ABS_RX, stick),
|
||||
(ABS_RY, stick),
|
||||
(ABS_Z, trigger),
|
||||
(ABS_RZ, trigger),
|
||||
(ABS_HAT0X, hat),
|
||||
(ABS_HAT0Y, hat),
|
||||
] {
|
||||
let mut a = UinputAbsSetup {
|
||||
code,
|
||||
_pad: 0,
|
||||
absinfo: info,
|
||||
};
|
||||
ioctl_ptr(raw, UI_ABS_SETUP, &mut a, "UI_ABS_SETUP")?;
|
||||
}
|
||||
|
||||
// The xpad identity: SDL keys its built-in mapping off bustype/vendor/product/version.
|
||||
let mut setup = UinputSetup {
|
||||
id: InputId {
|
||||
bustype: 0x0003, // BUS_USB
|
||||
vendor: 0x045e,
|
||||
product: 0x028e,
|
||||
version: 0x0110,
|
||||
},
|
||||
name: [0; 80],
|
||||
ff_effects_max: 16, // must be > 0 or FF uploads are never delivered
|
||||
};
|
||||
let name = b"Microsoft X-Box 360 pad";
|
||||
setup.name[..name.len()].copy_from_slice(name);
|
||||
ioctl_ptr(raw, UI_DEV_SETUP, &mut setup, "UI_DEV_SETUP")?;
|
||||
ioctl_int(raw, UI_DEV_CREATE, 0, "UI_DEV_CREATE")?;
|
||||
tracing::info!(index, "virtual gamepad created (X-Box 360 pad via uinput)");
|
||||
|
||||
Ok(VirtualPad {
|
||||
fd,
|
||||
prev_buttons: 0,
|
||||
effects: HashMap::new(),
|
||||
next_effect_id: 0,
|
||||
gain: 0xFFFF,
|
||||
last_mix: (0, 0),
|
||||
})
|
||||
}
|
||||
|
||||
fn emit(&self, type_: u16, code: u16, value: i32) {
|
||||
let ev = InputEventRaw {
|
||||
time: libc::timeval {
|
||||
tv_sec: 0,
|
||||
tv_usec: 0,
|
||||
},
|
||||
type_,
|
||||
code,
|
||||
value,
|
||||
};
|
||||
let bytes = unsafe {
|
||||
std::slice::from_raw_parts(
|
||||
&ev as *const _ as *const u8,
|
||||
std::mem::size_of::<InputEventRaw>(),
|
||||
)
|
||||
};
|
||||
// Best-effort: a full kernel queue drops the event; the next frame re-syncs state.
|
||||
let _ = unsafe {
|
||||
libc::write(
|
||||
self.fd.as_raw_fd(),
|
||||
bytes.as_ptr() as *const libc::c_void,
|
||||
bytes.len(),
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
/// Apply one decoded frame: button transitions, axes, D-pad hat, one SYN_REPORT.
|
||||
pub fn apply(&mut self, f: &GamepadFrame) {
|
||||
let changed = self.prev_buttons ^ f.buttons;
|
||||
for (bit, key) in BUTTON_MAP {
|
||||
if changed & bit != 0 {
|
||||
self.emit(EV_KEY, key, ((f.buttons & bit) != 0) as i32);
|
||||
}
|
||||
}
|
||||
self.prev_buttons = f.buttons;
|
||||
|
||||
// Moonlight: +Y = up; evdev: +Y = down → negate (i32 math avoids -(-32768) overflow).
|
||||
self.emit(EV_ABS, ABS_X, f.ls_x as i32);
|
||||
self.emit(EV_ABS, ABS_Y, -(f.ls_y as i32));
|
||||
self.emit(EV_ABS, ABS_RX, f.rs_x as i32);
|
||||
self.emit(EV_ABS, ABS_RY, -(f.rs_y as i32));
|
||||
self.emit(EV_ABS, ABS_Z, f.left_trigger as i32);
|
||||
self.emit(EV_ABS, ABS_RZ, f.right_trigger as i32);
|
||||
let hat_x = ((f.buttons & gamepad::BTN_DPAD_RIGHT != 0) as i32)
|
||||
- ((f.buttons & gamepad::BTN_DPAD_LEFT != 0) as i32);
|
||||
let hat_y = ((f.buttons & gamepad::BTN_DPAD_DOWN != 0) as i32)
|
||||
- ((f.buttons & gamepad::BTN_DPAD_UP != 0) as i32);
|
||||
self.emit(EV_ABS, ABS_HAT0X, hat_x);
|
||||
self.emit(EV_ABS, ABS_HAT0Y, hat_y);
|
||||
self.emit(EV_SYN, SYN_REPORT, 0);
|
||||
}
|
||||
|
||||
/// Service the FF protocol on this pad's fd (non-blocking). Returns the new mixed
|
||||
/// `(low, high)` motor levels if they changed since last call.
|
||||
fn pump_ff(&mut self) -> Option<(u16, u16)> {
|
||||
let raw = self.fd.as_raw_fd();
|
||||
let mut buf = [0u8; std::mem::size_of::<InputEventRaw>()];
|
||||
loop {
|
||||
let n = unsafe { libc::read(raw, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
|
||||
if n != buf.len() as isize {
|
||||
break; // EAGAIN / short read — queue drained
|
||||
}
|
||||
let ev: InputEventRaw = unsafe { std::ptr::read(buf.as_ptr() as *const _) };
|
||||
match (ev.type_, ev.code) {
|
||||
(EV_UINPUT, UI_FF_UPLOAD) => {
|
||||
let mut up: UinputFfUpload = unsafe { std::mem::zeroed() };
|
||||
up.request_id = ev.value as u32;
|
||||
if ioctl_ptr(raw, UI_BEGIN_FF_UPLOAD, &mut up, "UI_BEGIN_FF_UPLOAD").is_ok() {
|
||||
let mut e = up.effect;
|
||||
if e.id == -1 {
|
||||
e.id = self.next_effect_id;
|
||||
self.next_effect_id = self.next_effect_id.wrapping_add(1);
|
||||
}
|
||||
if e.type_ == FF_RUMBLE {
|
||||
let strong = u16::from_ne_bytes([e.u[0], e.u[1]]);
|
||||
let weak = u16::from_ne_bytes([e.u[2], e.u[3]]);
|
||||
let slot = self.effects.entry(e.id).or_insert(Effect {
|
||||
strong: 0,
|
||||
weak: 0,
|
||||
playing: None,
|
||||
replay_ms: 0,
|
||||
});
|
||||
slot.strong = strong;
|
||||
slot.weak = weak;
|
||||
slot.replay_ms = e.replay_length;
|
||||
}
|
||||
up.effect.id = e.id; // hand the assigned slot back to the kernel
|
||||
up.retval = 0;
|
||||
let _ = ioctl_ptr(raw, UI_END_FF_UPLOAD, &mut up, "UI_END_FF_UPLOAD");
|
||||
}
|
||||
}
|
||||
(EV_UINPUT, UI_FF_ERASE) => {
|
||||
let mut er: UinputFfErase = unsafe { std::mem::zeroed() };
|
||||
er.request_id = ev.value as u32;
|
||||
if ioctl_ptr(raw, UI_BEGIN_FF_ERASE, &mut er, "UI_BEGIN_FF_ERASE").is_ok() {
|
||||
self.effects.remove(&(er.effect_id as i16));
|
||||
er.retval = 0;
|
||||
let _ = ioctl_ptr(raw, UI_END_FF_ERASE, &mut er, "UI_END_FF_ERASE");
|
||||
}
|
||||
}
|
||||
(EV_FF, FF_GAIN) => self.gain = (ev.value as u32).min(0xFFFF),
|
||||
(EV_FF, code) => {
|
||||
if let Some(e) = self.effects.get_mut(&(code as i16)) {
|
||||
e.playing = if ev.value != 0 {
|
||||
Some((e.replay_ms > 0).then(|| {
|
||||
Instant::now()
|
||||
+ std::time::Duration::from_millis(e.replay_ms as u64)
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Mix: sum playing effects (expiring finished ones), scale by gain.
|
||||
let now = Instant::now();
|
||||
let (mut strong, mut weak) = (0u32, 0u32);
|
||||
for e in self.effects.values_mut() {
|
||||
if let Some(deadline) = e.playing {
|
||||
if deadline.is_some_and(|d| now >= d) {
|
||||
e.playing = None;
|
||||
} else {
|
||||
strong = strong.saturating_add(e.strong as u32);
|
||||
weak = weak.saturating_add(e.weak as u32);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Linux FF: strong = low-frequency (big) motor, weak = high-frequency motor.
|
||||
let low = ((strong.min(0xFFFF) * self.gain) >> 16) as u16;
|
||||
let high = ((weak.min(0xFFFF) * self.gain) >> 16) as u16;
|
||||
(self.last_mix != (low, high)).then(|| {
|
||||
self.last_mix = (low, high);
|
||||
(low, high)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for VirtualPad {
|
||||
fn drop(&mut self) {
|
||||
let _ = unsafe { libc::ioctl(self.fd.as_raw_fd(), UI_DEV_DESTROY, 0) };
|
||||
}
|
||||
}
|
||||
|
||||
/// All virtual pads of a session, driven from decoded controller events.
|
||||
#[derive(Default)]
|
||||
pub struct GamepadManager {
|
||||
pads: Vec<Option<VirtualPad>>,
|
||||
/// Pad creation failed (e.g. /dev/uinput permissions) — warn once, drop events.
|
||||
broken: bool,
|
||||
}
|
||||
|
||||
impl GamepadManager {
|
||||
pub fn new() -> GamepadManager {
|
||||
GamepadManager {
|
||||
pads: (0..MAX_PADS).map(|_| None).collect(),
|
||||
broken: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle one decoded controller event (create/destroy by mask, then apply state).
|
||||
pub fn handle(&mut self, ev: &crate::gamestream::gamepad::GamepadEvent) {
|
||||
use crate::gamestream::gamepad::GamepadEvent;
|
||||
match ev {
|
||||
GamepadEvent::Arrival { index, kind, .. } => {
|
||||
tracing::info!(index, kind, "controller arrival");
|
||||
self.ensure(*index as usize);
|
||||
}
|
||||
GamepadEvent::State(f) => {
|
||||
let idx = f.index as usize;
|
||||
if idx >= MAX_PADS {
|
||||
return;
|
||||
}
|
||||
// Unplugs: drop any allocated pad whose mask bit cleared.
|
||||
for (i, slot) in self.pads.iter_mut().enumerate() {
|
||||
if slot.is_some() && f.active_mask & (1 << i) == 0 {
|
||||
tracing::info!(index = i, "controller unplugged");
|
||||
*slot = None;
|
||||
}
|
||||
}
|
||||
if f.active_mask & (1 << idx) == 0 {
|
||||
return; // this event WAS the unplug
|
||||
}
|
||||
self.ensure(idx);
|
||||
if let Some(pad) = self.pads[idx].as_mut() {
|
||||
pad.apply(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn ensure(&mut self, idx: usize) {
|
||||
if idx >= MAX_PADS || self.pads[idx].is_some() || self.broken {
|
||||
return;
|
||||
}
|
||||
match VirtualPad::create(idx) {
|
||||
Ok(p) => self.pads[idx] = Some(p),
|
||||
Err(e) => {
|
||||
tracing::error!(error = %format!("{e:#}"), "virtual gamepad creation failed — controller input disabled");
|
||||
self.broken = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Service every pad's FF protocol; `send(index, low, high)` is invoked for each pad whose
|
||||
/// mixed rumble level changed. Call frequently (games block in `EVIOCSFF` until answered).
|
||||
pub fn pump_rumble(&mut self, mut send: impl FnMut(u16, u16, u16)) {
|
||||
for (i, slot) in self.pads.iter_mut().enumerate() {
|
||||
if let Some(pad) = slot {
|
||||
if let Some((low, high)) = pad.pump_ff() {
|
||||
send(i as u16, low, high);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,409 @@
|
||||
//! libei input injection — the portable EI-sender path.
|
||||
//!
|
||||
//! Two ways to reach an EIS server ([`EiSource`]):
|
||||
//! * **Portal** — `org.freedesktop.portal.RemoteDesktop` via `ashpd` (KWin, GNOME/Mutter),
|
||||
//! which hands us the EIS socket fd after the session grant.
|
||||
//! * **Socket** — connect directly to a compositor's own EIS socket. gamescope runs an EIS
|
||||
//! server and exports its path to its children as `LIBEI_SOCKET`; our gamescope backend
|
||||
//! relays that path through a file so the injector can connect (no portal involved).
|
||||
//!
|
||||
//! Either way, `reis` drives the connection as an EI *sender*: bind the seat's
|
||||
//! pointer/keyboard/scroll/button capabilities and, per device, `start_emulating` → emit →
|
||||
//! `frame`. The session and the EIS connection must stay alive and the event stream must be
|
||||
//! polled continuously (resume/pause/ping/modifier traffic), so the whole thing runs on a
|
||||
//! dedicated thread with its own tokio runtime; the synchronous control thread reaches it
|
||||
//! through an unbounded channel and [`LibeiInjector::inject`] merely enqueues.
|
||||
//!
|
||||
//! Keyboard codes are Linux evdev (the same space our VK→evdev table produces) and the
|
||||
//! compositor supplies the keymap, so — unlike the wlr path — there is no keymap to upload and
|
||||
//! no modifier mask to serialize: pressing the modifier *keys* (which Moonlight sends as normal
|
||||
//! key events) is enough.
|
||||
|
||||
use super::{gs_button_to_evdev, vk_to_evdev, InputInjector};
|
||||
use anyhow::{anyhow, Result};
|
||||
use ashpd::desktop::{
|
||||
remote_desktop::{
|
||||
ConnectToEISOptions, DeviceType, RemoteDesktop, SelectDevicesOptions, StartOptions,
|
||||
},
|
||||
CreateSessionOptions, PersistMode,
|
||||
};
|
||||
use futures_util::StreamExt;
|
||||
use punktfunk_core::input::{InputEvent, InputKind};
|
||||
use reis::ei;
|
||||
use reis::event::{DeviceCapability, EiEvent};
|
||||
use std::os::unix::net::UnixStream;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
|
||||
|
||||
/// Value of an event's `code` field that marks a scroll as horizontal (mirrors
/// `gamestream::input`).
const SCROLL_HORIZONTAL: u32 = 1;
|
||||
|
||||
/// How the injector locates the EIS server it connects to.
#[derive(Clone, Debug)]
pub enum EiSource {
    /// Through `org.freedesktop.portal.RemoteDesktop` (KWin, GNOME/Mutter).
    Portal,
    /// Through a file that contains the EIS socket path/name (gamescope's relayed
    /// `LIBEI_SOCKET`). The file is polled until it appears, since the compositor may still
    /// be starting.
    SocketPathFile(std::path::PathBuf),
}
|
||||
|
||||
/// Handle held by the control thread; forwards events to the libei worker thread.
|
||||
pub struct LibeiInjector {
|
||||
tx: UnboundedSender<InputEvent>,
|
||||
}
|
||||
|
||||
impl LibeiInjector {
|
||||
pub fn open() -> Result<Self> {
|
||||
Self::open_with(EiSource::Portal)
|
||||
}
|
||||
|
||||
pub fn open_with(source: EiSource) -> Result<Self> {
|
||||
let (tx, rx) = unbounded_channel::<InputEvent>();
|
||||
std::thread::Builder::new()
|
||||
.name("punktfunk-libei".into())
|
||||
.spawn(move || worker(rx, source))
|
||||
.map_err(|e| anyhow!("spawn libei worker thread: {e}"))?;
|
||||
// Return immediately — the portal/socket handshake must NOT run on the caller's
|
||||
// (control) thread, or a slow/denied setup would freeze the ENet control stream and
|
||||
// drop the client. The worker establishes the session asynchronously and logs its
|
||||
// status; events enqueue until devices resume (a few startup events may be dropped).
|
||||
Ok(Self { tx })
|
||||
}
|
||||
}
|
||||
|
||||
impl InputInjector for LibeiInjector {
|
||||
fn inject(&mut self, event: &InputEvent) -> Result<()> {
|
||||
self.tx
|
||||
.send(*event)
|
||||
.map_err(|_| anyhow!("libei worker thread has exited"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Worker thread entry: build a tokio runtime and run the session to completion.
|
||||
fn worker(rx: UnboundedReceiver<InputEvent>, source: EiSource) {
|
||||
let rt = match tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(1)
|
||||
.enable_all()
|
||||
.build()
|
||||
{
|
||||
Ok(rt) => rt,
|
||||
Err(e) => {
|
||||
tracing::error!(error = %e, "libei: build tokio runtime failed");
|
||||
return;
|
||||
}
|
||||
};
|
||||
rt.block_on(session_main(rx, source));
|
||||
}
|
||||
|
||||
/// Open the portal/socket + EIS (bounded), then pump events until disconnect or shutdown.
|
||||
async fn session_main(mut rx: UnboundedReceiver<InputEvent>, source: EiSource) {
|
||||
// Keep `_rd`/`_session` bound for the whole loop — dropping the portal session closes the
|
||||
// EIS connection. Bound the setup so a headless approval dialog (un-bypassed grant) can't
|
||||
// hang the worker forever.
|
||||
let (_portal, context, mut events) = match tokio::time::timeout(
|
||||
Duration::from_secs(30),
|
||||
connect(source),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(t)) => t,
|
||||
Ok(Err(e)) => {
|
||||
tracing::error!(error = %format!("{e:#}"), "libei: portal/EIS setup failed");
|
||||
return;
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::error!(
|
||||
"libei: EIS setup timed out (headless approval needed / kde-authorized grant not seeded / gamescope socket never appeared)"
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
tracing::info!("libei: EIS connected — awaiting devices");
|
||||
|
||||
let mut state = EiState::new();
|
||||
loop {
|
||||
tokio::select! {
|
||||
ei = events.next() => match ei {
|
||||
Some(Ok(ev)) => state.handle_ei(ev, &context),
|
||||
Some(Err(e)) => { tracing::warn!(error = %e, "libei: event stream error"); break; }
|
||||
None => { tracing::info!("libei: EIS disconnected"); break; }
|
||||
},
|
||||
msg = rx.recv() => match msg {
|
||||
Some(input) => state.inject(&input, &context),
|
||||
None => { tracing::info!("libei: injector closed — ending session"); break; }
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tie down the verbose tuple the connect step returns. The portal pair must stay alive for
|
||||
/// the whole session (dropping it closes the EIS connection); `None` for the direct-socket path.
|
||||
type Connected = (
|
||||
Option<(RemoteDesktop, ashpd::desktop::Session<RemoteDesktop>)>,
|
||||
ei::Context,
|
||||
reis::tokio::EiConvertEventStream,
|
||||
);
|
||||
|
||||
/// Reach an EIS server per `source` and run the EI sender handshake.
|
||||
async fn connect(source: EiSource) -> Result<Connected> {
|
||||
let (portal, stream) = match source {
|
||||
EiSource::Portal => {
|
||||
let (rd, session, fd) = connect_portal().await?;
|
||||
(Some((rd, session)), UnixStream::from(fd))
|
||||
}
|
||||
EiSource::SocketPathFile(file) => (None, connect_socket_file(&file).await?),
|
||||
};
|
||||
let context = ei::Context::new(stream).map_err(|e| anyhow!("reis EI context: {e}"))?;
|
||||
let (_conn, events) = context
|
||||
.handshake_tokio("punktfunk-host", ei::handshake::ContextType::Sender)
|
||||
.await
|
||||
.map_err(|e| anyhow!("EI handshake: {e}"))?;
|
||||
Ok((portal, context, events))
|
||||
}
|
||||
|
||||
/// Open a RemoteDesktop portal session (pointer + keyboard) and obtain the EIS socket fd.
|
||||
async fn connect_portal() -> Result<(
|
||||
RemoteDesktop,
|
||||
ashpd::desktop::Session<RemoteDesktop>,
|
||||
std::os::fd::OwnedFd,
|
||||
)> {
|
||||
let rd = RemoteDesktop::new()
|
||||
.await
|
||||
.map_err(|e| anyhow!("open RemoteDesktop portal (is xdg-desktop-portal-kde/gnome running and XDG_CURRENT_DESKTOP set?): {e}"))?;
|
||||
let session = rd
|
||||
.create_session(CreateSessionOptions::default())
|
||||
.await
|
||||
.map_err(|e| anyhow!("create RemoteDesktop session: {e}"))?;
|
||||
rd.select_devices(
|
||||
&session,
|
||||
SelectDevicesOptions::default()
|
||||
.set_devices(DeviceType::Keyboard | DeviceType::Pointer)
|
||||
.set_persist_mode(PersistMode::DoNot),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| anyhow!("select_devices: {e}"))?
|
||||
.response()
|
||||
.map_err(|e| anyhow!("select_devices response: {e}"))?;
|
||||
let started = rd
|
||||
.start(&session, None, StartOptions::default())
|
||||
.await
|
||||
.map_err(|e| anyhow!("start RemoteDesktop session: {e}"))?;
|
||||
let granted = started
|
||||
.response()
|
||||
.map_err(|e| anyhow!("RemoteDesktop start denied: {e}"))?;
|
||||
tracing::info!(devices = ?granted.devices(), "libei: portal granted devices");
|
||||
|
||||
let fd = rd
|
||||
.connect_to_eis(&session, ConnectToEISOptions::default())
|
||||
.await
|
||||
.map_err(|e| anyhow!("connect_to_eis (RemoteDesktop portal version < 2?): {e}"))?;
|
||||
Ok((rd, session, fd))
|
||||
}
|
||||
|
||||
/// Poll `file` for the EIS socket path (the gamescope backend relays `LIBEI_SOCKET` there once
|
||||
/// the nested app launches), then connect. A bare name is resolved against `XDG_RUNTIME_DIR`,
|
||||
/// mirroring libei's own `LIBEI_SOCKET` semantics.
|
||||
async fn connect_socket_file(file: &std::path::Path) -> Result<UnixStream> {
|
||||
let path = loop {
|
||||
match std::fs::read_to_string(file) {
|
||||
Ok(s) if !s.trim().is_empty() => break s.trim().to_string(),
|
||||
_ => tokio::time::sleep(Duration::from_millis(300)).await,
|
||||
}
|
||||
};
|
||||
let full = if path.starts_with('/') {
|
||||
std::path::PathBuf::from(&path)
|
||||
} else {
|
||||
let runtime = std::env::var("XDG_RUNTIME_DIR").map_err(|_| {
|
||||
anyhow!("XDG_RUNTIME_DIR unset (needed to resolve EIS socket '{path}')")
|
||||
})?;
|
||||
std::path::Path::new(&runtime).join(&path)
|
||||
};
|
||||
tracing::info!(socket = %full.display(), "libei: connecting to EIS socket");
|
||||
UnixStream::connect(&full).map_err(|e| anyhow!("connect EIS socket {}: {e}", full.display()))
|
||||
}
|
||||
|
||||
/// One EI device and its emulation state.
|
||||
struct DeviceSlot {
|
||||
device: reis::event::Device,
|
||||
/// The device is resumed (allowed to emit). Devices arrive paused and may pause again.
|
||||
resumed: bool,
|
||||
/// We have issued `start_emulating` since the last resume.
|
||||
emulating: bool,
|
||||
}
|
||||
|
||||
/// Tracks bound devices + the serial/sequence/timebase the EI protocol requires.
|
||||
struct EiState {
|
||||
devices: Vec<DeviceSlot>,
|
||||
last_serial: u32,
|
||||
sequence: u32,
|
||||
start: Instant,
|
||||
}
|
||||
|
||||
impl EiState {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
devices: Vec::new(),
|
||||
last_serial: 0,
|
||||
sequence: 0,
|
||||
start: Instant::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn now_us(&self) -> u64 {
|
||||
self.start.elapsed().as_micros() as u64
|
||||
}
|
||||
|
||||
/// Apply a server event: bind capabilities, track devices, and follow resume/pause.
|
||||
fn handle_ei(&mut self, ev: EiEvent, ctx: &ei::Context) {
|
||||
match ev {
|
||||
EiEvent::SeatAdded(e) => {
|
||||
e.seat.bind_capabilities(
|
||||
DeviceCapability::Pointer
|
||||
| DeviceCapability::PointerAbsolute
|
||||
| DeviceCapability::Keyboard
|
||||
| DeviceCapability::Scroll
|
||||
| DeviceCapability::Button,
|
||||
);
|
||||
let _ = ctx.flush();
|
||||
}
|
||||
EiEvent::DeviceAdded(e) => {
|
||||
tracing::info!(device = ?e.device.name(), ty = ?e.device.device_type(), "libei: device added");
|
||||
self.devices.push(DeviceSlot {
|
||||
device: e.device,
|
||||
resumed: false,
|
||||
emulating: false,
|
||||
});
|
||||
}
|
||||
EiEvent::DeviceRemoved(e) => {
|
||||
self.devices.retain(|d| d.device != e.device);
|
||||
}
|
||||
EiEvent::DeviceResumed(e) => {
|
||||
self.last_serial = e.serial;
|
||||
if let Some(d) = self.devices.iter_mut().find(|d| d.device == e.device) {
|
||||
d.resumed = true;
|
||||
d.emulating = false; // must re-issue start_emulating after a resume
|
||||
}
|
||||
}
|
||||
EiEvent::DevicePaused(e) => {
|
||||
if let Some(d) = self.devices.iter_mut().find(|d| d.device == e.device) {
|
||||
d.resumed = false;
|
||||
d.emulating = false;
|
||||
}
|
||||
}
|
||||
// Informational: the server reports resulting modifier/group state; we don't set it.
|
||||
EiEvent::KeyboardModifiers(e) => self.last_serial = e.serial,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Index of a resumed device exposing `cap`.
|
||||
fn device_for(&self, cap: DeviceCapability) -> Option<usize> {
|
||||
self.devices
|
||||
.iter()
|
||||
.position(|d| d.resumed && d.device.has_capability(cap))
|
||||
}
|
||||
|
||||
/// Ensure the device at `idx` is in `start_emulating` state before we emit on it.
|
||||
fn ensure_emulating(&mut self, idx: usize, dev: &ei::Device) {
|
||||
if !self.devices[idx].emulating {
|
||||
dev.start_emulating(self.last_serial, self.sequence);
|
||||
self.sequence = self.sequence.wrapping_add(1);
|
||||
self.devices[idx].emulating = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate and emit one client input event, committing it as a single `frame`.
|
||||
fn inject(&mut self, ev: &InputEvent, ctx: &ei::Context) {
|
||||
let cap = match ev.kind {
|
||||
InputKind::MouseMove => DeviceCapability::Pointer,
|
||||
InputKind::MouseMoveAbs => DeviceCapability::PointerAbsolute,
|
||||
InputKind::MouseButtonDown | InputKind::MouseButtonUp => DeviceCapability::Button,
|
||||
InputKind::MouseScroll => DeviceCapability::Scroll,
|
||||
InputKind::KeyDown | InputKind::KeyUp => DeviceCapability::Keyboard,
|
||||
InputKind::GamepadButton | InputKind::GamepadAxis => return, // uinput path (later)
|
||||
};
|
||||
let Some(idx) = self.device_for(cap) else {
|
||||
return; // no resumed device with this capability yet
|
||||
};
|
||||
let dev = self.devices[idx].device.device().clone();
|
||||
self.ensure_emulating(idx, &dev);
|
||||
|
||||
let mut emitted = true;
|
||||
let slot = &self.devices[idx].device;
|
||||
match ev.kind {
|
||||
InputKind::MouseMove => match slot.interface::<ei::Pointer>() {
|
||||
Some(p) => p.motion_relative(ev.x as f32, ev.y as f32),
|
||||
None => emitted = false,
|
||||
},
|
||||
InputKind::MouseMoveAbs => {
|
||||
let w = ((ev.flags >> 16) & 0xffff) as f32;
|
||||
let h = (ev.flags & 0xffff) as f32;
|
||||
match (
|
||||
slot.interface::<ei::PointerAbsolute>(),
|
||||
slot.regions().first(),
|
||||
) {
|
||||
(Some(p), Some(region)) if w > 0.0 && h > 0.0 => {
|
||||
// Map the normalized client position into the device's first region.
|
||||
let nx = (ev.x as f32 / w).clamp(0.0, 1.0);
|
||||
let ny = (ev.y as f32 / h).clamp(0.0, 1.0);
|
||||
let x = region.x as f32 + nx * region.width as f32;
|
||||
let y = region.y as f32 + ny * region.height as f32;
|
||||
p.motion_absolute(x, y);
|
||||
}
|
||||
_ => emitted = false,
|
||||
}
|
||||
}
|
||||
InputKind::MouseButtonDown | InputKind::MouseButtonUp => {
|
||||
match (slot.interface::<ei::Button>(), gs_button_to_evdev(ev.code)) {
|
||||
(Some(b), Some(btn)) => {
|
||||
let st = if ev.kind == InputKind::MouseButtonDown {
|
||||
ei::button::ButtonState::Press
|
||||
} else {
|
||||
ei::button::ButtonState::Released
|
||||
};
|
||||
b.button(btn, st);
|
||||
}
|
||||
_ => emitted = false,
|
||||
}
|
||||
}
|
||||
InputKind::MouseScroll => match slot.interface::<ei::Scroll>() {
|
||||
Some(s) => {
|
||||
// GameStream sends WHEEL_DELTA(120)-scaled deltas in `x`; ei scroll_discrete
|
||||
// uses the same 120-per-detent unit. Positive GameStream = up (vertical),
|
||||
// which is negative on the ei axis, but = RIGHT (horizontal), which is
|
||||
// already positive there (moonlight-qt/Sunshine pass horizontal through
|
||||
// unnegated) — only the vertical axis flips.
|
||||
if ev.code == SCROLL_HORIZONTAL {
|
||||
s.scroll_discrete(ev.x, 0);
|
||||
} else {
|
||||
s.scroll_discrete(0, -ev.x);
|
||||
}
|
||||
}
|
||||
None => emitted = false,
|
||||
},
|
||||
InputKind::KeyDown | InputKind::KeyUp => {
|
||||
match (slot.interface::<ei::Keyboard>(), vk_to_evdev(ev.code as u8)) {
|
||||
(Some(k), Some(evdev)) => {
|
||||
let st = if ev.kind == InputKind::KeyDown {
|
||||
ei::keyboard::KeyState::Press
|
||||
} else {
|
||||
ei::keyboard::KeyState::Released
|
||||
};
|
||||
k.key(evdev as u32, st);
|
||||
}
|
||||
_ => {
|
||||
emitted = false;
|
||||
tracing::debug!(vk = ev.code, "libei: unmapped VK keycode — dropped");
|
||||
}
|
||||
}
|
||||
}
|
||||
InputKind::GamepadButton | InputKind::GamepadAxis => emitted = false,
|
||||
}
|
||||
|
||||
if emitted {
|
||||
dev.frame(self.last_serial, self.now_us());
|
||||
}
|
||||
let _ = ctx.flush();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,273 @@
|
||||
//! Input injection through the wlroots virtual-input Wayland protocols
|
||||
//! (`zwlr_virtual_pointer_manager_v1` + `zwp_virtual_keyboard_manager_v1`) — the headless-Sway
|
||||
//! path. We connect as an ordinary Wayland client (the host inherits Sway's
|
||||
//! `WAYLAND_DISPLAY`/`XDG_RUNTIME_DIR`), bind the two managers, upload a standard evdev/US xkb
|
||||
//! keymap, and translate events into virtual pointer/keyboard requests, tracking modifier state
|
||||
//! so the compositor resolves shifted keysyms correctly.
|
||||
|
||||
use super::{gs_button_to_evdev, vk_to_evdev, InputEvent, InputInjector};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use punktfunk_core::input::InputKind;
|
||||
use std::io::Write;
|
||||
use std::os::fd::{AsFd, FromRawFd};
|
||||
use std::time::Instant;
|
||||
use wayland_client::protocol::{wl_output::WlOutput, wl_pointer, wl_registry, wl_seat::WlSeat};
|
||||
use wayland_client::{Connection, Dispatch, EventQueue, Proxy, QueueHandle};
|
||||
use wayland_protocols_misc::zwp_virtual_keyboard_v1::client::{
|
||||
zwp_virtual_keyboard_manager_v1::ZwpVirtualKeyboardManagerV1,
|
||||
zwp_virtual_keyboard_v1::ZwpVirtualKeyboardV1,
|
||||
};
|
||||
use wayland_protocols_wlr::virtual_pointer::v1::client::{
|
||||
zwlr_virtual_pointer_manager_v1::ZwlrVirtualPointerManagerV1,
|
||||
zwlr_virtual_pointer_v1::ZwlrVirtualPointerV1,
|
||||
};
|
||||
use xkbcommon::xkb;
|
||||
|
||||
/// Value of an input event's `code` field that tags a scroll as horizontal; any other value is
/// handled as a vertical scroll (kept in step with `gamestream::input`).
const SCROLL_HORIZONTAL: u32 = 1;
|
||||
|
||||
/// Globals bound from the registry (the Wayland dispatch state).
|
||||
#[derive(Default)]
|
||||
struct Globals {
|
||||
pointer_mgr: Option<ZwlrVirtualPointerManagerV1>,
|
||||
keyboard_mgr: Option<ZwpVirtualKeyboardManagerV1>,
|
||||
seat: Option<WlSeat>,
|
||||
output: Option<WlOutput>,
|
||||
}
|
||||
|
||||
impl Dispatch<wl_registry::WlRegistry, ()> for Globals {
|
||||
fn event(
|
||||
state: &mut Self,
|
||||
registry: &wl_registry::WlRegistry,
|
||||
event: wl_registry::Event,
|
||||
_: &(),
|
||||
_: &Connection,
|
||||
qh: &QueueHandle<Self>,
|
||||
) {
|
||||
if let wl_registry::Event::Global {
|
||||
name,
|
||||
interface,
|
||||
version,
|
||||
} = event
|
||||
{
|
||||
match interface.as_str() {
|
||||
"zwlr_virtual_pointer_manager_v1" => {
|
||||
state.pointer_mgr = Some(registry.bind(name, version.min(2), qh, ()));
|
||||
}
|
||||
"zwp_virtual_keyboard_manager_v1" => {
|
||||
state.keyboard_mgr = Some(registry.bind(name, version.min(1), qh, ()));
|
||||
}
|
||||
"wl_seat" => {
|
||||
state.seat = Some(registry.bind(name, version.min(7), qh, ()));
|
||||
}
|
||||
"wl_output" if state.output.is_none() => {
|
||||
state.output = Some(registry.bind(name, version.min(3), qh, ()));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The managers, the two virtual devices, the seat and the output emit no events we use.
|
||||
macro_rules! ignore_events {
|
||||
($($t:ty),* $(,)?) => {$(
|
||||
impl Dispatch<$t, ()> for Globals {
|
||||
fn event(_: &mut Self, _: &$t, _: <$t as Proxy>::Event, _: &(), _: &Connection, _: &QueueHandle<Self>) {}
|
||||
}
|
||||
)*};
|
||||
}
|
||||
ignore_events!(
|
||||
WlSeat,
|
||||
WlOutput,
|
||||
ZwlrVirtualPointerManagerV1,
|
||||
ZwlrVirtualPointerV1,
|
||||
ZwpVirtualKeyboardManagerV1,
|
||||
ZwpVirtualKeyboardV1,
|
||||
);
|
||||
|
||||
pub struct WlrootsInjector {
|
||||
conn: Connection,
|
||||
queue: EventQueue<Globals>,
|
||||
globals: Globals,
|
||||
pointer: ZwlrVirtualPointerV1,
|
||||
keyboard: ZwpVirtualKeyboardV1,
|
||||
xkb_state: xkb::State,
|
||||
_keymap_file: std::fs::File, // keep the memfd alive for the compositor's mmap
|
||||
start: Instant,
|
||||
}
|
||||
|
||||
impl WlrootsInjector {
|
||||
pub fn open() -> Result<Self> {
|
||||
let conn = Connection::connect_to_env()
|
||||
.context("connect to Wayland (is Sway up + WAYLAND_DISPLAY/XDG_RUNTIME_DIR set?)")?;
|
||||
let mut queue = conn.new_event_queue();
|
||||
let qh = queue.handle();
|
||||
let _registry = conn.display().get_registry(&qh, ());
|
||||
let mut globals = Globals::default();
|
||||
queue
|
||||
.roundtrip(&mut globals)
|
||||
.context("Wayland registry roundtrip")?;
|
||||
|
||||
let pointer_mgr = globals
|
||||
.pointer_mgr
|
||||
.clone()
|
||||
.context("compositor lacks zwlr_virtual_pointer_manager_v1")?;
|
||||
let keyboard_mgr = globals
|
||||
.keyboard_mgr
|
||||
.clone()
|
||||
.context("compositor lacks zwp_virtual_keyboard_manager_v1")?;
|
||||
let seat = globals
|
||||
.seat
|
||||
.clone()
|
||||
.context("compositor advertised no wl_seat")?;
|
||||
|
||||
let pointer = pointer_mgr.create_virtual_pointer_with_output(
|
||||
Some(&seat),
|
||||
globals.output.as_ref(),
|
||||
&qh,
|
||||
(),
|
||||
);
|
||||
let keyboard = keyboard_mgr.create_virtual_keyboard(&seat, &qh, ());
|
||||
|
||||
// A standard evdev/US keymap so raw evdev keycodes resolve to the right keysyms.
|
||||
let ctx = xkb::Context::new(xkb::CONTEXT_NO_FLAGS);
|
||||
let keymap = xkb::Keymap::new_from_names(
|
||||
&ctx,
|
||||
"evdev",
|
||||
"pc105",
|
||||
"us",
|
||||
"",
|
||||
None,
|
||||
xkb::KEYMAP_COMPILE_NO_FLAGS,
|
||||
)
|
||||
.context("compile xkb keymap")?;
|
||||
let keymap_str = keymap.get_as_string(xkb::KEYMAP_FORMAT_TEXT_V1);
|
||||
let xkb_state = xkb::State::new(&keymap);
|
||||
|
||||
let file = memfd_with(&keymap_str)?;
|
||||
let size = keymap_str.len() as u32 + 1; // include the trailing NUL
|
||||
keyboard.keymap(1 /* XKB_V1 */, file.as_fd(), size);
|
||||
queue
|
||||
.roundtrip(&mut globals)
|
||||
.context("keymap upload roundtrip")?;
|
||||
conn.flush().ok();
|
||||
|
||||
tracing::info!(
|
||||
output = globals.output.is_some(),
|
||||
"wlroots virtual input ready (pointer + keyboard)"
|
||||
);
|
||||
Ok(Self {
|
||||
conn,
|
||||
queue,
|
||||
globals,
|
||||
pointer,
|
||||
keyboard,
|
||||
xkb_state,
|
||||
_keymap_file: file,
|
||||
start: Instant::now(),
|
||||
})
|
||||
}
|
||||
|
||||
fn now_ms(&self) -> u32 {
|
||||
self.start.elapsed().as_millis() as u32
|
||||
}
|
||||
|
||||
/// Update xkb state for a key and tell the compositor the resulting modifier mask.
|
||||
fn send_modifiers(&mut self, evdev: u16, down: bool) {
|
||||
let kc = xkb::Keycode::new(evdev as u32 + 8); // evdev -> xkb keycode
|
||||
let dir = if down {
|
||||
xkb::KeyDirection::Down
|
||||
} else {
|
||||
xkb::KeyDirection::Up
|
||||
};
|
||||
self.xkb_state.update_key(kc, dir);
|
||||
let depressed = self.xkb_state.serialize_mods(xkb::STATE_MODS_DEPRESSED);
|
||||
let latched = self.xkb_state.serialize_mods(xkb::STATE_MODS_LATCHED);
|
||||
let locked = self.xkb_state.serialize_mods(xkb::STATE_MODS_LOCKED);
|
||||
let group = self.xkb_state.serialize_layout(xkb::STATE_LAYOUT_EFFECTIVE);
|
||||
self.keyboard.modifiers(depressed, latched, locked, group);
|
||||
}
|
||||
}
|
||||
|
||||
impl InputInjector for WlrootsInjector {
|
||||
fn inject(&mut self, event: &InputEvent) -> Result<()> {
|
||||
let t = self.now_ms();
|
||||
match event.kind {
|
||||
InputKind::MouseMove => {
|
||||
self.pointer.motion(t, event.x as f64, event.y as f64);
|
||||
self.pointer.frame();
|
||||
}
|
||||
InputKind::MouseMoveAbs => {
|
||||
let w = (event.flags >> 16) & 0xffff;
|
||||
let h = event.flags & 0xffff;
|
||||
if w > 0 && h > 0 {
|
||||
let x = event.x.clamp(0, w as i32) as u32;
|
||||
let y = event.y.clamp(0, h as i32) as u32;
|
||||
self.pointer.motion_absolute(t, x, y, w, h);
|
||||
self.pointer.frame();
|
||||
}
|
||||
}
|
||||
InputKind::MouseButtonDown | InputKind::MouseButtonUp => {
|
||||
if let Some(btn) = gs_button_to_evdev(event.code) {
|
||||
let st = if event.kind == InputKind::MouseButtonDown {
|
||||
wl_pointer::ButtonState::Pressed
|
||||
} else {
|
||||
wl_pointer::ButtonState::Released
|
||||
};
|
||||
self.pointer.button(t, btn, st);
|
||||
self.pointer.frame();
|
||||
}
|
||||
}
|
||||
InputKind::MouseScroll => {
|
||||
let axis = if event.code == SCROLL_HORIZONTAL {
|
||||
wl_pointer::Axis::HorizontalScroll
|
||||
} else {
|
||||
wl_pointer::Axis::VerticalScroll
|
||||
};
|
||||
// GameStream sends WHEEL_DELTA(120)-scaled units; a notch ≈ 15px. Positive
|
||||
// GameStream = up (vertical), negative on the Wayland axis; but = RIGHT
|
||||
// (horizontal), already positive there (moonlight-qt/Sunshine pass
|
||||
// horizontal through unnegated) — only the vertical axis flips.
|
||||
let notches = event.x as f64 / 120.0;
|
||||
let sign = if event.code == SCROLL_HORIZONTAL {
|
||||
1.0
|
||||
} else {
|
||||
-1.0
|
||||
};
|
||||
self.pointer.axis_source(wl_pointer::AxisSource::Wheel);
|
||||
self.pointer.axis(t, axis, sign * notches * 15.0);
|
||||
self.pointer.frame();
|
||||
}
|
||||
InputKind::KeyDown | InputKind::KeyUp => {
|
||||
let down = event.kind == InputKind::KeyDown;
|
||||
if let Some(evdev) = vk_to_evdev(event.code as u8) {
|
||||
self.keyboard.key(t, evdev as u32, if down { 1 } else { 0 });
|
||||
self.send_modifiers(evdev, down);
|
||||
} else {
|
||||
tracing::debug!(vk = event.code, "unmapped VK keycode — dropped");
|
||||
}
|
||||
}
|
||||
InputKind::GamepadButton | InputKind::GamepadAxis => {} // not yet injected
|
||||
}
|
||||
// Surface protocol errors / disconnects, then push the batch to the compositor.
|
||||
self.queue
|
||||
.dispatch_pending(&mut self.globals)
|
||||
.context("wayland dispatch")?;
|
||||
self.conn.flush().context("wayland flush")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an anonymous in-memory file holding `s` + a trailing NUL (for the keymap fd).
|
||||
fn memfd_with(s: &str) -> Result<std::fs::File> {
|
||||
let name = b"punktfunk-keymap\0";
|
||||
let fd = unsafe { libc::memfd_create(name.as_ptr() as *const libc::c_char, libc::MFD_CLOEXEC) };
|
||||
if fd < 0 {
|
||||
bail!("memfd_create failed: {}", std::io::Error::last_os_error());
|
||||
}
|
||||
let mut f = unsafe { std::fs::File::from_raw_fd(fd) };
|
||||
f.write_all(s.as_bytes()).context("write keymap")?;
|
||||
f.write_all(&[0]).context("write keymap NUL")?;
|
||||
Ok(f)
|
||||
}
|
||||
@@ -0,0 +1,265 @@
|
||||
//! M0 — the pipeline spike (plan §8): capture → NVENC encode → playable file, with the
|
||||
//! encoded access units also fed through a `punktfunk_core` host→client `Session` over an
|
||||
//! in-process loopback to prove the core's FEC + packetize + reassemble path on real
|
||||
//! encoder output.
|
||||
//!
|
||||
//! This is the spike runner, not the M2 hot path: it drives the stages on one thread (the
|
||||
//! per-stage-thread pipeline with bounded channels is [`crate::pipeline`]). Source is
|
||||
//! either a synthetic BGRx test pattern (no capture session needed) or the live xdg
|
||||
//! ScreenCast portal monitor.
|
||||
|
||||
use crate::capture::{self, Capturer, SyntheticCapturer};
|
||||
use crate::encode::{self, Codec, EncodedFrame, Encoder};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use punktfunk_core::packet::{FLAG_PIC, FLAG_SOF};
|
||||
use punktfunk_core::{Config, Role, Session};
|
||||
use std::fs::File;
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
/// Where the M0 spike gets its frames from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Source {
    /// A deterministic, moving BGRx test pattern; needs no capture session.
    Synthetic,
    /// A live monitor, captured through the xdg ScreenCast portal + PipeWire.
    Portal,
    /// A KWin virtual output created at `width`x`height` (zkde_screencast), for validating
    /// capture (and zero-copy) at an arbitrary client resolution against a headless KWin.
    KwinVirtual,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Options {
|
||||
pub source: Source,
|
||||
/// Synthetic-only; the portal source uses the PipeWire-negotiated size.
|
||||
pub width: u32,
|
||||
pub height: u32,
|
||||
pub fps: u32,
|
||||
pub seconds: u32,
|
||||
pub codec: Codec,
|
||||
pub bitrate_bps: u64,
|
||||
/// Raw Annex-B elementary-stream sink (`.h265`/`.h264`/`.ivf-less .obu`); playable.
|
||||
pub out: PathBuf,
|
||||
/// Also round-trip every AU through a `punktfunk_core` host→client loopback and verify.
|
||||
pub loopback: bool,
|
||||
}
|
||||
|
||||
pub fn run(opts: Options) -> Result<()> {
|
||||
let mut capturer: Box<dyn Capturer> = match opts.source {
|
||||
Source::Synthetic => {
|
||||
tracing::info!(
|
||||
width = opts.width,
|
||||
height = opts.height,
|
||||
fps = opts.fps,
|
||||
"M0 source: synthetic BGRx test pattern"
|
||||
);
|
||||
Box::new(SyntheticCapturer::new(opts.width, opts.height, opts.fps))
|
||||
}
|
||||
Source::Portal => {
|
||||
tracing::info!("M0 source: xdg ScreenCast portal (live monitor)");
|
||||
capture::open_portal_monitor().context("open portal capturer")?
|
||||
}
|
||||
Source::KwinVirtual => {
|
||||
let compositor = crate::vdisplay::detect().unwrap_or(crate::vdisplay::Compositor::Kwin);
|
||||
tracing::info!(
|
||||
width = opts.width,
|
||||
height = opts.height,
|
||||
?compositor,
|
||||
"M0 source: virtual output (PUNKTFUNK_COMPOSITOR)"
|
||||
);
|
||||
let mut vd = crate::vdisplay::open(compositor).context("open virtual display")?;
|
||||
let vout = vd
|
||||
.create(punktfunk_core::Mode {
|
||||
width: opts.width,
|
||||
height: opts.height,
|
||||
refresh_hz: opts.fps,
|
||||
})
|
||||
.context("create virtual output")?;
|
||||
capture::capture_virtual_output(vout).context("capture virtual output")?
|
||||
}
|
||||
};
|
||||
|
||||
// Activate the capturer so the portal/PipeWire process callback actually delivers frames
|
||||
// (it gates the per-frame de-pad on `active`; idle by default so reconnects are cheap).
|
||||
capturer.set_active(true);
|
||||
|
||||
// The first frame establishes the authoritative dimensions (the portal's negotiated
|
||||
// size, or the synthetic size) used to configure the encoder.
|
||||
let first = capturer.next_frame().context("capture first frame")?;
|
||||
let (w, h) = (first.width, first.height);
|
||||
tracing::info!(
|
||||
width = w,
|
||||
height = h,
|
||||
format = ?first.format,
|
||||
codec = ?opts.codec,
|
||||
bitrate_bps = opts.bitrate_bps,
|
||||
"opening NVENC encoder"
|
||||
);
|
||||
let mut encoder = encode::open_video(
|
||||
opts.codec,
|
||||
first.format,
|
||||
w,
|
||||
h,
|
||||
opts.fps,
|
||||
opts.bitrate_bps,
|
||||
first.is_cuda(),
|
||||
)
|
||||
.context("open encoder")?;
|
||||
|
||||
let mut sink = BufWriter::new(
|
||||
File::create(&opts.out).with_context(|| format!("create {}", opts.out.display()))?,
|
||||
);
|
||||
|
||||
let mut lb = if opts.loopback {
|
||||
Some(Loopback::new().context("build punktfunk-core loopback")?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let target_frames = (opts.seconds as u64) * (opts.fps as u64);
|
||||
let started = Instant::now();
|
||||
let mut stats = Stats::default();
|
||||
|
||||
let mut frame = first;
|
||||
loop {
|
||||
encoder.submit(&frame).context("encoder submit")?;
|
||||
stats.submitted += 1;
|
||||
drain_encoder(encoder.as_mut(), &mut sink, lb.as_mut(), &mut stats)?;
|
||||
if stats.submitted >= target_frames {
|
||||
break;
|
||||
}
|
||||
frame = capturer.next_frame().context("capture frame")?;
|
||||
}
|
||||
|
||||
// NVENC buffers frames internally even at delay=0 — flush and drain the tail.
|
||||
encoder.flush().context("encoder flush")?;
|
||||
drain_encoder(encoder.as_mut(), &mut sink, lb.as_mut(), &mut stats)?;
|
||||
sink.flush().context("flush output file")?;
|
||||
|
||||
let elapsed = started.elapsed().as_secs_f64();
|
||||
tracing::info!(
|
||||
submitted = stats.submitted,
|
||||
encoded = stats.encoded,
|
||||
keyframes = stats.keyframes,
|
||||
bytes_out = stats.bytes_out,
|
||||
out = %opts.out.display(),
|
||||
elapsed_s = format!("{elapsed:.2}"),
|
||||
encode_fps = format!("{:.1}", stats.encoded as f64 / elapsed.max(1e-9)),
|
||||
"M0 capture→encode→file complete"
|
||||
);
|
||||
|
||||
if let Some(lb) = lb {
|
||||
lb.report();
|
||||
if lb.mismatches > 0 || lb.recovered != lb.submitted {
|
||||
return Err(anyhow!(
|
||||
"punktfunk-core loopback verification FAILED: {} mismatches, {}/{} AUs recovered",
|
||||
lb.mismatches,
|
||||
lb.recovered,
|
||||
lb.submitted
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Counters accumulated over one M0 run.
#[derive(Default)]
struct Stats {
    /// Frames handed to the encoder.
    submitted: u64,
    /// Access units received back from the encoder.
    encoded: u64,
    /// How many of those access units were keyframes.
    keyframes: u64,
    /// Total size of the encoded access units, in bytes.
    bytes_out: u64,
}
|
||||
|
||||
fn drain_encoder(
|
||||
encoder: &mut dyn Encoder,
|
||||
sink: &mut impl Write,
|
||||
mut lb: Option<&mut Loopback>,
|
||||
stats: &mut Stats,
|
||||
) -> Result<()> {
|
||||
while let Some(au) = encoder.poll().context("encoder poll")? {
|
||||
sink.write_all(&au.data).context("write AU to file")?;
|
||||
stats.encoded += 1;
|
||||
stats.bytes_out += au.data.len() as u64;
|
||||
if au.keyframe {
|
||||
stats.keyframes += 1;
|
||||
}
|
||||
if let Some(lb) = lb.as_deref_mut() {
|
||||
lb.submit(&au)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// A host↔client `punktfunk_core` pair over a lossless in-process loopback. Each encoded AU is
|
||||
/// FEC-protected, packetized, sent, then reassembled on the client and byte-compared to the
|
||||
/// original — exercising the core on real encoder output (the M0 "feed into a Session" goal).
|
||||
struct Loopback {
|
||||
host: Session,
|
||||
client: Session,
|
||||
submitted: u64,
|
||||
recovered: u64,
|
||||
mismatches: u64,
|
||||
bytes: u64,
|
||||
}
|
||||
|
||||
impl Loopback {
|
||||
fn new() -> Result<Loopback> {
|
||||
let (host_tx, client_tx) = punktfunk_core::transport::loopback_pair(0, 0);
|
||||
let host = Session::new(Config::p1_defaults(Role::Host), Box::new(host_tx))
|
||||
.map_err(|e| anyhow!("host session: {e:?}"))?;
|
||||
let client = Session::new(Config::p1_defaults(Role::Client), Box::new(client_tx))
|
||||
.map_err(|e| anyhow!("client session: {e:?}"))?;
|
||||
Ok(Loopback {
|
||||
host,
|
||||
client,
|
||||
submitted: 0,
|
||||
recovered: 0,
|
||||
mismatches: 0,
|
||||
bytes: 0,
|
||||
})
|
||||
}
|
||||
|
||||
fn submit(&mut self, au: &EncodedFrame) -> Result<()> {
|
||||
let mut flags = FLAG_PIC as u32;
|
||||
if au.keyframe {
|
||||
flags |= FLAG_SOF as u32;
|
||||
}
|
||||
self.host
|
||||
.submit_frame(&au.data, au.pts_ns, flags)
|
||||
.map_err(|e| anyhow!("host submit_frame: {e:?}"))?;
|
||||
self.submitted += 1;
|
||||
self.bytes += au.data.len() as u64;
|
||||
|
||||
// Lossless + in-order loopback: each submit yields exactly the AU just sent.
|
||||
loop {
|
||||
match self.client.poll_frame() {
|
||||
Ok(frame) => {
|
||||
self.recovered += 1;
|
||||
if frame.data != au.data {
|
||||
self.mismatches += 1;
|
||||
tracing::warn!(
|
||||
recovered = self.recovered,
|
||||
got = frame.data.len(),
|
||||
expected = au.data.len(),
|
||||
"loopback AU mismatch"
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(punktfunk_core::PunktfunkError::NoFrame) => break,
|
||||
Err(e) => return Err(anyhow!("client poll_frame: {e:?}")),
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn report(&self) {
|
||||
tracing::info!(
|
||||
submitted = self.submitted,
|
||||
recovered = self.recovered,
|
||||
mismatches = self.mismatches,
|
||||
bytes = self.bytes,
|
||||
"punktfunk-core loopback: AUs FEC-packetized → sent → reassembled & verified"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,794 @@
|
||||
//! M3 — the `punktfunk/1` native host: QUIC control plane + the hardened M1 data plane over UDP.
|
||||
//! This is punktfunk's own protocol, past the GameStream compatibility layer:
|
||||
//!
|
||||
//! * the Welcome negotiates **GF(2¹⁶) Leopard FEC** (inexpressible in GameStream) + AES-GCM;
|
||||
//! * the client's Hello requests a display mode and the host creates a **native virtual
|
||||
//! output** at exactly that size/refresh (same vdisplay backends as the GameStream path);
|
||||
//! * **input arrives as QUIC datagrams** — encrypted, congestion-managed, no ENet
|
||||
//! retransmission spikes — and feeds the session's input injector;
|
||||
//! * video frames carry a wall-clock `pts_ns`, so a same-host client measures the full
|
||||
//! capture→encode→FEC→UDP→reassemble latency per frame.
|
||||
//!
|
||||
//! `punktfunk-host m3-host [--port 9777] [--source synthetic|virtual] [--seconds 30]
|
||||
//! [--frames 300]` serves sessions back to back (one at a time — the virtual output and
|
||||
//! encoder are single-tenant); `punktfunk-client-rs --connect host:9777` is the counterpart.
|
||||
//! The data plane runs on native threads (no async on the frame path).
|
||||
//!
|
||||
//! Alongside video + input, a session carries **audio** (desktop Opus, 5 ms frames, host →
|
||||
//! client QUIC datagrams tagged [`punktfunk_core::quic::AUDIO_MAGIC`]) and **gamepads** (client
|
||||
//! GamepadButton/GamepadAxis datagrams accumulated into per-pad state for the virtual xpad;
|
||||
//! force feedback flows back as [`punktfunk_core::quic::RUMBLE_MAGIC`] datagrams).
|
||||
//!
|
||||
//! Trust: the host serves with its persistent identity (`~/.config/punktfunk/cert.pem`, shared
|
||||
//! with GameStream pairing) and logs the SHA-256 fingerprint clients pin.
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use punktfunk_core::config::{FecConfig, FecScheme, Role};
|
||||
use punktfunk_core::input::{InputEvent, InputKind};
|
||||
use punktfunk_core::packet::{FLAG_PIC, FLAG_SOF};
|
||||
use punktfunk_core::quic::{endpoint, io, Hello, Start, Welcome};
|
||||
use punktfunk_core::transport::UdpTransport;
|
||||
use punktfunk_core::Session;
|
||||
use rand::RngCore;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Where a `punktfunk/1` session's video frames come from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum M3Source {
    /// Deterministic test frames (protocol verification; the client byte-checks them).
    Synthetic,
    /// Real capture: virtual display at the client's requested mode → NVENC.
    Virtual,
}
|
||||
|
||||
pub struct M3Options {
|
||||
pub port: u16,
|
||||
pub source: M3Source,
|
||||
/// Virtual-source stream duration.
|
||||
pub seconds: u32,
|
||||
/// Synthetic-source frame count.
|
||||
pub frames: u32,
|
||||
/// Exit after this many sessions (0 = serve forever).
|
||||
pub max_sessions: u32,
|
||||
}
|
||||
|
||||
/// Deterministic test frame: `u32 LE index` then `data[i] = idx + i` (wrapping).
///
/// Returns exactly `len` bytes. The first four bytes are `idx` in little-endian order;
/// every later byte at offset `i` is the low byte of `idx` plus the low byte of `i`,
/// wrapping at 256. When `len < 4` the index header is truncated to fit instead of
/// panicking, so any length (including 0) is valid.
pub fn test_frame(idx: u32, len: usize) -> Vec<u8> {
    let mut d = vec![0u8; len];
    let header = idx.to_le_bytes();
    // Copy as much of the 4-byte header as the frame has room for.
    let header_len = header.len().min(len);
    d[..header_len].copy_from_slice(&header[..header_len]);
    for (i, b) in d.iter_mut().enumerate().skip(header.len()) {
        *b = (idx as u8).wrapping_add(i as u8);
    }
    d
}
|
||||
|
||||
/// Wall-clock time as nanoseconds since the Unix epoch, truncated to `u64`.
///
/// Returns 0 when the system clock reads earlier than the epoch.
fn now_ns() -> u64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos() as u64,
        Err(_) => 0,
    }
}
|
||||
|
||||
pub fn run(opts: M3Options) -> Result<()> {
|
||||
let rt = tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(2)
|
||||
.enable_all()
|
||||
.build()
|
||||
.context("tokio runtime")?;
|
||||
rt.block_on(serve(opts))
|
||||
}
|
||||
|
||||
/// Render a 32-byte fingerprint as 64 lowercase hexadecimal characters.
fn fingerprint_hex(fp: &[u8; 32]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut hex = String::with_capacity(fp.len() * 2);
    for &byte in fp {
        hex.push(DIGITS[usize::from(byte >> 4)] as char);
        hex.push(DIGITS[usize::from(byte & 0x0f)] as char);
    }
    hex
}
|
||||
|
||||
/// The persistent listener: accept clients back to back on one endpoint. Sessions are
|
||||
/// served one at a time (the virtual output + NVENC are single-tenant); a client that
|
||||
/// connects mid-session waits in the accept queue. A failed session logs and the loop
|
||||
/// keeps serving — only endpoint-level failures are fatal.
|
||||
async fn serve(opts: M3Options) -> Result<()> {
|
||||
let identity = crate::gamestream::cert::ServerIdentity::load_or_create()
|
||||
.context("load host identity (~/.config/punktfunk)")?;
|
||||
let fingerprint = endpoint::fingerprint_of_pem(&identity.cert_pem)
|
||||
.map_err(|e| anyhow!("cert fingerprint: {e}"))?;
|
||||
let ep = endpoint::server_with_identity(
|
||||
([0, 0, 0, 0], opts.port).into(),
|
||||
&identity.cert_pem,
|
||||
&identity.key_pem,
|
||||
)
|
||||
.map_err(|e| anyhow!("QUIC server endpoint: {e}"))?;
|
||||
tracing::info!(
|
||||
port = opts.port,
|
||||
source = ?opts.source,
|
||||
fingerprint = %fingerprint_hex(&fingerprint),
|
||||
"punktfunk/1 host listening (QUIC) — clients pin this fingerprint"
|
||||
);
|
||||
|
||||
// One audio capturer for the whole host lifetime, handed from session to session
|
||||
// (PipeWire streams have no cheap teardown — see AudioCapSlot).
|
||||
let audio_cap: AudioCapSlot = Arc::new(std::sync::Mutex::new(None));
|
||||
|
||||
let mut served = 0u32;
|
||||
loop {
|
||||
let incoming = ep
|
||||
.accept()
|
||||
.await
|
||||
.ok_or_else(|| anyhow!("endpoint closed"))?;
|
||||
let conn = match incoming.await {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "QUIC accept failed");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let peer = conn.remote_address();
|
||||
tracing::info!(%peer, "punktfunk/1 client connected");
|
||||
if let Err(e) = serve_session(conn, &opts, &audio_cap).await {
|
||||
tracing::warn!(%peer, error = %format!("{e:#}"), "session ended with error");
|
||||
} else {
|
||||
tracing::info!(%peer, "session complete");
|
||||
}
|
||||
served += 1;
|
||||
if opts.max_sessions != 0 && served >= opts.max_sessions {
|
||||
break;
|
||||
}
|
||||
tracing::info!("ready for the next client");
|
||||
}
|
||||
ep.wait_idle().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Upper bound on the control phase of a session (10 seconds). Sessions are accepted
/// sequentially, so a client that connects and never completes the handshake would
/// otherwise wedge the host for every later client.
const HANDSHAKE_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(10_000);
|
||||
|
||||
/// Persistent audio-capturer slot, reused across sessions (same pattern as the GameStream
|
||||
/// path): `PwAudioCapturer` has no teardown — dropping one per session would leak its
|
||||
/// PipeWire thread + core connection + live capture node on the daemon every session.
|
||||
type AudioCapSlot = Arc<std::sync::Mutex<Option<Box<dyn crate::audio::AudioCapturer>>>>;
|
||||
|
||||
/// One client session: handshake → input/audio planes → data plane until done/disconnect.
|
||||
/// Everything torn down on return (RAII: virtual output, encoder, threads via channel close).
|
||||
async fn serve_session(
|
||||
conn: quinn::Connection,
|
||||
opts: &M3Options,
|
||||
audio_cap: &AudioCapSlot,
|
||||
) -> Result<()> {
|
||||
let peer = conn.remote_address();
|
||||
|
||||
let source = opts.source;
|
||||
let frames = opts.frames;
|
||||
let handshake = async {
|
||||
let (mut send, mut recv) = conn.accept_bi().await.context("accept control stream")?;
|
||||
|
||||
let hello = Hello::decode(&io::read_msg(&mut recv).await?)
|
||||
.map_err(|e| anyhow!("Hello decode: {e:?}"))?;
|
||||
anyhow::ensure!(
|
||||
hello.abi_version == punktfunk_core::ABI_VERSION,
|
||||
"ABI mismatch: client {} host {}",
|
||||
hello.abi_version,
|
||||
punktfunk_core::ABI_VERSION
|
||||
);
|
||||
crate::encode::validate_dimensions(
|
||||
crate::encode::Codec::H265,
|
||||
hello.mode.width,
|
||||
hello.mode.height,
|
||||
)
|
||||
.context("client-requested mode")?;
|
||||
|
||||
// Reserve a UDP port for the data plane (bind, read it back, rebind in UdpTransport).
|
||||
let probe = std::net::UdpSocket::bind("0.0.0.0:0")?;
|
||||
let udp_port = probe.local_addr()?.port();
|
||||
drop(probe);
|
||||
|
||||
let mut key = [0u8; 16];
|
||||
rand::thread_rng().fill_bytes(&mut key);
|
||||
let welcome = Welcome {
|
||||
abi_version: punktfunk_core::ABI_VERSION,
|
||||
udp_port,
|
||||
mode: hello.mode,
|
||||
// The post-GameStream point of punktfunk/1: Leopard GF(2¹⁶) FEC + real encryption.
|
||||
fec: FecConfig {
|
||||
scheme: FecScheme::Gf16,
|
||||
fec_percent: 20,
|
||||
max_data_per_block: 4096,
|
||||
},
|
||||
shard_payload: 1200,
|
||||
encrypt: true,
|
||||
key,
|
||||
salt: *b"pkf1",
|
||||
frames: match source {
|
||||
M3Source::Synthetic => frames,
|
||||
M3Source::Virtual => 0, // unbounded — client streams until we close
|
||||
},
|
||||
};
|
||||
io::write_msg(&mut send, &welcome.encode()).await?;
|
||||
|
||||
let start = Start::decode(&io::read_msg(&mut recv).await?)
|
||||
.map_err(|e| anyhow!("Start decode: {e:?}"))?;
|
||||
Ok::<_, anyhow::Error>((hello, welcome, udp_port, start))
|
||||
};
|
||||
let (hello, welcome, udp_port, start) = tokio::time::timeout(HANDSHAKE_TIMEOUT, handshake)
|
||||
.await
|
||||
.map_err(|_| anyhow!("handshake timed out after {HANDSHAKE_TIMEOUT:?}"))??;
|
||||
let client_udp = std::net::SocketAddr::new(peer.ip(), start.client_udp_port);
|
||||
tracing::info!(%client_udp, udp_port, mode = ?hello.mode, "handshake complete — streaming");
|
||||
|
||||
// Input plane: QUIC datagrams → channel → a native injector thread (the injector owns
|
||||
// non-Send compositor state, so it lives on its own thread). The thread also owns the
|
||||
// session's virtual gamepads and sends force feedback back over `conn`. It exits when
|
||||
// the channel closes (datagram task ends on disconnect) — fresh state per session.
|
||||
let (input_tx, input_rx) = std::sync::mpsc::channel::<InputEvent>();
|
||||
let input_handle = {
|
||||
let conn = conn.clone();
|
||||
std::thread::Builder::new()
|
||||
.name("punktfunk-m3-input".into())
|
||||
.spawn(move || input_thread(input_rx, conn))
|
||||
.context("spawn input thread")?
|
||||
};
|
||||
let input_conn = conn.clone();
|
||||
tokio::spawn(async move {
|
||||
let mut count = 0u64;
|
||||
while let Ok(d) = input_conn.read_datagram().await {
|
||||
if let Some(ev) = InputEvent::decode(&d) {
|
||||
count += 1;
|
||||
if input_tx.send(ev).is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::info!(count, "input datagram stream ended");
|
||||
});
|
||||
|
||||
// Stop signal: stream duration elapsed or the client went away.
|
||||
let stop = Arc::new(AtomicBool::new(false));
|
||||
{
|
||||
let stop = stop.clone();
|
||||
let conn = conn.clone();
|
||||
tokio::spawn(async move {
|
||||
conn.closed().await;
|
||||
stop.store(true, Ordering::SeqCst);
|
||||
});
|
||||
}
|
||||
|
||||
// Audio plane (virtual source only — synthetic runs are protocol tests): desktop Opus
|
||||
// → host→client QUIC datagrams, on its own native thread. Best-effort on every failure
|
||||
// (no PipeWire audio, spawn error): the session continues without audio — and a spawn
|
||||
// error must NOT early-return here, the threads above are already running.
|
||||
let audio_handle = if opts.source == M3Source::Virtual {
|
||||
let conn = conn.clone();
|
||||
let stop = stop.clone();
|
||||
let cap = audio_cap.clone();
|
||||
std::thread::Builder::new()
|
||||
.name("punktfunk-m3-audio".into())
|
||||
.spawn(move || audio_thread(conn, stop, cap))
|
||||
.map_err(|e| tracing::error!(error = %e, "audio thread spawn failed — session continues without audio"))
|
||||
.ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Data plane on a native thread (no async on the hot path — design invariant).
|
||||
let cfg = welcome.session_config(Role::Host);
|
||||
let source = opts.source;
|
||||
let (seconds, frames) = (opts.seconds, opts.frames);
|
||||
let mode = hello.mode;
|
||||
let stop_stream = stop.clone();
|
||||
let result: Result<()> = async {
|
||||
tokio::task::spawn_blocking(move || -> Result<()> {
|
||||
let transport =
|
||||
UdpTransport::connect(&format!("0.0.0.0:{udp_port}"), &client_udp.to_string())
|
||||
.context("bind data plane")?;
|
||||
let mut session = Session::new(cfg, Box::new(transport))
|
||||
.map_err(|e| anyhow!("host session: {e:?}"))?;
|
||||
match source {
|
||||
M3Source::Synthetic => synthetic_stream(&mut session, frames, &stop_stream),
|
||||
M3Source::Virtual => virtual_stream(&mut session, mode, seconds, &stop_stream),
|
||||
}
|
||||
})
|
||||
.await
|
||||
.context("stream thread")??;
|
||||
// Give the client a moment to drain before the close.
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
Ok(())
|
||||
}
|
||||
.await;
|
||||
|
||||
// Teardown on EVERY path (a failed data plane must not leave the connection open with
|
||||
// audio still streaming): stop the audio thread, close, then join both side-plane
|
||||
// threads so the next session starts fresh (closing the connection ends the datagram
|
||||
// task, which drops the input channel, which exits the input thread + its gamepads).
|
||||
stop.store(true, Ordering::SeqCst);
|
||||
conn.close(
|
||||
if result.is_ok() { 0u32 } else { 1u32 }.into(),
|
||||
if result.is_ok() { b"done" } else { b"error" },
|
||||
);
|
||||
let _ = tokio::task::spawn_blocking(move || {
|
||||
if let Some(h) = audio_handle {
|
||||
let _ = h.join();
|
||||
}
|
||||
let _ = input_handle.join();
|
||||
})
|
||||
.await;
|
||||
result
|
||||
}
|
||||
|
||||
/// Accumulated state of one gamepad. punktfunk/1 gamepad events are incremental (one
/// button or axis per datagram, see `punktfunk_core::input::gamepad`), whereas the
/// virtual xpad applies full frames — this struct is the running total in between.
/// The default value is "nothing pressed, everything centred" (all zeros).
#[derive(Default, Clone, Copy)]
struct PadState {
    /// Bitmask of currently held buttons.
    buttons: u32,
    /// Left trigger value.
    left_trigger: u8,
    /// Right trigger value.
    right_trigger: u8,
    /// Left stick, horizontal axis.
    ls_x: i16,
    /// Left stick, vertical axis.
    ls_y: i16,
    /// Right stick, horizontal axis.
    rs_x: i16,
    /// Right stick, vertical axis.
    rs_y: i16,
}
|
||||
|
||||
impl PadState {
|
||||
/// Fold one wire event into the state. `false` = unknown axis id (event dropped).
|
||||
fn apply(&mut self, ev: &InputEvent) -> bool {
|
||||
if ev.kind == InputKind::GamepadButton {
|
||||
if ev.x != 0 {
|
||||
self.buttons |= ev.code;
|
||||
} else {
|
||||
self.buttons &= !ev.code;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
use punktfunk_core::input::gamepad::*;
|
||||
let stick = ev.x.clamp(i16::MIN as i32, i16::MAX as i32) as i16;
|
||||
let trigger = ev.x.clamp(0, 255) as u8;
|
||||
match ev.code {
|
||||
AXIS_LS_X => self.ls_x = stick,
|
||||
AXIS_LS_Y => self.ls_y = stick,
|
||||
AXIS_RS_X => self.rs_x = stick,
|
||||
AXIS_RS_Y => self.rs_y = stick,
|
||||
AXIS_LT => self.left_trigger = trigger,
|
||||
AXIS_RT => self.right_trigger = trigger,
|
||||
_ => return false,
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
fn frame(&self, index: usize, active_mask: u16) -> crate::gamestream::gamepad::GamepadFrame {
|
||||
crate::gamestream::gamepad::GamepadFrame {
|
||||
index: index as i16,
|
||||
active_mask,
|
||||
buttons: self.buttons,
|
||||
left_trigger: self.left_trigger,
|
||||
right_trigger: self.right_trigger,
|
||||
ls_x: self.ls_x,
|
||||
ls_y: self.ls_y,
|
||||
rs_x: self.rs_x,
|
||||
rs_y: self.rs_y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Number of pad indices addressable on the wire (`flags` field) — one per bit of the
/// `u16` active-pad mask, i.e. 16. The uinput manager caps actual pad creation at its
/// own MAX_PADS.
const MAX_WIRE_PADS: usize = u16::BITS as usize;
|
||||
|
||||
/// The injector thread: open the session's input backend on first event, then inject.
|
||||
/// Gamepad kinds route to the session's [`GamepadManager`](crate::inject::gamepad), with
|
||||
/// force feedback pumped between events and sent back as rumble datagrams.
|
||||
fn input_thread(rx: std::sync::mpsc::Receiver<InputEvent>, conn: quinn::Connection) {
|
||||
let mut injector: Option<Box<dyn crate::inject::InputInjector>> = None;
|
||||
let mut injector_broken = false;
|
||||
let mut pads = crate::inject::gamepad::GamepadManager::new();
|
||||
let mut pad_state = [PadState::default(); MAX_WIRE_PADS];
|
||||
let mut pad_mask = 0u16;
|
||||
// Rumble is idempotent state on a lossy channel (client-side overflow drops datagrams),
|
||||
// so re-send the current state of every rumbling-capable pad every 500 ms — a dropped
|
||||
// transition (including a stop) heals on the next refresh.
|
||||
let mut rumble_state = [(0u16, 0u16); MAX_WIRE_PADS];
|
||||
let mut rumble_seen = [false; MAX_WIRE_PADS];
|
||||
let mut last_refresh = std::time::Instant::now();
|
||||
loop {
|
||||
match rx.recv_timeout(std::time::Duration::from_millis(4)) {
|
||||
Ok(ev) => match ev.kind {
|
||||
InputKind::GamepadButton | InputKind::GamepadAxis => {
|
||||
let idx = ev.flags as usize;
|
||||
if idx >= MAX_WIRE_PADS || !pad_state[idx].apply(&ev) {
|
||||
continue;
|
||||
}
|
||||
pad_mask |= 1 << idx;
|
||||
let frame = pad_state[idx].frame(idx, pad_mask);
|
||||
pads.handle(&crate::gamestream::gamepad::GamepadEvent::State(frame));
|
||||
}
|
||||
_ => {
|
||||
if injector.is_none() && !injector_broken {
|
||||
let backend = crate::inject::default_backend();
|
||||
match crate::inject::open(backend) {
|
||||
Ok(i) => {
|
||||
tracing::info!(?backend, "punktfunk/1 input injector opened");
|
||||
injector = Some(i);
|
||||
}
|
||||
Err(e) => {
|
||||
// Keep running for gamepads — uinput pads work even when
|
||||
// the pointer/keyboard backend doesn't.
|
||||
tracing::error!(error = %format!("{e:#}"), "pointer/keyboard injection unavailable");
|
||||
injector_broken = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(inj) = injector.as_mut() {
|
||||
if let Err(e) = inj.inject(&ev) {
|
||||
tracing::warn!(error = %format!("{e:#}"), "inject failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {}
|
||||
Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => break,
|
||||
}
|
||||
// Service force feedback every iteration (≤4 ms latency; games block on EVIOCSFF).
|
||||
pads.pump_rumble(|pad, low, high| {
|
||||
if let Some(s) = rumble_state.get_mut(pad as usize) {
|
||||
*s = (low, high);
|
||||
rumble_seen[pad as usize] = true;
|
||||
}
|
||||
let d = punktfunk_core::quic::encode_rumble_datagram(pad, low, high);
|
||||
let _ = conn.send_datagram(d.to_vec().into());
|
||||
});
|
||||
if last_refresh.elapsed() >= std::time::Duration::from_millis(500) {
|
||||
last_refresh = std::time::Instant::now();
|
||||
for (i, &(low, high)) in rumble_state.iter().enumerate() {
|
||||
if rumble_seen[i] {
|
||||
let d = punktfunk_core::quic::encode_rumble_datagram(i as u16, low, high);
|
||||
let _ = conn.send_datagram(d.to_vec().into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The audio thread: desktop capture → Opus (48 kHz stereo, 5 ms, CBR — same tuning as the
|
||||
/// GameStream path) → `AUDIO_MAGIC` datagrams. QUIC already encrypts; no extra layer.
|
||||
/// The capturer comes from (and returns to) the persistent slot — see [`AudioCapSlot`].
|
||||
#[cfg(target_os = "linux")]
|
||||
fn audio_thread(conn: quinn::Connection, stop: Arc<AtomicBool>, audio_cap: AudioCapSlot) {
|
||||
use crate::audio::{CHANNELS, SAMPLE_RATE};
|
||||
const FRAME_MS: usize = 5;
|
||||
const SAMPLES_PER_FRAME: usize = SAMPLE_RATE as usize * FRAME_MS / 1000; // 240
|
||||
|
||||
let mut capturer = match audio_cap.lock().unwrap().take() {
|
||||
Some(mut c) => {
|
||||
c.drain(); // discard audio captured between sessions
|
||||
c
|
||||
}
|
||||
None => match crate::audio::open_audio_capture() {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %format!("{e:#}"), "punktfunk/1 audio unavailable — session continues without it");
|
||||
return;
|
||||
}
|
||||
},
|
||||
};
|
||||
let mut enc = match opus::Encoder::new(
|
||||
SAMPLE_RATE,
|
||||
opus::Channels::Stereo,
|
||||
opus::Application::LowDelay,
|
||||
) {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
tracing::error!(error = %e, "opus encoder");
|
||||
*audio_cap.lock().unwrap() = Some(capturer);
|
||||
return;
|
||||
}
|
||||
};
|
||||
enc.set_bitrate(opus::Bitrate::Bits(128_000)).ok();
|
||||
enc.set_vbr(false).ok();
|
||||
|
||||
let frame_len = SAMPLES_PER_FRAME * CHANNELS;
|
||||
let mut acc: Vec<f32> = Vec::with_capacity(frame_len * 4);
|
||||
let mut opus_buf = vec![0u8; 1500];
|
||||
let mut seq: u32 = 0;
|
||||
let mut capture_dead = false;
|
||||
tracing::info!("punktfunk/1 audio streaming (Opus 48 kHz stereo, 5 ms datagrams)");
|
||||
'session: while !stop.load(Ordering::SeqCst) {
|
||||
let chunk = match capturer.next_chunk() {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %format!("{e:#}"), "audio capture ended");
|
||||
capture_dead = true;
|
||||
break;
|
||||
}
|
||||
};
|
||||
acc.extend_from_slice(&chunk);
|
||||
while acc.len() >= frame_len {
|
||||
let frame: Vec<f32> = acc.drain(..frame_len).collect();
|
||||
let pts_ns = now_ns();
|
||||
match enc.encode_float(&frame, &mut opus_buf) {
|
||||
Ok(n) => {
|
||||
let d =
|
||||
punktfunk_core::quic::encode_audio_datagram(seq, pts_ns, &opus_buf[..n]);
|
||||
if conn.send_datagram(d.into()).is_err() {
|
||||
break 'session; // connection gone
|
||||
}
|
||||
seq = seq.wrapping_add(1);
|
||||
}
|
||||
Err(e) => tracing::warn!(error = %e, "opus encode"),
|
||||
}
|
||||
}
|
||||
}
|
||||
// Return the live capturer for the next session; a dead one is dropped so the next
|
||||
// session reopens fresh.
|
||||
if !capture_dead {
|
||||
*audio_cap.lock().unwrap() = Some(capturer);
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-Linux stand-in for the audio thread. punktfunk/1 audio needs Linux (PipeWire
/// capture + libopus), so this only logs a warning and returns; the session runs
/// without audio, same as when the capturer fails to open.
#[cfg(not(target_os = "linux"))]
fn audio_thread(_conn: quinn::Connection, _stop: Arc<AtomicBool>, _audio_cap: AudioCapSlot) {
    tracing::warn!(
        "punktfunk/1 audio requires Linux (PipeWire + libopus) — session continues without it"
    );
}
|
||||
|
||||
fn synthetic_stream(session: &mut Session, frames: u32, stop: &AtomicBool) -> Result<()> {
|
||||
let interval = std::time::Duration::from_millis(1000 / 60);
|
||||
for idx in 0..frames {
|
||||
if stop.load(Ordering::SeqCst) {
|
||||
break;
|
||||
}
|
||||
let data = test_frame(idx, 64 * 1024);
|
||||
session
|
||||
.submit_frame(&data, now_ns(), (FLAG_PIC | FLAG_SOF) as u32)
|
||||
.map_err(|e| anyhow!("submit_frame: {e:?}"))?;
|
||||
std::thread::sleep(interval);
|
||||
}
|
||||
tracing::info!(frames, "synthetic stream complete");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Real capture→encode→punktfunk/1: a native virtual output at the client's mode, NVENC AUs
|
||||
/// stamped with the capture wall clock (the client derives per-frame pipeline latency).
|
||||
fn virtual_stream(
|
||||
session: &mut Session,
|
||||
mode: punktfunk_core::Mode,
|
||||
seconds: u32,
|
||||
stop: &AtomicBool,
|
||||
) -> Result<()> {
|
||||
let compositor = crate::vdisplay::detect().context("detect compositor")?;
|
||||
tracing::info!(?compositor, ?mode, "punktfunk/1 virtual display");
|
||||
let mut vd = crate::vdisplay::open(compositor)?;
|
||||
let vout = vd.create(mode).context("create virtual output")?;
|
||||
let mut capturer =
|
||||
crate::capture::capture_virtual_output(vout).context("capture virtual output")?;
|
||||
capturer.set_active(true);
|
||||
|
||||
let mut frame = capturer.next_frame().context("first frame")?;
|
||||
let mut enc = crate::encode::open_video(
|
||||
crate::encode::Codec::H265,
|
||||
frame.format,
|
||||
frame.width,
|
||||
frame.height,
|
||||
mode.refresh_hz,
|
||||
20_000_000,
|
||||
frame.is_cuda(),
|
||||
)
|
||||
.context("open NVENC")?;
|
||||
|
||||
let interval = std::time::Duration::from_secs_f64(1.0 / mode.refresh_hz.max(1) as f64);
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(seconds as u64);
|
||||
let mut next = std::time::Instant::now();
|
||||
let mut sent: u64 = 0;
|
||||
while !stop.load(Ordering::SeqCst) && std::time::Instant::now() < deadline {
|
||||
if let Some(f) = capturer.try_latest().context("capture")? {
|
||||
frame = f;
|
||||
}
|
||||
let capture_ns = now_ns();
|
||||
enc.submit(&frame).context("encoder submit")?;
|
||||
while let Some(au) = enc.poll().context("encoder poll")? {
|
||||
let flags = if au.keyframe {
|
||||
(FLAG_PIC | FLAG_SOF) as u32
|
||||
} else {
|
||||
FLAG_PIC as u32
|
||||
};
|
||||
session
|
||||
.submit_frame(&au.data, capture_ns, flags)
|
||||
.map_err(|e| anyhow!("submit_frame: {e:?}"))?;
|
||||
sent += 1;
|
||||
}
|
||||
next += interval;
|
||||
match next.checked_duration_since(std::time::Instant::now()) {
|
||||
Some(d) => std::thread::sleep(d),
|
||||
None => next = std::time::Instant::now(),
|
||||
}
|
||||
}
|
||||
tracing::info!(sent, "punktfunk/1 virtual stream complete");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a gamepad wire event for pad `pad` with the given kind, code and value.
    fn gp(kind: InputKind, code: u32, x: i32, pad: u32) -> InputEvent {
        InputEvent {
            kind,
            _pad: [0; 3],
            code,
            x,
            y: 0,
            flags: pad,
        }
    }

    /// Incremental wire events accumulate into the full pad frame the virtual xpad applies.
    #[test]
    fn gamepad_accumulator() {
        use punktfunk_core::input::gamepad::*;
        let mut s = PadState::default();
        let presses = [
            gp(InputKind::GamepadButton, BTN_A, 1, 0),
            gp(InputKind::GamepadButton, BTN_LB, 1, 0),
            gp(InputKind::GamepadAxis, AXIS_LS_X, -32768, 0),
            gp(InputKind::GamepadAxis, AXIS_RT, 255, 0),
        ];
        for ev in presses.iter() {
            assert!(s.apply(ev));
        }
        let f = s.frame(2, 0b0100);
        assert_eq!(f.buttons, BTN_A | BTN_LB);
        assert_eq!((f.ls_x, f.right_trigger), (-32768, 255));
        assert_eq!((f.index, f.active_mask), (2, 0b0100));

        // Release folds out; axis values clamp; unknown axis ids are rejected.
        let release_a = gp(InputKind::GamepadButton, BTN_A, 0, 0);
        assert!(s.apply(&release_a));
        assert_eq!(s.frame(0, 1).buttons, BTN_LB);
        let oversized_trigger = gp(InputKind::GamepadAxis, AXIS_LT, 9_999, 0);
        assert!(s.apply(&oversized_trigger));
        assert_eq!(s.left_trigger, 255);
        let unknown_axis = gp(InputKind::GamepadAxis, 42, 1, 0);
        assert!(!s.apply(&unknown_axis));

        // The punktfunk/1 button bits are the GameStream bits — one wire contract end to end.
        assert_eq!(BTN_A, crate::gamestream::gamepad::BTN_A);
        assert_eq!(BTN_GUIDE, crate::gamestream::gamepad::BTN_GUIDE);
        assert_eq!(BTN_DPAD_UP, crate::gamestream::gamepad::BTN_DPAD_UP);
    }

    /// Pull and byte-verify `count` synthetic frames through the C ABI connection.
    ///
    /// # Safety
    /// `conn` must be a live connection handle returned by `punktfunk_connect` that has
    /// not been closed yet.
    unsafe fn pull_verified(conn: *mut punktfunk_core::abi::PunktfunkConnection, count: u32) {
        use punktfunk_core::error::PunktfunkStatus;
        let mut verified = 0u32;
        let mut frame = unsafe { std::mem::zeroed() };
        while verified < count {
            let status = unsafe {
                punktfunk_core::abi::punktfunk_connection_next_au(conn, &mut frame, 2000)
            };
            match status {
                PunktfunkStatus::Ok => {
                    let data = unsafe { std::slice::from_raw_parts(frame.data, frame.len) };
                    // The first four bytes carry the frame index the rest is derived from.
                    let idx = u32::from_le_bytes(data[0..4].try_into().unwrap());
                    assert_eq!(
                        data,
                        &test_frame(idx, data.len())[..],
                        "frame {idx} content"
                    );
                    verified += 1;
                }
                PunktfunkStatus::NoFrame => continue,
                other => panic!("next_au: {other:?}"),
            }
        }
    }

    /// End-to-end through the C ABI — the exact contract platform clients (Swift) link:
    /// in-process punktfunk/1 host, `punktfunk_connect` (TOFU → pinned reconnect) →
    /// `punktfunk_connection_next_au` pulls verified frames → `punktfunk_connection_send_input`
    /// enqueues → `punktfunk_connection_close`. Three sequential sessions against ONE host
    /// process prove the persistent listener, and a wrong pin is rejected.
    #[test]
    fn c_abi_connection_roundtrip() {
        use punktfunk_core::abi::{
            punktfunk_connect, punktfunk_connection_close, punktfunk_connection_mode,
            punktfunk_connection_send_input,
        };
        use punktfunk_core::error::PunktfunkStatus;

        let host = std::thread::spawn(|| {
            run(M3Options {
                port: 19777,
                source: M3Source::Synthetic,
                seconds: 0,
                frames: 25,
                max_sessions: 3,
            })
        });
        // Give the host thread time to bring its endpoint up.
        std::thread::sleep(std::time::Duration::from_millis(500));

        let addr = std::ffi::CString::new("127.0.0.1").unwrap();
        // Every session asks for the same 1280x720@60 mode with a 10 s connect timeout;
        // only the pin (input) and the observed-fingerprint buffer (output) vary.
        let connect = |pin: *const u8, observed: *mut u8| unsafe {
            punktfunk_connect(
                addr.as_ptr(),
                19777,
                1280,
                720,
                60,
                pin,
                observed,
                10_000,
            )
        };

        // Session 1: TOFU (no pin) — observe the host fingerprint.
        let mut observed = [0u8; 32];
        let conn = connect(std::ptr::null(), observed.as_mut_ptr());
        assert!(!conn.is_null(), "punktfunk_connect failed");
        assert_ne!(observed, [0u8; 32], "fingerprint not reported");

        let (mut w, mut h, mut hz) = (0u32, 0u32, 0u32);
        assert_eq!(
            unsafe { punktfunk_connection_mode(conn, &mut w, &mut h, &mut hz) },
            PunktfunkStatus::Ok
        );
        assert_eq!((w, h, hz), (1280, 720, 60));

        unsafe { pull_verified(conn, 25) };

        let ev = punktfunk_core::input::InputEvent {
            kind: punktfunk_core::input::InputKind::MouseMove,
            _pad: [0; 3],
            code: 0,
            x: 1,
            y: 2,
            flags: 0,
        };
        assert_eq!(
            unsafe { punktfunk_connection_send_input(conn, &ev) },
            PunktfunkStatus::Ok
        );
        unsafe { punktfunk_connection_close(conn) };

        // Session 2 (same host process — the listener survived): pin the fingerprint.
        let conn2 = connect(observed.as_ptr(), std::ptr::null_mut());
        assert!(!conn2.is_null(), "pinned reconnect failed");
        unsafe { pull_verified(conn2, 25) };
        unsafe { punktfunk_connection_close(conn2) };

        // Session 3: a wrong pin must be rejected by the handshake.
        let bad = [0xAAu8; 32];
        let conn3 = connect(bad.as_ptr(), std::ptr::null_mut());
        assert!(conn3.is_null(), "wrong pin must fail the handshake");

        // The rejected attempt does not count as a host session: a TLS-failed handshake
        // never yields a connection, so accept() is still waiting. Connect once more
        // (TOFU) to complete the host's third session and let it exit.
        let conn4 = connect(std::ptr::null(), std::ptr::null_mut());
        assert!(!conn4.is_null());
        unsafe { pull_verified(conn4, 25) };
        unsafe { punktfunk_connection_close(conn4) };

        host.join().unwrap().unwrap();
    }
}
|
||||
@@ -0,0 +1,340 @@
|
||||
//! `punktfunk-host` — the Linux streaming host (plan §2, §6, §7).
|
||||
//!
|
||||
//! Creates a client-sized virtual display, captures it via PipeWire, encodes with
|
||||
//! VAAPI/NVENC, and hands encoded access units to `punktfunk_core` for FEC + packetization +
|
||||
//! pacing + send. Input flows back via libei/uinput. The platform backends are
|
||||
//! `#[cfg(target_os = "linux")]`; the crate compiles everywhere so the workspace builds
|
||||
//! on non-Linux dev machines — it just can't run the pipeline there.
|
||||
//!
|
||||
//! Status: M0. The `m0` subcommand runs the capture→encode→file pipeline spike and feeds
|
||||
//! the encoded AUs through a `punktfunk_core` loopback. M2 wires the full P1 host that a stock
|
||||
//! Moonlight client connects to.
|
||||
|
||||
// Scaffold: trait methods and config paths are defined ahead of their backends.
|
||||
#![allow(dead_code)]
|
||||
|
||||
mod audio;
|
||||
mod capture;
|
||||
mod encode;
|
||||
mod gamestream;
|
||||
mod inject;
|
||||
mod m0;
|
||||
mod m3;
|
||||
mod mgmt;
|
||||
mod pipeline;
|
||||
mod pwinit;
|
||||
mod vdisplay;
|
||||
#[cfg(target_os = "linux")]
|
||||
mod zerocopy;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use encode::Codec;
|
||||
use m0::{Options, Source};
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn main() {
|
||||
// Logs go to stderr so stdout stays machine-readable (`punktfunk-host openapi > spec.json`).
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()),
|
||||
)
|
||||
.with_writer(std::io::stderr)
|
||||
.init();
|
||||
|
||||
if let Err(e) = real_main() {
|
||||
tracing::error!("{e:#}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fn real_main() -> Result<()> {
|
||||
tracing::info!(
|
||||
"punktfunk-host (punktfunk_core ABI v{})",
|
||||
punktfunk_core::ABI_VERSION
|
||||
);
|
||||
|
||||
let args: Vec<String> = std::env::args().skip(1).collect();
|
||||
match args.first().map(String::as_str) {
|
||||
// M2 GameStream host control plane (P1.1: mDNS + serverinfo) + management API.
|
||||
Some("serve") => gamestream::serve(parse_serve(&args[1..])?),
|
||||
// Print the management API's OpenAPI document (for client codegen).
|
||||
Some("openapi") => {
|
||||
print!("{}", mgmt::openapi_json());
|
||||
Ok(())
|
||||
}
|
||||
// Standalone input-injection smoke test (no client needed): open the session's input
|
||||
// backend and inject a scripted mouse/keyboard pattern. Watch a focused app / `wev`.
|
||||
Some("input-test") => input_test(),
|
||||
// Zero-copy FFI/GPU probe: init the EGL importer + CUDA context (no capture needed).
|
||||
#[cfg(target_os = "linux")]
|
||||
Some("zerocopy-probe") => zerocopy::probe(),
|
||||
// M0 pipeline spike.
|
||||
Some("m0") => m0::run(parse_m0(&args[1..])?),
|
||||
// M3: native punktfunk/1 host (QUIC control plane + UDP data plane).
|
||||
Some("m3-host") => {
|
||||
let get = |flag: &str| {
|
||||
args.iter()
|
||||
.skip_while(|a| *a != flag)
|
||||
.nth(1)
|
||||
.map(String::as_str)
|
||||
};
|
||||
let source = match get("--source") {
|
||||
Some("virtual") => m3::M3Source::Virtual,
|
||||
_ => m3::M3Source::Synthetic,
|
||||
};
|
||||
m3::run(m3::M3Options {
|
||||
port: get("--port").and_then(|s| s.parse().ok()).unwrap_or(9777),
|
||||
source,
|
||||
seconds: get("--seconds").and_then(|s| s.parse().ok()).unwrap_or(30),
|
||||
frames: get("--frames").and_then(|s| s.parse().ok()).unwrap_or(300),
|
||||
max_sessions: get("--max-sessions")
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0),
|
||||
})
|
||||
}
|
||||
Some("-h") | Some("--help") | Some("help") | None => {
|
||||
print_usage();
|
||||
Ok(())
|
||||
}
|
||||
// Bare flags (no subcommand) default to the m0 spike for back-compat.
|
||||
Some(_) => m0::run(parse_m0(&args)?),
|
||||
}
|
||||
}
|
||||
|
||||
/// Inject a scripted mouse + keyboard pattern through the session's input backend (libei on
|
||||
/// KWin/GNOME, wlr on Sway). Lets us validate input injection without a Moonlight client.
|
||||
#[cfg(target_os = "linux")]
|
||||
fn input_test() -> Result<()> {
|
||||
use punktfunk_core::input::{InputEvent, InputKind};
|
||||
use std::time::Duration;
|
||||
|
||||
let backend = inject::default_backend();
|
||||
tracing::info!(?backend, "input-test: opening injector");
|
||||
let mut inj = inject::open(backend)?;
|
||||
// An async backend (libei) needs a moment to establish its portal/EIS session + device
|
||||
// resume; events injected before then are dropped.
|
||||
std::thread::sleep(Duration::from_secs(4));
|
||||
|
||||
let ev = |kind, code, x, y| InputEvent {
|
||||
kind,
|
||||
_pad: [0; 3],
|
||||
code,
|
||||
x,
|
||||
y,
|
||||
flags: 0,
|
||||
};
|
||||
tracing::info!(
|
||||
"input-test: injecting a mouse square + 'A'/click taps for ~8s (watch wev / focused app)"
|
||||
);
|
||||
for i in 0..160u32 {
|
||||
let (dx, dy) = match (i / 10) % 4 {
|
||||
0 => (12, 0),
|
||||
1 => (0, 12),
|
||||
2 => (-12, 0),
|
||||
_ => (0, -12),
|
||||
};
|
||||
if let Err(e) = inj.inject(&ev(InputKind::MouseMove, 0, dx, dy)) {
|
||||
tracing::warn!(error = %format!("{e:#}"), "input-test: inject failed");
|
||||
}
|
||||
if i % 20 == 0 {
|
||||
let _ = inj.inject(&ev(InputKind::KeyDown, 0x41, 0, 0)); // 'A'
|
||||
let _ = inj.inject(&ev(InputKind::KeyUp, 0x41, 0, 0));
|
||||
let _ = inj.inject(&ev(InputKind::MouseButtonDown, 1, 0, 0)); // left click
|
||||
let _ = inj.inject(&ev(InputKind::MouseButtonUp, 1, 0, 0));
|
||||
}
|
||||
std::thread::sleep(Duration::from_millis(50));
|
||||
}
|
||||
tracing::info!("input-test: done");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Non-Linux stand-in for `input-test`: always fails, since this build has no backend.
#[cfg(not(target_os = "linux"))]
fn input_test() -> Result<()> {
    Err(anyhow::anyhow!("input-test requires Linux"))
}
|
||||
|
||||
/// `serve` options — all about the management API; the GameStream ports are protocol-fixed.
|
||||
fn parse_serve(args: &[String]) -> Result<mgmt::Options> {
|
||||
let mut opts = mgmt::Options::default();
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
let arg = args[i].as_str();
|
||||
let mut next = || {
|
||||
i += 1;
|
||||
args.get(i)
|
||||
.cloned()
|
||||
.ok_or_else(|| anyhow::anyhow!("missing value for {arg}"))
|
||||
};
|
||||
match arg {
|
||||
"--mgmt-bind" => {
|
||||
opts.bind = next()?
|
||||
.parse()
|
||||
.map_err(|_| anyhow::anyhow!("bad --mgmt-bind (want IP:PORT)"))?
|
||||
}
|
||||
"--mgmt-token" => {
|
||||
let token = next()?;
|
||||
// An empty token would satisfy the non-loopback "token required" guard
|
||||
// while authenticating nobody (or, worse, everybody) — refuse it loudly
|
||||
// rather than letting `--mgmt-token "$UNSET_VAR"` ship a dead credential.
|
||||
if token.trim().is_empty() {
|
||||
bail!("--mgmt-token must not be empty");
|
||||
}
|
||||
opts.token = Some(token);
|
||||
}
|
||||
"-h" | "--help" => {
|
||||
print_usage();
|
||||
std::process::exit(0);
|
||||
}
|
||||
other => bail!("unknown argument '{other}' (try --help)"),
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
// Flag wins over the environment so a unit file can set a default and a shell override it.
|
||||
if opts.token.is_none() {
|
||||
opts.token = std::env::var("PUNKTFUNK_MGMT_TOKEN")
|
||||
.ok()
|
||||
.filter(|t| !t.is_empty());
|
||||
}
|
||||
Ok(opts)
|
||||
}
|
||||
|
||||
fn parse_m0(args: &[String]) -> Result<Options> {
|
||||
let mut source = Source::Portal;
|
||||
let mut width = 1920u32;
|
||||
let mut height = 1080u32;
|
||||
let mut fps = 60u32;
|
||||
let mut seconds = 5u32;
|
||||
let mut codec = Codec::H265;
|
||||
let mut bitrate_mbps = 20u64;
|
||||
let mut out: Option<PathBuf> = None;
|
||||
let mut loopback = true;
|
||||
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
let arg = args[i].as_str();
|
||||
let mut next = || {
|
||||
i += 1;
|
||||
args.get(i)
|
||||
.cloned()
|
||||
.ok_or_else(|| anyhow::anyhow!("missing value for {arg}"))
|
||||
};
|
||||
match arg {
|
||||
"--source" => {
|
||||
source = match next()?.as_str() {
|
||||
"synthetic" => Source::Synthetic,
|
||||
"portal" => Source::Portal,
|
||||
"kwin-virtual" => Source::KwinVirtual,
|
||||
other => {
|
||||
bail!("unknown --source '{other}' (synthetic|portal|kwin-virtual)")
|
||||
}
|
||||
}
|
||||
}
|
||||
"--width" => {
|
||||
width = next()?
|
||||
.parse()
|
||||
.map_err(|_| anyhow::anyhow!("bad --width"))?
|
||||
}
|
||||
"--height" => {
|
||||
height = next()?
|
||||
.parse()
|
||||
.map_err(|_| anyhow::anyhow!("bad --height"))?
|
||||
}
|
||||
"--fps" => fps = next()?.parse().map_err(|_| anyhow::anyhow!("bad --fps"))?,
|
||||
"--seconds" => {
|
||||
seconds = next()?
|
||||
.parse()
|
||||
.map_err(|_| anyhow::anyhow!("bad --seconds"))?
|
||||
}
|
||||
"--codec" => {
|
||||
codec = match next()?.as_str() {
|
||||
"h264" => Codec::H264,
|
||||
"h265" | "hevc" => Codec::H265,
|
||||
"av1" => Codec::Av1,
|
||||
other => bail!("unknown --codec '{other}' (h264|h265|av1)"),
|
||||
}
|
||||
}
|
||||
"--bitrate" => {
|
||||
bitrate_mbps = next()?
|
||||
.parse()
|
||||
.map_err(|_| anyhow::anyhow!("bad --bitrate (Mbps)"))?
|
||||
}
|
||||
"--out" => out = Some(PathBuf::from(next()?)),
|
||||
"--no-loopback" => loopback = false,
|
||||
"-h" | "--help" => {
|
||||
print_usage();
|
||||
std::process::exit(0);
|
||||
}
|
||||
other => bail!("unknown argument '{other}' (try --help)"),
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
if fps == 0 || width == 0 || height == 0 || seconds == 0 {
|
||||
bail!("--fps/--width/--height/--seconds must be > 0");
|
||||
}
|
||||
|
||||
let out = out.unwrap_or_else(|| {
|
||||
let ext = match codec {
|
||||
Codec::H264 => "h264",
|
||||
Codec::H265 => "h265",
|
||||
Codec::Av1 => "obu",
|
||||
};
|
||||
PathBuf::from(format!("/tmp/punktfunk-m0.{ext}"))
|
||||
});
|
||||
|
||||
Ok(Options {
|
||||
source,
|
||||
width,
|
||||
height,
|
||||
fps,
|
||||
seconds,
|
||||
codec,
|
||||
bitrate_bps: bitrate_mbps.saturating_mul(1_000_000),
|
||||
out,
|
||||
loopback,
|
||||
})
|
||||
}
|
||||
|
||||
/// Print the CLI usage/help text to stderr via a single `eprintln!`.
// NOTE(review): the help text below is one multi-line string literal; its column alignment
// was lost in this copy, so it is left untouched rather than re-aligned by guesswork.
fn print_usage() {
|
||||
eprintln!(
|
||||
"punktfunk-host — Linux streaming host
|
||||
|
||||
USAGE:
|
||||
punktfunk-host serve [OPTIONS] GameStream host control plane (M2: mDNS + serverinfo …)
|
||||
+ the management REST API
|
||||
punktfunk-host openapi print the management API's OpenAPI document (codegen)
|
||||
punktfunk-host m3-host [OPTIONS] native punktfunk/1 host (QUIC control plane + UDP data plane)
|
||||
punktfunk-host m0 [OPTIONS] M0 capture→encode→file pipeline spike
|
||||
|
||||
SERVE OPTIONS:
|
||||
--mgmt-bind <IP:PORT> management API address (default: 127.0.0.1:47990)
|
||||
--mgmt-token <TOKEN> bearer token for the management API (or PUNKTFUNK_MGMT_TOKEN);
|
||||
required when --mgmt-bind is not loopback
|
||||
|
||||
M3-HOST OPTIONS:
|
||||
--port <N> QUIC listen port (default: 9777)
|
||||
--source <synthetic|virtual> test frames, or virtual display + NVENC (default: synthetic)
|
||||
--seconds <N> per-session stream duration, virtual source (default: 30)
|
||||
--frames <N> per-session frame count, synthetic source (default: 300)
|
||||
--max-sessions <N> exit after N sessions; 0 = serve forever (default: 0)
|
||||
|
||||
M0 OPTIONS:
|
||||
--source <synthetic|portal|kwin-virtual>
|
||||
frame source (default: portal). 'kwin-virtual' creates a
|
||||
KWin virtual output at --width x --height and captures it
|
||||
--seconds <N> capture duration in seconds (default: 5)
|
||||
--fps <N> target frame rate (default: 60)
|
||||
--codec <h264|h265|av1> NVENC codec (default: h265)
|
||||
--bitrate <MBPS> target bitrate in Mbps (default: 20)
|
||||
--width <W> --height <H> synthetic source size (default: 1920x1080)
|
||||
--out <PATH> raw Annex-B output (default: /tmp/punktfunk-m0.<ext>)
|
||||
--no-loopback skip the punktfunk_core round-trip verification
|
||||
-h, --help this help
|
||||
|
||||
NOTES:
|
||||
'portal' needs headless Sway + xdg-desktop-portal-wlr running in this session
|
||||
(see docs/linux-setup.md). 'synthetic' needs no capture session and always runs.
|
||||
Encoded AUs are written to a playable file AND (unless --no-loopback) fed through a
|
||||
punktfunk_core host→client loopback that reassembles and byte-verifies each one."
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,979 @@
|
||||
//! Management REST API (plan §4) — the control-plane surface a control pane / CLI talks
|
||||
//! to: host identity + capabilities, runtime status, paired-client management, the pairing
|
||||
//! PIN flow, and session control. Control plane only — `tokio`/`axum` are permitted here;
|
||||
//! the per-frame pipeline never touches this module.
|
||||
//!
|
||||
//! The API is versioned under `/api/v1` and described by an OpenAPI 3.1 document generated
|
||||
//! at compile time with `utoipa` — `punktfunk-host openapi` prints it for client codegen, the
|
||||
//! running server serves it at `/api/v1/openapi.json` plus interactive docs at `/api/docs`,
|
||||
//! and a copy is checked in at `docs/api/openapi.json` (a test fails if it drifts, like the
|
||||
//! cbindgen header).
|
||||
//!
|
||||
//! Security: binds loopback by default. A bearer token (`--mgmt-token` / `PUNKTFUNK_MGMT_TOKEN`)
|
||||
//! is enforced on every `/api/v1` route except `/api/v1/health`, and is mandatory for
|
||||
//! non-loopback binds. The OpenAPI document and docs UI are served unauthenticated (the
|
||||
//! spec is public knowledge — it lives in this repo).
|
||||
|
||||
use crate::encode::Codec;
|
||||
use crate::gamestream::{
|
||||
AppState, APP_VERSION, AUDIO_PORT, CONTROL_PORT, GFE_VERSION, RTSP_PORT, VIDEO_PORT,
|
||||
};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use axum::{
|
||||
extract::{Path, Request, State},
|
||||
http::{header, StatusCode},
|
||||
middleware::{self, Next},
|
||||
response::{IntoResponse, Response},
|
||||
routing::get,
|
||||
Json, Router,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use utoipa::{Modify, OpenApi, ToSchema};
|
||||
use utoipa_axum::{router::OpenApiRouter, routes};
|
||||
use utoipa_scalar::{Scalar, Servable};
|
||||
|
||||
/// Port the management API listens on unless `--mgmt-bind` says otherwise. It sits next to
/// the GameStream block (47984…48010) and is the number Sunshine users already associate
/// with "the config UI".
pub const DEFAULT_PORT: u16 = 47_990;

/// Settings for the management server (CLI: `serve --mgmt-bind ADDR --mgmt-token TOKEN`).
#[derive(Clone, Debug)]
pub struct Options {
    /// Socket address to listen on.
    pub bind: SocketAddr,
    /// Bearer token required on `/api/v1` (except `/health`). `None` ⇒ unauthenticated,
    /// which [`run`] only permits on loopback binds.
    pub token: Option<String>,
}

impl Default for Options {
    /// IPv4 loopback on [`DEFAULT_PORT`], with no token.
    fn default() -> Self {
        Self {
            bind: SocketAddr::from((std::net::Ipv4Addr::LOCALHOST, DEFAULT_PORT)),
            token: None,
        }
    }
}
|
||||
|
||||
/// Axum state for the management routes: the shared control-plane state + auth config.
|
||||
struct MgmtState {
|
||||
app: Arc<AppState>,
|
||||
token: Option<String>,
|
||||
/// The port we serve on, echoed in [`PortMap`] so a client can persist a full endpoint map.
|
||||
port: u16,
|
||||
}
|
||||
|
||||
/// Run the management API server (control plane; spawned alongside the nvhttp servers).
|
||||
pub async fn run(state: Arc<AppState>, opts: Options) -> Result<()> {
|
||||
// A blank token is no token: it must neither satisfy the non-loopback guard below nor
|
||||
// become a credential an empty `Authorization: Bearer ` header would match.
|
||||
let token = opts.token.filter(|t| !t.trim().is_empty());
|
||||
if token.is_none() && !opts.bind.ip().is_loopback() {
|
||||
bail!(
|
||||
"management API bind {} is not loopback — set --mgmt-token (or PUNKTFUNK_MGMT_TOKEN) \
|
||||
to expose it beyond this machine",
|
||||
opts.bind
|
||||
);
|
||||
}
|
||||
tracing::info!(
|
||||
addr = %opts.bind,
|
||||
auth = if token.is_some() { "bearer" } else { "none (loopback)" },
|
||||
"management API listening (docs at /api/docs, spec at /api/v1/openapi.json)"
|
||||
);
|
||||
let app = app(state, token, opts.bind.port());
|
||||
axum_server::bind(opts.bind)
|
||||
.serve(app.into_make_service())
|
||||
.await
|
||||
.context("management API server")
|
||||
}
|
||||
|
||||
/// Compose the full management router (also used directly by the handler tests).
|
||||
fn app(state: Arc<AppState>, token: Option<String>, port: u16) -> Router {
|
||||
let shared = Arc::new(MgmtState {
|
||||
app: state,
|
||||
token,
|
||||
port,
|
||||
});
|
||||
let (api_routes, api) = api_router_parts();
|
||||
api_routes
|
||||
.route_layer(middleware::from_fn_with_state(shared.clone(), require_auth))
|
||||
.with_state(shared)
|
||||
.merge(Scalar::with_url("/api/docs", api.clone()))
|
||||
.route(
|
||||
"/api/v1/openapi.json",
|
||||
get(move || {
|
||||
let spec = api.clone();
|
||||
async move { Json(spec) }
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
/// The versioned API routes + the OpenAPI document collected from them. Single source of
|
||||
/// truth for both the live server and the `openapi` subcommand.
|
||||
fn api_router_parts() -> (Router<Arc<MgmtState>>, utoipa::openapi::OpenApi) {
|
||||
OpenApiRouter::with_openapi(ApiDoc::openapi())
|
||||
.nest(
|
||||
"/api/v1",
|
||||
OpenApiRouter::new()
|
||||
.routes(routes!(get_health))
|
||||
.routes(routes!(get_host_info))
|
||||
.routes(routes!(get_status))
|
||||
.routes(routes!(list_paired_clients))
|
||||
.routes(routes!(unpair_client))
|
||||
.routes(routes!(get_pairing_status))
|
||||
.routes(routes!(submit_pairing_pin))
|
||||
.routes(routes!(stop_session))
|
||||
.routes(routes!(request_idr)),
|
||||
)
|
||||
.split_for_parts()
|
||||
}
|
||||
|
||||
/// The OpenAPI document as pretty JSON — what `punktfunk-host openapi` prints and what is
|
||||
/// checked in at `docs/api/openapi.json` for client codegen.
|
||||
pub fn openapi_json() -> String {
|
||||
let (_, api) = api_router_parts();
|
||||
let mut json = api.to_pretty_json().expect("serialize OpenAPI document");
|
||||
json.push('\n');
|
||||
json
|
||||
}
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
info(
|
||||
title = "punktfunk management API",
|
||||
description = "Control-plane API for managing a punktfunk streaming host: host \
|
||||
capabilities, runtime status, paired clients, the pairing PIN flow, \
|
||||
and session control. Authentication: HTTP bearer token, enforced on \
|
||||
every route except `/api/v1/health` when the host is started with a \
|
||||
management token (mandatory for non-loopback binds)."
|
||||
),
|
||||
modifiers(&SecurityAddon),
|
||||
tags(
|
||||
(name = "host", description = "Host identity, capabilities, and liveness"),
|
||||
(name = "clients", description = "Paired Moonlight client management"),
|
||||
(name = "pairing", description = "Pairing PIN delivery (the out-of-band half of the GameStream pairing handshake)"),
|
||||
(name = "session", description = "Active streaming session control"),
|
||||
)
|
||||
)]
|
||||
struct ApiDoc;
|
||||
|
||||
/// Registers the `bearerAuth` scheme and applies it globally (utoipa has no first-class
|
||||
/// "all operations" shorthand, hence a modifier).
|
||||
struct SecurityAddon;
|
||||
|
||||
impl Modify for SecurityAddon {
|
||||
fn modify(&self, openapi: &mut utoipa::openapi::OpenApi) {
|
||||
use utoipa::openapi::security::{Http, HttpAuthScheme, SecurityScheme};
|
||||
openapi
|
||||
.components
|
||||
.get_or_insert_with(Default::default)
|
||||
.add_security_scheme(
|
||||
"bearerAuth",
|
||||
SecurityScheme::Http(Http::new(HttpAuthScheme::Bearer)),
|
||||
);
|
||||
openapi.security = Some(vec![utoipa::openapi::security::SecurityRequirement::new(
|
||||
"bearerAuth",
|
||||
Vec::<String>::new(),
|
||||
)]);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------
|
||||
// Schemas
|
||||
// ---------------------------------------------------------------------------------------
|
||||
|
||||
/// Liveness + version probe.
|
||||
#[derive(Serialize, ToSchema)]
|
||||
struct Health {
|
||||
/// Always `"ok"` when the host responds.
|
||||
#[schema(example = "ok")]
|
||||
status: String,
|
||||
/// `punktfunk-host` crate version.
|
||||
version: String,
|
||||
/// `punktfunk-core` C ABI version.
|
||||
abi_version: u32,
|
||||
}
|
||||
|
||||
/// Host identity and advertised capabilities (static for the life of the process).
|
||||
#[derive(Serialize, ToSchema)]
|
||||
struct HostInfo {
|
||||
hostname: String,
|
||||
/// Stable per-host id (persisted across restarts), matched on pairing.
|
||||
uniqueid: String,
|
||||
/// Best-effort primary LAN IP.
|
||||
local_ip: String,
|
||||
/// `punktfunk-host` crate version.
|
||||
version: String,
|
||||
/// `punktfunk-core` C ABI version.
|
||||
abi_version: u32,
|
||||
/// GameStream host version advertised to Moonlight clients.
|
||||
app_version: String,
|
||||
/// GFE version advertised to Moonlight clients.
|
||||
gfe_version: String,
|
||||
/// Codecs the host can encode (NVENC).
|
||||
codecs: Vec<ApiCodec>,
|
||||
ports: PortMap,
|
||||
}
|
||||
|
||||
/// Every port a client integration may need (Moonlight derives the stream ports from the
|
||||
/// HTTP base; a control pane should not have to).
|
||||
#[derive(Serialize, ToSchema)]
|
||||
struct PortMap {
|
||||
/// This management API.
|
||||
mgmt: u16,
|
||||
/// nvhttp plain HTTP (serverinfo, pairing).
|
||||
http: u16,
|
||||
/// nvhttp mutual-TLS HTTPS (post-pairing).
|
||||
https: u16,
|
||||
rtsp: u16,
|
||||
video: u16,
|
||||
control: u16,
|
||||
audio: u16,
|
||||
}
|
||||
|
||||
/// Video codec identifier.
|
||||
#[derive(Clone, Copy, Serialize, Deserialize, ToSchema, PartialEq, Eq, Debug)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
enum ApiCodec {
|
||||
H264,
|
||||
H265,
|
||||
Av1,
|
||||
}
|
||||
|
||||
impl From<Codec> for ApiCodec {
|
||||
fn from(c: Codec) -> Self {
|
||||
match c {
|
||||
Codec::H264 => ApiCodec::H264,
|
||||
Codec::H265 => ApiCodec::H265,
|
||||
Codec::Av1 => ApiCodec::Av1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Live host status (changes as clients launch/end sessions).
|
||||
#[derive(Serialize, ToSchema)]
|
||||
struct RuntimeStatus {
|
||||
/// True while the video stream thread is running.
|
||||
video_streaming: bool,
|
||||
/// True while the audio stream thread is running.
|
||||
audio_streaming: bool,
|
||||
/// True while a pairing handshake is parked waiting for the user's PIN
|
||||
/// (submit it via `POST /api/v1/pair/pin`).
|
||||
pin_pending: bool,
|
||||
/// Number of pinned (paired) client certificates.
|
||||
paired_clients: u32,
|
||||
/// The active launch session (set by Moonlight's `/launch`, cleared on cancel/stop).
|
||||
session: Option<SessionInfo>,
|
||||
/// The RTSP-negotiated stream parameters (present once a client has completed ANNOUNCE).
|
||||
stream: Option<StreamInfo>,
|
||||
}
|
||||
|
||||
/// Client-requested launch parameters (key material is never exposed here).
|
||||
#[derive(Serialize, ToSchema)]
|
||||
struct SessionInfo {
|
||||
width: u32,
|
||||
height: u32,
|
||||
fps: u32,
|
||||
}
|
||||
|
||||
/// RTSP-negotiated stream parameters.
|
||||
#[derive(Serialize, ToSchema)]
|
||||
struct StreamInfo {
|
||||
width: u32,
|
||||
height: u32,
|
||||
fps: u32,
|
||||
bitrate_kbps: u32,
|
||||
/// Video payload size per packet (bytes).
|
||||
packet_size: u32,
|
||||
/// Client's parity floor per FEC block (`minRequiredFecPackets`).
|
||||
min_fec: u8,
|
||||
codec: ApiCodec,
|
||||
}
|
||||
|
||||
/// A paired (certificate-pinned) Moonlight client.
|
||||
#[derive(Serialize, ToSchema)]
|
||||
struct PairedClient {
|
||||
/// Lowercase hex SHA-256 of the client certificate DER — the client's stable id here.
|
||||
#[schema(example = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08")]
|
||||
fingerprint: String,
|
||||
/// Certificate subject (e.g. `CN=NVIDIA GameStream Client`), if the DER parses.
|
||||
subject: Option<String>,
|
||||
/// Certificate validity start (unix seconds).
|
||||
not_before_unix: Option<i64>,
|
||||
/// Certificate validity end (unix seconds).
|
||||
not_after_unix: Option<i64>,
|
||||
}
|
||||
|
||||
/// Pairing-flow status.
|
||||
#[derive(Serialize, ToSchema)]
|
||||
struct PairingStatus {
|
||||
/// True while a pairing handshake is parked waiting for the user's PIN.
|
||||
pin_pending: bool,
|
||||
}
|
||||
|
||||
/// The PIN Moonlight displays during pairing.
|
||||
#[derive(Deserialize, ToSchema)]
|
||||
struct SubmitPin {
|
||||
/// 1–16 ASCII digits (Moonlight shows 4).
|
||||
#[schema(example = "1234")]
|
||||
pin: String,
|
||||
}
|
||||
|
||||
/// Error envelope for every non-2xx response.
|
||||
#[derive(Serialize, Deserialize, ToSchema)]
|
||||
struct ApiError {
|
||||
error: String,
|
||||
}
|
||||
|
||||
fn api_error(status: StatusCode, message: &str) -> Response {
|
||||
(
|
||||
status,
|
||||
Json(ApiError {
|
||||
error: message.to_string(),
|
||||
}),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
/// `axum::Json` whose rejections (bad JSON → 400/422, wrong content-type → 415) are
|
||||
/// rewrapped in the [`ApiError`] envelope, keeping "every non-2xx body is `ApiError`" true.
|
||||
struct ApiJson<T>(T);
|
||||
|
||||
impl<S, T> axum::extract::FromRequest<S> for ApiJson<T>
|
||||
where
|
||||
Json<T>: axum::extract::FromRequest<S, Rejection = axum::extract::rejection::JsonRejection>,
|
||||
S: Send + Sync,
|
||||
{
|
||||
type Rejection = Response;
|
||||
|
||||
async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
|
||||
match Json::<T>::from_request(req, state).await {
|
||||
Ok(Json(value)) => Ok(ApiJson(value)),
|
||||
Err(rejection) => Err(api_error(rejection.status(), &rejection.body_text())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------
|
||||
// Auth
|
||||
// ---------------------------------------------------------------------------------------
|
||||
|
||||
/// Bearer-token gate on the `/api/v1` routes. No token configured ⇒ open (loopback-only,
|
||||
/// enforced in [`run`]); `/api/v1/health` stays open for monitoring probes either way.
|
||||
async fn require_auth(State(st): State<Arc<MgmtState>>, req: Request, next: Next) -> Response {
|
||||
let Some(expected) = st.token.as_deref() else {
|
||||
return next.run(req).await;
|
||||
};
|
||||
if req.uri().path() == "/api/v1/health" {
|
||||
return next.run(req).await;
|
||||
}
|
||||
let presented = req
|
||||
.headers()
|
||||
.get(header::AUTHORIZATION)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|v| v.strip_prefix("Bearer "));
|
||||
match presented {
|
||||
Some(token) if token_eq(token, expected) => next.run(req).await,
|
||||
_ => api_error(StatusCode::UNAUTHORIZED, "missing or invalid bearer token"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare SHA-256 digests instead of the strings — constant-time with respect to the
|
||||
/// secret without pulling in a ct-eq dependency.
|
||||
fn token_eq(presented: &str, expected: &str) -> bool {
|
||||
Sha256::digest(presented.as_bytes()) == Sha256::digest(expected.as_bytes())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------
|
||||
// Handlers
|
||||
// ---------------------------------------------------------------------------------------
|
||||
|
||||
/// Liveness probe
|
||||
///
|
||||
/// Always available without authentication.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/health",
|
||||
tag = "host",
|
||||
operation_id = "getHealth",
|
||||
// Override the document-global bearerAuth: this route is exempt in `require_auth`.
|
||||
security(()),
|
||||
responses((status = OK, description = "Host is up", body = Health))
|
||||
)]
|
||||
async fn get_health() -> Json<Health> {
|
||||
Json(Health {
|
||||
status: "ok".into(),
|
||||
version: env!("CARGO_PKG_VERSION").into(),
|
||||
abi_version: punktfunk_core::ABI_VERSION,
|
||||
})
|
||||
}
|
||||
|
||||
/// Host identity and capabilities
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/host",
|
||||
tag = "host",
|
||||
operation_id = "getHostInfo",
|
||||
responses(
|
||||
(status = OK, description = "Host identity, versions, codecs, and port map", body = HostInfo),
|
||||
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||
)
|
||||
)]
|
||||
async fn get_host_info(State(st): State<Arc<MgmtState>>) -> Json<HostInfo> {
|
||||
let h = &st.app.host;
|
||||
Json(HostInfo {
|
||||
hostname: h.hostname.clone(),
|
||||
uniqueid: h.uniqueid.clone(),
|
||||
local_ip: h.local_ip.to_string(),
|
||||
version: env!("CARGO_PKG_VERSION").into(),
|
||||
abi_version: punktfunk_core::ABI_VERSION,
|
||||
app_version: APP_VERSION.into(),
|
||||
gfe_version: GFE_VERSION.into(),
|
||||
// Everything NVENC encodes here (mirrors SERVER_CODEC_MODE_SUPPORT = 3843).
|
||||
codecs: vec![ApiCodec::H264, ApiCodec::H265, ApiCodec::Av1],
|
||||
ports: PortMap {
|
||||
mgmt: st.port,
|
||||
http: h.http_port,
|
||||
https: h.https_port,
|
||||
rtsp: RTSP_PORT,
|
||||
video: VIDEO_PORT,
|
||||
control: CONTROL_PORT,
|
||||
audio: AUDIO_PORT,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/// Live host status
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/status",
|
||||
tag = "host",
|
||||
operation_id = "getStatus",
|
||||
responses(
|
||||
(status = OK, description = "Streaming/pairing state and the active session, if any", body = RuntimeStatus),
|
||||
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||
)
|
||||
)]
|
||||
async fn get_status(State(st): State<Arc<MgmtState>>) -> Json<RuntimeStatus> {
|
||||
let session = st.app.launch.lock().unwrap().map(|l| SessionInfo {
|
||||
width: l.width,
|
||||
height: l.height,
|
||||
fps: l.fps,
|
||||
});
|
||||
let stream = st.app.stream.lock().unwrap().as_ref().map(|c| StreamInfo {
|
||||
width: c.width,
|
||||
height: c.height,
|
||||
fps: c.fps,
|
||||
bitrate_kbps: c.bitrate_kbps,
|
||||
packet_size: c.packet_size as u32,
|
||||
min_fec: c.min_fec,
|
||||
codec: c.codec.into(),
|
||||
});
|
||||
Json(RuntimeStatus {
|
||||
video_streaming: st.app.streaming.load(Ordering::SeqCst),
|
||||
audio_streaming: st.app.audio_streaming.load(Ordering::SeqCst),
|
||||
pin_pending: st.app.pairing.pin.awaiting_pin(),
|
||||
paired_clients: st.app.paired.lock().unwrap().len() as u32,
|
||||
session,
|
||||
stream,
|
||||
})
|
||||
}
|
||||
|
||||
/// List paired clients
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/clients",
|
||||
tag = "clients",
|
||||
operation_id = "listPairedClients",
|
||||
responses(
|
||||
(status = OK, description = "All certificate-pinned clients", body = [PairedClient]),
|
||||
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||
)
|
||||
)]
|
||||
async fn list_paired_clients(State(st): State<Arc<MgmtState>>) -> Json<Vec<PairedClient>> {
|
||||
let ders = st.app.paired.lock().unwrap().clone();
|
||||
Json(ders.iter().map(|der| client_info(der)).collect())
|
||||
}
|
||||
|
||||
fn client_info(der: &[u8]) -> PairedClient {
|
||||
let fingerprint = hex::encode(Sha256::digest(der));
|
||||
match x509_parser::parse_x509_certificate(der) {
|
||||
Ok((_, x509)) => PairedClient {
|
||||
fingerprint,
|
||||
subject: Some(x509.subject().to_string()),
|
||||
not_before_unix: Some(x509.validity().not_before.timestamp()),
|
||||
not_after_unix: Some(x509.validity().not_after.timestamp()),
|
||||
},
|
||||
Err(_) => PairedClient {
|
||||
fingerprint,
|
||||
subject: None,
|
||||
not_before_unix: None,
|
||||
not_after_unix: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Unpair a client
|
||||
///
|
||||
/// Removes the client's certificate from the pairing store. Caveat: the nvhttp TLS layer
|
||||
/// does not yet reject unlisted certificates (`gamestream/tls.rs` accepts any well-formed
|
||||
/// client cert — a planned hardening step), so until that lands this removes the client
|
||||
/// from the listing without severing its ability to reconnect.
|
||||
#[utoipa::path(
|
||||
delete,
|
||||
path = "/clients/{fingerprint}",
|
||||
tag = "clients",
|
||||
operation_id = "unpairClient",
|
||||
params(
|
||||
("fingerprint" = String, Path,
|
||||
description = "Hex SHA-256 fingerprint of the client certificate DER (64 chars, case-insensitive)")
|
||||
),
|
||||
responses(
|
||||
(status = NO_CONTENT, description = "Client unpaired"),
|
||||
(status = BAD_REQUEST, description = "Malformed fingerprint", body = ApiError),
|
||||
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||
(status = NOT_FOUND, description = "No paired client with that fingerprint", body = ApiError),
|
||||
)
|
||||
)]
|
||||
async fn unpair_client(
|
||||
State(st): State<Arc<MgmtState>>,
|
||||
Path(fingerprint): Path<String>,
|
||||
) -> Response {
|
||||
if fingerprint.len() != 64 || !fingerprint.bytes().all(|b| b.is_ascii_hexdigit()) {
|
||||
return api_error(
|
||||
StatusCode::BAD_REQUEST,
|
||||
"fingerprint must be the 64-char hex SHA-256 of the client certificate DER",
|
||||
);
|
||||
}
|
||||
let mut paired = st.app.paired.lock().unwrap();
|
||||
let before = paired.len();
|
||||
paired.retain(|der| !hex::encode(Sha256::digest(der)).eq_ignore_ascii_case(&fingerprint));
|
||||
if paired.len() < before {
|
||||
tracing::info!(fingerprint, "management API: client unpaired");
|
||||
StatusCode::NO_CONTENT.into_response()
|
||||
} else {
|
||||
api_error(
|
||||
StatusCode::NOT_FOUND,
|
||||
"no paired client with that fingerprint",
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Pairing-flow status
|
||||
///
|
||||
/// Poll this to know when to prompt the user for the PIN Moonlight displays.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/pair",
|
||||
tag = "pairing",
|
||||
operation_id = "getPairingStatus",
|
||||
responses(
|
||||
(status = OK, description = "Whether a pairing handshake is waiting for a PIN", body = PairingStatus),
|
||||
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||
)
|
||||
)]
|
||||
async fn get_pairing_status(State(st): State<Arc<MgmtState>>) -> Json<PairingStatus> {
|
||||
Json(PairingStatus {
|
||||
pin_pending: st.app.pairing.pin.awaiting_pin(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Submit the pairing PIN
|
||||
///
|
||||
/// Delivers the PIN the Moonlight client is displaying, completing the out-of-band half
|
||||
/// of the pairing handshake.
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/pair/pin",
|
||||
tag = "pairing",
|
||||
operation_id = "submitPairingPin",
|
||||
request_body = SubmitPin,
|
||||
responses(
|
||||
(status = NO_CONTENT, description = "PIN delivered to the waiting handshake"),
|
||||
(status = BAD_REQUEST, description = "Malformed PIN or unparseable JSON body", body = ApiError),
|
||||
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||
(status = CONFLICT, description = "No pairing handshake is waiting for a PIN", body = ApiError),
|
||||
(status = UNSUPPORTED_MEDIA_TYPE, description = "Body is not application/json", body = ApiError),
|
||||
(status = UNPROCESSABLE_ENTITY, description = "JSON body does not match the schema", body = ApiError),
|
||||
)
|
||||
)]
|
||||
async fn submit_pairing_pin(
|
||||
State(st): State<Arc<MgmtState>>,
|
||||
ApiJson(req): ApiJson<SubmitPin>,
|
||||
) -> Response {
|
||||
let pin = req.pin.trim();
|
||||
if pin.is_empty() || pin.len() > 16 || !pin.bytes().all(|b| b.is_ascii_digit()) {
|
||||
return api_error(StatusCode::BAD_REQUEST, "pin must be 1-16 ASCII digits");
|
||||
}
|
||||
if !st.app.pairing.pin.awaiting_pin() {
|
||||
// Refusing (rather than parking the PIN) prevents a stale PIN from silently
|
||||
// satisfying a *future* pairing attempt.
|
||||
return api_error(
|
||||
StatusCode::CONFLICT,
|
||||
"no pairing handshake is waiting for a PIN",
|
||||
);
|
||||
}
|
||||
st.app.pairing.pin.submit(pin.to_string());
|
||||
StatusCode::NO_CONTENT.into_response()
|
||||
}
|
||||
|
||||
/// Stop the active session
|
||||
///
|
||||
/// Kicks the connected client: stops the video/audio stream threads and clears the launch
|
||||
/// state. Idempotent — succeeds even when nothing is streaming.
|
||||
#[utoipa::path(
|
||||
delete,
|
||||
path = "/session",
|
||||
tag = "session",
|
||||
operation_id = "stopSession",
|
||||
responses(
|
||||
(status = NO_CONTENT, description = "Session stopped (or none was active)"),
|
||||
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||
)
|
||||
)]
|
||||
async fn stop_session(State(st): State<Arc<MgmtState>>) -> StatusCode {
|
||||
let was_streaming = st.app.streaming.swap(false, Ordering::SeqCst);
|
||||
st.app.audio_streaming.store(false, Ordering::SeqCst);
|
||||
*st.app.launch.lock().unwrap() = None;
|
||||
*st.app.stream.lock().unwrap() = None;
|
||||
tracing::info!(was_streaming, "management API: session stopped");
|
||||
StatusCode::NO_CONTENT
|
||||
}
|
||||
|
||||
/// Force a keyframe
|
||||
///
|
||||
/// Asks the encoder for an IDR frame on the active video stream (what a client requests
|
||||
/// after unrecoverable loss — exposed for debugging).
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/session/idr",
|
||||
tag = "session",
|
||||
operation_id = "requestIdr",
|
||||
responses(
|
||||
(status = ACCEPTED, description = "Keyframe requested"),
|
||||
(status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
|
||||
(status = CONFLICT, description = "No active video stream", body = ApiError),
|
||||
)
|
||||
)]
|
||||
async fn request_idr(State(st): State<Arc<MgmtState>>) -> Response {
|
||||
if !st.app.streaming.load(Ordering::SeqCst) {
|
||||
return api_error(StatusCode::CONFLICT, "no active video stream");
|
||||
}
|
||||
st.app.force_idr.store(true, Ordering::SeqCst);
|
||||
StatusCode::ACCEPTED.into_response()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::gamestream::{cert::ServerIdentity, Host, LaunchSession, HTTPS_PORT, HTTP_PORT};
    use axum::body::Body;
    use http_body_util::BodyExt;
    use std::net::{IpAddr, Ipv4Addr};
    use tower::ServiceExt;

    /// Request type every helper below builds.
    type Req = axum::http::Request<Body>;

    /// Fresh application state with a throwaway identity and fixed host metadata.
    fn test_state() -> Arc<AppState> {
        let identity = ServerIdentity::ephemeral().expect("ephemeral identity");
        let host = Host {
            hostname: "test-host".into(),
            uniqueid: "deadbeef".into(),
            local_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
            http_port: HTTP_PORT,
            https_port: HTTPS_PORT,
        };
        Arc::new(AppState::new(host, identity))
    }

    /// The management router over `state`, optionally protected by a bearer token.
    fn test_app(state: Arc<AppState>, token: Option<&str>) -> Router {
        app(state, token.map(str::to_owned), DEFAULT_PORT)
    }

    /// Run one request through the router; returns the status and the body parsed as JSON
    /// (`Null` when the body is empty or is not JSON).
    async fn send(app: &Router, req: Req) -> (StatusCode, serde_json::Value) {
        let resp = app.clone().oneshot(req).await.expect("infallible");
        let status = resp.status();
        let bytes = resp.into_body().collect().await.unwrap().to_bytes();
        // An empty body fails to parse as JSON too, so it lands on `Null` as well.
        let json = serde_json::from_slice(&bytes).unwrap_or(serde_json::Value::Null);
        (status, json)
    }

    fn get_req(path: &str) -> Req {
        axum::http::Request::get(path).body(Body::empty()).unwrap()
    }

    fn delete_req(path: &str) -> Req {
        axum::http::Request::delete(path)
            .body(Body::empty())
            .unwrap()
    }

    #[tokio::test]
    async fn health_is_open_and_versioned() {
        let router = test_app(test_state(), None);
        let (status, body) = send(&router, get_req("/api/v1/health")).await;
        assert_eq!(status, StatusCode::OK);
        assert_eq!(body["status"], "ok");
        assert_eq!(body["abi_version"], punktfunk_core::ABI_VERSION);
    }

    #[tokio::test]
    async fn bearer_token_is_enforced() {
        let app = test_app(test_state(), Some("sekrit"));
        let status_with = |authorization: &str| {
            axum::http::Request::get("/api/v1/status")
                .header("authorization", authorization)
                .body(Body::empty())
                .unwrap()
        };

        // No/wrong token → 401 with the error envelope.
        let (status, body) = send(&app, get_req("/api/v1/status")).await;
        assert_eq!(status, StatusCode::UNAUTHORIZED);
        assert!(body["error"].as_str().unwrap().contains("bearer"));
        assert_eq!(
            send(&app, status_with("Bearer nope")).await.0,
            StatusCode::UNAUTHORIZED
        );

        // Right token → 200.
        assert_eq!(
            send(&app, status_with("Bearer sekrit")).await.0,
            StatusCode::OK
        );

        // Health + the spec/docs stay open.
        for open_path in ["/api/v1/health", "/api/v1/openapi.json"] {
            assert_eq!(send(&app, get_req(open_path)).await.0, StatusCode::OK);
        }
        let docs = app.clone().oneshot(get_req("/api/docs")).await.unwrap();
        assert_eq!(docs.status(), StatusCode::OK);
        let html = docs.into_body().collect().await.unwrap().to_bytes();
        assert!(
            html.starts_with(b"<!doctype html>"),
            "Scalar UI should serve HTML"
        );
    }

    #[tokio::test]
    async fn host_info_reports_identity_and_ports() {
        let router = test_app(test_state(), None);
        let (status, body) = send(&router, get_req("/api/v1/host")).await;
        assert_eq!(status, StatusCode::OK);
        assert_eq!(body["hostname"], "test-host");
        assert_eq!(body["uniqueid"], "deadbeef");
        assert_eq!(body["ports"]["http"], HTTP_PORT);
        assert_eq!(body["ports"]["mgmt"], DEFAULT_PORT);
        assert_eq!(body["codecs"], serde_json::json!(["h264", "h265", "av1"]));
    }

    #[tokio::test]
    async fn status_reflects_runtime_state() {
        let state = test_state();
        let app = test_app(state.clone(), None);

        // Idle host: nothing streaming, no session.
        let (_, idle) = send(&app, get_req("/api/v1/status")).await;
        assert_eq!(idle["video_streaming"], false);
        assert_eq!(idle["session"], serde_json::Value::Null);

        *state.launch.lock().unwrap() = Some(LaunchSession {
            gcm_key: [0; 16],
            rikeyid: 1,
            width: 2560,
            height: 1440,
            fps: 120,
            appid: 1,
        });
        state.streaming.store(true, Ordering::SeqCst);

        let (_, live) = send(&app, get_req("/api/v1/status")).await;
        assert_eq!(live["video_streaming"], true);
        assert_eq!(live["session"]["width"], 2560);
        assert_eq!(live["session"]["fps"], 120);
        // Key material must never appear anywhere in the response.
        assert!(!live.to_string().contains("gcm"));
    }

    #[tokio::test]
    async fn paired_clients_list_and_unpair() {
        let state = test_state();
        let app = test_app(state.clone(), None);
        let unpair = |fp: &str| delete_req(&format!("/api/v1/clients/{fp}"));

        // Pin the host's own cert DER as a stand-in client.
        let (_, pem) =
            x509_parser::pem::parse_x509_pem(state.identity.cert_pem.as_bytes()).unwrap();
        let der = pem.contents.clone();
        let fingerprint = hex::encode(Sha256::digest(&der));
        state.paired.lock().unwrap().push(der);

        let (status, body) = send(&app, get_req("/api/v1/clients")).await;
        assert_eq!(status, StatusCode::OK);
        assert_eq!(body[0]["fingerprint"], fingerprint);
        assert_eq!(body[0]["subject"], "CN=punktfunk");

        // Malformed fingerprint → 400.
        assert_eq!(
            send(&app, delete_req("/api/v1/clients/zz")).await.0,
            StatusCode::BAD_REQUEST
        );

        // Unpair (uppercase hex must match too) → 204, list empties, second delete → 404.
        assert_eq!(
            send(&app, unpair(&fingerprint.to_uppercase())).await.0,
            StatusCode::NO_CONTENT
        );
        let (_, body) = send(&app, get_req("/api/v1/clients")).await;
        assert_eq!(body, serde_json::json!([]));
        assert_eq!(
            send(&app, unpair(&fingerprint)).await.0,
            StatusCode::NOT_FOUND
        );
    }

    #[tokio::test]
    async fn submit_pin_validates_and_requires_pending_pairing() {
        let app = test_app(test_state(), None);
        let post = |body: &str| {
            axum::http::Request::post("/api/v1/pair/pin")
                .header("content-type", "application/json")
                .body(Body::from(body.to_string()))
                .unwrap()
        };

        // Malformed PINs → 400.
        for malformed in [r#"{"pin":""}"#, r#"{"pin":"12ab"}"#] {
            assert_eq!(
                send(&app, post(malformed)).await.0,
                StatusCode::BAD_REQUEST
            );
        }

        // Well-formed but nothing waiting → 409 (a parked stale PIN would poison the
        // next pairing attempt).
        assert_eq!(
            send(&app, post(r#"{"pin":"1234"}"#)).await.0,
            StatusCode::CONFLICT
        );

        // axum's own body rejections must still wear the ApiError envelope (ApiJson).
        let (status, body) = send(&app, post("{not json")).await;
        assert_eq!(status, StatusCode::BAD_REQUEST);
        assert!(body["error"].is_string(), "syntax error: {body}");

        let (status, body) = send(&app, post(r#"{"wrong":"shape"}"#)).await;
        assert_eq!(status, StatusCode::UNPROCESSABLE_ENTITY);
        assert!(body["error"].is_string(), "schema mismatch: {body}");

        let no_ct = axum::http::Request::post("/api/v1/pair/pin")
            .body(Body::from(r#"{"pin":"1234"}"#))
            .unwrap();
        let (status, body) = send(&app, no_ct).await;
        assert_eq!(status, StatusCode::UNSUPPORTED_MEDIA_TYPE);
        assert!(body["error"].is_string(), "media type: {body}");
    }

    /// A blank token must not satisfy the "non-loopback requires a token" guard.
    #[tokio::test]
    async fn blank_token_rejected_for_public_bind() {
        let opts = Options {
            bind: "0.0.0.0:0".parse().unwrap(),
            token: Some(" ".into()),
        };
        let err = run(test_state(), opts).await.unwrap_err();
        assert!(err.to_string().contains("not loopback"), "{err}");
    }

    #[tokio::test]
    async fn stop_session_clears_runtime_state() {
        let state = test_state();
        let app = test_app(state.clone(), None);

        // Pretend a session is live on both the video and the audio side.
        state.streaming.store(true, Ordering::SeqCst);
        state.audio_streaming.store(true, Ordering::SeqCst);
        *state.launch.lock().unwrap() = Some(LaunchSession {
            gcm_key: [0; 16],
            rikeyid: 0,
            width: 1920,
            height: 1080,
            fps: 60,
            appid: 1,
        });

        assert_eq!(
            send(&app, delete_req("/api/v1/session")).await.0,
            StatusCode::NO_CONTENT
        );
        assert!(!state.streaming.load(Ordering::SeqCst));
        assert!(!state.audio_streaming.load(Ordering::SeqCst));
        assert!(state.launch.lock().unwrap().is_none());
    }

    #[tokio::test]
    async fn idr_requires_an_active_stream() {
        let state = test_state();
        let app = test_app(state.clone(), None);
        let idr = || {
            axum::http::Request::post("/api/v1/session/idr")
                .body(Body::empty())
                .unwrap()
        };

        assert_eq!(send(&app, idr()).await.0, StatusCode::CONFLICT);

        state.streaming.store(true, Ordering::SeqCst);
        assert_eq!(send(&app, idr()).await.0, StatusCode::ACCEPTED);
        assert!(state.force_idr.load(Ordering::SeqCst));
    }

    /// The OpenAPI document lists every route with a unique operationId (codegen relies
    /// on both), and the checked-in copy is current.
    #[test]
    fn openapi_document_is_complete_and_checked_in() {
        let json = openapi_json();
        let doc: serde_json::Value = serde_json::from_str(&json).unwrap();

        let paths = doc["paths"].as_object().unwrap();
        for p in [
            "/api/v1/health",
            "/api/v1/host",
            "/api/v1/status",
            "/api/v1/clients",
            "/api/v1/clients/{fingerprint}",
            "/api/v1/pair",
            "/api/v1/pair/pin",
            "/api/v1/session",
            "/api/v1/session/idr",
        ] {
            assert!(paths.contains_key(p), "spec is missing {p}");
        }

        // Every operation that declares an operationId must declare a distinct one.
        let op_ids: Vec<&str> = paths
            .values()
            .flat_map(|ops| ops.as_object().unwrap().values())
            .filter_map(|op| op["operationId"].as_str())
            .collect();
        let distinct: std::collections::HashSet<&str> = op_ids.iter().copied().collect();
        assert_eq!(op_ids.len(), distinct.len(), "duplicate operationIds");
        assert!(doc["components"]["securitySchemes"]["bearerAuth"].is_object());
        // The health probe overrides the document-global bearer requirement (the server
        // exempts it in `require_auth`; the spec must agree).
        assert_eq!(
            doc["paths"]["/api/v1/health"]["get"]["security"],
            serde_json::json!([{}])
        );

        let checked_in = include_str!("../../../docs/api/openapi.json");
        assert_eq!(
            json.trim(),
            checked_in.trim(),
            "docs/api/openapi.json is stale — regenerate with: \
             cargo run -p punktfunk-host -- openapi > docs/api/openapi.json"
        );
    }
}
|
||||
@@ -0,0 +1,39 @@
|
||||
//! The host hot path (plan §7), wiring the platform stages to `punktfunk_core`:
|
||||
//!
|
||||
//! ```text
|
||||
//! capture(dmabuf) → encode(NVENC/VAAPI) → core[FEC+packetize+pace+send]
|
||||
//! ```
|
||||
//!
|
||||
//! Each stage runs on its own native OS thread, connected by bounded SPSC channels with
|
||||
//! drop-oldest on overflow so the encoder is never blocked. No async runtime here.
|
||||
|
||||
use crate::capture::Capturer;
|
||||
use crate::encode::{EncodedFrame, Encoder};
|
||||
use anyhow::Result;
|
||||
use punktfunk_core::packet::{FLAG_PIC, FLAG_SOF};
|
||||
use punktfunk_core::Session;
|
||||
|
||||
/// Drive one capture→encode→submit step. The real pipeline spawns threads and uses
|
||||
/// bounded channels; this documents the data flow and the `punktfunk_core` submit contract.
|
||||
pub fn pump_once(
|
||||
capturer: &mut dyn Capturer,
|
||||
encoder: &mut dyn Encoder,
|
||||
session: &mut Session,
|
||||
) -> Result<()> {
|
||||
let frame = capturer.next_frame()?;
|
||||
encoder.submit(&frame)?;
|
||||
while let Some(EncodedFrame {
|
||||
data,
|
||||
pts_ns,
|
||||
keyframe,
|
||||
}) = encoder.poll()?
|
||||
{
|
||||
let mut flags = FLAG_PIC as u32;
|
||||
if keyframe {
|
||||
flags |= FLAG_SOF as u32;
|
||||
}
|
||||
// core does FEC + packetize + pace + send.
|
||||
session.submit_frame(&data, pts_ns, flags)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
//! One-time PipeWire library initialization, shared by the video (portal) and audio capture
|
||||
//! threads. `pw_init` must not be called concurrently from multiple threads on first use; both
|
||||
//! capture paths connect to PipeWire at nearly the same moment (RTSP PLAY starts video + audio
|
||||
//! together), so we serialize the init through a `Once`.
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn ensure_init() {
|
||||
use std::sync::Once;
|
||||
static ONCE: Once = Once::new();
|
||||
ONCE.call_once(pipewire::init);
|
||||
}
|
||||
@@ -0,0 +1,128 @@
|
||||
//! Virtual display orchestration (plan §6) — the project's differentiator.
|
||||
//!
|
||||
//! A [`VirtualDisplay`] creates a *client-sized* output on demand, rendered natively and
|
||||
//! headless (no scaling), to be captured and streamed, then torn down on disconnect. There is
|
||||
//! no cross-compositor Wayland protocol for this, so each compositor has its own backend behind
|
||||
//! this trait:
|
||||
//!
|
||||
//! * **KWin** — privileged `zkde_screencast_unstable_v1::stream_virtual_output` ([`kwin`]).
|
||||
//! * **wlroots/Sway** — `swaymsg create_output` + `output mode --custom` (TODO).
|
||||
//! * **Mutter/GNOME** — D-Bus `RemoteDesktop` + `ScreenCast.RecordVirtual` (TODO).
|
||||
//!
|
||||
//! [`VirtualDisplay::create`] returns a [`VirtualOutput`]: the PipeWire node to capture plus an
|
||||
//! owned keepalive whose `Drop` releases the output (RAII — no explicit `destroy`). Capture
|
||||
//! consumes the node via [`crate::capture::capture_virtual_output`].
|
||||
|
||||
use anyhow::Result;
|
||||
pub use punktfunk_core::Mode;
|
||||
use std::os::fd::OwnedFd;
|
||||
|
||||
/// Handle to a virtual output that has been created: the PipeWire source to capture and
/// an owned keepalive. Dropping the keepalive tears the output down, releasing the
/// compositor-side resource.
///
/// Allowed dead on non-Linux: the backends that construct it are all `cfg(target_os = "linux")`.
#[allow(dead_code)]
pub struct VirtualOutput {
    /// Id of the PipeWire node carrying the output's screencast stream.
    pub node_id: u32,
    /// PipeWire fd of a portal/sandboxed remote when the node lives there (e.g. Mutter's
    /// RemoteDesktop+ScreenCast). `None` when the node is on the user's default PipeWire
    /// daemon (KWin `zkde_screencast`) and capture connects to that daemon directly.
    pub remote_fd: Option<OwnedFd>,
    /// `(width, height, refresh_hz)` to prefer during PipeWire format negotiation. KWin and
    /// gamescope create the output at the exact size, so this only confirms it; **Mutter
    /// sizes its virtual monitor FROM the negotiation**, so there it is what makes the
    /// client's mode real.
    pub preferred_mode: Option<(u32, u32, u32)>,
    /// Owns the output and whatever connection/thread backs it; dropped on teardown.
    pub keepalive: Box<dyn Send>,
}
|
||||
|
||||
/// Pluggable virtual-output creation, per compositor.
|
||||
pub trait VirtualDisplay: Send {
|
||||
/// Human-readable backend name (e.g. `"kwin"`, `"wlroots"`, `"mutter"`).
|
||||
fn name(&self) -> &'static str;
|
||||
/// Create a virtual output of the given mode. Teardown is RAII: drop the returned
|
||||
/// [`VirtualOutput`]'s `keepalive`.
|
||||
fn create(&mut self, mode: Mode) -> Result<VirtualOutput>;
|
||||
}
|
||||
|
||||
/// The compositors punktfunk can drive (plan §6).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Compositor {
    /// KWin / Plasma 6: virtual output via `zkde_screencast`.
    Kwin,
    /// wlroots family (Sway/Hyprland): headless `create_output`.
    Wlroots,
    /// Mutter / GNOME: headless backend plus Mutter D-Bus `RecordVirtual`.
    Mutter,
    /// gamescope: spawned headless at the client's size/refresh, captured through its
    /// PipeWire node.
    Gamescope,
}
|
||||
|
||||
/// Detect the compositor to drive: `PUNKTFUNK_COMPOSITOR` override, else `XDG_CURRENT_DESKTOP`.
|
||||
pub fn detect() -> Result<Compositor> {
|
||||
if let Ok(v) = std::env::var("PUNKTFUNK_COMPOSITOR") {
|
||||
return match v.trim().to_ascii_lowercase().as_str() {
|
||||
"kwin" | "kde" | "plasma" => Ok(Compositor::Kwin),
|
||||
"wlroots" | "sway" | "hyprland" | "wlr" => Ok(Compositor::Wlroots),
|
||||
"mutter" | "gnome" => Ok(Compositor::Mutter),
|
||||
"gamescope" => Ok(Compositor::Gamescope),
|
||||
other => {
|
||||
anyhow::bail!(
|
||||
"unknown PUNKTFUNK_COMPOSITOR '{other}' (kwin|wlroots|mutter|gamescope)"
|
||||
)
|
||||
}
|
||||
};
|
||||
}
|
||||
let desktop = std::env::var("XDG_CURRENT_DESKTOP")
|
||||
.unwrap_or_default()
|
||||
.to_ascii_uppercase();
|
||||
if desktop.contains("KDE") {
|
||||
Ok(Compositor::Kwin)
|
||||
} else if desktop.contains("GNOME") {
|
||||
Ok(Compositor::Mutter)
|
||||
} else if desktop.contains("SWAY")
|
||||
|| desktop.contains("WLROOTS")
|
||||
|| desktop.contains("HYPRLAND")
|
||||
{
|
||||
Ok(Compositor::Wlroots)
|
||||
} else {
|
||||
anyhow::bail!(
|
||||
"could not detect compositor from XDG_CURRENT_DESKTOP='{desktop}'; set PUNKTFUNK_COMPOSITOR"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Open the virtual-display driver for `compositor`.
|
||||
pub fn open(compositor: Compositor) -> Result<Box<dyn VirtualDisplay>> {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
match compositor {
|
||||
Compositor::Kwin => Ok(Box::new(kwin::KwinDisplay::new()?)),
|
||||
Compositor::Gamescope => Ok(Box::new(gamescope::GamescopeDisplay::new()?)),
|
||||
Compositor::Mutter => Ok(Box::new(mutter::MutterDisplay::new()?)),
|
||||
Compositor::Wlroots => {
|
||||
anyhow::bail!("wlroots virtual-output backend not yet implemented")
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
let _ = compositor;
|
||||
anyhow::bail!("virtual displays require Linux (Wayland compositor)")
|
||||
}
|
||||
}
|
||||
|
||||
/// Path of the file where the gamescope backend relays the nested session's `LIBEI_SOCKET`
|
||||
/// (gamescope's EIS server) for the input injector.
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn gamescope_ei_socket_file() -> &'static str {
|
||||
gamescope::EI_SOCKET_FILE
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
mod gamescope;
|
||||
#[cfg(target_os = "linux")]
|
||||
mod kwin;
|
||||
#[cfg(target_os = "linux")]
|
||||
mod mutter;
|
||||
@@ -0,0 +1,181 @@
|
||||
//! gamescope virtual-display backend.
|
||||
//!
|
||||
//! Unlike KWin/Mutter (which create a virtual output at runtime via a protocol), gamescope is a
|
||||
//! micro-compositor we *spawn*: `gamescope --backend headless -W w -H h -r hz -- <app>`. It runs
|
||||
//! the app nested, composites at the requested size/refresh (so the source rate is the client's
|
||||
//! rate natively — no separate refresh step), and exports a built-in PipeWire node named
|
||||
//! `gamescope` (media.class `Video/Source`, BGRx/NV12, dmabuf or shm) on the user's PipeWire
|
||||
//! daemon. We discover that node and capture it like any other; the gamescope *process* is the
|
||||
//! keepalive — dropping the [`VirtualOutput`] kills it (tearing the output down).
|
||||
//!
|
||||
//! Requirements: gamescope built with PipeWire + libei input emulation (distro packages are);
|
||||
//! a usable Vulkan device (the NVIDIA render node). Headless capture on the proprietary NVIDIA
|
||||
//! driver is plausible-by-architecture but not a well-trodden path — validate empirically.
|
||||
//! Input is a gamescope-specific libei/EIS socket (`LIBEI_SOCKET`), wired separately (TODO).
|
||||
|
||||
use super::{Mode, VirtualDisplay, VirtualOutput};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use std::process::{Child, Command, Stdio};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// The gamescope virtual-display driver. Each [`create`](VirtualDisplay::create) spawns one
|
||||
/// headless gamescope process sized to the requested mode.
|
||||
pub struct GamescopeDisplay;
|
||||
|
||||
impl GamescopeDisplay {
|
||||
pub fn new() -> Result<Self> {
|
||||
Ok(GamescopeDisplay)
|
||||
}
|
||||
}
|
||||
|
||||
impl VirtualDisplay for GamescopeDisplay {
|
||||
fn name(&self) -> &'static str {
|
||||
"gamescope"
|
||||
}
|
||||
|
||||
fn create(&mut self, mode: Mode) -> Result<VirtualOutput> {
|
||||
// Attach to an already-running gamescope (debug / Steam-launched session) instead of
|
||||
// spawning one: PUNKTFUNK_GAMESCOPE_NODE=<pipewire node id>.
|
||||
if let Ok(id) = std::env::var("PUNKTFUNK_GAMESCOPE_NODE") {
|
||||
let node_id: u32 = id
|
||||
.parse()
|
||||
.context("PUNKTFUNK_GAMESCOPE_NODE must be a node id")?;
|
||||
tracing::info!(node_id, "gamescope: attaching to existing PipeWire node");
|
||||
return Ok(VirtualOutput {
|
||||
node_id,
|
||||
remote_fd: None,
|
||||
preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)),
|
||||
keepalive: Box::new(()),
|
||||
});
|
||||
}
|
||||
let proc = GamescopeProc(spawn(mode.width, mode.height, mode.refresh_hz.max(1))?);
|
||||
// gamescope creates its PipeWire node a moment after start; poll for it (the proc is held
|
||||
// alive meanwhile, and killed if we give up).
|
||||
let node_id = wait_for_node(Duration::from_secs(15)).ok_or_else(|| {
|
||||
anyhow!(
|
||||
"gamescope PipeWire node did not appear within 15s — gamescope may have failed to \
|
||||
start or headless capture is unsupported on this GPU/driver (see /tmp/punktfunk-gamescope.log)"
|
||||
)
|
||||
})?;
|
||||
tracing::info!(
|
||||
node_id,
|
||||
w = mode.width,
|
||||
h = mode.height,
|
||||
hz = mode.refresh_hz,
|
||||
"gamescope virtual output ready"
|
||||
);
|
||||
Ok(VirtualOutput {
|
||||
node_id,
|
||||
remote_fd: None,
|
||||
preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)),
|
||||
keepalive: Box::new(proc),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// File where the wrapper below writes gamescope's `LIBEI_SOCKET` (its EIS server socket),
|
||||
/// read by the libei injector to drive input into the nested app. See [`crate::inject`].
|
||||
pub const EI_SOCKET_FILE: &str = "/tmp/punktfunk-gamescope-ei";
|
||||
|
||||
/// Spawn `gamescope --backend headless -W w -H h -r hz -- <app>`. The app comes from
|
||||
/// `PUNKTFUNK_GAMESCOPE_APP` (default a no-op that just keeps gamescope alive — set it to a real
|
||||
/// game/GL app for actual content, e.g. `steam -gamepadui` for the SteamOS-like session).
|
||||
/// stdout/stderr go to `/tmp/punktfunk-gamescope.log`. The app is launched through a tiny shell
|
||||
/// wrapper that relays gamescope's `LIBEI_SOCKET` (set for its children) to [`EI_SOCKET_FILE`]
|
||||
/// so the input injector can connect to gamescope's EIS server from outside.
|
||||
fn spawn(w: u32, h: u32, hz: u32) -> Result<Child> {
|
||||
let app =
|
||||
std::env::var("PUNKTFUNK_GAMESCOPE_APP").unwrap_or_else(|_| "sleep infinity".to_string());
|
||||
let _ = std::fs::remove_file(EI_SOCKET_FILE); // stale socket path from a previous session
|
||||
let mut cmd = Command::new("gamescope");
|
||||
cmd.args(["--backend", "headless"])
|
||||
.args(["-W", &w.to_string()])
|
||||
.args(["-H", &h.to_string()])
|
||||
.args(["-r", &hz.to_string()])
|
||||
.args(["--xwayland-count", "1", "--"])
|
||||
.args([
|
||||
"sh",
|
||||
"-c",
|
||||
&format!("printf %s \"$LIBEI_SOCKET\" > {EI_SOCKET_FILE}; exec \"$@\""),
|
||||
"sh",
|
||||
])
|
||||
.args(app.split_whitespace())
|
||||
// Prefer the NVIDIA GL vendor for the nested session (harmless on a pure-NVIDIA box).
|
||||
.env("__GLX_VENDOR_LIBRARY_NAME", "nvidia");
|
||||
if let Ok(log) = std::fs::File::create("/tmp/punktfunk-gamescope.log") {
|
||||
if let Ok(log2) = log.try_clone() {
|
||||
cmd.stdout(Stdio::from(log)).stderr(Stdio::from(log2));
|
||||
}
|
||||
} else {
|
||||
cmd.stdout(Stdio::null()).stderr(Stdio::null());
|
||||
}
|
||||
tracing::info!(w, h, hz, %app, "spawning gamescope (headless)");
|
||||
cmd.spawn()
|
||||
.context("spawn gamescope (is it installed? `apt install gamescope`)")
|
||||
}
|
||||
|
||||
/// Wait for gamescope to report its PipeWire node. Authoritative source: gamescope's own log
|
||||
/// line `stream available on node ID: N` (its node carries `node.name=gamescope` on TWO objects
|
||||
/// — the adapter and the inner stream — and only the advertised id is the correct capture
|
||||
/// target). Falls back to `pw-dump` discovery if the log line doesn't show.
|
||||
fn wait_for_node(timeout: Duration) -> Option<u32> {
|
||||
let deadline = Instant::now() + timeout;
|
||||
loop {
|
||||
if let Some(id) = node_from_log() {
|
||||
return Some(id);
|
||||
}
|
||||
if Instant::now() >= deadline {
|
||||
return find_gamescope_node(); // last-resort fallback
|
||||
}
|
||||
std::thread::sleep(Duration::from_millis(300));
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse `stream available on node ID: N` from the spawned gamescope's log.
///
/// The log may be ANSI-colored, so escape sequences around the number are skipped rather
/// than mined for digits: `57\x1b[0m` is node 57, not 570. Lines are scanned newest-first
/// and the first one that yields a valid `u32` wins. Returns `None` when the log is
/// missing/unreadable or contains no parsable announcement.
fn node_from_log() -> Option<u32> {
    const MARKER: &str = "stream available on node ID:";
    let log = std::fs::read_to_string("/tmp/punktfunk-gamescope.log").ok()?;
    log.lines().rev().find_map(|line| {
        let (_, tail) = line.split_once(MARKER)?;
        first_number_skipping_ansi(tail)
    })
}

/// Return the first contiguous run of ASCII digits in `text` as a `u32`, ignoring ANSI
/// escape sequences and any other non-digit characters that precede it.
fn first_number_skipping_ansi(text: &str) -> Option<u32> {
    let mut digits = String::new();
    let mut chars = text.chars();
    while let Some(c) = chars.next() {
        if c.is_ascii_digit() {
            digits.push(c);
        } else if !digits.is_empty() {
            // Anything after the number (escape, whitespace, text) ends it.
            break;
        } else if c == '\u{1b}' && chars.next() == Some('[') {
            // CSI sequence: parameter/intermediate bytes up to a final byte in '@'..='~'.
            // Its parameter digits (colors, attributes) are not part of the node id.
            for param in chars.by_ref() {
                if ('@'..='~').contains(&param) {
                    break;
                }
            }
        }
    }
    digits.parse().ok()
}
|
||||
|
||||
/// Find the `gamescope` `Video/Source` node id in a `pw-dump` snapshot of the default daemon.
|
||||
fn find_gamescope_node() -> Option<u32> {
|
||||
let out = Command::new("pw-dump").output().ok()?;
|
||||
let dump: serde_json::Value = serde_json::from_slice(&out.stdout).ok()?;
|
||||
for obj in dump.as_array()? {
|
||||
if obj.get("type").and_then(|t| t.as_str()) != Some("PipeWire:Interface:Node") {
|
||||
continue;
|
||||
}
|
||||
let props = obj.get("info").and_then(|i| i.get("props"));
|
||||
let name = props
|
||||
.and_then(|p| p.get("node.name"))
|
||||
.and_then(|n| n.as_str())
|
||||
.unwrap_or("");
|
||||
let class = props
|
||||
.and_then(|p| p.get("media.class"))
|
||||
.and_then(|n| n.as_str())
|
||||
.unwrap_or("");
|
||||
if name == "gamescope" || (class == "Video/Source" && name.contains("gamescope")) {
|
||||
return obj.get("id").and_then(|i| i.as_u64()).map(|x| x as u32);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Handle to the gamescope process we spawned. Killing the process tears the virtual output
/// down, so the handle does exactly that when it is dropped.
struct GamescopeProc(Child);

impl Drop for GamescopeProc {
    fn drop(&mut self) {
        let Self(child) = self;
        // Best effort: failures of either call are ignored. `wait` follows `kill` so the
        // child is reaped.
        let _ = child.kill();
        let _ = child.wait();
    }
}
|
||||
@@ -0,0 +1,313 @@
|
||||
//! KWin virtual-output backend via the privileged `zkde_screencast_unstable_v1` Wayland
|
||||
//! protocol (the mechanism KRdp / krfb-virtualmonitor use).
|
||||
//!
|
||||
//! `stream_virtual_output(name, width, height, scale, pointer)` asks KWin to create a new output
|
||||
//! sized to exactly `width`x`height`, rendered natively (no scaling), and hands back a PipeWire
|
||||
//! node for it. The node lives on the user's default PipeWire daemon, so [`VirtualOutput::remote_fd`]
|
||||
//! is `None` and capture connects to that daemon directly.
|
||||
//!
|
||||
//! Requirements: KWin must expose the privileged `zkde_screencast` global — a real Plasma session
|
||||
//! authorizes it for its own clients; the headless test exposes it to bare clients via
|
||||
//! `KWIN_WAYLAND_NO_PERMISSION_CHECKS=1`. The compositor backend must implement
|
||||
//! `createVirtualOutput`: the **DRM backend** (any version) or the **VirtualBackend since KWin
|
||||
//! 6.5.6** (`kwin_wayland --virtual`); on `--virtual` < 6.5.6 the request fails with
|
||||
//! "Could not find output". We talk raw Wayland on `$WAYLAND_DISPLAY`, so the host must run inside
|
||||
//! the KWin session's environment.
|
||||
|
||||
#![allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)]
|
||||
|
||||
use super::{Mode, VirtualDisplay, VirtualOutput};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use std::os::fd::{AsFd, AsRawFd};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::mpsc::Sender;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
use wayland_client::protocol::wl_registry::{self, WlRegistry};
|
||||
use wayland_client::{Connection, Dispatch, Proxy, QueueHandle};
|
||||
|
||||
// Client bindings for the vendored `zkde-screencast-unstable-v1` protocol XML, generated inline
// by the `wayland_scanner` macros (no build.rs). The XML path is
|
||||
// relative to CARGO_MANIFEST_DIR. See wayland-rs' "implementing a custom protocol" docs.
|
||||
// The lint allowances apply to everything in the module, i.e. the macro output.
#[allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)]
|
||||
pub mod zkde {
|
||||
// Glob imports, presumably so the generated code can name `wayland_client` items
// unqualified — see the wayland-rs docs referenced above.
use wayland_client;
|
||||
use wayland_client::protocol::*;
|
||||
|
||||
// Interface descriptions generated from the XML, kept in their own module.
pub mod __interfaces {
|
||||
use wayland_client::protocol::__interfaces::*;
|
||||
wayland_scanner::generate_interfaces!("protocols/zkde-screencast-unstable-v1.xml");
|
||||
}
|
||||
use self::__interfaces::*;
|
||||
|
||||
// Client-side protocol code (e.g. the `zkde_screencast_unstable_v1` module used below).
wayland_scanner::generate_client_code!("protocols/zkde-screencast-unstable-v1.xml");
|
||||
}
|
||||
|
||||
use zkde::zkde_screencast_stream_unstable_v1::{
|
||||
Event as StreamEvent, ZkdeScreencastStreamUnstableV1 as ScreencastStream,
|
||||
};
|
||||
use zkde::zkde_screencast_unstable_v1::ZkdeScreencastUnstableV1 as Screencast;
|
||||
|
||||
/// Name handed to KWin for the output we create. KWin exposes it to output-management as
/// `Virtual-<name>`.
const VOUT_NAME: &str = "punktfunk";

/// Upper bound on the interface version we bind. KWin currently advertises 5, and the node id
/// is taken from the `created` event (deprecated only since v6), so binding is capped at 5.
const MAX_VERSION: u32 = 5;

/// Value of the protocol's `pointer` enum that renders the cursor into the stream, so the
/// remote sees it move with injected input.
const POINTER_EMBEDDED: u32 = 2;
|
||||
|
||||
/// The KWin virtual-display driver. Stateless — each [`create`](VirtualDisplay::create) spins up
|
||||
/// its own Wayland connection/thread that owns the resulting output.
|
||||
pub struct KwinDisplay;
|
||||
|
||||
impl KwinDisplay {
|
||||
pub fn new() -> Result<Self> {
|
||||
Ok(KwinDisplay)
|
||||
}
|
||||
}
|
||||
|
||||
impl VirtualDisplay for KwinDisplay {
|
||||
fn name(&self) -> &'static str {
|
||||
"kwin"
|
||||
}
|
||||
|
||||
fn create(&mut self, mode: Mode) -> Result<VirtualOutput> {
|
||||
let (setup_tx, setup_rx) = std::sync::mpsc::channel::<Result<u32, String>>();
|
||||
let stop = Arc::new(AtomicBool::new(false));
|
||||
let stop_thread = stop.clone();
|
||||
let (width, height) = (mode.width, mode.height);
|
||||
thread::Builder::new()
|
||||
.name("punktfunk-kwin-vout".into())
|
||||
.spawn(move || virtual_output_thread(width, height, setup_tx, stop_thread))
|
||||
.context("spawn KWin virtual-output thread")?;
|
||||
|
||||
let node_id = match setup_rx.recv_timeout(Duration::from_secs(20)) {
|
||||
Ok(Ok(v)) => v,
|
||||
Ok(Err(e)) => bail!("KWin virtual output failed: {e}"),
|
||||
Err(_) => bail!("timed out creating the KWin virtual output"),
|
||||
};
|
||||
tracing::info!(node_id, width, height, "KWin virtual output ready");
|
||||
// KWin creates virtual outputs at a hardcoded 60 Hz and `stream_virtual_output` has no
|
||||
// refresh argument, so when the client wants more we install + select a custom mode
|
||||
// (supported on virtual outputs since KWin 6.6). Done before capture connects PipeWire so
|
||||
// the stream negotiates at the higher rate. First cut shells out to kscreen-doctor; the
|
||||
// in-process kde_output_management_v2 client is a follow-up.
|
||||
if mode.refresh_hz > 60 {
|
||||
set_custom_refresh(width, height, mode.refresh_hz);
|
||||
}
|
||||
Ok(VirtualOutput {
|
||||
node_id,
|
||||
remote_fd: None,
|
||||
preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)),
|
||||
keepalive: Box::new(StopGuard(stop)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Best-effort: raise the just-created virtual output's refresh above KWin's default 60 Hz by
|
||||
/// installing + selecting a custom mode via `kscreen-doctor` (the output is `Virtual-<VOUT_NAME>`,
|
||||
/// refresh given in mHz). Failure leaves the source at 60 Hz — the stream still works, just capped.
|
||||
fn set_custom_refresh(width: u32, height: u32, hz: u32) {
|
||||
let output = format!("Virtual-{VOUT_NAME}");
|
||||
let mhz = hz.saturating_mul(1000);
|
||||
let run = |arg: String| {
|
||||
std::process::Command::new("kscreen-doctor")
|
||||
.arg(arg)
|
||||
.status()
|
||||
.map(|s| s.success())
|
||||
.unwrap_or(false)
|
||||
};
|
||||
// Add the custom mode (a fresh output has none), then select it.
|
||||
let _ = run(format!(
|
||||
"output.{output}.addCustomMode.{width}.{height}.{mhz}.full"
|
||||
));
|
||||
if run(format!("output.{output}.mode.{width}x{height}@{hz}")) {
|
||||
tracing::info!(output, hz, "KWin virtual output: custom refresh applied");
|
||||
} else {
|
||||
tracing::warn!(
|
||||
output,
|
||||
hz,
|
||||
"kscreen-doctor refresh set failed — source stays 60 Hz (is kscreen-doctor installed?)"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Keepalive token for a KWin virtual output. When it is dropped the shared `stop` flag is set;
/// the keepalive thread then drops its Wayland connection, which makes KWin reclaim the output.
struct StopGuard(Arc<AtomicBool>);

impl Drop for StopGuard {
    fn drop(&mut self) {
        let Self(stop) = self;
        stop.store(true, Ordering::Relaxed);
    }
}
|
||||
|
||||
/// Dispatch state for the KWin virtual-output connection: filled in by the `Dispatch` impls
/// below and inspected by `run` between dispatches.
#[derive(Default)]
|
||||
struct State {
|
||||
// The bound screencast manager global, set when the registry announces it.
screencast: Option<Screencast>,
|
||||
// PipeWire node id from the stream's `created` event.
node_id: Option<u32>,
|
||||
// Error text from the stream's `failed` event.
failed: Option<String>,
|
||||
// Set once the stream's `closed` event has been seen.
closed: bool,
|
||||
}
|
||||
|
||||
impl Dispatch<WlRegistry, ()> for State {
|
||||
fn event(
|
||||
state: &mut Self,
|
||||
registry: &WlRegistry,
|
||||
event: wl_registry::Event,
|
||||
_: &(),
|
||||
_: &Connection,
|
||||
qh: &QueueHandle<Self>,
|
||||
) {
|
||||
if let wl_registry::Event::Global {
|
||||
name,
|
||||
interface,
|
||||
version,
|
||||
} = event
|
||||
{
|
||||
if interface == Screencast::interface().name {
|
||||
let v = version.min(MAX_VERSION);
|
||||
state.screencast = Some(registry.bind::<Screencast, _, _>(name, v, qh, ()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The manager has no events.
|
||||
impl Dispatch<Screencast, ()> for State {
|
||||
fn event(
|
||||
_: &mut Self,
|
||||
_: &Screencast,
|
||||
_: zkde::zkde_screencast_unstable_v1::Event,
|
||||
_: &(),
|
||||
_: &Connection,
|
||||
_: &QueueHandle<Self>,
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
impl Dispatch<ScreencastStream, ()> for State {
|
||||
fn event(
|
||||
state: &mut Self,
|
||||
_: &ScreencastStream,
|
||||
event: StreamEvent,
|
||||
_: &(),
|
||||
_: &Connection,
|
||||
_: &QueueHandle<Self>,
|
||||
) {
|
||||
match event {
|
||||
StreamEvent::Created { node } => state.node_id = Some(node),
|
||||
StreamEvent::Failed { error } => state.failed = Some(error),
|
||||
StreamEvent::Closed => state.closed = true,
|
||||
// `serial` (v6) — we use the node id from `created`, so ignore.
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Worker thread: create a `width`x`height` virtual output on KWin, send its PipeWire node id
|
||||
/// back over `setup_tx`, then keep the Wayland connection alive (so the output isn't destroyed)
|
||||
/// until `stop` is set. Mirrors the portal thread's "park to keep the session alive".
|
||||
fn virtual_output_thread(
|
||||
width: u32,
|
||||
height: u32,
|
||||
setup_tx: Sender<Result<u32, String>>,
|
||||
stop: Arc<AtomicBool>,
|
||||
) {
|
||||
if let Err(e) = run(width, height, &setup_tx, &stop) {
|
||||
// If we never delivered a node id, report the failure to the waiting opener.
|
||||
let _ = setup_tx.send(Err(format!("{e:#}")));
|
||||
}
|
||||
}
|
||||
|
||||
fn run(
|
||||
width: u32,
|
||||
height: u32,
|
||||
setup_tx: &Sender<Result<u32, String>>,
|
||||
stop: &AtomicBool,
|
||||
) -> Result<()> {
|
||||
let conn = Connection::connect_to_env()
|
||||
.context("connect to KWin Wayland (is WAYLAND_DISPLAY set to the KWin socket?)")?;
|
||||
let mut queue = conn.new_event_queue();
|
||||
let qh = queue.handle();
|
||||
let _registry = conn.display().get_registry(&qh, ());
|
||||
|
||||
let mut state = State::default();
|
||||
queue.roundtrip(&mut state).context("registry roundtrip")?;
|
||||
|
||||
let screencast = state.screencast.clone().ok_or_else(|| {
|
||||
anyhow!(
|
||||
"KWin does not expose zkde_screencast_unstable_v1 (need a real KDE session, or run \
|
||||
KWin with KWIN_WAYLAND_NO_PERMISSION_CHECKS=1 for the headless test)"
|
||||
)
|
||||
})?;
|
||||
|
||||
// Create the virtual output sized to the client, cursor composited into the stream.
|
||||
let stream = screencast.stream_virtual_output(
|
||||
VOUT_NAME.to_string(),
|
||||
width as i32,
|
||||
height as i32,
|
||||
1.0, // scale (logical == physical)
|
||||
POINTER_EMBEDDED,
|
||||
&qh,
|
||||
(),
|
||||
);
|
||||
tracing::info!(
|
||||
width,
|
||||
height,
|
||||
"KWin: requested virtual output; awaiting PipeWire node"
|
||||
);
|
||||
|
||||
// Pump events until KWin reports the node id (or an error).
|
||||
let node_id = loop {
|
||||
queue
|
||||
.blocking_dispatch(&mut state)
|
||||
.context("wayland dispatch (awaiting created)")?;
|
||||
if let Some(node) = state.node_id {
|
||||
break node;
|
||||
}
|
||||
if let Some(e) = state.failed.take() {
|
||||
bail!("stream_virtual_output failed: {e}");
|
||||
}
|
||||
if state.closed {
|
||||
bail!("KWin closed the stream before it was created");
|
||||
}
|
||||
};
|
||||
setup_tx
|
||||
.send(Ok(node_id))
|
||||
.map_err(|_| anyhow!("virtual-output opener went away"))?;
|
||||
|
||||
// Keep the connection (and thus the virtual output) alive until told to stop, observing
|
||||
// `closed`. blocking_dispatch can't be interrupted, so poll the connection fd with a short
|
||||
// timeout so `stop` is honored within ~200 ms.
|
||||
while !stop.load(Ordering::Relaxed) {
|
||||
queue
|
||||
.dispatch_pending(&mut state)
|
||||
.context("dispatch_pending")?;
|
||||
if state.closed {
|
||||
tracing::warn!("KWin closed the virtual-output stream");
|
||||
break;
|
||||
}
|
||||
conn.flush().context("wayland flush")?;
|
||||
let Some(guard) = conn.prepare_read() else {
|
||||
continue; // events already queued — loop dispatches them
|
||||
};
|
||||
let mut pfd = libc::pollfd {
|
||||
fd: conn.as_fd().as_raw_fd(),
|
||||
events: libc::POLLIN,
|
||||
revents: 0,
|
||||
};
|
||||
let r = unsafe { libc::poll(&mut pfd, 1, 200) };
|
||||
if r > 0 && (pfd.revents & libc::POLLIN) != 0 {
|
||||
let _ = guard.read();
|
||||
} // else: timeout or signal — drop the guard, re-check `stop`
|
||||
}
|
||||
|
||||
// Best-effort clean teardown; dropping the connection also makes KWin reclaim the output.
|
||||
stream.close();
|
||||
let _ = conn.flush();
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,226 @@
|
||||
//! GNOME/Mutter virtual-display backend via Mutter's *direct* D-Bus APIs (the same path
|
||||
//! gnome-remote-desktop uses for headless sessions — not the xdg portal, which needs an
|
||||
//! interactive grant):
|
||||
//!
|
||||
//! 1. `org.gnome.Mutter.RemoteDesktop.CreateSession()` → a remote-desktop session (read its
|
||||
//! `SessionId`). The cast is anchored to it, and it's also the future input path.
|
||||
//! 2. `org.gnome.Mutter.ScreenCast.CreateSession({"remote-desktop-session-id": id})`.
|
||||
//! 3. `ScreenCast.Session.RecordVirtual({"cursor-mode": embedded})` → Mutter creates a **virtual
|
||||
//! monitor** and returns a Stream object.
|
||||
//! 4. `RemoteDesktop.Session.Start()` → the Stream signals `PipeWireStreamAdded(node_id)`.
|
||||
//!
|
||||
//! The virtual monitor's *size* follows the PipeWire format negotiation — Mutter adapts it to
|
||||
//! what the consumer asks for — so the client's exact WxH is plumbed into our consumer's format
|
||||
//! pod as the preferred size ([`VirtualOutput::preferred_mode`]) rather than passed here.
|
||||
//! Sessions die with the D-Bus connection, so a keepalive thread owns it (RAII teardown).
|
||||
//!
|
||||
//! Requires a running Mutter (`gnome-shell` session, or `gnome-shell --headless` for the
|
||||
//! headless host) on the session bus. GNOME is detected via `XDG_CURRENT_DESKTOP=GNOME` or
|
||||
//! forced with `PUNKTFUNK_COMPOSITOR=mutter`.
|
||||
|
||||
use super::{Mode, VirtualDisplay, VirtualOutput};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use ashpd::zbus;
|
||||
use futures_util::StreamExt;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::mpsc::Sender;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
use zbus::zvariant::{OwnedObjectPath, Value};
|
||||
|
||||
/// Bus name of Mutter's screen-cast service.
const BUS_SC: &str = "org.gnome.Mutter.ScreenCast";

/// Bus name of Mutter's remote-desktop service.
const BUS_RD: &str = "org.gnome.Mutter.RemoteDesktop";

/// Mutter `cursor-mode` value that renders the cursor into the stream (matching what the
/// KWin/gamescope backends do).
const CURSOR_EMBEDDED: u32 = 1;
|
||||
|
||||
/// The Mutter virtual-display driver. Each [`create`](VirtualDisplay::create) spins up a
|
||||
/// keepalive thread owning the D-Bus sessions behind the virtual monitor.
|
||||
pub struct MutterDisplay;
|
||||
|
||||
impl MutterDisplay {
|
||||
pub fn new() -> Result<Self> {
|
||||
Ok(MutterDisplay)
|
||||
}
|
||||
}
|
||||
|
||||
impl VirtualDisplay for MutterDisplay {
|
||||
fn name(&self) -> &'static str {
|
||||
"mutter"
|
||||
}
|
||||
|
||||
fn create(&mut self, mode: Mode) -> Result<VirtualOutput> {
|
||||
let (setup_tx, setup_rx) = std::sync::mpsc::channel::<Result<u32, String>>();
|
||||
let stop = Arc::new(AtomicBool::new(false));
|
||||
let stop_thread = stop.clone();
|
||||
thread::Builder::new()
|
||||
.name("punktfunk-mutter-vout".into())
|
||||
.spawn(move || session_thread(setup_tx, stop_thread))
|
||||
.context("spawn Mutter virtual-output thread")?;
|
||||
|
||||
let node_id = match setup_rx.recv_timeout(Duration::from_secs(20)) {
|
||||
Ok(Ok(v)) => v,
|
||||
Ok(Err(e)) => bail!("Mutter virtual monitor failed: {e}"),
|
||||
Err(_) => bail!("timed out creating the Mutter virtual monitor"),
|
||||
};
|
||||
tracing::info!(
|
||||
node_id,
|
||||
w = mode.width,
|
||||
h = mode.height,
|
||||
"Mutter virtual monitor ready"
|
||||
);
|
||||
Ok(VirtualOutput {
|
||||
node_id,
|
||||
remote_fd: None,
|
||||
preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)),
|
||||
keepalive: Box::new(StopGuard(stop)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Keepalive token for a Mutter virtual monitor. Dropping it sets the shared `stop` flag; the
/// keepalive thread then ends and closes the D-Bus connection, after which Mutter tears the
/// remote-desktop + screencast sessions (and the virtual monitor) down.
struct StopGuard(Arc<AtomicBool>);

impl Drop for StopGuard {
    fn drop(&mut self) {
        let Self(stop) = self;
        stop.store(true, Ordering::Relaxed);
    }
}
|
||||
|
||||
/// Keepalive thread: run the D-Bus handshake on a private tokio runtime, report the PipeWire
|
||||
/// node id, then hold the connection until stopped.
|
||||
fn session_thread(setup_tx: Sender<Result<u32, String>>, stop: Arc<AtomicBool>) {
|
||||
let rt = match tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(1)
|
||||
.enable_all()
|
||||
.build()
|
||||
{
|
||||
Ok(rt) => rt,
|
||||
Err(e) => {
|
||||
let _ = setup_tx.send(Err(format!("build tokio runtime: {e}")));
|
||||
return;
|
||||
}
|
||||
};
|
||||
rt.block_on(async move {
|
||||
let session = match connect().await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
let _ = setup_tx.send(Err(format!("{e:#}")));
|
||||
return;
|
||||
}
|
||||
};
|
||||
let _ = setup_tx.send(Ok(session.node_id));
|
||||
// Park, keeping `session` (and its zbus connection) alive until told to stop.
|
||||
while !stop.load(Ordering::Relaxed) {
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
}
|
||||
// Best-effort explicit teardown before the connection drops.
|
||||
let _ = session.rd_session.call_method("Stop", &()).await;
|
||||
});
|
||||
}
|
||||
|
||||
/// The live session objects (held for the stream's lifetime) + the PipeWire node id.
|
||||
struct MutterSession {
|
||||
rd_session: zbus::Proxy<'static>,
|
||||
_sc_session: zbus::Proxy<'static>,
|
||||
_conn: zbus::Connection,
|
||||
node_id: u32,
|
||||
}
|
||||
|
||||
/// Run the four-step handshake (see module docs).
|
||||
async fn connect() -> Result<MutterSession> {
|
||||
let conn = zbus::Connection::session()
|
||||
.await
|
||||
.context("connect session D-Bus")?;
|
||||
|
||||
// 1. RemoteDesktop session (the anchor; also the future input path).
|
||||
let rd = zbus::Proxy::new(
|
||||
&conn,
|
||||
BUS_RD,
|
||||
"/org/gnome/Mutter/RemoteDesktop",
|
||||
"org.gnome.Mutter.RemoteDesktop",
|
||||
)
|
||||
.await
|
||||
.context("RemoteDesktop proxy (is gnome-shell / `gnome-shell --headless` running?)")?;
|
||||
let rd_path: OwnedObjectPath = rd
|
||||
.call("CreateSession", &())
|
||||
.await
|
||||
.context("RemoteDesktop.CreateSession")?;
|
||||
let rd_session = zbus::Proxy::new(
|
||||
&conn,
|
||||
BUS_RD,
|
||||
rd_path,
|
||||
"org.gnome.Mutter.RemoteDesktop.Session",
|
||||
)
|
||||
.await?;
|
||||
let session_id: String = rd_session
|
||||
.get_property("SessionId")
|
||||
.await
|
||||
.context("read SessionId")?;
|
||||
|
||||
// 2. ScreenCast session anchored to it.
|
||||
let sc = zbus::Proxy::new(
|
||||
&conn,
|
||||
BUS_SC,
|
||||
"/org/gnome/Mutter/ScreenCast",
|
||||
"org.gnome.Mutter.ScreenCast",
|
||||
)
|
||||
.await
|
||||
.context("ScreenCast proxy")?;
|
||||
let mut props: HashMap<&str, Value> = HashMap::new();
|
||||
props.insert("remote-desktop-session-id", Value::from(session_id));
|
||||
let sc_path: OwnedObjectPath = sc
|
||||
.call("CreateSession", &(props,))
|
||||
.await
|
||||
.context("ScreenCast.CreateSession")?;
|
||||
let sc_session = zbus::Proxy::new(
|
||||
&conn,
|
||||
BUS_SC,
|
||||
sc_path,
|
||||
"org.gnome.Mutter.ScreenCast.Session",
|
||||
)
|
||||
.await?;
|
||||
|
||||
// 3. The virtual monitor. Size/refresh follow the PipeWire format negotiation.
|
||||
let mut rec: HashMap<&str, Value> = HashMap::new();
|
||||
rec.insert("cursor-mode", Value::from(CURSOR_EMBEDDED));
|
||||
let stream_path: OwnedObjectPath = sc_session
|
||||
.call("RecordVirtual", &(rec,))
|
||||
.await
|
||||
.context("Session.RecordVirtual")?;
|
||||
let stream = zbus::Proxy::new(
|
||||
&conn,
|
||||
BUS_SC,
|
||||
stream_path,
|
||||
"org.gnome.Mutter.ScreenCast.Stream",
|
||||
)
|
||||
.await?;
|
||||
|
||||
// 4. Subscribe to the node-id signal BEFORE starting, then start the (combined) session.
|
||||
let mut added = stream
|
||||
.receive_signal("PipeWireStreamAdded")
|
||||
.await
|
||||
.context("subscribe PipeWireStreamAdded")?;
|
||||
rd_session
|
||||
.call_method("Start", &())
|
||||
.await
|
||||
.context("RemoteDesktop.Session.Start")?;
|
||||
let msg = tokio::time::timeout(Duration::from_secs(10), added.next())
|
||||
.await
|
||||
.map_err(|_| anyhow!("PipeWireStreamAdded did not arrive within 10s"))?
|
||||
.ok_or_else(|| anyhow!("signal stream ended before PipeWireStreamAdded"))?;
|
||||
let (node_id,): (u32,) = msg
|
||||
.body()
|
||||
.deserialize()
|
||||
.context("PipeWireStreamAdded body")?;
|
||||
|
||||
Ok(MutterSession {
|
||||
rd_session,
|
||||
_sc_session: sc_session,
|
||||
_conn: conn,
|
||||
node_id,
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,509 @@
|
||||
//! Minimal CUDA Driver API FFI for the zero-copy path. No Rust crate exposes the GL-interop
|
||||
//! driver calls we need (`cuGraphicsGLRegisterImage` & co.), so we hand-roll exactly those and
|
||||
//! link `libcuda.so.1` (the driver library — NOT `libcudart`). Symbol names verified against
|
||||
//! `cust_raw` + `cudaGL.h`: the context/mem ops use the `_v2` ABI suffix; the graphics-interop
|
||||
//! ops are unsuffixed. (We use GL interop, not EGL interop: `cuGraphicsEGLRegisterImage` is
|
||||
//! Tegra-only on the desktop driver — see [`super::egl`].)
|
||||
//!
|
||||
//! One process-wide `CUcontext` is created lazily and shared by the EGL importer (capture
|
||||
//! thread) and ffmpeg's `hevc_nvenc` (encode thread); each thread makes it current before use.
|
||||
|
||||
#![allow(non_camel_case_types, non_snake_case)]
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use std::os::raw::{c_int, c_uint, c_void};
|
||||
use std::sync::{Arc, Mutex, OnceLock};
|
||||
|
||||
// Scalar handles.
/// Driver status code; `CUDA_SUCCESS` is 0.
pub type CUresult = c_uint;
/// Device ordinal handle.
pub type CUdevice = c_int;
/// Device address.
pub type CUdeviceptr = u64;

// Opaque pointer handles.
/// Opaque `CUctx_st*`.
pub type CUcontext = *mut c_void;
/// Opaque `CUstream_st*`.
pub type CUstream = *mut c_void;
/// Opaque `CUextMemory_st*`.
pub type CUexternalMemory = *mut c_void;
/// Opaque graphics-interop resource handle.
pub type CUgraphicsResource = *mut c_void;
/// Opaque CUDA array handle.
pub type CUarray = *mut c_void;

// `CUmemorytype` values (cuda.h): HOST=1, DEVICE=2, ARRAY=3, UNIFIED=4. Only the two used here
// are declared.
/// `CUmemorytype`: linear device memory.
pub const CU_MEMORYTYPE_DEVICE: c_uint = 2;
/// `CUmemorytype`: CUDA array.
pub const CU_MEMORYTYPE_ARRAY: c_uint = 3;
|
||||
|
||||
/// `CUDA_MEMCPY2D` (cuda.h, `_v2` ABI) — the copy descriptor handed to `cuMemcpy2D_v2` by
/// pointer. Field order is load-bearing: the struct is `#[repr(C)]` and must match the C
/// declaration exactly. `Default` yields an all-zero/null descriptor to fill in selectively.
|
||||
#[repr(C)]
|
||||
#[derive(Default)]
|
||||
pub struct CUDA_MEMCPY2D {
|
||||
// --- source side ---
pub srcXInBytes: usize,
|
||||
pub srcY: usize,
|
||||
// A `CUmemorytype` value (see `CU_MEMORYTYPE_*`). NOTE(review): per cuda.h it selects which
// of the three source fields below is used — confirm against the header.
pub srcMemoryType: c_uint,
|
||||
pub srcHost: *const c_void,
|
||||
pub srcDevice: CUdeviceptr,
|
||||
pub srcArray: CUarray,
|
||||
pub srcPitch: usize,
|
||||
// --- destination side (mirrors the source fields) ---
pub dstXInBytes: usize,
|
||||
pub dstY: usize,
|
||||
pub dstMemoryType: c_uint,
|
||||
pub dstHost: *mut c_void,
|
||||
pub dstDevice: CUdeviceptr,
|
||||
pub dstArray: CUarray,
|
||||
pub dstPitch: usize,
|
||||
// --- extent of the copied region ---
pub WidthInBytes: usize,
|
||||
pub Height: usize,
|
||||
}
|
||||
|
||||
/// `CUDA_EXTERNAL_MEMORY_HANDLE_DESC` (cuda.h, 64-bit layout) — passed by pointer to
/// `cuImportExternalMemory`. `handle` is a union whose
|
||||
/// largest member is the win32 two-pointer struct (16 bytes, align 8); for the OPAQUE_FD type
|
||||
/// only the first 4 bytes (the `int fd`) are read. The union is modelled as `[u64; 2]`, and
/// the padding the C layout implies is spelled out as the private `_pad` fields.
|
||||
#[repr(C)]
|
||||
#[derive(Default)]
|
||||
pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC {
|
||||
pub type_: c_uint, // CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1
|
||||
// Explicit padding so the 8-byte-aligned union below starts at offset 8.
_pad: u32,
|
||||
pub handle: [u64; 2], // union { int fd; {void*,void*} win32; void* nvSciBufObject }
|
||||
pub size: u64,
|
||||
pub flags: c_uint,
|
||||
// Private, so it can only hold what `Default` produces (zeros).
reserved: [c_uint; 16],
|
||||
// Explicit tail padding up to the struct's 8-byte alignment.
_pad2: u32,
|
||||
}
|
||||
|
||||
/// `CUDA_EXTERNAL_MEMORY_BUFFER_DESC` (cuda.h, 64-bit layout) — passed by pointer to
/// `cuExternalMemoryGetMappedBuffer`.
|
||||
#[repr(C)]
|
||||
#[derive(Default)]
|
||||
pub struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC {
|
||||
// NOTE(review): presumably the byte offset and length of the range to map inside the
// imported memory object — confirm against cuda.h.
pub offset: u64,
|
||||
pub size: u64,
|
||||
pub flags: c_uint,
|
||||
// Private, so it can only hold what `Default` produces (zeros).
reserved: [c_uint; 16],
|
||||
// Explicit tail padding up to the struct's 8-byte alignment.
_pad: u32,
|
||||
}
|
||||
|
||||
pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: c_uint = 1;
|
||||
|
||||
#[link(name = "cuda")]
|
||||
extern "C" {
|
||||
fn cuInit(flags: c_uint) -> CUresult;
|
||||
fn cuDeviceGet(device: *mut CUdevice, ordinal: c_int) -> CUresult;
|
||||
fn cuCtxCreate_v2(pctx: *mut CUcontext, flags: c_uint, dev: CUdevice) -> CUresult;
|
||||
fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult;
|
||||
fn cuMemAllocPitch_v2(
|
||||
dptr: *mut CUdeviceptr,
|
||||
pitch: *mut usize,
|
||||
width_bytes: usize,
|
||||
height: usize,
|
||||
element_size: c_uint,
|
||||
) -> CUresult;
|
||||
fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
|
||||
fn cuMemcpy2D_v2(copy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||
fn cuCtxSynchronize() -> CUresult;
|
||||
|
||||
// GL interop (cudaGL.h) — these symbols have NO `_v2` suffix. `cuGraphicsEGLRegisterImage`
|
||||
// is Tegra-only on the desktop driver, so we go EGLImage → GL texture → register the texture.
|
||||
fn cuGraphicsGLRegisterImage(
|
||||
resource: *mut CUgraphicsResource,
|
||||
texture: c_uint, // GLuint
|
||||
target: c_uint, // GL_TEXTURE_2D = 0x0DE1
|
||||
flags: c_uint, // CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01
|
||||
) -> CUresult;
|
||||
fn cuGraphicsMapResources(
|
||||
count: c_uint,
|
||||
resources: *mut CUgraphicsResource,
|
||||
stream: *mut c_void,
|
||||
) -> CUresult;
|
||||
fn cuGraphicsUnmapResources(
|
||||
count: c_uint,
|
||||
resources: *mut CUgraphicsResource,
|
||||
stream: *mut c_void,
|
||||
) -> CUresult;
|
||||
fn cuGraphicsSubResourceGetMappedArray(
|
||||
array: *mut CUarray,
|
||||
resource: CUgraphicsResource,
|
||||
array_index: c_uint,
|
||||
mip_level: c_uint,
|
||||
) -> CUresult;
|
||||
fn cuGraphicsUnregisterResource(resource: CUgraphicsResource) -> CUresult;
|
||||
|
||||
// External memory (cuda.h, no `_v2` suffix) — imports a (Vulkan-exported) dmabuf fd as
|
||||
// device memory. Used for LINEAR dmabufs (gamescope), which EGL/GL interop can't sample.
|
||||
fn cuImportExternalMemory(
|
||||
ext_mem_out: *mut CUexternalMemory,
|
||||
mem_handle_desc: *const CUDA_EXTERNAL_MEMORY_HANDLE_DESC,
|
||||
) -> CUresult;
|
||||
fn cuExternalMemoryGetMappedBuffer(
|
||||
dev_ptr: *mut CUdeviceptr,
|
||||
ext_mem: CUexternalMemory,
|
||||
buffer_desc: *const CUDA_EXTERNAL_MEMORY_BUFFER_DESC,
|
||||
) -> CUresult;
|
||||
fn cuDestroyExternalMemory(ext_mem: CUexternalMemory) -> CUresult;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ck(r: CUresult, what: &str) -> Result<()> {
|
||||
if r == 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
bail!("CUDA driver error {r} in {what}")
|
||||
}
|
||||
}
|
||||
|
||||
/// The shared process-wide CUDA context (created once). Wrapped so it's `Send`/`Sync` to live
|
||||
/// in a `OnceLock`; the raw `CUcontext` is thread-safe to make current from any thread.
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Context(pub CUcontext);
|
||||
unsafe impl Send for Context {}
|
||||
unsafe impl Sync for Context {}
|
||||
|
||||
static CONTEXT: OnceLock<Context> = OnceLock::new();
|
||||
|
||||
/// Get (lazily creating) the shared CUDA context on device 0.
|
||||
pub fn context() -> Result<CUcontext> {
|
||||
if let Some(c) = CONTEXT.get() {
|
||||
return Ok(c.0);
|
||||
}
|
||||
let ctx = unsafe {
|
||||
ck(cuInit(0), "cuInit")?;
|
||||
let mut dev: CUdevice = 0;
|
||||
ck(cuDeviceGet(&mut dev, 0), "cuDeviceGet")?;
|
||||
let mut ctx: CUcontext = std::ptr::null_mut();
|
||||
ck(cuCtxCreate_v2(&mut ctx, 0, dev), "cuCtxCreate_v2")?;
|
||||
ctx
|
||||
};
|
||||
// Racy first-init is fine: the winner's context is used; a loser leaks one context (rare,
|
||||
// process-lifetime). `get_or_init` keeps a single shared value.
|
||||
Ok(CONTEXT.get_or_init(|| Context(ctx)).0)
|
||||
}
|
||||
|
||||
/// Make the shared context current on the calling thread (required before any CUDA op here).
|
||||
pub fn make_current() -> Result<()> {
|
||||
let ctx = context()?;
|
||||
unsafe { ck(cuCtxSetCurrent(ctx), "cuCtxSetCurrent") }
|
||||
}
|
||||
|
||||
/// Allocate one pitched device buffer for `width`x`height` 4-byte pixels; returns `(ptr, pitch)`.
|
||||
fn alloc_pitched(width: u32, height: u32) -> Result<(CUdeviceptr, usize)> {
|
||||
let mut ptr: CUdeviceptr = 0;
|
||||
let mut pitch: usize = 0;
|
||||
unsafe {
|
||||
ck(
|
||||
cuMemAllocPitch_v2(
|
||||
&mut ptr,
|
||||
&mut pitch,
|
||||
width as usize * 4,
|
||||
height as usize,
|
||||
16,
|
||||
),
|
||||
"cuMemAllocPitch_v2",
|
||||
)?;
|
||||
}
|
||||
Ok((ptr, pitch))
|
||||
}
|
||||
|
||||
/// Free-list of recycled device allocations for one resolution. Shared (via `Arc`) between the
|
||||
/// capture thread that hands out buffers and the encode thread where a [`DeviceBuffer`] drops and
|
||||
/// returns its allocation here. Bulk-freed when the last reference drops.
|
||||
struct PoolInner {
|
||||
free: Vec<CUdeviceptr>,
|
||||
}
|
||||
|
||||
impl Drop for PoolInner {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
if let Some(c) = CONTEXT.get() {
|
||||
let _ = cuCtxSetCurrent(c.0);
|
||||
}
|
||||
for &p in &self.free {
|
||||
let _ = cuMemFree_v2(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A pool of reusable pitched device buffers for a fixed resolution. Eliminates the per-frame
|
||||
/// `cuMemAllocPitch`/`cuMemFree` (a ~29 MB allocation at 5K) that takes the device allocator lock
|
||||
/// and serializes against the GPU every frame.
|
||||
#[derive(Clone)]
|
||||
pub struct BufferPool {
|
||||
inner: Arc<Mutex<PoolInner>>,
|
||||
width: u32,
|
||||
height: u32,
|
||||
pitch: usize,
|
||||
}
|
||||
|
||||
impl BufferPool {
|
||||
/// Create a pool for `width`x`height` 4-byte buffers (allocates one up front to learn the
|
||||
/// driver's pitch, which is constant for a given width).
|
||||
pub fn new(width: u32, height: u32) -> Result<BufferPool> {
|
||||
let (ptr, pitch) = alloc_pitched(width, height)?;
|
||||
Ok(BufferPool {
|
||||
inner: Arc::new(Mutex::new(PoolInner { free: vec![ptr] })),
|
||||
width,
|
||||
height,
|
||||
pitch,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn width(&self) -> u32 {
|
||||
self.width
|
||||
}
|
||||
|
||||
pub fn height(&self) -> u32 {
|
||||
self.height
|
||||
}
|
||||
|
||||
/// Take a buffer — recycled if one is free, else freshly allocated. The buffer returns to this
|
||||
/// pool when dropped (after the consumer has synchronized, so the GPU is done with it).
|
||||
pub fn get(&self) -> Result<DeviceBuffer> {
|
||||
let reuse = self.inner.lock().unwrap().free.pop();
|
||||
let ptr = match reuse {
|
||||
Some(p) => p,
|
||||
None => alloc_pitched(self.width, self.height)?.0,
|
||||
};
|
||||
Ok(DeviceBuffer {
|
||||
ptr,
|
||||
pitch: self.pitch,
|
||||
width: self.width,
|
||||
height: self.height,
|
||||
pool: Some(self.inner.clone()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A pitched device buffer holding one captured frame. Filled by a copy from the EGL-mapped
|
||||
/// dmabuf (so the dmabuf can be returned to the compositor immediately) and read by the encoder.
|
||||
/// When it came from a [`BufferPool`] it recycles on drop; otherwise it frees.
|
||||
pub struct DeviceBuffer {
|
||||
pub ptr: CUdeviceptr,
|
||||
pub pitch: usize,
|
||||
pub width: u32,
|
||||
pub height: u32,
|
||||
pool: Option<Arc<Mutex<PoolInner>>>,
|
||||
}
|
||||
|
||||
impl DeviceBuffer {
|
||||
/// Allocate a standalone (un-pooled) pitched buffer. Prefer [`BufferPool`] on the hot path.
|
||||
pub fn alloc(width: u32, height: u32) -> Result<DeviceBuffer> {
|
||||
let (ptr, pitch) = alloc_pitched(width, height)?;
|
||||
Ok(DeviceBuffer {
|
||||
ptr,
|
||||
pitch,
|
||||
width,
|
||||
height,
|
||||
pool: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DeviceBuffer {
|
||||
fn drop(&mut self) {
|
||||
if self.ptr == 0 {
|
||||
return;
|
||||
}
|
||||
if let Some(pool) = &self.pool {
|
||||
// Recycle (the consumer synchronized before dropping, so the GPU is done with it).
|
||||
pool.lock().unwrap().free.push(self.ptr);
|
||||
} else {
|
||||
// The buffer may be freed on the encode thread; cuMemFree needs a current context.
|
||||
unsafe {
|
||||
if let Some(c) = CONTEXT.get() {
|
||||
let _ = cuCtxSetCurrent(c.0);
|
||||
}
|
||||
let _ = cuMemFree_v2(self.ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A *persistent* GL-texture→CUDA registration. The desktop NVIDIA driver only supports CUDA
|
||||
/// interop through GL textures (not dmabuf EGLImages directly), so the importer renders the
|
||||
/// dmabuf into a reusable `GL_RGBA8` texture and registers *that* once — then each frame only
|
||||
/// maps → copies the mapped array out → unmaps (the map/unmap pair is the GL↔CUDA sync point),
|
||||
/// instead of registering/unregistering every frame. Unregisters on drop.
|
||||
pub struct RegisteredTexture {
|
||||
resource: CUgraphicsResource,
|
||||
}
|
||||
|
||||
impl RegisteredTexture {
|
||||
/// Register a `GL_TEXTURE_2D` once.
|
||||
///
|
||||
/// # Safety
|
||||
/// The GL context and the shared CUDA context must both be current on this thread, and
|
||||
/// `texture` must be a valid `GL_TEXTURE_2D`.
|
||||
pub unsafe fn register_gl(texture: u32) -> Result<RegisteredTexture> {
|
||||
const GL_TEXTURE_2D: c_uint = 0x0DE1;
|
||||
const CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: c_uint = 0x01;
|
||||
let mut resource: CUgraphicsResource = std::ptr::null_mut();
|
||||
ck(
|
||||
cuGraphicsGLRegisterImage(
|
||||
&mut resource,
|
||||
texture,
|
||||
GL_TEXTURE_2D,
|
||||
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY,
|
||||
),
|
||||
"cuGraphicsGLRegisterImage",
|
||||
)?;
|
||||
Ok(RegisteredTexture { resource })
|
||||
}
|
||||
|
||||
/// Map the texture for this frame, copy its (already-linear RGBA8) array into `dst`, then
|
||||
/// unmap. The `cuCtxSynchronize` ensures `dst` is ready before the source dmabuf is recycled.
|
||||
pub fn copy_mapped_to(&mut self, dst: &DeviceBuffer) -> Result<()> {
|
||||
unsafe {
|
||||
ck(
|
||||
cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()),
|
||||
"cuGraphicsMapResources",
|
||||
)?;
|
||||
let mut array: CUarray = std::ptr::null_mut();
|
||||
if cuGraphicsSubResourceGetMappedArray(&mut array, self.resource, 0, 0) != 0 {
|
||||
let _ = cuGraphicsUnmapResources(1, &mut self.resource, std::ptr::null_mut());
|
||||
bail!("cuGraphicsSubResourceGetMappedArray failed");
|
||||
}
|
||||
let copy = CUDA_MEMCPY2D {
|
||||
srcMemoryType: CU_MEMORYTYPE_ARRAY,
|
||||
srcArray: array,
|
||||
dstMemoryType: CU_MEMORYTYPE_DEVICE,
|
||||
dstDevice: dst.ptr,
|
||||
dstPitch: dst.pitch,
|
||||
WidthInBytes: dst.width as usize * 4, // 4 bytes/px (BGRx)
|
||||
Height: dst.height as usize,
|
||||
..Default::default()
|
||||
};
|
||||
let r = cuMemcpy2D_v2(©);
|
||||
let s = cuCtxSynchronize();
|
||||
let _ = cuGraphicsUnmapResources(1, &mut self.resource, std::ptr::null_mut());
|
||||
ck(r, "cuMemcpy2D_v2")?;
|
||||
ck(s, "cuCtxSynchronize")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy a pitched device buffer into another device region (device→device), e.g. our imported
|
||||
/// [`DeviceBuffer`] into a pooled CUDA surface NVENC owns. Both are 4-byte (BGRx) pixels.
|
||||
/// The caller must have the shared context current on this thread (see [`make_current`]).
|
||||
pub fn copy_device_to_device(
|
||||
src: &DeviceBuffer,
|
||||
dst_ptr: CUdeviceptr,
|
||||
dst_pitch: usize,
|
||||
) -> Result<()> {
|
||||
let copy = CUDA_MEMCPY2D {
|
||||
srcMemoryType: CU_MEMORYTYPE_DEVICE,
|
||||
srcDevice: src.ptr,
|
||||
srcPitch: src.pitch,
|
||||
dstMemoryType: CU_MEMORYTYPE_DEVICE,
|
||||
dstDevice: dst_ptr,
|
||||
dstPitch: dst_pitch,
|
||||
WidthInBytes: src.width as usize * 4,
|
||||
Height: src.height as usize,
|
||||
..Default::default()
|
||||
};
|
||||
unsafe {
|
||||
ck(cuMemcpy2D_v2(©), "cuMemcpy2D_v2(dev->dev)")?;
|
||||
ck(cuCtxSynchronize(), "cuCtxSynchronize")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl Drop for RegisteredTexture {
|
||||
fn drop(&mut self) {
|
||||
if !self.resource.is_null() {
|
||||
unsafe {
|
||||
let _ = cuGraphicsUnregisterResource(self.resource);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A dmabuf fd imported as CUDA external memory and mapped to a device pointer — the LINEAR
|
||||
/// path (gamescope): the buffer's bytes are directly addressable, no GL de-tiling needed.
|
||||
/// Cached per PipeWire buffer (the fd pool is stable for a stream's life); destroyed on drop.
|
||||
pub struct ExternalDmabuf {
|
||||
ext: CUexternalMemory,
|
||||
pub ptr: CUdeviceptr,
|
||||
pub size: u64,
|
||||
}
|
||||
|
||||
// Raw driver handles; used from the single capture thread but moved with the importer.
|
||||
unsafe impl Send for ExternalDmabuf {}
|
||||
|
||||
impl ExternalDmabuf {
|
||||
/// Import `fd` (NOT consumed — an internal `dup` is handed to the driver, which owns it
|
||||
/// from then on) and map its full `size` bytes to a device pointer. The shared context
|
||||
/// must be current.
|
||||
pub fn import(fd: i32, size: u64) -> Result<ExternalDmabuf> {
|
||||
let dup = unsafe { libc::dup(fd) };
|
||||
if dup < 0 {
|
||||
bail!("dup(dmabuf fd) failed");
|
||||
}
|
||||
Self::import_owned_fd(dup, size)
|
||||
}
|
||||
|
||||
/// Import an fd the caller hands over (e.g. a Vulkan-exported `OPAQUE_FD`) — consumed by
|
||||
/// the driver on success, closed by us on failure.
|
||||
pub fn import_owned_fd(dup: i32, size: u64) -> Result<ExternalDmabuf> {
|
||||
let mut desc = CUDA_EXTERNAL_MEMORY_HANDLE_DESC {
|
||||
type_: CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
|
||||
size,
|
||||
..Default::default()
|
||||
};
|
||||
desc.handle[0] = dup as u32 as u64; // union member `int fd` (little-endian low bytes)
|
||||
let mut ext: CUexternalMemory = std::ptr::null_mut();
|
||||
let r = unsafe { cuImportExternalMemory(&mut ext, &desc) };
|
||||
if r != 0 {
|
||||
unsafe { libc::close(dup) }; // import failed → the driver did not take the fd
|
||||
bail!("cuImportExternalMemory failed ({r}) — LINEAR dmabuf import unsupported?");
|
||||
}
|
||||
let buf = CUDA_EXTERNAL_MEMORY_BUFFER_DESC {
|
||||
offset: 0,
|
||||
size,
|
||||
..Default::default()
|
||||
};
|
||||
let mut ptr: CUdeviceptr = 0;
|
||||
let r = unsafe { cuExternalMemoryGetMappedBuffer(&mut ptr, ext, &buf) };
|
||||
if r != 0 {
|
||||
unsafe {
|
||||
let _ = cuDestroyExternalMemory(ext);
|
||||
}
|
||||
bail!("cuExternalMemoryGetMappedBuffer failed ({r})");
|
||||
}
|
||||
Ok(ExternalDmabuf { ext, ptr, size })
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ExternalDmabuf {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
if let Some(c) = CONTEXT.get() {
|
||||
let _ = cuCtxSetCurrent(c.0);
|
||||
}
|
||||
if self.ptr != 0 {
|
||||
let _ = cuMemFree_v2(self.ptr); // mapped buffers are freed like device memory
|
||||
}
|
||||
if !self.ext.is_null() {
|
||||
let _ = cuDestroyExternalMemory(self.ext);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy a pitched span starting at `src_ptr` (e.g. an [`ExternalDmabuf`] mapping at the chunk
|
||||
/// offset) into `dst`. The shared context must be current on this thread.
|
||||
pub fn copy_pitched_to_buffer(
|
||||
src_ptr: CUdeviceptr,
|
||||
src_pitch: usize,
|
||||
dst: &DeviceBuffer,
|
||||
) -> Result<()> {
|
||||
let copy = CUDA_MEMCPY2D {
|
||||
srcMemoryType: CU_MEMORYTYPE_DEVICE,
|
||||
srcDevice: src_ptr,
|
||||
srcPitch: src_pitch,
|
||||
dstMemoryType: CU_MEMORYTYPE_DEVICE,
|
||||
dstDevice: dst.ptr,
|
||||
dstPitch: dst.pitch,
|
||||
WidthInBytes: dst.width as usize * 4,
|
||||
Height: dst.height as usize,
|
||||
..Default::default()
|
||||
};
|
||||
unsafe {
|
||||
ck(cuMemcpy2D_v2(©), "cuMemcpy2D_v2(ext->dev)")?;
|
||||
// The copy must finish before the dmabuf is requeued to the producer.
|
||||
ck(cuCtxSynchronize(), "cuCtxSynchronize")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,528 @@
|
||||
//! EGL side of the zero-copy path: open a headless EGLDisplay on the NVIDIA GPU (GBM platform on
|
||||
//! the render node) and import a PipeWire dmabuf as an `EGLImage` with `EGL_LINUX_DMA_BUF_EXT`.
|
||||
//! The DRM format **modifier** is mandatory on NVIDIA (its buffers are tiled; importing without
|
||||
//! the modifier yields a corrupt image or `EGL_BAD_MATCH`).
|
||||
//!
|
||||
//! Desktop NVIDIA can't register a dmabuf `EGLImage` with CUDA directly — `cuGraphicsEGLRegisterImage`
|
||||
//! is Tegra-only and `cuGraphicsGLRegisterImage` rejects EGLImage-backed textures (their internal
|
||||
//! format is opaque). So we follow OBS/Sunshine: bind the `EGLImage` to a GL texture
|
||||
//! (`glEGLImageTargetTexture2DOES`), render it through a fullscreen-triangle shader into a plain
|
||||
//! immutable `GL_RGBA8` texture (de-tiling and swizzling to the BGRx the encoder wants), then
|
||||
//! register *that* texture with CUDA ([`cuda::RegisteredTexture`]) and copy it device-to-device into an
|
||||
//! owned [`DeviceBuffer`] so the dmabuf can be returned to the compositor immediately.
|
||||
|
||||
#![allow(non_upper_case_globals)]
|
||||
|
||||
use super::cuda::{self, DeviceBuffer};
|
||||
use anyhow::{bail, ensure, Context as _, Result};
|
||||
use khronos_egl as egl;
|
||||
use std::os::raw::{c_int, c_void};
|
||||
|
||||
// EGL_EXT_image_dma_buf_import / _modifiers + platform enums (not defined by khronos-egl).
// Target passed to eglCreateImage for a dmabuf import.
const EGL_LINUX_DMA_BUF_EXT: egl::Enum = 0x3270;
// Platform passed to eglGetPlatformDisplay.
const EGL_PLATFORM_GBM_KHR: egl::Enum = 0x31D7;
// Attribute keys describing plane 0 of the dmabuf (format, fd, offset, pitch, modifier halves).
const EGL_LINUX_DRM_FOURCC_EXT: egl::Attrib = 0x3271;
const EGL_DMA_BUF_PLANE0_FD_EXT: egl::Attrib = 0x3272;
const EGL_DMA_BUF_PLANE0_OFFSET_EXT: egl::Attrib = 0x3273;
const EGL_DMA_BUF_PLANE0_PITCH_EXT: egl::Attrib = 0x3274;
const EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT: egl::Attrib = 0x3443;
const EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT: egl::Attrib = 0x3444;

// OpenGL enum values used by the shader blit below.
const GL_TEXTURE_2D: u32 = 0x0DE1;
const GL_TEXTURE_MIN_FILTER: u32 = 0x2801;
const GL_TEXTURE_MAG_FILTER: u32 = 0x2800;
const GL_LINEAR: c_int = 0x2601;
const GL_NEAREST: c_int = 0x2600;
const GL_RGBA8: u32 = 0x8058;
const GL_FRAMEBUFFER: u32 = 0x8D40;
const GL_COLOR_ATTACHMENT0: u32 = 0x8CE0;
const GL_FRAMEBUFFER_COMPLETE: u32 = 0x8CD5;
const GL_TEXTURE0: u32 = 0x84C0;
const GL_TRIANGLES: u32 = 0x0004;
const GL_VERTEX_SHADER: u32 = 0x8B31;
const GL_FRAGMENT_SHADER: u32 = 0x8B30;
const GL_COMPILE_STATUS: u32 = 0x8B81;
const GL_LINK_STATUS: u32 = 0x8B82;
|
||||
|
||||
// libglvnd's libGL dispatches these to the NVIDIA driver based on the current EGL/GL context.
//
// Only the entry points this module calls are declared. There are no declarations for deleting
// programs, framebuffers or vertex arrays, so those objects are never released here.
//
// NOTE(review): the string parameters are typed `*const i8`. `c_char` is `u8` on some targets
// (e.g. aarch64 Linux), where passing `c"…".as_ptr()` to these would not type-check — confirm
// only x86_64 is targeted, or switch the declarations to `c_char`.
#[link(name = "GL")]
extern "C" {
    // Texture objects.
    fn glGenTextures(n: c_int, textures: *mut u32);
    fn glBindTexture(target: u32, texture: u32);
    fn glTexParameteri(target: u32, pname: u32, param: c_int);
    fn glDeleteTextures(n: c_int, textures: *const u32);
    fn glTexStorage2D(target: u32, levels: c_int, internalformat: u32, width: c_int, height: c_int);
    fn glGetError() -> u32;
    // Framebuffer objects.
    fn glGenFramebuffers(n: c_int, framebuffers: *mut u32);
    fn glBindFramebuffer(target: u32, framebuffer: u32);
    fn glFramebufferTexture2D(
        target: u32,
        attachment: u32,
        textarget: u32,
        texture: u32,
        level: c_int,
    );
    fn glCheckFramebufferStatus(target: u32) -> u32;
    // Draw state and submission.
    fn glViewport(x: c_int, y: c_int, width: c_int, height: c_int);
    fn glGenVertexArrays(n: c_int, arrays: *mut u32);
    fn glBindVertexArray(array: u32);
    fn glDrawArrays(mode: u32, first: c_int, count: c_int);
    fn glActiveTexture(texture: u32);
    fn glUseProgram(program: u32);
    fn glFlush();
    // Shaders and programs.
    fn glCreateShader(shader_type: u32) -> u32;
    fn glShaderSource(shader: u32, count: c_int, string: *const *const i8, length: *const c_int);
    fn glCompileShader(shader: u32);
    fn glGetShaderiv(shader: u32, pname: u32, params: *mut c_int);
    fn glDeleteShader(shader: u32);
    fn glCreateProgram() -> u32;
    fn glAttachShader(program: u32, shader: u32);
    fn glLinkProgram(program: u32);
    fn glGetProgramiv(program: u32, pname: u32, params: *mut c_int);
    fn glGetUniformLocation(program: u32, name: *const i8) -> c_int;
    fn glUniform1i(location: c_int, v0: c_int);
}
|
||||
|
||||
// libgbm: only device creation/destruction is needed, to hand EGL a native display.
#[link(name = "gbm")]
extern "C" {
    // Creates a GBM device on an open DRM fd; the result is checked for null by the caller.
    fn gbm_create_device(fd: c_int) -> *mut c_void;
    // Destroys a device returned by `gbm_create_device`.
    fn gbm_device_destroy(device: *mut c_void);
}
|
||||
|
||||
/// `glEGLImageTargetTexture2DOES(target, EGLImage)` — loaded via `eglGetProcAddress`.
type EglImageTargetFn = unsafe extern "system" fn(u32, *mut c_void);

// Fullscreen-triangle blit: sample the dmabuf EGLImage texture and write it (swizzled to BGRA,
// to match the BGRx the encoder expects) into a normal GL_RGBA8 texture that CUDA *can* register.
// The vertex shader derives the three corners from `gl_VertexID`, so no vertex buffer is bound.
const VERT_SRC: &[u8] = b"#version 330 core\nout vec2 v_tex;\nvoid main(){vec2 p=vec2(float((gl_VertexID<<1)&2),float(gl_VertexID&2));v_tex=p;gl_Position=vec4(p*2.0-1.0,0.0,1.0);}\n";
// The fragment shader samples `image` and writes the texel with its channels reordered (`.bgra`).
const FRAG_SRC: &[u8] = b"#version 330 core\nuniform sampler2D image;\nin vec2 v_tex;\nout vec4 o_color;\nvoid main(){o_color=texture(image,v_tex).bgra;}\n";
|
||||
|
||||
unsafe fn compile_shader(kind: u32, src: &[u8]) -> Result<u32> {
|
||||
let sh = glCreateShader(kind);
|
||||
ensure!(sh != 0, "glCreateShader failed");
|
||||
let ptr = src.as_ptr() as *const i8;
|
||||
let len = src.len() as c_int;
|
||||
glShaderSource(sh, 1, &ptr, &len);
|
||||
glCompileShader(sh);
|
||||
let mut ok: c_int = 0;
|
||||
glGetShaderiv(sh, GL_COMPILE_STATUS, &mut ok);
|
||||
if ok == 0 {
|
||||
glDeleteShader(sh);
|
||||
bail!("GL shader compile failed");
|
||||
}
|
||||
Ok(sh)
|
||||
}
|
||||
|
||||
unsafe fn compile_program() -> Result<u32> {
|
||||
let vs = compile_shader(GL_VERTEX_SHADER, VERT_SRC)?;
|
||||
let fs = compile_shader(GL_FRAGMENT_SHADER, FRAG_SRC)?;
|
||||
let prog = glCreateProgram();
|
||||
glAttachShader(prog, vs);
|
||||
glAttachShader(prog, fs);
|
||||
glLinkProgram(prog);
|
||||
glDeleteShader(vs);
|
||||
glDeleteShader(fs);
|
||||
let mut ok: c_int = 0;
|
||||
glGetProgramiv(prog, GL_LINK_STATUS, &mut ok);
|
||||
ensure!(ok != 0, "GL program link failed");
|
||||
glUseProgram(prog);
|
||||
let loc = glGetUniformLocation(prog, c"image".as_ptr());
|
||||
if loc >= 0 {
|
||||
glUniform1i(loc, 0); // sampler -> texture unit 0
|
||||
}
|
||||
glUseProgram(0);
|
||||
Ok(prog)
|
||||
}
|
||||
|
||||
/// Per-size GL machinery to blit a dmabuf EGLImage into a CUDA-registrable `GL_RGBA8` texture.
|
||||
struct GlBlit {
|
||||
program: u32,
|
||||
vao: u32,
|
||||
fbo: u32,
|
||||
/// CUDA-registrable destination (immutable GL_RGBA8).
|
||||
dst_tex: u32,
|
||||
/// Source texture re-targeted to each frame's EGLImage.
|
||||
src_tex: u32,
|
||||
width: u32,
|
||||
height: u32,
|
||||
/// `dst_tex` registered with CUDA once (not per frame); mapped+copied each frame.
|
||||
registered: cuda::RegisteredTexture,
|
||||
/// Recycled CUDA device buffers (the imported frames handed to the encoder).
|
||||
pool: cuda::BufferPool,
|
||||
}
|
||||
|
||||
impl GlBlit {
|
||||
unsafe fn new(width: u32, height: u32) -> Result<GlBlit> {
|
||||
let program = compile_program()?;
|
||||
let mut vao = 0u32;
|
||||
glGenVertexArrays(1, &mut vao); // core profile needs a bound VAO for glDrawArrays
|
||||
let mut fbo = 0u32;
|
||||
glGenFramebuffers(1, &mut fbo);
|
||||
|
||||
let mut dst_tex = 0u32;
|
||||
glGenTextures(1, &mut dst_tex);
|
||||
glBindTexture(GL_TEXTURE_2D, dst_tex);
|
||||
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, width as c_int, height as c_int);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
let mut src_tex = 0u32;
|
||||
glGenTextures(1, &mut src_tex);
|
||||
glBindTexture(GL_TEXTURE_2D, src_tex);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
|
||||
glFramebufferTexture2D(
|
||||
GL_FRAMEBUFFER,
|
||||
GL_COLOR_ATTACHMENT0,
|
||||
GL_TEXTURE_2D,
|
||||
dst_tex,
|
||||
0,
|
||||
);
|
||||
let status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
ensure!(
|
||||
status == GL_FRAMEBUFFER_COMPLETE,
|
||||
"blit FBO incomplete ({status:#x})"
|
||||
);
|
||||
// Register the (immutable, reused) destination texture with CUDA once, and stand up the
|
||||
// device-buffer pool — both per-resolution, not per-frame. Requires the CUDA context to be
|
||||
// current (the caller makes it current before constructing the blit).
|
||||
let registered = cuda::RegisteredTexture::register_gl(dst_tex)?;
|
||||
let pool = cuda::BufferPool::new(width, height)?;
|
||||
Ok(GlBlit {
|
||||
program,
|
||||
vao,
|
||||
fbo,
|
||||
dst_tex,
|
||||
src_tex,
|
||||
width,
|
||||
height,
|
||||
registered,
|
||||
pool,
|
||||
})
|
||||
}
|
||||
|
||||
/// Bind `image` to the source texture and render it into `dst_tex`.
|
||||
///
|
||||
/// # Safety: the GL context is current on this thread; `image` is a valid `EGLImage`.
|
||||
unsafe fn run(&self, egl_image_target: EglImageTargetFn, image: *mut c_void) -> Result<()> {
|
||||
glBindTexture(GL_TEXTURE_2D, self.src_tex);
|
||||
let _ = glGetError();
|
||||
egl_image_target(GL_TEXTURE_2D, image);
|
||||
let e = glGetError();
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
ensure!(e == 0, "glEGLImageTargetTexture2DOES failed ({e:#x})");
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, self.fbo);
|
||||
glViewport(0, 0, self.width as c_int, self.height as c_int);
|
||||
glUseProgram(self.program);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, self.src_tex);
|
||||
glBindVertexArray(self.vao);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
glBindVertexArray(0);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
glFlush(); // submit GL work before CUDA maps the texture
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A single dmabuf plane as PipeWire delivers it (BGRx frames have exactly one).
#[derive(Debug, Clone, Copy)]
pub struct DmabufPlane {
    /// File descriptor of the dmabuf.
    pub fd: i32,
    /// Byte offset of the plane within the buffer.
    pub offset: u32,
    /// Row stride in bytes.
    pub stride: u32,
}
|
||||
|
||||
type Egl = egl::DynamicInstance<egl::EGL1_5>;
|
||||
|
||||
/// Headless EGLDisplay (GBM platform on the NVIDIA render node) + a surfaceless desktop-GL
/// context used to import dmabufs and bridge them to CUDA via a GL texture. Lives on the capture
/// thread (the GL context is made current there once).
pub struct EglImporter {
    /// Dynamically loaded EGL 1.5 entry points.
    egl: Egl,
    /// Display obtained from the GBM device below.
    display: egl::Display,
    /// `EGL_NO_CONTEXT`, passed to `eglCreateImage` for dmabuf imports.
    no_ctx: egl::Context,
    /// Surfaceless GL context (current on the capture thread) for the EGLImage→texture bind.
    _gl_ctx: egl::Context,
    /// `glEGLImageTargetTexture2DOES`, resolved once at construction.
    egl_image_target: EglImageTargetFn,
    /// Lazily-created GL blit machinery (recreated if the frame size changes).
    blit: Option<GlBlit>,
    /// LINEAR-dmabuf path (gamescope): a Vulkan bridge (dmabuf → exportable OPAQUE_FD → CUDA),
    /// created lazily on the first LINEAR frame, + the destination pool.
    vk: Option<super::vulkan::VkBridge>,
    /// Destination buffers for the LINEAR path; replaced when the frame size changes.
    linear_pool: Option<cuda::BufferPool>,
    /// GBM device backing `display`; destroyed in `Drop`.
    gbm: *mut c_void,
    /// Render-node fd the GBM device was created on; closed in `Drop`.
    render_fd: c_int,
}

// The EGL handles are confined to the capture thread; the struct is moved there once.
unsafe impl Send for EglImporter {}
|
||||
|
||||
impl EglImporter {
|
||||
/// Open a headless EGLDisplay on the NVIDIA EGL device. Also forces the shared CUDA context
|
||||
/// to exist (so a later `import` only touches the hot path).
|
||||
pub fn new() -> Result<EglImporter> {
|
||||
// GBM platform on the NVIDIA render node: this ties the EGLDisplay (and its GL contexts)
|
||||
// to the same DRM device CUDA-GL interop associates with, which the EGL device platform
|
||||
// did not (cuGraphicsGLRegisterImage rejected device-platform GL textures).
|
||||
let path = std::ffi::CString::new("/dev/dri/renderD128").unwrap();
|
||||
let render_fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) };
|
||||
ensure!(render_fd >= 0, "open /dev/dri/renderD128 for GBM");
|
||||
let gbm = unsafe { gbm_create_device(render_fd) };
|
||||
if gbm.is_null() {
|
||||
unsafe { libc::close(render_fd) };
|
||||
anyhow::bail!("gbm_create_device failed");
|
||||
}
|
||||
|
||||
let egl: Egl =
|
||||
unsafe { Egl::load_required() }.context("load libEGL (EGL 1.5 dynamic instance)")?;
|
||||
let display = unsafe {
|
||||
egl.get_platform_display(
|
||||
EGL_PLATFORM_GBM_KHR,
|
||||
gbm as egl::NativeDisplayType,
|
||||
&[egl::ATTRIB_NONE],
|
||||
)
|
||||
}
|
||||
.context("eglGetPlatformDisplay(GBM) on the NVIDIA render node")?;
|
||||
egl.initialize(display).context("eglInitialize")?;
|
||||
|
||||
let exts = egl
|
||||
.query_string(Some(display), egl::EXTENSIONS)
|
||||
.context("query EGL extensions")?
|
||||
.to_string_lossy()
|
||||
.into_owned();
|
||||
ensure!(
|
||||
exts.contains("EGL_EXT_image_dma_buf_import"),
|
||||
"EGL lacks EGL_EXT_image_dma_buf_import"
|
||||
);
|
||||
ensure!(
|
||||
exts.contains("EGL_EXT_image_dma_buf_import_modifiers"),
|
||||
"EGL lacks EGL_EXT_image_dma_buf_import_modifiers (needed for NVIDIA tiled dmabufs)"
|
||||
);
|
||||
|
||||
// A surfaceless desktop-GL context so we can bind the dmabuf EGLImage to a GL texture
|
||||
// (cuGraphicsEGLRegisterImage is Tegra-only; desktop CUDA interop goes through GL).
|
||||
egl.bind_api(egl::OPENGL_API)
|
||||
.context("eglBindAPI(OpenGL)")?;
|
||||
// The default EGL_SURFACE_TYPE in eglChooseConfig is WINDOW_BIT, which a headless device
|
||||
// display has none of — request a pbuffer-capable config (we run surfaceless anyway).
|
||||
let config = egl
|
||||
.choose_first_config(
|
||||
display,
|
||||
&[
|
||||
egl::SURFACE_TYPE,
|
||||
egl::PBUFFER_BIT,
|
||||
egl::RENDERABLE_TYPE,
|
||||
egl::OPENGL_BIT,
|
||||
egl::NONE,
|
||||
],
|
||||
)
|
||||
.context("eglChooseConfig")?
|
||||
.context("no EGL config for OpenGL")?;
|
||||
let gl_ctx = egl
|
||||
.create_context(
|
||||
display,
|
||||
config,
|
||||
None,
|
||||
&[egl::CONTEXT_CLIENT_VERSION, 3, egl::NONE],
|
||||
)
|
||||
.context("eglCreateContext(OpenGL)")?;
|
||||
egl.make_current(display, None, None, Some(gl_ctx))
|
||||
.context("eglMakeCurrent surfaceless (needs EGL_KHR_surfaceless_context)")?;
|
||||
let egl_image_target: EglImageTargetFn = unsafe {
|
||||
std::mem::transmute(
|
||||
egl.get_proc_address("glEGLImageTargetTexture2DOES")
|
||||
.context("glEGLImageTargetTexture2DOES unavailable")?,
|
||||
)
|
||||
};
|
||||
|
||||
// Create the shared CUDA context up front so import() is pure hot path.
|
||||
cuda::context().context("create CUDA context")?;
|
||||
|
||||
let no_ctx = unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) };
|
||||
tracing::info!(
|
||||
"zero-copy EGL importer ready (GBM platform + GL texture interop, dma_buf_import + modifiers)"
|
||||
);
|
||||
Ok(EglImporter {
|
||||
egl,
|
||||
display,
|
||||
no_ctx,
|
||||
_gl_ctx: gl_ctx,
|
||||
egl_image_target,
|
||||
blit: None,
|
||||
vk: None,
|
||||
linear_pool: None,
|
||||
gbm,
|
||||
render_fd,
|
||||
})
|
||||
}
|
||||
|
||||
/// Import a LINEAR dmabuf via the Vulkan bridge (no EGL/GL involved — NVIDIA's EGL can't
|
||||
/// sample LINEAR, and the CUDA driver rejects raw dmabuf fds; Vulkan imports the dmabuf,
|
||||
/// GPU-copies into an exportable allocation, and CUDA reads that). See [`super::vulkan`].
|
||||
pub fn import_linear(
|
||||
&mut self,
|
||||
plane: &DmabufPlane,
|
||||
width: u32,
|
||||
height: u32,
|
||||
) -> Result<DeviceBuffer> {
|
||||
cuda::make_current()?;
|
||||
if self.linear_pool.as_ref().map(|p| (p.width(), p.height())) != Some((width, height)) {
|
||||
self.linear_pool = Some(cuda::BufferPool::new(width, height)?);
|
||||
}
|
||||
if self.vk.is_none() {
|
||||
self.vk = Some(super::vulkan::VkBridge::new()?);
|
||||
}
|
||||
self.vk.as_mut().unwrap().import_linear(
|
||||
plane.fd,
|
||||
plane.offset,
|
||||
plane.stride,
|
||||
height,
|
||||
self.linear_pool.as_ref().unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
/// The DRM format modifiers the NVIDIA EGL stack can import for `fourcc`, via
|
||||
/// `eglQueryDmaBufModifiersEXT`. We advertise these to PipeWire so the compositor allocates
|
||||
/// a dmabuf in a layout we can import. Empty on failure (caller falls back).
|
||||
pub fn supported_modifiers(&self, fourcc: u32) -> Vec<u64> {
|
||||
type QueryFn = unsafe extern "system" fn(
|
||||
dpy: *mut c_void,
|
||||
format: i32,
|
||||
max_modifiers: i32,
|
||||
modifiers: *mut u64,
|
||||
external_only: *mut u32,
|
||||
num_modifiers: *mut i32,
|
||||
) -> u32;
|
||||
let Some(sym) = self.egl.get_proc_address("eglQueryDmaBufModifiersEXT") else {
|
||||
return Vec::new();
|
||||
};
|
||||
let query: QueryFn = unsafe { std::mem::transmute(sym) };
|
||||
let dpy = self.display.as_ptr();
|
||||
unsafe {
|
||||
let mut count: i32 = 0;
|
||||
if query(
|
||||
dpy,
|
||||
fourcc as i32,
|
||||
0,
|
||||
std::ptr::null_mut(),
|
||||
std::ptr::null_mut(),
|
||||
&mut count,
|
||||
) == 0
|
||||
|| count <= 0
|
||||
{
|
||||
return Vec::new();
|
||||
}
|
||||
let mut mods = vec![0u64; count as usize];
|
||||
let mut ext = vec![0u32; count as usize];
|
||||
let mut n: i32 = 0;
|
||||
if query(
|
||||
dpy,
|
||||
fourcc as i32,
|
||||
count,
|
||||
mods.as_mut_ptr(),
|
||||
ext.as_mut_ptr(),
|
||||
&mut n,
|
||||
) == 0
|
||||
{
|
||||
return Vec::new();
|
||||
}
|
||||
mods.truncate(n.max(0) as usize);
|
||||
mods
|
||||
}
|
||||
}
|
||||
|
||||
/// Import one dmabuf and copy it device-to-device into a fresh owned CUDA buffer. `fourcc`
|
||||
/// is the DRM FourCC; `modifier` is the explicit 64-bit DRM format modifier when one was
|
||||
/// negotiated, or `None` to import with the buffer's implicit modifier (base
|
||||
/// `EGL_EXT_image_dma_buf_import`, which the NVIDIA driver resolves for its own buffers).
|
||||
pub fn import(
|
||||
&mut self,
|
||||
plane: &DmabufPlane,
|
||||
width: u32,
|
||||
height: u32,
|
||||
fourcc: u32,
|
||||
modifier: Option<u64>,
|
||||
) -> Result<DeviceBuffer> {
|
||||
let mut attrs: Vec<egl::Attrib> = vec![
|
||||
egl::WIDTH as egl::Attrib,
|
||||
width as egl::Attrib,
|
||||
egl::HEIGHT as egl::Attrib,
|
||||
height as egl::Attrib,
|
||||
EGL_LINUX_DRM_FOURCC_EXT,
|
||||
fourcc as egl::Attrib,
|
||||
EGL_DMA_BUF_PLANE0_FD_EXT,
|
||||
plane.fd as egl::Attrib,
|
||||
EGL_DMA_BUF_PLANE0_OFFSET_EXT,
|
||||
plane.offset as egl::Attrib,
|
||||
EGL_DMA_BUF_PLANE0_PITCH_EXT,
|
||||
plane.stride as egl::Attrib,
|
||||
];
|
||||
if let Some(m) = modifier {
|
||||
attrs.extend_from_slice(&[
|
||||
EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
|
||||
(m & 0xFFFF_FFFF) as egl::Attrib,
|
||||
EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
|
||||
(m >> 32) as egl::Attrib,
|
||||
]);
|
||||
}
|
||||
attrs.push(egl::ATTRIB_NONE);
|
||||
let client = unsafe { egl::ClientBuffer::from_ptr(std::ptr::null_mut()) };
|
||||
let image = self
|
||||
.egl
|
||||
.create_image(
|
||||
self.display,
|
||||
self.no_ctx,
|
||||
EGL_LINUX_DMA_BUF_EXT,
|
||||
client,
|
||||
&attrs,
|
||||
)
|
||||
.context("eglCreateImage(EGL_LINUX_DMA_BUF_EXT) — modifier mismatch?")?;
|
||||
|
||||
// EGLImage → (sampled by a shader) → GL_RGBA8 texture → register *that* with CUDA → map
|
||||
// → array → copy out. Registering the EGLImage texture directly fails (its layout isn't a
|
||||
// CUDA-registrable format); the RGBA8 render target is.
|
||||
let result = self.blit_and_copy(image.as_ptr(), width, height);
|
||||
let _ = self.egl.destroy_image(self.display, image);
|
||||
result
|
||||
}
|
||||
|
||||
/// Render the dmabuf `image` into the registrable RGBA8 texture and copy it to an owned CUDA
|
||||
/// buffer. (Re)creates the per-size GL blit machinery as needed.
|
||||
fn blit_and_copy(
|
||||
&mut self,
|
||||
image: *mut c_void,
|
||||
width: u32,
|
||||
height: u32,
|
||||
) -> Result<DeviceBuffer> {
|
||||
cuda::make_current()?;
|
||||
if self.blit.as_ref().map(|b| (b.width, b.height)) != Some((width, height)) {
|
||||
self.blit = Some(unsafe { GlBlit::new(width, height)? });
|
||||
}
|
||||
let egl_image_target = self.egl_image_target;
|
||||
let blit = self.blit.as_mut().unwrap();
|
||||
// SAFETY: GL + CUDA contexts current on this thread; `image` is a valid EGLImage.
|
||||
unsafe { blit.run(egl_image_target, image)? };
|
||||
// Persistent registration (mapped per frame) + a pooled buffer — no per-frame
|
||||
// cuGraphicsGLRegisterImage / cuMemAllocPitch.
|
||||
let dst = blit.pool.get()?;
|
||||
blit.registered.copy_mapped_to(&dst)?;
|
||||
Ok(dst)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for EglImporter {
|
||||
fn drop(&mut self) {
|
||||
if !self.gbm.is_null() {
|
||||
unsafe { gbm_device_destroy(self.gbm) };
|
||||
}
|
||||
if self.render_fd >= 0 {
|
||||
unsafe { libc::close(self.render_fd) };
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
//! Zero-copy capture→encode (plan §9): the PipeWire dmabuf is imported into CUDA via EGL and
|
||||
//! handed straight to NVENC, eliminating the per-frame CPU copies (at 5K the CPU-copy path
|
||||
//! moves ~3.5 GB/s). Opt in with `PUNKTFUNK_ZEROCOPY=1`; the CPU-copy path stays the default and
|
||||
//! the runtime fallback (foreign-allocator / no-dmabuf / import failure).
|
||||
//!
|
||||
//! Pieces: [`cuda`] (driver-API FFI + the shared `CUcontext` + device buffers), [`egl`] (the
//! headless EGLDisplay + dmabuf→`EGLImage`→CUDA import), [`vulkan`] (the bridge for LINEAR
//! dmabufs). The encoder's CUDA-frame path lives in `encode/linux.rs`; the dmabuf negotiation
//! lives in `capture/linux.rs`.
|
||||
|
||||
pub mod cuda;
|
||||
pub mod egl;
|
||||
pub mod vulkan;
|
||||
|
||||
pub use cuda::DeviceBuffer;
|
||||
pub use egl::{DmabufPlane, EglImporter};
|
||||
|
||||
/// Whether the zero-copy path is opted in (`PUNKTFUNK_ZEROCOPY` truthy).
///
/// Truthy means `1`, `true`, `yes` or `on`, compared ASCII case-insensitively after trimming
/// surrounding whitespace (so `TRUE` and `On` count too). An unset or non-Unicode variable, or
/// any other value, yields `false`.
pub fn enabled() -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    std::env::var("PUNKTFUNK_ZEROCOPY")
        .map(|raw| {
            let value = raw.trim();
            TRUTHY.iter().any(|t| value.eq_ignore_ascii_case(t))
        })
        .unwrap_or(false)
}
|
||||
|
||||
/// Pack a four-character format name into its DRM FourCC code: the first byte becomes the least
/// significant one (little-endian), e.g. `b"XR24"`.
const fn fourcc(c: &[u8; 4]) -> u32 {
    u32::from_le_bytes(*c)
}
|
||||
|
||||
/// Map a SPA/our [`crate::capture::PixelFormat`] to the DRM FourCC EGL expects for import.
|
||||
/// SPA byte order `BGRx` ⇒ DRM `XRGB8888` (memory B,G,R,X), etc.
|
||||
pub fn drm_fourcc(format: crate::capture::PixelFormat) -> Option<u32> {
|
||||
use crate::capture::PixelFormat::*;
|
||||
Some(match format {
|
||||
Bgrx => fourcc(b"XR24"), // DRM_FORMAT_XRGB8888
|
||||
Bgra => fourcc(b"AR24"), // DRM_FORMAT_ARGB8888
|
||||
Rgbx => fourcc(b"XB24"), // DRM_FORMAT_XBGR8888
|
||||
Rgba => fourcc(b"AB24"), // DRM_FORMAT_ABGR8888
|
||||
// 24-bit packed RGB/BGR have no straightforward dmabuf import here; use the CPU path.
|
||||
Rgb | Bgr => return None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Standalone probe (the `zerocopy-probe` subcommand): initialize the EGL importer + CUDA
|
||||
/// context and report. De-risks the FFI/linking/GPU-access without needing a capture session.
|
||||
pub fn probe() -> anyhow::Result<()> {
|
||||
let _importer = EglImporter::new()?;
|
||||
let ctx = cuda::context()?;
|
||||
tracing::info!(cuda_ctx = ?ctx, "zero-copy probe OK — EGL display + CUDA context initialized");
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,366 @@
|
||||
//! Vulkan bridge for LINEAR dmabufs (gamescope's only offer), completing zero-copy where the
|
||||
//! other interops can't: NVIDIA's EGL won't sample LINEAR, and the CUDA driver rejects raw
|
||||
//! dmabuf fds as external memory. Vulkan *does* import dmabufs (`VK_EXT_external_memory_dma_buf`)
|
||||
//! and *does* export `OPAQUE_FD` memory that CUDA officially imports. So:
|
||||
//!
|
||||
//! ```text
|
||||
//! dmabuf fd ──VkImportMemoryFdInfoKHR(DMA_BUF)──▶ VkBuffer (cached per fd)
|
||||
//! │ vkCmdCopyBuffer (GPU, device-local)
|
||||
//! ▼
|
||||
//! exportable VkBuffer ──vkGetMemoryFdKHR(OPAQUE_FD)──▶ cuImportExternalMemory ──▶ CUdeviceptr
|
||||
//! ```
|
||||
//!
|
||||
//! The exportable buffer + its CUDA mapping are created once per resolution; per frame it's one
|
||||
//! GPU buffer copy (fence-waited) and one pitched CUDA copy into the encoder's pooled buffer.
|
||||
//! No CPU ever touches pixels. Imports are cached per fd (PipeWire's buffer pool is stable for
|
||||
//! a stream's life). Falls back cleanly: any init/import error disables the importer and the
|
||||
//! CPU mmap path takes over.
|
||||
|
||||
use super::cuda::{self, DeviceBuffer};
|
||||
use anyhow::{anyhow, bail, Context as _, Result};
|
||||
use ash::vk;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Vulkan objects for one imported source dmabuf (cached per fd).
|
||||
struct SrcBuf {
|
||||
buffer: vk::Buffer,
|
||||
memory: vk::DeviceMemory,
|
||||
size: u64,
|
||||
}
|
||||
|
||||
/// The per-resolution destination: exportable Vulkan memory mapped into CUDA.
|
||||
struct DstBuf {
|
||||
buffer: vk::Buffer,
|
||||
memory: vk::DeviceMemory,
|
||||
size: u64,
|
||||
/// CUDA's view of the same memory (owns the exported OPAQUE_FD).
|
||||
cuda: cuda::ExternalDmabuf,
|
||||
}
|
||||
|
||||
pub struct VkBridge {
|
||||
_entry: ash::Entry,
|
||||
instance: ash::Instance,
|
||||
device: ash::Device,
|
||||
ext_fd: ash::khr::external_memory_fd::Device,
|
||||
queue: vk::Queue,
|
||||
cmd_pool: vk::CommandPool,
|
||||
cmd: vk::CommandBuffer,
|
||||
fence: vk::Fence,
|
||||
mem_props: vk::PhysicalDeviceMemoryProperties,
|
||||
src_cache: HashMap<i32, SrcBuf>,
|
||||
dst: Option<DstBuf>,
|
||||
}
|
||||
|
||||
// Confined to the capture thread; moved there once.
|
||||
unsafe impl Send for VkBridge {}
|
||||
|
||||
impl VkBridge {
|
||||
/// Bring up Vulkan on the NVIDIA GPU with the external-memory extensions.
|
||||
pub fn new() -> Result<VkBridge> {
|
||||
unsafe {
|
||||
let entry = ash::Entry::load().context("load libvulkan")?;
|
||||
let app = vk::ApplicationInfo::default().api_version(vk::API_VERSION_1_1);
|
||||
let instance = entry
|
||||
.create_instance(
|
||||
&vk::InstanceCreateInfo::default().application_info(&app),
|
||||
None,
|
||||
)
|
||||
.context("vkCreateInstance")?;
|
||||
|
||||
// Pick the NVIDIA GPU (matches CUDA device 0 on this single-dGPU host).
|
||||
let phys = instance
|
||||
.enumerate_physical_devices()
|
||||
.context("enumerate GPUs")?
|
||||
.into_iter()
|
||||
.find(|&p| instance.get_physical_device_properties(p).vendor_id == 0x10DE)
|
||||
.ok_or_else(|| anyhow!("no NVIDIA Vulkan device"))?;
|
||||
let mem_props = instance.get_physical_device_memory_properties(phys);
|
||||
|
||||
// Any queue family supporting transfer (graphics/compute imply it).
|
||||
let qf = instance
|
||||
.get_physical_device_queue_family_properties(phys)
|
||||
.iter()
|
||||
.position(|q| {
|
||||
q.queue_flags.intersects(
|
||||
vk::QueueFlags::TRANSFER
|
||||
| vk::QueueFlags::GRAPHICS
|
||||
| vk::QueueFlags::COMPUTE,
|
||||
)
|
||||
})
|
||||
.ok_or_else(|| anyhow!("no transfer-capable queue family"))?
|
||||
as u32;
|
||||
|
||||
let exts = [
|
||||
ash::khr::external_memory_fd::NAME.as_ptr(),
|
||||
ash::ext::external_memory_dma_buf::NAME.as_ptr(),
|
||||
];
|
||||
let prio = [1.0f32];
|
||||
let qci = [vk::DeviceQueueCreateInfo::default()
|
||||
.queue_family_index(qf)
|
||||
.queue_priorities(&prio)];
|
||||
let device = instance
|
||||
.create_device(
|
||||
phys,
|
||||
&vk::DeviceCreateInfo::default()
|
||||
.queue_create_infos(&qci)
|
||||
.enabled_extension_names(&exts),
|
||||
None,
|
||||
)
|
||||
.context("vkCreateDevice (external-memory extensions supported?)")?;
|
||||
let ext_fd = ash::khr::external_memory_fd::Device::new(&instance, &device);
|
||||
let queue = device.get_device_queue(qf, 0);
|
||||
|
||||
let cmd_pool = device
|
||||
.create_command_pool(
|
||||
&vk::CommandPoolCreateInfo::default()
|
||||
.queue_family_index(qf)
|
||||
.flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER),
|
||||
None,
|
||||
)
|
||||
.context("create command pool")?;
|
||||
let cmd = device
|
||||
.allocate_command_buffers(
|
||||
&vk::CommandBufferAllocateInfo::default()
|
||||
.command_pool(cmd_pool)
|
||||
.level(vk::CommandBufferLevel::PRIMARY)
|
||||
.command_buffer_count(1),
|
||||
)
|
||||
.context("allocate command buffer")?[0];
|
||||
let fence = device
|
||||
.create_fence(&vk::FenceCreateInfo::default(), None)
|
||||
.context("create fence")?;
|
||||
|
||||
tracing::info!("Vulkan bridge ready (dmabuf import → OPAQUE_FD export → CUDA)");
|
||||
Ok(VkBridge {
|
||||
_entry: entry,
|
||||
instance,
|
||||
device,
|
||||
ext_fd,
|
||||
queue,
|
||||
cmd_pool,
|
||||
cmd,
|
||||
fence,
|
||||
mem_props,
|
||||
src_cache: HashMap::new(),
|
||||
dst: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn memory_type(&self, type_bits: u32, flags: vk::MemoryPropertyFlags) -> Result<u32> {
|
||||
(0..self.mem_props.memory_type_count)
|
||||
.find(|&i| {
|
||||
type_bits & (1 << i) != 0
|
||||
&& self.mem_props.memory_types[i as usize]
|
||||
.property_flags
|
||||
.contains(flags)
|
||||
})
|
||||
.ok_or_else(|| anyhow!("no compatible Vulkan memory type"))
|
||||
}
|
||||
|
||||
/// Import `fd` (dup'd internally; Vulkan owns the dup) as a transfer-src buffer of `size`.
|
||||
unsafe fn import_src(&mut self, fd: i32, size: u64) -> Result<()> {
|
||||
let dup = libc::dup(fd);
|
||||
if dup < 0 {
|
||||
bail!("dup(dmabuf fd)");
|
||||
}
|
||||
let mut ext_info = vk::ExternalMemoryBufferCreateInfo::default()
|
||||
.handle_types(vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT);
|
||||
let buffer = self
|
||||
.device
|
||||
.create_buffer(
|
||||
&vk::BufferCreateInfo::default()
|
||||
.size(size)
|
||||
.usage(vk::BufferUsageFlags::TRANSFER_SRC)
|
||||
.push_next(&mut ext_info),
|
||||
None,
|
||||
)
|
||||
.context("create import buffer")?;
|
||||
let mut fd_props = vk::MemoryFdPropertiesKHR::default();
|
||||
self.ext_fd
|
||||
.get_memory_fd_properties(
|
||||
vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT,
|
||||
dup,
|
||||
&mut fd_props,
|
||||
)
|
||||
.context("vkGetMemoryFdPropertiesKHR")?;
|
||||
let reqs = self.device.get_buffer_memory_requirements(buffer);
|
||||
let mem_type = self.memory_type(
|
||||
reqs.memory_type_bits & fd_props.memory_type_bits,
|
||||
vk::MemoryPropertyFlags::empty(),
|
||||
)?;
|
||||
let mut import = vk::ImportMemoryFdInfoKHR::default()
|
||||
.handle_type(vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT)
|
||||
.fd(dup); // Vulkan takes ownership of `dup` on success
|
||||
let mut dedicated = vk::MemoryDedicatedAllocateInfo::default().buffer(buffer);
|
||||
let memory = self
|
||||
.device
|
||||
.allocate_memory(
|
||||
&vk::MemoryAllocateInfo::default()
|
||||
.allocation_size(reqs.size.max(size))
|
||||
.memory_type_index(mem_type)
|
||||
.push_next(&mut import)
|
||||
.push_next(&mut dedicated),
|
||||
None,
|
||||
)
|
||||
.map_err(|e| {
|
||||
libc::close(dup); // failed import does not consume the fd
|
||||
anyhow!("import dmabuf memory: {e}")
|
||||
})?;
|
||||
self.device
|
||||
.bind_buffer_memory(buffer, memory, 0)
|
||||
.context("bind import memory")?;
|
||||
self.src_cache.insert(
|
||||
fd,
|
||||
SrcBuf {
|
||||
buffer,
|
||||
memory,
|
||||
size,
|
||||
},
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// (Re)create the exportable destination of at least `size` bytes + its CUDA mapping.
|
||||
unsafe fn ensure_dst(&mut self, size: u64) -> Result<()> {
|
||||
if self.dst.as_ref().is_some_and(|d| d.size >= size) {
|
||||
return Ok(());
|
||||
}
|
||||
if let Some(old) = self.dst.take() {
|
||||
self.device.destroy_buffer(old.buffer, None);
|
||||
self.device.free_memory(old.memory, None);
|
||||
// old.cuda drops its mapping with it
|
||||
}
|
||||
let mut ext_info = vk::ExternalMemoryBufferCreateInfo::default()
|
||||
.handle_types(vk::ExternalMemoryHandleTypeFlags::OPAQUE_FD);
|
||||
let buffer = self
|
||||
.device
|
||||
.create_buffer(
|
||||
&vk::BufferCreateInfo::default()
|
||||
.size(size)
|
||||
.usage(vk::BufferUsageFlags::TRANSFER_DST)
|
||||
.push_next(&mut ext_info),
|
||||
None,
|
||||
)
|
||||
.context("create export buffer")?;
|
||||
let reqs = self.device.get_buffer_memory_requirements(buffer);
|
||||
let mem_type =
|
||||
self.memory_type(reqs.memory_type_bits, vk::MemoryPropertyFlags::DEVICE_LOCAL)?;
|
||||
let mut export = vk::ExportMemoryAllocateInfo::default()
|
||||
.handle_types(vk::ExternalMemoryHandleTypeFlags::OPAQUE_FD);
|
||||
let mut dedicated = vk::MemoryDedicatedAllocateInfo::default().buffer(buffer);
|
||||
let memory = self
|
||||
.device
|
||||
.allocate_memory(
|
||||
&vk::MemoryAllocateInfo::default()
|
||||
.allocation_size(reqs.size)
|
||||
.memory_type_index(mem_type)
|
||||
.push_next(&mut export)
|
||||
.push_next(&mut dedicated),
|
||||
None,
|
||||
)
|
||||
.context("allocate exportable memory")?;
|
||||
self.device
|
||||
.bind_buffer_memory(buffer, memory, 0)
|
||||
.context("bind export memory")?;
|
||||
let opaque_fd = self
|
||||
.ext_fd
|
||||
.get_memory_fd(
|
||||
&vk::MemoryGetFdInfoKHR::default()
|
||||
.memory(memory)
|
||||
.handle_type(vk::ExternalMemoryHandleTypeFlags::OPAQUE_FD),
|
||||
)
|
||||
.context("vkGetMemoryFdKHR")?;
|
||||
// CUDA imports (and on success owns) the exported fd. Size must match the allocation.
|
||||
let cuda = cuda::ExternalDmabuf::import_owned_fd(opaque_fd, reqs.size)
|
||||
.context("cuImportExternalMemory(OPAQUE_FD from Vulkan)")?;
|
||||
tracing::info!(size, "Vulkan→CUDA exportable staging buffer ready");
|
||||
self.dst = Some(DstBuf {
|
||||
buffer,
|
||||
memory,
|
||||
size: reqs.size,
|
||||
cuda,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Bridge one LINEAR dmabuf frame into a pooled CUDA buffer: GPU copy dmabuf→exportable,
|
||||
/// then pitched CUDA copy exportable→`pool` buffer.
|
||||
pub fn import_linear(
|
||||
&mut self,
|
||||
fd: i32,
|
||||
offset: u32,
|
||||
stride: u32,
|
||||
height: u32,
|
||||
pool: &cuda::BufferPool,
|
||||
) -> Result<DeviceBuffer> {
|
||||
unsafe {
|
||||
let span = offset as u64 + stride as u64 * height as u64;
|
||||
if !self.src_cache.contains_key(&fd) {
|
||||
let size = libc::lseek(fd, 0, libc::SEEK_END);
|
||||
anyhow::ensure!(size > 0, "lseek(dmabuf)");
|
||||
anyhow::ensure!(size as u64 >= span, "dmabuf smaller than frame span");
|
||||
self.import_src(fd, size as u64)?;
|
||||
}
|
||||
let (src_buffer, src_size) = {
|
||||
let s = &self.src_cache[&fd];
|
||||
(s.buffer, s.size)
|
||||
};
|
||||
let copy_size = src_size.min(span);
|
||||
self.ensure_dst(copy_size)?;
|
||||
let dst = self.dst.as_ref().unwrap();
|
||||
|
||||
// Record + submit the GPU copy, wait on the fence (GPU-GPU, sub-millisecond).
|
||||
self.device
|
||||
.begin_command_buffer(
|
||||
self.cmd,
|
||||
&vk::CommandBufferBeginInfo::default()
|
||||
.flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT),
|
||||
)
|
||||
.context("begin cmd")?;
|
||||
let region = vk::BufferCopy::default().size(copy_size);
|
||||
self.device
|
||||
.cmd_copy_buffer(self.cmd, src_buffer, dst.buffer, &[region]);
|
||||
self.device
|
||||
.end_command_buffer(self.cmd)
|
||||
.context("end cmd")?;
|
||||
let cmds = [self.cmd];
|
||||
let submit = vk::SubmitInfo::default().command_buffers(&cmds);
|
||||
self.device
|
||||
.queue_submit(self.queue, &[submit], self.fence)
|
||||
.context("queue submit")?;
|
||||
self.device
|
||||
.wait_for_fences(&[self.fence], true, 1_000_000_000)
|
||||
.context("fence wait")?;
|
||||
self.device
|
||||
.reset_fences(&[self.fence])
|
||||
.context("reset fence")?;
|
||||
|
||||
// De-stride from the CUDA view of the exportable memory into a pooled buffer.
|
||||
cuda::make_current()?;
|
||||
let out = pool.get()?;
|
||||
cuda::copy_pitched_to_buffer(dst.cuda.ptr + offset as u64, stride as usize, &out)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for VkBridge {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
let _ = self.device.device_wait_idle();
|
||||
for (_, s) in self.src_cache.drain() {
|
||||
self.device.destroy_buffer(s.buffer, None);
|
||||
self.device.free_memory(s.memory, None);
|
||||
}
|
||||
if let Some(d) = self.dst.take() {
|
||||
self.device.destroy_buffer(d.buffer, None);
|
||||
self.device.free_memory(d.memory, None);
|
||||
}
|
||||
self.device.destroy_fence(self.fence, None);
|
||||
self.device.destroy_command_pool(self.cmd_pool, None);
|
||||
self.device.destroy_device(None);
|
||||
self.instance.destroy_instance(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user