rename: lumen → punktfunk, everywhere
ci / rust (push) Has been cancelled

Full project rename, decided 2026-06-10:
- Crates/binaries: punktfunk-core / punktfunk-host / punktfunk-client-rs.
- C ABI: punktfunk_* symbols, Punktfunk* types, include/punktfunk_core.h,
  PUNKTFUNK_FEATURE_QUIC guard (header regenerated; cbindgen renames updated, incl.
  PUNKTFUNK_BTN_*/PUNKTFUNK_AXIS_* wire constants).
- Protocol: punktfunk/1 — control-plane magic LMN1 → PKF1, nonce salt lmn1 → pkf1.
  WIRE BREAK: clients must be rebuilt from this revision.
- Env knobs: PUNKTFUNK_VIDEO_SOURCE / PUNKTFUNK_COMPOSITOR / PUNKTFUNK_ZEROCOPY / ….
- Host config dir: ~/.config/punktfunk (the box's dir was migrated in place — the
  persistent identity is unchanged, pinned fingerprints stay valid).
- Swift package: PunktfunkKit + PunktfunkCore.xcframework + PunktfunkConnection
  (Sources/PunktfunkClient app + tests renamed with it); build-xcframework.sh updated.
- scripts/: 60-punktfunk.rules, punktfunk-host.service; OpenAPI doc regenerated.

Also: scripts/headless/run-headless-kde.sh — full headless Plasma bringup. Root cause of
"desktop but no apps/settings" over the stream: plasmashell launched without
XDG_MENU_PREFIX=plasma-, so the launcher resolved a nonexistent applications.menu and
rendered an empty menu. The script sets the complete KDE session env (menu prefix,
KDE_FULL_SESSION, session version) and rebuilds ksycoca before starting plasmashell.

Gate: 97/97 tests, clippy -D warnings (both feature sets), fmt, C-ABI harness PASS,
zero lumen references left outside .git.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-10 13:11:59 +00:00
parent b8b23c8fb2
commit bfd64ce871
119 changed files with 1245 additions and 1185 deletions
+36
View File
@@ -0,0 +1,36 @@
//! Desktop audio capture for the GameStream audio stream. On Linux: a PipeWire stream that
//! records the default sink's monitor (i.e. everything playing out of the system), delivered
//! as interleaved `f32` stereo PCM at 48 kHz. The audio data plane (`gamestream::audio`)
//! reframes this into fixed Opus frames, encodes, and sends it.
use anyhow::Result;
/// Opus/GameStream audio is 48 kHz stereo.
pub const SAMPLE_RATE: u32 = 48_000;
/// Number of interleaved channels in the captured PCM (stereo).
pub const CHANNELS: usize = 2;
/// Produces interleaved `f32` stereo PCM (L,R,L,R,…) at [`SAMPLE_RATE`]. Lives on its own
/// thread; never blocks the capture loop (drops if the consumer falls behind).
///
/// Implementors must be `Send` so a boxed capturer can be moved to the thread that drives it.
pub trait AudioCapturer: Send {
/// Block until the next chunk of interleaved samples is available (variable size). The
/// caller reframes into fixed Opus frames.
///
/// Returns the chunk as an owned `Vec<f32>`; what an `Err` means is defined by the
/// implementation.
fn next_chunk(&mut self) -> Result<Vec<f32>>;
/// Discard any buffered chunks (called when a persistent capturer is reused for a new
/// stream, so the client doesn't hear stale audio captured while idle). Default: no-op.
fn drain(&mut self) {}
}
/// Start the PipeWire-backed capturer for the default sink's monitor (the system output)
/// and return it behind the [`AudioCapturer`] trait object.
#[cfg(target_os = "linux")]
pub fn open_audio_capture() -> Result<Box<dyn AudioCapturer>> {
    let capturer = linux::PwAudioCapturer::open()?;
    Ok(Box::new(capturer))
}
/// Non-Linux stub: audio capture is only implemented on top of PipeWire, so this always
/// returns an error.
#[cfg(not(target_os = "linux"))]
pub fn open_audio_capture() -> Result<Box<dyn AudioCapturer>> {
    Err(anyhow::anyhow!("audio capture requires Linux + PipeWire"))
}
#[cfg(target_os = "linux")]
mod linux;
+172
View File
@@ -0,0 +1,172 @@
//! PipeWire audio capture of the default sink's monitor (system output).
//!
//! Connects to the user's PipeWire daemon (via `XDG_RUNTIME_DIR`, inherited from the Sway
//! session) and opens an input stream with `stream.capture.sink=true`, which routes the
//! default sink's monitor into us — no portal needed (unlike screen capture). The (`!Send`)
//! MainLoop/Stream live on a dedicated thread; interleaved `f32` chunks leave over a bounded
//! channel (dropped if the encoder falls behind, never blocking the PipeWire loop).
use super::{AudioCapturer, CHANNELS, SAMPLE_RATE};
use anyhow::{anyhow, Context, Result};
use std::sync::mpsc::{sync_channel, Receiver, RecvTimeoutError};
use std::thread;
use std::time::Duration;
/// Audio capturer fed by a dedicated PipeWire thread; this side only holds the receiving
/// end of the bounded chunk channel.
pub struct PwAudioCapturer {
    chunks: Receiver<Vec<f32>>,
}

impl PwAudioCapturer {
    /// Spawn the `punktfunk-pw-audio` thread running [`pw_thread`] and return the capturer
    /// that reads its output. Fails only if the thread cannot be spawned; errors inside the
    /// thread are logged there.
    pub fn open() -> Result<PwAudioCapturer> {
        // Bounded to 64 chunks: the producer drops rather than blocks when this is full.
        let (chunk_tx, chunk_rx) = sync_channel::<Vec<f32>>(64);
        let worker = move || {
            if let Err(e) = pw_thread(chunk_tx) {
                tracing::error!(error = %format!("{e:#}"), "pipewire audio thread failed");
            }
        };
        thread::Builder::new()
            .name("punktfunk-pw-audio".into())
            .spawn(worker)
            .context("spawn pipewire audio thread")?;
        Ok(Self { chunks: chunk_rx })
    }
}
impl AudioCapturer for PwAudioCapturer {
    /// Wait up to five seconds for the next chunk from the PipeWire thread; a timeout and a
    /// vanished producer thread are reported as distinct errors.
    fn next_chunk(&mut self) -> Result<Vec<f32>> {
        self.chunks
            .recv_timeout(Duration::from_secs(5))
            .map_err(|err| match err {
                RecvTimeoutError::Timeout => anyhow!("no PipeWire audio within 5s"),
                RecvTimeoutError::Disconnected => anyhow!("pipewire audio thread ended"),
            })
    }

    /// Throw away everything currently queued, without blocking.
    fn drain(&mut self) {
        loop {
            if self.chunks.try_recv().is_err() {
                break;
            }
        }
    }
}
/// Body of the PipeWire audio thread: connect to the session's PipeWire daemon, open a
/// capture stream on the default sink's monitor requesting F32LE / [`SAMPLE_RATE`] /
/// [`CHANNELS`], and forward every captured buffer to `tx` as interleaved `f32` samples.
///
/// Chunks are sent with `try_send`, so a full channel drops the chunk instead of stalling the
/// PipeWire loop. When the receiving side has been dropped the main loop is asked to quit, so
/// the loop, the stream and this thread do not outlive the capturer.
///
/// Returns `Err` if any setup step fails; returns `Ok(())` once the main loop has stopped.
fn pw_thread(tx: std::sync::mpsc::SyncSender<Vec<f32>>) -> Result<()> {
    use pipewire as pw;
    use pw::{properties::properties, spa};
    use spa::param::audio::{AudioFormat, AudioInfoRaw};
    use spa::pod::Pod;
    use std::sync::mpsc::TrySendError;

    crate::pwinit::ensure_init();
    let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw audio MainLoop")?;
    let context = pw::context::ContextRc::new(&mainloop, None).context("pw audio Context")?;
    let core = context
        .connect_rc(None)
        .context("pw audio connect (is PipeWire running in this session?)")?;
    let stream = pw::stream::StreamBox::new(
        &core,
        "punktfunk-audio",
        properties! {
            *pw::keys::MEDIA_TYPE => "Audio",
            *pw::keys::MEDIA_CATEGORY => "Capture",
            *pw::keys::MEDIA_ROLE => "Music",
            // Capture the default sink's monitor (system output), not a microphone.
            *pw::keys::STREAM_CAPTURE_SINK => "true",
            // Ask for a ~5ms quantum (= one Opus frame) so buffers arrive smoothly rather than
            // in large bursts the client's low-latency jitter buffer would hear as glitching.
            *pw::keys::NODE_LATENCY => "240/48000",
        },
    )
    .context("pw audio Stream")?;

    // Handle the process callback uses to stop the loop once nobody is listening any more.
    let loop_handle = mainloop.clone();

    let _listener = stream
        .add_local_listener_with_user_data(tx)
        .state_changed(|_s, _ud, old, new| {
            tracing::info!(?old, ?new, "pipewire audio stream state");
        })
        .param_changed(|_stream, _tx, id, param| {
            let Some(param) = param else { return };
            if id != pw::spa::param::ParamType::Format.as_raw() {
                return;
            }
            // Purely informational: log what was actually negotiated.
            let mut info = AudioInfoRaw::default();
            if info.parse(param).is_ok() {
                tracing::info!(
                    format = ?info.format(),
                    rate = info.rate(),
                    channels = info.channels(),
                    "audio format negotiated"
                );
            }
        })
        .process(move |stream, tx| {
            // A panic must not unwind out of the callback, so contain it here.
            let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
                let Some(mut buffer) = stream.dequeue_buffer() else {
                    return;
                };
                let datas = buffer.datas_mut();
                if datas.is_empty() {
                    return;
                }
                let d = &mut datas[0];
                let (offset, size) = {
                    let c = d.chunk();
                    (c.offset() as usize, c.size() as usize)
                };
                let Some(buf) = d.data() else { return };
                if offset > buf.len() {
                    return;
                }
                // Clamp the advertised chunk to the mapped buffer.
                let end = offset.saturating_add(size).min(buf.len());
                let region = &buf[offset..end];
                // Negotiated as F32LE; reinterpret the byte region as interleaved f32. Any
                // trailing partial sample (fewer than 4 bytes) is ignored.
                let samples: Vec<f32> = region
                    .chunks_exact(4)
                    .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]]))
                    .collect();
                static FIRST: std::sync::atomic::AtomicBool =
                    std::sync::atomic::AtomicBool::new(true);
                if FIRST.swap(false, std::sync::atomic::Ordering::Relaxed) {
                    tracing::info!(
                        samples = samples.len(),
                        frames = samples.len() / CHANNELS,
                        "audio first capture buffer"
                    );
                }
                match tx.try_send(samples) {
                    // Sent, or dropped because the encoder is behind.
                    Ok(()) | Err(TrySendError::Full(_)) => {}
                    // The capturer is gone: stop the loop instead of capturing forever.
                    Err(TrySendError::Disconnected(_)) => {
                        tracing::info!("pipewire audio receiver dropped — stopping capture");
                        loop_handle.quit();
                    }
                }
            }));
            if outcome.is_err() {
                tracing::error!("panic in pipewire audio callback — chunk dropped");
            }
        })
        .register()
        .context("register audio stream listener")?;

    // Request F32LE, 48 kHz, stereo.
    let mut info = AudioInfoRaw::new();
    info.set_format(AudioFormat::F32LE);
    info.set_rate(SAMPLE_RATE);
    info.set_channels(CHANNELS as u32);
    let obj = pw::spa::pod::Object {
        type_: pw::spa::utils::SpaTypes::ObjectParamFormat.as_raw(),
        id: pw::spa::param::ParamType::EnumFormat.as_raw(),
        properties: info.into(),
    };
    let values: Vec<u8> = pw::spa::pod::serialize::PodSerializer::serialize(
        std::io::Cursor::new(Vec::new()),
        &pw::spa::pod::Value::Object(obj),
    )
    .context("serialize audio format pod")?
    .0
    .into_inner();
    let mut params = [Pod::from_bytes(&values).context("audio pod from bytes")?];

    stream
        .connect(
            spa::utils::Direction::Input,
            None, // PW_ID_ANY — autoconnect to the default sink monitor
            pw::stream::StreamFlags::AUTOCONNECT | pw::stream::StreamFlags::MAP_BUFFERS,
            &mut params,
        )
        .context("pw audio stream connect")?;

    // Runs until `quit()` is called from the process callback.
    mainloop.run();
    Ok(())
}
+236
View File
@@ -0,0 +1,236 @@
//! Frame capture (plan §7). On Linux: a PipeWire ScreenCast portal stream. M0 uses the
//! CPU-copy fallback (the portal delivers a CPU buffer; the encoder uploads it to the GPU
//! internally). Zero-copy dmabuf→NVENC import is deferred (plan §9 risk).
use anyhow::Result;
/// Packed pixel layout of a [`CapturedFrame`]. The ScreenCast portal negotiates the
/// format; on wlroots it is commonly packed `RGB` (3 bytes/pixel). The encoder maps these
/// to an NVENC-accepted input format (`rgb0`/`bgr0`/`rgba`/`bgra`), expanding 3→4 bytes
/// where needed — no host-side colour conversion.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PixelFormat {
    /// `[B,G,R,x]`, 4 bpp.
    Bgrx,
    /// `[R,G,B,x]`, 4 bpp.
    Rgbx,
    /// `[B,G,R,A]`, 4 bpp.
    Bgra,
    /// `[R,G,B,A]`, 4 bpp.
    Rgba,
    /// `[R,G,B]`, 3 bpp.
    Rgb,
    /// `[B,G,R]`, 3 bpp.
    Bgr,
}

impl PixelFormat {
    /// Size of one packed pixel in bytes: 3 for the `Rgb`/`Bgr` layouts, 4 for every layout
    /// with a fourth (padding or alpha) byte.
    pub fn bytes_per_pixel(self) -> usize {
        // Exhaustive on purpose: adding a variant must force a decision here rather than
        // silently inheriting 4 bytes/pixel.
        match self {
            PixelFormat::Rgb | PixelFormat::Bgr => 3,
            PixelFormat::Bgrx | PixelFormat::Rgbx | PixelFormat::Bgra | PixelFormat::Rgba => 4,
        }
    }
}
/// A captured frame. [`format`](Self::format)/dimensions describe the pixels regardless of
/// where they live — [`payload`](Self::payload) is either a CPU buffer (the M0/fallback path)
/// or a GPU buffer already on the device (the zero-copy path, plan §9).
pub struct CapturedFrame {
/// Frame width in pixels.
pub width: u32,
/// Frame height in pixels.
pub height: u32,
/// Timestamp of the frame in nanoseconds. The synthetic capturer in this file derives it
/// from the frame index and fps; the fast synthetic capturer always writes 0.
pub pts_ns: u64,
/// Pixel layout of the payload.
pub format: PixelFormat,
/// The pixel data itself, on the CPU or on the GPU.
pub payload: FramePayload,
}
/// Where a captured frame's pixels live.
///
/// The `Cuda` variant only exists on Linux builds, so code matching on this enum must gate
/// that arm with the same `cfg`.
pub enum FramePayload {
/// Tightly-packed CPU pixels in `format`, `width*height*bytes_per_pixel` (no row padding).
Cpu(Vec<u8>),
/// A pitched GPU buffer (BGRA-order, on the shared CUDA context) — the zero-copy path. The
/// dmabuf has already been imported + copied into this owned device buffer.
#[cfg(target_os = "linux")]
Cuda(crate::zerocopy::DeviceBuffer),
}
impl CapturedFrame {
    /// Whether the payload is a CUDA device buffer (zero-copy path) rather than CPU pixels.
    /// Always `false` off Linux, where the `Cuda` variant does not exist.
    pub fn is_cuda(&self) -> bool {
        #[cfg(target_os = "linux")]
        let on_device = matches!(self.payload, FramePayload::Cuda(_));
        #[cfg(not(target_os = "linux"))]
        let on_device = false;
        on_device
    }
}
/// Produces frames from a captured output. Lives on its own thread, feeding the encoder
/// over a bounded drop-oldest channel (never block the compositor).
///
/// Implementors must be `Send` so a boxed capturer can be handed to that thread.
pub trait Capturer: Send {
/// Produce the next frame. The only required method; the implementations in this file
/// either synthesize a frame immediately or wait on a channel for one.
fn next_frame(&mut self) -> Result<CapturedFrame>;
/// Non-blocking: the freshest frame available since the last call, or `None` if none has
/// arrived (the caller reuses its last frame to hold a steady output rate). The default
/// just produces a frame each call — fine for instant synthetic sources; the portal
/// overrides it to drain its channel without blocking.
fn try_latest(&mut self) -> Result<Option<CapturedFrame>> {
self.next_frame().map(Some)
}
/// Gate expensive per-frame work so the capturer can be kept alive (reused) between
/// streams without burning CPU. The portal capturer skips the de-pad copy while inactive;
/// the default is a no-op (synthetic sources are produced on demand). Set `true` for the
/// duration of a stream, `false` when it ends.
///
/// Takes `&self`, so implementations that honour it need interior mutability.
fn set_active(&self, _active: bool) {}
}
/// Deterministic animated test pattern in BGRx. It lets M0 drive the encode → file →
/// `punktfunk_core` path without a live capture session; the sweeping bar and moving
/// gradient make it easy to confirm the encoded output is not static.
pub struct SyntheticCapturer {
    width: u32,
    height: u32,
    fps: u32,
    frame_idx: u64,
    buf: Vec<u8>,
}

impl SyntheticCapturer {
    /// Bytes per emitted pixel (BGRx).
    const BPP: usize = 4;

    /// Create a capturer for `width`×`height` frames paced at `fps`.
    ///
    /// # Panics
    /// Panics if any of the three arguments is zero.
    pub fn new(width: u32, height: u32, fps: u32) -> Self {
        assert!(width > 0 && height > 0 && fps > 0);
        let pixel_count = width as usize * height as usize;
        Self {
            width,
            height,
            fps,
            frame_idx: 0,
            buf: vec![0u8; pixel_count * Self::BPP],
        }
    }
}
impl Capturer for SyntheticCapturer {
    /// Render the next pattern frame into the internal buffer and return a copy of it as a
    /// CPU BGRx frame, with a timestamp derived from the frame index and fps.
    fn next_frame(&mut self) -> Result<CapturedFrame> {
        let stride = self.width as usize * Self::BPP;
        let tick = self.frame_idx;
        let cols = self.width as u64;
        // The white bar crosses the frame once every ~2s; the gradient phase advances every
        // frame, so no pixel stays constant from one frame to the next.
        let bar_x = ((tick * cols) / (self.fps as u64 * 2)) % cols;
        let phase = (tick % 256) as usize;

        for (y, line) in self.buf.chunks_exact_mut(stride).enumerate() {
            for (x, px) in line.chunks_exact_mut(Self::BPP).enumerate() {
                // BGRx byte order: [B, G, R, x]
                let bgrx = if (x as u64).abs_diff(bar_x) < 8 {
                    [255, 255, 255, 0]
                } else {
                    [
                        ((x + phase) & 0xff) as u8,
                        ((y + phase) & 0xff) as u8,
                        ((x + y) & 0xff) as u8,
                        0,
                    ]
                };
                px.copy_from_slice(&bgrx);
            }
        }

        let pts_ns = tick * 1_000_000_000 / self.fps as u64;
        self.frame_idx = tick + 1;
        Ok(CapturedFrame {
            width: self.width,
            height: self.height,
            pts_ns,
            format: PixelFormat::Bgrx,
            payload: FramePayload::Cpu(self.buf.clone()),
        })
    }
}
/// Inexpensive animated BGRx pattern for the streaming path: a pulsing solid field with a
/// white band moving downwards. It is drawn with bulk `fill`s only, so it keeps up in real
/// time even at 5K.
pub struct FastSyntheticCapturer {
    width: u32,
    height: u32,
    frame_idx: u64,
    buf: Vec<u8>,
}

impl FastSyntheticCapturer {
    /// Create a capturer for `width`×`height` frames (4 bytes per pixel).
    ///
    /// # Panics
    /// Panics if `width` or `height` is zero.
    pub fn new(width: u32, height: u32) -> Self {
        assert!(width > 0 && height > 0);
        let byte_len = width as usize * height as usize * 4;
        Self {
            width,
            height,
            frame_idx: 0,
            buf: vec![0u8; byte_len],
        }
    }
}
impl Capturer for FastSyntheticCapturer {
    /// Paint the whole buffer with this frame's shade, overlay the white band, and return a
    /// copy as a CPU BGRx frame. The timestamp is always 0.
    fn next_frame(&mut self) -> Result<CapturedFrame> {
        let stride = self.width as usize * 4;
        let rows = self.height as usize;

        // Background: one grey level per frame, cycling through 0..=255.
        self.buf.fill((self.frame_idx % 256) as u8);

        // Band: 1/20th of the height (at least one row), moving 6 rows per frame and clipped
        // at the bottom edge. The rows are contiguous, so a single fill covers them all.
        let band_top = (self.frame_idx as usize * 6) % rows;
        let band_bottom = (band_top + (rows / 20).max(1)).min(rows);
        self.buf[band_top * stride..band_bottom * stride].fill(0xff);

        self.frame_idx += 1;
        Ok(CapturedFrame {
            width: self.width,
            height: self.height,
            pts_ns: 0,
            format: PixelFormat::Bgrx,
            payload: FramePayload::Cpu(self.buf.clone()),
        })
    }
}
/// Open a live capturer for a client-sized monitor via the xdg ScreenCast portal
/// (`ashpd`) → PipeWire (`pipewire`). Implemented in the `linux` submodule.
#[cfg(target_os = "linux")]
pub fn open_portal_monitor() -> Result<Box<dyn Capturer>> {
// On RemoteDesktop-capable desktops (KWin/GNOME) anchor ScreenCast to a RemoteDesktop
// session so it inherits that grant headlessly; wlroots/Sway has no RemoteDesktop portal,
// so use a plain ScreenCast session there.
let anchored = crate::inject::default_backend() == crate::inject::Backend::Libei;
linux::PortalCapturer::open(anchored).map(|c| Box::new(c) as Box<dyn Capturer>)
}
/// Non-Linux stub: portal capture is unavailable, so this always returns an error.
#[cfg(not(target_os = "linux"))]
pub fn open_portal_monitor() -> Result<Box<dyn Capturer>> {
    Err(anyhow::anyhow!(
        "portal capture requires Linux (xdg-desktop-portal + PipeWire)"
    ))
}
/// Turn an existing virtual output (see [`crate::vdisplay`]) into a capturer. The output's
/// PipeWire node, optional remote fd and keepalive are consumed; the capturer owns the
/// keepalive, so the virtual output is released when the capturer is dropped. This works with
/// any [`crate::vdisplay::VirtualDisplay`] backend, and frames come out at the size the
/// output was created with — native, no scaling.
#[cfg(target_os = "linux")]
pub fn capture_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result<Box<dyn Capturer>> {
    let capturer = linux::PortalCapturer::from_virtual_output(vout)?;
    Ok(Box::new(capturer))
}
/// Non-Linux stub: virtual-output capture is unavailable, so this always returns an error.
#[cfg(not(target_os = "linux"))]
pub fn capture_virtual_output(_vout: crate::vdisplay::VirtualOutput) -> Result<Box<dyn Capturer>> {
    Err(anyhow::anyhow!("virtual-output capture requires Linux"))
}
#[cfg(target_os = "linux")]
mod linux;
+963
View File
@@ -0,0 +1,963 @@
//! Live capture: xdg ScreenCast portal (`ashpd`) → PipeWire (`pipewire`), CPU-copy path.
//!
//! Two dedicated threads, because both stacks are tied to their thread:
//! * **portal thread** drives the async ashpd handshake on a multi-thread tokio runtime
//! (control plane — never the per-frame path), then parks on a pending future so the
//! `proxy` + its zbus connection stay alive (the cast is torn down when that connection
//! drops; ashpd's `Session` has no `Drop`);
//! * **pipewire thread** owns the (`!Send`) MainLoop/Stream and pumps frames.
//!
//! The portal hands the PipeWire remote fd + node id to the pipewire thread; captured
//! frames (in whichever supported pixel layout was negotiated) leave the pipewire thread
//! over a bounded channel. The authoritative frame size comes from the negotiated PipeWire
//! format, not the portal's size hint.
//!
//! Cleanup note (M0): the two threads are detached and torn down at process exit. A
//! graceful stop (pipewire `channel` quit + Session close) belongs with the M2 session
//! lifecycle.
use super::{CapturedFrame, Capturer, FramePayload, PixelFormat};
use anyhow::{anyhow, Context, Result};
use std::os::fd::OwnedFd;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{sync_channel, Receiver, RecvTimeoutError, TryRecvError};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
/// Live monitor capturer backed by the portal + PipeWire threads. Kept alive (reused) across
/// streams — [`set_active`](Capturer::set_active) gates the per-frame de-pad copy so it costs
/// almost nothing between streams while the screencast session stays up (instant reconnect,
/// and no second session to conflict with).
pub struct PortalCapturer {
/// Receiving end of the bounded frame channel fed by the PipeWire thread.
frames: Receiver<CapturedFrame>,
/// Activation flag shared with the PipeWire thread; written by `set_active`.
active: Arc<AtomicBool>,
/// Owns the virtual output (if this capturer was built from one) — dropped when the capturer
/// is, releasing the compositor-side output via the keepalive's own `Drop`. `None` for the
/// portal source (its session ends with the portal thread's zbus connection).
_keepalive: Option<Box<dyn Send>>,
}
impl PortalCapturer {
/// `anchored` drives ScreenCast off a RemoteDesktop session (KWin/GNOME) so it inherits the
/// RemoteDesktop grant and never raises a separate ScreenCast dialog; `false` uses a plain
/// ScreenCast session (wlroots, which has no RemoteDesktop portal).
pub fn open(anchored: bool) -> Result<PortalCapturer> {
// Portal handshake (async) on its own thread; hands back the PW fd + node id.
let (setup_tx, setup_rx) = std::sync::mpsc::channel::<Result<(OwnedFd, u32), String>>();
thread::Builder::new()
.name("punktfunk-portal".into())
.spawn(move || {
if anchored {
portal_thread_remote_desktop(setup_tx)
} else {
portal_thread(setup_tx)
}
})
.context("spawn portal thread")?;
let (fd, node_id) = match setup_rx.recv_timeout(Duration::from_secs(20)) {
Ok(Ok(v)) => v,
Ok(Err(e)) => return Err(anyhow!("ScreenCast portal setup failed: {e}")),
Err(_) => return Err(anyhow!("timed out waiting for the ScreenCast portal")),
};
tracing::info!(
node_id,
"ScreenCast portal session started; connecting PipeWire"
);
let (frames, active) = spawn_pipewire(Some(fd), node_id, None)?;
Ok(PortalCapturer {
frames,
active,
_keepalive: None,
})
}
/// Build a capturer from an already-created virtual output ([`crate::vdisplay::VirtualOutput`]):
/// connect PipeWire to its node (`remote_fd` selects portal-remote vs. default-daemon) and
/// take ownership of its keepalive so the output lives exactly as long as this capturer. This
/// is how the client's requested resolution becomes the captured resolution without scaling.
pub fn from_virtual_output(vout: crate::vdisplay::VirtualOutput) -> Result<PortalCapturer> {
tracing::info!(
node_id = vout.node_id,
"connecting PipeWire to virtual output"
);
let (frames, active) = spawn_pipewire(vout.remote_fd, vout.node_id, vout.preferred_mode)?;
Ok(PortalCapturer {
frames,
active,
_keepalive: Some(vout.keepalive),
})
}
}
/// Start the `punktfunk-pipewire` consumer thread for `node_id` and return the receiving end
/// of its frame channel together with the activation flag it checks.
///
/// `fd` is `Some` for a portal remote and `None` for the default daemon. `preferred` seeds the
/// default size/framerate of the format negotiation — for Mutter virtual monitors this is what
/// actually sizes the monitor. Errors raised inside the thread are logged there; this function
/// only fails if the thread cannot be spawned.
fn spawn_pipewire(
    fd: Option<OwnedFd>,
    node_id: u32,
    preferred: Option<(u32, u32, u32)>,
) -> Result<(Receiver<CapturedFrame>, Arc<AtomicBool>)> {
    // Small bounded channel carrying frames out of the pipewire thread.
    let (frame_tx, frame_rx) = sync_channel::<CapturedFrame>(8);
    // Starts inactive; the thread gets its own handle to the same flag.
    let active = Arc::new(AtomicBool::new(false));
    let gate = Arc::clone(&active);
    let zerocopy = crate::zerocopy::enabled();

    let worker = move || {
        let outcome = pipewire::pipewire_thread(fd, node_id, frame_tx, gate, zerocopy, preferred);
        if let Err(e) = outcome {
            tracing::error!(error = %format!("{e:#}"), "pipewire capture thread failed");
        }
    };
    thread::Builder::new()
        .name("punktfunk-pipewire".into())
        .spawn(worker)
        .context("spawn pipewire thread")?;

    Ok((frame_rx, active))
}
impl Capturer for PortalCapturer {
    /// Block for up to ten seconds waiting for a frame. The generous limit covers the first
    /// frame, which can trail format negotiation; afterwards frames arrive at roughly the
    /// stream's frame rate.
    fn next_frame(&mut self) -> Result<CapturedFrame> {
        self.frames
            .recv_timeout(Duration::from_secs(10))
            .map_err(|err| match err {
                RecvTimeoutError::Timeout => anyhow!("no PipeWire frame within 10s"),
                RecvTimeoutError::Disconnected => anyhow!("PipeWire capture thread ended"),
            })
    }

    /// Empty the queue without blocking and keep only the newest frame. `Ok(None)` means
    /// nothing new was produced since the previous call (static/idle desktop).
    fn try_latest(&mut self) -> Result<Option<CapturedFrame>> {
        let mut newest = None;
        loop {
            match self.frames.try_recv() {
                Ok(frame) => newest = Some(frame),
                Err(TryRecvError::Empty) => return Ok(newest),
                Err(TryRecvError::Disconnected) => {
                    return Err(anyhow!("PipeWire capture thread ended"));
                }
            }
        }
    }

    /// Publish the activation state to the PipeWire thread through the shared flag.
    fn set_active(&self, active: bool) {
        self.active.store(active, Ordering::Relaxed);
    }
}
/// The portal handshake: connect ScreenCast, select a single monitor, start, open the
/// PipeWire remote, hand the fd + node id back, then keep the session alive.
///
/// Every outcome is reported through `setup_tx`: `Ok((fd, node_id))` once the cast is running,
/// or `Err(message)` if building the runtime or any handshake step fails. On success this
/// function never returns — it parks on a pending future.
fn portal_thread(setup_tx: std::sync::mpsc::Sender<Result<(OwnedFd, u32), String>>) {
use ashpd::desktop::screencast::{CursorMode, Screencast, SelectSourcesOptions, SourceType};
use ashpd::desktop::PersistMode;
use ashpd::enumflags2::BitFlags;
// Multi-thread runtime: the zbus connection's background reader must be pumped
// continuously across the create_session → select_sources → start handshake, or the
// portal reports "Invalid session". (A current-thread runtime starves it.)
let rt = match tokio::runtime::Builder::new_multi_thread()
.worker_threads(2)
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
let _ = setup_tx.send(Err(format!("build tokio runtime: {e}")));
return;
}
};
// Second sender for the failure path: `setup_tx` itself is moved into the inner async
// block below.
let err_tx = setup_tx.clone();
rt.block_on(async move {
let result: Result<()> = async {
let proxy = Screencast::new()
.await
.context("connect ScreenCast portal")?;
let session = proxy
.create_session(Default::default())
.await
.context("create_session")?;
// Request exactly one monitor with the cursor embedded in the frames, and no
// persisted permission.
proxy
.select_sources(
&session,
SelectSourcesOptions::default()
.set_cursor_mode(CursorMode::Embedded)
// Only MONITOR is offered by the wlroots backend
// (AvailableSourceTypes=1); requesting unsupported types
// invalidates the session.
.set_sources(BitFlags::from_flag(SourceType::Monitor))
.set_multiple(false)
.set_persist_mode(PersistMode::DoNot),
)
.await
.context("select_sources")?
.response()
.context("select_sources rejected (unsupported source type / cursor mode?)")?;
let streams = proxy
.start(&session, None, Default::default())
.await
.context("start cast")?
.response()
.context("start response (chooser cancelled? portal misconfigured?)")?;
// Only the first returned stream is used.
let stream = streams
.streams()
.first()
.context("portal returned no streams")?
.clone();
let node_id = stream.pipe_wire_node_id();
let fd = proxy
.open_pipe_wire_remote(&session, Default::default())
.await
.context("open_pipe_wire_remote")?;
// Hand the result to the waiting `PortalCapturer::open`; a send error means its
// receiver is already gone.
setup_tx
.send(Ok((fd, node_id)))
.map_err(|_| anyhow!("capturer dropped before setup completed"))?;
// Keep `proxy` + `session` (and the underlying zbus connection) alive for the
// capture; the cast is torn down when the connection drops (ashpd's `Session`
// has no `Drop`), which here happens at process exit.
let _keep_alive = (&proxy, &session);
std::future::pending::<()>().await;
Ok(())
}
.await;
// Only reached on failure: the success path parks forever above.
if let Err(e) = result {
let _ = err_tx.send(Err(format!("{e:#}")));
}
});
}
/// Combined RemoteDesktop+ScreenCast portal setup (KWin/GNOME). ScreenCast sources are selected
/// on a session created via RemoteDesktop, so a single RemoteDesktop `start` grant —
/// pre-authorized headlessly via the `kde-authorized` permission, exactly like the libei input
/// path — also covers screen capture, with no separate ScreenCast dialog (which has no such
/// bypass). Yields the same PipeWire fd + node id as the standalone path; the consumer is
/// identical.
///
/// Every outcome is reported through `setup_tx`: `Ok((fd, node_id))` once the session is
/// running, or `Err(message)` if building the runtime or any handshake step fails. On success
/// this function never returns — it parks on a pending future.
fn portal_thread_remote_desktop(setup_tx: std::sync::mpsc::Sender<Result<(OwnedFd, u32), String>>) {
use ashpd::desktop::remote_desktop::{DeviceType, RemoteDesktop, SelectDevicesOptions};
use ashpd::desktop::screencast::{CursorMode, Screencast, SelectSourcesOptions, SourceType};
use ashpd::desktop::PersistMode;
use ashpd::enumflags2::BitFlags;
// Same two-worker multi-thread runtime as the standalone `portal_thread`, which documents
// why a current-thread runtime is not usable for the handshake.
let rt = match tokio::runtime::Builder::new_multi_thread()
.worker_threads(2)
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
let _ = setup_tx.send(Err(format!("build tokio runtime: {e}")));
return;
}
};
// Second sender for the failure path: `setup_tx` itself is moved into the inner async
// block below.
let err_tx = setup_tx.clone();
rt.block_on(async move {
let result: Result<()> = async {
let remote = RemoteDesktop::new()
.await
.context("connect RemoteDesktop portal")?;
let screencast = Screencast::new()
.await
.context("connect ScreenCast portal")?;
// The session is created on the RemoteDesktop proxy and then shared with ScreenCast.
let session = remote
.create_session(Default::default())
.await
.context("create RemoteDesktop session")?;
// RemoteDesktop requires a device selection; we never connect_to_eis on this session
// (input injection runs its own), but selecting devices is what makes `start` the
// RemoteDesktop grant the kde-authorized bypass covers.
remote
.select_devices(
&session,
SelectDevicesOptions::default()
.set_devices(DeviceType::Keyboard | DeviceType::Pointer)
.set_persist_mode(PersistMode::DoNot),
)
.await
.context("select_devices")?
.response()
.context("select_devices rejected")?;
// Same source selection as the standalone path: one monitor, cursor embedded.
screencast
.select_sources(
&session,
SelectSourcesOptions::default()
.set_cursor_mode(CursorMode::Embedded)
.set_sources(BitFlags::from_flag(SourceType::Monitor))
.set_multiple(false)
.set_persist_mode(PersistMode::DoNot),
)
.await
.context("select_sources")?
.response()
.context("select_sources rejected (unsupported source type?)")?;
// `start` is issued on the RemoteDesktop proxy, not on ScreenCast.
let streams = remote
.start(&session, None, Default::default())
.await
.context("start RemoteDesktop+ScreenCast")?
.response()
.context("start response (grant not pre-authorized / headless dialog?)")?;
// Only the first returned stream is used.
let stream = streams
.streams()
.first()
.context("portal returned no screencast streams")?
.clone();
let node_id = stream.pipe_wire_node_id();
let fd = screencast
.open_pipe_wire_remote(&session, Default::default())
.await
.context("open_pipe_wire_remote")?;
// Hand the result to the waiting `PortalCapturer::open`; a send error means its
// receiver is already gone.
setup_tx
.send(Ok((fd, node_id)))
.map_err(|_| anyhow!("capturer dropped before setup completed"))?;
// Keep the proxies + session (and their zbus connection) alive for the capture.
let _keep_alive = (&remote, &screencast, &session);
std::future::pending::<()>().await;
Ok(())
}
.await;
// Only reached on failure: the success path parks forever above.
if let Err(e) = result {
let _ = err_tx.send(Err(format!("{e:#}")));
}
});
}
mod pipewire {
//! The PipeWire consumer, confined to its own thread (the PW types are `!Send`).
use super::{CapturedFrame, FramePayload, PixelFormat};
use anyhow::{Context, Result};
use pipewire as pw;
use pw::{properties::properties, spa};
use std::os::fd::OwnedFd;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::SyncSender;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use spa::param::video::{VideoFormat, VideoInfoRaw};
use spa::pod::Pod;
/// Translate a negotiated SPA video format into the matching encoder-side [`PixelFormat`].
/// Formats outside the six supported layouts give `None`, and the frame is then skipped.
fn map_format(f: VideoFormat) -> Option<PixelFormat> {
    match f {
        VideoFormat::BGRx => Some(PixelFormat::Bgrx),
        VideoFormat::RGBx => Some(PixelFormat::Rgbx),
        VideoFormat::BGRA => Some(PixelFormat::Bgra),
        VideoFormat::RGBA => Some(PixelFormat::Rgba),
        VideoFormat::RGB => Some(PixelFormat::Rgb),
        VideoFormat::BGR => Some(PixelFormat::Bgr),
        _ => None,
    }
}
/// Per-stream state for the PipeWire consumer. NOTE(review): presumably this is the user data
/// handed to the stream listener's callbacks — the listener setup is outside this excerpt, so
/// confirm there.
struct UserData {
/// Raw video info for the stream. NOTE(review): presumably filled in from the negotiated
/// format in `param_changed` — confirm.
info: VideoInfoRaw,
/// Negotiated layout (`None` until param_changed, or if unsupported).
format: Option<PixelFormat>,
/// Negotiated DRM format modifier (for dmabuf import); 0 = LINEAR.
modifier: u64,
/// Sending end of the bounded frame channel to the capturer.
tx: SyncSender<CapturedFrame>,
/// When false (no active stream), skip the de-pad copy — the buffer is just released.
active: Arc<AtomicBool>,
/// Present when zero-copy is enabled: imports a dmabuf → CUDA device buffer.
importer: Option<crate::zerocopy::EglImporter>,
}
/// Log a frame-drop reason once per process (the process callback runs per frame; a stuck
/// pipeline must say why without flooding).
///
/// Messages are de-duplicated by content. A poisoned lock is recovered rather than unwrapped,
/// so an earlier panic elsewhere can never turn this per-frame helper into a second panic
/// inside the PipeWire callback.
fn warn_once(msg: &'static str) {
    use std::sync::{Mutex, PoisonError};
    static SEEN: Mutex<Vec<&'static str>> = Mutex::new(Vec::new());
    // Decide under the lock, log after releasing it: the guard is never held across the
    // logging call.
    let first_time = {
        let mut seen = SEEN.lock().unwrap_or_else(PoisonError::into_inner);
        if seen.contains(&msg) {
            false
        } else {
            seen.push(msg);
            true
        }
    };
    if first_time {
        tracing::warn!("{msg}");
    }
}
/// A read-only mmap of a dmabuf fd, unmapped on drop. Used when MAP_BUFFERS didn't map the
/// buffer (producers don't always flag dmabufs mappable, e.g. gamescope's Vulkan exports).
struct DmabufMap {
    /// Start of the mapping as returned by a successful `mmap`.
    ptr: *mut std::ffi::c_void,
    /// Length of the mapping in bytes (the value passed to `mmap`).
    len: usize,
}

impl DmabufMap {
    /// Map `len` bytes of `fd` read-only and shared, starting at offset 0. Returns `None` if
    /// `mmap` fails.
    ///
    /// The value is only constructed after `mmap` succeeded. Building it eagerly (e.g. via
    /// `bool::then_some`) would create and immediately drop a `DmabufMap` holding
    /// `MAP_FAILED` on the failure path, making `Drop` call `munmap` on an invalid address.
    fn new(fd: i32, len: usize) -> Option<DmabufMap> {
        // SAFETY: a null address hint lets the kernel pick the placement, so no existing
        // mapping is replaced; the result is checked against MAP_FAILED before use.
        let ptr = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                len,
                libc::PROT_READ,
                libc::MAP_SHARED,
                fd,
                0,
            )
        };
        if ptr == libc::MAP_FAILED {
            return None;
        }
        Some(DmabufMap { ptr, len })
    }
}

impl Drop for DmabufMap {
    fn drop(&mut self) {
        // SAFETY: `ptr`/`len` describe a mapping returned by a successful `mmap` in `new`
        // (assuming `new` is the only place instances are built — TODO confirm), and it is
        // unmapped exactly once, here.
        unsafe {
            libc::munmap(self.ptr, self.len);
        }
    }
}
/// Serialize a SPA pod object into a freshly allocated byte vector.
fn serialize_pod(obj: pw::spa::pod::Object) -> Result<Vec<u8>> {
    use pw::spa::pod::{serialize::PodSerializer, Value};

    let sink = std::io::Cursor::new(Vec::new());
    // The serializer hands back the cursor plus a second value that is not needed here.
    let (cursor, _) =
        PodSerializer::serialize(sink, &Value::Object(obj)).context("serialize pod")?;
    Ok(cursor.into_inner())
}
/// Build a BGRx dmabuf `EnumFormat` pod advertising the EGL-importable `modifiers` as a
/// mandatory enum Choice; the compositor fixates to one of them that it can allocate, which
/// we read back in `param_changed`.
fn build_dmabuf_format(
modifiers: &[u64],
preferred: Option<(u32, u32, u32)>,
) -> Result<Vec<u8>> {
let (dw, dh, dhz) = preferred.unwrap_or((1920, 1080, 60));
use pw::spa::param::format::{FormatProperties, MediaSubtype, MediaType};
let mut obj = pw::spa::pod::object!(
pw::spa::utils::SpaTypes::ObjectParamFormat,
pw::spa::param::ParamType::EnumFormat,
pw::spa::pod::property!(FormatProperties::MediaType, Id, MediaType::Video),
pw::spa::pod::property!(FormatProperties::MediaSubtype, Id, MediaSubtype::Raw),
pw::spa::pod::property!(FormatProperties::VideoFormat, Id, VideoFormat::BGRx),
pw::spa::pod::property!(
FormatProperties::VideoSize,
Choice,
Range,
Rectangle,
pw::spa::utils::Rectangle {
width: dw,
height: dh
},
pw::spa::utils::Rectangle {
width: 1,
height: 1
},
pw::spa::utils::Rectangle {
width: 8192,
height: 8192
}
),
pw::spa::pod::property!(
FormatProperties::VideoFramerate,
Choice,
Range,
Fraction,
pw::spa::utils::Fraction { num: dhz, denom: 1 },
pw::spa::utils::Fraction { num: 0, denom: 1 },
pw::spa::utils::Fraction { num: 240, denom: 1 }
),
);
obj.properties.push(pw::spa::pod::Property {
key: pw::spa::sys::SPA_FORMAT_VIDEO_modifier,
flags: pw::spa::pod::PropertyFlags::MANDATORY,
value: pw::spa::pod::Value::Choice(pw::spa::pod::ChoiceValue::Long(
pw::spa::utils::Choice(
pw::spa::utils::ChoiceFlags::empty(),
pw::spa::utils::ChoiceEnum::Enum {
default: modifiers[0] as i64,
alternatives: modifiers.iter().map(|&m| m as i64).collect(),
},
),
)),
});
serialize_pod(obj)
}
/// Format offer for the default shm/CPU path: raw video in any layout the encoder can map,
/// any size from 1×1 to 8192×8192, and any framerate from 0/1 to 240/1 (0/1 = variable
/// allowed — gamescope fixates exactly that). `preferred` is `(width, height, framerate)` and
/// seeds the range defaults; without it 1920×1080 at 60 is used.
fn build_default_format_obj(preferred: Option<(u32, u32, u32)>) -> pw::spa::pod::Object {
    use pw::spa::param::format::{FormatProperties, MediaSubtype, MediaType};

    let (dw, dh, dhz) = preferred.unwrap_or((1920, 1080, 60));
    pw::spa::pod::object!(
        pw::spa::utils::SpaTypes::ObjectParamFormat,
        pw::spa::param::ParamType::EnumFormat,
        pw::spa::pod::property!(FormatProperties::MediaType, Id, MediaType::Video),
        pw::spa::pod::property!(FormatProperties::MediaSubtype, Id, MediaSubtype::Raw),
        // Offer the layouts the encoder can map to an NVENC input format. wlroots
        // commonly fixates packed RGB (3 bpp); other compositors offer 4 bpp. Only
        // these are requested, so negotiation fails loudly rather than handing us a
        // format we'd misinterpret.
        pw::spa::pod::property!(
            FormatProperties::VideoFormat,
            Choice,
            Enum,
            Id,
            VideoFormat::RGB,
            VideoFormat::RGB,
            VideoFormat::BGR,
            VideoFormat::RGBx,
            VideoFormat::BGRx,
            VideoFormat::RGBA,
            VideoFormat::BGRA,
        ),
        pw::spa::pod::property!(
            FormatProperties::VideoSize,
            Choice,
            Range,
            Rectangle,
            pw::spa::utils::Rectangle {
                width: dw,
                height: dh
            },
            pw::spa::utils::Rectangle {
                width: 1,
                height: 1
            },
            pw::spa::utils::Rectangle {
                width: 8192,
                height: 8192
            }
        ),
        pw::spa::pod::property!(
            FormatProperties::VideoFramerate,
            Choice,
            Range,
            Fraction,
            pw::spa::utils::Fraction { num: dhz, denom: 1 },
            pw::spa::utils::Fraction { num: 0, denom: 1 },
            pw::spa::utils::Fraction { num: 240, denom: 1 }
        ),
    )
}
/// Build the CPU-path Buffers param, accepting every buffer kind we can map: MemPtr, MemFd
/// and DmaBuf.
///
/// DmaBuf is included for producers such as gamescope: once their modifier-bearing (LINEAR)
/// format pod wins the format intersection they offer *only* DmaBuf buffers, and without the
/// bit the buffer-type intersection is empty and the link silently stalls in "negotiating".
/// A LINEAR dmabuf can be mmap'ed, so the CPU de-pad copy still works.
fn build_mappable_buffers() -> Result<Vec<u8>> {
    // `dataType` is a bitmask with one bit per SPA data type.
    let mem_ptr = 1i32 << pw::spa::sys::SPA_DATA_MemPtr;
    let mem_fd = 1i32 << pw::spa::sys::SPA_DATA_MemFd;
    let dma_buf = 1i32 << pw::spa::sys::SPA_DATA_DmaBuf;

    let data_type = pw::spa::pod::Property {
        key: pw::spa::sys::SPA_PARAM_BUFFERS_dataType,
        flags: pw::spa::pod::PropertyFlags::empty(),
        value: pw::spa::pod::Value::Int(mem_ptr | mem_fd | dma_buf),
    };
    let buffers = pw::spa::pod::Object {
        type_: pw::spa::utils::SpaTypes::ObjectParamBuffers.as_raw(),
        id: pw::spa::param::ParamType::Buffers.as_raw(),
        properties: vec![data_type],
    };
    serialize_pod(buffers)
}
/// Build the zero-copy Buffers param: the `dataType` bitmask has only the DmaBuf bit set, so
/// only dmabuf buffers are requested.
fn build_dmabuf_buffers() -> Result<Vec<u8>> {
    let dmabuf_only = 1i32 << pw::spa::sys::SPA_DATA_DmaBuf;
    let data_type = pw::spa::pod::Property {
        key: pw::spa::sys::SPA_PARAM_BUFFERS_dataType,
        flags: pw::spa::pod::PropertyFlags::empty(),
        value: pw::spa::pod::Value::Int(dmabuf_only),
    };
    let buffers = pw::spa::pod::Object {
        type_: pw::spa::utils::SpaTypes::ObjectParamBuffers.as_raw(),
        id: pw::spa::param::ParamType::Buffers.as_raw(),
        properties: vec![data_type],
    };
    serialize_pod(buffers)
}
/// PipeWire capture thread body: connects an input stream to the screencast node `node_id`,
/// negotiates a raw video format, and forwards every captured frame to `tx`.
///
/// * `fd` — PipeWire remote handed out by the portal; `None` connects to the default daemon.
/// * `node_id` — the node the stream is connected to.
/// * `tx` — bounded channel to the encoder; a frame is dropped whenever `try_send` fails, so
///   the PipeWire loop never blocks.
/// * `active` — while `false`, dequeued buffers are released without being copied or imported.
/// * `zerocopy` — try the dmabuf → CUDA import path. Falls back to the CPU copy path when the
///   importer cannot be built, and at runtime when an import fails.
/// * `preferred` — `(width, height, fps)` used as the default of the offered size/framerate
///   ranges (ignored by the `PUNKTFUNK_PW_FIXED_POD` debug pod).
///
/// Blocks inside `mainloop.run()`. Errors are returned only for setup failures before the
/// loop starts; failures inside the frame callback are logged and the frame is dropped.
pub fn pipewire_thread(
fd: Option<OwnedFd>,
node_id: u32,
tx: SyncSender<CapturedFrame>,
active: Arc<AtomicBool>,
zerocopy: bool,
preferred: Option<(u32, u32, u32)>,
) -> Result<()> {
crate::pwinit::ensure_init();
let mainloop = pw::main_loop::MainLoopRc::new(None).context("pw MainLoop")?;
let context = pw::context::ContextRc::new(&mainloop, None).context("pw Context")?;
// A portal source hands us an fd to a (sandboxed) PipeWire remote; the KWin
// virtual-output source has no fd — its node lives on the user's default daemon.
let core = match fd {
Some(fd) => context
.connect_fd_rc(fd, None)
.context("pw connect_fd (portal remote)")?,
None => context
.connect_rc(None)
.context("pw connect (default daemon)")?,
};
// Build the EGL→CUDA importer up front; if it fails, log and fall back to the CPU path
// (we simply won't request dmabuf below).
let importer = if zerocopy {
match crate::zerocopy::EglImporter::new() {
Ok(i) => Some(i),
Err(e) => {
tracing::warn!(error = %format!("{e:#}"), "zero-copy import unavailable — using CPU path");
None
}
}
} else {
None
};
// Modifiers our import stack handles for BGRx: the EGL-importable (tiled) set, plus
// LINEAR (0) — NVIDIA's EGL won't list it, but LINEAR dmabufs (gamescope's only offer)
// import via CUDA external memory instead. Tiled stays first so allocators that can do
// both (KWin) prefer it. If none, we can't negotiate dmabuf → shm path.
let mut modifiers = importer
.as_ref()
.map(|i| i.supported_modifiers(crate::zerocopy::drm_fourcc(PixelFormat::Bgrx).unwrap()))
.unwrap_or_default();
if importer.is_some() && !modifiers.contains(&0) {
modifiers.push(0); // DRM_FORMAT_MOD_LINEAR
}
let want_dmabuf = importer.is_some() && !modifiers.is_empty();
if zerocopy && !want_dmabuf {
tracing::warn!("zero-copy: no EGL-importable dmabuf modifiers — using CPU path");
} else if want_dmabuf {
tracing::info!(
count = modifiers.len(),
sample = ?&modifiers[..modifiers.len().min(6)],
"zero-copy: advertising EGL-importable dmabuf modifiers"
);
}
// Per-stream state shared by the listener callbacks below; `info`/`format`/`modifier` are
// filled in by `param_changed` once a format has been negotiated.
let data = UserData {
info: VideoInfoRaw::default(),
format: None,
modifier: 0,
tx,
active,
importer,
};
let stream = pw::stream::StreamBox::new(
&core,
"punktfunk-screencast",
properties! {
*pw::keys::MEDIA_TYPE => "Video",
*pw::keys::MEDIA_CATEGORY => "Capture",
*pw::keys::MEDIA_ROLE => "Screen",
// Never let the session manager re-target this stream to a different node when
// its target goes away: an orphaned stream auto-linked to a fresh Video/Source
// wedges that node — and a stuck link head-blocks the PipeWire daemon's shared
// work queue, stalling ALL new link negotiation system-wide.
"node.dont-reconnect" => "true",
},
)
.context("pw Stream")?;
let _listener = stream
.add_local_listener_with_user_data(data)
.state_changed(|_stream, _ud, old, new| {
tracing::info!(?old, ?new, "pipewire stream state");
})
// Format negotiation result: record size, mapped pixel format and modifier for `process`.
.param_changed(|_stream, ud, id, param| {
let Some(param) = param else { return };
if id != pw::spa::param::ParamType::Format.as_raw() {
return;
}
let Ok((media_type, media_subtype)) =
pw::spa::param::format_utils::parse_format(param)
else {
return;
};
if media_type != pw::spa::param::format::MediaType::Video
|| media_subtype != pw::spa::param::format::MediaSubtype::Raw
{
return;
}
if ud.info.parse(param).is_ok() {
let sz = ud.info.size();
ud.format = map_format(ud.info.format());
ud.modifier = ud.info.modifier();
tracing::info!(
width = sz.width,
height = sz.height,
spa_format = ?ud.info.format(),
mapped = ?ud.format,
modifier = ud.modifier,
"pipewire format negotiated"
);
if ud.format.is_none() {
tracing::error!(
spa_format = ?ud.info.format(),
"negotiated a pixel format the encoder cannot consume — frames will be skipped"
);
}
}
})
.process(|stream, ud| {
// PipeWire dispatches this from a C trampoline with no catch_unwind; a
// panic crossing that FFI boundary would abort the whole host. Contain it.
let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let Some(mut buffer) = stream.dequeue_buffer() else {
return;
};
// No active stream: release the buffer without the (expensive at 5K) de-pad.
if !ud.active.load(Ordering::Relaxed) {
return;
}
let datas = buffer.datas_mut();
if datas.is_empty() {
return;
}
let sz = ud.info.size();
let (w, h) = (sz.width as usize, sz.height as usize);
if w == 0 || h == 0 {
return; // format not negotiated yet
}
// Zero-copy path: if the buffer is a dmabuf and we have an importer, import it
// into a CUDA device buffer (no CPU touch) and deliver that. Otherwise fall
// through to the shm de-pad copy below.
let mut gpu_import_broken = false;
if let (Some(importer), Some(fmt)) = (ud.importer.as_mut(), ud.format) {
if datas[0].type_() == pw::spa::buffer::DataType::DmaBuf {
let plane = crate::zerocopy::DmabufPlane {
fd: datas[0].fd(),
offset: datas[0].chunk().offset(),
stride: datas[0].chunk().stride().max(0) as u32,
};
// Tiled modifier → EGL/GL de-tile import; LINEAR (0/unset, e.g.
// gamescope) → direct CUDA external-memory import (NVIDIA EGL can't
// sample LINEAR).
let modifier = (ud.modifier != 0).then_some(ud.modifier);
if let Some(fourcc) = crate::zerocopy::drm_fourcc(fmt) {
let imported = if modifier.is_some() {
importer.import(&plane, w as u32, h as u32, fourcc, modifier)
} else {
importer.import_linear(&plane, w as u32, h as u32)
};
match imported {
Ok(devbuf) => {
static ONCE: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(true);
if ONCE.swap(false, Ordering::Relaxed) {
tracing::info!(w, h, modifier = ud.modifier,
"zero-copy: dmabuf imported to CUDA (no CPU copy)");
}
let pts_ns = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(0);
let _ = ud.tx.try_send(CapturedFrame {
width: w as u32,
height: h as u32,
pts_ns,
format: fmt,
payload: FramePayload::Cuda(devbuf),
});
return;
}
Err(e) => {
// GPU import unavailable for this buffer kind (e.g. the
// driver rejects LINEAR external-memory import). Disable
// the importer and fall through to the CPU mmap path —
// degraded, not dead.
tracing::warn!(error = %format!("{e:#}"),
"dmabuf GPU import failed — falling back to the CPU copy path");
gpu_import_broken = true;
}
}
} else {
return; // format has no DRM fourcc mapping — skip the frame
}
}
}
// Dropped here rather than inside the `if let` above, where `ud.importer` is still
// mutably borrowed.
if gpu_import_broken {
ud.importer = None;
}
let d = &mut datas[0];
// CPU path may also receive LINEAR dmabufs (gamescope offers only those once its
// modifier-bearing format pod wins); capture the fd before `data()` borrows `d`.
let dmabuf_fd =
(d.type_() == pw::spa::buffer::DataType::DmaBuf).then(|| d.fd());
let (size, offset, stride) = {
let c = d.chunk();
(
c.size() as usize,
c.offset() as usize,
c.stride().max(0) as usize,
)
};
let Some(fmt) = ud.format else { return }; // unsupported/not negotiated
let bpp = fmt.bytes_per_pixel();
let row = w * bpp;
// A reported stride of 0 is treated as "tightly packed".
let stride = if stride == 0 { row } else { stride };
if stride < row {
warn_once("chunk stride < row — frames dropped");
return;
}
// Bytes spanned by the frame: full strides for all rows but the last, which only
// needs `row` bytes (h != 0 was checked above).
let needed = stride * (h - 1) + row;
// dmabuf chunks commonly report size 0; fall back to the computed span.
let size = if size == 0 { needed } else { size };
// MAP_BUFFERS only maps buffers flagged mappable; Vulkan-exported dmabufs
// (gamescope) usually aren't, so mmap the fd ourselves for the de-pad read.
let _mapping; // keeps a manual mmap alive for the copy below
let buf: &[u8] = if let Some(data) = d.data() {
data
} else if let Some(fd) = dmabuf_fd.filter(|&fd| fd > 0) {
match DmabufMap::new(fd, offset + needed) {
Some(m) => {
_mapping = m;
unsafe {
std::slice::from_raw_parts(_mapping.ptr as *const u8, _mapping.len)
}
}
None => {
warn_once("mmap(dmabuf) failed — frames dropped");
return;
}
}
} else {
warn_once("buffer has no mappable data — frames dropped");
return;
};
// Need stride*(h-1)+row valid bytes within [offset, offset+size).
if offset > buf.len() {
return;
}
let avail = buf.len() - offset;
if needed > avail || needed > size {
warn_once("buffer smaller than frame span — frames dropped");
return;
}
let region = &buf[offset..offset + size.min(avail)];
// De-pad into a tightly-packed buffer (chunk stride may exceed w*bpp).
let mut tight = vec![0u8; row * h];
for y in 0..h {
tight[y * row..y * row + row]
.copy_from_slice(&region[y * stride..y * stride + row]);
}
let pts_ns = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(0);
let frame = CapturedFrame {
width: w as u32,
height: h as u32,
pts_ns,
format: fmt,
payload: FramePayload::Cpu(tight),
};
// Drop if the encoder is behind — never block the pipewire loop.
let _ = ud.tx.try_send(frame);
}));
if outcome.is_err() {
tracing::error!("panic in pipewire process callback — frame dropped");
}
})
.register()
.context("register stream listener")?;
// Debug knob: offer a single fixed format (PUNKTFUNK_PW_FIXED_POD="WxH") to bisect
// negotiation failures against a producer's exact EnumFormat (e.g. gamescope).
// A value that does not parse as two integers separated by 'x' is silently ignored.
let fixed_pod: Option<(u32, u32)> = std::env::var("PUNKTFUNK_PW_FIXED_POD")
.ok()
.and_then(|v| v.split_once('x').map(|(w, h)| (w.parse(), h.parse())))
.and_then(|(w, h)| Some((w.ok()?, h.ok()?)));
// Request raw video in any encoder-mappable layout, any size/framerate.
let obj = if let Some((fw, fh)) = fixed_pod {
tracing::info!(fw, fh, "PW DEBUG: offering fixed BGRx pod");
pw::spa::pod::object!(
pw::spa::utils::SpaTypes::ObjectParamFormat,
pw::spa::param::ParamType::EnumFormat,
pw::spa::pod::property!(
pw::spa::param::format::FormatProperties::MediaType,
Id,
pw::spa::param::format::MediaType::Video
),
pw::spa::pod::property!(
pw::spa::param::format::FormatProperties::MediaSubtype,
Id,
pw::spa::param::format::MediaSubtype::Raw
),
pw::spa::pod::property!(
pw::spa::param::format::FormatProperties::VideoFormat,
Id,
VideoFormat::BGRx
),
pw::spa::pod::property!(
pw::spa::param::format::FormatProperties::VideoSize,
Rectangle,
pw::spa::utils::Rectangle {
width: fw,
height: fh
}
),
pw::spa::pod::property!(
pw::spa::param::format::FormatProperties::VideoFramerate,
Fraction,
pw::spa::utils::Fraction { num: 0, denom: 1 }
),
)
} else {
build_default_format_obj(preferred)
};
// When zero-copy is on, offer ONLY a BGRx dmabuf format with our EGL-importable modifiers
// (offering shm too makes the compositor pick shm). The modifier list is advertised as a
// MANDATORY enum Choice (see `build_dmabuf_format`); the producer picks one and
// `param_changed` reads the negotiated modifier back. Otherwise offer the multi-format shm
// pod and let MAP_BUFFERS map it.
// NOTE(review): an earlier revision of this comment described a DONT_FIXATE two-step
// handshake with the fixated format re-emitted from `param_changed`; the code here sets no
// DONT_FIXATE flag and re-emits nothing — confirm that single-step negotiation is intended.
let shm_values = serialize_pod(obj)?;
let (dmabuf_values, buffers_values) = if want_dmabuf {
(
Some(build_dmabuf_format(&modifiers, preferred)?),
Some(build_dmabuf_buffers()?),
)
} else {
// CPU path still accepts mappable dmabufs (gamescope offers only those once its
// modifier-bearing format pod wins the intersection).
(None, Some(build_mappable_buffers()?))
};
// Exactly one format pod (dmabuf or shm) followed by the Buffers pod.
let mut byte_slices: Vec<&[u8]> = Vec::new();
match &dmabuf_values {
Some(d) => byte_slices.push(d),
None => byte_slices.push(&shm_values),
}
if let Some(b) = &buffers_values {
byte_slices.push(b);
}
let mut params: Vec<&Pod> = byte_slices
.iter()
.map(|&b| Pod::from_bytes(b).context("pod from bytes"))
.collect::<Result<_>>()?;
stream
.connect(
spa::utils::Direction::Input,
Some(node_id),
pw::stream::StreamFlags::AUTOCONNECT | pw::stream::StreamFlags::MAP_BUFFERS,
&mut params,
)
.context("pw stream connect")?;
// Blocks this thread, pumping frame callbacks until process exit.
mainloop.run();
Ok(())
}
}
+154
View File
@@ -0,0 +1,154 @@
//! Hardware video encode (plan §7). Binds FFmpeg (NVENC); never rewrites codecs.
//! Low-latency preset, B-frames off. CPU frames are fed as packed RGB/BGR — `*_nvenc`
//! accepts `bgr0`/`rgb0` input and converts to YUV on the GPU, so no host-side swscale is
//! needed. Zero-copy frames are fed as CUDA surfaces (see the `cuda` flag of `open_video`).
use crate::capture::{CapturedFrame, PixelFormat};
use anyhow::Result;
/// An encoded access unit (one NAL/AU) to hand to `punktfunk_core` for FEC + packetization.
/// `data` is in-band Annex-B (the encoder is opened without a global header), so each
/// keyframe carries its own VPS/SPS/PPS — the bytes are both a playable elementary
/// stream and a self-contained AU for the wire.
pub struct EncodedFrame {
/// The encoded bytes of this access unit.
pub data: Vec<u8>,
/// Presentation timestamp in nanoseconds.
pub pts_ns: u64,
/// True for IDR/keyframes (sets the SOF/keyframe wire flags).
pub keyframe: bool,
}
/// Codec selection negotiated with the client.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Codec {
/// H.264 (AVC).
H264,
/// H.265 (HEVC).
H265,
/// AV1.
Av1,
}
impl Codec {
/// The FFmpeg NVENC encoder name (selected by name, not codec id — the latter would
/// pick the software encoder).
pub fn nvenc_name(self) -> &'static str {
match self {
Codec::H264 => "h264_nvenc",
Codec::H265 => "hevc_nvenc",
Codec::Av1 => "av1_nvenc",
}
}
}
/// A hardware encoder. One per session; runs on the encode thread.
pub trait Encoder: Send {
/// Hand one captured frame to the encoder. Encoded output is retrieved separately with
/// [`poll`](Self::poll).
fn submit(&mut self, frame: &CapturedFrame) -> Result<()>;
/// Force the next submitted frame to be an IDR keyframe (e.g. after a client
/// reference-frame-invalidation request). Default: no-op.
fn request_keyframe(&mut self) {}
/// Pull the next encoded AU if one is ready.
fn poll(&mut self) -> Result<Option<EncodedFrame>>;
/// Signal end-of-stream. After this, drain the remaining AUs with [`poll`](Self::poll)
/// until it returns `None` — NVENC buffers frames internally even at `delay=0`.
fn flush(&mut self) -> Result<()>;
}
impl Codec {
/// Maximum encodable dimension (px) per side for this codec on NVENC. H.264 tops out at
/// 4096 (level constraint); HEVC and AV1 allow 8192. Used to reject out-of-range client
/// modes up front (see [`validate_dimensions`]).
pub fn max_dimension(self) -> u32 {
match self {
Codec::H264 => 4096,
Codec::H265 | Codec::Av1 => 8192,
}
}
}
/// Validate a requested encode resolution before we allocate buffers or open NVENC. Rejects
/// zero/odd-sized and out-of-range modes with a clear error instead of letting buffer math
/// overflow or the encoder open fail with an opaque NVENC code. A client can request any
/// `mode=WxHxFPS`, so this is the gate on attacker/typo-controlled dimensions.
pub fn validate_dimensions(codec: Codec, width: u32, height: u32) -> Result<()> {
if width == 0 || height == 0 {
anyhow::bail!("invalid encode resolution {width}x{height}: dimensions must be non-zero");
}
// NVENC requires even dimensions for the chroma subsampling it does internally.
if width % 2 != 0 || height % 2 != 0 {
anyhow::bail!("invalid encode resolution {width}x{height}: dimensions must be even");
}
let max = codec.max_dimension();
if width > max || height > max {
anyhow::bail!(
"{codec:?} max dimension is {max}px; requested {width}x{height} \
(use HEVC/AV1 above 4096, or lower the client resolution)"
);
}
Ok(())
}
/// Open an NVENC encoder for the negotiated `codec`, source `format` and `width`x`height`
/// at `fps` with a target of `bitrate_bps`.
///
/// With `cuda == true` the encoder consumes GPU frames (`AV_PIX_FMT_CUDA`) from the zero-copy
/// path; otherwise it consumes packed RGB/BGR CPU frames. The caller derives `cuda` from the
/// payload of the first captured frame; everything else comes from session negotiation.
///
/// # Errors
/// Fails when the resolution does not pass [`validate_dimensions`], when the encoder cannot
/// be opened, or unconditionally on non-Linux targets.
pub fn open_video(
    codec: Codec,
    format: PixelFormat,
    width: u32,
    height: u32,
    fps: u32,
    bitrate_bps: u64,
    cuda: bool,
) -> Result<Box<dyn Encoder>> {
    validate_dimensions(codec, width, height)?;
    #[cfg(target_os = "linux")]
    {
        linux::NvencEncoder::open(codec, format, width, height, fps, bitrate_bps, cuda)
            .map(|enc| Box::new(enc) as Box<dyn Encoder>)
    }
    #[cfg(not(target_os = "linux"))]
    {
        // Mark every parameter as used so this target builds without warnings.
        let _ = (codec, format, width, height, fps, bitrate_bps, cuda);
        anyhow::bail!("NVENC encode requires Linux (FFmpeg + NVIDIA driver)")
    }
}
#[cfg(target_os = "linux")]
mod linux;
#[cfg(test)]
mod tests {
    use super::*;

    /// Zero and odd sizes are rejected regardless of codec limits.
    #[test]
    fn rejects_zero_and_odd_dimensions() {
        // zero width, zero height, odd width, odd height
        let rejected = [(0, 1080), (1920, 0), (1921, 1080), (1920, 1081)];
        for (w, h) in rejected {
            assert!(validate_dimensions(Codec::H265, w, h).is_err(), "{w}x{h}");
        }
    }

    /// H.264 accepts up to and including 4096 per side, nothing above.
    #[test]
    fn h264_capped_at_4096() {
        // 4K fits (width < 4096); 4096x4096 is exactly at the limit.
        for (w, h) in [(3840, 2160), (4096, 4096)] {
            assert!(validate_dimensions(Codec::H264, w, h).is_ok(), "{w}x{h}");
        }
        for (w, h) in [(4098, 2160), (3840, 4098)] {
            assert!(validate_dimensions(Codec::H264, w, h).is_err(), "{w}x{h}");
        }
    }

    /// HEVC and AV1 accept up to and including 8192 per side.
    #[test]
    fn hevc_and_av1_allow_up_to_8192() {
        for c in [Codec::H265, Codec::Av1] {
            // 4K, 8K, and exactly at the limit.
            for (w, h) in [(3840, 2160), (7680, 4320), (8192, 8192)] {
                assert!(validate_dimensions(c, w, h).is_ok(), "{c:?} {w}x{h}");
            }
            assert!(validate_dimensions(c, 8194, 4320).is_err(), "{c:?} 8194x4320");
        }
    }

    /// Everyday modes pass for every codec.
    #[test]
    fn common_modes_accepted() {
        let codecs = [Codec::H264, Codec::H265, Codec::Av1];
        let modes = [(1280, 720), (1920, 1080), (2560, 1440)];
        for c in codecs {
            for (w, h) in modes {
                assert!(validate_dimensions(c, w, h).is_ok(), "{c:?} {w}x{h}");
            }
        }
    }
}
+406
View File
@@ -0,0 +1,406 @@
//! NVENC encoder via `ffmpeg-next` (binds the system FFmpeg 8.x / libavcodec 62).
//!
//! CPU-path input is a packed RGB/BGR frame; `*_nvenc` accepts `rgb0`/`bgr0`/`rgba`/`bgra`
//! directly and does the RGB→YUV conversion on the GPU, so the host stays off the
//! colour-conversion path. The portal commonly negotiates packed 24-bit `RGB`, which NVENC
//! does *not* accept — we expand it to `rgb0` (one padding byte/pixel, no colour math).
//! Zero-copy input is a CUDA device buffer, copied device-to-device into a pooled surface.
//! The encoder is opened *without* a global header so VPS/SPS/PPS are emitted in-band on
//! every IDR — the output is both a playable raw Annex-B stream and self-contained AUs.
use super::{Codec, EncodedFrame, Encoder};
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
use anyhow::{anyhow, bail, Context, Result};
use ffmpeg::format::Pixel;
use ffmpeg::util::frame::Video as VideoFrame;
use ffmpeg::{codec, encoder, Dictionary, Packet, Rational};
use ffmpeg_next as ffmpeg;
use std::os::raw::c_int;
use ffmpeg::ffi; // = ffmpeg_sys_next
/// `AVCUDADeviceContext` (libavutil/hwcontext_cuda.h) — not in the ffmpeg-sys bindings (the
/// crate doesn't allowlist that header), so mirror its stable 3-pointer layout. We set the
/// first field to *our* `CUcontext` so NVENC shares the context the EGL importer maps into.
///
/// NOTE(review): this hand-written mirror is only correct while it matches the header of the
/// libavutil actually linked — re-check the layout when upgrading FFmpeg.
#[repr(C)]
struct AVCUDADeviceContext {
cuda_ctx: *mut std::ffi::c_void, // CUcontext
stream: *mut std::ffi::c_void, // CUstream (null = default)
internal: *mut std::ffi::c_void, // filled by ctx_init
}
/// CUDA hardware-frame contexts that wrap our shared `CUcontext`, so `hevc_nvenc` reads the
/// imported device buffer directly. Owns two `AVBufferRef`s, unref'd on drop.
struct CudaHw {
/// Reference to the CUDA hwdevice context (`av_hwdevice_ctx_alloc`).
device_ref: *mut ffi::AVBufferRef,
/// Reference to the hwframes context created on top of `device_ref` (`av_hwframe_ctx_alloc`).
frames_ref: *mut ffi::AVBufferRef,
}
impl CudaHw {
/// Build a CUDA hwdevice wrapping `cu_ctx` and a frames pool (`sw_format` = `pixel`).
///
/// `w`/`h` are the frame dimensions recorded in the frames context. On any failure the
/// references allocated so far are unref'd before the error is returned.
///
/// # Safety
/// `cu_ctx` is written into the device context as its `CUcontext` and used by FFmpeg from
/// then on — presumably it must be a valid context that outlives the returned value
/// (NOTE(review): confirm against `crate::zerocopy::cuda::context`).
unsafe fn new(cu_ctx: *mut std::ffi::c_void, sw_format: Pixel, w: u32, h: u32) -> Result<Self> {
let mut device_ref = ffi::av_hwdevice_ctx_alloc(ffi::AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA);
if device_ref.is_null() {
bail!("av_hwdevice_ctx_alloc(CUDA) failed");
}
// Reach through the generic device context to its CUDA-specific part and install our
// context *before* init, so FFmpeg adopts it.
let dev_ctx = (*device_ref).data as *mut ffi::AVHWDeviceContext;
let cu = (*dev_ctx).hwctx as *mut AVCUDADeviceContext;
(*cu).cuda_ctx = cu_ctx; // share the importer's context
let r = ffi::av_hwdevice_ctx_init(device_ref);
if r < 0 {
ffi::av_buffer_unref(&mut device_ref);
bail!("av_hwdevice_ctx_init failed ({r})");
}
let mut frames_ref = ffi::av_hwframe_ctx_alloc(device_ref);
if frames_ref.is_null() {
ffi::av_buffer_unref(&mut device_ref);
bail!("av_hwframe_ctx_alloc failed");
}
// Describe the pool: CUDA surfaces whose underlying layout is `sw_format`.
let fc = (*frames_ref).data as *mut ffi::AVHWFramesContext;
(*fc).format = ffi::AVPixelFormat::AV_PIX_FMT_CUDA;
(*fc).sw_format = pixel_to_av(sw_format);
(*fc).width = w as c_int;
(*fc).height = h as c_int;
(*fc).initial_pool_size = 0; // we supply the device pointers
let r = ffi::av_hwframe_ctx_init(frames_ref);
if r < 0 {
// Release in reverse order of creation: frames context first, then the device.
ffi::av_buffer_unref(&mut frames_ref);
ffi::av_buffer_unref(&mut device_ref);
bail!("av_hwframe_ctx_init failed ({r})");
}
Ok(CudaHw {
device_ref,
frames_ref,
})
}
}
impl Drop for CudaHw {
    /// Release both buffer references: the frames context first, then the device context
    /// it was created from.
    fn drop(&mut self) {
        for buffer_ref in [&mut self.frames_ref, &mut self.device_ref] {
            // SAFETY: both fields hold references created in `CudaHw::new` and owned solely
            // by this value.
            unsafe { ffi::av_buffer_unref(buffer_ref) }
        }
    }
}
/// Convert an `ffmpeg::format::Pixel` into the raw `AVPixelFormat` used by the FFI structs.
fn pixel_to_av(p: Pixel) -> ffi::AVPixelFormat {
    // ffmpeg-next provides `From<Pixel> for AVPixelFormat`; `into()` uses that conversion.
    p.into()
}
/// Pick the NVENC input pixel format for a captured layout.
///
/// Returns `(pixel, expand)`. `expand` is true for the packed 3-byte layouts (`Rgb`, `Bgr`),
/// which have no padding byte and must be widened to the matching 4-byte `*0` format before
/// being handed to the encoder.
fn nvenc_input(format: PixelFormat) -> (Pixel, bool) {
    let expand = matches!(format, PixelFormat::Rgb | PixelFormat::Bgr);
    let pixel = match format {
        PixelFormat::Bgrx | PixelFormat::Bgr => Pixel::BGRZ, // bgr0
        PixelFormat::Rgbx | PixelFormat::Rgb => Pixel::RGBZ, // rgb0
        PixelFormat::Bgra => Pixel::BGRA,
        PixelFormat::Rgba => Pixel::RGBA,
    };
    (pixel, expand)
}
/// NVENC-backed [`Encoder`]: one opened libavcodec encoder plus the per-path input state
/// (a reusable CPU frame, or CUDA hw contexts for the zero-copy path).
pub struct NvencEncoder {
/// The opened libavcodec video encoder.
enc: encoder::video::Encoder,
/// Reusable 4-bpp CPU input frame (CPU path only; `None` for the zero-copy/CUDA path).
/// Mutating it in place across frames is sound only because the encoder is opened with
/// `delay=0`/`bf=0`/`max_b_frames=0` and the caller drains `poll()` after each `submit`,
/// so libavcodec holds no reference to the previous frame's buffer when we overwrite it.
frame: Option<VideoFrame>,
/// Zero-copy path: CUDA hwdevice/hwframes contexts (the encoder takes `AV_PIX_FMT_CUDA`).
cuda: Option<CudaHw>,
/// Pixel layout the encoder was opened for; CPU frames in any other layout are rejected.
src_format: PixelFormat,
/// True when `src_format` is a packed 3-byte layout that must be widened to 4 bytes/pixel.
expand: bool,
/// Frame width in pixels; submitted frames must match.
width: u32,
/// Frame height in pixels; submitted frames must match.
height: u32,
/// Frame rate the time base was derived from (`1/fps`).
fps: u32,
/// Monotonic presentation index, in `1/fps` time-base units.
frame_idx: i64,
/// Force the next submitted frame to be an IDR (set by [`request_keyframe`]).
force_kf: bool,
}
// `CudaHw` holds raw `AVBufferRef`s; the encoder lives on a single thread. The CPU encoder is
// already `Send` via ffmpeg-next; assert it for the CUDA fields too.
// SAFETY: relies on the raw pointers in `CudaHw` being owned exclusively by this encoder.
// NOTE(review): `Send` allows moving the encoder to another thread; the CUDA path calls
// `make_current` on every submit — confirm nothing else is tied to the creating thread.
unsafe impl Send for NvencEncoder {}
impl NvencEncoder {
pub fn open(
codec: Codec,
format: PixelFormat,
width: u32,
height: u32,
fps: u32,
bitrate_bps: u64,
cuda: bool,
) -> Result<Self> {
ffmpeg::init().context("ffmpeg init")?;
if std::env::var_os("PUNKTFUNK_FFMPEG_DEBUG").is_some() {
unsafe { ffi::av_log_set_level(48) }; // AV_LOG_DEBUG — surface NVENC hw-frame rejects
}
let name = codec.nvenc_name();
let av_codec = encoder::find_by_name(name)
.ok_or_else(|| anyhow!("{name} not built into libavcodec"))?;
let (nvenc_pixel, expand) = nvenc_input(format);
let mut video = codec::context::Context::new_with_codec(av_codec)
.encoder()
.video()
.context("alloc video encoder")?;
video.set_width(width);
video.set_height(height);
video.set_format(nvenc_pixel); // NVENC converts RGB→YUV internally
video.set_time_base(Rational(1, fps as i32));
video.set_frame_rate(Some(Rational(fps as i32, 1)));
video.set_bit_rate(bitrate_bps as usize);
video.set_max_bit_rate(bitrate_bps as usize);
video.set_max_b_frames(0);
// Infinite GOP — NO periodic IDR. A keyframe at 5120x1440 is ~20-40x a P-frame, so a
// periodic IDR is a recurring multi-millisecond encode+packetize+send spike — the ~2s
// "freeze". NVENC emits one IDR at stream start, then P-frames only; `forced-idr` (below)
// turns a client recovery request (RFI, via `request_keyframe`) into an IDR on demand.
// This is the Moonlight/Sunshine low-latency model.
unsafe {
(*video.as_mut_ptr()).gop_size = -1;
}
// For the zero-copy path, take CUDA surfaces: wrap the shared CUcontext in CUDA
// hwdevice/hwframes contexts and set `pix_fmt = CUDA` on the raw encoder context
// *before* open (NVENC derives the device from `hw_frames_ctx`).
let cuda_hw = if cuda {
let cu_ctx = crate::zerocopy::cuda::context().context("shared CUDA context")?;
let hw = unsafe { CudaHw::new(cu_ctx, nvenc_pixel, width, height)? };
unsafe {
let raw = video.as_mut_ptr();
(*raw).pix_fmt = ffi::AVPixelFormat::AV_PIX_FMT_CUDA;
(*raw).hw_device_ctx = ffi::av_buffer_ref(hw.device_ref);
(*raw).hw_frames_ctx = ffi::av_buffer_ref(hw.frames_ref);
}
Some(hw)
} else {
None
};
// Low-latency NVENC tuning (plan §7 / linux-setup doc).
let mut opts = Dictionary::new();
opts.set("preset", "p1"); // fastest
opts.set("tune", "ull"); // ultra-low-latency
opts.set("rc", "cbr");
opts.set("bf", "0");
opts.set("delay", "0");
opts.set("forced-idr", "1"); // RFI/request_keyframe → real IDR under the infinite GOP
// Split-frame encode across both NVENC engines (GB203 has 2) when the pixel rate exceeds
// a single engine's HEVC capacity (~1 Gpix/s); e.g. 5120x1440@240 = 1.77 Gpix/s needs it,
// @120 = 0.88 Gpix/s does not. HEVC/AV1 only (not H.264). AUTO won't engage below ~2112px
// height, so we force `2`; below the threshold we leave it AUTO (split costs ~2% BD-rate).
// Output is standard HEVC — transparent to the client. Override with PUNKTFUNK_SPLIT_ENCODE.
let pix_rate = width as u64 * height as u64 * fps as u64;
let split = std::env::var("PUNKTFUNK_SPLIT_ENCODE").ok();
match split.as_deref() {
Some(mode) => opts.set("split_encode_mode", mode),
None if matches!(codec, Codec::H265 | Codec::Av1) && pix_rate > 1_000_000_000 => {
opts.set("split_encode_mode", "2");
tracing::info!(
pix_rate,
"NVENC: forcing 2-way split encode (high pixel rate)"
);
}
None => {}
}
let enc = video
.open_with(opts)
.with_context(|| format!("open {name} ({width}x{height}@{fps}, {bitrate_bps} bps)"))?;
let frame = if cuda {
None
} else {
Some(VideoFrame::new(nvenc_pixel, width, height))
};
Ok(NvencEncoder {
enc,
frame,
cuda: cuda_hw,
src_format: format,
expand,
width,
height,
fps,
frame_idx: 0,
force_kf: false,
})
}
}
impl Encoder for NvencEncoder {
    /// Submit one captured frame. The frame must have the dimensions the encoder was opened
    /// with; the payload kind selects the CUDA or CPU submit path.
    ///
    /// A pending keyframe request is applied to this frame and is cleared only if the submit
    /// succeeds, so a failed submit does not silently drop the client's recovery request.
    fn submit(&mut self, captured: &CapturedFrame) -> Result<()> {
        anyhow::ensure!(
            captured.width == self.width && captured.height == self.height,
            "captured frame {}x{} != encoder {}x{}",
            captured.width,
            captured.height,
            self.width,
            self.height
        );
        let pts = self.frame_idx;
        self.frame_idx += 1;
        // Force an IDR when requested (client RFI); otherwise let NVENC pick (GOP/P-frame).
        let idr = self.force_kf;
        let sent = match &captured.payload {
            FramePayload::Cuda(buf) => self.submit_cuda(buf, pts, idr),
            FramePayload::Cpu(bytes) => self.submit_cpu(bytes, captured.format, pts, idr),
        };
        // Keep the request pending when the frame never reached the encoder.
        if sent.is_ok() {
            self.force_kf = false;
        }
        sent
    }

    /// Mark the next successfully submitted frame as a forced IDR.
    fn request_keyframe(&mut self) {
        self.force_kf = true;
    }

    /// Fetch the next encoded packet, if any. Returns `Ok(None)` both when the encoder needs
    /// more input and when it is fully drained after [`flush`](Self::flush).
    fn poll(&mut self) -> Result<Option<EncodedFrame>> {
        let mut pkt = Packet::empty();
        match self.enc.receive_packet(&mut pkt) {
            Ok(()) => {
                let data = pkt.data().map(|d| d.to_vec()).unwrap_or_default();
                // Packet pts is in `1/fps` units; a missing or negative pts is treated as 0.
                let pts = pkt.pts().unwrap_or(0).max(0) as u64;
                let pts_ns = pts * 1_000_000_000 / self.fps as u64;
                Ok(Some(EncodedFrame {
                    data,
                    pts_ns,
                    keyframe: pkt.is_key(),
                }))
            }
            // No packet ready yet (need another input frame).
            Err(ffmpeg::Error::Other { errno })
                if errno == ffmpeg::util::error::EAGAIN
                    || errno == ffmpeg::util::error::EWOULDBLOCK =>
            {
                Ok(None)
            }
            // Fully drained after flush().
            Err(ffmpeg::Error::Eof) => Ok(None),
            Err(e) => Err(e).context("receive_packet"),
        }
    }

    /// Signal end-of-stream to the encoder; remaining packets are then drained via `poll`.
    fn flush(&mut self) -> Result<()> {
        self.enc.send_eof().context("send_eof")?;
        Ok(())
    }
}
impl NvencEncoder {
    /// CPU path: copy the tightly-packed captured `bytes` into the reusable 4-bpp input
    /// frame (widening 3-bpp layouts on the way), stamp it with `pts`, and send it to the
    /// encoder. `idr` marks the frame as a forced intra picture.
    ///
    /// # Errors
    /// Fails if `format` differs from the layout the encoder was opened for, if `bytes` is
    /// shorter than `width * bytes_per_pixel * height`, if the encoder was opened in CUDA
    /// mode, or if the encoder rejects the frame.
    fn submit_cpu(&mut self, bytes: &[u8], format: PixelFormat, pts: i64, idr: bool) -> Result<()> {
        anyhow::ensure!(
            format == self.src_format,
            "captured format {:?} != encoder source {:?}",
            format,
            self.src_format
        );
        let (w, h) = (self.width as usize, self.height as usize);
        let src_row = w * self.src_format.bytes_per_pixel();
        let required = src_row * h;
        anyhow::ensure!(
            bytes.len() >= required,
            "captured buffer {} bytes < required {}",
            bytes.len(),
            required
        );
        let expand = self.expand;
        let frame = self
            .frame
            .as_mut()
            .context("CPU frame missing (encoder opened in CUDA mode)")?;
        // The destination is 4 bytes/pixel and its rows may be padded to an aligned stride.
        let stride = frame.stride(0);
        let dst = frame.data_mut(0);
        for y in 0..h {
            let src = &bytes[y * src_row..][..src_row];
            if expand {
                // Packed 3-bpp RGB/BGR → 4-bpp `*0`: copy the colour bytes, zero the pad.
                let dst_row = &mut dst[y * stride..][..w * 4];
                for (out, px) in dst_row.chunks_exact_mut(4).zip(src.chunks_exact(3)) {
                    out[..3].copy_from_slice(px);
                    out[3] = 0;
                }
            } else {
                // Already 4-bpp: copy the row as-is; only the row pitch differs.
                dst[y * stride..][..src_row].copy_from_slice(src);
            }
        }
        frame.set_pts(Some(pts));
        let kind = if idr {
            ffmpeg::picture::Type::I
        } else {
            ffmpeg::picture::Type::None
        };
        frame.set_kind(kind);
        self.enc.send_frame(frame).context("send_frame")?;
        Ok(())
    }

    /// Zero-copy path: feed the imported CUDA device buffer to NVENC without touching the
    /// CPU.
    ///
    /// The imported buffer is device→device-copied into a *pooled* surface from the CUDA
    /// hwframes context (`av_hwframe_get_buffer`) instead of being wrapped in a bare frame:
    /// (1) NVENC's `nvenc_send_frame` ignores frames whose `buf[0]` is null and the generic
    /// encode path's `av_frame_ref` needs a refcounted buffer, so a bare frame is rejected
    /// with `EINVAL`; (2) NVENC caches CUDA-resource registrations keyed by device pointer in
    /// a bounded table, so a fresh pointer per frame would thrash or overflow it, while the
    /// pool recycles a small set. The copy is device-local (~8 MB at 1080p, sub-millisecond)
    /// and keeps the host off the pixel path.
    fn submit_cuda(
        &mut self,
        buf: &crate::zerocopy::DeviceBuffer,
        pts: i64,
        idr: bool,
    ) -> Result<()> {
        /// Owns an `AVFrame` and frees it on every exit path.
        struct FrameGuard(*mut ffi::AVFrame);
        impl Drop for FrameGuard {
            fn drop(&mut self) {
                // SAFETY: the pointer came from `av_frame_alloc` and is freed exactly once.
                unsafe { ffi::av_frame_free(&mut self.0) }
            }
        }

        let hw = self
            .cuda
            .as_ref()
            .context("CUDA hw context missing (encoder opened in CPU mode)")?;
        let frames_ref = hw.frames_ref;
        // The copy below runs in our shared context, so it has to be current on the encode
        // thread (ffmpeg pushes its own around the pool allocation, so the order is fine).
        crate::zerocopy::cuda::make_current().context("CUDA context current (encode thread)")?;
        let pict_type = if idr {
            ffi::AVPictureType::AV_PICTURE_TYPE_I
        } else {
            ffi::AVPictureType::AV_PICTURE_TYPE_NONE
        };
        unsafe {
            let raw = ffi::av_frame_alloc();
            if raw.is_null() {
                bail!("av_frame_alloc failed");
            }
            let frame = FrameGuard(raw);
            // Pooled CUDA surface: fills in format, width/height, data[0]/linesize[0],
            // buf[0] and hw_frames_ctx. The pool recycles surfaces across frames, which
            // keeps NVENC's registration cache warm.
            let r = ffi::av_hwframe_get_buffer(frames_ref, frame.0, 0);
            if r < 0 {
                bail!("av_hwframe_get_buffer(CUDA) failed ({r})");
            }
            let dst_ptr = (*frame.0).data[0] as crate::zerocopy::cuda::CUdeviceptr;
            let dst_pitch = (*frame.0).linesize[0] as usize;
            crate::zerocopy::cuda::copy_device_to_device(buf, dst_ptr, dst_pitch)
                .context("copy imported buffer into NVENC surface")?;
            (*frame.0).pts = pts;
            (*frame.0).pict_type = pict_type;
            let r = ffi::avcodec_send_frame(self.enc.as_mut_ptr(), frame.0);
            // Our reference is no longer needed whether or not the send succeeded.
            drop(frame);
            if r < 0 {
                bail!("avcodec_send_frame(CUDA) failed ({r})");
            }
        }
        Ok(())
    }
}
@@ -0,0 +1,143 @@
//! The app catalog: what `/applist` advertises and what `/launch?appid=N` selects. Each entry
//! maps to a session recipe — which compositor backend hosts it and (for gamescope) which
//! command runs nested. Loaded from `~/.config/punktfunk/apps.json`; sensible defaults otherwise.
//!
//! ```json
//! [ {"id":1,"title":"Desktop"},
//! {"id":2,"title":"Steam","compositor":"gamescope","cmd":"steam -gamepadui"} ]
//! ```
use serde_json::Value;
/// One entry of the app catalog: what `/applist` lists and what `/launch?appid=N` selects.
#[derive(Clone, Debug)]
pub struct AppEntry {
/// Numeric app id (the `<ID>` rendered in the `/applist` XML).
pub id: u32,
/// Human-readable title (the `<AppTitle>` rendered in the `/applist` XML).
pub title: String,
/// `None` = auto-detect (the desktop session's compositor).
pub compositor: Option<crate::vdisplay::Compositor>,
/// Command gamescope runs nested (gamescope entries only).
pub cmd: Option<String>,
}
/// Location of the user's catalog file, `$HOME/.config/punktfunk/apps.json`.
///
/// Returns `None` when `HOME` is unset or is not valid Unicode.
fn config_path() -> Option<std::path::PathBuf> {
    let home = std::env::var("HOME").ok()?;
    let mut path = std::path::PathBuf::from(home);
    path.push(".config/punktfunk/apps.json");
    Some(path)
}
fn parse_compositor(s: &str) -> Option<crate::vdisplay::Compositor> {
use crate::vdisplay::Compositor::*;
match s.to_ascii_lowercase().as_str() {
"kwin" | "kde" => Some(Kwin),
"mutter" | "gnome" => Some(Mutter),
"gamescope" => Some(Gamescope),
"wlroots" | "sway" => Some(Wlroots),
_ => None,
}
}
/// The catalog: the user's `apps.json` if present, else defaults (Desktop, plus gamescope
/// entries when gamescope is installed).
pub fn catalog() -> Vec<AppEntry> {
if let Some(path) = config_path() {
if let Ok(raw) = std::fs::read_to_string(&path) {
match serde_json::from_str::<Value>(&raw) {
Ok(Value::Array(items)) => {
let apps: Vec<AppEntry> = items
.iter()
.filter_map(|it| {
Some(AppEntry {
id: it.get("id")?.as_u64()? as u32,
title: it.get("title")?.as_str()?.to_string(),
compositor: it
.get("compositor")
.and_then(|c| c.as_str())
.and_then(parse_compositor),
cmd: it.get("cmd").and_then(|c| c.as_str()).map(String::from),
})
})
.collect();
if !apps.is_empty() {
return apps;
}
tracing::warn!(path = %path.display(), "apps.json parsed to zero entries — using defaults");
}
_ => {
tracing::warn!(path = %path.display(), "apps.json malformed — using defaults")
}
}
}
}
let mut apps = vec![AppEntry {
id: 1,
title: "Desktop".into(),
compositor: None,
cmd: None,
}];
if which("gamescope") {
if which("steam") {
apps.push(AppEntry {
id: 2,
title: "Steam".into(),
compositor: Some(crate::vdisplay::Compositor::Gamescope),
cmd: Some("steam -gamepadui".into()),
});
}
if which("vkcube") {
apps.push(AppEntry {
id: 3,
title: "vkcube (test)".into(),
compositor: Some(crate::vdisplay::Compositor::Gamescope),
cmd: Some("vkcube".into()),
});
}
}
apps
}
/// Look up the catalog entry with the given `id`; the first match wins.
///
/// The catalog is rebuilt on each call (see [`catalog`]).
pub fn by_id(id: u32) -> Option<AppEntry> {
    for entry in catalog() {
        if entry.id == id {
            return Some(entry);
        }
    }
    None
}
/// Render the GameStream `/applist` XML.
///
/// Emits one `<App>` element per catalog entry (title XML-escaped, HDR always reported as
/// unsupported) inside a `<root status_code="200">` document.
pub fn applist_xml() -> String {
    let entries: String = catalog()
        .iter()
        .map(|app| {
            format!(
                "<App>\n<IsHdrSupported>0</IsHdrSupported>\n<AppTitle>{}</AppTitle>\n<ID>{}</ID>\n</App>\n",
                xml_escape(&app.title),
                app.id
            )
        })
        .collect();
    let mut xml =
        String::from("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\">\n");
    xml.push_str(&entries);
    xml.push_str("</root>\n");
    xml
}
/// Escape `&`, `<` and `>` for use as XML element text. Quotes are left untouched.
fn xml_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Whether some directory on `PATH` contains a regular file named `bin`.
///
/// Only existence as a file is checked, not the executable bit. Returns `false` when `PATH`
/// is unset.
fn which(bin: &str) -> bool {
    let Some(path_var) = std::env::var_os("PATH") else {
        return false;
    };
    for dir in std::env::split_paths(&path_var) {
        if dir.join(bin).is_file() {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The catalog must offer the "Desktop" entry under id 1.
    // NOTE(review): `catalog()` prefers the user's `apps.json`, so this presumably fails on a
    // machine whose file has no such entry — confirm the test environment has none.
    #[test]
    fn default_catalog_has_desktop() {
        let has_desktop = catalog()
            .into_iter()
            .any(|app| app.id == 1 && app.title == "Desktop");
        assert!(has_desktop);
    }

    /// Cheap shape checks on the rendered XML: prolog present, Desktop listed, and as many
    /// `<App>` openers as closers.
    #[test]
    fn applist_xml_is_wellformed_ish() {
        let doc = applist_xml();
        assert!(doc.contains("<AppTitle>Desktop</AppTitle>"));
        assert!(doc.starts_with("<?xml"));
        let opened = doc.matches("<App>").count();
        let closed = doc.matches("</App>").count();
        assert_eq!(opened, closed);
    }
}
@@ -0,0 +1,188 @@
//! The audio data plane (UDP 48000). On RTSP PLAY we learn the client's audio endpoint from
//! its port-learning ping, capture the default-sink monitor, Opus-encode 5 ms stereo frames,
//! and send each as a GameStream RTP audio packet.
//!
//! Wire format (moonlight-common-c `AudioStream.c`): a 12-byte big-endian `RTP_PACKET`
//! (`packetType = 97`, `sequenceNumber++`, `timestamp += packetDuration`, `ssrc = 0`)
//! followed by the AES-128-CBC-encrypted Opus payload. Stereo Opus is a single coupled
//! multistream, so a plain `opus_encode` bitstream is what the client's multistream decoder
//! expects. Like the control stream, modern Moonlight always AES-CBC-decrypts audio (it
//! reports "Failed to decrypt audio packet" on plaintext), so we encrypt the payload under the
//! `/launch` `rikey` with a per-packet IV `BE32(rikeyid + seq)` (PKCS7 padding, RTP header
//! left in the clear). Reed-Solomon audio FEC is layered on top in P1.5.
use super::AUDIO_PORT;
use crate::audio::{self, AudioCapturer, CHANNELS, SAMPLE_RATE};
use anyhow::{Context, Result};
use cbc::cipher::{block_padding::Pkcs7, BlockEncryptMut, KeyIvInit};
use opus::{Application, Bitrate, Channels, Encoder};
use std::net::UdpSocket;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
/// AES-128 in CBC mode, encrypt direction — the cipher applied to each Opus payload.
type Aes128CbcEnc = cbc::Encryptor<aes::Aes128>;
/// Opus frame duration; 5 ms is moonlight's default (`x-nv-aqos.packetDuration`).
/// Also the amount the RTP timestamp advances per packet.
const FRAME_MS: usize = 5;
/// Samples per channel per Opus frame (48 kHz · 5 ms = 240).
const SAMPLES_PER_FRAME: usize = SAMPLE_RATE as usize * FRAME_MS / 1000;
/// RTP payload type for audio (moonlight `AudioStream.c` checks `packetType == 97`).
const AUDIO_PACKET_TYPE: u8 = 97;
/// Target Opus bitrate in bits per second (handed to the encoder as `Bitrate::Bits`).
const OPUS_BITRATE: i32 = 128_000;
/// Slot for the persistent audio capturer, reused across streams (no leaked PipeWire thread).
/// `None` while a stream has the capturer checked out, or before the first stream opened one.
pub type AudioCapSlot = Arc<std::sync::Mutex<Option<Box<dyn AudioCapturer>>>>;
/// Spawn the audio stream thread (idempotent via `running`). Stops when `running` clears.
/// `gcm_key`/`rikeyid` come from `/launch` and key the AES-CBC payload encryption.
pub fn start(running: Arc<AtomicBool>, gcm_key: [u8; 16], rikeyid: i32, audio_cap: AudioCapSlot) {
let _ = std::thread::Builder::new()
.name("punktfunk-audio".into())
.spawn(move || {
tracing::info!("audio stream starting");
if let Err(e) = run(&running, &gcm_key, rikeyid, &audio_cap) {
tracing::error!(error = %format!("{e:#}"), "audio stream failed");
}
running.store(false, Ordering::SeqCst);
tracing::info!("audio stream stopped");
});
}
/// Body of the audio thread: bind the audio port, learn the client's endpoint from its first
/// ping, then stream until `running` clears or the stream fails.
///
/// The capturer is taken out of `audio_cap` for the duration of the stream (opened fresh if
/// the slot is empty, drained of stale audio otherwise) and put back afterwards, also when
/// streaming returned an error.
///
/// Errors if the socket cannot be bound or configured, if no ping arrives within 10 s, if the
/// capturer cannot be opened, or if streaming itself fails.
fn run(
    running: &AtomicBool,
    gcm_key: &[u8; 16],
    rikeyid: i32,
    audio_cap: &std::sync::Mutex<Option<Box<dyn AudioCapturer>>>,
) -> Result<()> {
    let sock = UdpSocket::bind(("0.0.0.0", AUDIO_PORT)).context("bind audio UDP")?;
    // The client pings this port (~every 500 ms); the first datagram tells us where to send.
    let ping_deadline = Duration::from_secs(10);
    sock.set_read_timeout(Some(ping_deadline))?;
    tracing::info!(port = AUDIO_PORT, "audio: awaiting client ping");
    let mut ping = [0u8; 256];
    let (_, client) = sock
        .recv_from(&mut ping)
        .context("audio: no client ping within 10s")?;
    sock.connect(client)
        .context("connect client audio endpoint")?;
    tracing::info!(%client, "audio: client endpoint learned");

    // Check the persistent capturer out of its slot, creating it on the first stream.
    let mut capturer = match audio_cap.lock().unwrap().take() {
        Some(mut parked) => {
            // Whatever was buffered while idle is stale by now.
            parked.drain();
            parked
        }
        None => audio::open_audio_capture().context("open audio capture")?,
    };
    let outcome = audio_body(&mut *capturer, &sock, gcm_key, rikeyid, running);
    // Return the capturer for the next stream regardless of how this one ended.
    *audio_cap.lock().unwrap() = Some(capturer);
    outcome
}
fn audio_body(
cap: &mut dyn AudioCapturer,
sock: &UdpSocket,
gcm_key: &[u8; 16],
rikeyid: i32,
running: &AtomicBool,
) -> Result<()> {
// RESTRICTED_LOWDELAY + CBR, matching Sunshine — CBR keeps the Opus TOC byte constant,
// which the client asserts per stream.
let mut enc = Encoder::new(SAMPLE_RATE, Channels::Stereo, Application::LowDelay)
.context("create Opus encoder")?;
enc.set_bitrate(Bitrate::Bits(OPUS_BITRATE)).ok();
enc.set_vbr(false).ok();
let frame_len = SAMPLES_PER_FRAME * CHANNELS; // interleaved samples per Opus frame
let mut acc: Vec<f32> = Vec::with_capacity(frame_len * 4);
let mut out = vec![0u8; 1400];
let mut seq: u16 = 0;
let mut timestamp: u32 = 0;
let mut sent: u64 = 0;
// Pacing anchor: PipeWire hands us large capture buffers (~1024 frames), so we'd otherwise
// emit packets in bursts the client's low-latency jitter buffer hears as glitching. Emit
// each frame at its 5 ms slot instead. Production is real-time, so the backlog stays small.
let start = Instant::now();
let mut frame_no: u64 = 0;
// Optional linear gain for quiet capture sources (PUNKTFUNK_AUDIO_GAIN, default 1.0).
let gain: f32 = std::env::var("PUNKTFUNK_AUDIO_GAIN")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(1.0);
while running.load(Ordering::SeqCst) {
let chunk = cap.next_chunk().context("capture audio chunk")?;
acc.extend_from_slice(&chunk);
while acc.len() >= frame_len {
let mut frame: Vec<f32> = acc.drain(..frame_len).collect();
if gain != 1.0 {
for s in &mut frame {
*s = (*s * gain).clamp(-1.0, 1.0);
}
}
let n = enc.encode_float(&frame, &mut out).context("opus encode")?;
// AES-128-CBC the Opus payload (RTP header stays plaintext). Per-packet IV =
// BE32(rikeyid + seq) in [0..4], zero elsewhere; PKCS7 padding.
let iv_seq = (rikeyid as u32).wrapping_add(seq as u32);
let mut iv = [0u8; 16];
iv[0..4].copy_from_slice(&iv_seq.to_be_bytes());
let ct = Aes128CbcEnc::new(gcm_key.into(), (&iv).into())
.encrypt_padded_vec_mut::<Pkcs7>(&out[..n]);
let pkt = build_rtp(seq, timestamp, &ct);
if sock.send(&pkt).is_err() {
tracing::info!(sent, "audio: client unreachable — stopping");
return Ok(());
}
seq = seq.wrapping_add(1);
// GameStream's audio RTP timestamp ticks by packetDuration (ms), not by samples.
timestamp = timestamp.wrapping_add(FRAME_MS as u32);
sent += 1;
if sent % 400 == 0 {
tracing::info!(sent, "audio: streaming");
}
// Hold each frame to its 5 ms slot (skip if we've fallen behind a burst).
frame_no += 1;
let scheduled = start + Duration::from_millis(5 * frame_no);
let now = Instant::now();
if scheduled > now {
std::thread::sleep((scheduled - now).min(Duration::from_millis(20)));
}
}
}
Ok(())
}
/// Build a GameStream RTP audio packet: 12-byte BE `RTP_PACKET` header + Opus payload.
///
/// Header layout: `0x80` (RTP v2, no padding/extension/CSRC), the audio packet type, the
/// sequence number, the timestamp, and an all-zero SSRC. `opus` is appended unchanged.
fn build_rtp(seq: u16, timestamp: u32, opus: &[u8]) -> Vec<u8> {
    let [seq_hi, seq_lo] = seq.to_be_bytes();
    let [ts0, ts1, ts2, ts3] = timestamp.to_be_bytes();
    let header: [u8; 12] = [
        0x80,
        AUDIO_PACKET_TYPE,
        seq_hi,
        seq_lo,
        ts0,
        ts1,
        ts2,
        ts3,
        0,
        0,
        0,
        0, // ssrc
    ];
    let mut packet = Vec::with_capacity(header.len() + opus.len());
    packet.extend_from_slice(&header);
    packet.extend_from_slice(opus);
    packet
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every header field lands at its fixed offset, big-endian, ahead of the payload.
    #[test]
    fn rtp_header_layout() {
        let packet = build_rtp(0x0102, 0x03040506, &[0xaa, 0xbb]);
        let (header, payload) = packet.split_at(12);
        assert_eq!(header[0], 0x80);
        assert_eq!(header[1], 97);
        assert_eq!(&header[2..4], &[0x01, 0x02]); // seq BE
        assert_eq!(&header[4..8], &[0x03, 0x04, 0x05, 0x06]); // timestamp BE
        assert_eq!(&header[8..12], &[0, 0, 0, 0]); // ssrc
        assert_eq!(payload, &[0xaa, 0xbb]); // opus payload
    }

    /// 48 kHz at 5 ms per frame is 240 samples per channel.
    #[test]
    fn frame_sizing() {
        assert_eq!(SAMPLES_PER_FRAME, 240);
    }
}
@@ -0,0 +1,86 @@
//! The host's self-signed RSA-2048 identity: the cert returned to clients as `plaincert`
//! during pairing AND presented as the TLS server cert on 47984 (Moonlight pins it). The
//! cert's own X.509 signature bytes are an input to the pairing hashes, so we extract them.
use super::config_dir;
use anyhow::{anyhow, Context, Result};
use rsa::pkcs1v15::SigningKey;
use rsa::pkcs8::DecodePrivateKey;
use rsa::RsaPrivateKey;
use sha2::Sha256;
use std::fs;
/// The host's long-lived identity: certificate, private key, and the two values derived from
/// them that pairing needs (the cert's signature bytes and a signer over the key).
pub struct ServerIdentity {
/// PEM of the cert (returned hex-encoded as `plaincert`; also the TLS server cert).
pub cert_pem: String,
/// PKCS#8 PEM of the private key (TLS server key).
pub key_pem: String,
/// The cert's X.509 `signatureValue` bytes — bound into the pairing challenge hashes.
pub signature: Vec<u8>,
/// RSA-PKCS1v15-SHA256 signer over the host key (the pairing `sign256`).
pub signing_key: SigningKey<Sha256>,
}
impl ServerIdentity {
pub fn load_or_create() -> Result<ServerIdentity> {
let dir = config_dir();
let cert_path = dir.join("cert.pem");
let key_path = dir.join("key.pem");
let (cert_pem, key_pem) = match (
fs::read_to_string(&cert_path),
fs::read_to_string(&key_path),
) {
(Ok(c), Ok(k)) if !c.trim().is_empty() && !k.trim().is_empty() => (c, k),
_ => {
let (c, k) = generate()?;
fs::create_dir_all(&dir).ok();
fs::write(&cert_path, &c)
.with_context(|| format!("write {}", cert_path.display()))?;
fs::write(&key_path, &k)
.with_context(|| format!("write {}", key_path.display()))?;
tracing::info!(path = %cert_path.display(), "generated punktfunk host certificate (RSA-2048)");
(c, k)
}
};
Self::from_pems(cert_pem, key_pem)
}
/// Build an identity from PEMs (no I/O).
pub fn from_pems(cert_pem: String, key_pem: String) -> Result<ServerIdentity> {
let priv_key = RsaPrivateKey::from_pkcs8_pem(&key_pem).context("parse host private key")?;
let signing_key = SigningKey::<Sha256>::new(priv_key);
let signature = cert_signature(&cert_pem)?;
Ok(ServerIdentity {
cert_pem,
key_pem,
signature,
signing_key,
})
}
/// Throwaway in-memory identity — nothing touches the config dir (used by tests).
pub fn ephemeral() -> Result<ServerIdentity> {
let (cert_pem, key_pem) = generate()?;
Self::from_pems(cert_pem, key_pem)
}
}
/// Generate a fresh self-signed identity and return `(cert PEM, private-key PEM)`.
///
/// The key pair uses rcgen's `PKCS_RSA_SHA256` algorithm; the certificate carries no subject
/// alternative names, has common name `punktfunk`, and is valid 2020-01-01 to 2040-01-01.
fn generate() -> Result<(String, String)> {
    let key_pair =
        rcgen::KeyPair::generate_for(&rcgen::PKCS_RSA_SHA256).context("rcgen RSA keygen")?;
    let no_alt_names: Vec<String> = Vec::new();
    let mut params = rcgen::CertificateParams::new(no_alt_names).context("cert params")?;
    params
        .distinguished_name
        .push(rcgen::DnType::CommonName, "punktfunk");
    params.not_before = rcgen::date_time_ymd(2020, 1, 1);
    params.not_after = rcgen::date_time_ymd(2040, 1, 1);
    let cert = params.self_signed(&key_pair).context("self-sign cert")?;
    let cert_pem = cert.pem();
    let key_pem = key_pair.serialize_pem();
    Ok((cert_pem, key_pem))
}
/// Extract the X.509 `signatureValue` bytes from a cert PEM.
///
/// Errors if `cert_pem` is not valid PEM or its contents are not a parseable certificate.
fn cert_signature(cert_pem: &str) -> Result<Vec<u8>> {
    let decoded = x509_parser::pem::parse_x509_pem(cert_pem.as_bytes());
    let (_, pem) = decoded.map_err(|e| anyhow!("parse cert pem: {e}"))?;
    let cert = pem.parse_x509().context("parse x509")?;
    let signature = cert.signature_value.data.to_vec();
    Ok(signature)
}
@@ -0,0 +1,428 @@
//! The GameStream control stream: an ENet host on UDP 47999. Moonlight connects this
//! BEFORE the video stream starts (`STAGE_CONTROL_STREAM_START` precedes
//! `STAGE_VIDEO_STREAM_START`), so it must be up or the whole connection aborts. It carries
//! input (mouse/keyboard/gamepad), keepalives, and QoS feedback.
//!
//! Sunshine-mode hosts (we advertise `state=SUNSHINE_SERVER_FREE`) make Moonlight encrypt the
//! control stream with AES-128-GCM under the `/launch` `rikey`, even though we negotiate no
//! media encryption. Wire framing (all little-endian):
//!
//! ```text
//! u16 encType = 0x0001 | u16 length | u32 seq | [16-byte GCM tag] | ciphertext
//! length = sizeof(seq) + 16 (tag) + plaintext
//! ```
//!
//! The GCM nonce depends on what Moonlight negotiated (`encryptControlMessage` in
//! moonlight-common-c). For `SS_ENC_CONTROL_V2` it is a 12-byte nonce with `seq` (LE) in bytes
//! [0..4] and `b"CC"` (client→host) at [10..12]. For the legacy path — which we hit, since we
//! advertise no encryption — it is a 16-byte nonce with only `iv[0] = seq & 0xff` and the rest
//! zero. The tag is prepended to the ciphertext; there is no AAD; the key is the forward
//! `hex::decode(rikey)`. We auto-detect the exact scheme via [`decrypt_control`] on the first
//! packet that authenticates, since GCM gives no partial credit.
//!
//! Runs on its own native thread for the host's lifetime.
use super::{AppState, CONTROL_PORT};
use crate::inject::gamepad::GamepadManager;
use crate::inject::InputInjector;
use anyhow::{anyhow, Context, Result};
use rusty_enet::{Event, Host, HostSettings, Packet, PeerID};
use std::net::UdpSocket;
use std::sync::Arc;
use std::time::Duration;
/// Bind the ENet control host on 47999 and service it forever on a dedicated thread.
///
/// Returns once the thread is running; the thread itself never exits. Each pass of its loop
/// drains all pending ENet events, pumps the virtual pads' rumble/force-feedback state, then
/// sleeps 2 ms.
///
/// Errors if the UDP socket cannot be bound or made non-blocking, if the ENet host cannot be
/// created, or if the thread cannot be spawned.
pub fn spawn(state: Arc<AppState>) -> Result<()> {
let socket = UdpSocket::bind(("0.0.0.0", CONTROL_PORT)).context("bind control UDP")?;
socket
.set_nonblocking(true)
.context("control socket nonblocking")?;
let mut host = Host::new(
socket,
HostSettings {
peer_limit: 4,
// Moonlight connects with CTRL_CHANNEL_COUNT (0x30) channels and sends gamepad
// input on channel 0x10+n — a smaller limit silently discards controller input.
channel_limit: 0x30,
..Default::default()
},
)
.map_err(|e| anyhow!("ENet host init: {e:?}"))?;
tracing::info!(port = CONTROL_PORT, "ENet control listening");
std::thread::Builder::new()
.name("punktfunk-control".into())
.spawn(move || {
// Thread-local (the injector owns non-Send Wayland/xkb state, so it must be
// created and live here rather than be captured into the closure).
// GCM scheme detected from the first authenticating packet; reused thereafter.
let mut detected: Option<Scheme> = None;
// Lazily opened on the first input event (Sway's Wayland socket is up by then).
let mut injector: Option<Box<dyn InputInjector>> = None;
// Virtual gamepads (uinput) + the host→client rumble sequence counter.
let mut pads = GamepadManager::new();
let mut rumble_seq: u32 = 0;
// The connected client, if any; rumble is only sent while this is set.
let mut peer: Option<PeerID> = None;
loop {
// Drain every event ENet has ready before doing the per-tick work below.
loop {
match host.service() {
Ok(Some(event)) => match event {
Event::Connect { peer: p, .. } => {
tracing::info!("control: client connected");
peer = Some(p.id());
}
Event::Disconnect { .. } => {
tracing::info!("control: client disconnected");
// Forget the learned scheme so the next connection detects its own.
detected = None;
peer = None;
// Unplug the session's virtual pads.
pads = GamepadManager::new();
}
Event::Receive {
channel_id, packet, ..
} => {
on_receive(
&state,
channel_id,
packet.data(),
&mut detected,
&mut injector,
&mut pads,
);
}
},
Ok(None) => break,
Err(e) => {
tracing::warn!(error = %format!("{e:?}"), "control: service error");
break;
}
}
}
// Service the pads' force-feedback protocol every tick (games block inside
// EVIOCSFF until answered) and relay mixed rumble levels to the client.
if let (Some(pid), Some(scheme)) = (peer, detected) {
let key = state.launch.lock().unwrap().map(|s| s.gcm_key);
if let Some(key) = key {
// Collect the sealed messages first, then send them once the pump is done.
let mut out: Vec<Vec<u8>> = Vec::new();
pads.pump_rumble(|index, low, high| {
let pt = super::gamepad::rumble_plaintext(index, low, high);
out.push(encrypt_control(&key, &scheme, rumble_seq, &pt));
rumble_seq = rumble_seq.wrapping_add(1);
});
for wire in out {
if let Err(e) = host.peer_mut(pid).send(0, &Packet::reliable(&wire[..]))
{
tracing::warn!(error = %format!("{e:?}"), "rumble send failed");
}
}
}
} else {
// No client/scheme yet: still answer FF uploads so games don't block.
pads.pump_rumble(|_, _, _| {});
}
// ENet needs frequent servicing for handshake/keepalive/retransmit.
std::thread::sleep(Duration::from_millis(2));
}
})
.context("spawn control thread")?;
Ok(())
}
/// Handle one received control packet: decrypt it (learning the GCM scheme on the first one),
/// decode any input event, and inject it into the host session.
///
/// Packets are dropped silently when no `/launch` key exists yet or the framing is not the
/// encrypted kind, and with a warning when decryption fails. A decrypted message is then
/// treated, in this order, as a keyframe request, a controller event, or mouse/keyboard
/// input; anything else is ignored.
fn on_receive(
    state: &AppState,
    _channel_id: u8,
    d: &[u8],
    detected: &mut Option<Scheme>,
    injector: &mut Option<Box<dyn InputInjector>>,
    pads: &mut GamepadManager,
) {
    // Control traffic can arrive before /launch has provided a key; nothing to do with it.
    let launch_key = state.launch.lock().unwrap().map(|s| s.gcm_key);
    let Some(key) = launch_key else {
        return;
    };

    // Encrypted control packets begin with u16 LE encType = 0x0001 and an 8-byte header.
    let framed = d.len() >= 8 && d[0] == 0x01 && d[1] == 0x00;
    if !framed {
        return;
    }

    let Some((scheme, pt)) = decrypt_control(&key, d, detected) else {
        tracing::warn!(len = d.len(), "control: GCM decrypt failed");
        return;
    };
    if detected.is_none() {
        tracing::info!(?scheme, "control: GCM scheme locked in");
    }
    *detected = Some(scheme);

    // Inner types 0x0301, 0x0302 and 0x0305 are recovery requests after loss: force a
    // keyframe so the client can resync without a multi-second stall.
    if let Some(head) = pt.get(..2) {
        let inner = u16::from_le_bytes([head[0], head[1]]);
        if matches!(inner, 0x0301 | 0x0302 | 0x0305) {
            state
                .force_idr
                .store(true, std::sync::atomic::Ordering::SeqCst);
            tracing::info!(
                ty = format!("{inner:#06x}"),
                "control: IDR/RFI request → keyframe"
            );
            return;
        }
    }

    // Controller events feed the uinput virtual pads and never reach the injector.
    if let Some(gp) = super::gamepad::decode(&pt) {
        pads.handle(&gp);
        return;
    }

    let events = super::input::decode(&pt);
    if events.is_empty() {
        return; // keepalive / QoS / unhandled input kind
    }

    // Open the injector on demand — by the first input event the compositor session is up.
    // Backend auto-selects per desktop (wlr on Sway, libei on KWin/GNOME); override with
    // PUNKTFUNK_INPUT_BACKEND.
    if injector.is_none() {
        let backend = crate::inject::default_backend();
        match crate::inject::open(backend) {
            Err(e) => {
                tracing::error!(error = %format!("{e:#}"), "input injection unavailable");
                return;
            }
            Ok(opened) => {
                tracing::info!(?backend, "input injection backend opened");
                *injector = Some(opened);
            }
        }
    }
    // Always `Some` here: either it already was, or it was just opened above.
    let Some(inj) = injector.as_mut() else {
        return;
    };
    for ev in events {
        if let Err(e) = inj.inject(&ev) {
            tracing::warn!(error = %format!("{e:#}"), "inject failed");
        }
    }
}
/// How a control packet's nonce is built — Moonlight picks one based on the negotiated flags.
#[derive(Clone, Copy, Debug)]
enum NonceKind {
    /// `SS_ENC_CONTROL_V2`: 12-byte nonce, `seq` in [0..4], marker bytes at [10..12].
    V2 { seq_be: bool, marker: [u8; 2] },
    /// Legacy: 16-byte nonce, only `iv[0] = seq & 0xff` (the rest zero).
    LegacyLowByte,
    /// Legacy variant: 16-byte nonce, full `seq` in [0..4] (the rest zero).
    Legacy16Seq { seq_be: bool },
}

impl NonceKind {
    /// Materialise the nonce bytes for packet sequence number `seq`.
    ///
    /// The result is 12 bytes for [`NonceKind::V2`] and 16 bytes for both legacy kinds;
    /// `seq_be` selects big- versus little-endian encoding of `seq`.
    fn nonce(&self, seq: u32) -> Vec<u8> {
        let encoded = |big_endian: bool| {
            if big_endian {
                seq.to_be_bytes()
            } else {
                seq.to_le_bytes()
            }
        };
        match *self {
            NonceKind::LegacyLowByte => {
                let mut iv = vec![0u8; 16];
                // Only the least-significant byte of `seq` takes part.
                iv[0] = seq.to_le_bytes()[0];
                iv
            }
            NonceKind::Legacy16Seq { seq_be } => {
                let mut iv = encoded(seq_be).to_vec();
                iv.resize(16, 0);
                iv
            }
            NonceKind::V2 { seq_be, marker } => {
                let mut iv = Vec::with_capacity(12);
                iv.extend_from_slice(&encoded(seq_be));
                iv.extend_from_slice(&[0u8; 6]); // bytes [4..10] stay zero
                iv.extend_from_slice(&marker);
                iv
            }
        }
    }
}
/// The byte-exact GCM scheme that opened a control packet. Determined empirically once per
/// connection (AES-GCM gives no partial credit, so an authenticating combination is proof).
#[derive(Clone, Copy, Debug)]
struct Scheme {
/// `gcm_key` is byte-reversed before use (defensive; Sunshine's net effect is forward).
key_rev: bool,
/// How the nonce is derived from the packet's sequence number.
nonce: NonceKind,
/// GCM tag sits before the ciphertext (vs after).
tag_first: bool,
/// What, if anything, is authenticated alongside the ciphertext.
aad: Aad,
}
/// The additional authenticated data (AAD) a [`Scheme`] feeds to AES-GCM.
#[derive(Clone, Copy, Debug)]
enum Aad {
/// No AAD at all.
None,
/// The 4-byte cleartext header prefix (encType + length), `d[0..4]`.
Header4,
}
impl Scheme {
    /// The AES key this scheme actually uses: `base` as given, or byte-reversed when
    /// `key_rev` is set.
    fn key(&self, base: &[u8; 16]) -> [u8; 16] {
        if !self.key_rev {
            return *base;
        }
        let mut flipped = *base;
        flipped.reverse();
        flipped
    }
}
/// Open an encrypted control packet `d` (8-byte cleartext header + `[tag?][ciphertext]`). If
/// `detected` is set only that scheme is tried (fast path); otherwise the full cross-product
/// of plausible schemes (nonce construction × key byte-order × tag position × AAD) is swept
/// and the combination whose GCM tag authenticates is returned.
///
/// Returns `None` when `d` is too short to hold the header plus a 16-byte tag, or when no
/// attempted scheme authenticates.
fn decrypt_control(
    key: &[u8; 16],
    d: &[u8],
    detected: &Option<Scheme>,
) -> Option<(Scheme, Vec<u8>)> {
    const HEADER_LEN: usize = 8;
    const TAG_LEN: usize = 16;
    // Guard here rather than rely on the caller: the header indexing below would panic on a
    // packet shorter than 8 bytes, and anything without room for a tag cannot authenticate.
    if d.len() < HEADER_LEN + TAG_LEN {
        return None;
    }
    let seq = u32::from_le_bytes([d[4], d[5], d[6], d[7]]);
    let payload = &d[HEADER_LEN..];

    let attempt = |s: Scheme| -> Option<Vec<u8>> {
        // aes-gcm wants `ciphertext || tag`; reassemble from whichever wire order this is.
        let (ct, tag) = if s.tag_first {
            (&payload[TAG_LEN..], &payload[..TAG_LEN])
        } else {
            payload.split_at(payload.len() - TAG_LEN)
        };
        let mut ct_tag = Vec::with_capacity(ct.len() + TAG_LEN);
        ct_tag.extend_from_slice(ct);
        ct_tag.extend_from_slice(tag);
        let aad: &[u8] = match s.aad {
            Aad::None => &[],
            Aad::Header4 => &d[0..4],
        };
        gcm_open(&s.key(key), &s.nonce.nonce(seq), &ct_tag, aad)
    };

    // Fast path: a scheme is already known for this connection, so try nothing else.
    if let Some(s) = *detected {
        return attempt(s).map(|pt| (s, pt));
    }

    // Candidate nonce constructions, most-likely first.
    const MARKERS: [[u8; 2]; 3] = [*b"CC", *b"HC", *b"CH"];
    let mut kinds: Vec<NonceKind> = Vec::with_capacity(1 + 2 * (MARKERS.len() + 1));
    kinds.push(NonceKind::LegacyLowByte);
    for seq_be in [false, true] {
        for marker in MARKERS {
            kinds.push(NonceKind::V2 { seq_be, marker });
        }
        kinds.push(NonceKind::Legacy16Seq { seq_be });
    }

    for &nonce in &kinds {
        for key_rev in [false, true] {
            for tag_first in [true, false] {
                for aad in [Aad::None, Aad::Header4] {
                    let s = Scheme {
                        key_rev,
                        nonce,
                        tag_first,
                        aad,
                    };
                    if let Some(pt) = attempt(s) {
                        return Some((s, pt));
                    }
                }
            }
        }
    }
    None
}
/// Seal a host→client control message, mirroring the client's `detected` scheme with the
/// direction flipped: V2 nonces use marker `H?` (host-originated) instead of `C?`; legacy
/// nonces keep their construction with our own independent `seq` counter. Wire layout matches
/// what the client sends us: `[0x0001][length][seq][tag|ct per scheme.tag_first]`.
///
/// The `length` field is `4 + 16 + pt.len()` stored as a `u16`, so `pt` is expected to be
/// small.
fn encrypt_control(key: &[u8; 16], scheme: &Scheme, seq: u32, pt: &[u8]) -> Vec<u8> {
    const TAG_LEN: usize = 16;

    // Same nonce construction as the client's, except that a V2 marker starts with 'H'.
    let mut nonce_kind = scheme.nonce;
    if let NonceKind::V2 { marker, .. } = &mut nonce_kind {
        marker[0] = b'H';
    }

    // Cleartext header: encType, length (seq + tag + plaintext), seq — all little-endian.
    let length = (4 + TAG_LEN + pt.len()) as u16;
    let mut wire = Vec::with_capacity(8 + TAG_LEN + pt.len());
    wire.extend_from_slice(&0x0001u16.to_le_bytes());
    wire.extend_from_slice(&length.to_le_bytes());
    wire.extend_from_slice(&seq.to_le_bytes());

    let aad: Vec<u8> = match scheme.aad {
        Aad::Header4 => wire[0..4].to_vec(),
        Aad::None => Vec::new(),
    };
    let sealed = gcm_seal(&scheme.key(key), &nonce_kind.nonce(seq), pt, &aad);
    // `gcm_seal` yields `ciphertext || tag`; emit the halves in the scheme's wire order.
    let (ct, tag) = sealed.split_at(sealed.len() - TAG_LEN);
    let (first, second) = if scheme.tag_first { (tag, ct) } else { (ct, tag) };
    wire.extend_from_slice(first);
    wire.extend_from_slice(second);
    wire
}
/// AES-128-GCM seal (companion to [`gcm_open`]); returns `ciphertext || tag`.
///
/// # Panics
/// Panics if `nonce` is neither 12 nor 16 bytes long, or if encryption itself fails.
fn gcm_seal(key: &[u8; 16], nonce: &[u8], pt: &[u8], aad: &[u8]) -> Vec<u8> {
    use aes_gcm::aead::consts::{U12, U16};
    use aes_gcm::aead::generic_array::GenericArray;
    use aes_gcm::aead::{Aead, KeyInit, Payload};
    use aes_gcm::{aes::Aes128, AesGcm};
    let payload = Payload { msg: pt, aad };
    // The nonce size is part of the cipher's type, so each length needs its own instance.
    if nonce.len() == 12 {
        let cipher = AesGcm::<Aes128, U12>::new_from_slice(key).unwrap();
        return cipher
            .encrypt(GenericArray::from_slice(nonce), payload)
            .expect("GCM seal");
    }
    if nonce.len() == 16 {
        let cipher = AesGcm::<Aes128, U16>::new_from_slice(key).unwrap();
        return cipher
            .encrypt(GenericArray::from_slice(nonce), payload)
            .expect("GCM seal");
    }
    unreachable!("nonce length")
}
/// AES-128-GCM open with a 12- or 16-byte nonce and explicit AAD. Returns the plaintext iff
/// the tag authenticates. `ct_tag` is `ciphertext || tag` (aes-gcm's expected order).
///
/// Any other nonce length, a rejected key, or a failed authentication all yield `None`.
fn gcm_open(key: &[u8; 16], nonce: &[u8], ct_tag: &[u8], aad: &[u8]) -> Option<Vec<u8>> {
    use aes_gcm::aead::consts::{U12, U16};
    use aes_gcm::aead::generic_array::GenericArray;
    use aes_gcm::aead::{Aead, KeyInit, Payload};
    use aes_gcm::{aes::Aes128, AesGcm};
    let sealed = Payload { msg: ct_tag, aad };
    // The nonce size is part of the cipher's type, so each length needs its own instance.
    if nonce.len() == 12 {
        let cipher = AesGcm::<Aes128, U12>::new_from_slice(key).ok()?;
        cipher.decrypt(GenericArray::from_slice(nonce), sealed).ok()
    } else if nonce.len() == 16 {
        let cipher = AesGcm::<Aes128, U16>::new_from_slice(key).ok()?;
        cipher.decrypt(GenericArray::from_slice(nonce), sealed).ok()
    } else {
        None
    }
}
@@ -0,0 +1,60 @@
//! Pairing crypto primitives (control plane only — distinct from `punktfunk_core`'s AES-GCM
//! data-plane sealing). GameStream pairing uses: AES-128-**ECB** with **no padding**,
//! SHA-256 (host appversion major ≥ 7), and RSA-PKCS1v15-SHA256 signatures. See the
//! `serverinfo + pairing` section of `docs/research/gamestream-protocol-research.json`.
use aes::cipher::generic_array::GenericArray;
use aes::cipher::{BlockDecrypt, BlockEncrypt, KeyInit};
use aes::Aes128;
use rand::RngCore;
use sha2::{Digest, Sha256};
/// `N` random bytes, filled from `rand::thread_rng()`.
pub fn random<const N: usize>() -> [u8; N] {
    let mut rng = rand::thread_rng();
    let mut bytes = [0u8; N];
    rng.fill_bytes(&mut bytes);
    bytes
}
/// SHA-256 over the concatenation of `parts`, fed to the hasher in order.
pub fn sha256(parts: &[&[u8]]) -> [u8; 32] {
    let mut hasher = Sha256::new();
    parts.iter().for_each(|part| hasher.update(part));
    let digest = hasher.finalize();
    digest.into()
}
/// The PIN-derived AES-128 key: `SHA-256(salt || pin)[..16]` (salt first, PIN as ASCII).
pub fn pin_key(salt: &[u8; 16], pin: &str) -> [u8; 16] {
    let digest = sha256(&[salt, pin.as_bytes()]);
    // Only the first half of the 32-byte digest becomes the key.
    let (head, _) = digest.split_at(16);
    let mut key = [0u8; 16];
    key.copy_from_slice(head);
    key
}
/// AES-128-ECB encrypt, no padding: input is zero-extended to a 16-byte multiple.
///
/// Each 16-byte block is encrypted independently; the output length is the input length
/// rounded up to the next multiple of 16 (empty input gives empty output).
pub fn ecb_encrypt(key: &[u8; 16], data: &[u8]) -> Vec<u8> {
    let cipher = Aes128::new(GenericArray::from_slice(key));
    let padded_len = data.len().next_multiple_of(16);
    let mut buf = Vec::with_capacity(padded_len);
    buf.extend_from_slice(data);
    buf.resize(padded_len, 0);
    buf.chunks_exact_mut(16)
        .for_each(|block| cipher.encrypt_block(GenericArray::from_mut_slice(block)));
    buf
}
/// AES-128-ECB decrypt, no padding: trailing bytes past the last whole block are ignored.
///
/// The output length is the input length rounded down to a multiple of 16.
pub fn ecb_decrypt(key: &[u8; 16], data: &[u8]) -> Vec<u8> {
    let cipher = Aes128::new(GenericArray::from_slice(key));
    let whole_blocks = data.len() - data.len() % 16;
    // Copy the whole blocks once, then decrypt them where they sit.
    let mut plain = data[..whole_blocks].to_vec();
    for block in plain.chunks_exact_mut(16) {
        cipher.decrypt_block(GenericArray::from_mut_slice(block));
    }
    plain
}
@@ -0,0 +1,203 @@
//! Decode GameStream controller packets (carried on the same encrypted control stream as
//! mouse/keyboard — see [`super::input`]) into [`GamepadFrame`]s for the uinput virtual pads.
//!
//! Layouts mirror moonlight-common-c `Input.h` (all `#pragma pack(1)`; the `size` header field
//! is big-endian, everything else little-endian). We implement the Gen5+ `MULTI_CONTROLLER`
//! event (magic `0x0C`) — the only controller event Sunshine-class hosts receive — plus the
//! Sunshine-extension `CONTROLLER_ARRIVAL` (`0x55000004`). Because our serverinfo advertises a
//! Sunshine appversion (4th component negative), clients also send `buttonFlags2` (paddles /
//! touchpad-click / Share) inside the MC packet.
/// Inner control-message type for input (same as [`super::input`]); read as a little-endian
/// `u16` from the first two bytes of the decrypted plaintext.
const INPUT_DATA_TYPE: u16 = 0x0206;
/// `NV_INPUT_HEADER.magic` for the Gen5+ multi-controller event.
const MAGIC_MULTI_CONTROLLER: u32 = 0x0C;
/// Sunshine extension: controller arrival metadata (type/capabilities).
const MAGIC_CONTROLLER_ARRIVAL: u32 = 0x5500_0004;
/// Most controllers a session tracks (Sunshine's MAX_GAMEPADS).
pub const MAX_PADS: usize = 16;
/// One decoded controller event.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GamepadEvent {
/// Full state of one controller + the set of attached controllers.
State(GamepadFrame),
/// Sunshine arrival metadata (precedes the first State for that pad).
Arrival {
/// Which controller this metadata describes.
index: u8,
/// 0 unknown, 1 xbox, 2 ps, 3 nintendo.
kind: u8,
/// LI_CCAP_* bits (0x02 = rumble).
capabilities: u16,
},
}
/// Snapshot of one controller's inputs (Moonlight conventions: sticks -32768..32767 with +Y
/// up, triggers 0..255, buttons = `buttonFlags | buttonFlags2 << 16`).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct GamepadFrame {
/// Which controller this snapshot belongs to.
pub index: i16,
/// Bit n set = controller n attached; a clear bit for an allocated pad means unplug.
pub active_mask: u16,
/// Pressed buttons: `BTN_*` bits in the low 16 bits, `buttonFlags2` in the high 16.
pub buttons: u32,
/// Left trigger, 0..255.
pub left_trigger: u8,
/// Right trigger, 0..255.
pub right_trigger: u8,
/// Left stick X.
pub ls_x: i16,
/// Left stick Y (+Y is up).
pub ls_y: i16,
/// Right stick X.
pub rs_x: i16,
/// Right stick Y (+Y is up).
pub rs_y: i16,
}
// buttonFlags bits (Limelight.h). These occupy the low 16 bits of `GamepadFrame::buttons`;
// `buttonFlags2` bits (paddles / touchpad-click / Share) are shifted into bits 16 and up.
// No constant is defined here for 0x0800.
pub const BTN_DPAD_UP: u32 = 0x0001;
pub const BTN_DPAD_DOWN: u32 = 0x0002;
pub const BTN_DPAD_LEFT: u32 = 0x0004;
pub const BTN_DPAD_RIGHT: u32 = 0x0008;
pub const BTN_START: u32 = 0x0010;
pub const BTN_BACK: u32 = 0x0020;
pub const BTN_LS_CLK: u32 = 0x0040;
pub const BTN_RS_CLK: u32 = 0x0080;
pub const BTN_LB: u32 = 0x0100;
pub const BTN_RB: u32 = 0x0200;
pub const BTN_GUIDE: u32 = 0x0400;
pub const BTN_A: u32 = 0x1000;
pub const BTN_B: u32 = 0x2000;
pub const BTN_X: u32 = 0x4000;
pub const BTN_Y: u32 = 0x8000;
/// Decode one decrypted control plaintext into a controller event, if it is one. Mouse,
/// keyboard, keepalives etc. yield `None` (they're handled by [`super::input::decode`]).
pub fn decode(plaintext: &[u8]) -> Option<GamepadEvent> {
if plaintext.len() < 4 || u16::from_le_bytes([plaintext[0], plaintext[1]]) != INPUT_DATA_TYPE {
return None;
}
let p = &plaintext[4..];
if p.len() < 8 {
return None;
}
let magic = u32::from_le_bytes([p[4], p[5], p[6], p[7]]);
let b = &p[8..]; // body after NV_INPUT_HEADER
let le16 = |o: usize| -> Option<i16> { Some(i16::from_le_bytes([*b.get(o)?, *b.get(o + 1)?])) };
match magic {
MAGIC_MULTI_CONTROLLER => {
// Body: headerB@0, controllerNumber@2, activeGamepadMask@4, midB@6, buttonFlags@8,
// LT@10, RT@11, lsX@12, lsY@14, rsX@16, rsY@18, tailA@20, buttonFlags2@22, tailB@24.
// The constants (headerB/midB/tail*) are never validated, mirroring Sunshine.
let buttons_lo = le16(8)? as u16 as u32;
// buttonFlags2 is absent on pre-extension clients (shorter packet) — treat as 0.
let buttons_hi = le16(22).map(|v| v as u16 as u32).unwrap_or(0);
Some(GamepadEvent::State(GamepadFrame {
index: le16(2)?,
active_mask: le16(4)? as u16,
buttons: buttons_lo | (buttons_hi << 16),
left_trigger: *b.get(10)?,
right_trigger: *b.get(11)?,
ls_x: le16(12)?,
ls_y: le16(14)?,
rs_x: le16(16)?,
rs_y: le16(18)?,
}))
}
MAGIC_CONTROLLER_ARRIVAL => Some(GamepadEvent::Arrival {
index: *b.first()?,
kind: *b.get(1)?,
capabilities: le16(2)? as u16,
}),
_ => None,
}
}
/// Build the 14-byte host→client rumble plaintext (message type `0x010B`).
///
/// Layout, all little-endian: `[u16 type][u16 len = 10][u32 filler][u16 controllerNumber]
/// [u16 lowFreqMotor][u16 highFreqMotor]`; motor values span 0..0xFFFF. The caller seals the
/// result with the host-direction GCM scheme and sends it on the ENet control peer.
pub fn rumble_plaintext(index: u16, low: u16, high: u16) -> Vec<u8> {
    let mut pt = vec![0u8; 14];
    pt[0..2].copy_from_slice(&0x010Bu16.to_le_bytes());
    pt[2..4].copy_from_slice(&10u16.to_le_bytes());
    // Filler — present on the wire but ignored.
    pt[4..8].copy_from_slice(&0x00C0_FFEEu32.to_le_bytes());
    pt[8..10].copy_from_slice(&index.to_le_bytes());
    pt[10..12].copy_from_slice(&low.to_le_bytes());
    pt[12..14].copy_from_slice(&high.to_le_bytes());
    pt
}
// Unit tests for the controller decoder and the rumble encoder.
#[cfg(test)]
mod tests {
use super::*;
/// Build a control plaintext: inner header (type + length, LE) + NV_INPUT_HEADER
/// (size BE, magic LE) + body.
fn wrap(magic: u32, body: &[u8]) -> Vec<u8> {
let mut inp = Vec::new();
inp.extend_from_slice(&((4 + body.len()) as u32).to_be_bytes());
inp.extend_from_slice(&magic.to_le_bytes());
inp.extend_from_slice(body);
let mut pt = Vec::new();
pt.extend_from_slice(&INPUT_DATA_TYPE.to_le_bytes());
pt.extend_from_slice(&(inp.len() as u16).to_le_bytes());
pt.extend_from_slice(&inp);
pt
}
/// A full multi-controller packet, including the optional `buttonFlags2` tail.
#[test]
fn decodes_multi_controller() {
// Pad 1 attached (mask 0b10), A+RB held, LT=10 RT=200, LS=(1000,-2000), RS=(-1,32767),
// paddle1 via buttonFlags2.
let mut body = Vec::new();
body.extend_from_slice(&0x001Ai16.to_le_bytes()); // headerB
body.extend_from_slice(&1i16.to_le_bytes()); // controllerNumber
body.extend_from_slice(&0b10i16.to_le_bytes()); // activeGamepadMask
body.extend_from_slice(&0x0014i16.to_le_bytes()); // midB
body.extend_from_slice(&((BTN_A | BTN_RB) as u16).to_le_bytes());
body.push(10); // LT
body.push(200); // RT
body.extend_from_slice(&1000i16.to_le_bytes());
body.extend_from_slice(&(-2000i16).to_le_bytes());
body.extend_from_slice(&(-1i16).to_le_bytes());
body.extend_from_slice(&32767i16.to_le_bytes());
body.extend_from_slice(&0x009Ci16.to_le_bytes()); // tailA
body.extend_from_slice(&0x0001u16.to_le_bytes()); // buttonFlags2 (paddle1)
body.extend_from_slice(&0x0055i16.to_le_bytes()); // tailB
let Some(GamepadEvent::State(f)) = decode(&wrap(MAGIC_MULTI_CONTROLLER, &body)) else {
panic!("expected State");
};
assert_eq!(f.index, 1);
assert_eq!(f.active_mask, 0b10);
// buttonFlags2 lands in the high 16 bits.
assert_eq!(f.buttons, BTN_A | BTN_RB | 0x0001_0000);
assert_eq!((f.left_trigger, f.right_trigger), (10, 200));
assert_eq!((f.ls_x, f.ls_y, f.rs_x, f.rs_y), (1000, -2000, -1, 32767));
}
/// Arrival metadata: only the first four body bytes are decoded; the rest is ignored.
#[test]
fn decodes_arrival() {
let body = [0u8, 1, 0x02, 0x00, 0xFF, 0xFF, 0x0F, 0x00]; // pad 0, xbox, rumble cap
let Some(GamepadEvent::Arrival {
index,
kind,
capabilities,
}) = decode(&wrap(MAGIC_CONTROLLER_ARRIVAL, &body))
else {
panic!("expected Arrival");
};
assert_eq!((index, kind, capabilities), (0, 1, 0x0002));
}
/// Non-controller magics and truncated plaintexts must yield `None`.
#[test]
fn ignores_mouse_and_short_packets() {
assert!(decode(&wrap(0x07, &[0, 1, 0, 2])).is_none()); // relative mouse
assert!(decode(&[0u8; 3]).is_none());
}
/// Byte-level layout of the rumble plaintext (bytes 4..8 are the ignored filler).
#[test]
fn rumble_layout() {
let pt = rumble_plaintext(2, 0x1234, 0xBEEF);
assert_eq!(pt.len(), 14);
assert_eq!(u16::from_le_bytes([pt[0], pt[1]]), 0x010B);
assert_eq!(u16::from_le_bytes([pt[2], pt[3]]), 10);
assert_eq!(u16::from_le_bytes([pt[8], pt[9]]), 2);
assert_eq!(u16::from_le_bytes([pt[10], pt[11]]), 0x1234);
assert_eq!(u16::from_le_bytes([pt[12], pt[13]]), 0xBEEF);
}
}
@@ -0,0 +1,143 @@
//! Decode the GameStream input wire format (carried AES-GCM-encrypted on the ENet control
//! stream — see [`super::control`]) into platform-agnostic
//! [`punktfunk_core::input::InputEvent`]s for injection.
//!
//! A decrypted control message is `[u16 type LE][u16 length LE][NV_INPUT packet]`. We only
//! handle the input type (`0x0206`); the packet is an 8-byte `NV_INPUT_HEADER` (`size` BE,
//! `magic` LE) followed by a magic-specific body. Multi-byte body fields are big-endian
//! (network order) except `magic` and the keyboard `keyCode` (little-endian). Struct layouts
//! mirror moonlight-common-c `Input.h`; the magic dispatch matches Sunshine `input.cpp`
//! (Gen5+, where scroll is `0x0A` and controllers are `0x0C`, so there's no ambiguity).
use punktfunk_core::input::{InputEvent, InputKind};
/// Inner control-message type for input (moonlight `packetTypesGen7[IDX_INPUT_DATA]`).
const INPUT_DATA_TYPE: u16 = 0x0206;
// NV_INPUT_HEADER.magic values (Input.h), with the Gen5+ variants where they differ.
const MAGIC_KEY_DOWN: u32 = 0x03;
const MAGIC_KEY_UP: u32 = 0x04;
const MAGIC_MOUSE_ABS: u32 = 0x05;
// Both relative-mouse magics are decoded identically.
const MAGIC_MOUSE_REL: u32 = 0x06;
const MAGIC_MOUSE_REL_GEN5: u32 = 0x07;
const MAGIC_MOUSE_BTN_DOWN: u32 = 0x08;
const MAGIC_MOUSE_BTN_UP: u32 = 0x09;
const MAGIC_SCROLL_GEN5: u32 = 0x0A;
// Declared for reference; UTF-8 text is not decoded yet and falls into the catch-all arm.
const MAGIC_UTF8: u32 = 0x17;
const MAGIC_HSCROLL: u32 = 0x5500_0001;
/// `code` value marking a [`InputKind::MouseScroll`] as horizontal (vs `0` = vertical).
pub const SCROLL_HORIZONTAL: u32 = 1;
/// Turn one decrypted control plaintext into the input events it carries (zero or one).
///
/// Anything that is not an input-data message (keepalives, QoS) and every input kind this
/// module does not handle (gamepad/pen/touch) produces an empty vector.
pub fn decode(plaintext: &[u8]) -> Vec<InputEvent> {
    let is_input = plaintext.len() >= 4
        && u16::from_le_bytes([plaintext[0], plaintext[1]]) == INPUT_DATA_TYPE;
    if !is_input {
        return Vec::new();
    }
    // Skip the 4-byte [type][length] control header; the NV_INPUT packet follows.
    match decode_input_packet(&plaintext[4..]) {
        Some(event) => vec![event],
        None => Vec::new(),
    }
}
/// Decode a single NV_INPUT packet (header + body) into an [`InputEvent`].
///
/// Returns `None` if the packet is shorter than its 8-byte header, if the body is too short
/// for the fields of its magic, or if the magic is one this module does not inject.
fn decode_input_packet(p: &[u8]) -> Option<InputEvent> {
    // NV_INPUT_HEADER: size (BE u32, excludes itself) + magic (LE u32). Body follows.
    let m = p.get(4..8)?;
    let magic = u32::from_le_bytes([m[0], m[1], m[2], m[3]]);
    let body = &p[8..];

    // Bounds-checked big-endian 16-bit readers over the body.
    let unsigned = |at: usize| -> Option<u16> {
        let pair = body.get(at..at + 2)?;
        Some(u16::from_be_bytes([pair[0], pair[1]]))
    };
    let signed = |at: usize| -> Option<i32> { unsigned(at).map(|v| i32::from(v as i16)) };
    let first_byte = || -> Option<u32> { body.first().map(|&v| u32::from(v)) };

    let event = match magic {
        MAGIC_MOUSE_REL | MAGIC_MOUSE_REL_GEN5 => {
            ev(InputKind::MouseMove, 0, signed(0)?, signed(2)?, 0)
        }
        MAGIC_MOUSE_ABS => {
            // short x, y, unused, width, height (all BE). The client's reference extent is
            // carried as (width << 16 | height) in `flags` so the injector can scale.
            let x = signed(0)?;
            let y = signed(2)?;
            let extent = (u32::from(unsigned(6)?) << 16) | u32::from(unsigned(8)?);
            ev(InputKind::MouseMoveAbs, 0, x, y, extent)
        }
        MAGIC_MOUSE_BTN_DOWN => ev(InputKind::MouseButtonDown, first_byte()?, 0, 0, 0),
        MAGIC_MOUSE_BTN_UP => ev(InputKind::MouseButtonUp, first_byte()?, 0, 0, 0),
        MAGIC_SCROLL_GEN5 => ev(InputKind::MouseScroll, 0, signed(0)?, 0, 0),
        MAGIC_HSCROLL => ev(InputKind::MouseScroll, SCROLL_HORIZONTAL, signed(0)?, 0, 0),
        MAGIC_KEY_DOWN | MAGIC_KEY_UP => {
            // char flags, short keyCode (LE), char modifiers, short zero2. The client stuffs a
            // 0x80 high byte on key-down; Sunshine masks to the low-byte VK (`& 0xFF`).
            let raw = u16::from_le_bytes([*body.get(1)?, *body.get(2)?]);
            let vk = u32::from(raw & 0x00FF);
            let modifiers = u32::from(*body.get(3)?);
            let kind = match magic {
                MAGIC_KEY_DOWN => InputKind::KeyDown,
                _ => InputKind::KeyUp,
            };
            ev(kind, vk, 0, 0, modifiers)
        }
        // UTF-8 text, gamepad, pen, touch, haptics — not yet injected.
        _ => return None,
    };
    Some(event)
}
/// Assemble an [`InputEvent`] from its fields, zeroing the `_pad` bytes.
fn ev(kind: InputKind, code: u32, x: i32, y: i32, flags: u32) -> InputEvent {
InputEvent {
kind,
_pad: [0; 3],
code,
x,
y,
flags,
}
}
// Unit tests for the mouse/keyboard decoder.
#[cfg(test)]
mod tests {
use super::*;
/// Build a control plaintext: inner header + NV_INPUT_HEADER + body.
fn wrap(magic: u32, body: &[u8]) -> Vec<u8> {
let mut inp = Vec::new();
inp.extend_from_slice(&((4 + body.len()) as u32).to_be_bytes()); // size (excl. itself)
inp.extend_from_slice(&magic.to_le_bytes());
inp.extend_from_slice(body);
let mut pt = Vec::new();
pt.extend_from_slice(&INPUT_DATA_TYPE.to_le_bytes());
pt.extend_from_slice(&(inp.len() as u16).to_le_bytes());
pt.extend_from_slice(&inp);
pt
}
/// Relative mouse deltas are big-endian signed shorts.
#[test]
fn decodes_relative_mouse() {
// deltaX = -1 (ffff BE), deltaY = +2 (0002 BE) — matches a real captured packet.
let pt = wrap(MAGIC_MOUSE_REL_GEN5, &[0xff, 0xff, 0x00, 0x02]);
let ev = decode(&pt);
assert_eq!(ev.len(), 1);
assert_eq!(ev[0].kind, InputKind::MouseMove);
assert_eq!((ev[0].x, ev[0].y), (-1, 2));
}
/// The key code's high byte is masked off; modifiers travel in `flags`.
#[test]
fn decodes_key_down_masking_high_byte() {
// keyCode 0x80A4 (LE a4 80) → VK 0xA4 (VK_LMENU); modifiers 0x04 (Alt).
let pt = wrap(MAGIC_KEY_DOWN, &[0x00, 0xa4, 0x80, 0x04, 0x00, 0x00]);
let ev = decode(&pt);
assert_eq!(ev.len(), 1);
assert_eq!(ev[0].kind, InputKind::KeyDown);
assert_eq!(ev[0].code, 0xA4);
assert_eq!(ev[0].flags, 0x04);
}
/// A control message whose type is not `INPUT_DATA_TYPE` decodes to nothing.
#[test]
fn ignores_non_input_type() {
let mut pt = vec![0x00, 0x02]; // type 0x0200 (keepalive)
pt.extend_from_slice(&[0x08, 0x00, 0x04, 0, 0, 0, 0, 0, 0, 0]);
assert!(decode(&pt).is_empty());
}
}
@@ -0,0 +1,37 @@
//! mDNS advertisement of `_nvstream._tcp.local.` so Moonlight auto-discovers the host.
//! (Manual "add host by IP" also works as a fallback, which is what we test with first.)
use super::Host;
use anyhow::{Context, Result};
use mdns_sd::{ServiceDaemon, ServiceInfo};
use std::collections::HashMap;
/// Holds the mDNS daemon; dropping it unregisters the service.
/// Returned by [`advertise`]; keep it alive for as long as the host should be discoverable.
pub struct Advert {
// Never read — held only so the daemon is not dropped.
_daemon: ServiceDaemon,
}
/// Register `host` as a `_nvstream._tcp.local.` mDNS service on its HTTP port.
///
/// The instance name is the bare hostname and the advertised host name is
/// `<hostname>.local.`. Returns the [`Advert`] handle that owns the daemon.
///
/// # Errors
/// Fails if the daemon cannot be created, the `ServiceInfo` cannot be built, or registration
/// is rejected.
pub fn advertise(host: &Host) -> Result<Advert> {
    let daemon = ServiceDaemon::new().context("create mDNS daemon")?;
    let fqdn = format!("{}.local.", host.hostname);
    // No TXT records are required for Moonlight discovery; it resolves the A record and then
    // GETs /serverinfo for capabilities.
    let no_txt: HashMap<String, String> = HashMap::new();
    let info = ServiceInfo::new(
        "_nvstream._tcp.local.",
        &host.hostname,
        &fqdn,
        host.local_ip,
        host.http_port,
        no_txt,
    )
    .context("build mDNS ServiceInfo")?;
    daemon.register(info).context("register mDNS service")?;
    tracing::info!(
        service = "_nvstream._tcp",
        port = host.http_port,
        host = %fqdn,
        "mDNS advertising"
    );
    Ok(Advert { _daemon: daemon })
}
+252
View File
@@ -0,0 +1,252 @@
//! GameStream (P1) control plane — what a stock Moonlight/Artemis client talks to around
//! the media streams: mDNS discovery, the nvhttp serverinfo + pairing HTTP(S) API, RTSP,
//! and the ENet control stream. `tokio`/`axum` live here (control plane, I/O-bound — never
//! the per-frame hot path; that is `punktfunk_core`'s P1 wire codec). See `docs/m2-plan.md`.
//!
//! Status: mDNS `_nvstream._tcp` advertisement, `/serverinfo`, pairing, RTSP, the ENet control
//! stream and the media planes all have modules here (see the M2 task list / plan).
pub mod apps;
#[cfg(target_os = "linux")]
mod audio;
/// Stub — the audio plane needs Linux (PipeWire capture + libopus); this keeps non-Linux
/// dev builds compiling (crate doc: "the crate compiles everywhere"). Reports failure the
/// same way the real stream thread does: by clearing `running`.
#[cfg(not(target_os = "linux"))]
mod audio {
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
/// Non-Linux stand-in for the audio stream entry point: logs an error and clears
/// `running`; every other argument is ignored.
pub fn start(
running: Arc<AtomicBool>,
_gcm_key: [u8; 16],
_rikeyid: i32,
_audio_cap: Arc<Mutex<Option<Box<dyn crate::audio::AudioCapturer>>>>,
) {
tracing::error!("GameStream audio requires Linux (PipeWire + libopus)");
running.store(false, Ordering::SeqCst);
}
}
pub(crate) mod cert;
mod control;
mod crypto;
pub mod gamepad;
mod input;
mod mdns;
mod nvhttp;
mod pairing;
mod rtsp;
mod serverinfo;
mod stream;
mod tls;
mod video;
use anyhow::{Context, Result};
use std::net::{IpAddr, Ipv4Addr, UdpSocket};
use std::path::PathBuf;
use std::sync::Arc;
/// nvhttp ports (Moonlight derives all stream ports by offset from the HTTP base 47989).
/// Plain-HTTP nvhttp listener.
pub const HTTP_PORT: u16 = 47989;
/// Mutual-TLS nvhttp listener.
pub const HTTPS_PORT: u16 = 47984;
/// RTSP port, advertised to clients in the session URL.
pub const RTSP_PORT: u16 = 48010;
pub const VIDEO_PORT: u16 = 47998;
pub const CONTROL_PORT: u16 = 47999;
pub const AUDIO_PORT: u16 = 48000;
/// Advertised host version. Major ≥ 7 tells Moonlight to use SHA-256 for pairing.
pub const APP_VERSION: &str = "7.1.431.-1";
pub const GFE_VERSION: &str = "3.23.0.74";
/// Codec support bitmask: 3=H264, 259=+HEVC, 3843=+AV1 (we encode HEVC/H264/AV1 via NVENC).
// NOTE(review): 3843 == 0xF03. moonlight-common-c appears to define the AV1 bits as
// 0x10000/0x20000 (SCM_AV1_MAIN8/MAIN10), which this value does not contain — confirm
// whether AV1 is really advertised by this mask.
pub const SERVER_CODEC_MODE_SUPPORT: u32 = 3843;
/// Stable host identity + advertised capabilities, shared across control-plane handlers.
pub struct Host {
/// Machine hostname; also used as the mDNS instance name.
pub hostname: String,
/// Stable per-host id (persisted), echoed in serverinfo + matched on pairing.
pub uniqueid: String,
/// Address advertised over mDNS and embedded in the RTSP session URL.
pub local_ip: IpAddr,
pub http_port: u16,
pub https_port: u16,
// Pairing state (server cert, paired client certs) is not kept here; it lives in `AppState`.
}
impl Host {
    /// Probe the local machine: hostname, persisted unique id, and primary LAN address
    /// (loopback when none can be determined), with the default nvhttp ports.
    ///
    /// # Errors
    /// Fails if the unique id can be neither loaded nor written.
    pub fn detect() -> Result<Host> {
        let hostname = hostname_string();
        let uniqueid = load_or_create_uniqueid()?;
        let local_ip = match primary_local_ip() {
            Some(ip) => ip,
            None => IpAddr::V4(Ipv4Addr::LOCALHOST),
        };
        Ok(Host {
            hostname,
            uniqueid,
            local_ip,
            http_port: HTTP_PORT,
            https_port: HTTPS_PORT,
        })
    }
}
/// The stream parameters a client passes at `/launch`, shared with the RTSP + media stages.
/// `Copy`, so handlers can store one and keep using it.
#[derive(Clone, Copy, Debug)]
pub struct LaunchSession {
/// AES-128 key for the RTSP/control/video/audio planes (from `rikey`).
pub gcm_key: [u8; 16],
/// `rikeyid` — seeds the per-stream GCM IVs.
pub rikeyid: i32,
/// Requested video mode (from the `mode=WxHxFPS` query parameter).
pub width: u32,
pub height: u32,
pub fps: u32,
/// `/launch?appid=N` — selects the app-catalog entry (session recipe).
pub appid: u32,
}
/// Shared control-plane state used as the axum app state.
/// Built once in [`serve`] and shared behind an `Arc` with the nvhttp, RTSP, control and
/// management servers.
pub struct AppState {
pub host: Host,
/// The host's certificate/key material, used for TLS and the pairing handshake.
pub identity: cert::ServerIdentity,
/// In-flight pairing sessions and the PIN gate.
pub pairing: pairing::Pairing,
/// Pinned (paired) client certificate DERs — the post-pair allow-list.
pub paired: std::sync::Mutex<Vec<Vec<u8>>>,
/// The active launch session (set by `/launch`, consumed by RTSP/media).
pub launch: std::sync::Mutex<Option<LaunchSession>>,
/// Negotiated video config from RTSP ANNOUNCE (consumed by the stream on PLAY).
pub stream: std::sync::Mutex<Option<stream::StreamConfig>>,
/// True while the video stream thread is running (also its keep-running flag).
pub streaming: std::sync::Arc<std::sync::atomic::AtomicBool>,
/// True while the audio stream thread is running (also its keep-running flag).
pub audio_streaming: std::sync::Arc<std::sync::atomic::AtomicBool>,
/// Set by the control stream when the client requests an IDR / invalidates reference
/// frames (recovery after loss); the video thread forces a keyframe and clears it.
pub force_idr: std::sync::Arc<std::sync::atomic::AtomicBool>,
/// Persistent screen capturer, reused across streams so reconnects don't spawn a second
/// (conflicting) screencast session. The video thread borrows it for the stream's duration
/// and returns it; `set_active` gates its cost while idle.
pub video_cap: std::sync::Arc<std::sync::Mutex<Option<Box<dyn crate::capture::Capturer>>>>,
/// Persistent audio capturer, reused across streams (avoids leaking a PipeWire capture
/// thread per reconnect); drained on reuse so no stale audio is sent.
pub audio_cap: std::sync::Arc<std::sync::Mutex<Option<Box<dyn crate::audio::AudioCapturer>>>>,
}
impl AppState {
/// Fresh control-plane state: no active session; the pairing allow-list is loaded from
/// disk (pairings persist across restarts).
pub fn new(host: Host, identity: cert::ServerIdentity) -> AppState {
AppState {
host,
identity,
pairing: pairing::Pairing::new(),
paired: std::sync::Mutex::new(load_paired()),
launch: std::sync::Mutex::new(None),
stream: std::sync::Mutex::new(None),
streaming: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
audio_streaming: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
force_idr: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
video_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
audio_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
}
}
}
/// Run the GameStream control plane (blocks): mDNS advertisement, the nvhttp servers, and
/// the management REST API.
///
/// Also starts the RTSP and ENet control servers. Returns only when one of the HTTP
/// servers finishes or any start-up step fails.
pub fn serve(mgmt: crate::mgmt::Options) -> Result<()> {
let host = Host::detect()?;
let identity = cert::ServerIdentity::load_or_create().context("host certificate")?;
let state = Arc::new(AppState::new(host, identity));
tracing::info!(
hostname = %state.host.hostname,
uniqueid = %state.host.uniqueid,
ip = %state.host.local_ip,
"punktfunk GameStream host (P1.1: serverinfo + pairing + mDNS)"
);
let rt = tokio::runtime::Runtime::new().context("build tokio runtime")?;
rt.block_on(async move {
// rustls needs a process-wide crypto provider before any TLS config is built.
// The result is deliberately ignored (presumably an error only means a provider was
// already installed — TODO confirm).
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
// Bound to a local so the advertisement stays registered until this block exits.
let _advert = mdns::advertise(&state.host).context("mDNS advertise")?;
rtsp::spawn(state.clone()).context("start RTSP server")?;
control::spawn(state.clone()).context("start ENet control server")?;
// Run nvhttp and the management API side by side; the first error ends both.
tokio::try_join!(nvhttp::run(state.clone()), crate::mgmt::run(state, mgmt))?;
Ok(())
})
}
/// Path of the punktfunk config directory — host identity and pairing state live here.
///
/// Resolution order: `$XDG_CONFIG_HOME/punktfunk`, then `$HOME/.config/punktfunk`, then
/// `./punktfunk` when neither variable is set. An *empty* `XDG_CONFIG_HOME` is treated as
/// unset, as the XDG Base Directory specification requires; previously it produced a
/// relative `punktfunk` path in the current working directory.
///
/// This function only computes the path; callers create the directory when they write.
fn config_dir() -> PathBuf {
    let base = std::env::var_os("XDG_CONFIG_HOME")
        // Empty means "not set" per the XDG spec — fall through to $HOME/.config.
        .filter(|v| !v.is_empty())
        .map(PathBuf::from)
        .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".config")))
        .unwrap_or_else(|| PathBuf::from("."));
    base.join("punktfunk")
}
/// The machine's hostname as reported by `/proc/sys/kernel/hostname` (trimmed), or
/// `"punktfunk-host"` when that file is unreadable or blank.
fn hostname_string() -> String {
    match std::fs::read_to_string("/proc/sys/kernel/hostname") {
        Ok(raw) if !raw.trim().is_empty() => raw.trim().to_string(),
        _ => "punktfunk-host".to_string(),
    }
}
/// Return the host's persisted unique id, minting and storing a new one on first run.
///
/// A non-blank `uniqueid` file in the config directory wins. Otherwise the id is taken from
/// the kernel UUID source (dashes removed), or derived from the process id and HTTP port if
/// that source is unreadable, and written back to the file.
///
/// # Errors
/// Fails only if the freshly minted id cannot be written.
fn load_or_create_uniqueid() -> Result<String> {
    let dir = config_dir();
    let path = dir.join("uniqueid");

    let stored = std::fs::read_to_string(&path)
        .ok()
        .map(|s| s.trim().to_string())
        .filter(|s| !s.is_empty());
    if let Some(id) = stored {
        return Ok(id);
    }

    let id = match std::fs::read_to_string("/proc/sys/kernel/random/uuid") {
        Ok(uuid) => uuid.trim().replace('-', ""),
        Err(_) => format!("{:016x}{:016x}", std::process::id(), HTTP_PORT),
    };
    // Best effort: if the directory cannot be created, the write below reports the failure.
    let _ = std::fs::create_dir_all(&dir);
    std::fs::write(&path, &id).with_context(|| format!("write {}", path.display()))?;
    Ok(id)
}
/// Best-effort guess at the primary LAN address.
///
/// Connects an unbound UDP socket "toward" a public address and reports the local address
/// the OS picked for that route. No packets are actually sent. Returns `None` if any step
/// fails.
fn primary_local_ip() -> Option<IpAddr> {
    UdpSocket::bind("0.0.0.0:0")
        .and_then(|probe| {
            probe.connect("8.8.8.8:80")?;
            probe.local_addr()
        })
        .ok()
        .map(|addr| addr.ip())
}
/// Location of the persisted paired-client allow-list (it survives host restarts, like
/// Sunshine): `$HOME/.config/punktfunk/paired.json`, or `None` when `HOME` is unset or not
/// valid Unicode.
// NOTE(review): unlike `config_dir`, this ignores `XDG_CONFIG_HOME` — confirm whether the
// two locations are meant to differ when that variable is set.
fn paired_path() -> Option<std::path::PathBuf> {
    let home = std::env::var("HOME").ok()?;
    Some(std::path::PathBuf::from(home).join(".config/punktfunk/paired.json"))
}
/// Read the persisted paired-client certificate DERs.
///
/// Yields an empty list when there is no usable path, the file cannot be read (e.g. first
/// run), or its JSON does not parse — the last case is logged as a warning.
fn load_paired() -> Vec<Vec<u8>> {
    let raw = match paired_path().and_then(|path| std::fs::read(&path).ok()) {
        Some(raw) => raw,
        None => return Vec::new(),
    };
    let parsed: Result<Vec<Vec<u8>>, _> = serde_json::from_slice(&raw);
    parsed
        .map(|clients| {
            tracing::info!(clients = clients.len(), "loaded persisted pairings");
            clients
        })
        .unwrap_or_else(|e| {
            tracing::warn!(error = %e, "paired.json unreadable — starting unpaired");
            Vec::new()
        })
}
/// Persist the paired-client allow-list (called after each successful pairing).
pub(crate) fn save_paired(paired: &[Vec<u8>]) {
let Some(path) = paired_path() else { return };
if let Some(dir) = path.parent() {
let _ = std::fs::create_dir_all(dir);
}
match serde_json::to_vec(paired) {
Ok(bytes) => {
if let Err(e) = std::fs::write(&path, bytes) {
tracing::warn!(error = %e, "persisting pairings failed");
}
}
Err(e) => tracing::warn!(error = %e, "serializing pairings failed"),
}
}
@@ -0,0 +1,236 @@
//! The nvhttp servers: plain HTTP on 47989 and mutual-TLS on 47984. Serves `/serverinfo`,
//! the `/pair` flow, `/applist`, and `/launch`/`/resume`/`/cancel`, plus a punktfunk-only
//! `/pin` endpoint to deliver the Moonlight-displayed PIN. Over HTTPS the client is
//! mutual-TLS-authenticated, so `/serverinfo` reports `PairStatus=1` there.
use super::{serverinfo, AppState, LaunchSession, HTTPS_PORT, HTTP_PORT, RTSP_PORT};
use anyhow::{anyhow, Context, Result};
use axum::{
extract::{Query, State},
http::header,
response::IntoResponse,
routing::get,
Extension, Router,
};
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
/// Which listener a request arrived on — HTTPS means a mutual-TLS-authenticated client.
/// Attached to every request as an axum `Extension` by [`router`].
#[derive(Clone, Copy)]
struct Https(bool);
/// Serve nvhttp on both listeners (plain HTTP and mutual-TLS HTTPS) until one of them
/// stops; an error from either server ends the other and is returned with context.
pub async fn run(state: Arc<AppState>) -> Result<()> {
// Mutual-TLS: request + verify the client cert (Moonlight presents one for the
// post-pairing pairchallenge + all post-pair endpoints).
let tls = axum_server::tls_rustls::RustlsConfig::from_config(super::tls::server_config(
&state.identity.cert_pem,
&state.identity.key_pem,
)?);
// Both listeners bind on all IPv4 interfaces.
let http_addr = SocketAddr::from(([0, 0, 0, 0], HTTP_PORT));
let https_addr = SocketAddr::from(([0, 0, 0, 0], HTTPS_PORT));
tracing::info!(%http_addr, %https_addr, "nvhttp listening (serverinfo + pair + launch)");
// Same routes on both; the boolean tells handlers which listener a request came in on.
let http = axum_server::bind(http_addr).serve(router(state.clone(), false).into_make_service());
let https =
axum_server::bind_rustls(https_addr, tls).serve(router(state, true).into_make_service());
tokio::try_join!(async { http.await.context("nvhttp HTTP server") }, async {
https.await.context("nvhttp HTTPS server")
},)?;
Ok(())
}
/// Build the nvhttp route table. The same routes are served on both listeners; `https`
/// is attached as an [`Https`] extension so handlers can tell them apart.
fn router(state: Arc<AppState>, https: bool) -> Router {
Router::new()
.route("/serverinfo", get(h_serverinfo))
.route("/pair", get(h_pair))
.route("/pin", get(h_pin))
.route("/applist", get(h_applist))
.route("/launch", get(h_launch))
.route("/resume", get(h_resume))
.route("/cancel", get(h_cancel))
.layer(Extension(Https(https)))
.with_state(state)
}
/// Wrap `body` in a response with `Content-Type: application/xml`.
fn xml(body: String) -> impl IntoResponse {
([(header::CONTENT_TYPE, "application/xml")], body)
}
/// `GET /serverinfo` — the host description XML; the listener flag is passed through so the
/// pair status can differ between HTTP and HTTPS.
async fn h_serverinfo(
State(st): State<Arc<AppState>>,
Extension(Https(https)): Extension<Https>,
) -> impl IntoResponse {
// Over the mutual-TLS port the peer is an authenticated (paired) client → PairStatus=1.
xml(serverinfo::serverinfo_xml(&st.host, https))
}
/// `GET /pin?pin=NNNN` — hand the PIN shown by Moonlight to the pairing gate.
///
/// Replies with a plain-text acknowledgement, or a usage hint when `pin` is missing or empty.
async fn h_pin(
    State(st): State<Arc<AppState>>,
    Query(q): Query<HashMap<String, String>>,
) -> impl IntoResponse {
    let pin = match q.get("pin") {
        Some(pin) if !pin.is_empty() => pin,
        _ => return "usage: GET /pin?pin=NNNN\n".to_string(),
    };
    st.pairing.pin.submit(pin.clone());
    "PIN accepted\n".to_string()
}
/// `GET /applist` — the app catalog XML; the shared state is not consulted.
async fn h_applist(State(_st): State<Arc<AppState>>) -> impl IntoResponse {
// One app for now: the headless desktop (the wlroots virtual output).
xml(super::apps::applist_xml())
}
/// `GET /launch` — parse the stream parameters, store them as the active launch session and
/// answer with the RTSP session URL. A malformed query yields the error XML instead.
async fn h_launch(
    State(st): State<Arc<AppState>>,
    Query(q): Query<HashMap<String, String>>,
) -> impl IntoResponse {
    let session = match launch(&st, &q) {
        Ok(session) => session,
        Err(e) => {
            tracing::warn!(error = %format!("{e:#}"), "launch failed");
            return xml(error_xml());
        }
    };
    // `LaunchSession` is `Copy`, so it can be stored and still logged below.
    *st.launch.lock().unwrap() = Some(session);
    tracing::info!(
        w = session.width,
        h = session.height,
        fps = session.fps,
        rikeyid = session.rikeyid,
        "launch — session created; RTSP at rtsp://{}:{RTSP_PORT}",
        st.host.local_ip
    );
    xml(session_url_xml(&st, "gamesession"))
}
/// `GET /resume` — re-issue the session URL if a launch session exists, else the error XML.
async fn h_resume(State(st): State<Arc<AppState>>) -> impl IntoResponse {
    let has_session = st.launch.lock().unwrap().is_some();
    let body = if has_session {
        session_url_xml(&st, "resume")
    } else {
        error_xml()
    };
    xml(body)
}
/// `GET /cancel` — quit the session: drop the launch session and clear both stream flags.
async fn h_cancel(State(st): State<Arc<AppState>>) -> impl IntoResponse {
    use std::sync::atomic::Ordering;

    st.launch.lock().unwrap().take();
    // Quit semantics: stop the running media threads (they observe these flags) so the session
    // actually ends — the virtual output/gamescope teardown follows via the capturer's RAII.
    for flag in [&st.streaming, &st.audio_streaming] {
        flag.store(false, Ordering::SeqCst);
    }
    tracing::info!("cancel — launch session cleared, streams stopping");
    let body = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\"><cancel>1</cancel></root>\n";
    xml(body.to_string())
}
/// Build a [`LaunchSession`] from the `/launch` query parameters.
///
/// `rikey` is required: hex, with the first 16 decoded bytes used as the key. `rikeyid`
/// defaults to 0, `mode` to 1920x1080x60 and `appid` to 1 when absent or unparsable.
///
/// # Errors
/// Fails when `rikey` is missing, not valid hex, or decodes to fewer than 16 bytes.
fn launch(_st: &AppState, q: &HashMap<String, String>) -> Result<LaunchSession> {
    let rikey = match q.get("rikey") {
        Some(rikey) => rikey,
        None => return Err(anyhow!("missing rikey")),
    };
    let key_bytes = hex::decode(rikey).context("rikey hex")?;
    let gcm_key = match key_bytes.get(..16) {
        Some(prefix) => {
            let mut key = [0u8; 16];
            key.copy_from_slice(prefix);
            key
        }
        None => return Err(anyhow!("rikey too short")),
    };
    // rikeyid is a signed 32-bit int (negative values wrap to a big-endian u32 IV later).
    let rikeyid = q
        .get("rikeyid")
        .and_then(|s| s.parse::<i32>().ok())
        .unwrap_or(0);
    let (width, height, fps) = match q.get("mode").and_then(|m| parse_mode(m)) {
        Some(mode) => mode,
        None => (1920, 1080, 60),
    };
    let appid = q
        .get("appid")
        .and_then(|s| s.parse::<u32>().ok())
        .unwrap_or(1);
    Ok(LaunchSession {
        gcm_key,
        rikeyid,
        width,
        height,
        fps,
        appid,
    })
}
/// `"1920x1080x60"` → `(1920, 1080, 60)`.
///
/// Returns `None` when fewer than three `x`-separated components are present, when any of
/// the first three is not a `u32`, or when any of them is zero. The string comes from an
/// untrusted query parameter, and a zero width, height or frame rate is never a usable video
/// mode, so it is rejected here rather than handed to the capture/encode stages. Components
/// after the third are ignored.
fn parse_mode(mode: &str) -> Option<(u32, u32, u32)> {
    // Each component must parse and be non-zero.
    let mut parts = mode
        .split('x')
        .map(|part| part.parse::<u32>().ok().filter(|&v| v > 0));
    let w = parts.next()??;
    let h = parts.next()??;
    let fps = parts.next()??;
    Some((w, h, fps))
}
/// Success XML for `/launch` and `/resume`: the RTSP session URL (host IP + `RTSP_PORT`)
/// plus a `<{tag}>1</{tag}>` marker element named by the caller.
fn session_url_xml(st: &AppState, tag: &str) -> String {
format!(
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\">\n<sessionUrl0>rtsp://{}:{RTSP_PORT}</sessionUrl0>\n<{tag}>1</{tag}>\n</root>\n",
st.host.local_ip
)
}
/// `GET /pair` — dispatch one step of the pairing handshake.
///
/// The step is selected by `phrase` (`getservercert` / `pairchallenge`) or, failing that, by
/// which of `clientchallenge`, `serverchallengeresp`, `clientpairingsecret` is present.
/// Any handler error is logged and answered with the `paired=0` XML.
async fn h_pair(
State(st): State<Arc<AppState>>,
Query(q): Query<HashMap<String, String>>,
) -> impl IntoResponse {
let uniqueid = q.get("uniqueid").cloned().unwrap_or_default();
let phrase = q.get("phrase").map(String::as_str);
// Step name for the log line only; the dispatch below repeats the same precedence.
let step = phrase
.filter(|p| *p == "getservercert" || *p == "pairchallenge")
.or_else(|| {
[
"clientchallenge",
"serverchallengeresp",
"clientpairingsecret",
]
.into_iter()
.find(|k| q.contains_key(*k))
})
.unwrap_or("?");
tracing::info!(uniqueid, step, "pair request");
let result = if phrase == Some("getservercert") {
// Phase 1 needs both the salt and the client certificate.
match (q.get("salt"), q.get("clientcert")) {
(Some(salt), Some(cc)) => {
st.pairing
.getservercert(&st.identity, &uniqueid, salt, cc)
.await
}
_ => Ok(pair_error_xml()),
}
} else if phrase == Some("pairchallenge") {
// Reached only over the TLS port with the pinned host cert; the handshake is the
// proof, so acknowledge success.
// NOTE(review): `/pair` is routed on the plain-HTTP listener too and this branch does
// not check which listener the request arrived on — confirm that is intended.
Ok(paired_ok_xml())
} else if let Some(v) = q.get("clientchallenge") {
st.pairing.clientchallenge(&st.identity, &uniqueid, v)
} else if let Some(v) = q.get("serverchallengeresp") {
st.pairing.serverchallengeresp(&st.identity, &uniqueid, v)
} else if let Some(v) = q.get("clientpairingsecret") {
st.pairing.clientpairingsecret(&uniqueid, v, &st.paired)
} else {
Ok(pair_error_xml())
};
// Errors never reach the client as HTTP failures; they become a `paired=0` body.
let body = result.unwrap_or_else(|e| {
tracing::warn!(error = %format!("{e:#}"), uniqueid, "pair handler error");
pair_error_xml()
});
xml(body)
}
/// XML body acknowledging a successful pairing step (`<paired>1</paired>`).
fn paired_ok_xml() -> String {
    const BODY: &str =
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\"><paired>1</paired></root>\n";
    BODY.to_owned()
}
/// XML body reporting a failed pairing step (`<paired>0</paired>`, status code still 200).
fn pair_error_xml() -> String {
    const BODY: &str =
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\"><paired>0</paired></root>\n";
    BODY.to_owned()
}
/// Generic failure XML body: an empty root carrying `status_code="400"`.
fn error_xml() -> String {
    const BODY: &str = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"400\"></root>\n";
    BODY.to_owned()
}
@@ -0,0 +1,306 @@
//! The 4-phase GameStream pairing state machine (over HTTP), keyed by `uniqueid`. Proves
//! both sides know the PIN (via the SHA-256(salt||pin) AES-ECB key) and own their certs
//! (RSA signatures), then pins the client cert. The final `pairchallenge` happens over
//! HTTPS (handled in `nvhttp`). Byte-exact spec: `docs/research/…-research.json`.
use super::cert::ServerIdentity;
use super::crypto;
use anyhow::{anyhow, bail, Context, Result};
use rsa::pkcs1v15::{Signature, VerifyingKey};
use rsa::pkcs8::DecodePublicKey;
use rsa::signature::{SignatureEncoding, Signer, Verifier};
use rsa::RsaPublicKey;
use sha2::Sha256;
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Mutex;
use std::time::Duration;
use tokio::sync::Notify;
/// Out-of-band PIN delivery. Moonlight generates + displays a PIN; the user submits it
/// (via the management API's `POST /api/v1/pair/pin` or nvhttp's `GET /pin?pin=NNNN`).
/// `getservercert` parks until a PIN arrives.
pub struct PinGate {
/// Single-slot mailbox: the most recently submitted PIN that has not been consumed yet.
pin: Mutex<Option<String>>,
/// Wakes parked handshakes when a PIN is submitted.
notify: Notify,
/// Handshakes currently parked in [`take`](Self::take) — drives the management API's
/// `pin_pending` so a control pane knows when to prompt for the PIN.
waiters: AtomicUsize,
}
impl PinGate {
    /// An empty gate: no PIN stored, no handshake waiting.
    fn new() -> Self {
        PinGate {
            pin: Mutex::new(None),
            notify: Notify::new(),
            waiters: AtomicUsize::new(0),
        }
    }

    /// Store `pin` (replacing any PIN that has not been consumed yet) and wake every
    /// handshake currently parked in [`take`](Self::take).
    pub fn submit(&self, pin: String) {
        *self.pin.lock().unwrap() = Some(pin);
        self.notify.notify_waiters();
    }

    /// True while a pairing handshake is parked waiting for the user's PIN.
    pub fn awaiting_pin(&self) -> bool {
        self.waiters.load(Ordering::SeqCst) > 0
    }

    /// Wait up to `timeout` for a PIN and consume it; `None` if none arrives in time.
    async fn take(&self, timeout: Duration) -> Option<String> {
        self.waiters.fetch_add(1, Ordering::SeqCst);
        // Decrement on every exit path (PIN delivered, timeout, or future cancellation).
        struct WaiterGuard<'a>(&'a AtomicUsize);
        impl Drop for WaiterGuard<'_> {
            fn drop(&mut self) {
                self.0.fetch_sub(1, Ordering::SeqCst);
            }
        }
        let _guard = WaiterGuard(&self.waiters);
        let deadline = tokio::time::Instant::now() + timeout;
        loop {
            // Create the `Notified` future BEFORE checking the slot. `notify_waiters` only
            // reaches futures that already exist, so with the previous order (check, then
            // create) a `submit` landing between the two was missed and the handshake slept
            // until the deadline although the PIN was already stored.
            let notified = self.notify.notified();
            // Take the PIN in its own statement so the mutex guard is dropped before awaiting.
            let pending = self.pin.lock().unwrap().take();
            if let Some(pin) = pending {
                return Some(pin);
            }
            if tokio::time::timeout_at(deadline, notified).await.is_err() {
                return None;
            }
        }
    }
}
/// Per-client pairing session carried across the 4 separate HTTP GETs.
struct Session {
/// AES-128-ECB key derived from the salt and the PIN in phase 1.
aes_key: [u8; 16],
/// Client certificate (DER), its signature bytes and its RSA public key, all parsed
/// from the `clientcert` parameter in phase 1.
client_cert_der: Vec<u8>,
client_cert_sig: Vec<u8>,
client_pubkey: RsaPublicKey,
/// Random values generated in phase 2 (zeroed until then).
serversecret: [u8; 16],
server_challenge: [u8; 16],
/// The client's phase-3 hash, recomputed + checked in phase 4.
client_hash: Vec<u8>,
}
/// Pairing state shared by all `/pair` requests: the in-flight sessions and the PIN gate.
pub struct Pairing {
/// In-flight handshakes, keyed by the client's `uniqueid`.
sessions: Mutex<HashMap<String, Session>>,
/// Where the user-submitted PIN is delivered; phase 1 waits on it.
pub pin: PinGate,
}
impl Pairing {
    /// Creates a pairing state with no sessions and an empty PIN gate.
    pub fn new() -> Self {
        Pairing {
            sessions: Mutex::new(HashMap::new()),
            pin: PinGate::new(),
        }
    }

    /// Phase 1: store the client cert, await the PIN, derive the AES key, return our cert.
    ///
    /// `salt_hex` must decode to at least 16 bytes (only the first 16 are used) and
    /// `clientcert_hex` to a PEM certificate. Waits up to 300 s for a PIN.
    ///
    /// # Errors
    /// Malformed hex, a short salt, an unparsable client certificate, or no PIN within
    /// the timeout.
    pub async fn getservercert(
        &self,
        id: &ServerIdentity,
        uniqueid: &str,
        salt_hex: &str,
        clientcert_hex: &str,
    ) -> Result<String> {
        let salt_bytes = hex::decode(salt_hex).context("salt hex")?;
        if salt_bytes.len() < 16 {
            bail!("salt too short");
        }
        let mut salt = [0u8; 16];
        salt.copy_from_slice(&salt_bytes[..16]);
        let pem_bytes = hex::decode(clientcert_hex).context("clientcert hex")?;
        let (der, sig, pubkey) = parse_client_cert(&pem_bytes)?;
        tracing::info!(
            uniqueid,
            "pairing phase 1 (getservercert) — awaiting PIN: submit `GET /pin?pin=NNNN`"
        );
        let pin = self
            .pin
            .take(Duration::from_secs(300))
            .await
            .ok_or_else(|| anyhow!("no PIN submitted within 300s"))?;
        let aes_key = crypto::pin_key(&salt, &pin);
        // A repeated phase 1 for the same `uniqueid` replaces the previous session.
        self.sessions.lock().unwrap().insert(
            uniqueid.to_string(),
            Session {
                aes_key,
                client_cert_der: der,
                client_cert_sig: sig,
                client_pubkey: pubkey,
                serversecret: [0; 16],
                server_challenge: [0; 16],
                client_hash: Vec::new(),
            },
        );
        tracing::info!(
            uniqueid,
            "pairing phase 1 — PIN accepted, returning host cert"
        );
        let inner = format!(
            "<plaincert>{}</plaincert>",
            hex::encode(id.cert_pem.as_bytes())
        );
        Ok(paired_xml(&inner, true))
    }

    /// Phase 2: decrypt the client challenge, return our hash + server challenge.
    ///
    /// Generates a fresh `serversecret` and `server_challenge` for the session and replies
    /// with the encrypted `hash ‖ server_challenge`.
    ///
    /// # Errors
    /// No session for `uniqueid`, malformed hex, or a decrypted challenge under 16 bytes.
    pub fn clientchallenge(
        &self,
        id: &ServerIdentity,
        uniqueid: &str,
        hexv: &str,
    ) -> Result<String> {
        let mut map = self.sessions.lock().unwrap();
        let s = map
            .get_mut(uniqueid)
            .ok_or_else(|| anyhow!("no pairing session"))?;
        let enc = hex::decode(hexv).context("clientchallenge hex")?;
        let client_challenge = crypto::ecb_decrypt(&s.aes_key, &enc);
        if client_challenge.len() < 16 {
            bail!("short client challenge");
        }
        s.serversecret = crypto::random();
        s.server_challenge = crypto::random();
        let server_hash =
            crypto::sha256(&[&client_challenge[..16], &id.signature, &s.serversecret]);
        let mut plain = Vec::with_capacity(48);
        plain.extend_from_slice(&server_hash);
        plain.extend_from_slice(&s.server_challenge);
        let resp = crypto::ecb_encrypt(&s.aes_key, &plain);
        let inner = format!(
            "<challengeresponse>{}</challengeresponse>",
            hex::encode(resp)
        );
        Ok(paired_xml(&inner, true))
    }

    /// Phase 3: store the client's hash, return our RSA-signed serversecret.
    ///
    /// The reply payload is `serversecret ‖ signature(serversecret)`.
    ///
    /// # Errors
    /// No session for `uniqueid`, malformed hex, or a decrypted response under 32 bytes.
    pub fn serverchallengeresp(
        &self,
        id: &ServerIdentity,
        uniqueid: &str,
        hexv: &str,
    ) -> Result<String> {
        let mut map = self.sessions.lock().unwrap();
        let s = map
            .get_mut(uniqueid)
            .ok_or_else(|| anyhow!("no pairing session"))?;
        let enc = hex::decode(hexv).context("serverchallengeresp hex")?;
        let client_hash = crypto::ecb_decrypt(&s.aes_key, &enc);
        if client_hash.len() < 32 {
            bail!("short challenge response");
        }
        s.client_hash = client_hash[..32].to_vec();
        let sig: Signature = id.signing_key.sign(&s.serversecret);
        let mut secret = Vec::with_capacity(16 + 256);
        secret.extend_from_slice(&s.serversecret);
        secret.extend_from_slice(&sig.to_vec());
        let inner = format!("<pairingsecret>{}</pairingsecret>", hex::encode(secret));
        Ok(paired_xml(&inner, true))
    }

    /// Phase 4: verify the client knew the PIN (hash match) and owns its cert (RSA verify);
    /// on success, pin the client cert.
    ///
    /// `hexv` decodes to `client_secret (16 bytes) ‖ signature`. A completed handshake —
    /// accepted or rejected — always discards the session, so its key material is not
    /// retained and the same session cannot be replayed. A verification failure is
    /// reported as `<paired>0</paired>`, not as an `Err`.
    ///
    /// # Errors
    /// No session for `uniqueid`, malformed hex, or a payload under 16 bytes (the session
    /// is kept in these cases).
    pub fn clientpairingsecret(
        &self,
        uniqueid: &str,
        hexv: &str,
        paired_store: &Mutex<Vec<Vec<u8>>>,
    ) -> Result<String> {
        let mut map = self.sessions.lock().unwrap();
        let s = map
            .get_mut(uniqueid)
            .ok_or_else(|| anyhow!("no pairing session"))?;
        let data = hex::decode(hexv).context("clientpairingsecret hex")?;
        if data.len() < 16 {
            bail!("short pairing secret");
        }
        let client_secret = &data[..16];
        let client_sig = &data[16..];
        let expected = crypto::sha256(&[&s.server_challenge, &s.client_cert_sig, client_secret]);
        let hash_ok = expected[..] == s.client_hash[..];
        let sig_ok = verify256(&s.client_pubkey, client_secret, client_sig).is_ok();
        let paired = hash_ok && sig_ok;
        if paired {
            {
                let mut store = paired_store.lock().unwrap();
                store.push(s.client_cert_der.clone());
                super::save_paired(&store);
            }
            tracing::info!(uniqueid, "pairing phase 4 — SUCCESS, client cert pinned");
        } else {
            tracing::warn!(
                uniqueid,
                hash_ok,
                sig_ok,
                "pairing phase 4 — FAILED (PIN/cert)"
            );
        }
        // The handshake is over either way: drop the session (previously it leaked on
        // success and stayed in the map indefinitely).
        map.remove(uniqueid);
        Ok(paired_xml("", paired))
    }
}
/// Checks `sig` over `msg` against `pubkey` using a SHA-256 based verifying key.
///
/// # Errors
/// `sig` cannot be parsed as a signature, or it does not verify.
fn verify256(pubkey: &RsaPublicKey, msg: &[u8], sig: &[u8]) -> Result<()> {
    let signature = Signature::try_from(sig).context("parse client signature")?;
    VerifyingKey::<Sha256>::new(pubkey.clone())
        .verify(msg, &signature)
        .context("verify client signature")
}
/// Splits a PEM-encoded client certificate into the pieces the pairing handshake needs:
/// `(certificate contents, certificate signature value, RSA public key)`.
///
/// # Errors
/// The input is not valid PEM, not a parsable X.509 certificate, or its public key is
/// not an RSA key.
fn parse_client_cert(pem_bytes: &[u8]) -> Result<(Vec<u8>, Vec<u8>, RsaPublicKey)> {
    let pem = match x509_parser::pem::parse_x509_pem(pem_bytes) {
        Ok((_rest, pem)) => pem,
        Err(e) => return Err(anyhow!("client cert pem: {e}")),
    };
    let der = pem.contents.clone();
    let cert = pem.parse_x509().context("parse client x509")?;
    let sig = cert.signature_value.data.to_vec();
    let key_der = cert.public_key().raw;
    let pubkey = RsaPublicKey::from_public_key_der(key_der).context("client rsa pubkey")?;
    Ok((der, sig, pubkey))
}
/// Wraps `inner` in the pairing reply envelope:
/// `<root status_code="200"><paired>0|1</paired> inner </root>`.
///
/// `inner` is inserted verbatim (no escaping); `paired` becomes `1` or `0`.
fn paired_xml(inner: &str, paired: bool) -> String {
    let flag = paired as u8;
    format!(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"200\">\n<paired>{}</paired>\n{}</root>\n",
        flag,
        inner
    )
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    /// The management API's pairing UX relies on `awaiting_pin`: it must read true while a
    /// `take` is parked and false again after both the delivered and the timeout exit.
    #[tokio::test]
    async fn pin_gate_reports_waiting() {
        let pairing = Arc::new(Pairing::new());
        assert!(!pairing.pin.awaiting_pin());

        let parked = Arc::clone(&pairing);
        let waiter = tokio::spawn(async move { parked.pin.take(Duration::from_secs(5)).await });
        // Poll until the spawned task has registered itself as a waiter.
        loop {
            if pairing.pin.awaiting_pin() {
                break;
            }
            tokio::time::sleep(Duration::from_millis(2)).await;
        }

        pairing.pin.submit(String::from("1234"));
        let delivered = waiter.await.unwrap();
        assert_eq!(delivered.as_deref(), Some("1234"));
        assert!(!pairing.pin.awaiting_pin());

        // Timeout path also clears the flag.
        let timed_out = pairing.pin.take(Duration::from_millis(10)).await;
        assert_eq!(timed_out, None);
        assert!(!pairing.pin.awaiting_pin());
    }
}
@@ -0,0 +1,318 @@
//! The GameStream RTSP handshake (TCP 48010). Hand-rolled because GameStream's RTSP is
//! non-standard (streamid= targets, the literal `DEADBEEFCAFE` session, the X-SS-* headers)
//! and off-the-shelf RTSP crates assume standard semantics. Sequence Moonlight drives:
//! OPTIONS → DESCRIBE → SETUP(audio/video/control) → ANNOUNCE → PLAY. ANNOUNCE carries the
//! negotiated stream config; PLAY is where the media stages start (P1.3+).
//!
//! Runs on its own native thread (control-plane setup, not the per-frame hot path), one
//! thread per connection. Plaintext only for now (encryption is negotiated; P1.5).
use super::audio;
use super::stream::{self, StreamConfig};
use super::{AppState, AUDIO_PORT, CONTROL_PORT, RTSP_PORT, VIDEO_PORT};
use crate::encode::Codec;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::io::{Read, Write};
use std::net::{TcpListener, TcpStream};
use std::sync::atomic::Ordering;
use std::sync::Arc;
/// Opaque payload the client echoes as its first UDP datagram (port-learning). Sent in the
/// SETUP response as the `X-SS-Ping-Payload` / `X-SS-Connect-Data` header value.
/// NOTE(review): this is a compile-time constant, so every session receives the same
/// value — it is not actually per-session; confirm that is intended.
const PING_PAYLOAD: &str = "0011223344556677";
/// Bind 48010 and accept RTSP connections on a dedicated thread.
pub fn spawn(state: Arc<AppState>) -> Result<()> {
let listener = TcpListener::bind(("0.0.0.0", RTSP_PORT))
.with_context(|| format!("bind RTSP {RTSP_PORT}"))?;
tracing::info!(port = RTSP_PORT, "RTSP listening");
std::thread::Builder::new()
.name("punktfunk-rtsp".into())
.spawn(move || {
for conn in listener.incoming() {
match conn {
Ok(stream) => {
let st = state.clone();
std::thread::spawn(move || {
if let Err(e) = handle_conn(stream, st) {
tracing::warn!(error = %format!("{e:#}"), "RTSP connection ended");
}
});
}
Err(e) => tracing::warn!(error = %e, "RTSP accept failed"),
}
}
})
.context("spawn RTSP thread")?;
Ok(())
}
/// One parsed RTSP request.
struct Request {
    /// First whitespace-separated token of the request line (e.g. `OPTIONS`); empty if absent.
    method: String,
    /// Second token of the request line; empty if absent.
    uri: String,
    /// Trimmed value of the `CSeq` header, `"0"` when the header is missing.
    cseq: String,
    /// The complete header section (request line + headers), without the blank-line terminator.
    head: String,
    /// Message body (lossily decoded as UTF-8); empty when there is none.
    body: String,
}
fn handle_conn(mut stream: TcpStream, state: Arc<AppState>) -> Result<()> {
let peer = stream.peer_addr().ok();
let mut buf: Vec<u8> = Vec::new();
// GameStream RTSP is one request per TCP connection: moonlight-common-c reads the
// response until EOF, so we answer one message and close the connection (which signals
// the end of the response). Session state lives in `AppState`, not the connection.
if let Some(req) = read_message(&mut stream, &mut buf)? {
tracing::info!(
method = %req.method, cseq = %req.cseq,
"RTSP {} | {}", req.head.replace("\r\n", " | "),
if req.body.is_empty() { String::new() } else { format!("body: {}", req.body.replace("\r\n", " | ")) }
);
let resp = handle_request(&req, &state);
stream.write_all(resp.as_bytes()).context("RTSP write")?;
stream.flush().ok();
// Close (FIN after the flushed response) so the client detects end-of-response.
let _ = stream.shutdown(std::net::Shutdown::Both);
}
let _ = peer;
Ok(())
}
/// Read one complete RTSP message (headers + any Content-Length body) from the stream,
/// buffering across reads and leaving any pipelined remainder in `buf`.
fn read_message(stream: &mut TcpStream, buf: &mut Vec<u8>) -> Result<Option<Request>> {
loop {
if let Some(end) = find_subslice(buf, b"\r\n\r\n") {
let head = std::str::from_utf8(&buf[..end]).context("RTSP header utf8")?;
let content_len = header_value(head, "content-length")
.and_then(|v| v.trim().parse::<usize>().ok())
.unwrap_or(0);
let total = end + 4 + content_len;
if buf.len() < total {
// headers complete but body still arriving — read more
} else {
let head = head.to_string();
let body = String::from_utf8_lossy(&buf[end + 4..total]).into_owned();
buf.drain(..total);
return Ok(Some(parse_request(&head, body)));
}
}
let mut tmp = [0u8; 8192];
let n = stream.read(&mut tmp).context("RTSP read")?;
if n == 0 {
return Ok(None); // peer closed
}
buf.extend_from_slice(&tmp[..n]);
}
}
/// Builds a [`Request`] from a header section and an already-extracted body.
///
/// Method and URI are the first two whitespace-separated tokens of the first line (empty
/// when missing); `cseq` falls back to `"0"` when there is no `CSeq` header.
fn parse_request(head: &str, body: String) -> Request {
    let request_line = head.split("\r\n").next().unwrap_or("");
    let mut tokens = request_line.split_whitespace();
    let method = tokens.next().unwrap_or_default().to_owned();
    let uri = tokens.next().unwrap_or_default().to_owned();
    let cseq = header_value(head, "cseq").map_or("0", str::trim).to_owned();
    Request {
        method,
        uri,
        cseq,
        head: head.to_owned(),
        body,
    }
}
/// Dispatches one parsed RTSP request on its method and returns the full response text.
///
/// Side effects by method: ANNOUNCE stores the negotiated config in `state.stream`; PLAY
/// starts the video and audio streams (each guarded by its own atomic flag so it starts at
/// most once); TEARDOWN clears both flags. Unknown SETUP targets get `404`, unknown
/// methods `501`; everything else answers `200 OK`.
fn handle_request(req: &Request, state: &AppState) -> String {
    match req.method.as_str() {
        "OPTIONS" => response(
            &req.cseq,
            &[("Public", "OPTIONS DESCRIBE SETUP ANNOUNCE PLAY TEARDOWN")],
            None,
        ),
        "DESCRIBE" => response(
            &req.cseq,
            &[("Content-Type", "application/sdp")],
            Some(&describe_sdp()),
        ),
        "SETUP" => {
            // Pick the server port for the requested stream and the header name under which
            // the ping payload is returned.
            let (port, extra_key) = match stream_type(&req.uri) {
                Some("audio") => (AUDIO_PORT, "X-SS-Ping-Payload"),
                Some("video") => (VIDEO_PORT, "X-SS-Ping-Payload"),
                Some("control") => (CONTROL_PORT, "X-SS-Connect-Data"),
                _ => return response_status("404 Not Found", &req.cseq, &[], None),
            };
            let transport = format!("server_port={port}");
            response(
                &req.cseq,
                &[
                    ("Session", "DEADBEEFCAFE;timeout = 90"),
                    ("Transport", &transport),
                    (extra_key, PING_PAYLOAD),
                ],
                None,
            )
        }
        "ANNOUNCE" => {
            let map = parse_announce(&req.body);
            match stream_config(&map) {
                Some(cfg) => {
                    tracing::info!(?cfg, "RTSP ANNOUNCE — negotiated stream config");
                    *state.stream.lock().unwrap() = Some(cfg);
                }
                // A previously stored config (if any) is left in place; the reply is still 200.
                None => tracing::warn!("RTSP ANNOUNCE — missing required video config keys"),
            }
            response(&req.cseq, &[], None)
        }
        "PLAY" => {
            // Copy the config out so the lock is not held while starting the stream.
            let cfg = *state.stream.lock().unwrap();
            match cfg {
                // `swap` returns the previous value: only the caller that flips the flag
                // from false to true starts the stream.
                Some(cfg) if !state.streaming.swap(true, Ordering::SeqCst) => {
                    // Resolve the launched catalog entry (session recipe) for the stream.
                    let app = state
                        .launch
                        .lock()
                        .unwrap()
                        .map(|l| l.appid)
                        .and_then(super::apps::by_id);
                    tracing::info!(app = ?app.as_ref().map(|a| &a.title), "RTSP PLAY — starting video stream");
                    stream::start(
                        cfg,
                        app,
                        state.streaming.clone(),
                        state.force_idr.clone(),
                        state.video_cap.clone(),
                    );
                }
                Some(_) => tracing::info!("RTSP PLAY — stream already running"),
                None => tracing::warn!("RTSP PLAY — no negotiated config (ANNOUNCE missing)"),
            }
            // Audio runs independently (stereo Opus on UDP 48000); it needs the launch key for
            // the AES-CBC payload encryption the client expects.
            let launch = *state.launch.lock().unwrap();
            if let Some(ls) = launch {
                if !state.audio_streaming.swap(true, Ordering::SeqCst) {
                    tracing::info!("RTSP PLAY — starting audio stream");
                    audio::start(
                        state.audio_streaming.clone(),
                        ls.gcm_key,
                        ls.rikeyid,
                        state.audio_cap.clone(),
                    );
                }
            }
            response(&req.cseq, &[("Session", "DEADBEEFCAFE;timeout = 90")], None)
        }
        "TEARDOWN" => {
            // Signal both stream threads to stop.
            state.streaming.store(false, Ordering::SeqCst);
            state.audio_streaming.store(false, Ordering::SeqCst);
            response(&req.cseq, &[], None)
        }
        other => {
            tracing::warn!(method = other, "RTSP unsupported method");
            response_status("501 Not Implemented", &req.cseq, &[], None)
        }
    }
}
/// Host capability SDP returned by DESCRIBE. Advertises HEVC + AV1 and no encryption
/// (plaintext streams for now; P1.5 adds the negotiated AES paths).
///
/// Every line, including the last one, is terminated by CRLF.
fn describe_sdp() -> String {
    // Line-oriented a=key:value, matching what moonlight-common-c scans for.
    const LINES: [&str; 6] = [
        "a=x-ss-general.featureFlags:0",
        "a=x-ss-general.encryptionSupported:0",
        "a=x-ss-general.encryptionRequested:0",
        "sprop-parameter-sets=AAAAAU", // HEVC capability indicator
        "a=rtpmap:98 AV1/90000",       // AV1 capability indicator
        // Opus config the client matches by channel count (Sunshine emits one per config):
        // surround-params = channelCount, streams, coupledStreams, then the channel mapping.
        // The client negotiated stereo, so advertise just that.
        "a=fmtp:97 surround-params=21101", // stereo: 2ch, 1 stream, 1 coupled, mapping [0,1]
    ];
    let mut sdp = String::new();
    for line in LINES.iter() {
        sdp.push_str(line);
        sdp.push_str("\r\n");
    }
    sdp
}
/// Collects the `a=key:value` lines of an ANNOUNCE SDP body into a map.
///
/// The key is everything between `a=` and the first `:`; the value is the rest of the line,
/// untrimmed. Lines without the `a=` prefix or without a `:` are ignored; for a repeated
/// key the last occurrence wins.
fn parse_announce(body: &str) -> HashMap<String, String> {
    body.lines()
        .filter_map(|line| line.strip_prefix("a=")?.split_once(':'))
        .map(|(key, value)| (key.to_owned(), value.to_owned()))
        .collect()
}
/// Turns the ANNOUNCE key/value map into a [`StreamConfig`].
///
/// Viewport width, viewport height and packet size are mandatory — `None` is returned when
/// any of them is missing or not a `u32`. Defaults for the rest: 60 fps (also used for a
/// zero `maxFPS`), 20 000 kbps, H.264 for an unknown/missing bitstream format, and a FEC
/// floor of 2 (capped at 16).
fn stream_config(map: &HashMap<String, String>) -> Option<StreamConfig> {
    let number = |key: &str| -> Option<u32> { map.get(key)?.trim().parse::<u32>().ok() };

    let codec = match map.get("x-nv-vqos[0].bitStreamFormat").map(|s| s.trim()) {
        Some("1") => Codec::H265,
        Some("2") => Codec::Av1,
        _ => Codec::H264,
    };
    // Parity floor the client asks for (protects small frames); clamp to a sane max.
    let min_fec = number("x-nv-vqos[0].fec.minRequiredFecPackets").map_or(2, |n| n.min(16)) as u8;

    Some(StreamConfig {
        width: number("x-nv-video[0].clientViewportWd")?,
        height: number("x-nv-video[0].clientViewportHt")?,
        fps: number("x-nv-video[0].maxFPS")
            .filter(|&f| f > 0)
            .unwrap_or(60),
        packet_size: number("x-nv-video[0].packetSize")? as usize,
        bitrate_kbps: number("x-nv-vqos[0].bw.maximumBitrateKbps").unwrap_or(20_000),
        codec,
        min_fec,
    })
}
/// Returns the stream kind named in a SETUP URI such as `…/streamid=video/0/0`.
///
/// Only `audio`, `video` and `control` are recognised; anything else (or a URI without
/// `streamid=`) yields `None`.
fn stream_type(uri: &str) -> Option<&str> {
    uri.split("streamid=")
        .nth(1)
        .and_then(|rest| rest.split('/').next())
        .filter(|kind| matches!(*kind, "audio" | "video" | "control"))
}
/// Shorthand for a `200 OK` response; see [`response_status`] for the layout.
fn response(cseq: &str, headers: &[(&str, &str)], body: Option<&str>) -> String {
    const STATUS_OK: &str = "200 OK";
    response_status(STATUS_OK, cseq, headers, body)
}
/// Serialises an RTSP/1.0 response: status line, `CSeq`, the given headers in order, a
/// `Content-Length` header (byte length of `body`, `0` when absent), a blank line, then
/// the body. No escaping or validation is applied to any argument.
fn response_status(
    status: &str,
    cseq: &str,
    headers: &[(&str, &str)],
    body: Option<&str>,
) -> String {
    let payload = body.unwrap_or("");
    let header_lines: String = headers
        .iter()
        .map(|(k, v)| format!("{k}: {v}\r\n"))
        .collect();
    let mut out = format!("RTSP/1.0 {status}\r\nCSeq: {cseq}\r\n");
    out += &header_lines;
    out += &format!("Content-Length: {}\r\n\r\n", payload.len());
    out += payload;
    out
}
/// Returns the index of the first occurrence of `needle` in `hay`, or `None`.
///
/// An empty `needle` matches at index 0 (`slice::windows` would panic on a zero width).
fn find_subslice(hay: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    hay.windows(needle.len()).position(|w| w == needle)
}
/// Looks up a header in a CRLF-separated header section.
///
/// The name comparison ignores ASCII case and surrounding whitespace. Returns the value of
/// the first matching line with leading whitespace removed (trailing whitespace is kept),
/// or `None` when no line matches.
fn header_value<'a>(head: &'a str, key_lower: &str) -> Option<&'a str> {
    for line in head.split("\r\n") {
        if let Some((name, value)) = line.split_once(':') {
            if name.trim().eq_ignore_ascii_case(key_lower) {
                return Some(value.trim_start());
            }
        }
    }
    None
}
@@ -0,0 +1,42 @@
//! The `/serverinfo` capability/status XML Moonlight GETs before pairing and each launch.
use super::{Host, APP_VERSION, GFE_VERSION, SERVER_CODEC_MODE_SUPPORT};
/// Build the `<root status_code="200">…</root>` serverinfo document. `https` selects the
/// paired-HTTPS variant (non-zero MAC, `PairStatus` 1); plain HTTP reports an all-zero MAC
/// and `PairStatus` 0. Element names are case-sensitive and match what
/// moonlight-common-c parses.
pub fn serverinfo_xml(host: &Host, https: bool) -> String {
    // MAC is hidden (all zeroes) over plain HTTP.
    // NOTE(review): the HTTPS value is a fixed placeholder, not the host's actual MAC.
    let mac = if https {
        "01:02:03:04:05:06"
    } else {
        "00:00:00:00:00:00"
    };
    // Over the mutual-TLS HTTPS port the peer is an authenticated (paired) client, so
    // PairStatus simply mirrors `https`; it is not looked up per client in the pairing
    // store (a hardening follow-up).
    let pair_status = u8::from(https);
    format!(
        r#"<?xml version="1.0" encoding="utf-8"?>
<root status_code="200">
<hostname>{hostname}</hostname>
<appversion>{APP_VERSION}</appversion>
<GfeVersion>{GFE_VERSION}</GfeVersion>
<uniqueid>{uniqueid}</uniqueid>
<HttpsPort>{https_port}</HttpsPort>
<ExternalPort>{http_port}</ExternalPort>
<MaxLumaPixelsHEVC>1869449984</MaxLumaPixelsHEVC>
<mac>{mac}</mac>
<LocalIP>{local_ip}</LocalIP>
<ServerCodecModeSupport>{SERVER_CODEC_MODE_SUPPORT}</ServerCodecModeSupport>
<PairStatus>{pair_status}</PairStatus>
<currentgame>0</currentgame>
<state>SUNSHINE_SERVER_FREE</state>
</root>
"#,
        hostname = host.hostname,
        uniqueid = host.uniqueid,
        https_port = host.https_port,
        http_port = host.http_port,
        local_ip = host.local_ip,
    )
}
@@ -0,0 +1,478 @@
//! The video data plane: on RTSP PLAY, learn the client's UDP endpoint (it pings the video
//! port), then run capture → NVENC encode → [`VideoPacketizer`] → UDP send. The source is
//! either real portal desktop capture (`PUNKTFUNK_VIDEO_SOURCE=portal`, the M0 PipeWire path) or
//! a synthetic test pattern (default). Runs on its own native thread.
use super::video::{FrameType, VideoPacketizer};
use super::VIDEO_PORT;
use crate::capture::{self, Capturer, FastSyntheticCapturer};
use crate::encode::{self, Codec};
use anyhow::{Context, Result};
use rand::Rng;
use std::net::UdpSocket;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
/// Negotiated video parameters from the RTSP ANNOUNCE.
#[derive(Clone, Copy, Debug)]
pub struct StreamConfig {
    /// Requested frame width (passed to dimension validation and the virtual output mode).
    pub width: u32,
    /// Requested frame height.
    pub height: u32,
    /// Requested frame rate; the stream loop clamps it to 1..=240 for pacing.
    pub fps: u32,
    /// Packet size handed to the video packetizer.
    pub packet_size: usize,
    /// Target bitrate in kilobits per second (multiplied by 1000 for the encoder).
    pub bitrate_kbps: u32,
    /// Video codec to encode with.
    pub codec: Codec,
    /// Client's `x-nv-vqos[0].fec.minRequiredFecPackets` — parity floor per FEC block.
    pub min_fec: u8,
}
/// Slot for the persistent screen capturer, shared with the control plane and reused across
/// streams so a reconnect doesn't open a second (conflicting) screencast session.
/// `None` while no capturer has been created yet or while a running stream has it borrowed.
pub type CapturerSlot = Arc<std::sync::Mutex<Option<Box<dyn Capturer>>>>;
/// Spawn the video stream thread (idempotent via `running`). Stops when `running` clears.
/// `force_idr` is set by the control stream on a client recovery request; `video_cap` holds
/// the persistent capturer the thread borrows for the stream's duration.
pub fn start(
cfg: StreamConfig,
app: Option<super::apps::AppEntry>,
running: Arc<AtomicBool>,
force_idr: Arc<AtomicBool>,
video_cap: CapturerSlot,
) {
let _ = std::thread::Builder::new()
.name("punktfunk-video".into())
.spawn(move || {
tracing::info!(?cfg, "video stream starting");
if let Err(e) = run(cfg, app.as_ref(), &running, &force_idr, &video_cap) {
tracing::error!(error = %format!("{e:#}"), "video stream failed");
}
running.store(false, Ordering::SeqCst);
tracing::info!("video stream stopped");
});
}
/// Body of the video thread: validate the requested mode, learn the client's UDP endpoint
/// from its first datagram on the video port, pick the frame source, and stream until
/// `running` clears or an error occurs.
///
/// Source selection: `PUNKTFUNK_VIDEO_SOURCE=virtual` creates a compositor virtual output
/// at the client's resolution (not pooled); otherwise the capturer stored in `video_cap` is
/// reused, or created on first use (`portal` → portal capture, anything else → synthetic
/// pattern) and put back into `video_cap` when the stream ends.
///
/// # Errors
/// Invalid dimensions, socket failures, no client datagram within 10 s, or any
/// capture/encode setup or streaming error.
fn run(
    cfg: StreamConfig,
    app: Option<&super::apps::AppEntry>,
    running: &Arc<AtomicBool>,
    force_idr: &AtomicBool,
    video_cap: &std::sync::Mutex<Option<Box<dyn Capturer>>>,
) -> Result<()> {
    // Reject an out-of-range client mode before allocating capture/encode buffers.
    encode::validate_dimensions(cfg.codec, cfg.width, cfg.height)
        .context("client-requested video mode")?;
    let sock = UdpSocket::bind(("0.0.0.0", VIDEO_PORT)).context("bind video UDP")?;
    // The client pings the video port so we learn where to send; it re-pings until video
    // flows, so a missed early ping is fine.
    sock.set_read_timeout(Some(Duration::from_secs(10)))?;
    tracing::info!(
        port = VIDEO_PORT,
        "video: awaiting client ping to learn endpoint"
    );
    // The datagram's content is not inspected; only its source address is used.
    let mut probe = [0u8; 256];
    let (_, client) = sock
        .recv_from(&mut probe)
        .context("video: no client ping within 10s")?;
    sock.connect(client)
        .context("connect client video endpoint")?;
    tracing::info!(%client, "video: client endpoint learned");
    // Native client-resolution source: create a compositor virtual output sized to the client's
    // request and capture it (no scaling). Self-contained — deliberately NOT pooled in
    // `video_cap`, since a reconnect at a different resolution needs a freshly-sized output; the
    // output is released when this capturer drops at stream end (RAII via its keepalive).
    if std::env::var("PUNKTFUNK_VIDEO_SOURCE").as_deref() == Ok("virtual") {
        // The launched app picks the compositor (e.g. gamescope for game entries) and the
        // nested command; env vars remain manual overrides / fallbacks.
        let compositor = app
            .and_then(|a| a.compositor)
            .map(Ok)
            .unwrap_or_else(|| crate::vdisplay::detect().context("detect compositor"))?;
        if let Some(cmd) = app.and_then(|a| a.cmd.as_deref()) {
            // The gamescope backend reads the nested command from this env var; setting it
            // per-launch is safe (one stream session at a time).
            // NOTE(review): this mutates the process environment from the video thread —
            // confirm no other thread reads the environment concurrently.
            std::env::set_var("PUNKTFUNK_GAMESCOPE_APP", cmd);
        }
        tracing::info!(
            ?compositor,
            app = ?app.map(|a| &a.title),
            w = cfg.width,
            h = cfg.height,
            "video source: virtual display (native client resolution)"
        );
        let mut vd = crate::vdisplay::open(compositor).context("open virtual display")?;
        let vout = vd
            .create(punktfunk_core::Mode {
                width: cfg.width,
                height: cfg.height,
                refresh_hz: cfg.fps,
            })
            .context("create virtual output at client resolution")?;
        let mut capturer =
            capture::capture_virtual_output(vout).context("capture virtual output")?;
        capturer.set_active(true);
        return stream_body(&mut *capturer, &sock, cfg, running, force_idr);
    }
    // Reuse the persistent capturer (one screencast session → clean reconnect); create it on
    // the first stream. Borrow it for this stream and return it on exit.
    let mut capturer: Box<dyn Capturer> = match video_cap.lock().unwrap().take() {
        Some(c) => {
            tracing::info!("video source: reusing capturer");
            c
        }
        None if std::env::var("PUNKTFUNK_VIDEO_SOURCE").is_ok_and(|v| v == "portal") => {
            tracing::info!("video source: portal desktop capture");
            capture::open_portal_monitor().context("open portal capturer")?
        }
        None => {
            tracing::info!("video source: synthetic test pattern");
            Box::new(FastSyntheticCapturer::new(cfg.width, cfg.height))
        }
    };
    capturer.set_active(true);
    let result = stream_body(&mut *capturer, &sock, cfg, running, force_idr);
    // Deactivate and hand the capturer back whether the stream ended cleanly or with an error.
    capturer.set_active(false);
    *video_cap.lock().unwrap() = Some(capturer);
    result
}
/// One frame's packets, handed from the encode thread to the send thread. Each inner
/// `Vec<u8>` is sent as one UDP datagram.
type PacketBatch = Vec<Vec<u8>>;
/// Send `pkts` with as few syscalls as possible (`sendmmsg`, up to 64 per call). The socket is
/// connected, so no per-message address. Returns an error on the first send failure.
///
/// A call interrupted by a signal (`EINTR`) is retried rather than reported, and a call
/// that makes no progress is turned into an error instead of looping forever.
#[cfg(target_os = "linux")]
fn sendmmsg_all(sock: &UdpSocket, pkts: &[Vec<u8>]) -> std::io::Result<()> {
    use std::os::fd::AsRawFd;
    const CHUNK: usize = 64;
    let fd = sock.as_raw_fd();
    for chunk in pkts.chunks(CHUNK) {
        // One iovec per packet, pointing straight at the packet's buffer (no copy).
        let mut iovs: Vec<libc::iovec> = chunk
            .iter()
            .map(|p| libc::iovec {
                iov_base: p.as_ptr() as *mut libc::c_void,
                iov_len: p.len(),
            })
            .collect();
        let mut hdrs: Vec<libc::mmsghdr> = iovs
            .iter_mut()
            .map(|iov| {
                // SAFETY: `mmsghdr` is a plain C struct of integers and raw pointers, for
                // which the all-zero bit pattern is a valid value.
                let mut h: libc::mmsghdr = unsafe { std::mem::zeroed() };
                h.msg_hdr.msg_iov = iov;
                h.msg_hdr.msg_iovlen = 1;
                h
            })
            .collect();
        // The kernel may accept only part of the vector; resume after the last sent message.
        let mut off = 0usize;
        while off < hdrs.len() {
            // SAFETY: `hdrs[off..]` holds `hdrs.len() - off` initialised headers; each one
            // points at an element of `iovs`, which points into `chunk`. `iovs`, `hdrs` and
            // `chunk` all outlive this call, and the kernel only reads the payloads.
            let n = unsafe {
                libc::sendmmsg(fd, hdrs[off..].as_mut_ptr(), (hdrs.len() - off) as u32, 0)
            };
            if n < 0 {
                let err = std::io::Error::last_os_error();
                if err.kind() == std::io::ErrorKind::Interrupted {
                    continue; // EINTR: nothing was sent, retry the same range
                }
                return Err(err);
            }
            if n == 0 {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::WriteZero,
                    "sendmmsg sent no messages",
                ));
            }
            off += n as usize;
        }
    }
    Ok(())
}
/// Portable fallback (non-Linux dev builds — GameStream hosting never ships there): one
/// syscall per packet. Stops at, and returns, the first send error.
#[cfg(not(target_os = "linux"))]
fn sendmmsg_all(sock: &UdpSocket, pkts: &[Vec<u8>]) -> std::io::Result<()> {
    pkts.iter()
        .try_for_each(|pkt| sock.send(pkt).map(drop))
}
/// Dedicated send thread: one [`PacketBatch`] per frame arrives on `rx`; its packets go out in
/// `sendmmsg` chunks, paced so the frame's data spreads over ~3/4 of the frame interval
/// (microburst shaping at chunk granularity — a real link drops line-rate bursts; the encode
/// thread is never blocked by this). On send failure (client gone) it clears `running`.
///
/// `drop_pct` (0 = off) randomly discards that percentage of packets before sending. The
/// thread exits when the channel's sender side is dropped or a send fails.
///
/// # Errors
/// Only if the thread itself cannot be spawned.
fn spawn_sender(
    sock: UdpSocket,
    rx: std::sync::mpsc::Receiver<PacketBatch>,
    frame_interval: Duration,
    running: Arc<AtomicBool>,
    drop_pct: u32,
) -> Result<()> {
    std::thread::Builder::new()
        .name("punktfunk-send".into())
        .spawn(move || {
            // Chunk pacing: 16 packets per burst, bursts spread across the send budget.
            const PACE_CHUNK: usize = 16;
            let budget = frame_interval.mul_f32(0.75);
            let mut rng = rand::thread_rng();
            let mut sent: u64 = 0;
            let mut dropped: u64 = 0;
            while let Ok(mut batch) = rx.recv() {
                if drop_pct > 0 {
                    // Keep a packet when the draw from 0..100 is at or above `drop_pct`.
                    batch.retain(|_| {
                        let keep = rng.gen_range(0..100) >= drop_pct;
                        if !keep {
                            dropped += 1;
                        }
                        keep
                    });
                }
                let n = batch.len();
                if n == 0 {
                    continue;
                }
                // Time slice per chunk: the budget split in proportion to the chunk's share
                // of the batch (the whole budget when the batch fits in one chunk).
                let per_chunk = budget.mul_f64((PACE_CHUNK as f64 / n as f64).min(1.0));
                let start = Instant::now();
                for (i, chunk) in batch.chunks(PACE_CHUNK).enumerate() {
                    if let Err(e) = sendmmsg_all(&sock, chunk) {
                        tracing::info!(error = %e, sent, "video: client unreachable — stopping stream");
                        running.store(false, Ordering::SeqCst);
                        return;
                    }
                    sent += chunk.len() as u64;
                    // Sleep toward the next chunk's deadline; skip sub-500µs sleeps (jitter).
                    let target = start + per_chunk.mul_f64((i + 1) as f64);
                    if let Some(ahead) = target.checked_duration_since(Instant::now()) {
                        if ahead >= Duration::from_micros(500) {
                            std::thread::sleep(ahead);
                        }
                    }
                }
            }
            // Reached only when the channel closed; the send-failure path returns above.
            tracing::debug!(sent, dropped, "video sender exiting");
        })
        .context("spawn send thread")?;
    Ok(())
}
/// The encode → packetize loop, over a borrowed capturer. Sending runs on a dedicated thread
/// (see [`spawn_sender`]) so a send spike can never stall capture/encode.
///
/// Runs until `running` clears or the send thread has exited. `force_idr` is consumed
/// (reset to false) each time it is observed set. Environment knobs read here:
/// `PUNKTFUNK_FEC_PCT`, `PUNKTFUNK_VIDEO_DROP`, `PUNKTFUNK_PERF`.
///
/// # Errors
/// Capture, encoder open/submit/poll, socket clone, or send-thread spawn failures.
fn stream_body(
    capturer: &mut dyn Capturer,
    sock: &UdpSocket,
    cfg: StreamConfig,
    running: &Arc<AtomicBool>,
    force_idr: &AtomicBool,
) -> Result<()> {
    // The first frame establishes the authoritative size/format for the encoder.
    let mut frame = capturer.next_frame().context("capture first frame")?;
    if frame.width != cfg.width || frame.height != cfg.height {
        tracing::warn!(
            captured = ?(frame.width, frame.height),
            negotiated = ?(cfg.width, cfg.height),
            "captured size != negotiated size — Moonlight expects the negotiated size; resize the output"
        );
    }
    let mut enc = encode::open_video(
        cfg.codec,
        frame.format,
        frame.width,
        frame.height,
        cfg.fps,
        cfg.bitrate_kbps as u64 * 1000,
        frame.is_cuda(),
    )
    .context("open NVENC for stream")?;
    // FEC overhead percent (Sunshine default 20). Override with PUNKTFUNK_FEC_PCT (0 = data-only).
    let fec_pct: u8 = std::env::var("PUNKTFUNK_FEC_PCT")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(20);
    let mut pk = VideoPacketizer::new(cfg.packet_size, fec_pct, cfg.min_fec);
    // Pace at the client's negotiated frame rate, re-encoding the last captured frame when the
    // compositor produced no new one. Compositors only emit frames on damage, so a static or
    // slow-updating desktop would otherwise starve the client into a "network too slow" abort.
    // Re-encoding an unchanged frame is cheap — NVENC emits a near-empty P-frame. The upper
    // bound just guards against an absurd client request (the encoder is opened at `cfg.fps`).
    let target_fps = cfg.fps.clamp(1, 240);
    let frame_interval = Duration::from_secs_f64(1.0 / target_fps as f64);
    let mut fps_count: u32 = 0;
    let mut fps_t = Instant::now();
    let stream_start = Instant::now();
    // Test knob: drop this % of outbound packets to exercise FEC recovery (0 = off).
    let drop_pct: u32 = std::env::var("PUNKTFUNK_VIDEO_DROP")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(0);
    let mut sent_batches: u64 = 0;
    let mut dropped_batches: u64 = 0;
    // The send thread: one frame's batch at a time over a small bounded queue. Depth 2 means a
    // slow send can buffer one frame while the next encodes; beyond that the NEWEST batch is
    // dropped (the client recovers via FEC/RFI) rather than ever stalling the encode loop.
    let (batch_tx, batch_rx) = std::sync::mpsc::sync_channel::<PacketBatch>(2);
    spawn_sender(
        sock.try_clone().context("clone video socket")?,
        batch_rx,
        Duration::from_secs_f64(1.0 / target_fps as f64),
        running.clone(),
        drop_pct,
    )?;
    // Per-stage timing (PUNKTFUNK_PERF=1): max µs/stage per second + unique vs re-encoded frames,
    // to pinpoint stalls. `unique` counts genuinely-new captured frames (vs re-encoded holds).
    // The knob is on whenever the variable is set, whatever its value.
    let perf = std::env::var_os("PUNKTFUNK_PERF").is_some();
    let (mut mx_cap, mut mx_enc, mut mx_pkt, mut mx_send, mut mx_pkts, mut uniq) =
        (0u128, 0u128, 0u128, 0u128, 0usize, 0u32);
    // Absolute next-frame deadline — the single pacing clock for the loop.
    let mut next_frame = Instant::now();
    while running.load(Ordering::SeqCst) {
        let tick = Instant::now();
        // Advance to the freshest captured frame if one arrived; otherwise reuse the last.
        if let Some(f) = capturer.try_latest().context("capture frame")? {
            frame = f;
            uniq += 1;
        }
        let t_cap = tick.elapsed();
        // Honor a client recovery request (RFI / request-IDR): force a keyframe so the client
        // resyncs immediately instead of waiting for the next GOP boundary.
        if force_idr.swap(false, Ordering::SeqCst) {
            enc.request_keyframe();
        }
        enc.submit(&frame).context("encoder submit")?;
        let t_enc = tick.elapsed();
        // 90 kHz RTP timestamp from wall-clock, so a variable capture rate stays correct.
        let ts = (stream_start.elapsed().as_secs_f64() * 90_000.0) as u32;
        // Drain every access unit the encoder has ready; all of them share this tick's timestamp.
        let mut batch: Vec<Vec<u8>> = Vec::new();
        while let Some(au) = enc.poll().context("encoder poll")? {
            let ft = if au.keyframe {
                FrameType::Idr
            } else {
                FrameType::P
            };
            batch.extend(pk.packetize(&au.data, ft, ts));
        }
        let t_pkt = tick.elapsed();
        // Hand the frame's packets to the send thread; never block here. A full queue means
        // the sender is behind — drop this batch (FEC/RFI covers the client) and keep encoding.
        let n = batch.len();
        if n > 0 {
            match batch_tx.try_send(batch) {
                Ok(()) => sent_batches += 1,
                Err(std::sync::mpsc::TrySendError::Full(_)) => {
                    dropped_batches += 1;
                    // Log at 1, 2, 4, 8, … drops so a sustained backlog does not flood the log.
                    if dropped_batches.is_power_of_two() {
                        tracing::warn!(dropped_batches, "video: send queue full — frame dropped");
                    }
                }
                Err(std::sync::mpsc::TrySendError::Disconnected(_)) => {
                    break; // sender exited (client gone)
                }
            }
        }
        if perf {
            let t_send = tick.elapsed();
            mx_cap = mx_cap.max(t_cap.as_micros());
            mx_enc = mx_enc.max((t_enc - t_cap).as_micros());
            mx_pkt = mx_pkt.max((t_pkt - t_enc).as_micros());
            mx_send = mx_send.max((t_send - t_pkt).as_micros());
            mx_pkts = mx_pkts.max(n);
        }
        fps_count += 1;
        if fps_t.elapsed() >= Duration::from_secs(1) {
            if perf {
                // Max µs/stage this second: cap=drain channel, enc=submit (zero-copy device
                // copy + NVENC), pkt=poll+FEC+packetize, send=hand-off of the batch to the
                // send thread's queue (the paced network send itself runs on that thread and
                // is not measured here). `uniq`=new captured frames (vs re-encoded).
                // `pkts`=max packets in one frame (IDR spike).
                tracing::info!(
                    fps = fps_count,
                    uniq,
                    enc_us = mx_enc,
                    pkt_us = mx_pkt,
                    send_us = mx_send,
                    cap_us = mx_cap,
                    max_pkts = mx_pkts,
                    "video: streaming (perf)"
                );
                mx_cap = 0;
                mx_enc = 0;
                mx_pkt = 0;
                mx_send = 0;
                mx_pkts = 0;
                uniq = 0;
            } else {
                tracing::info!(
                    fps = fps_count,
                    sent_batches,
                    dropped_batches,
                    "video: streaming"
                );
            }
            fps_count = 0;
            fps_t = Instant::now();
        }
        // Single pacing authority: hold a steady cadence at the target rate from an absolute
        // clock. No double-sleep. If a slow frame put us behind, resync to now rather than
        // bursting to catch up.
        next_frame += frame_interval;
        match next_frame.checked_duration_since(Instant::now()) {
            Some(d) => std::thread::sleep(d),
            None => next_frame = Instant::now(),
        }
    }
    // Dropping `batch_tx` on return closes the channel, which lets the send thread exit.
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Drives the send thread end to end over loopback: every packet of every batch pushed
    /// on the channel must reach the peer socket, complete and byte-identical, through the
    /// paced `sendmmsg` path.
    #[test]
    fn sender_delivers_batches() {
        const FRAMES: u8 = 3;
        const PER_FRAME: u8 = 100;

        let receiver = UdpSocket::bind("127.0.0.1:0").unwrap();
        receiver
            .set_read_timeout(Some(Duration::from_secs(3)))
            .unwrap();
        let sender = UdpSocket::bind("127.0.0.1:0").unwrap();
        sender.connect(receiver.local_addr().unwrap()).unwrap();

        let running = Arc::new(AtomicBool::new(true));
        let (tx, rx) = std::sync::mpsc::sync_channel::<PacketBatch>(2);
        // ~120fps frame interval
        let frame_interval = Duration::from_millis(8);
        spawn_sender(sender, rx, frame_interval, running.clone(), 0).unwrap();

        // 3 frames of 100 packets, content-tagged for verification.
        let mut expected: Vec<Vec<u8>> = Vec::new();
        for frame in 0..FRAMES {
            let mut batch = PacketBatch::with_capacity(PER_FRAME as usize);
            for idx in 0..PER_FRAME {
                let mut pkt = vec![0u8; 1200];
                pkt[0] = frame;
                pkt[1] = idx;
                batch.push(pkt);
            }
            expected.extend_from_slice(&batch);
            tx.send(batch).unwrap();
        }
        drop(tx); // sender drains then exits

        let mut buf = [0u8; 2048];
        let mut got = 0usize;
        while got < expected.len() {
            let n = receiver.recv(&mut buf).expect("packet within timeout");
            assert_eq!(n, 1200);
            let frame = buf[0] as usize;
            let idx = buf[1] as usize;
            assert_eq!(&buf[..n], &expected[frame * 100 + idx][..], "payload intact");
            got += 1;
        }
        assert_eq!(got, 300);
        assert!(running.load(Ordering::SeqCst), "no spurious client-gone");
    }
}
@@ -0,0 +1,99 @@
//! TLS for the HTTPS nvhttp port (47984). Moonlight does **mutual TLS** — it presents its
//! client cert and expects the server to request one — so a plain server-auth config makes
//! the post-pairing `pairchallenge` fail. This config requests the client cert and verifies
//! the client owns its key, but (for now) accepts any well-formed cert; enforcing the
//! paired allow-list (rejecting unpaired clients on /launch) is a follow-up hardening step.
use anyhow::{anyhow, Context, Result};
use rustls::client::danger::HandshakeSignatureValid;
use rustls::crypto::{verify_tls12_signature, verify_tls13_signature, CryptoProvider};
use rustls::pki_types::{CertificateDer, UnixTime};
use rustls::server::danger::{ClientCertVerified, ClientCertVerifier};
use rustls::{DigitallySignedStruct, DistinguishedName, ServerConfig, SignatureScheme};
use std::sync::Arc;
/// Client-certificate verifier for the mutual-TLS port. It requests a client certificate and
/// checks the handshake signature against it, but accepts any certificate (the pairing
/// handshake is the real proof). Pinning to the paired set is a hardening follow-up.
#[derive(Debug)]
struct AcceptAnyClientCert {
/// Crypto provider whose signature-verification algorithms back the TLS 1.2/1.3
/// handshake-signature checks.
provider: Arc<CryptoProvider>,
}
impl ClientCertVerifier for AcceptAnyClientCert {
    /// Always ask the connecting client for a certificate.
    fn offer_client_auth(&self) -> bool {
        true
    }

    /// Report client authentication as mandatory.
    fn client_auth_mandatory(&self) -> bool {
        true
    }

    /// No root-subject hints are advertised: the list is empty.
    fn root_hint_subjects(&self) -> &[DistinguishedName] {
        &[]
    }

    /// Accept every presented certificate without inspecting it. Key ownership is still
    /// proven by the handshake-signature checks below.
    fn verify_client_cert(
        &self,
        _leaf: &CertificateDer,
        _chain: &[CertificateDer],
        _at: UnixTime,
    ) -> Result<ClientCertVerified, rustls::Error> {
        Ok(ClientCertVerified::assertion())
    }

    /// Check a TLS 1.2 handshake signature with the provider's algorithms.
    fn verify_tls12_signature(
        &self,
        message: &[u8],
        cert: &CertificateDer,
        dss: &DigitallySignedStruct,
    ) -> Result<HandshakeSignatureValid, rustls::Error> {
        let algorithms = &self.provider.signature_verification_algorithms;
        verify_tls12_signature(message, cert, dss, algorithms)
    }

    /// Check a TLS 1.3 handshake signature with the provider's algorithms.
    fn verify_tls13_signature(
        &self,
        message: &[u8],
        cert: &CertificateDer,
        dss: &DigitallySignedStruct,
    ) -> Result<HandshakeSignatureValid, rustls::Error> {
        let algorithms = &self.provider.signature_verification_algorithms;
        verify_tls13_signature(message, cert, dss, algorithms)
    }

    /// Signature schemes the provider is able to verify.
    fn supported_verify_schemes(&self) -> Vec<SignatureScheme> {
        let algorithms = &self.provider.signature_verification_algorithms;
        algorithms.supported_schemes()
    }
}
/// Build the mutual-TLS `ServerConfig` for the HTTPS port.
///
/// `cert_pem` supplies the host certificate chain and `key_pem` the host private key (the
/// first key found in the PEM is used). Client certificates are requested and checked by
/// [`AcceptAnyClientCert`].
///
/// # Errors
/// Fails when either PEM cannot be parsed, when `key_pem` holds no private key, or when
/// rustls rejects the protocol versions or the certificate/key pair.
pub fn server_config(cert_pem: &str, key_pem: &str) -> Result<Arc<ServerConfig>> {
    let crypto = Arc::new(rustls::crypto::aws_lc_rs::default_provider());

    let mut cert_reader = cert_pem.as_bytes();
    let chain = rustls_pemfile::certs(&mut cert_reader)
        .collect::<std::result::Result<Vec<_>, _>>()
        .context("parse host cert PEM")?;

    let mut key_reader = key_pem.as_bytes();
    let maybe_key = rustls_pemfile::private_key(&mut key_reader).context("parse host key PEM")?;
    let Some(private_key) = maybe_key else {
        return Err(anyhow!("no private key in host key PEM"));
    };

    let client_verifier = Arc::new(AcceptAnyClientCert {
        provider: Arc::clone(&crypto),
    });
    let tls = ServerConfig::builder_with_provider(crypto)
        .with_safe_default_protocol_versions()
        .context("rustls protocol versions")?
        .with_client_cert_verifier(client_verifier)
        .with_single_cert(chain, private_key)
        .context("rustls server cert")?;
    Ok(Arc::new(tls))
}
@@ -0,0 +1,312 @@
//! GameStream video wire packetization: an encoded access unit → UDP datagrams a stock
//! Moonlight client decodes (and recovers under loss). Each datagram is
//! `RTP_PACKET(12, big-endian) + reserved[4] + NV_VIDEO_PACKET(16, little-endian) + payload`
//! and the frame's bitstream is prefixed with an 8-byte `video_short_frame_header_t`, then
//! striped into ≤4 FEC blocks of ≤255 shards. Byte-exact spec:
//! `docs/research/gamestream-protocol-research.json` (video plane).
//!
//! FEC (P1.5): each block carries `m = ⌈k·pct/100⌉` ReedSolomon parity shards generated by
//! `punktfunk_core::fec::Gf8Coder` (the nanors-compatible Cauchy GF(2⁸) coder). Crucially, RS runs
//! over the **whole `blocksize` shard** — Moonlight decodes over `packetSize + 16` bytes from
//! the datagram start (`RtpVideoQueue.c`), and rejects a recovered shard whose reconstructed
//! `flags` byte isn't valid — so the NV header fields RS must reproduce (streamPacketIndex,
//! frameIndex, flags, multiFec*) are written into the data shards **before** encoding, and only
//! the transport fields (RTP header/seq/timestamp + fecInfo) are stamped **after**, matching
//! Sunshine `stream.cpp`. `pct = 0` falls back to data-shards-only. Plaintext (AES-GCM video
//! encryption is negotiated off for now).
use punktfunk_core::fec::{ErasureCoder, Gf8Coder};
/// RTP `header` byte: version 2 (0x80) | extension (0x10) — Moonlight keys on the extension.
const RTP_HEADER_BYTE: u8 = 0x80 | 0x10;
/// NV `flags` bit set on every data shard.
const FLAG_PIC: u8 = 0x1;
/// NV `flags` bit set on the last data shard of a frame.
const FLAG_EOF: u8 = 0x2;
/// NV `flags` bit set on the first data shard of a frame.
const FLAG_SOF: u8 = 0x4;
/// Value written to byte 26 of every data shard (the NV `multiFecFlags` byte).
const MULTI_FEC_FLAGS: u8 = 0x10;
/// Per-block data-shard cap used when no parity is generated.
const MAX_DATA_SHARDS_PER_BLOCK: usize = 255;
/// Upper bound on the number of FEC blocks one frame is striped into.
const MAX_FEC_BLOCKS: usize = 4;
/// Per-shard header: RTP(12) + reserved(4) + NV_VIDEO_PACKET(16).
const SHARD_HEADER: usize = 32;
/// Kind of encoded access unit handed to the packetizer; it selects the frame-type byte of
/// the 8-byte short frame header.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrameType {
/// IDR frame — written as frame-type byte 2.
Idr,
/// P frame — written as frame-type byte 1.
P,
}
/// Splits encoded access units into GameStream video datagrams (data + FEC parity shards).
/// Holds the negotiated sizes plus the running frame and packet counters, so one instance
/// serves one stream.
pub struct VideoPacketizer {
/// Negotiated `packetSize` (ANNOUNCE `x-nv-video[0].packetSize`).
packet_size: usize,
/// Per-shard payload bytes = `blocksize - SHARD_HEADER`, `blocksize = packetSize + 16`.
payload_per_shard: usize,
/// Requested FEC overhead percent (0 = data shards only). The wire carries the recomputed
/// per-block `(100·m)/k` so Moonlight derives the same parity count.
fec_percentage: usize,
/// Minimum parity shards per block (the client's `fec.minRequiredFecPackets`) — protects
/// small frames whose `⌈k·pct/100⌉` would otherwise be just 1.
min_fec: usize,
/// Index stamped into the next frame's shards; advanced (wrapping) once per `packetize` call.
frame_index: u32,
/// Monotonic per-stream packet counter (the RTP sequence / streamPacketIndex source).
seq: u32,
}
impl VideoPacketizer {
    /// Create a packetizer for one stream.
    ///
    /// * `packet_size` — negotiated `packetSize`; each shard carries `packet_size - 16`
    ///   payload bytes, so it must be larger than 16 (a zero payload size would make
    ///   `packetize` divide by zero).
    /// * `fec_percentage` — requested parity overhead in percent; 0 disables parity.
    /// * `min_fec` — minimum parity shards per block when parity is enabled.
    pub fn new(packet_size: usize, fec_percentage: u8, min_fec: u8) -> Self {
        VideoPacketizer {
            packet_size,
            payload_per_shard: packet_size + 16 - SHARD_HEADER,
            fec_percentage: fec_percentage as usize,
            min_fec: min_fec as usize,
            frame_index: 0,
            seq: 0,
        }
    }

    /// Packetize one encoded AU into wire datagrams (data shards + Cauchy RS parity shards).
    ///
    /// Returns the datagrams in send order: for each FEC block, its data shards followed by
    /// its parity shards. Every datagram is `SHARD_HEADER + payload_per_shard` bytes long.
    /// The frame index and the packet sequence counter advance with wrapping arithmetic.
    ///
    /// If a frame is too large for `MAX_FEC_BLOCKS` blocks that each leave room for parity,
    /// or if the coder fails, the block is sent without parity and its advertised FEC
    /// percentage is 0, so the wire header never promises parity shards that are not sent.
    pub fn packetize(
        &mut self,
        au: &[u8],
        frame_type: FrameType,
        timestamp_90k: u32,
    ) -> Vec<Vec<u8>> {
        let frame_index = self.frame_index;
        self.frame_index = self.frame_index.wrapping_add(1);
        let pps = self.payload_per_shard;
        let blocksize = SHARD_HEADER + pps; // = packet_size + 16
        let pct = self.fec_percentage;

        // frame payload = 8-byte short frame header + the AU bitstream.
        let total_len = 8 + au.len();
        let last_payload_len = match total_len % pps {
            0 => pps,
            r => r,
        };
        let mut fp = Vec::with_capacity(total_len);
        fp.extend_from_slice(&short_frame_header(frame_type, last_payload_len as u16));
        fp.extend_from_slice(au);
        let total_data = total_len.div_ceil(pps).max(1);

        // With parity, cap per-block data so k + m ≤ 255 (the GF(2⁸) ceiling): parity for k
        // data shards is ⌈k·pct/100⌉, so k ≤ 255·100/(100+pct).
        let max_data = if pct > 0 {
            (255 * 100) / (100 + pct)
        } else {
            MAX_DATA_SHARDS_PER_BLOCK
        };
        let n_blocks = total_data.div_ceil(max_data).clamp(1, MAX_FEC_BLOCKS);
        let per_block = total_data.div_ceil(n_blocks);
        let mut packets = Vec::with_capacity(total_data + total_data * pct / 100 + n_blocks);

        for b in 0..n_blocks {
            let first = b * per_block;
            let last = ((b + 1) * per_block).min(total_data);
            if first >= last {
                break;
            }
            let k = last - first;
            let block_seq_base = self.seq;
            let multi_fec_blocks = ((b as u8) << 4) | (((n_blocks - 1) as u8) << 6);

            // 1. Build this block's k data-shard datagrams (full `blocksize`), writing the NV
            //    header fields RS must reproduce on recovery (streamPacketIndex, frameIndex,
            //    flags, multiFec*). The RTP header + fecInfo are left zero (stamped post-RS).
            let mut shards: Vec<Vec<u8>> = Vec::with_capacity(k);
            for i in 0..k {
                let global = first + i;
                // Wrapping, like the parity path below: the counter is allowed to roll over.
                let seq = block_seq_base.wrapping_add(i as u32);
                let mut buf = vec![0u8; blocksize];
                let mut flags = FLAG_PIC;
                if global == 0 {
                    flags |= FLAG_SOF;
                }
                if global == total_data - 1 {
                    flags |= FLAG_EOF;
                }
                buf[16..20].copy_from_slice(&(seq << 8).to_le_bytes()); // streamPacketIndex
                buf[20..24].copy_from_slice(&frame_index.to_le_bytes()); // frameIndex
                buf[24] = flags;
                buf[26] = MULTI_FEC_FLAGS;
                buf[27] = multi_fec_blocks;
                let ps = global * pps;
                let pe = (ps + pps).min(fp.len());
                buf[SHARD_HEADER..SHARD_HEADER + (pe - ps)].copy_from_slice(&fp[ps..pe]);
                shards.push(buf);
            }

            // 2. Request ⌈k·pct/100⌉ parity shards (floored at the client's min, capped so
            //    k+m ≤ 255) over the full datagrams. The cap saturates at 0: when the
            //    4-block clamp leaves a block with k ≥ 255 there is no room for parity, and
            //    a plain `255 - k` would underflow.
            let wanted = if pct > 0 {
                (k * pct)
                    .div_ceil(100)
                    .max(self.min_fec)
                    .min(255usize.saturating_sub(k))
            } else {
                0
            };
            let parity = if wanted > 0 {
                Gf8Coder.encode(&shards, wanted).unwrap_or_default()
            } else {
                Vec::new()
            };
            // The wire percentage is recomputed from the parity shards actually produced,
            // so the client derives the same count even if the coder failed.
            let m = parity.len();
            let wire_pct = if m > 0 { (100 * m) / k } else { 0 };

            // 3. Stamp transport headers (RTP + fecInfo) on every shard. We do NOT touch the
            //    flags/streamPacketIndex bytes, so a recovered data shard's RS-reconstructed
            //    NV header stays valid.
            self.seq = block_seq_base.wrapping_add(k as u32);
            for (i, mut buf) in shards.into_iter().enumerate() {
                let seq = block_seq_base.wrapping_add(i as u32);
                finalize(
                    &mut buf,
                    seq,
                    timestamp_90k,
                    frame_index,
                    multi_fec_blocks,
                    fec_info(k, i, wire_pct),
                );
                packets.push(buf);
            }
            for (j, mut buf) in parity.into_iter().enumerate() {
                let seq = self.seq;
                self.seq = self.seq.wrapping_add(1);
                finalize(
                    &mut buf,
                    seq,
                    timestamp_90k,
                    frame_index,
                    multi_fec_blocks,
                    fec_info(k, k + j, wire_pct),
                );
                packets.push(buf);
            }
        }
        packets
    }
}
/// Pack the `fecInfo` word (serialized little-endian by the caller):
/// `dataShards<<22 | fecIndex<<12 | fecPercentage<<4`.
fn fec_info(k: usize, fec_index: usize, pct: usize) -> u32 {
    let data_shards = (k as u32) << 22;
    let shard_index = (fec_index as u32) << 12;
    let percentage = (pct as u32) << 4;
    data_shards | shard_index | percentage
}
/// Write the transport fields that are stamped after Reed–Solomon encoding into a shard
/// datagram, in place. The NV `flags`/`streamPacketIndex`/`multiFecFlags` bytes are not
/// touched (they are RS-covered). `buf` must be at least 32 bytes long.
fn finalize(
    buf: &mut [u8],
    seq: u32,
    ts_90k: u32,
    frame_index: u32,
    multi_fec_blocks: u8,
    fec_info: u32,
) {
    // RTP header: big-endian fields. Only the low 16 bits of `seq` fit the sequence number.
    let rtp_sequence = (seq as u16).to_be_bytes();
    let rtp_timestamp = ts_90k.to_be_bytes();
    buf[0] = RTP_HEADER_BYTE; // version 2 + extension
    buf[2..4].copy_from_slice(&rtp_sequence);
    buf[4..8].copy_from_slice(&rtp_timestamp); // 90 kHz clock

    // NV header: little-endian fields. frameIndex and the multi-FEC block byte are written
    // again here so parity shards carry them too.
    let frame_index_le = frame_index.to_le_bytes();
    let fec_info_le = fec_info.to_le_bytes();
    buf[20..24].copy_from_slice(&frame_index_le);
    buf[27] = multi_fec_blocks;
    buf[28..32].copy_from_slice(&fec_info_le);
}
/// Build the 8-byte `video_short_frame_header_t` (little-endian) that is prefixed to the AU
/// bitstream.
///
/// Layout: `[0]` headerType = 1, `[1..3]` frame_processing_latency = 0, `[3]` frame type
/// (IDR = 2, P = 1), `[4..6]` length of the last shard's payload, `[6..8]` unknown = 0.
fn short_frame_header(frame_type: FrameType, last_payload_len: u16) -> [u8; 8] {
    let kind: u8 = match frame_type {
        FrameType::Idr => 2,
        FrameType::P => 1,
    };
    let [len_lo, len_hi] = last_payload_len.to_le_bytes();
    [0x01, 0, 0, kind, len_lo, len_hi, 0, 0]
}
#[cfg(test)]
mod tests {
use super::*;
/// Data-only packetization of a small frame: shard count, shard size, RTP header byte,
/// SOF/EOF/PIC flags, `fecInfo` fields and contiguous RTP sequence numbers.
#[test]
fn single_block_layout() {
let mut pk = VideoPacketizer::new(1392, 0, 0); // data-only; pps = 1392+16-32 = 1376
assert_eq!(pk.payload_per_shard, 1376);
let au = vec![0xABu8; 4000]; // 8+4000 = 4008 → ceil(4008/1376) = 3 data shards
let pkts = pk.packetize(&au, FrameType::Idr, 90_000);
assert_eq!(pkts.len(), 3);
for p in &pkts {
assert_eq!(p.len(), SHARD_HEADER + 1376);
assert_eq!(p[0], 0x90); // RTP header byte
}
let first = &pkts[0];
assert_eq!(first[24] & FLAG_SOF, FLAG_SOF);
assert_eq!(first[24] & FLAG_PIC, FLAG_PIC);
let frame_index = u32::from_le_bytes(first[20..24].try_into().unwrap());
assert_eq!(frame_index, 0);
let fec_info = u32::from_le_bytes(first[28..32].try_into().unwrap());
assert_eq!(fec_info >> 22, 3); // dataShards = 3
assert_eq!((fec_info >> 12) & 0x3ff, 0); // fecIndex 0
let last = &pkts[2];
assert_eq!(last[24] & FLAG_EOF, FLAG_EOF);
let fec_info_last = u32::from_le_bytes(last[28..32].try_into().unwrap());
assert_eq!((fec_info_last >> 12) & 0x3ff, 2);
for (i, p) in pkts.iter().enumerate() {
assert_eq!(u16::from_be_bytes(p[2..4].try_into().unwrap()), i as u16);
}
}
/// A large data-only frame is split across several FEC blocks; the last packet's
/// multi-FEC byte (offset 27, top two bits) carries `n_blocks - 1`.
#[test]
fn multi_block_split() {
let mut pk = VideoPacketizer::new(1392, 0, 0); // data-only
let au = vec![0u8; 600_000];
let pkts = pk.packetize(&au, FrameType::P, 0);
let total = (8 + au.len()).div_ceil(1376);
assert_eq!(pkts.len(), total);
let n_blocks = total.div_ceil(255).clamp(1, 4);
let last_block = ((pkts.last().unwrap()[27]) >> 6) & 0x3;
assert_eq!(last_block as usize, n_blocks - 1);
}
/// With FEC enabled, parity shards follow the data shards, every shard advertises the
/// same `dataShards` and recomputed percentage, and sequence numbers stay contiguous.
#[test]
fn emits_parity_shards() {
let mut pk = VideoPacketizer::new(1392, 20, 0); // pps = 1376, 20% FEC
let au = vec![0xABu8; 4000]; // 8+4000 = 4008 → 3 data shards (k=3)
let pkts = pk.packetize(&au, FrameType::Idr, 0);
// m = ceil(3*20/100) = 1 parity shard → 4 packets; wire_pct = 100*1/3 = 33.
assert_eq!(pkts.len(), 4);
for p in &pkts {
let fec_info = u32::from_le_bytes(p[28..32].try_into().unwrap());
assert_eq!(fec_info >> 22, 3); // dataShards = k = 3
assert_eq!((fec_info >> 4) & 0xff, 33); // wire fecPercentage
}
// The parity shard is last: fecIndex = k = 3.
let parity = &pkts[3];
let fec_info = u32::from_le_bytes(parity[28..32].try_into().unwrap());
assert_eq!((fec_info >> 12) & 0x3ff, 3);
// Data shards keep SOF (first) / EOF (last data shard) / PIC.
assert_eq!(pkts[0][24] & FLAG_SOF, FLAG_SOF);
assert_eq!(pkts[2][24] & FLAG_EOF, FLAG_EOF);
// RTP sequence numbers are contiguous across data + parity (0,1,2,3).
for (i, p) in pkts.iter().enumerate() {
assert_eq!(u16::from_be_bytes(p[2..4].try_into().unwrap()), i as u16);
}
}
/// End-to-end recovery: parity over the full datagram reconstructs a dropped data shard's
/// payload AND its NV `flags` byte (the byte Moonlight validates), proving the layout.
#[test]
fn parity_recovers_full_datagram_incl_flags() {
let mut pk = VideoPacketizer::new(1392, 50, 0); // high pct → plenty of parity
let au = vec![0x5Au8; 4000]; // k = 3
let pkts = pk.packetize(&au, FrameType::Idr, 0);
let k = 3usize;
let m = pkts.len() - k;
assert!(m >= 1);
// Drop data shard 1; reconstruct from the rest via the same Cauchy coder.
let mut received: Vec<Option<Vec<u8>>> = pkts.iter().map(|p| Some(p.clone())).collect();
received[1] = None;
let recovered = Gf8Coder.reconstruct(k, m, &mut received).unwrap();
// The recovered shard equals the original data shard's RS-covered bytes: its flags
// byte (offset 24) is PIC (middle shard), proving the NV header recovers correctly.
assert_eq!(recovered[1][24], FLAG_PIC);
// ...and the payload region matches the original.
assert_eq!(recovered[1][SHARD_HEADER..], pkts[1][SHARD_HEADER..]);
}
}
+274
View File
@@ -0,0 +1,274 @@
//! Input injection (plan §4): turn client [`punktfunk_core::input::InputEvent`]s into host input.
//!
//! The headless Sway compositor runs with `WLR_LIBINPUT_NO_DEVICES=1`, so kernel `uinput`
//! devices are never picked up. Instead we inject through the wlroots virtual-input Wayland
//! protocols — `zwlr_virtual_pointer_manager_v1` + `zwp_virtual_keyboard_manager_v1` — which
//! Sway always advertises. We connect as an ordinary Wayland client (the host process
//! inherits Sway's `WAYLAND_DISPLAY`/`XDG_RUNTIME_DIR`), bind the two managers, and translate
//! events into virtual pointer/keyboard requests. Keyboard codes are Linux evdev; we upload a
//! standard evdev/US xkb keymap and track modifier state so the compositor resolves shifted
//! keysyms correctly.
use anyhow::Result;
use punktfunk_core::input::InputEvent;
/// Injects input events into the host session. Not `Send`: an injector owns compositor
/// resources (a Wayland connection, an xkb state) and lives entirely on the control thread
/// that creates it.
pub trait InputInjector {
/// Deliver one client input event to the host session. Failures are reported to the
/// caller; what counts as a failure is up to the backend.
fn inject(&mut self, event: &InputEvent) -> Result<()>;
}
/// Preferred injection backend.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Backend {
/// wlroots virtual pointer + keyboard Wayland protocols — the headless-Sway path.
WlrVirtual,
/// libei via `reis` — Wayland-native (RemoteDesktop portal). Opened through
/// `libei::LibeiInjector` on Linux.
Libei,
/// libei directly against gamescope's own EIS socket (no portal): input lands in the
/// nested game — the SteamOS-like session.
GamescopeEi,
/// `/dev/uinput` — universal fallback (but invisible to `WLR_LIBINPUT_NO_DEVICES=1`).
/// `open` has no injector for this variant yet and returns a "not implemented" error.
Uinput,
}
/// Open an injector for `backend`.
///
/// On Linux the wlroots, libei and gamescope-EIS backends are constructed (the gamescope one
/// reads its socket path from `crate::vdisplay::gamescope_ei_socket_file()`). Every other
/// backend, and every backend on a non-Linux target, yields an error.
pub fn open(backend: Backend) -> Result<Box<dyn InputInjector>> {
    #[cfg(target_os = "linux")]
    {
        let injector: Box<dyn InputInjector> = match backend {
            Backend::WlrVirtual => Box::new(wlr::WlrootsInjector::open()?),
            Backend::Libei => Box::new(libei::LibeiInjector::open()?),
            Backend::GamescopeEi => {
                let source = libei::EiSource::SocketPathFile(
                    crate::vdisplay::gamescope_ei_socket_file().into(),
                );
                Box::new(libei::LibeiInjector::open_with(source)?)
            }
            other => anyhow::bail!("injection backend {other:?} not implemented"),
        };
        Ok(injector)
    }
    #[cfg(not(target_os = "linux"))]
    {
        match backend {
            Backend::WlrVirtual => {
                anyhow::bail!("wlroots virtual input requires Linux + a Wayland compositor")
            }
            Backend::Libei => anyhow::bail!("libei input requires Linux + a RemoteDesktop portal"),
            Backend::GamescopeEi => anyhow::bail!("gamescope EIS input requires Linux"),
            other => anyhow::bail!("injection backend {other:?} not implemented"),
        }
    }
}
/// Choose the injection backend for the current session.
///
/// Precedence:
/// 1. `PUNKTFUNK_INPUT_BACKEND=wlr|libei|gamescope|uinput` (plus the aliases matched below)
///    forces a backend; an unknown value is logged and ignored.
/// 2. `PUNKTFUNK_COMPOSITOR=gamescope` selects gamescope's own EIS server (no portal).
/// 3. Otherwise `XDG_CURRENT_DESKTOP` decides: KDE and GNOME implement the RemoteDesktop
///    portal but not the wlr protocols, so they get libei; everything else (wlroots/Sway,
///    which only implements the ScreenCast portal) gets the wlr virtual-input protocols.
pub fn default_backend() -> Backend {
    if let Ok(raw) = std::env::var("PUNKTFUNK_INPUT_BACKEND") {
        let choice = raw.trim().to_ascii_lowercase();
        let forced = match choice.as_str() {
            "wlr" | "wlroots" | "wlrvirtual" => Some(Backend::WlrVirtual),
            "libei" | "ei" | "portal" => Some(Backend::Libei),
            "gamescope" | "gamescope-ei" => Some(Backend::GamescopeEi),
            "uinput" => Some(Backend::Uinput),
            other => {
                tracing::warn!(
                    value = other,
                    "unknown PUNKTFUNK_INPUT_BACKEND — auto-detecting"
                );
                None
            }
        };
        if let Some(backend) = forced {
            return backend;
        }
    }

    let gamescope_session = matches!(
        std::env::var("PUNKTFUNK_COMPOSITOR"),
        Ok(v) if v.trim().eq_ignore_ascii_case("gamescope")
    );
    if gamescope_session {
        return Backend::GamescopeEi;
    }

    let desktop = std::env::var("XDG_CURRENT_DESKTOP")
        .unwrap_or_default()
        .to_ascii_uppercase();
    let has_remote_desktop_portal = ["KDE", "GNOME"].iter().any(|name| desktop.contains(name));
    if has_remote_desktop_portal {
        Backend::Libei
    } else {
        Backend::WlrVirtual
    }
}
/// Translate a Windows Virtual-Key code (as sent by Moonlight/GameStream) into the matching
/// Linux evdev key code, or `None` when the key has no mapping. Punctuation follows the US
/// layout.
pub fn vk_to_evdev(vk: u8) -> Option<u16> {
    // evdev codes for VK_A..=VK_Z (0x41..=0x5A); evdev letters follow keyboard rows, not
    // the alphabet, so they need a table.
    const LETTERS: [u16; 26] = [
        30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38, 50, // A..M
        49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, // N..Z
    ];
    // evdev codes for VK_NUMPAD0..=VK_NUMPAD9 (0x60..=0x69): KEY_KP0..KEY_KP9.
    const KEYPAD_DIGITS: [u16; 10] = [82, 79, 80, 81, 75, 76, 77, 71, 72, 73];

    let code: u16 = match vk {
        // Navigation / editing / whitespace.
        0x08 => 14,  // VK_BACK -> KEY_BACKSPACE
        0x09 => 15,  // VK_TAB -> KEY_TAB
        0x0D => 28,  // VK_RETURN -> KEY_ENTER
        0x13 => 119, // VK_PAUSE -> KEY_PAUSE
        0x14 => 58,  // VK_CAPITAL -> KEY_CAPSLOCK
        0x1B => 1,   // VK_ESCAPE -> KEY_ESC
        0x20 => 57,  // VK_SPACE -> KEY_SPACE
        0x21 => 104, // VK_PRIOR -> KEY_PAGEUP
        0x22 => 109, // VK_NEXT -> KEY_PAGEDOWN
        0x23 => 107, // VK_END -> KEY_END
        0x24 => 102, // VK_HOME -> KEY_HOME
        0x25 => 105, // VK_LEFT -> KEY_LEFT
        0x26 => 103, // VK_UP -> KEY_UP
        0x27 => 106, // VK_RIGHT -> KEY_RIGHT
        0x28 => 108, // VK_DOWN -> KEY_DOWN
        0x2C => 99,  // VK_SNAPSHOT -> KEY_SYSRQ
        0x2D => 110, // VK_INSERT -> KEY_INSERT
        0x2E => 111, // VK_DELETE -> KEY_DELETE

        // Side-less and sided modifiers; the side-less ones map to the left key.
        0x10 | 0xA0 => 42, // VK_SHIFT / VK_LSHIFT -> KEY_LEFTSHIFT
        0x11 | 0xA2 => 29, // VK_CONTROL / VK_LCONTROL -> KEY_LEFTCTRL
        0x12 | 0xA4 => 56, // VK_MENU / VK_LMENU -> KEY_LEFTALT
        0xA1 => 54,        // VK_RSHIFT -> KEY_RIGHTSHIFT
        0xA3 => 97,        // VK_RCONTROL -> KEY_RIGHTCTRL
        0xA5 => 100,       // VK_RMENU -> KEY_RIGHTALT

        // Digit row: KEY_0 is 11, KEY_1..KEY_9 are 2..10.
        0x30 => 11,
        0x31..=0x39 => u16::from(vk - 0x31) + 2,

        // Letters.
        0x41..=0x5A => LETTERS[usize::from(vk - 0x41)],

        // Meta / context-menu.
        0x5B => 125, // VK_LWIN -> KEY_LEFTMETA
        0x5C => 126, // VK_RWIN -> KEY_RIGHTMETA
        0x5D => 127, // VK_APPS -> KEY_COMPOSE

        // Numpad.
        0x60..=0x69 => KEYPAD_DIGITS[usize::from(vk - 0x60)],
        0x6A => 55, // VK_MULTIPLY -> KEY_KPASTERISK
        0x6B => 78, // VK_ADD -> KEY_KPPLUS
        0x6C => 96, // VK_SEPARATOR -> KEY_KPENTER
        0x6D => 74, // VK_SUBTRACT -> KEY_KPMINUS
        0x6E => 83, // VK_DECIMAL -> KEY_KPDOT
        0x6F => 98, // VK_DIVIDE -> KEY_KPSLASH

        // Function keys: F1..F10 = 59..68, F11/F12 = 87/88.
        0x70..=0x79 => u16::from(vk - 0x70) + 59,
        0x7A => 87,
        0x7B => 88,

        // Locks.
        0x90 => 69, // VK_NUMLOCK -> KEY_NUMLOCK
        0x91 => 70, // VK_SCROLL -> KEY_SCROLLLOCK

        // OEM punctuation (US layout).
        0xBA => 39, // VK_OEM_1 -> KEY_SEMICOLON
        0xBB => 13, // VK_OEM_PLUS -> KEY_EQUAL
        0xBC => 51, // VK_OEM_COMMA -> KEY_COMMA
        0xBD => 12, // VK_OEM_MINUS -> KEY_MINUS
        0xBE => 52, // VK_OEM_PERIOD -> KEY_DOT
        0xBF => 53, // VK_OEM_2 -> KEY_SLASH
        0xC0 => 41, // VK_OEM_3 -> KEY_GRAVE
        0xDB => 26, // VK_OEM_4 -> KEY_LEFTBRACE
        0xDC => 43, // VK_OEM_5 -> KEY_BACKSLASH
        0xDD => 27, // VK_OEM_6 -> KEY_RIGHTBRACE
        0xDE => 40, // VK_OEM_7 -> KEY_APOSTROPHE
        0xE2 => 86, // VK_OEM_102 -> KEY_102ND

        _ => return None,
    };
    Some(code)
}
/// Translate a GameStream mouse button id (1=left … 5=X2) into the Linux evdev `BTN_*`
/// code; any other id yields `None`.
#[cfg(target_os = "linux")]
fn gs_button_to_evdev(b: u32) -> Option<u32> {
    match b {
        1 => Some(0x110), // BTN_LEFT
        2 => Some(0x112), // BTN_MIDDLE
        3 => Some(0x111), // BTN_RIGHT
        4 => Some(0x113), // BTN_SIDE (X1)
        5 => Some(0x114), // BTN_EXTRA (X2)
        _ => None,
    }
}
#[cfg(target_os = "linux")]
pub mod gamepad;
/// Stub — virtual gamepads need Linux uinput; events are dropped elsewhere.
#[cfg(not(target_os = "linux"))]
pub mod gamepad {
/// No-op stand-in for the Linux gamepad manager; it holds no state.
#[derive(Default)]
pub struct GamepadManager;
impl GamepadManager {
/// Create the stub manager.
pub fn new() -> Self {
GamepadManager
}
/// Ignore the gamepad event.
pub fn handle(&mut self, _ev: &crate::gamestream::gamepad::GamepadEvent) {}
/// Do nothing: `_send` is never called.
pub fn pump_rumble(&mut self, _send: impl FnMut(u16, u16, u16)) {}
}
}
#[cfg(target_os = "linux")]
mod libei;
#[cfg(target_os = "linux")]
mod wlr;
+515
View File
@@ -0,0 +1,515 @@
//! Virtual gamepads via `/dev/uinput`, cloning the kernel `xpad` identity ("Microsoft X-Box
//! 360 pad", `045e:028e`) so SDL/Steam/Proton match their built-in mapping with zero
//! configuration — exactly what Sunshine emulates. One [`VirtualPad`] per attached client
//! controller, managed by [`GamepadManager`] from decoded
//! [`GamepadFrame`](crate::gamestream::gamepad::GamepadFrame)s.
//!
//! Rumble flows the *other* way on the same fd: games upload force-feedback effects
//! (`EV_UINPUT`/`UI_FF_UPLOAD` → `UI_BEGIN/END_FF_UPLOAD` ioctls) and trigger them with
//! `EV_FF` writes; [`GamepadManager::pump_rumble`] services that protocol non-blockingly
//! (the control thread calls it every tick) and reports mixed `(low, high)` motor levels for
//! the host to send to the client. Note: a game's `EVIOCSFF` ioctl BLOCKS until we answer
//! `UI_END_FF_UPLOAD`, so the pump must run regularly.
//!
//! All ioctl numbers/struct layouts below were verified against this generation's
//! `<linux/uinput.h>` on x86_64. `/dev/uinput` needs a udev rule + `input` group membership
//! (see `scripts/60-punktfunk.rules`); creation fails with a clear error otherwise.
use crate::gamestream::gamepad::{self, GamepadFrame, MAX_PADS};
use anyhow::{bail, Result};
use std::collections::HashMap;
use std::os::fd::{AsRawFd, OwnedFd};
use std::time::Instant;
// ioctls (x86_64). These are the fully encoded request numbers; the size byte embedded in
// the struct-carrying ones (e.g. 0x5c = 92 in UI_DEV_SETUP, 0x68 = 104 in the FF upload
// pair) matches the struct sizes asserted at compile time further down.
const UI_DEV_CREATE: libc::c_ulong = 0x5501;
const UI_DEV_DESTROY: libc::c_ulong = 0x5502;
const UI_DEV_SETUP: libc::c_ulong = 0x405c_5503;
const UI_ABS_SETUP: libc::c_ulong = 0x401c_5504;
const UI_SET_EVBIT: libc::c_ulong = 0x4004_5564;
const UI_SET_KEYBIT: libc::c_ulong = 0x4004_5565;
const UI_SET_FFBIT: libc::c_ulong = 0x4004_556b;
const UI_BEGIN_FF_UPLOAD: libc::c_ulong = 0xc068_55c8;
const UI_END_FF_UPLOAD: libc::c_ulong = 0x4068_55c9;
const UI_BEGIN_FF_ERASE: libc::c_ulong = 0xc00c_55ca;
const UI_END_FF_ERASE: libc::c_ulong = 0x400c_55cb;
// Event types/codes.
const EV_SYN: u16 = 0x00;
const EV_KEY: u16 = 0x01;
const EV_ABS: u16 = 0x03;
const EV_FF: u16 = 0x15;
const EV_UINPUT: u16 = 0x0101;
const SYN_REPORT: u16 = 0;
// Codes of the EV_UINPUT events that announce a force-feedback upload / erase request.
const UI_FF_UPLOAD: u16 = 1;
const UI_FF_ERASE: u16 = 2;
// Force-feedback capabilities advertised by the virtual pad.
const FF_RUMBLE: u16 = 0x50;
const FF_GAIN: u16 = 0x60;
// Absolute axes: sticks on X/Y and RX/RY, triggers on Z/RZ, D-pad on the HAT0 pair.
const ABS_X: u16 = 0x00;
const ABS_Y: u16 = 0x01;
const ABS_Z: u16 = 0x02;
const ABS_RX: u16 = 0x03;
const ABS_RY: u16 = 0x04;
const ABS_RZ: u16 = 0x05;
const ABS_HAT0X: u16 = 0x10;
const ABS_HAT0Y: u16 = 0x11;
// Gamepad button key codes.
const BTN_SOUTH: u16 = 0x130; // A
const BTN_EAST: u16 = 0x131; // B
const BTN_NORTH: u16 = 0x133; // X (kernel calls it BTN_NORTH/BTN_X)
const BTN_WEST: u16 = 0x134; // Y
const BTN_TL: u16 = 0x136;
const BTN_TR: u16 = 0x137;
const BTN_SELECT: u16 = 0x13a;
const BTN_START: u16 = 0x13b;
const BTN_MODE: u16 = 0x13c;
const BTN_THUMBL: u16 = 0x13d;
const BTN_THUMBR: u16 = 0x13e;
/// `(GameStream button bit, evdev key code)` — D-pad is emitted as HAT axes instead.
/// The same table drives both capability registration (every key code is enabled on the
/// device) and per-frame button transitions.
const BUTTON_MAP: [(u32, u16); 11] = [
(gamepad::BTN_A, BTN_SOUTH),
(gamepad::BTN_B, BTN_EAST),
(gamepad::BTN_X, BTN_NORTH),
(gamepad::BTN_Y, BTN_WEST),
(gamepad::BTN_LB, BTN_TL),
(gamepad::BTN_RB, BTN_TR),
(gamepad::BTN_BACK, BTN_SELECT),
(gamepad::BTN_START, BTN_START),
(gamepad::BTN_GUIDE, BTN_MODE),
(gamepad::BTN_LS_CLK, BTN_THUMBL),
(gamepad::BTN_RS_CLK, BTN_THUMBR),
];
/// Device identity (bus type, vendor, product, version) embedded in [`UinputSetup`].
#[repr(C)]
struct InputId {
bustype: u16,
vendor: u16,
product: u16,
version: u16,
}
/// Argument of the `UI_DEV_SETUP` ioctl: identity, device name and force-feedback capacity.
#[repr(C)]
struct UinputSetup {
id: InputId,
/// Device name as a zero-padded byte buffer.
name: [u8; 80],
/// Number of force-feedback effect slots the device offers.
ff_effects_max: u32,
}
/// Range description of one absolute axis (embedded in [`UinputAbsSetup`]). Unset fields
/// default to 0.
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct AbsInfo {
value: i32,
minimum: i32,
maximum: i32,
fuzz: i32,
flat: i32,
resolution: i32,
}
/// Argument of the `UI_ABS_SETUP` ioctl: one axis code plus its range.
#[repr(C)]
struct UinputAbsSetup {
/// Axis code (`ABS_*`).
code: u16,
/// Explicit padding so `absinfo` sits at offset 4.
_pad: u16,
absinfo: AbsInfo,
}
/// One raw input event as exchanged with the uinput fd: written when emitting pad state and
/// read back when servicing force feedback.
#[repr(C)]
#[derive(Clone, Copy)]
struct InputEventRaw {
time: libc::timeval,
/// Event type (`EV_*`).
type_: u16,
/// Event code within that type.
code: u16,
value: i32,
}
/// `struct ff_effect` (48 bytes; the union starts 8-aligned at offset 16). The union is kept
/// as opaque bytes and decoded by hand for the effect types that matter.
#[repr(C)]
#[derive(Clone, Copy)]
struct FfEffect {
type_: u16,
id: i16,
direction: u16,
trigger_button: u16,
trigger_interval: u16,
replay_length: u16,
replay_delay: u16,
/// Explicit padding that brings the union to offset 16.
_pad: u16,
/// Union; for `FF_RUMBLE`: `u16 strong_magnitude` at [0..2], `u16 weak_magnitude` at [2..4].
u: [u8; 32],
}
/// Force-feedback upload request record (104 bytes, the size encoded in the
/// `UI_BEGIN_FF_UPLOAD` / `UI_END_FF_UPLOAD` request numbers).
#[repr(C)]
#[derive(Clone, Copy)]
struct UinputFfUpload {
request_id: u32,
/// Result reported back for the request — presumably 0 for success; confirm against
/// the code that fills it in.
retval: i32,
effect: FfEffect,
old: FfEffect,
}
/// Force-feedback erase request record (12 bytes, the size encoded in the
/// `UI_BEGIN_FF_ERASE` / `UI_END_FF_ERASE` request numbers).
#[repr(C)]
#[derive(Clone, Copy)]
struct UinputFfErase {
request_id: u32,
/// Result reported back for the request — presumably 0 for success; confirm against
/// the code that fills it in.
retval: i32,
effect_id: u32,
}
// Layouts verified by compiling a probe against this generation's <linux/uinput.h> (x86_64).
// Evaluated at compile time: a struct whose size drifts from the kernel's breaks the build
// instead of corrupting an ioctl argument at run time.
const _: () = {
assert!(std::mem::size_of::<UinputSetup>() == 92);
assert!(std::mem::size_of::<UinputAbsSetup>() == 28);
assert!(std::mem::size_of::<InputEventRaw>() == 24);
assert!(std::mem::size_of::<FfEffect>() == 48);
assert!(std::mem::size_of::<UinputFfUpload>() == 104);
assert!(std::mem::size_of::<UinputFfErase>() == 12);
};
fn ioctl_int(fd: i32, req: libc::c_ulong, arg: libc::c_int, what: &str) -> Result<()> {
if unsafe { libc::ioctl(fd, req, arg) } < 0 {
bail!("{what}: {}", std::io::Error::last_os_error());
}
Ok(())
}
fn ioctl_ptr<T>(fd: i32, req: libc::c_ulong, arg: *mut T, what: &str) -> Result<()> {
if unsafe { libc::ioctl(fd, req, arg) } < 0 {
bail!("{what}: {}", std::io::Error::last_os_error());
}
Ok(())
}
/// One FF effect a game uploaded: rumble magnitudes + playback state.
struct Effect {
/// Strong-motor magnitude of the rumble effect.
strong: u16,
/// Weak-motor magnitude of the rumble effect.
weak: u16,
/// `Some(deadline)` while playing (replay length 0 = until stopped).
/// NOTE(review): the outer `None` presumably means "not playing" and an inner `None`
/// "playing with no deadline" — confirm against the code that sets this field.
playing: Option<Option<Instant>>,
/// Replay length of the effect, presumably in milliseconds as the name suggests.
replay_ms: u16,
}
/// One virtual X-Box-360 pad backed by a uinput device.
pub struct VirtualPad {
/// The open `/dev/uinput` descriptor; closed when the pad is dropped.
fd: OwnedFd,
/// Button bits of the last applied frame, used to emit key events only on transitions.
prev_buttons: u32,
/// Uploaded force-feedback effects, keyed by effect id.
effects: HashMap<i16, Effect>,
/// Counter for effect ids; starts at 0.
next_effect_id: i16,
/// Force-feedback gain; starts at 0xFFFF.
gain: u32,
/// Last `(low, high)` reported, to dedup.
last_mix: (u16, u16),
}
impl VirtualPad {
/// Create a virtual X-Box 360 pad on `/dev/uinput`.
///
/// Registers the key, absolute-axis and force-feedback capabilities, describes the eight
/// axes, sets the xpad identity and finally creates the device. `index` is only used in the
/// log line.
///
/// # Errors
/// Fails when `/dev/uinput` cannot be opened (the message points at the udev rule) or when
/// any setup ioctl is rejected; the descriptor is closed again in that case.
pub fn create(index: usize) -> Result<VirtualPad> {
use std::os::fd::FromRawFd;
// SAFETY: the path is a valid NUL-terminated C string literal and the flags are plain
// integers. O_NONBLOCK keeps later reads on this fd from blocking.
let raw = unsafe {
libc::open(
c"/dev/uinput".as_ptr(),
libc::O_RDWR | libc::O_NONBLOCK | libc::O_CLOEXEC,
)
};
if raw < 0 {
bail!(
"open /dev/uinput: {} (install the udev rule granting the 'input' group access \
— see scripts/60-punktfunk.rules — and add the user to the 'input' group)",
std::io::Error::last_os_error()
);
}
// SAFETY: `raw` was just returned by a successful `open` and is owned by nobody else.
// From here on the descriptor is closed automatically if a setup step bails out.
let fd = unsafe { OwnedFd::from_raw_fd(raw) };
// Capabilities first: event types, then the individual key and force-feedback bits.
ioctl_int(raw, UI_SET_EVBIT, EV_KEY as i32, "UI_SET_EVBIT(EV_KEY)")?;
ioctl_int(raw, UI_SET_EVBIT, EV_ABS as i32, "UI_SET_EVBIT(EV_ABS)")?;
ioctl_int(raw, UI_SET_EVBIT, EV_FF as i32, "UI_SET_EVBIT(EV_FF)")?;
for (_, key) in BUTTON_MAP {
ioctl_int(raw, UI_SET_KEYBIT, key as i32, "UI_SET_KEYBIT")?;
}
ioctl_int(
raw,
UI_SET_FFBIT,
FF_RUMBLE as i32,
"UI_SET_FFBIT(FF_RUMBLE)",
)?;
ioctl_int(raw, UI_SET_FFBIT, FF_GAIN as i32, "UI_SET_FFBIT(FF_GAIN)")?;
// Axis ranges: sticks span the full signed 16-bit range, triggers 0..=255, and each
// D-pad hat axis is -1/0/1.
let stick = AbsInfo {
minimum: -32768,
maximum: 32767,
fuzz: 16,
flat: 128,
..Default::default()
};
let trigger = AbsInfo {
minimum: 0,
maximum: 255,
..Default::default()
};
let hat = AbsInfo {
minimum: -1,
maximum: 1,
..Default::default()
};
for (code, info) in [
(ABS_X, stick),
(ABS_Y, stick),
(ABS_RX, stick),
(ABS_RY, stick),
(ABS_Z, trigger),
(ABS_RZ, trigger),
(ABS_HAT0X, hat),
(ABS_HAT0Y, hat),
] {
let mut a = UinputAbsSetup {
code,
_pad: 0,
absinfo: info,
};
ioctl_ptr(raw, UI_ABS_SETUP, &mut a, "UI_ABS_SETUP")?;
}
// The xpad identity: SDL keys its built-in mapping off bustype/vendor/product/version.
let mut setup = UinputSetup {
id: InputId {
bustype: 0x0003, // BUS_USB
vendor: 0x045e,
product: 0x028e,
version: 0x0110,
},
name: [0; 80],
ff_effects_max: 16, // must be > 0 or FF uploads are never delivered
};
// The name is shorter than the zeroed buffer, so it stays NUL-terminated.
let name = b"Microsoft X-Box 360 pad";
setup.name[..name.len()].copy_from_slice(name);
ioctl_ptr(raw, UI_DEV_SETUP, &mut setup, "UI_DEV_SETUP")?;
// Last step: the device is created only after all capabilities and the identity are set.
ioctl_int(raw, UI_DEV_CREATE, 0, "UI_DEV_CREATE")?;
tracing::info!(index, "virtual gamepad created (X-Box 360 pad via uinput)");
Ok(VirtualPad {
fd,
prev_buttons: 0,
effects: HashMap::new(),
next_effect_id: 0,
gain: 0xFFFF,
last_mix: (0, 0),
})
}
/// Write a single input event (with a zeroed timestamp) to the pad's uinput fd.
///
/// Best-effort: the result of the write is ignored. A full kernel queue drops the event and
/// the next frame re-syncs state.
fn emit(&self, type_: u16, code: u16, value: i32) {
    let event = InputEventRaw {
        time: libc::timeval {
            tv_sec: 0,
            tv_usec: 0,
        },
        type_,
        code,
        value,
    };
    let len = std::mem::size_of::<InputEventRaw>();
    let ptr = (&event as *const InputEventRaw).cast::<libc::c_void>();
    // SAFETY: `ptr` points at `event`, a live `#[repr(C)]` value of exactly `len` bytes that
    // outlives the call, and the fd is owned by `self`.
    let _ = unsafe { libc::write(self.fd.as_raw_fd(), ptr, len) };
}
/// Apply one decoded frame: button transitions, axes, D-pad hat, one SYN_REPORT.
///
/// Only buttons whose bit changed since the previous frame produce `EV_KEY` events; axes,
/// triggers and the hat are re-sent on every frame. The whole frame is committed with a
/// single `SYN_REPORT`.
pub fn apply(&mut self, f: &GamepadFrame) {
    let changed = self.prev_buttons ^ f.buttons;
    for (bit, key) in BUTTON_MAP {
        if changed & bit != 0 {
            self.emit(EV_KEY, key, ((f.buttons & bit) != 0) as i32);
        }
    }
    self.prev_buttons = f.buttons;

    // Moonlight: +Y = up; evdev: +Y = down → negate. The negation is done in i32 so that
    // -(-32768) cannot overflow, then clamped: the stick axes are declared with a maximum
    // of 32767, and an out-of-range 32768 would wrap to full deflection in the opposite
    // direction for any consumer that narrows the value back to 16 bits.
    let flip_y = |v: i32| (-v).min(i32::from(i16::MAX));
    self.emit(EV_ABS, ABS_X, f.ls_x as i32);
    self.emit(EV_ABS, ABS_Y, flip_y(f.ls_y as i32));
    self.emit(EV_ABS, ABS_RX, f.rs_x as i32);
    self.emit(EV_ABS, ABS_RY, flip_y(f.rs_y as i32));
    self.emit(EV_ABS, ABS_Z, f.left_trigger as i32);
    self.emit(EV_ABS, ABS_RZ, f.right_trigger as i32);

    // The D-pad is also reported as a hat: each axis is -1, 0 or +1 (opposite directions
    // pressed together cancel out to 0).
    let hat_x = ((f.buttons & gamepad::BTN_DPAD_RIGHT != 0) as i32)
        - ((f.buttons & gamepad::BTN_DPAD_LEFT != 0) as i32);
    let hat_y = ((f.buttons & gamepad::BTN_DPAD_DOWN != 0) as i32)
        - ((f.buttons & gamepad::BTN_DPAD_UP != 0) as i32);
    self.emit(EV_ABS, ABS_HAT0X, hat_x);
    self.emit(EV_ABS, ABS_HAT0Y, hat_y);
    self.emit(EV_SYN, SYN_REPORT, 0);
}
/// Service the FF protocol on this pad's fd (non-blocking). Returns the new mixed
/// `(low, high)` motor levels if they changed since last call.
///
/// Three things happen per call:
/// 1. Drain every pending event from the fd: answer effect uploads/erases
///    (`UI_BEGIN_*`/`UI_END_*` ioctl pairs), track the gain, and start/stop effects.
/// 2. Mix all currently playing rumble effects, expiring those whose replay deadline passed.
/// 3. Scale by gain and report the result only when it differs from the previous mix.
fn pump_ff(&mut self) -> Option<(u16, u16)> {
    let raw = self.fd.as_raw_fd();
    let mut buf = [0u8; std::mem::size_of::<InputEventRaw>()];
    loop {
        // SAFETY: `buf` is a live, writable buffer of exactly `buf.len()` bytes.
        let n = unsafe { libc::read(raw, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
        if n != buf.len() as isize {
            break; // EAGAIN / short read — queue drained
        }
        // SAFETY: `buf` was just completely filled with one event record. A byte array is
        // only guaranteed 1-byte alignment, so the record must be copied out with
        // `read_unaligned`; a plain `ptr::read` here would be an unaligned read.
        let ev: InputEventRaw =
            unsafe { std::ptr::read_unaligned(buf.as_ptr() as *const InputEventRaw) };
        match (ev.type_, ev.code) {
            (EV_UINPUT, UI_FF_UPLOAD) => {
                // SAFETY: zero-initialised request struct that the BEGIN ioctl fills in.
                let mut up: UinputFfUpload = unsafe { std::mem::zeroed() };
                up.request_id = ev.value as u32;
                if ioctl_ptr(raw, UI_BEGIN_FF_UPLOAD, &mut up, "UI_BEGIN_FF_UPLOAD").is_ok() {
                    let mut e = up.effect;
                    if e.id == -1 {
                        // No slot assigned yet: hand out the next id.
                        e.id = self.next_effect_id;
                        self.next_effect_id = self.next_effect_id.wrapping_add(1);
                    }
                    if e.type_ == FF_RUMBLE {
                        // First four payload bytes: strong magnitude, then weak magnitude.
                        let strong = u16::from_ne_bytes([e.u[0], e.u[1]]);
                        let weak = u16::from_ne_bytes([e.u[2], e.u[3]]);
                        // Re-uploading an existing id updates it in place and keeps its
                        // playing state.
                        let slot = self.effects.entry(e.id).or_insert(Effect {
                            strong: 0,
                            weak: 0,
                            playing: None,
                            replay_ms: 0,
                        });
                        slot.strong = strong;
                        slot.weak = weak;
                        slot.replay_ms = e.replay_length;
                    }
                    up.effect.id = e.id; // hand the assigned slot back to the kernel
                    up.retval = 0;
                    let _ = ioctl_ptr(raw, UI_END_FF_UPLOAD, &mut up, "UI_END_FF_UPLOAD");
                }
            }
            (EV_UINPUT, UI_FF_ERASE) => {
                // SAFETY: zero-initialised request struct that the BEGIN ioctl fills in.
                let mut er: UinputFfErase = unsafe { std::mem::zeroed() };
                er.request_id = ev.value as u32;
                if ioctl_ptr(raw, UI_BEGIN_FF_ERASE, &mut er, "UI_BEGIN_FF_ERASE").is_ok() {
                    self.effects.remove(&(er.effect_id as i16));
                    er.retval = 0;
                    let _ = ioctl_ptr(raw, UI_END_FF_ERASE, &mut er, "UI_END_FF_ERASE");
                }
            }
            // Gain is kept in 0..=0xFFFF; a negative value casts to a huge u32 and is capped.
            (EV_FF, FF_GAIN) => self.gain = (ev.value as u32).min(0xFFFF),
            (EV_FF, code) => {
                // Play/stop: `code` is the effect id, a non-zero `value` starts it.
                if let Some(e) = self.effects.get_mut(&(code as i16)) {
                    e.playing = if ev.value != 0 {
                        // Inner `None` = no replay length, plays until explicitly stopped.
                        Some((e.replay_ms > 0).then(|| {
                            Instant::now()
                                + std::time::Duration::from_millis(e.replay_ms as u64)
                        }))
                    } else {
                        None
                    };
                }
            }
            _ => {}
        }
    }
    // Mix: sum playing effects (expiring finished ones), scale by gain.
    let now = Instant::now();
    let (mut strong, mut weak) = (0u32, 0u32);
    for e in self.effects.values_mut() {
        if let Some(deadline) = e.playing {
            if deadline.is_some_and(|d| now >= d) {
                e.playing = None;
            } else {
                strong = strong.saturating_add(e.strong as u32);
                weak = weak.saturating_add(e.weak as u32);
            }
        }
    }
    // Linux FF: strong = low-frequency (big) motor, weak = high-frequency motor.
    let low = ((strong.min(0xFFFF) * self.gain) >> 16) as u16;
    let high = ((weak.min(0xFFFF) * self.gain) >> 16) as u16;
    (self.last_mix != (low, high)).then(|| {
        self.last_mix = (low, high);
        (low, high)
    })
}
}
impl Drop for VirtualPad {
    /// Ask the kernel to tear the virtual device down before the fd itself is closed.
    /// The ioctl result is ignored: there is nothing useful to do about a failure here.
    fn drop(&mut self) {
        let raw = self.fd.as_raw_fd();
        // SAFETY: `raw` is the still-open uinput fd owned by `self.fd`.
        let _ = unsafe { libc::ioctl(raw, UI_DEV_DESTROY, 0) };
    }
}
/// All virtual pads of a session, driven from decoded controller events.
pub struct GamepadManager {
    /// One slot per controller index; always exactly `MAX_PADS` entries long.
    pads: Vec<Option<VirtualPad>>,
    /// Pad creation failed (e.g. /dev/uinput permissions) — warn once, drop events.
    broken: bool,
}

impl Default for GamepadManager {
    /// Same as [`GamepadManager::new`]. A derived `Default` would leave `pads` empty, and
    /// the slot indexing in `ensure`/`handle` would then panic on the first event.
    fn default() -> Self {
        Self::new()
    }
}

impl GamepadManager {
    /// Create a manager with `MAX_PADS` empty slots; pads are created lazily on first use.
    pub fn new() -> GamepadManager {
        GamepadManager {
            pads: (0..MAX_PADS).map(|_| None).collect(),
            broken: false,
        }
    }

    /// Handle one decoded controller event (create/destroy by mask, then apply state).
    pub fn handle(&mut self, ev: &crate::gamestream::gamepad::GamepadEvent) {
        use crate::gamestream::gamepad::GamepadEvent;
        match ev {
            GamepadEvent::Arrival { index, kind, .. } => {
                tracing::info!(index, kind, "controller arrival");
                self.ensure(*index as usize);
            }
            GamepadEvent::State(f) => {
                let idx = f.index as usize;
                if idx >= MAX_PADS {
                    return;
                }
                // Unplugs: drop any allocated pad whose mask bit cleared.
                for (i, slot) in self.pads.iter_mut().enumerate() {
                    if slot.is_some() && f.active_mask & (1 << i) == 0 {
                        tracing::info!(index = i, "controller unplugged");
                        *slot = None;
                    }
                }
                if f.active_mask & (1 << idx) == 0 {
                    return; // this event WAS the unplug
                }
                self.ensure(idx);
                if let Some(pad) = self.pads[idx].as_mut() {
                    pad.apply(f);
                }
            }
        }
    }

    /// Create the pad for `idx` if it does not exist yet. A creation failure is logged once
    /// and latches `broken`, after which no further creation is attempted.
    fn ensure(&mut self, idx: usize) {
        if idx >= MAX_PADS || self.pads[idx].is_some() || self.broken {
            return;
        }
        match VirtualPad::create(idx) {
            Ok(p) => self.pads[idx] = Some(p),
            Err(e) => {
                tracing::error!(error = %format!("{e:#}"), "virtual gamepad creation failed — controller input disabled");
                self.broken = true;
            }
        }
    }

    /// Service every pad's FF protocol; `send(index, low, high)` is invoked for each pad whose
    /// mixed rumble level changed. Call frequently (games block in `EVIOCSFF` until answered).
    pub fn pump_rumble(&mut self, mut send: impl FnMut(u16, u16, u16)) {
        for (i, slot) in self.pads.iter_mut().enumerate() {
            if let Some(pad) = slot {
                if let Some((low, high)) = pad.pump_ff() {
                    send(i as u16, low, high);
                }
            }
        }
    }
}
+409
View File
@@ -0,0 +1,409 @@
//! libei input injection — the portable EI-sender path.
//!
//! Two ways to reach an EIS server ([`EiSource`]):
//! * **Portal** — `org.freedesktop.portal.RemoteDesktop` via `ashpd` (KWin, GNOME/Mutter),
//! which hands us the EIS socket fd after the session grant.
//! * **Socket** — connect directly to a compositor's own EIS socket. gamescope runs an EIS
//! server and exports its path to its children as `LIBEI_SOCKET`; our gamescope backend
//! relays that path through a file so the injector can connect (no portal involved).
//!
//! Either way, `reis` drives the connection as an EI *sender*: bind the seat's
//! pointer/keyboard/scroll/button capabilities and, per device, `start_emulating` → emit →
//! `frame`. The session and the EIS connection must stay alive and the event stream must be
//! polled continuously (resume/pause/ping/modifier traffic), so the whole thing runs on a
//! dedicated thread with its own tokio runtime; the synchronous control thread reaches it
//! through an unbounded channel and [`LibeiInjector::inject`] merely enqueues.
//!
//! Keyboard codes are Linux evdev (the same space our VK→evdev table produces) and the
//! compositor supplies the keymap, so — unlike the wlr path — there is no keymap to upload and
//! no modifier mask to serialize: pressing the modifier *keys* (which Moonlight sends as normal
//! key events) is enough.
use super::{gs_button_to_evdev, vk_to_evdev, InputInjector};
use anyhow::{anyhow, Result};
use ashpd::desktop::{
remote_desktop::{
ConnectToEISOptions, DeviceType, RemoteDesktop, SelectDevicesOptions, StartOptions,
},
CreateSessionOptions, PersistMode,
};
use futures_util::StreamExt;
use punktfunk_core::input::{InputEvent, InputKind};
use reis::ei;
use reis::event::{DeviceCapability, EiEvent};
use std::os::unix::net::UnixStream;
use std::time::{Duration, Instant};
use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
/// `code` value marking a horizontal scroll event (mirrors `gamestream::input`). A scroll
/// event with any other `code` is injected on the vertical axis.
const SCROLL_HORIZONTAL: u32 = 1;
/// Where to find the EIS server.
#[derive(Clone, Debug)]
pub enum EiSource {
/// `org.freedesktop.portal.RemoteDesktop` (KWin, GNOME/Mutter). This is the source
/// [`LibeiInjector::open`] uses.
Portal,
/// A file containing the EIS socket path/name (gamescope's relayed `LIBEI_SOCKET`); polled
/// until it appears, since the compositor may still be starting. A name that is not an
/// absolute path is resolved against `XDG_RUNTIME_DIR`.
SocketPathFile(std::path::PathBuf),
}
/// Handle held by the control thread; forwards events to the libei worker thread.
pub struct LibeiInjector {
// Sending half of the unbounded channel drained by the worker's session loop.
tx: UnboundedSender<InputEvent>,
}
impl LibeiInjector {
    /// Open an injector that reaches the EIS server through the RemoteDesktop portal.
    pub fn open() -> Result<Self> {
        Self::open_with(EiSource::Portal)
    }

    /// Open an injector for the given EIS `source`.
    ///
    /// Only spawning the worker thread can fail here. The portal/socket handshake itself
    /// happens on that thread, so this returns without waiting for it.
    pub fn open_with(source: EiSource) -> Result<Self> {
        let (tx, rx) = unbounded_channel::<InputEvent>();
        let builder = std::thread::Builder::new().name("punktfunk-libei".into());
        if let Err(e) = builder.spawn(move || worker(rx, source)) {
            return Err(anyhow!("spawn libei worker thread: {e}"));
        }
        // Return immediately — the portal/socket handshake must NOT run on the caller's
        // (control) thread, or a slow/denied setup would freeze the ENet control stream and
        // drop the client. The worker establishes the session asynchronously and logs its
        // status; events enqueue until devices resume (a few startup events may be dropped).
        Ok(Self { tx })
    }
}
impl InputInjector for LibeiInjector {
    /// Queue a copy of `event` for the worker thread. Fails only once the worker has gone
    /// away (its receiving end of the channel is closed).
    fn inject(&mut self, event: &InputEvent) -> Result<()> {
        match self.tx.send(*event) {
            Ok(()) => Ok(()),
            Err(_) => Err(anyhow!("libei worker thread has exited")),
        }
    }
}
/// Worker thread entry: build a tokio runtime and run the session to completion.
fn worker(rx: UnboundedReceiver<InputEvent>, source: EiSource) {
let rt = match tokio::runtime::Builder::new_multi_thread()
.worker_threads(1)
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
tracing::error!(error = %e, "libei: build tokio runtime failed");
return;
}
};
rt.block_on(session_main(rx, source));
}
/// Open the portal/socket + EIS (bounded), then pump events until disconnect or shutdown.
///
/// Setup (`connect`) is capped at 30 seconds; a setup error or timeout is logged and ends
/// the session. Afterwards the loop multiplexes two inputs: EIS server events (device
/// lifecycle) and client input events from `rx`. It exits when either side closes or the
/// EIS event stream reports an error.
async fn session_main(mut rx: UnboundedReceiver<InputEvent>, source: EiSource) {
// Keep `_portal` (the RemoteDesktop proxy + its session, when the portal path is used)
// bound for the whole loop — dropping the portal session closes the EIS connection. Bound
// the setup so a headless approval dialog (un-bypassed grant) can't hang the worker forever.
let (_portal, context, mut events) = match tokio::time::timeout(
Duration::from_secs(30),
connect(source),
)
.await
{
Ok(Ok(t)) => t,
Ok(Err(e)) => {
tracing::error!(error = %format!("{e:#}"), "libei: portal/EIS setup failed");
return;
}
Err(_) => {
tracing::error!(
"libei: EIS setup timed out (headless approval needed / kde-authorized grant not seeded / gamescope socket never appeared)"
);
return;
}
};
tracing::info!("libei: EIS connected — awaiting devices");
let mut state = EiState::new();
loop {
tokio::select! {
// Server → us: seats, devices, resume/pause, modifier reports.
ei = events.next() => match ei {
Some(Ok(ev)) => state.handle_ei(ev, &context),
Some(Err(e)) => { tracing::warn!(error = %e, "libei: event stream error"); break; }
None => { tracing::info!("libei: EIS disconnected"); break; }
},
// Control thread → us: one client input event to translate and emit.
msg = rx.recv() => match msg {
Some(input) => state.inject(&input, &context),
None => { tracing::info!("libei: injector closed — ending session"); break; }
},
}
}
}
/// Tie down the verbose tuple the connect step returns. The portal pair must stay alive for
/// the whole session (dropping it closes the EIS connection); `None` for the direct-socket path.
type Connected = (
// Portal proxy + session (portal path only).
Option<(RemoteDesktop, ashpd::desktop::Session<RemoteDesktop>)>,
// The EI context, used to flush outgoing requests.
ei::Context,
// Stream of converted EI events from the server.
reis::tokio::EiConvertEventStream,
);
/// Reach an EIS server per `source` and run the EI sender handshake.
///
/// Returns the portal pair (if any), the EI context and the event stream; see [`Connected`].
async fn connect(source: EiSource) -> Result<Connected> {
    // Step 1: obtain a connected Unix stream, via the portal or a relayed socket path.
    let (portal, stream) = match source {
        EiSource::SocketPathFile(file) => (None, connect_socket_file(&file).await?),
        EiSource::Portal => {
            let (rd, session, fd) = connect_portal().await?;
            (Some((rd, session)), UnixStream::from(fd))
        }
    };

    // Step 2: wrap it in an EI context and handshake as a sender.
    let context = match ei::Context::new(stream) {
        Ok(context) => context,
        Err(e) => return Err(anyhow!("reis EI context: {e}")),
    };
    let handshake = context
        .handshake_tokio("punktfunk-host", ei::handshake::ContextType::Sender)
        .await;
    match handshake {
        Ok((_conn, events)) => Ok((portal, context, events)),
        Err(e) => Err(anyhow!("EI handshake: {e}")),
    }
}
/// Open a RemoteDesktop portal session (pointer + keyboard) and obtain the EIS socket fd.
///
/// Sequence: create the proxy, create a session, select keyboard + pointer devices (without
/// persisting the grant), start the session, then ask for the EIS fd. Each step's failure is
/// reported with its own message.
async fn connect_portal() -> Result<(
    RemoteDesktop,
    ashpd::desktop::Session<RemoteDesktop>,
    std::os::fd::OwnedFd,
)> {
    let rd = RemoteDesktop::new()
        .await
        .map_err(|e| anyhow!("open RemoteDesktop portal (is xdg-desktop-portal-kde/gnome running and XDG_CURRENT_DESKTOP set?): {e}"))?;
    let session = rd
        .create_session(CreateSessionOptions::default())
        .await
        .map_err(|e| anyhow!("create RemoteDesktop session: {e}"))?;

    // Keyboard + pointer only; the grant is not persisted across sessions.
    let wanted = SelectDevicesOptions::default()
        .set_devices(DeviceType::Keyboard | DeviceType::Pointer)
        .set_persist_mode(PersistMode::DoNot);
    rd.select_devices(&session, wanted)
        .await
        .map_err(|e| anyhow!("select_devices: {e}"))?
        .response()
        .map_err(|e| anyhow!("select_devices response: {e}"))?;

    let start_options = StartOptions::default();
    let started = rd
        .start(&session, None, start_options)
        .await
        .map_err(|e| anyhow!("start RemoteDesktop session: {e}"))?;
    let granted = started
        .response()
        .map_err(|e| anyhow!("RemoteDesktop start denied: {e}"))?;
    tracing::info!(devices = ?granted.devices(), "libei: portal granted devices");

    let eis_options = ConnectToEISOptions::default();
    let fd = rd
        .connect_to_eis(&session, eis_options)
        .await
        .map_err(|e| anyhow!("connect_to_eis (RemoteDesktop portal version < 2?): {e}"))?;
    Ok((rd, session, fd))
}
/// Poll `file` for the EIS socket path (the gamescope backend relays `LIBEI_SOCKET` there once
/// the nested app launches), then connect. A bare name is resolved against `XDG_RUNTIME_DIR`,
/// mirroring libei's own `LIBEI_SOCKET` semantics.
///
/// The poll loop itself never gives up (it retries every 300 ms while the file is missing,
/// unreadable or blank); the caller is expected to bound it with a timeout.
async fn connect_socket_file(file: &std::path::Path) -> Result<UnixStream> {
    let path = loop {
        if let Ok(contents) = std::fs::read_to_string(file) {
            let trimmed = contents.trim();
            if !trimmed.is_empty() {
                break trimmed.to_string();
            }
        }
        tokio::time::sleep(Duration::from_millis(300)).await;
    };

    let candidate = std::path::Path::new(&path);
    let full = if candidate.is_absolute() {
        candidate.to_path_buf()
    } else {
        match std::env::var("XDG_RUNTIME_DIR") {
            Ok(runtime) => std::path::Path::new(&runtime).join(candidate),
            Err(_) => {
                return Err(anyhow!(
                    "XDG_RUNTIME_DIR unset (needed to resolve EIS socket '{path}')"
                ))
            }
        }
    };

    tracing::info!(socket = %full.display(), "libei: connecting to EIS socket");
    match UnixStream::connect(&full) {
        Ok(stream) => Ok(stream),
        Err(e) => Err(anyhow!("connect EIS socket {}: {e}", full.display())),
    }
}
/// One EI device and its emulation state.
struct DeviceSlot {
/// The device as announced by the server in `DeviceAdded`.
device: reis::event::Device,
/// The device is resumed (allowed to emit). Devices arrive paused and may pause again.
resumed: bool,
/// We have issued `start_emulating` since the last resume. Cleared on every resume and
/// pause, so the next emitted event re-issues it.
emulating: bool,
}
/// Tracks bound devices + the serial/sequence/timebase the EI protocol requires.
struct EiState {
/// Every device the server has added and not yet removed.
devices: Vec<DeviceSlot>,
/// Most recent serial seen from the server (device resume / keyboard modifiers events);
/// passed back in `start_emulating` and `frame`.
last_serial: u32,
/// Counter passed to `start_emulating`, incremented (wrapping) on each use.
sequence: u32,
/// Creation time; frame timestamps are microseconds elapsed since this instant.
start: Instant,
}
impl EiState {
    /// Fresh state: no devices, serial and sequence at zero, timebase starting now.
    fn new() -> Self {
        Self {
            devices: Vec::new(),
            last_serial: 0,
            sequence: 0,
            start: Instant::now(),
        }
    }

    /// Microseconds elapsed since this state was created (the timestamp given to `frame`).
    fn now_us(&self) -> u64 {
        self.start.elapsed().as_micros() as u64
    }

    /// Apply a server event: bind capabilities, track devices, and follow resume/pause.
    fn handle_ei(&mut self, ev: EiEvent, ctx: &ei::Context) {
        match ev {
            EiEvent::SeatAdded(e) => {
                // Ask for every capability we can drive; the server then adds devices.
                e.seat.bind_capabilities(
                    DeviceCapability::Pointer
                        | DeviceCapability::PointerAbsolute
                        | DeviceCapability::Keyboard
                        | DeviceCapability::Scroll
                        | DeviceCapability::Button,
                );
                let _ = ctx.flush();
            }
            EiEvent::DeviceAdded(e) => {
                tracing::info!(device = ?e.device.name(), ty = ?e.device.device_type(), "libei: device added");
                // Devices start out paused and not emulating until the server resumes them.
                self.devices.push(DeviceSlot {
                    device: e.device,
                    resumed: false,
                    emulating: false,
                });
            }
            EiEvent::DeviceRemoved(e) => {
                self.devices.retain(|d| d.device != e.device);
            }
            EiEvent::DeviceResumed(e) => {
                self.last_serial = e.serial;
                if let Some(d) = self.devices.iter_mut().find(|d| d.device == e.device) {
                    d.resumed = true;
                    d.emulating = false; // must re-issue start_emulating after a resume
                }
            }
            EiEvent::DevicePaused(e) => {
                if let Some(d) = self.devices.iter_mut().find(|d| d.device == e.device) {
                    d.resumed = false;
                    d.emulating = false;
                }
            }
            // Informational: the server reports resulting modifier/group state; we don't set it.
            EiEvent::KeyboardModifiers(e) => self.last_serial = e.serial,
            _ => {}
        }
    }

    /// Index of a resumed device exposing `cap`.
    fn device_for(&self, cap: DeviceCapability) -> Option<usize> {
        self.devices
            .iter()
            .position(|d| d.resumed && d.device.has_capability(cap))
    }

    /// Ensure the device at `idx` is in `start_emulating` state before we emit on it.
    fn ensure_emulating(&mut self, idx: usize, dev: &ei::Device) {
        if !self.devices[idx].emulating {
            dev.start_emulating(self.last_serial, self.sequence);
            self.sequence = self.sequence.wrapping_add(1);
            self.devices[idx].emulating = true;
        }
    }

    /// Translate and emit one client input event, committing it as a single `frame`.
    ///
    /// Events are silently dropped when no resumed device offers the needed capability, when
    /// the device lacks the matching interface, or when the button/key code has no evdev
    /// mapping. Gamepad events are not handled here at all.
    fn inject(&mut self, ev: &InputEvent, ctx: &ei::Context) {
        let cap = match ev.kind {
            InputKind::MouseMove => DeviceCapability::Pointer,
            InputKind::MouseMoveAbs => DeviceCapability::PointerAbsolute,
            InputKind::MouseButtonDown | InputKind::MouseButtonUp => DeviceCapability::Button,
            InputKind::MouseScroll => DeviceCapability::Scroll,
            InputKind::KeyDown | InputKind::KeyUp => DeviceCapability::Keyboard,
            InputKind::GamepadButton | InputKind::GamepadAxis => return, // uinput path (later)
        };
        let Some(idx) = self.device_for(cap) else {
            return; // no resumed device with this capability yet
        };
        let dev = self.devices[idx].device.device().clone();
        self.ensure_emulating(idx, &dev);
        // Set to false whenever nothing was actually emitted, so no empty frame is sent.
        let mut emitted = true;
        let slot = &self.devices[idx].device;
        match ev.kind {
            InputKind::MouseMove => match slot.interface::<ei::Pointer>() {
                Some(p) => p.motion_relative(ev.x as f32, ev.y as f32),
                None => emitted = false,
            },
            InputKind::MouseMoveAbs => {
                // `flags` packs the client's reference size: width in the high 16 bits,
                // height in the low 16 bits.
                let w = ((ev.flags >> 16) & 0xffff) as f32;
                let h = (ev.flags & 0xffff) as f32;
                match (
                    slot.interface::<ei::PointerAbsolute>(),
                    slot.regions().first(),
                ) {
                    (Some(p), Some(region)) if w > 0.0 && h > 0.0 => {
                        // Map the normalized client position into the device's first region.
                        let nx = (ev.x as f32 / w).clamp(0.0, 1.0);
                        let ny = (ev.y as f32 / h).clamp(0.0, 1.0);
                        let x = region.x as f32 + nx * region.width as f32;
                        let y = region.y as f32 + ny * region.height as f32;
                        p.motion_absolute(x, y);
                    }
                    _ => emitted = false,
                }
            }
            InputKind::MouseButtonDown | InputKind::MouseButtonUp => {
                match (slot.interface::<ei::Button>(), gs_button_to_evdev(ev.code)) {
                    (Some(b), Some(btn)) => {
                        let st = if ev.kind == InputKind::MouseButtonDown {
                            ei::button::ButtonState::Press
                        } else {
                            ei::button::ButtonState::Released
                        };
                        b.button(btn, st);
                    }
                    _ => emitted = false,
                }
            }
            InputKind::MouseScroll => match slot.interface::<ei::Scroll>() {
                Some(s) => {
                    // GameStream sends WHEEL_DELTA(120)-scaled deltas in `x`; ei scroll_discrete
                    // uses the same 120-per-detent unit. Positive GameStream = up (vertical),
                    // which is negative on the ei axis, but = RIGHT (horizontal), which is
                    // already positive there (moonlight-qt/Sunshine pass horizontal through
                    // unnegated) — only the vertical axis flips.
                    if ev.code == SCROLL_HORIZONTAL {
                        s.scroll_discrete(ev.x, 0);
                    } else {
                        // Saturating: plain `-ev.x` overflows for i32::MIN (a panic with
                        // overflow checks on, a wrong-direction scroll otherwise).
                        s.scroll_discrete(0, ev.x.saturating_neg());
                    }
                }
                None => emitted = false,
            },
            InputKind::KeyDown | InputKind::KeyUp => {
                match (slot.interface::<ei::Keyboard>(), vk_to_evdev(ev.code as u8)) {
                    (Some(k), Some(evdev)) => {
                        let st = if ev.kind == InputKind::KeyDown {
                            ei::keyboard::KeyState::Press
                        } else {
                            ei::keyboard::KeyState::Released
                        };
                        k.key(evdev as u32, st);
                    }
                    _ => {
                        emitted = false;
                        tracing::debug!(vk = ev.code, "libei: unmapped VK keycode — dropped");
                    }
                }
            }
            InputKind::GamepadButton | InputKind::GamepadAxis => emitted = false,
        }
        if emitted {
            dev.frame(self.last_serial, self.now_us());
        }
        // Flush unconditionally: a `start_emulating` may be pending even if nothing was emitted.
        let _ = ctx.flush();
    }
}
+273
View File
@@ -0,0 +1,273 @@
//! Input injection through the wlroots virtual-input Wayland protocols
//! (`zwlr_virtual_pointer_manager_v1` + `zwp_virtual_keyboard_manager_v1`) — the headless-Sway
//! path. We connect as an ordinary Wayland client (the host inherits Sway's
//! `WAYLAND_DISPLAY`/`XDG_RUNTIME_DIR`), bind the two managers, upload a standard evdev/US xkb
//! keymap, and translate events into virtual pointer/keyboard requests, tracking modifier state
//! so the compositor resolves shifted keysyms correctly.
use super::{gs_button_to_evdev, vk_to_evdev, InputEvent, InputInjector};
use anyhow::{bail, Context, Result};
use punktfunk_core::input::InputKind;
use std::io::Write;
use std::os::fd::{AsFd, FromRawFd};
use std::time::Instant;
use wayland_client::protocol::{wl_output::WlOutput, wl_pointer, wl_registry, wl_seat::WlSeat};
use wayland_client::{Connection, Dispatch, EventQueue, Proxy, QueueHandle};
use wayland_protocols_misc::zwp_virtual_keyboard_v1::client::{
zwp_virtual_keyboard_manager_v1::ZwpVirtualKeyboardManagerV1,
zwp_virtual_keyboard_v1::ZwpVirtualKeyboardV1,
};
use wayland_protocols_wlr::virtual_pointer::v1::client::{
zwlr_virtual_pointer_manager_v1::ZwlrVirtualPointerManagerV1,
zwlr_virtual_pointer_v1::ZwlrVirtualPointerV1,
};
use xkbcommon::xkb;
/// `code` value marking a horizontal scroll event (mirrors `gamestream::input`). A scroll
/// event with any other `code` is injected on the vertical axis.
const SCROLL_HORIZONTAL: u32 = 1;
/// Globals bound from the registry (the Wayland dispatch state).
#[derive(Default)]
struct Globals {
/// `zwlr_virtual_pointer_manager_v1`, if the compositor advertises it.
pointer_mgr: Option<ZwlrVirtualPointerManagerV1>,
/// `zwp_virtual_keyboard_manager_v1`, if the compositor advertises it.
keyboard_mgr: Option<ZwpVirtualKeyboardManagerV1>,
/// The seat the virtual devices are created on (the most recently advertised `wl_seat`).
seat: Option<WlSeat>,
/// The first advertised `wl_output`; later ones are ignored.
output: Option<WlOutput>,
}
impl Dispatch<wl_registry::WlRegistry, ()> for Globals {
    /// Registry listener: bind the globals we need as the compositor announces them.
    /// Every other registry event (and every other interface) is ignored.
    fn event(
        state: &mut Self,
        registry: &wl_registry::WlRegistry,
        event: wl_registry::Event,
        _: &(),
        _: &Connection,
        qh: &QueueHandle<Self>,
    ) {
        let wl_registry::Event::Global {
            name,
            interface,
            version,
        } = event
        else {
            return;
        };
        // Each bind caps the version at the highest one this client speaks.
        match interface.as_str() {
            "zwlr_virtual_pointer_manager_v1" => {
                state.pointer_mgr = Some(registry.bind(name, version.min(2), qh, ()));
            }
            "zwp_virtual_keyboard_manager_v1" => {
                state.keyboard_mgr = Some(registry.bind(name, version.min(1), qh, ()));
            }
            "wl_seat" => {
                state.seat = Some(registry.bind(name, version.min(7), qh, ()));
            }
            "wl_output" => {
                // Only the first output is kept.
                if state.output.is_none() {
                    state.output = Some(registry.bind(name, version.min(3), qh, ()));
                }
            }
            _ => {}
        }
    }
}
// The managers, the two virtual devices, the seat and the output emit no events we use.
// `ignore_events!` generates, for each listed proxy type, a `Dispatch` impl on `Globals`
// whose `event` handler has an empty body (an optional trailing comma is accepted).
macro_rules! ignore_events {
($($t:ty),* $(,)?) => {$(
impl Dispatch<$t, ()> for Globals {
fn event(_: &mut Self, _: &$t, _: <$t as Proxy>::Event, _: &(), _: &Connection, _: &QueueHandle<Self>) {}
}
)*};
}
// One no-op listener per object type this client creates or binds.
ignore_events!(
WlSeat,
WlOutput,
ZwlrVirtualPointerManagerV1,
ZwlrVirtualPointerV1,
ZwpVirtualKeyboardManagerV1,
ZwpVirtualKeyboardV1,
);
/// Input injector backed by the wlroots virtual-pointer and virtual-keyboard protocols.
pub struct WlrootsInjector {
// Wayland connection; flushed after every injected event.
conn: Connection,
// Event queue, dispatched after every injected event to surface errors/disconnects.
queue: EventQueue<Globals>,
// Dispatch state for `queue` (the globals bound at startup).
globals: Globals,
// The virtual pointer device.
pointer: ZwlrVirtualPointerV1,
// The virtual keyboard device.
keyboard: ZwpVirtualKeyboardV1,
// Local xkb state, updated per key so the modifier mask can be sent to the compositor.
xkb_state: xkb::State,
_keymap_file: std::fs::File, // keep the memfd alive for the compositor's mmap
// Creation time; request timestamps are milliseconds elapsed since this instant.
start: Instant,
}
impl WlrootsInjector {
    /// Connect to the compositor named by the environment, bind the virtual-input managers,
    /// create one virtual pointer and one virtual keyboard, and upload a US evdev keymap.
    ///
    /// Fails if the connection cannot be made, if the compositor lacks either manager or a
    /// seat, or if the keymap cannot be compiled or uploaded.
    pub fn open() -> Result<Self> {
        let conn = Connection::connect_to_env()
            .context("connect to Wayland (is Sway up + WAYLAND_DISPLAY/XDG_RUNTIME_DIR set?)")?;
        let mut queue = conn.new_event_queue();
        let qh = queue.handle();
        let _registry = conn.display().get_registry(&qh, ());

        // First roundtrip: collect the advertised globals.
        let mut globals = Globals::default();
        queue
            .roundtrip(&mut globals)
            .context("Wayland registry roundtrip")?;

        let pointer_manager = globals
            .pointer_mgr
            .clone()
            .context("compositor lacks zwlr_virtual_pointer_manager_v1")?;
        let keyboard_manager = globals
            .keyboard_mgr
            .clone()
            .context("compositor lacks zwp_virtual_keyboard_manager_v1")?;
        let seat = globals
            .seat
            .clone()
            .context("compositor advertised no wl_seat")?;

        // The pointer is tied to the first output when one was advertised.
        let output = globals.output.as_ref();
        let pointer =
            pointer_manager.create_virtual_pointer_with_output(Some(&seat), output, &qh, ());
        let keyboard = keyboard_manager.create_virtual_keyboard(&seat, &qh, ());

        // A standard evdev/US keymap so raw evdev keycodes resolve to the right keysyms.
        let xkb_context = xkb::Context::new(xkb::CONTEXT_NO_FLAGS);
        let keymap = xkb::Keymap::new_from_names(
            &xkb_context,
            "evdev",
            "pc105",
            "us",
            "",
            None,
            xkb::KEYMAP_COMPILE_NO_FLAGS,
        )
        .context("compile xkb keymap")?;
        let keymap_text = keymap.get_as_string(xkb::KEYMAP_FORMAT_TEXT_V1);
        let xkb_state = xkb::State::new(&keymap);

        // Hand the keymap over through an in-memory file.
        let keymap_file = memfd_with(&keymap_text)?;
        let keymap_size = keymap_text.len() as u32 + 1; // include the trailing NUL
        keyboard.keymap(1 /* XKB_V1 */, keymap_file.as_fd(), keymap_size);
        queue
            .roundtrip(&mut globals)
            .context("keymap upload roundtrip")?;
        conn.flush().ok();

        tracing::info!(
            output = globals.output.is_some(),
            "wlroots virtual input ready (pointer + keyboard)"
        );
        Ok(Self {
            conn,
            queue,
            globals,
            pointer,
            keyboard,
            xkb_state,
            _keymap_file: keymap_file,
            start: Instant::now(),
        })
    }

    /// Milliseconds since the injector was opened, truncated to `u32`.
    fn now_ms(&self) -> u32 {
        self.start.elapsed().as_millis() as u32
    }

    /// Update xkb state for a key and tell the compositor the resulting modifier mask.
    fn send_modifiers(&mut self, evdev: u16, down: bool) {
        let direction = match down {
            true => xkb::KeyDirection::Down,
            false => xkb::KeyDirection::Up,
        };
        // evdev -> xkb keycode: xkb keycodes are offset by 8.
        let keycode = xkb::Keycode::new(evdev as u32 + 8);
        self.xkb_state.update_key(keycode, direction);

        let state = &self.xkb_state;
        self.keyboard.modifiers(
            state.serialize_mods(xkb::STATE_MODS_DEPRESSED),
            state.serialize_mods(xkb::STATE_MODS_LATCHED),
            state.serialize_mods(xkb::STATE_MODS_LOCKED),
            state.serialize_layout(xkb::STATE_LAYOUT_EFFECTIVE),
        );
    }
}
impl InputInjector for WlrootsInjector {
    /// Translate one client input event into virtual pointer/keyboard requests, then
    /// dispatch pending Wayland events and flush the connection.
    ///
    /// Unmappable buttons/keys and gamepad events produce no request, but the dispatch and
    /// flush still run. An error is returned only for Wayland dispatch/flush failures.
    fn inject(&mut self, event: &InputEvent) -> Result<()> {
        let t = self.now_ms();
        match event.kind {
            InputKind::MouseMove => {
                self.pointer.motion(t, event.x as f64, event.y as f64);
                self.pointer.frame();
            }
            InputKind::MouseMoveAbs => {
                // `flags` packs the reference extent: width high 16 bits, height low 16 bits.
                let (w, h) = ((event.flags >> 16) & 0xffff, event.flags & 0xffff);
                if w > 0 && h > 0 {
                    let x = event.x.clamp(0, w as i32) as u32;
                    let y = event.y.clamp(0, h as i32) as u32;
                    self.pointer.motion_absolute(t, x, y, w, h);
                    self.pointer.frame();
                }
            }
            InputKind::MouseButtonDown | InputKind::MouseButtonUp => {
                if let Some(btn) = gs_button_to_evdev(event.code) {
                    let pressed = event.kind == InputKind::MouseButtonDown;
                    let st = if pressed {
                        wl_pointer::ButtonState::Pressed
                    } else {
                        wl_pointer::ButtonState::Released
                    };
                    self.pointer.button(t, btn, st);
                    self.pointer.frame();
                }
            }
            InputKind::MouseScroll => {
                // GameStream sends WHEEL_DELTA(120)-scaled units; a notch ≈ 15px. Positive
                // GameStream = up (vertical), negative on the Wayland axis; but = RIGHT
                // (horizontal), already positive there (moonlight-qt/Sunshine pass
                // horizontal through unnegated) — only the vertical axis flips.
                let (axis, sign) = if event.code == SCROLL_HORIZONTAL {
                    (wl_pointer::Axis::HorizontalScroll, 1.0)
                } else {
                    (wl_pointer::Axis::VerticalScroll, -1.0)
                };
                let notches = event.x as f64 / 120.0;
                self.pointer.axis_source(wl_pointer::AxisSource::Wheel);
                self.pointer.axis(t, axis, sign * notches * 15.0);
                self.pointer.frame();
            }
            InputKind::KeyDown | InputKind::KeyUp => {
                let down = event.kind == InputKind::KeyDown;
                match vk_to_evdev(event.code as u8) {
                    Some(evdev) => {
                        self.keyboard.key(t, evdev as u32, if down { 1 } else { 0 });
                        self.send_modifiers(evdev, down);
                    }
                    None => {
                        tracing::debug!(vk = event.code, "unmapped VK keycode — dropped");
                    }
                }
            }
            InputKind::GamepadButton | InputKind::GamepadAxis => {} // not yet injected
        }
        // Surface protocol errors / disconnects, then push the batch to the compositor.
        self.queue
            .dispatch_pending(&mut self.globals)
            .context("wayland dispatch")?;
        self.conn.flush().context("wayland flush")?;
        Ok(())
    }
}
/// Create an anonymous in-memory file holding `s` + a trailing NUL (for the keymap fd).
///
/// Fails if `memfd_create` fails or either write does; on a write failure the fd is closed
/// when the partially written file is dropped.
fn memfd_with(s: &str) -> Result<std::fs::File> {
    const NAME: &[u8] = b"punktfunk-keymap\0";
    // SAFETY: `NAME` is a NUL-terminated byte string with static lifetime.
    let raw = unsafe { libc::memfd_create(NAME.as_ptr().cast::<libc::c_char>(), libc::MFD_CLOEXEC) };
    if raw < 0 {
        bail!("memfd_create failed: {}", std::io::Error::last_os_error());
    }
    // SAFETY: `raw` is a freshly created, valid fd that nothing else owns.
    let mut file = unsafe { std::fs::File::from_raw_fd(raw) };
    file.write_all(s.as_bytes()).context("write keymap")?;
    file.write_all(&[0]).context("write keymap NUL")?;
    Ok(file)
}
+265
View File
@@ -0,0 +1,265 @@
//! M0 — the pipeline spike (plan §8): capture → NVENC encode → playable file, with the
//! encoded access units also fed through a `punktfunk_core` host→client `Session` over an
//! in-process loopback to prove the core's FEC + packetize + reassemble path on real
//! encoder output.
//!
//! This is the spike runner, not the M2 hot path: it drives the stages on one thread (the
//! per-stage-thread pipeline with bounded channels is [`crate::pipeline`]). Source is
//! either a synthetic BGRx test pattern (no capture session needed) or the live xdg
//! ScreenCast portal monitor.
use crate::capture::{self, Capturer, SyntheticCapturer};
use crate::encode::{self, Codec, EncodedFrame, Encoder};
use anyhow::{anyhow, Context, Result};
use punktfunk_core::packet::{FLAG_PIC, FLAG_SOF};
use punktfunk_core::{Config, Role, Session};
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
use std::time::Instant;
/// Where the spike takes its frames from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Source {
/// Deterministic moving BGRx test pattern — no capture session required.
Synthetic,
/// Live monitor via the xdg ScreenCast portal + PipeWire.
Portal,
/// KWin virtual output created at `width`x`height` (zkde_screencast). Lets us validate
/// capture (and zero-copy) at an arbitrary client resolution against a headless KWin.
/// Note: the compositor is actually chosen by `vdisplay::detect()`, with KWin only as the
/// fallback when detection yields nothing.
KwinVirtual,
}
/// Parameters for one spike run.
#[derive(Clone, Debug)]
pub struct Options {
/// Frame source to capture from.
pub source: Source,
/// Requested size for the synthetic pattern and for the virtual output; the portal source
/// ignores these and uses the PipeWire-negotiated size. The encoder is always configured
/// from the first captured frame's dimensions.
pub width: u32,
pub height: u32,
/// Frame rate handed to the source and the encoder; also sets the frame budget.
pub fps: u32,
/// Run length: `seconds * fps` frames are submitted (at least one frame is always submitted).
pub seconds: u32,
/// Video codec to encode with.
pub codec: Codec,
/// Target bitrate in bits per second, passed to the encoder.
pub bitrate_bps: u64,
/// Raw Annex-B elementary-stream sink (`.h265`/`.h264`/`.ivf-less .obu`); playable.
pub out: PathBuf,
/// Also round-trip every AU through a `punktfunk_core` host→client loopback and verify.
pub loopback: bool,
}
/// Run the M0 spike: capture frames from `opts.source`, encode them, write every access unit
/// to `opts.out`, and (with `opts.loopback`) verify each one through a core loopback.
///
/// Submits `opts.seconds * opts.fps` frames (always at least one), then flushes the encoder
/// and drains the tail. Returns an error on any capture/encode/IO failure, or when loopback
/// verification reports mismatches or unrecovered access units.
pub fn run(opts: Options) -> Result<()> {
    let mut capturer: Box<dyn Capturer> = match opts.source {
        Source::Synthetic => {
            tracing::info!(
                width = opts.width,
                height = opts.height,
                fps = opts.fps,
                "M0 source: synthetic BGRx test pattern"
            );
            Box::new(SyntheticCapturer::new(opts.width, opts.height, opts.fps))
        }
        Source::Portal => {
            tracing::info!("M0 source: xdg ScreenCast portal (live monitor)");
            capture::open_portal_monitor().context("open portal capturer")?
        }
        Source::KwinVirtual => {
            let compositor = crate::vdisplay::detect().unwrap_or(crate::vdisplay::Compositor::Kwin);
            tracing::info!(
                width = opts.width,
                height = opts.height,
                ?compositor,
                "M0 source: virtual output (PUNKTFUNK_COMPOSITOR)"
            );
            let mut vd = crate::vdisplay::open(compositor).context("open virtual display")?;
            let mode = punktfunk_core::Mode {
                width: opts.width,
                height: opts.height,
                refresh_hz: opts.fps,
            };
            let vout = vd.create(mode).context("create virtual output")?;
            capture::capture_virtual_output(vout).context("capture virtual output")?
        }
    };
    // Activate the capturer so the portal/PipeWire process callback actually delivers frames
    // (it gates the per-frame de-pad on `active`; idle by default so reconnects are cheap).
    capturer.set_active(true);

    // The first frame establishes the authoritative dimensions (the portal's negotiated
    // size, or the synthetic size) used to configure the encoder.
    let first = capturer.next_frame().context("capture first frame")?;
    let (w, h) = (first.width, first.height);
    tracing::info!(
        width = w,
        height = h,
        format = ?first.format,
        codec = ?opts.codec,
        bitrate_bps = opts.bitrate_bps,
        "opening NVENC encoder"
    );
    let mut encoder = encode::open_video(
        opts.codec,
        first.format,
        w,
        h,
        opts.fps,
        opts.bitrate_bps,
        first.is_cuda(),
    )
    .context("open encoder")?;

    let out_file =
        File::create(&opts.out).with_context(|| format!("create {}", opts.out.display()))?;
    let mut sink = BufWriter::new(out_file);
    let mut lb = opts
        .loopback
        .then(|| Loopback::new().context("build punktfunk-core loopback"))
        .transpose()?;

    let target_frames = (opts.seconds as u64) * (opts.fps as u64);
    let started = Instant::now();
    let mut stats = Stats::default();
    // Submit-then-capture: the frame already in hand is encoded before the next one is
    // fetched, so no frame is captured beyond the budget.
    let mut frame = first;
    loop {
        encoder.submit(&frame).context("encoder submit")?;
        stats.submitted += 1;
        drain_encoder(encoder.as_mut(), &mut sink, lb.as_mut(), &mut stats)?;
        if stats.submitted >= target_frames {
            break;
        }
        frame = capturer.next_frame().context("capture frame")?;
    }
    // NVENC buffers frames internally even at delay=0 — flush and drain the tail.
    encoder.flush().context("encoder flush")?;
    drain_encoder(encoder.as_mut(), &mut sink, lb.as_mut(), &mut stats)?;
    sink.flush().context("flush output file")?;

    let elapsed = started.elapsed().as_secs_f64();
    tracing::info!(
        submitted = stats.submitted,
        encoded = stats.encoded,
        keyframes = stats.keyframes,
        bytes_out = stats.bytes_out,
        out = %opts.out.display(),
        elapsed_s = format!("{elapsed:.2}"),
        encode_fps = format!("{:.1}", stats.encoded as f64 / elapsed.max(1e-9)),
        "M0 capture→encode→file complete"
    );

    // Without the loopback there is nothing left to verify.
    let Some(lb) = lb else {
        return Ok(());
    };
    lb.report();
    let verified = lb.mismatches == 0 && lb.recovered == lb.submitted;
    if verified {
        Ok(())
    } else {
        Err(anyhow!(
            "punktfunk-core loopback verification FAILED: {} mismatches, {}/{} AUs recovered",
            lb.mismatches,
            lb.recovered,
            lb.submitted
        ))
    }
}
/// Running counters for one M0 capture→encode run; logged when the run completes.
#[derive(Default)]
struct Stats {
/// Frames handed to the encoder via `submit`.
submitted: u64,
/// Access units returned by the encoder's `poll`.
encoded: u64,
/// Encoded access units that were flagged as keyframes.
keyframes: u64,
/// Total encoded payload bytes written to the output sink.
bytes_out: u64,
}
/// Pull every access unit the encoder currently has ready and fan each one out: append its
/// bytes to `sink`, fold it into `stats`, and — when a loopback is attached — push it through
/// the loopback for verification. Returns as soon as `poll` reports nothing left; any
/// poll/write/loopback failure aborts the drain with context attached.
fn drain_encoder(
    encoder: &mut dyn Encoder,
    sink: &mut impl Write,
    mut lb: Option<&mut Loopback>,
    stats: &mut Stats,
) -> Result<()> {
    loop {
        let unit = match encoder.poll().context("encoder poll")? {
            Some(unit) => unit,
            None => return Ok(()),
        };
        // Persist first, count second: the counters only reflect bytes that reached the sink.
        sink.write_all(&unit.data).context("write AU to file")?;
        stats.encoded += 1;
        stats.bytes_out += unit.data.len() as u64;
        if unit.keyframe {
            stats.keyframes += 1;
        }
        if let Some(loopback) = lb.as_deref_mut() {
            loopback.submit(&unit)?;
        }
    }
}
/// A host↔client `punktfunk_core` pair over a lossless in-process loopback. Each encoded AU is
/// FEC-protected, packetized, sent, then reassembled on the client and byte-compared to the
/// original — exercising the core on real encoder output (the M0 "feed into a Session" goal).
struct Loopback {
/// Sending side: every AU is submitted here.
host: Session,
/// Receiving side: polled after each submit for reassembled frames.
client: Session,
/// AUs submitted to the host session.
submitted: u64,
/// Frames the client session returned from `poll_frame`.
recovered: u64,
/// Recovered frames whose bytes differed from the AU that was just submitted.
mismatches: u64,
/// Total AU payload bytes submitted.
bytes: u64,
}
impl Loopback {
    /// Build the host and client sessions on the two ends of an in-process loopback
    /// transport pair, both with the P1 default config. All counters start at zero.
    fn new() -> Result<Loopback> {
        let (host_end, client_end) = punktfunk_core::transport::loopback_pair(0, 0);
        let host = match Session::new(Config::p1_defaults(Role::Host), Box::new(host_end)) {
            Ok(session) => session,
            Err(e) => return Err(anyhow!("host session: {e:?}")),
        };
        let client = match Session::new(Config::p1_defaults(Role::Client), Box::new(client_end))
        {
            Ok(session) => session,
            Err(e) => return Err(anyhow!("client session: {e:?}")),
        };
        Ok(Self {
            host,
            client,
            submitted: 0,
            recovered: 0,
            mismatches: 0,
            bytes: 0,
        })
    }

    /// Send one encoded AU through the host session, then drain the client session and
    /// byte-compare every frame it yields against `au`. Mismatches are counted and logged,
    /// not returned as errors; only a session-level failure produces `Err`.
    fn submit(&mut self, au: &EncodedFrame) -> Result<()> {
        let flags = if au.keyframe {
            FLAG_PIC as u32 | FLAG_SOF as u32
        } else {
            FLAG_PIC as u32
        };
        if let Err(e) = self.host.submit_frame(&au.data, au.pts_ns, flags) {
            return Err(anyhow!("host submit_frame: {e:?}"));
        }
        self.submitted += 1;
        self.bytes += au.data.len() as u64;
        // Lossless + in-order loopback: each submit yields exactly the AU just sent.
        loop {
            let frame = match self.client.poll_frame() {
                Ok(frame) => frame,
                // Nothing more to reassemble — this AU is fully processed.
                Err(punktfunk_core::PunktfunkError::NoFrame) => return Ok(()),
                Err(e) => return Err(anyhow!("client poll_frame: {e:?}")),
            };
            self.recovered += 1;
            if frame.data != au.data {
                self.mismatches += 1;
                tracing::warn!(
                    recovered = self.recovered,
                    got = frame.data.len(),
                    expected = au.data.len(),
                    "loopback AU mismatch"
                );
            }
        }
    }

    /// Log the final loopback counters.
    fn report(&self) {
        let Self {
            submitted,
            recovered,
            mismatches,
            bytes,
            ..
        } = self;
        tracing::info!(
            submitted = *submitted,
            recovered = *recovered,
            mismatches = *mismatches,
            bytes = *bytes,
            "punktfunk-core loopback: AUs FEC-packetized → sent → reassembled & verified"
        );
    }
}
+794
View File
@@ -0,0 +1,794 @@
//! M3 — the `punktfunk/1` native host: QUIC control plane + the hardened M1 data plane over UDP.
//! This is punktfunk's own protocol, past the GameStream compatibility layer:
//!
//! * the Welcome negotiates **GF(2¹⁶) Leopard FEC** (inexpressible in GameStream) + AES-GCM;
//! * the client's Hello requests a display mode and the host creates a **native virtual
//! output** at exactly that size/refresh (same vdisplay backends as the GameStream path);
//! * **input arrives as QUIC datagrams** — encrypted, congestion-managed, no ENet
//! retransmission spikes — and feeds the session's input injector;
//! * video frames carry a wall-clock `pts_ns`, so a same-host client measures the full
//! capture→encode→FEC→UDP→reassemble latency per frame.
//!
//! `punktfunk-host m3-host [--port 9777] [--source synthetic|virtual] [--seconds 30]
//! [--frames 300] [--max-sessions 0]` serves sessions back to back (one at a time — the
//! virtual output and encoder are single-tenant; `--max-sessions 0` means serve forever);
//! `punktfunk-client-rs --connect host:9777` is the counterpart.
//! The data plane runs on native threads (no async on the frame path).
//!
//! Alongside video + input, a session carries **audio** (desktop Opus, 5 ms frames, host →
//! client QUIC datagrams tagged [`punktfunk_core::quic::AUDIO_MAGIC`]) and **gamepads** (client
//! GamepadButton/GamepadAxis datagrams accumulated into per-pad state for the virtual xpad;
//! force feedback flows back as [`punktfunk_core::quic::RUMBLE_MAGIC`] datagrams).
//!
//! Trust: the host serves with its persistent identity (`~/.config/punktfunk/cert.pem`, shared
//! with GameStream pairing) and logs the SHA-256 fingerprint clients pin.
use anyhow::{anyhow, Context, Result};
use punktfunk_core::config::{FecConfig, FecScheme, Role};
use punktfunk_core::input::{InputEvent, InputKind};
use punktfunk_core::packet::{FLAG_PIC, FLAG_SOF};
use punktfunk_core::quic::{endpoint, io, Hello, Start, Welcome};
use punktfunk_core::transport::UdpTransport;
use punktfunk_core::Session;
use rand::RngCore;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
/// What the data plane streams. Selected once through [`M3Options::source`] and used for
/// every session the host serves.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum M3Source {
/// Deterministic test frames (protocol verification; the client byte-checks them).
Synthetic,
/// Real capture: virtual display at the client's requested mode → NVENC.
Virtual,
}
/// Host-wide settings for [`run`]; every accepted session is served with the same options.
pub struct M3Options {
/// Port the QUIC control-plane endpoint listens on (bound on 0.0.0.0).
pub port: u16,
/// Frame source used for every session.
pub source: M3Source,
/// Virtual-source stream duration, in seconds.
pub seconds: u32,
/// Synthetic-source frame count (also advertised to the client in the Welcome).
pub frames: u32,
/// Exit after this many sessions (0 = serve forever). Sessions that end with an error
/// count towards the limit too.
pub max_sessions: u32,
}
/// Deterministic test frame of exactly `len` bytes: the first four bytes hold `idx` as a
/// little-endian `u32`, and every later byte is `data[i] = idx + i` (both truncated to `u8`,
/// wrapping).
///
/// Frames shorter than four bytes carry only the leading `len` bytes of the index instead of
/// panicking on the header slice, so every `len` (including 0) is valid.
pub fn test_frame(idx: u32, len: usize) -> Vec<u8> {
    let header = idx.to_le_bytes();
    // A frame may be too short to hold the whole index header.
    let header_len = header.len().min(len);
    let mut frame = Vec::with_capacity(len);
    frame.extend_from_slice(&header[..header_len]);
    frame.extend((header_len..len).map(|i| (idx as u8).wrapping_add(i as u8)));
    frame
}
/// Wall-clock time as nanoseconds since the Unix epoch, truncated to `u64`; 0 if the system
/// clock reads earlier than the epoch.
fn now_ns() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos() as u64,
        Err(_) => 0,
    }
}
/// Blocking entry point for the punktfunk/1 host: builds a two-worker multi-threaded tokio
/// runtime and drives [`serve`] on it until the listener finishes or fails.
pub fn run(opts: M3Options) -> Result<()> {
    let mut builder = tokio::runtime::Builder::new_multi_thread();
    builder.worker_threads(2).enable_all();
    let runtime = builder.build().context("tokio runtime")?;
    runtime.block_on(serve(opts))
}
/// Render a 32-byte fingerprint as 64 lowercase hexadecimal characters (two per byte, no
/// separators).
fn fingerprint_hex(fp: &[u8; 32]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut hex = String::with_capacity(fp.len() * 2);
    for &byte in fp {
        hex.push(DIGITS[usize::from(byte >> 4)] as char);
        hex.push(DIGITS[usize::from(byte & 0x0f)] as char);
    }
    hex
}
/// The persistent listener: accept clients back to back on one endpoint. Sessions are
/// served one at a time (the virtual output + NVENC are single-tenant); a client that
/// connects mid-session waits in the accept queue. A failed session logs and the loop
/// keeps serving — only endpoint-level failures are fatal.
async fn serve(opts: M3Options) -> Result<()> {
let identity = crate::gamestream::cert::ServerIdentity::load_or_create()
.context("load host identity (~/.config/punktfunk)")?;
let fingerprint = endpoint::fingerprint_of_pem(&identity.cert_pem)
.map_err(|e| anyhow!("cert fingerprint: {e}"))?;
let ep = endpoint::server_with_identity(
([0, 0, 0, 0], opts.port).into(),
&identity.cert_pem,
&identity.key_pem,
)
.map_err(|e| anyhow!("QUIC server endpoint: {e}"))?;
tracing::info!(
port = opts.port,
source = ?opts.source,
fingerprint = %fingerprint_hex(&fingerprint),
"punktfunk/1 host listening (QUIC) — clients pin this fingerprint"
);
// One audio capturer for the whole host lifetime, handed from session to session
// (PipeWire streams have no cheap teardown — see AudioCapSlot).
let audio_cap: AudioCapSlot = Arc::new(std::sync::Mutex::new(None));
let mut served = 0u32;
loop {
let incoming = ep
.accept()
.await
.ok_or_else(|| anyhow!("endpoint closed"))?;
let conn = match incoming.await {
Ok(c) => c,
Err(e) => {
tracing::warn!(error = %e, "QUIC accept failed");
continue;
}
};
let peer = conn.remote_address();
tracing::info!(%peer, "punktfunk/1 client connected");
if let Err(e) = serve_session(conn, &opts, &audio_cap).await {
tracing::warn!(%peer, error = %format!("{e:#}"), "session ended with error");
} else {
tracing::info!(%peer, "session complete");
}
served += 1;
if opts.max_sessions != 0 && served >= opts.max_sessions {
break;
}
tracing::info!("ready for the next client");
}
ep.wait_idle().await;
Ok(())
}
/// The accept loop is sequential, so the control phase must be bounded — a client that
/// connects and never finishes the handshake would otherwise wedge the host for everyone.
/// `serve_session` applies it to the whole control exchange: waiting for the client to open
/// the control stream, Hello, Welcome and Start together share this one budget.
const HANDSHAKE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
/// Persistent audio-capturer slot, reused across sessions (same pattern as the GameStream
/// path): `PwAudioCapturer` has no teardown — dropping one per session would leak its
/// PipeWire thread + core connection + live capture node on the daemon every session.
///
/// The slot holds `None` whenever no capturer is parked in it: before the first audio
/// session opens one, while a session's audio thread has taken it out, and after a capture
/// error made the audio thread drop it instead of handing it back.
type AudioCapSlot = Arc<std::sync::Mutex<Option<Box<dyn crate::audio::AudioCapturer>>>>;
/// One client session: handshake → input/audio planes → data plane until done/disconnect.
/// Everything torn down on return (RAII: virtual output, encoder, threads via channel close).
///
/// Returns the data plane's result. A handshake failure or timeout, or a failed input-thread
/// spawn, returns early through `?` — at that point no side-plane thread or task has been
/// started yet, so the explicit teardown sequence at the bottom is not needed on those paths.
async fn serve_session(
conn: quinn::Connection,
opts: &M3Options,
audio_cap: &AudioCapSlot,
) -> Result<()> {
let peer = conn.remote_address();
let source = opts.source;
let frames = opts.frames;
// Control phase as one future so a single timeout bounds all of it (see HANDSHAKE_TIMEOUT).
let handshake = async {
let (mut send, mut recv) = conn.accept_bi().await.context("accept control stream")?;
let hello = Hello::decode(&io::read_msg(&mut recv).await?)
.map_err(|e| anyhow!("Hello decode: {e:?}"))?;
anyhow::ensure!(
hello.abi_version == punktfunk_core::ABI_VERSION,
"ABI mismatch: client {} host {}",
hello.abi_version,
punktfunk_core::ABI_VERSION
);
// Reject a mode the encoder cannot handle before any resources are created for it.
crate::encode::validate_dimensions(
crate::encode::Codec::H265,
hello.mode.width,
hello.mode.height,
)
.context("client-requested mode")?;
// Reserve a UDP port for the data plane (bind, read it back, rebind in UdpTransport).
// NOTE(review): the probe socket is dropped before the data plane rebinds the port, so
// another process could claim it in between — confirm this window is acceptable.
let probe = std::net::UdpSocket::bind("0.0.0.0:0")?;
let udp_port = probe.local_addr()?.port();
drop(probe);
// Fresh random 16-byte session key, sent to the client inside the Welcome.
let mut key = [0u8; 16];
rand::thread_rng().fill_bytes(&mut key);
let welcome = Welcome {
abi_version: punktfunk_core::ABI_VERSION,
udp_port,
mode: hello.mode,
// The post-GameStream point of punktfunk/1: Leopard GF(2¹⁶) FEC + real encryption.
fec: FecConfig {
scheme: FecScheme::Gf16,
fec_percent: 20,
max_data_per_block: 4096,
},
shard_payload: 1200,
encrypt: true,
key,
salt: *b"pkf1",
frames: match source {
M3Source::Synthetic => frames,
M3Source::Virtual => 0, // unbounded — client streams until we close
},
};
io::write_msg(&mut send, &welcome.encode()).await?;
let start = Start::decode(&io::read_msg(&mut recv).await?)
.map_err(|e| anyhow!("Start decode: {e:?}"))?;
Ok::<_, anyhow::Error>((hello, welcome, udp_port, start))
};
// Outer `?` = the timeout elapsed; inner `?` = the handshake itself failed.
let (hello, welcome, udp_port, start) = tokio::time::timeout(HANDSHAKE_TIMEOUT, handshake)
.await
.map_err(|_| anyhow!("handshake timed out after {HANDSHAKE_TIMEOUT:?}"))??;
// The data plane targets the QUIC peer's IP at the UDP port the client announced in Start.
let client_udp = std::net::SocketAddr::new(peer.ip(), start.client_udp_port);
tracing::info!(%client_udp, udp_port, mode = ?hello.mode, "handshake complete — streaming");
// Input plane: QUIC datagrams → channel → a native injector thread (the injector owns
// non-Send compositor state, so it lives on its own thread). The thread also owns the
// session's virtual gamepads and sends force feedback back over `conn`. It exits when
// the channel closes (datagram task ends on disconnect) — fresh state per session.
let (input_tx, input_rx) = std::sync::mpsc::channel::<InputEvent>();
let input_handle = {
let conn = conn.clone();
std::thread::Builder::new()
.name("punktfunk-m3-input".into())
.spawn(move || input_thread(input_rx, conn))
.context("spawn input thread")?
};
let input_conn = conn.clone();
// Datagram pump: decode each datagram and forward it; undecodable datagrams are skipped.
tokio::spawn(async move {
let mut count = 0u64;
while let Ok(d) = input_conn.read_datagram().await {
if let Some(ev) = InputEvent::decode(&d) {
count += 1;
if input_tx.send(ev).is_err() {
break;
}
}
}
tracing::info!(count, "input datagram stream ended");
});
// Stop signal: stream duration elapsed or the client went away.
let stop = Arc::new(AtomicBool::new(false));
{
let stop = stop.clone();
let conn = conn.clone();
tokio::spawn(async move {
conn.closed().await;
stop.store(true, Ordering::SeqCst);
});
}
// Audio plane (virtual source only — synthetic runs are protocol tests): desktop Opus
// → host→client QUIC datagrams, on its own native thread. Best-effort on every failure
// (no PipeWire audio, spawn error): the session continues without audio — and a spawn
// error must NOT early-return here, the threads above are already running.
let audio_handle = if opts.source == M3Source::Virtual {
let conn = conn.clone();
let stop = stop.clone();
let cap = audio_cap.clone();
std::thread::Builder::new()
.name("punktfunk-m3-audio".into())
.spawn(move || audio_thread(conn, stop, cap))
.map_err(|e| tracing::error!(error = %e, "audio thread spawn failed — session continues without audio"))
.ok()
} else {
None
};
// Data plane on a native thread (no async on the hot path — design invariant).
let cfg = welcome.session_config(Role::Host);
// Copies of the option values so the blocking closure below can own everything it uses.
let source = opts.source;
let (seconds, frames) = (opts.seconds, opts.frames);
let mode = hello.mode;
let stop_stream = stop.clone();
// Collected as a value rather than propagated with `?`, so the teardown below always runs.
let result: Result<()> = async {
tokio::task::spawn_blocking(move || -> Result<()> {
let transport =
UdpTransport::connect(&format!("0.0.0.0:{udp_port}"), &client_udp.to_string())
.context("bind data plane")?;
let mut session = Session::new(cfg, Box::new(transport))
.map_err(|e| anyhow!("host session: {e:?}"))?;
match source {
M3Source::Synthetic => synthetic_stream(&mut session, frames, &stop_stream),
M3Source::Virtual => virtual_stream(&mut session, mode, seconds, &stop_stream),
}
})
.await
.context("stream thread")??;
// Give the client a moment to drain before the close.
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
Ok(())
}
.await;
// Teardown on EVERY path (a failed data plane must not leave the connection open with
// audio still streaming): stop the audio thread, close, then join both side-plane
// threads so the next session starts fresh (closing the connection ends the datagram
// task, which drops the input channel, which exits the input thread + its gamepads).
stop.store(true, Ordering::SeqCst);
// Close code/reason tell the client whether the stream finished (0, "done") or failed.
conn.close(
if result.is_ok() { 0u32 } else { 1u32 }.into(),
if result.is_ok() { b"done" } else { b"error" },
);
// Thread joins block, so they run on the blocking pool instead of an async worker.
let _ = tokio::task::spawn_blocking(move || {
if let Some(h) = audio_handle {
let _ = h.join();
}
let _ = input_handle.join();
})
.await;
result
}
/// Per-pad accumulated state: punktfunk/1 gamepad events are incremental (one button or axis
/// per datagram, see `punktfunk_core::input::gamepad`), the virtual xpad applies full frames.
#[derive(Clone, Copy, Default)]
struct PadState {
/// Bitmask of currently held buttons (bits are set/cleared by button events).
buttons: u32,
/// Left trigger position, clamped to 0..=255 when applied.
left_trigger: u8,
/// Right trigger position, clamped to 0..=255 when applied.
right_trigger: u8,
/// Left stick X, clamped to the `i16` range when applied.
ls_x: i16,
/// Left stick Y, clamped to the `i16` range when applied.
ls_y: i16,
/// Right stick X, clamped to the `i16` range when applied.
rs_x: i16,
/// Right stick Y, clamped to the `i16` range when applied.
rs_y: i16,
}
impl PadState {
/// Fold one wire event into the state. `false` = unknown axis id (event dropped).
fn apply(&mut self, ev: &InputEvent) -> bool {
if ev.kind == InputKind::GamepadButton {
if ev.x != 0 {
self.buttons |= ev.code;
} else {
self.buttons &= !ev.code;
}
return true;
}
use punktfunk_core::input::gamepad::*;
let stick = ev.x.clamp(i16::MIN as i32, i16::MAX as i32) as i16;
let trigger = ev.x.clamp(0, 255) as u8;
match ev.code {
AXIS_LS_X => self.ls_x = stick,
AXIS_LS_Y => self.ls_y = stick,
AXIS_RS_X => self.rs_x = stick,
AXIS_RS_Y => self.rs_y = stick,
AXIS_LT => self.left_trigger = trigger,
AXIS_RT => self.right_trigger = trigger,
_ => return false,
}
true
}
fn frame(&self, index: usize, active_mask: u16) -> crate::gamestream::gamepad::GamepadFrame {
crate::gamestream::gamepad::GamepadFrame {
index: index as i16,
active_mask,
buttons: self.buttons,
left_trigger: self.left_trigger,
right_trigger: self.right_trigger,
ls_x: self.ls_x,
ls_y: self.ls_y,
rs_x: self.rs_x,
rs_y: self.rs_y,
}
}
}
/// Highest pad index addressable on the wire (`flags` field); the uinput manager caps
/// actual pad creation at its own MAX_PADS.
/// Also sizes the per-pad state and rumble arrays in `input_thread`; 16 matches the bit
/// width of that thread's `u16` active-pad mask, so `1 << idx` cannot overflow it.
const MAX_WIRE_PADS: usize = 16;
/// The injector thread: open the session's input backend on first event, then inject.
/// Gamepad kinds route to the session's [`GamepadManager`](crate::inject::gamepad), with
/// force feedback pumped between events and sent back as rumble datagrams.
///
/// Runs until the sending half of `rx` is dropped. All state here (injector, pads,
/// accumulated pad/rumble state) is local to the call, so it is rebuilt for every session.
fn input_thread(rx: std::sync::mpsc::Receiver<InputEvent>, conn: quinn::Connection) {
let mut injector: Option<Box<dyn crate::inject::InputInjector>> = None;
// Set once opening the pointer/keyboard backend failed, so it is not retried per event.
let mut injector_broken = false;
let mut pads = crate::inject::gamepad::GamepadManager::new();
let mut pad_state = [PadState::default(); MAX_WIRE_PADS];
// Bit i is set once pad i has delivered at least one accepted event this session.
let mut pad_mask = 0u16;
// Rumble is idempotent state on a lossy channel (client-side overflow drops datagrams),
// so re-send the current state of every rumbling-capable pad every 500 ms — a dropped
// transition (including a stop) heals on the next refresh.
let mut rumble_state = [(0u16, 0u16); MAX_WIRE_PADS];
let mut rumble_seen = [false; MAX_WIRE_PADS];
let mut last_refresh = std::time::Instant::now();
loop {
// The 4 ms timeout bounds how long force feedback can wait while no input arrives.
match rx.recv_timeout(std::time::Duration::from_millis(4)) {
Ok(ev) => match ev.kind {
InputKind::GamepadButton | InputKind::GamepadAxis => {
// The pad index travels in `flags`; out-of-range pads and unknown axes are dropped.
let idx = ev.flags as usize;
if idx >= MAX_WIRE_PADS || !pad_state[idx].apply(&ev) {
continue;
}
pad_mask |= 1 << idx;
let frame = pad_state[idx].frame(idx, pad_mask);
pads.handle(&crate::gamestream::gamepad::GamepadEvent::State(frame));
}
_ => {
// Lazy open: the pointer/keyboard backend is only created once it is needed.
if injector.is_none() && !injector_broken {
let backend = crate::inject::default_backend();
match crate::inject::open(backend) {
Ok(i) => {
tracing::info!(?backend, "punktfunk/1 input injector opened");
injector = Some(i);
}
Err(e) => {
// Keep running for gamepads — uinput pads work even when
// the pointer/keyboard backend doesn't.
tracing::error!(error = %format!("{e:#}"), "pointer/keyboard injection unavailable");
injector_broken = true;
}
}
}
if let Some(inj) = injector.as_mut() {
if let Err(e) = inj.inject(&ev) {
tracing::warn!(error = %format!("{e:#}"), "inject failed");
}
}
}
},
Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {}
Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => break,
}
// Service force feedback every iteration (≤4 ms latency; games block on EVIOCSFF).
pads.pump_rumble(|pad, low, high| {
// Remember the latest state per pad for the periodic refresh below.
if let Some(s) = rumble_state.get_mut(pad as usize) {
*s = (low, high);
rumble_seen[pad as usize] = true;
}
let d = punktfunk_core::quic::encode_rumble_datagram(pad, low, high);
// Send errors are ignored on purpose: the refresh below re-sends the state anyway.
let _ = conn.send_datagram(d.to_vec().into());
});
if last_refresh.elapsed() >= std::time::Duration::from_millis(500) {
last_refresh = std::time::Instant::now();
// Only pads that have reported rumble at least once are refreshed.
for (i, &(low, high)) in rumble_state.iter().enumerate() {
if rumble_seen[i] {
let d = punktfunk_core::quic::encode_rumble_datagram(i as u16, low, high);
let _ = conn.send_datagram(d.to_vec().into());
}
}
}
}
}
/// The audio thread: desktop capture → Opus (48 kHz stereo, 5 ms, CBR — same tuning as the
/// GameStream path) → `AUDIO_MAGIC` datagrams. QUIC already encrypts; no extra layer.
/// The capturer comes from (and returns to) the persistent slot — see [`AudioCapSlot`].
///
/// Runs until `stop` is set, the connection rejects a datagram, or capture fails. Every
/// failure is best-effort: it is logged and the thread just returns, the session goes on
/// without audio. A capturer that is still alive is always handed back to `audio_cap`.
#[cfg(target_os = "linux")]
fn audio_thread(conn: quinn::Connection, stop: Arc<AtomicBool>, audio_cap: AudioCapSlot) {
    use crate::audio::{CHANNELS, SAMPLE_RATE};
    const FRAME_MS: usize = 5;
    const SAMPLES_PER_FRAME: usize = SAMPLE_RATE as usize * FRAME_MS / 1000; // 240
    let mut capturer = match audio_cap.lock().unwrap().take() {
        Some(mut c) => {
            c.drain(); // discard audio captured between sessions
            c
        }
        None => match crate::audio::open_audio_capture() {
            Ok(c) => c,
            Err(e) => {
                tracing::warn!(error = %format!("{e:#}"), "punktfunk/1 audio unavailable — session continues without it");
                return;
            }
        },
    };
    let mut enc = match opus::Encoder::new(
        SAMPLE_RATE,
        opus::Channels::Stereo,
        opus::Application::LowDelay,
    ) {
        Ok(e) => e,
        Err(e) => {
            tracing::error!(error = %e, "opus encoder");
            // The capturer itself is fine — park it again for the next session.
            *audio_cap.lock().unwrap() = Some(capturer);
            return;
        }
    };
    enc.set_bitrate(opus::Bitrate::Bits(128_000)).ok();
    enc.set_vbr(false).ok();
    // Interleaved samples per Opus frame.
    let frame_len = SAMPLES_PER_FRAME * CHANNELS;
    let mut acc: Vec<f32> = Vec::with_capacity(frame_len * 4);
    let mut opus_buf = vec![0u8; 1500];
    let mut seq: u32 = 0;
    let mut capture_dead = false;
    tracing::info!("punktfunk/1 audio streaming (Opus 48 kHz stereo, 5 ms datagrams)");
    'session: while !stop.load(Ordering::SeqCst) {
        let chunk = match capturer.next_chunk() {
            Ok(c) => c,
            Err(e) => {
                tracing::warn!(error = %format!("{e:#}"), "audio capture ended");
                capture_dead = true;
                break;
            }
        };
        acc.extend_from_slice(&chunk);
        while acc.len() >= frame_len {
            let pts_ns = now_ns();
            // Encode straight out of the accumulator, then discard the consumed samples —
            // no per-frame Vec allocation on this ~200 frames/s path.
            let encoded = enc.encode_float(&acc[..frame_len], &mut opus_buf);
            acc.drain(..frame_len);
            match encoded {
                Ok(n) => {
                    let d =
                        punktfunk_core::quic::encode_audio_datagram(seq, pts_ns, &opus_buf[..n]);
                    if conn.send_datagram(d.into()).is_err() {
                        break 'session; // connection gone
                    }
                    seq = seq.wrapping_add(1);
                }
                // A failed frame is skipped; its samples were already consumed above.
                Err(e) => tracing::warn!(error = %e, "opus encode"),
            }
        }
    }
    // Return the live capturer for the next session; a dead one is dropped so the next
    // session reopens fresh.
    if !capture_dead {
        *audio_cap.lock().unwrap() = Some(capturer);
    }
}
/// Stub — punktfunk/1 audio needs Linux (PipeWire capture + libopus); non-Linux dev builds
/// run sessions without it, same as when the capturer fails to open.
/// All three parameters are ignored: the stub only logs a warning and returns immediately.
#[cfg(not(target_os = "linux"))]
fn audio_thread(_conn: quinn::Connection, _stop: Arc<AtomicBool>, _audio_cap: AudioCapSlot) {
tracing::warn!(
"punktfunk/1 audio requires Linux (PipeWire + libopus) — session continues without it"
);
}
/// Protocol-test data plane: submit up to `frames` deterministic 64 KiB test frames (see
/// [`test_frame`]), each flagged as a picture start and stamped with the wall clock, pausing
/// one nominal 60 fps frame time after every frame. Ends early once `stop` is set; a failed
/// submit aborts with an error.
fn synthetic_stream(session: &mut Session, frames: u32, stop: &AtomicBool) -> Result<()> {
    const FRAME_BYTES: usize = 64 * 1024;
    let pause = std::time::Duration::from_millis(1000 / 60);
    let flags = (FLAG_PIC | FLAG_SOF) as u32;
    // The stop flag is consulted once per frame, right before that frame is produced.
    for idx in (0..frames).take_while(|_| !stop.load(Ordering::SeqCst)) {
        let payload = test_frame(idx, FRAME_BYTES);
        if let Err(e) = session.submit_frame(&payload, now_ns(), flags) {
            return Err(anyhow!("submit_frame: {e:?}"));
        }
        std::thread::sleep(pause);
    }
    tracing::info!(frames, "synthetic stream complete");
    Ok(())
}
/// Real capture→encode→punktfunk/1 data plane. Creates a native virtual output at the
/// client's `mode`, captures it, encodes H.265 at a fixed 20 Mbit/s and submits every access
/// unit stamped with the wall clock taken just before the encoder submit (the client derives
/// per-frame pipeline latency from it). Paced at `mode.refresh_hz`; when no new frame has
/// arrived the previous one is encoded again. Runs until `stop` is set or `seconds` elapsed.
fn virtual_stream(
    session: &mut Session,
    mode: punktfunk_core::Mode,
    seconds: u32,
    stop: &AtomicBool,
) -> Result<()> {
    use std::time::{Duration, Instant};

    let compositor = crate::vdisplay::detect().context("detect compositor")?;
    tracing::info!(?compositor, ?mode, "punktfunk/1 virtual display");
    // `display` stays alive for the whole function, not just the `create` call.
    let mut display = crate::vdisplay::open(compositor)?;
    let output = display.create(mode).context("create virtual output")?;
    let mut capturer =
        crate::capture::capture_virtual_output(output).context("capture virtual output")?;
    capturer.set_active(true);
    // The first frame fixes the format and dimensions the encoder is opened with.
    let mut current = capturer.next_frame().context("first frame")?;
    let mut encoder = crate::encode::open_video(
        crate::encode::Codec::H265,
        current.format,
        current.width,
        current.height,
        mode.refresh_hz,
        20_000_000,
        current.is_cuda(),
    )
    .context("open NVENC")?;

    // `max(1)` guards the division against a zero refresh rate.
    let frame_period = Duration::from_secs_f64(1.0 / mode.refresh_hz.max(1) as f64);
    let deadline = Instant::now() + Duration::from_secs(seconds as u64);
    let mut next = Instant::now();
    let mut sent: u64 = 0;
    loop {
        if stop.load(Ordering::SeqCst) || Instant::now() >= deadline {
            break;
        }
        if let Some(fresh) = capturer.try_latest().context("capture")? {
            current = fresh;
        }
        let capture_ns = now_ns();
        encoder.submit(&current).context("encoder submit")?;
        while let Some(au) = encoder.poll().context("encoder poll")? {
            let mut flags = FLAG_PIC as u32;
            if au.keyframe {
                flags |= FLAG_SOF as u32;
            }
            if let Err(e) = session.submit_frame(&au.data, capture_ns, flags) {
                return Err(anyhow!("submit_frame: {e:?}"));
            }
            sent += 1;
        }
        // Fixed-rate pacing: sleep until the next slot, or resynchronise when running late.
        next += frame_period;
        if let Some(wait) = next.checked_duration_since(Instant::now()) {
            std::thread::sleep(wait);
        } else {
            next = Instant::now();
        }
    }
    tracing::info!(sent, "punktfunk/1 virtual stream complete");
    Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
/// Build a gamepad wire event for pad `pad` (the pad index travels in `flags`).
fn gp(kind: InputKind, code: u32, x: i32, pad: u32) -> InputEvent {
InputEvent {
kind,
_pad: [0; 3],
code,
x,
y: 0,
flags: pad,
}
}
/// Incremental wire events accumulate into the full pad frame the virtual xpad applies.
#[test]
fn gamepad_accumulator() {
use punktfunk_core::input::gamepad::*;
let mut s = PadState::default();
assert!(s.apply(&gp(InputKind::GamepadButton, BTN_A, 1, 0)));
assert!(s.apply(&gp(InputKind::GamepadButton, BTN_LB, 1, 0)));
assert!(s.apply(&gp(InputKind::GamepadAxis, AXIS_LS_X, -32768, 0)));
assert!(s.apply(&gp(InputKind::GamepadAxis, AXIS_RT, 255, 0)));
// `frame` stamps whatever index/mask the caller passes; the state itself is pad-agnostic.
let f = s.frame(2, 0b0100);
assert_eq!(f.buttons, BTN_A | BTN_LB);
assert_eq!((f.ls_x, f.right_trigger), (-32768, 255));
assert_eq!((f.index, f.active_mask), (2, 0b0100));
// Release folds out; axis values clamp; unknown axis ids are rejected.
assert!(s.apply(&gp(InputKind::GamepadButton, BTN_A, 0, 0)));
assert_eq!(s.frame(0, 1).buttons, BTN_LB);
assert!(s.apply(&gp(InputKind::GamepadAxis, AXIS_LT, 9_999, 0)));
assert_eq!(s.left_trigger, 255);
assert!(!s.apply(&gp(InputKind::GamepadAxis, 42, 1, 0)));
// The punktfunk/1 button bits are the GameStream bits — one wire contract end to end.
assert_eq!(BTN_A, crate::gamestream::gamepad::BTN_A);
assert_eq!(BTN_GUIDE, crate::gamestream::gamepad::BTN_GUIDE);
assert_eq!(BTN_DPAD_UP, crate::gamestream::gamepad::BTN_DPAD_UP);
}
/// Pull and byte-verify `count` synthetic frames through the C ABI connection.
///
/// # Safety
/// `conn` must be a live connection handle returned by `punktfunk_connect` that has not
/// been closed yet.
/// NOTE(review): the `data`/`len` of a returned frame are read right after `next_au`
/// returns — presumably they stay valid until the next call; confirm against the ABI docs.
unsafe fn pull_verified(conn: *mut punktfunk_core::abi::PunktfunkConnection, count: u32) {
use punktfunk_core::error::PunktfunkStatus;
let mut got = 0u32;
let mut frame = unsafe { std::mem::zeroed() };
while got < count {
match unsafe {
punktfunk_core::abi::punktfunk_connection_next_au(conn, &mut frame, 2000)
} {
PunktfunkStatus::Ok => {
let data = unsafe { std::slice::from_raw_parts(frame.data, frame.len) };
// The frame carries its own index in the first four bytes; regenerate and compare.
let idx = u32::from_le_bytes(data[0..4].try_into().unwrap());
assert_eq!(
data,
&test_frame(idx, data.len())[..],
"frame {idx} content"
);
got += 1;
}
// No frame within the poll timeout — keep polling until `count` frames arrived.
PunktfunkStatus::NoFrame => continue,
other => panic!("next_au: {other:?}"),
}
}
}
/// End-to-end through the C ABI — the exact contract platform clients (Swift) link:
/// in-process punktfunk/1 host, `punktfunk_connect` (TOFU → pinned reconnect) →
/// `punktfunk_connection_next_au` pulls verified frames → `punktfunk_connection_send_input`
/// enqueues → `punktfunk_connection_close`. Three sequential sessions against ONE host
/// process prove the persistent listener, and a wrong pin is rejected.
#[test]
fn c_abi_connection_roundtrip() {
use punktfunk_core::abi::{
punktfunk_connect, punktfunk_connection_close, punktfunk_connection_mode,
punktfunk_connection_send_input,
};
use punktfunk_core::error::PunktfunkStatus;
let host = std::thread::spawn(|| {
run(M3Options {
port: 19777,
source: M3Source::Synthetic,
seconds: 0,
frames: 25,
max_sessions: 3,
})
});
// Give the host thread time to bind its endpoint before the first connect.
std::thread::sleep(std::time::Duration::from_millis(500));
// Session 1: TOFU (no pin) — observe the host fingerprint.
let addr = std::ffi::CString::new("127.0.0.1").unwrap();
let mut observed = [0u8; 32];
let conn = unsafe {
punktfunk_connect(
addr.as_ptr(),
19777,
1280,
720,
60,
std::ptr::null(),
observed.as_mut_ptr(),
10_000,
)
};
assert!(!conn.is_null(), "punktfunk_connect failed");
assert_ne!(observed, [0u8; 32], "fingerprint not reported");
// The host echoes the requested mode back in its Welcome.
let (mut w, mut h, mut hz) = (0u32, 0u32, 0u32);
assert_eq!(
unsafe { punktfunk_connection_mode(conn, &mut w, &mut h, &mut hz) },
PunktfunkStatus::Ok
);
assert_eq!((w, h, hz), (1280, 720, 60));
unsafe { pull_verified(conn, 25) };
let ev = punktfunk_core::input::InputEvent {
kind: punktfunk_core::input::InputKind::MouseMove,
_pad: [0; 3],
code: 0,
x: 1,
y: 2,
flags: 0,
};
assert_eq!(
unsafe { punktfunk_connection_send_input(conn, &ev) },
PunktfunkStatus::Ok
);
unsafe { punktfunk_connection_close(conn) };
// Session 2 (same host process — the listener survived): pin the fingerprint.
let conn2 = unsafe {
punktfunk_connect(
addr.as_ptr(),
19777,
1280,
720,
60,
observed.as_ptr(),
std::ptr::null_mut(),
10_000,
)
};
assert!(!conn2.is_null(), "pinned reconnect failed");
unsafe { pull_verified(conn2, 25) };
unsafe { punktfunk_connection_close(conn2) };
// Session 3: a wrong pin must be rejected by the handshake.
let bad = [0xAAu8; 32];
let conn3 = unsafe {
punktfunk_connect(
addr.as_ptr(),
19777,
1280,
720,
60,
bad.as_ptr(),
std::ptr::null_mut(),
10_000,
)
};
assert!(conn3.is_null(), "wrong pin must fail the handshake");
// The rejected attempt does not count as the host's third session: a TLS-failed
// handshake never yields a connection, so the host is still waiting in accept().
// Connect once more (TOFU) to complete the third session and let the host exit.
let conn4 = unsafe {
punktfunk_connect(
addr.as_ptr(),
19777,
1280,
720,
60,
std::ptr::null(),
std::ptr::null_mut(),
10_000,
)
};
assert!(!conn4.is_null());
unsafe { pull_verified(conn4, 25) };
unsafe { punktfunk_connection_close(conn4) };
// `max_sessions: 3` makes `run` return once the third session ends.
host.join().unwrap().unwrap();
}
}
+340
View File
@@ -0,0 +1,340 @@
//! `punktfunk-host` — the Linux streaming host (plan §2, §6, §7).
//!
//! Creates a client-sized virtual display, captures it via PipeWire, encodes with
//! VAAPI/NVENC, and hands encoded access units to `punktfunk_core` for FEC + packetization +
//! pacing + send. Input flows back via libei/uinput. The platform backends are
//! `#[cfg(target_os = "linux")]`; the crate compiles everywhere so the workspace builds
//! on non-Linux dev machines — it just can't run the pipeline there.
//!
//! Status: M0. The `m0` subcommand runs the capture→encode→file pipeline spike and feeds
//! the encoded AUs through a `punktfunk_core` loopback. M2 wires the full P1 host that a stock
//! Moonlight client connects to.
// Scaffold: trait methods and config paths are defined ahead of their backends.
#![allow(dead_code)]
mod audio;
mod capture;
mod encode;
mod gamestream;
mod inject;
mod m0;
mod m3;
mod mgmt;
mod pipeline;
mod pwinit;
mod vdisplay;
#[cfg(target_os = "linux")]
mod zerocopy;
use anyhow::{bail, Result};
use encode::Codec;
use m0::{Options, Source};
use std::path::PathBuf;
fn main() {
// Logs go to stderr so stdout stays machine-readable (`punktfunk-host openapi > spec.json`).
tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()),
)
.with_writer(std::io::stderr)
.init();
if let Err(e) = real_main() {
tracing::error!("{e:#}");
std::process::exit(1);
}
}
fn real_main() -> Result<()> {
tracing::info!(
"punktfunk-host (punktfunk_core ABI v{})",
punktfunk_core::ABI_VERSION
);
let args: Vec<String> = std::env::args().skip(1).collect();
match args.first().map(String::as_str) {
// M2 GameStream host control plane (P1.1: mDNS + serverinfo) + management API.
Some("serve") => gamestream::serve(parse_serve(&args[1..])?),
// Print the management API's OpenAPI document (for client codegen).
Some("openapi") => {
print!("{}", mgmt::openapi_json());
Ok(())
}
// Standalone input-injection smoke test (no client needed): open the session's input
// backend and inject a scripted mouse/keyboard pattern. Watch a focused app / `wev`.
Some("input-test") => input_test(),
// Zero-copy FFI/GPU probe: init the EGL importer + CUDA context (no capture needed).
#[cfg(target_os = "linux")]
Some("zerocopy-probe") => zerocopy::probe(),
// M0 pipeline spike.
Some("m0") => m0::run(parse_m0(&args[1..])?),
// M3: native punktfunk/1 host (QUIC control plane + UDP data plane).
Some("m3-host") => {
let get = |flag: &str| {
args.iter()
.skip_while(|a| *a != flag)
.nth(1)
.map(String::as_str)
};
let source = match get("--source") {
Some("virtual") => m3::M3Source::Virtual,
_ => m3::M3Source::Synthetic,
};
m3::run(m3::M3Options {
port: get("--port").and_then(|s| s.parse().ok()).unwrap_or(9777),
source,
seconds: get("--seconds").and_then(|s| s.parse().ok()).unwrap_or(30),
frames: get("--frames").and_then(|s| s.parse().ok()).unwrap_or(300),
max_sessions: get("--max-sessions")
.and_then(|s| s.parse().ok())
.unwrap_or(0),
})
}
Some("-h") | Some("--help") | Some("help") | None => {
print_usage();
Ok(())
}
// Bare flags (no subcommand) default to the m0 spike for back-compat.
Some(_) => m0::run(parse_m0(&args)?),
}
}
/// Drive the session's input backend (libei on KWin/GNOME, wlr on Sway) with a scripted
/// mouse + keyboard pattern, so input injection can be validated without a Moonlight client.
///
/// # Errors
/// Fails only if the injector cannot be opened; individual injection failures are logged
/// (mouse moves) or ignored (key/button taps).
#[cfg(target_os = "linux")]
fn input_test() -> Result<()> {
    use punktfunk_core::input::{InputEvent, InputKind};
    use std::time::Duration;

    let backend = inject::default_backend();
    tracing::info!(?backend, "input-test: opening injector");
    let mut inj = inject::open(backend)?;

    // An async backend (libei) needs a moment to establish its portal/EIS session + device
    // resume; events injected before then are dropped.
    std::thread::sleep(Duration::from_secs(4));

    // Build an event with the padding and flags zeroed.
    let make_event = |kind, code, x, y| InputEvent {
        kind,
        _pad: [0; 3],
        code,
        x,
        y,
        flags: 0,
    };

    tracing::info!(
        "input-test: injecting a mouse square + 'A'/click taps for ~8s (watch wev / focused app)"
    );
    for tick in 0..160u32 {
        // The direction changes every 10 ticks, cycling right, down, left, up.
        let leg = (tick / 10) % 4;
        let (dx, dy) = if leg == 0 {
            (12, 0)
        } else if leg == 1 {
            (0, 12)
        } else if leg == 2 {
            (-12, 0)
        } else {
            (0, -12)
        };
        let step = make_event(InputKind::MouseMove, 0, dx, dy);
        if let Err(e) = inj.inject(&step) {
            tracing::warn!(error = %format!("{e:#}"), "input-test: inject failed");
        }

        // Every 20th tick: tap 'A' (0x41), then a left click (button 1). Failures are ignored.
        if tick % 20 == 0 {
            let taps = [
                make_event(InputKind::KeyDown, 0x41, 0, 0),
                make_event(InputKind::KeyUp, 0x41, 0, 0),
                make_event(InputKind::MouseButtonDown, 1, 0, 0),
                make_event(InputKind::MouseButtonUp, 1, 0, 0),
            ];
            for tap in &taps {
                let _ = inj.inject(tap);
            }
        }
        std::thread::sleep(Duration::from_millis(50));
    }
    tracing::info!("input-test: done");
    Ok(())
}
/// Non-Linux stand-in for the input-injection smoke test: always fails, since the test is
/// only implemented for Linux.
#[cfg(not(target_os = "linux"))]
fn input_test() -> Result<()> {
    Err(anyhow::anyhow!("input-test requires Linux"))
}
/// `serve` options — all about the management API; the GameStream ports are protocol-fixed.
///
/// Recognises `--mgmt-bind <IP:PORT>`, `--mgmt-token <TOKEN>` and `-h`/`--help` (which prints
/// the usage text and exits the process). When no token flag is given, the token is taken
/// from `PUNKTFUNK_MGMT_TOKEN`.
///
/// A blank (empty or whitespace-only) token is never accepted: the flag form is rejected
/// with an error, and a blank environment value is treated as unset. Both paths use the same
/// trimmed check, matching how `--mgmt-token` is validated.
///
/// # Errors
/// Unknown arguments, a flag without its value, an unparseable bind address, or a blank
/// `--mgmt-token`.
fn parse_serve(args: &[String]) -> Result<mgmt::Options> {
    let is_blank = |token: &str| token.trim().is_empty();

    let mut opts = mgmt::Options::default();
    let mut rest = args.iter();
    while let Some(arg) = rest.next() {
        // Pull the value that must follow a value-taking flag.
        let mut value = || {
            rest.next()
                .cloned()
                .ok_or_else(|| anyhow::anyhow!("missing value for {arg}"))
        };
        match arg.as_str() {
            "--mgmt-bind" => {
                opts.bind = value()?
                    .parse()
                    .map_err(|_| anyhow::anyhow!("bad --mgmt-bind (want IP:PORT)"))?;
            }
            "--mgmt-token" => {
                let token = value()?;
                // An empty token would satisfy the non-loopback "token required" guard
                // while authenticating nobody (or, worse, everybody) — refuse it loudly
                // rather than letting `--mgmt-token "$UNSET_VAR"` ship a dead credential.
                if is_blank(&token) {
                    bail!("--mgmt-token must not be empty");
                }
                opts.token = Some(token);
            }
            "-h" | "--help" => {
                print_usage();
                std::process::exit(0);
            }
            other => bail!("unknown argument '{other}' (try --help)"),
        }
    }

    // Flag wins over the environment so a unit file can set a default and a shell override it.
    if opts.token.is_none() {
        opts.token = std::env::var("PUNKTFUNK_MGMT_TOKEN")
            .ok()
            .filter(|token| !is_blank(token));
    }
    Ok(opts)
}
/// Parse the `m0` spike's command-line flags into [`Options`].
///
/// Defaults: portal source, 1920x1080 @ 60 fps, 5 seconds, H.265 at 20 Mbps, loopback
/// verification on, output at `/tmp/punktfunk-m0.<ext>` where the extension follows the codec.
/// `-h`/`--help` prints the usage text and exits the process.
///
/// # Errors
/// Unknown arguments, a flag without its value, an unparseable number, an unknown
/// `--source`/`--codec`, or a zero `--fps`/`--width`/`--height`/`--seconds`.
fn parse_m0(args: &[String]) -> Result<Options> {
    let mut source = Source::Portal;
    let mut width = 1920u32;
    let mut height = 1080u32;
    let mut fps = 60u32;
    let mut seconds = 5u32;
    let mut codec = Codec::H265;
    let mut bitrate_mbps = 20u64;
    let mut out: Option<PathBuf> = None;
    let mut loopback = true;

    let mut remaining = args.iter();
    while let Some(flag) = remaining.next() {
        let arg = flag.as_str();
        // Pull the value that must follow a value-taking flag.
        let mut value = || {
            remaining
                .next()
                .cloned()
                .ok_or_else(|| anyhow::anyhow!("missing value for {arg}"))
        };
        match arg {
            "--source" => {
                let name = value()?;
                source = match name.as_str() {
                    "synthetic" => Source::Synthetic,
                    "portal" => Source::Portal,
                    "kwin-virtual" => Source::KwinVirtual,
                    other => {
                        bail!("unknown --source '{other}' (synthetic|portal|kwin-virtual)")
                    }
                };
            }
            "--width" => {
                width = value()?
                    .parse()
                    .map_err(|_| anyhow::anyhow!("bad --width"))?;
            }
            "--height" => {
                height = value()?
                    .parse()
                    .map_err(|_| anyhow::anyhow!("bad --height"))?;
            }
            "--fps" => {
                fps = value()?.parse().map_err(|_| anyhow::anyhow!("bad --fps"))?;
            }
            "--seconds" => {
                seconds = value()?
                    .parse()
                    .map_err(|_| anyhow::anyhow!("bad --seconds"))?;
            }
            "--codec" => {
                let name = value()?;
                codec = match name.as_str() {
                    "h264" => Codec::H264,
                    "h265" | "hevc" => Codec::H265,
                    "av1" => Codec::Av1,
                    other => bail!("unknown --codec '{other}' (h264|h265|av1)"),
                };
            }
            "--bitrate" => {
                bitrate_mbps = value()?
                    .parse()
                    .map_err(|_| anyhow::anyhow!("bad --bitrate (Mbps)"))?;
            }
            "--out" => out = Some(PathBuf::from(value()?)),
            "--no-loopback" => loopback = false,
            "-h" | "--help" => {
                print_usage();
                std::process::exit(0);
            }
            other => bail!("unknown argument '{other}' (try --help)"),
        }
    }

    if [fps, width, height, seconds].contains(&0) {
        bail!("--fps/--width/--height/--seconds must be > 0");
    }

    // Without --out, derive the default file name from the codec.
    let out = match out {
        Some(path) => path,
        None => {
            let ext = match codec {
                Codec::H264 => "h264",
                Codec::H265 => "h265",
                Codec::Av1 => "obu",
            };
            PathBuf::from(format!("/tmp/punktfunk-m0.{ext}"))
        }
    };

    Ok(Options {
        source,
        width,
        height,
        fps,
        seconds,
        codec,
        // The CLI takes Mbps; the options carry bits per second (saturating on overflow).
        bitrate_bps: bitrate_mbps.saturating_mul(1_000_000),
        out,
        loopback,
    })
}
/// Print the CLI usage/help text to stderr (a single `eprintln!` of a static string).
// NOTE(review): `real_main` also dispatches the `input-test` and `zerocopy-probe`
// subcommands, which are not listed in this text — consider documenting them here.
fn print_usage() {
eprintln!(
"punktfunk-host — Linux streaming host
USAGE:
punktfunk-host serve [OPTIONS] GameStream host control plane (M2: mDNS + serverinfo …)
+ the management REST API
punktfunk-host openapi print the management API's OpenAPI document (codegen)
punktfunk-host m3-host [OPTIONS] native punktfunk/1 host (QUIC control plane + UDP data plane)
punktfunk-host m0 [OPTIONS] M0 capture→encode→file pipeline spike
SERVE OPTIONS:
--mgmt-bind <IP:PORT> management API address (default: 127.0.0.1:47990)
--mgmt-token <TOKEN> bearer token for the management API (or PUNKTFUNK_MGMT_TOKEN);
required when --mgmt-bind is not loopback
M3-HOST OPTIONS:
--port <N> QUIC listen port (default: 9777)
--source <synthetic|virtual> test frames, or virtual display + NVENC (default: synthetic)
--seconds <N> per-session stream duration, virtual source (default: 30)
--frames <N> per-session frame count, synthetic source (default: 300)
--max-sessions <N> exit after N sessions; 0 = serve forever (default: 0)
M0 OPTIONS:
--source <synthetic|portal|kwin-virtual>
frame source (default: portal). 'kwin-virtual' creates a
KWin virtual output at --width x --height and captures it
--seconds <N> capture duration in seconds (default: 5)
--fps <N> target frame rate (default: 60)
--codec <h264|h265|av1> NVENC codec (default: h265)
--bitrate <MBPS> target bitrate in Mbps (default: 20)
--width <W> --height <H> synthetic source size (default: 1920x1080)
--out <PATH> raw Annex-B output (default: /tmp/punktfunk-m0.<ext>)
--no-loopback skip the punktfunk_core round-trip verification
-h, --help this help
NOTES:
'portal' needs headless Sway + xdg-desktop-portal-wlr running in this session
(see docs/linux-setup.md). 'synthetic' needs no capture session and always runs.
Encoded AUs are written to a playable file AND (unless --no-loopback) fed through a
punktfunk_core host→client loopback that reassembles and byte-verifies each one."
);
}
+979
View File
@@ -0,0 +1,979 @@
//! Management REST API (plan §4) — the control-plane surface a control pane / CLI talks
//! to: host identity + capabilities, runtime status, paired-client management, the pairing
//! PIN flow, and session control. Control plane only — `tokio`/`axum` are permitted here;
//! the per-frame pipeline never touches this module.
//!
//! The API is versioned under `/api/v1` and described by an OpenAPI 3.1 document generated
//! at compile time with `utoipa` — `punktfunk-host openapi` prints it for client codegen, the
//! running server serves it at `/api/v1/openapi.json` plus interactive docs at `/api/docs`,
//! and a copy is checked in at `docs/api/openapi.json` (a test fails if it drifts, like the
//! cbindgen header).
//!
//! Security: binds loopback by default. A bearer token (`--mgmt-token` / `PUNKTFUNK_MGMT_TOKEN`)
//! is enforced on every `/api/v1` route except `/api/v1/health`, and is mandatory for
//! non-loopback binds. The OpenAPI document and docs UI are served unauthenticated (the
//! spec is public knowledge — it lives in this repo).
use crate::encode::Codec;
use crate::gamestream::{
AppState, APP_VERSION, AUDIO_PORT, CONTROL_PORT, GFE_VERSION, RTSP_PORT, VIDEO_PORT,
};
use anyhow::{bail, Context, Result};
use axum::{
extract::{Path, Request, State},
http::{header, StatusCode},
middleware::{self, Next},
response::{IntoResponse, Response},
routing::get,
Json, Router,
};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::net::SocketAddr;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use utoipa::{Modify, OpenApi, ToSchema};
use utoipa_axum::{router::OpenApiRouter, routes};
use utoipa_scalar::{Scalar, Servable};
/// Default management port — adjacent to the GameStream block (47984…48010), and the same
/// number Sunshine users already associate with "the config UI".
// Used as the port of the loopback address in `Options::default`.
pub const DEFAULT_PORT: u16 = 47990;
/// Management server options (CLI: `serve --mgmt-bind ADDR --mgmt-token TOKEN`).
#[derive(Clone, Debug)]
pub struct Options {
/// Address the management server binds to; the `Default` impl uses loopback on
/// [`DEFAULT_PORT`].
pub bind: SocketAddr,
/// Bearer token required on `/api/v1` (except `/health`). `None` ⇒ unauthenticated,
/// which [`run`] only permits on loopback binds.
pub token: Option<String>,
}
impl Default for Options {
    /// Loopback (`127.0.0.1`) on [`DEFAULT_PORT`], with no bearer token configured.
    fn default() -> Self {
        let loopback = SocketAddr::from((std::net::Ipv4Addr::LOCALHOST, DEFAULT_PORT));
        Self {
            bind: loopback,
            token: None,
        }
    }
}
/// Axum state for the management routes: the shared control-plane state + auth config.
struct MgmtState {
/// Shared control-plane state that the handlers read and mutate.
app: Arc<AppState>,
/// Expected bearer token; when `None`, `require_auth` lets every request through.
token: Option<String>,
/// The port we serve on, echoed in [`PortMap`] so a client can persist a full endpoint map.
port: u16,
}
/// Run the management API server (control plane; spawned alongside the nvhttp servers).
pub async fn run(state: Arc<AppState>, opts: Options) -> Result<()> {
// A blank token is no token: it must neither satisfy the non-loopback guard below nor
// become a credential an empty `Authorization: Bearer ` header would match.
let token = opts.token.filter(|t| !t.trim().is_empty());
if token.is_none() && !opts.bind.ip().is_loopback() {
bail!(
"management API bind {} is not loopback — set --mgmt-token (or PUNKTFUNK_MGMT_TOKEN) \
to expose it beyond this machine",
opts.bind
);
}
tracing::info!(
addr = %opts.bind,
auth = if token.is_some() { "bearer" } else { "none (loopback)" },
"management API listening (docs at /api/docs, spec at /api/v1/openapi.json)"
);
let app = app(state, token, opts.bind.port());
axum_server::bind(opts.bind)
.serve(app.into_make_service())
.await
.context("management API server")
}
/// Compose the full management router (also used directly by the handler tests).
///
/// The auth middleware is attached with `route_layer` to the versioned API routes only; the
/// docs UI and the OpenAPI JSON route are added afterwards and so are not wrapped by it.
fn app(state: Arc<AppState>, token: Option<String>, port: u16) -> Router {
    let shared = Arc::new(MgmtState {
        app: state,
        token,
        port,
    });
    let (api_routes, api) = api_router_parts();

    // Added after the auth layer, so neither of these is gated.
    let docs_ui = Scalar::with_url("/api/docs", api.clone());
    let spec_route = get(move || {
        let spec = api.clone();
        async move { Json(spec) }
    });

    let auth_layer = middleware::from_fn_with_state(shared.clone(), require_auth);
    api_routes
        .route_layer(auth_layer)
        .with_state(shared)
        .merge(docs_ui)
        .route("/api/v1/openapi.json", spec_route)
}
/// The versioned API routes + the OpenAPI document collected from them. Single source of
/// truth for both the live server and the `openapi` subcommand.
fn api_router_parts() -> (Router<Arc<MgmtState>>, utoipa::openapi::OpenApi) {
    // Every handler is registered once; its `#[utoipa::path]` metadata feeds the document.
    let v1 = OpenApiRouter::new()
        .routes(routes!(get_health))
        .routes(routes!(get_host_info))
        .routes(routes!(get_status))
        .routes(routes!(list_paired_clients))
        .routes(routes!(unpair_client))
        .routes(routes!(get_pairing_status))
        .routes(routes!(submit_pairing_pin))
        .routes(routes!(stop_session))
        .routes(routes!(request_idr));

    let documented = OpenApiRouter::with_openapi(ApiDoc::openapi()).nest("/api/v1", v1);
    documented.split_for_parts()
}
/// The OpenAPI document as pretty JSON — what `punktfunk-host openapi` prints and what is
/// checked in at `docs/api/openapi.json` for client codegen.
///
/// The returned string ends with exactly one trailing newline.
///
/// # Panics
/// Panics if the document fails to serialize.
pub fn openapi_json() -> String {
    let (_routes, spec) = api_router_parts();
    let pretty = spec
        .to_pretty_json()
        .expect("serialize OpenAPI document");
    format!("{pretty}\n")
}
// Root of the generated OpenAPI document: title/description, the tag catalogue, and the
// `SecurityAddon` modifier. `api_router_parts` seeds the router with `ApiDoc::openapi()`.
#[derive(OpenApi)]
#[openapi(
info(
title = "punktfunk management API",
description = "Control-plane API for managing a punktfunk streaming host: host \
capabilities, runtime status, paired clients, the pairing PIN flow, \
and session control. Authentication: HTTP bearer token, enforced on \
every route except `/api/v1/health` when the host is started with a \
management token (mandatory for non-loopback binds)."
),
modifiers(&SecurityAddon),
tags(
(name = "host", description = "Host identity, capabilities, and liveness"),
(name = "clients", description = "Paired Moonlight client management"),
(name = "pairing", description = "Pairing PIN delivery (the out-of-band half of the GameStream pairing handshake)"),
(name = "session", description = "Active streaming session control"),
)
)]
struct ApiDoc;
/// Registers the `bearerAuth` scheme and applies it globally (utoipa has no first-class
/// "all operations" shorthand, hence a modifier).
struct SecurityAddon;

impl Modify for SecurityAddon {
    /// Add the HTTP bearer scheme to the document's components (creating them if absent) and
    /// set it as the document-wide security requirement with no scopes.
    fn modify(&self, openapi: &mut utoipa::openapi::OpenApi) {
        use utoipa::openapi::security::{
            Http, HttpAuthScheme, SecurityRequirement, SecurityScheme,
        };

        let bearer = SecurityScheme::Http(Http::new(HttpAuthScheme::Bearer));
        let components = openapi.components.get_or_insert_with(Default::default);
        components.add_security_scheme("bearerAuth", bearer);

        let no_scopes = Vec::<String>::new();
        let requirement = SecurityRequirement::new("bearerAuth", no_scopes);
        openapi.security = Some(vec![requirement]);
    }
}
// ---------------------------------------------------------------------------------------
// Schemas
// ---------------------------------------------------------------------------------------
/// Liveness + version probe.
// Built by `get_health`: `version` comes from `CARGO_PKG_VERSION` and `abi_version` from
// `punktfunk_core::ABI_VERSION`.
#[derive(Serialize, ToSchema)]
struct Health {
/// Always `"ok"` when the host responds.
#[schema(example = "ok")]
status: String,
/// `punktfunk-host` crate version.
version: String,
/// `punktfunk-core` C ABI version.
abi_version: u32,
}
/// Host identity and advertised capabilities (static for the life of the process).
// Built by `get_host_info` from the shared host record plus compile-time constants.
#[derive(Serialize, ToSchema)]
struct HostInfo {
// Copied from the shared host record.
hostname: String,
/// Stable per-host id (persisted across restarts), matched on pairing.
uniqueid: String,
/// Best-effort primary LAN IP.
local_ip: String,
/// `punktfunk-host` crate version.
version: String,
/// `punktfunk-core` C ABI version.
abi_version: u32,
/// GameStream host version advertised to Moonlight clients.
app_version: String,
/// GFE version advertised to Moonlight clients.
gfe_version: String,
/// Codecs the host can encode (NVENC).
codecs: Vec<ApiCodec>,
// Full endpoint map, including the management port itself.
ports: PortMap,
}
/// Every port a client integration may need (Moonlight derives the stream ports from the
/// HTTP base; a control pane should not have to).
// In `get_host_info`, `mgmt` is the serving port, `http`/`https` come from the host record,
// and the remaining four are the `gamestream` port constants.
#[derive(Serialize, ToSchema)]
struct PortMap {
/// This management API.
mgmt: u16,
/// nvhttp plain HTTP (serverinfo, pairing).
http: u16,
/// nvhttp mutual-TLS HTTPS (post-pairing).
https: u16,
// Filled from `RTSP_PORT`.
rtsp: u16,
// Filled from `VIDEO_PORT`.
video: u16,
// Filled from `CONTROL_PORT`.
control: u16,
// Filled from `AUDIO_PORT`.
audio: u16,
}
/// Video codec identifier.
// `rename_all = "lowercase"` makes the wire values `h264`, `h265` and `av1`.
#[derive(Clone, Copy, Serialize, Deserialize, ToSchema, PartialEq, Eq, Debug)]
#[serde(rename_all = "lowercase")]
enum ApiCodec {
H264,
H265,
Av1,
}
impl From<Codec> for ApiCodec {
    /// Map the encoder's codec enum onto its API counterpart, variant for variant.
    fn from(codec: Codec) -> Self {
        match codec {
            Codec::Av1 => Self::Av1,
            Codec::H265 => Self::H265,
            Codec::H264 => Self::H264,
        }
    }
}
/// Live host status (changes as clients launch/end sessions).
// Assembled per request by `get_status` from the shared state's atomics and mutex-guarded
// fields.
#[derive(Serialize, ToSchema)]
struct RuntimeStatus {
/// True while the video stream thread is running.
video_streaming: bool,
/// True while the audio stream thread is running.
audio_streaming: bool,
/// True while a pairing handshake is parked waiting for the user's PIN
/// (submit it via `POST /api/v1/pair/pin`).
pin_pending: bool,
/// Number of pinned (paired) client certificates.
paired_clients: u32,
/// The active launch session (set by Moonlight's `/launch`, cleared on cancel/stop).
session: Option<SessionInfo>,
/// The RTSP-negotiated stream parameters (present once a client has completed ANNOUNCE).
stream: Option<StreamInfo>,
}
/// Client-requested launch parameters (key material is never exposed here).
// `get_status` copies only these three fields out of the launch state.
#[derive(Serialize, ToSchema)]
struct SessionInfo {
width: u32,
height: u32,
fps: u32,
}
/// RTSP-negotiated stream parameters.
// Populated in `get_status` from the shared stream state when one is present.
#[derive(Serialize, ToSchema)]
struct StreamInfo {
width: u32,
height: u32,
fps: u32,
bitrate_kbps: u32,
/// Video payload size per packet (bytes).
packet_size: u32,
/// Client's parity floor per FEC block (`minRequiredFecPackets`).
min_fec: u8,
codec: ApiCodec,
}
/// A paired (certificate-pinned) Moonlight client.
// Produced by `client_info`; the three optional fields are `None` when the stored DER does
// not parse as an X.509 certificate.
#[derive(Serialize, ToSchema)]
struct PairedClient {
/// Lowercase hex SHA-256 of the client certificate DER — the client's stable id here.
#[schema(example = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08")]
fingerprint: String,
/// Certificate subject (e.g. `CN=NVIDIA GameStream Client`), if the DER parses.
subject: Option<String>,
/// Certificate validity start (unix seconds).
not_before_unix: Option<i64>,
/// Certificate validity end (unix seconds).
not_after_unix: Option<i64>,
}
/// Pairing-flow status.
// `get_pairing_status` fills this from `pairing.pin.awaiting_pin()`.
#[derive(Serialize, ToSchema)]
struct PairingStatus {
/// True while a pairing handshake is parked waiting for the user's PIN.
pin_pending: bool,
}
/// The PIN Moonlight displays during pairing.
// The field description below read "116 ASCII digits"; `submit_pairing_pin` accepts 1 to 16
// digits. The text feeds the generated OpenAPI schema, so regenerate the checked-in
// document after changing it.
#[derive(Deserialize, ToSchema)]
struct SubmitPin {
/// 1–16 ASCII digits (Moonlight shows 4).
#[schema(example = "1234")]
pin: String,
}
/// Error envelope for every non-2xx response.
// Constructed by `api_error`, which pairs it with the HTTP status code.
#[derive(Serialize, Deserialize, ToSchema)]
struct ApiError {
// Human-readable message.
error: String,
}
/// Build a response with the given status whose JSON body is an [`ApiError`] carrying
/// `message`.
fn api_error(status: StatusCode, message: &str) -> Response {
    let envelope = ApiError {
        error: message.to_owned(),
    };
    (status, Json(envelope)).into_response()
}
/// `axum::Json` whose rejections (bad JSON → 400/422, wrong content-type → 415) are
/// rewrapped in the [`ApiError`] envelope, keeping "every non-2xx body is `ApiError`" true.
struct ApiJson<T>(T);

impl<S, T> axum::extract::FromRequest<S> for ApiJson<T>
where
    Json<T>: axum::extract::FromRequest<S, Rejection = axum::extract::rejection::JsonRejection>,
    S: Send + Sync,
{
    type Rejection = Response;

    /// Delegate to `Json<T>`; on failure keep the rejection's status and use its text as the
    /// error message.
    async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
        Json::<T>::from_request(req, state)
            .await
            .map(|Json(value)| ApiJson(value))
            .map_err(|rejection| api_error(rejection.status(), &rejection.body_text()))
    }
}
// ---------------------------------------------------------------------------------------
// Auth
// ---------------------------------------------------------------------------------------
/// Bearer-token gate on the `/api/v1` routes. No token configured ⇒ open (loopback-only,
/// enforced in [`run`]); `/api/v1/health` stays open for monitoring probes either way.
///
/// The `Authorization` scheme name is matched case-insensitively (`Bearer`, `bearer`, …), as
/// HTTP requires; the credentials after the single separating space are compared unchanged.
/// A missing, non-UTF-8, malformed or mismatching header yields 401 with the error envelope.
async fn require_auth(State(st): State<Arc<MgmtState>>, req: Request, next: Next) -> Response {
    let Some(expected) = st.token.as_deref() else {
        return next.run(req).await;
    };
    if req.uri().path() == "/api/v1/health" {
        return next.run(req).await;
    }
    let presented = req
        .headers()
        .get(header::AUTHORIZATION)
        .and_then(|v| v.to_str().ok())
        .and_then(bearer_credentials);
    match presented {
        Some(token) if token_eq(token, expected) => next.run(req).await,
        _ => api_error(StatusCode::UNAUTHORIZED, "missing or invalid bearer token"),
    }
}

/// Extract the credentials from an `Authorization` header value of the form
/// `<scheme> <credentials>` when the scheme is `Bearer` in any letter case.
///
/// Returns `None` when there is no space separator or the scheme is something else.
fn bearer_credentials(value: &str) -> Option<&str> {
    let (scheme, credentials) = value.split_once(' ')?;
    scheme.eq_ignore_ascii_case("Bearer").then_some(credentials)
}
/// Compare SHA-256 digests instead of the strings — constant-time with respect to the
/// secret without pulling in a ct-eq dependency.
///
/// Returns `true` when the digests of `presented` and `expected` are equal.
fn token_eq(presented: &str, expected: &str) -> bool {
    let presented_digest = Sha256::digest(presented.as_bytes());
    let expected_digest = Sha256::digest(expected.as_bytes());
    presented_digest == expected_digest
}
// ---------------------------------------------------------------------------------------
// Handlers
// ---------------------------------------------------------------------------------------
/// Liveness probe
///
/// Always available without authentication.
#[utoipa::path(
    get,
    path = "/health",
    tag = "host",
    operation_id = "getHealth",
    // Override the document-global bearerAuth: this route is exempt in `require_auth`.
    security(()),
    responses((status = OK, description = "Host is up", body = Health))
)]
async fn get_health() -> Json<Health> {
    // Everything here is a compile-time constant; no shared state is consulted.
    let health = Health {
        status: String::from("ok"),
        version: String::from(env!("CARGO_PKG_VERSION")),
        abi_version: punktfunk_core::ABI_VERSION,
    };
    Json(health)
}
/// Host identity and capabilities
#[utoipa::path(
    get,
    path = "/host",
    tag = "host",
    operation_id = "getHostInfo",
    responses(
        (status = OK, description = "Host identity, versions, codecs, and port map", body = HostInfo),
        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
    )
)]
async fn get_host_info(State(st): State<Arc<MgmtState>>) -> Json<HostInfo> {
    let host = &st.app.host;

    // Our own port, the host's nvhttp ports, and the fixed GameStream stream ports.
    let ports = PortMap {
        mgmt: st.port,
        http: host.http_port,
        https: host.https_port,
        rtsp: RTSP_PORT,
        video: VIDEO_PORT,
        control: CONTROL_PORT,
        audio: AUDIO_PORT,
    };
    // Everything NVENC encodes here (mirrors SERVER_CODEC_MODE_SUPPORT = 3843).
    let codecs = vec![ApiCodec::H264, ApiCodec::H265, ApiCodec::Av1];

    let info = HostInfo {
        hostname: host.hostname.clone(),
        uniqueid: host.uniqueid.clone(),
        local_ip: host.local_ip.to_string(),
        version: String::from(env!("CARGO_PKG_VERSION")),
        abi_version: punktfunk_core::ABI_VERSION,
        app_version: APP_VERSION.into(),
        gfe_version: GFE_VERSION.into(),
        codecs,
        ports,
    };
    Json(info)
}
/// Live host status
#[utoipa::path(
    get,
    path = "/status",
    tag = "host",
    operation_id = "getStatus",
    responses(
        (status = OK, description = "Streaming/pairing state and the active session, if any", body = RuntimeStatus),
        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
    )
)]
async fn get_status(State(st): State<Arc<MgmtState>>) -> Json<RuntimeStatus> {
    let app = &st.app;

    // Each lock is taken for a single statement; only plain numbers are copied out, so no
    // key material from the launch state can reach the response.
    let session = app
        .launch
        .lock()
        .unwrap()
        .as_ref()
        .map(|launch| SessionInfo {
            width: launch.width,
            height: launch.height,
            fps: launch.fps,
        });
    let stream = app.stream.lock().unwrap().as_ref().map(|cfg| StreamInfo {
        width: cfg.width,
        height: cfg.height,
        fps: cfg.fps,
        bitrate_kbps: cfg.bitrate_kbps,
        packet_size: cfg.packet_size as u32,
        min_fec: cfg.min_fec,
        codec: cfg.codec.into(),
    });

    let video_streaming = app.streaming.load(Ordering::SeqCst);
    let audio_streaming = app.audio_streaming.load(Ordering::SeqCst);
    let pin_pending = app.pairing.pin.awaiting_pin();
    let paired_clients = app.paired.lock().unwrap().len() as u32;

    Json(RuntimeStatus {
        video_streaming,
        audio_streaming,
        pin_pending,
        paired_clients,
        session,
        stream,
    })
}
/// List paired clients
#[utoipa::path(
    get,
    path = "/clients",
    tag = "clients",
    operation_id = "listPairedClients",
    responses(
        (status = OK, description = "All certificate-pinned clients", body = [PairedClient]),
        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
    )
)]
async fn list_paired_clients(State(st): State<Arc<MgmtState>>) -> Json<Vec<PairedClient>> {
    // Snapshot the pinned certificates so the lock is released before any parsing happens.
    let snapshot = st.app.paired.lock().unwrap().clone();

    let mut clients = Vec::with_capacity(snapshot.len());
    for der in &snapshot {
        clients.push(client_info(der));
    }
    Json(clients)
}
/// Describe one pinned certificate: its hex SHA-256 fingerprint always, plus subject and
/// validity window when the DER parses as an X.509 certificate (otherwise those are `None`).
fn client_info(der: &[u8]) -> PairedClient {
    let fingerprint = hex::encode(Sha256::digest(der));

    let Ok((_, cert)) = x509_parser::parse_x509_certificate(der) else {
        // Unparseable DER: still listed, identified by fingerprint alone.
        return PairedClient {
            fingerprint,
            subject: None,
            not_before_unix: None,
            not_after_unix: None,
        };
    };

    let validity = cert.validity();
    PairedClient {
        fingerprint,
        subject: Some(cert.subject().to_string()),
        not_before_unix: Some(validity.not_before.timestamp()),
        not_after_unix: Some(validity.not_after.timestamp()),
    }
}
/// Unpair a client
///
/// Removes the client's certificate from the pairing store. Caveat: the nvhttp TLS layer
/// does not yet reject unlisted certificates (`gamestream/tls.rs` accepts any well-formed
/// client cert — a planned hardening step), so until that lands this removes the client
/// from the listing without severing its ability to reconnect.
#[utoipa::path(
    delete,
    path = "/clients/{fingerprint}",
    tag = "clients",
    operation_id = "unpairClient",
    params(
        ("fingerprint" = String, Path,
         description = "Hex SHA-256 fingerprint of the client certificate DER (64 chars, case-insensitive)")
    ),
    responses(
        (status = NO_CONTENT, description = "Client unpaired"),
        (status = BAD_REQUEST, description = "Malformed fingerprint", body = ApiError),
        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
        (status = NOT_FOUND, description = "No paired client with that fingerprint", body = ApiError),
    )
)]
async fn unpair_client(
    State(st): State<Arc<MgmtState>>,
    Path(fingerprint): Path<String>,
) -> Response {
    // Validate the shape first so garbage never reaches the comparison below.
    let well_formed =
        fingerprint.len() == 64 && fingerprint.bytes().all(|b| b.is_ascii_hexdigit());
    if !well_formed {
        return api_error(
            StatusCode::BAD_REQUEST,
            "fingerprint must be the 64-char hex SHA-256 of the client certificate DER",
        );
    }

    let mut paired = st.app.paired.lock().unwrap();
    let count_before = paired.len();
    // Drop every certificate whose fingerprint matches, ignoring hex letter case.
    paired.retain(|der| {
        let pinned = hex::encode(Sha256::digest(der));
        !pinned.eq_ignore_ascii_case(&fingerprint)
    });

    // `retain` only removes, so an unchanged length means nothing matched.
    if paired.len() == count_before {
        return api_error(
            StatusCode::NOT_FOUND,
            "no paired client with that fingerprint",
        );
    }
    tracing::info!(fingerprint, "management API: client unpaired");
    StatusCode::NO_CONTENT.into_response()
}
/// Pairing-flow status
///
/// Poll this to know when to prompt the user for the PIN Moonlight displays.
#[utoipa::path(
    get,
    path = "/pair",
    tag = "pairing",
    operation_id = "getPairingStatus",
    responses(
        (status = OK, description = "Whether a pairing handshake is waiting for a PIN", body = PairingStatus),
        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
    )
)]
async fn get_pairing_status(State(st): State<Arc<MgmtState>>) -> Json<PairingStatus> {
    let pin_pending = st.app.pairing.pin.awaiting_pin();
    Json(PairingStatus { pin_pending })
}
/// Submit the pairing PIN
///
/// Delivers the PIN the Moonlight client is displaying, completing the out-of-band half
/// of the pairing handshake.
#[utoipa::path(
    post,
    path = "/pair/pin",
    tag = "pairing",
    operation_id = "submitPairingPin",
    request_body = SubmitPin,
    responses(
        (status = NO_CONTENT, description = "PIN delivered to the waiting handshake"),
        (status = BAD_REQUEST, description = "Malformed PIN or unparseable JSON body", body = ApiError),
        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
        (status = CONFLICT, description = "No pairing handshake is waiting for a PIN", body = ApiError),
        (status = UNSUPPORTED_MEDIA_TYPE, description = "Body is not application/json", body = ApiError),
        (status = UNPROCESSABLE_ENTITY, description = "JSON body does not match the schema", body = ApiError),
    )
)]
async fn submit_pairing_pin(
    State(st): State<Arc<MgmtState>>,
    ApiJson(req): ApiJson<SubmitPin>,
) -> Response {
    // Surrounding whitespace is tolerated; what remains must be 1 to 16 ASCII digits.
    let pin = req.pin.trim();
    let well_formed = (1..=16).contains(&pin.len()) && pin.bytes().all(|b| b.is_ascii_digit());
    if !well_formed {
        return api_error(StatusCode::BAD_REQUEST, "pin must be 1-16 ASCII digits");
    }

    if st.app.pairing.pin.awaiting_pin() {
        st.app.pairing.pin.submit(pin.to_string());
        return StatusCode::NO_CONTENT.into_response();
    }

    // Refusing (rather than parking the PIN) prevents a stale PIN from silently
    // satisfying a *future* pairing attempt.
    api_error(
        StatusCode::CONFLICT,
        "no pairing handshake is waiting for a PIN",
    )
}
/// Stop the active session
///
/// Kicks the connected client: stops the video/audio stream threads and clears the launch
/// state. Idempotent — succeeds even when nothing is streaming.
#[utoipa::path(
    delete,
    path = "/session",
    tag = "session",
    operation_id = "stopSession",
    responses(
        (status = NO_CONTENT, description = "Session stopped (or none was active)"),
        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
    )
)]
async fn stop_session(State(st): State<Arc<MgmtState>>) -> StatusCode {
    let app = &st.app;

    // Clear both streaming flags; the previous video flag is kept only for the log line.
    let was_streaming = app.streaming.swap(false, Ordering::SeqCst);
    app.audio_streaming.store(false, Ordering::SeqCst);

    // Forget the launch parameters, then the negotiated stream parameters.
    {
        let mut launch = app.launch.lock().unwrap();
        *launch = None;
    }
    {
        let mut stream = app.stream.lock().unwrap();
        *stream = None;
    }

    tracing::info!(was_streaming, "management API: session stopped");
    StatusCode::NO_CONTENT
}
/// Force a keyframe
///
/// Asks the encoder for an IDR frame on the active video stream (what a client requests
/// after unrecoverable loss — exposed for debugging).
#[utoipa::path(
    post,
    path = "/session/idr",
    tag = "session",
    operation_id = "requestIdr",
    responses(
        (status = ACCEPTED, description = "Keyframe requested"),
        (status = UNAUTHORIZED, description = "Missing or invalid bearer token", body = ApiError),
        (status = CONFLICT, description = "No active video stream", body = ApiError),
    )
)]
async fn request_idr(State(st): State<Arc<MgmtState>>) -> Response {
    let streaming = st.app.streaming.load(Ordering::SeqCst);
    if streaming {
        // Only raise the flag here; this handler does not wait for the keyframe.
        st.app.force_idr.store(true, Ordering::SeqCst);
        StatusCode::ACCEPTED.into_response()
    } else {
        api_error(StatusCode::CONFLICT, "no active video stream")
    }
}
// ---------------------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::gamestream::{cert::ServerIdentity, Host, LaunchSession, HTTPS_PORT, HTTP_PORT};
use axum::body::Body;
use http_body_util::BodyExt;
use std::net::{IpAddr, Ipv4Addr};
use tower::ServiceExt;
// Fresh shared state for one test: a fixed fake host record (localhost, the default
// GameStream HTTP/HTTPS ports) and an ephemeral server identity.
fn test_state() -> Arc<AppState> {
let host = Host {
hostname: "test-host".into(),
uniqueid: "deadbeef".into(),
local_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
http_port: HTTP_PORT,
https_port: HTTPS_PORT,
};
let identity = ServerIdentity::ephemeral().expect("ephemeral identity");
Arc::new(AppState::new(host, identity))
}
// Build the production router on `DEFAULT_PORT`, optionally with a bearer token.
fn test_app(state: Arc<AppState>, token: Option<&str>) -> Router {
app(state, token.map(String::from), DEFAULT_PORT)
}
// Send one request through a clone of the router and return (status, body as JSON).
// An empty body, or one that is not valid JSON, is reported as `Value::Null`.
async fn send(app: &Router, req: axum::http::Request<Body>) -> (StatusCode, serde_json::Value) {
let resp = app.clone().oneshot(req).await.expect("infallible");
let status = resp.status();
let bytes = resp.into_body().collect().await.unwrap().to_bytes();
let json = if bytes.is_empty() {
serde_json::Value::Null
} else {
serde_json::from_slice(&bytes).unwrap_or(serde_json::Value::Null)
};
(status, json)
}
// Body-less GET request for `path`.
fn get_req(path: &str) -> axum::http::Request<Body> {
axum::http::Request::get(path).body(Body::empty()).unwrap()
}
// `/api/v1/health` answers 200 with status "ok" and the core ABI version.
#[tokio::test]
async fn health_is_open_and_versioned() {
let app = test_app(test_state(), None);
let (status, body) = send(&app, get_req("/api/v1/health")).await;
assert_eq!(status, StatusCode::OK);
assert_eq!(body["status"], "ok");
assert_eq!(body["abi_version"], punktfunk_core::ABI_VERSION);
}
// With a token configured: missing/wrong credentials get 401, the right token gets 200,
// and health, the OpenAPI JSON and the docs UI remain reachable without credentials.
#[tokio::test]
async fn bearer_token_is_enforced() {
let app = test_app(test_state(), Some("sekrit"));
// No/wrong token → 401 with the error envelope.
let (status, body) = send(&app, get_req("/api/v1/status")).await;
assert_eq!(status, StatusCode::UNAUTHORIZED);
assert!(body["error"].as_str().unwrap().contains("bearer"));
let wrong = axum::http::Request::get("/api/v1/status")
.header("authorization", "Bearer nope")
.body(Body::empty())
.unwrap();
assert_eq!(send(&app, wrong).await.0, StatusCode::UNAUTHORIZED);
// Right token → 200.
let right = axum::http::Request::get("/api/v1/status")
.header("authorization", "Bearer sekrit")
.body(Body::empty())
.unwrap();
assert_eq!(send(&app, right).await.0, StatusCode::OK);
// Health + the spec/docs stay open.
assert_eq!(
send(&app, get_req("/api/v1/health")).await.0,
StatusCode::OK
);
assert_eq!(
send(&app, get_req("/api/v1/openapi.json")).await.0,
StatusCode::OK
);
let docs = app.clone().oneshot(get_req("/api/docs")).await.unwrap();
assert_eq!(docs.status(), StatusCode::OK);
let html = docs.into_body().collect().await.unwrap().to_bytes();
assert!(
html.starts_with(b"<!doctype html>"),
"Scalar UI should serve HTML"
);
}
// `/api/v1/host` echoes the fake host record, the port map, and the lowercase codec list.
#[tokio::test]
async fn host_info_reports_identity_and_ports() {
let app = test_app(test_state(), None);
let (status, body) = send(&app, get_req("/api/v1/host")).await;
assert_eq!(status, StatusCode::OK);
assert_eq!(body["hostname"], "test-host");
assert_eq!(body["uniqueid"], "deadbeef");
assert_eq!(body["ports"]["http"], HTTP_PORT);
assert_eq!(body["ports"]["mgmt"], DEFAULT_PORT);
assert_eq!(body["codecs"], serde_json::json!(["h264", "h265", "av1"]));
}
// `/api/v1/status` starts idle, then reflects a launch session and the streaming flag once
// the shared state is mutated directly.
#[tokio::test]
async fn status_reflects_runtime_state() {
let state = test_state();
let app = test_app(state.clone(), None);
let (_, body) = send(&app, get_req("/api/v1/status")).await;
assert_eq!(body["video_streaming"], false);
assert_eq!(body["session"], serde_json::Value::Null);
*state.launch.lock().unwrap() = Some(LaunchSession {
gcm_key: [0; 16],
rikeyid: 1,
width: 2560,
height: 1440,
fps: 120,
appid: 1,
});
state.streaming.store(true, Ordering::SeqCst);
let (_, body) = send(&app, get_req("/api/v1/status")).await;
assert_eq!(body["video_streaming"], true);
assert_eq!(body["session"]["width"], 2560);
assert_eq!(body["session"]["fps"], 120);
// Key material must never appear anywhere in the response.
assert!(!body.to_string().contains("gcm"));
}
/// Pins one certificate, then checks listing, fingerprint validation and unpairing
/// (case-insensitive fingerprint match, 404 once the client is gone).
#[tokio::test]
async fn paired_clients_list_and_unpair() {
    let shared = test_state();
    let router = test_app(shared.clone(), None);
    // DELETE /api/v1/clients/<fingerprint>.
    let unpair = |fp: &str| {
        axum::http::Request::delete(format!("/api/v1/clients/{fp}"))
            .body(Body::empty())
            .unwrap()
    };

    // Pin the host's own cert DER as a stand-in client.
    let (_, parsed) =
        x509_parser::pem::parse_x509_pem(shared.identity.cert_pem.as_bytes()).unwrap();
    let cert_der = parsed.contents.clone();
    let fp_hex = hex::encode(Sha256::digest(&cert_der));
    shared.paired.lock().unwrap().push(cert_der);

    let (code, listing) = send(&router, get_req("/api/v1/clients")).await;
    assert_eq!(code, StatusCode::OK);
    assert_eq!(listing[0]["fingerprint"], fp_hex);
    assert_eq!(listing[0]["subject"], "CN=punktfunk");

    // Malformed fingerprint → 400.
    let (code, _) = send(&router, unpair("zz")).await;
    assert_eq!(code, StatusCode::BAD_REQUEST);

    // Unpair (uppercase hex must match too) → 204, list empties, second delete → 404.
    let (code, _) = send(&router, unpair(&fp_hex.to_uppercase())).await;
    assert_eq!(code, StatusCode::NO_CONTENT);
    let (_, listing) = send(&router, get_req("/api/v1/clients")).await;
    assert_eq!(listing, serde_json::json!([]));
    let (code, _) = send(&router, unpair(&fp_hex)).await;
    assert_eq!(code, StatusCode::NOT_FOUND);
}
/// `POST /api/v1/pair/pin` rejects malformed PINs, refuses a well-formed PIN when no pairing is
/// waiting, and reports body/content-type rejections through the JSON error envelope.
#[tokio::test]
async fn submit_pin_validates_and_requires_pending_pairing() {
    let router = test_app(test_state(), None);
    // JSON POST to the PIN endpoint with the given raw body.
    let pin_request = |payload: &str| {
        axum::http::Request::post("/api/v1/pair/pin")
            .header("content-type", "application/json")
            .body(Body::from(payload.to_string()))
            .unwrap()
    };

    // Malformed PINs → 400.
    for malformed in [r#"{"pin":""}"#, r#"{"pin":"12ab"}"#] {
        let (code, _) = send(&router, pin_request(malformed)).await;
        assert_eq!(code, StatusCode::BAD_REQUEST);
    }

    // Well-formed but nothing waiting → 409 (a parked stale PIN would poison the
    // next pairing attempt).
    let (code, _) = send(&router, pin_request(r#"{"pin":"1234"}"#)).await;
    assert_eq!(code, StatusCode::CONFLICT);

    // axum's own body rejections must still wear the ApiError envelope (ApiJson).
    let (code, reply) = send(&router, pin_request("{not json")).await;
    assert_eq!(code, StatusCode::BAD_REQUEST);
    assert!(reply["error"].is_string(), "syntax error: {reply}");

    let (code, reply) = send(&router, pin_request(r#"{"wrong":"shape"}"#)).await;
    assert_eq!(code, StatusCode::UNPROCESSABLE_ENTITY);
    assert!(reply["error"].is_string(), "schema mismatch: {reply}");

    // Same body, but without a content-type header.
    let untyped = axum::http::Request::post("/api/v1/pair/pin")
        .body(Body::from(r#"{"pin":"1234"}"#))
        .unwrap();
    let (code, reply) = send(&router, untyped).await;
    assert_eq!(code, StatusCode::UNSUPPORTED_MEDIA_TYPE);
    assert!(reply["error"].is_string(), "media type: {reply}");
}
/// A whitespace-only token counts as "no token": binding a non-loopback address with it must
/// make `run` fail with the "not loopback" guard error.
#[tokio::test]
async fn blank_token_rejected_for_public_bind() {
    let public_bind_blank_token = Options {
        bind: "0.0.0.0:0".parse().unwrap(),
        token: Some(" ".into()),
    };
    let outcome = run(test_state(), public_bind_blank_token).await;
    let err = outcome.unwrap_err();
    let message = err.to_string();
    assert!(message.contains("not loopback"), "{err}");
}
/// `DELETE /api/v1/session` answers 204 and resets both streaming flags and the launch record.
#[tokio::test]
async fn stop_session_clears_runtime_state() {
    let shared = test_state();
    let router = test_app(shared.clone(), None);

    // Fake a running session: video + audio flags up, a launch recorded.
    shared.streaming.store(true, Ordering::SeqCst);
    shared.audio_streaming.store(true, Ordering::SeqCst);
    let running = LaunchSession {
        gcm_key: [0; 16],
        rikeyid: 0,
        width: 1920,
        height: 1080,
        fps: 60,
        appid: 1,
    };
    *shared.launch.lock().unwrap() = Some(running);

    let stop_request = axum::http::Request::delete("/api/v1/session")
        .body(Body::empty())
        .unwrap();
    let (code, _) = send(&router, stop_request).await;
    assert_eq!(code, StatusCode::NO_CONTENT);

    // Everything the session set must be cleared again.
    assert!(!shared.streaming.load(Ordering::SeqCst));
    assert!(!shared.audio_streaming.load(Ordering::SeqCst));
    assert!(shared.launch.lock().unwrap().is_none());
}
/// `POST /api/v1/session/idr` is a 409 while nothing streams; once streaming it is accepted
/// (202) and raises the shared `force_idr` flag.
#[tokio::test]
async fn idr_requires_an_active_stream() {
    let shared = test_state();
    let router = test_app(shared.clone(), None);
    let idr_request = || {
        axum::http::Request::post("/api/v1/session/idr")
            .body(Body::empty())
            .unwrap()
    };

    // Idle host: the request conflicts with the current state.
    let (idle_code, _) = send(&router, idr_request()).await;
    assert_eq!(idle_code, StatusCode::CONFLICT);

    // Streaming host: accepted, and the flag is set.
    shared.streaming.store(true, Ordering::SeqCst);
    let (live_code, _) = send(&router, idr_request()).await;
    assert_eq!(live_code, StatusCode::ACCEPTED);
    assert!(shared.force_idr.load(Ordering::SeqCst));
}
/// The OpenAPI document lists every route with a unique operationId (codegen relies
/// on both), and the checked-in copy is current.
#[test]
fn openapi_document_is_complete_and_checked_in() {
    const REQUIRED_PATHS: [&str; 9] = [
        "/api/v1/health",
        "/api/v1/host",
        "/api/v1/status",
        "/api/v1/clients",
        "/api/v1/clients/{fingerprint}",
        "/api/v1/pair",
        "/api/v1/pair/pin",
        "/api/v1/session",
        "/api/v1/session/idr",
    ];

    let generated = openapi_json();
    let spec: serde_json::Value = serde_json::from_str(&generated).unwrap();
    let routes = spec["paths"].as_object().unwrap();
    for route in REQUIRED_PATHS {
        assert!(routes.contains_key(route), "spec is missing {route}");
    }

    // Gather every operationId present, then make sure deduplication removes nothing.
    let mut ids: Vec<&str> = Vec::new();
    for path_item in routes.values() {
        for operation in path_item.as_object().unwrap().values() {
            if let Some(id) = operation["operationId"].as_str() {
                ids.push(id);
            }
        }
    }
    let before_dedup = ids.len();
    ids.sort_unstable();
    ids.dedup();
    assert_eq!(before_dedup, ids.len(), "duplicate operationIds");

    assert!(spec["components"]["securitySchemes"]["bearerAuth"].is_object());
    // The health probe overrides the document-global bearer requirement (the server
    // exempts it in `require_auth`; the spec must agree).
    let health_security = &spec["paths"]["/api/v1/health"]["get"]["security"];
    assert_eq!(*health_security, serde_json::json!([{}]));

    // The generated document must match the copy committed under docs/api.
    let checked_in = include_str!("../../../docs/api/openapi.json");
    assert_eq!(
        generated.trim(),
        checked_in.trim(),
        "docs/api/openapi.json is stale — regenerate with: \
         cargo run -p punktfunk-host -- openapi > docs/api/openapi.json"
    );
}
}
+39
View File
@@ -0,0 +1,39 @@
//! The host hot path (plan §7), wiring the platform stages to `punktfunk_core`:
//!
//! ```text
//! capture(dmabuf) → encode(NVENC/VAAPI) → core[FEC+packetize+pace+send]
//! ```
//!
//! Each stage runs on its own native OS thread, connected by bounded SPSC channels with
//! drop-oldest on overflow so the encoder is never blocked. No async runtime here.
use crate::capture::Capturer;
use crate::encode::{EncodedFrame, Encoder};
use anyhow::Result;
use punktfunk_core::packet::{FLAG_PIC, FLAG_SOF};
use punktfunk_core::Session;
/// Run a single capture→encode→submit iteration.
///
/// Pulls one frame from `capturer`, hands it to `encoder`, then drains every frame the encoder
/// has ready into `session`. The production pipeline runs the stages on threads joined by
/// bounded channels; this function documents the data flow and the `punktfunk_core` submit
/// contract.
///
/// # Errors
/// Propagates the first error from capture, encoder submit/poll, or `Session::submit_frame`.
pub fn pump_once(
    capturer: &mut dyn Capturer,
    encoder: &mut dyn Encoder,
    session: &mut Session,
) -> Result<()> {
    let captured = capturer.next_frame()?;
    encoder.submit(&captured)?;

    // One submitted frame may yield zero or more encoded frames; drain them all.
    loop {
        let Some(EncodedFrame {
            data,
            pts_ns,
            keyframe,
        }) = encoder.poll()?
        else {
            break;
        };
        // Every frame carries FLAG_PIC; keyframes additionally carry FLAG_SOF.
        let flags = if keyframe {
            FLAG_PIC as u32 | FLAG_SOF as u32
        } else {
            FLAG_PIC as u32
        };
        // core does FEC + packetize + pace + send.
        session.submit_frame(&data, pts_ns, flags)?;
    }
    Ok(())
}
+11
View File
@@ -0,0 +1,11 @@
//! One-time PipeWire library initialization, shared by the video (portal) and audio capture
//! threads. `pw_init` must not be called concurrently from multiple threads on first use; both
//! capture paths connect to PipeWire at nearly the same moment (RTSP PLAY starts video + audio
//! together), so we serialize the init through a `Once`.
/// Initialise the PipeWire library once per process; every later call (from any thread) is a
/// no-op that returns after the first initialisation has finished.
#[cfg(target_os = "linux")]
pub fn ensure_init() {
    static PIPEWIRE_INIT: std::sync::Once = std::sync::Once::new();
    PIPEWIRE_INIT.call_once(pipewire::init);
}
+128
View File
@@ -0,0 +1,128 @@
//! Virtual display orchestration (plan §6) — the project's differentiator.
//!
//! A [`VirtualDisplay`] creates a *client-sized* output on demand, rendered natively and
//! headless (no scaling), to be captured and streamed, then torn down on disconnect. There is
//! no cross-compositor Wayland protocol for this, so each compositor has its own backend behind
//! this trait:
//!
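//! * **KWin** — privileged `zkde_screencast_unstable_v1::stream_virtual_output` ([`kwin`]).
//! * **wlroots/Sway** — `swaymsg create_output` + `output mode --custom` (TODO).
//! * **Mutter/GNOME** — D-Bus `RemoteDesktop` + `ScreenCast.RecordVirtual` ([`mutter`]).
//! * **gamescope** — a headless `gamescope` process spawned at the client's mode, captured via
//!   its built-in PipeWire node ([`gamescope`]).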
//!
//! [`VirtualDisplay::create`] returns a [`VirtualOutput`]: the PipeWire node to capture plus an
//! owned keepalive whose `Drop` releases the output (RAII — no explicit `destroy`). Capture
//! consumes the node via [`crate::capture::capture_virtual_output`].
use anyhow::Result;
pub use punktfunk_core::Mode;
use std::os::fd::OwnedFd;
/// A created virtual output: a PipeWire source to capture, plus an owned keepalive whose drop
/// tears the output down (releases the compositor-side resource).
///
/// Allowed dead on non-Linux: the backends that construct it are all `cfg(target_os = "linux")`.
#[allow(dead_code)]
pub struct VirtualOutput {
/// PipeWire node id of the output's screencast stream.
pub node_id: u32,
/// Portal/remote PipeWire fd when the node lives on a sandboxed remote rather than on the
/// user's default PipeWire daemon. `None` means the node is on the default daemon, captured
/// by connecting to that daemon directly.
///
/// NOTE(review): the KWin, gamescope and Mutter backends in this module all set `None`; an
/// earlier version of this comment named Mutter's RemoteDesktop+ScreenCast as a remote-fd
/// producer, which does not match the current Mutter backend — confirm whether any backend
/// still populates this field.
pub remote_fd: Option<OwnedFd>,
/// `(width, height, refresh_hz)` to prefer in the PipeWire format negotiation. KWin and
/// gamescope outputs are created at the exact size, so this just confirms it; **Mutter sizes
/// its virtual monitor FROM the negotiation**, so here it's what makes the client's mode real.
pub preferred_mode: Option<(u32, u32, u32)>,
/// Keeps the output — and whatever connection/thread backs it — alive; dropped on teardown.
/// Backends box a guard here (a stop-flag guard or the spawned gamescope process); the
/// gamescope attach-to-existing-node path boxes `()` because it owns nothing.
pub keepalive: Box<dyn Send>,
}
/// Pluggable virtual-output creation, per compositor.
///
/// Obtain an implementation with [`open`]; the trait is `Send` so the boxed driver can be moved
/// to another thread.
pub trait VirtualDisplay: Send {
/// Human-readable backend name (e.g. `"kwin"`, `"wlroots"`, `"mutter"`).
fn name(&self) -> &'static str;
/// Create a virtual output of the given mode. Teardown is RAII: drop the returned
/// [`VirtualOutput`]'s `keepalive`.
///
/// # Errors
/// Backends fail when the compositor cannot be reached or does not hand back a PipeWire node
/// in time (the backends in this module give up after 15–20 s).
fn create(&mut self, mode: Mode) -> Result<VirtualOutput>;
}
/// Compositors punktfunk knows how to drive (plan §6).
///
/// Produced by [`detect`] and consumed by [`open`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Compositor {
/// KWin / Plasma 6 — `zkde_screencast` virtual output.
Kwin,
/// wlroots (Sway/Hyprland) — headless `create_output`. Detected, but [`open`] currently
/// rejects it: the backend is not implemented yet.
Wlroots,
/// Mutter / GNOME — headless backend + Mutter DBus `RecordVirtual`.
Mutter,
/// gamescope — spawned headless at the client's size/refresh; capture its PipeWire node.
/// Never auto-detected from `XDG_CURRENT_DESKTOP`; select it with
/// `PUNKTFUNK_COMPOSITOR=gamescope`.
Gamescope,
}
/// Pick the compositor backend to drive.
///
/// An explicit `PUNKTFUNK_COMPOSITOR` (trimmed, case-insensitive) always wins; otherwise the
/// upper-cased `XDG_CURRENT_DESKTOP` is searched for a known desktop name.
///
/// # Errors
/// Fails when the override holds an unknown value, or when no override is set and
/// `XDG_CURRENT_DESKTOP` names no supported desktop.
pub fn detect() -> Result<Compositor> {
    // Substrings of XDG_CURRENT_DESKTOP, tried in order; the first hit decides.
    const DESKTOP_HINTS: [(&str, Compositor); 5] = [
        ("KDE", Compositor::Kwin),
        ("GNOME", Compositor::Mutter),
        ("SWAY", Compositor::Wlroots),
        ("WLROOTS", Compositor::Wlroots),
        ("HYPRLAND", Compositor::Wlroots),
    ];

    // Explicit override: accepted aliases only, anything else is an error (no fallback).
    if let Ok(raw) = std::env::var("PUNKTFUNK_COMPOSITOR") {
        let requested = raw.trim().to_ascii_lowercase();
        return match requested.as_str() {
            "kwin" | "kde" | "plasma" => Ok(Compositor::Kwin),
            "wlroots" | "sway" | "hyprland" | "wlr" => Ok(Compositor::Wlroots),
            "mutter" | "gnome" => Ok(Compositor::Mutter),
            "gamescope" => Ok(Compositor::Gamescope),
            other => Err(anyhow::anyhow!(
                "unknown PUNKTFUNK_COMPOSITOR '{other}' (kwin|wlroots|mutter|gamescope)"
            )),
        };
    }

    // Auto-detection: an unset variable behaves like an empty string (matches nothing).
    let desktop = std::env::var("XDG_CURRENT_DESKTOP")
        .unwrap_or_default()
        .to_ascii_uppercase();
    DESKTOP_HINTS
        .iter()
        .find(|(needle, _)| desktop.contains(*needle))
        .map(|&(_, compositor)| compositor)
        .ok_or_else(|| {
            anyhow::anyhow!(
                "could not detect compositor from XDG_CURRENT_DESKTOP='{desktop}'; set PUNKTFUNK_COMPOSITOR"
            )
        })
}
/// Construct the virtual-display driver for `compositor`.
///
/// # Errors
/// Fails on non-Linux targets, for [`Compositor::Wlroots`] (backend not implemented), or when
/// the selected backend's constructor fails.
pub fn open(compositor: Compositor) -> Result<Box<dyn VirtualDisplay>> {
    #[cfg(target_os = "linux")]
    {
        let driver: Box<dyn VirtualDisplay> = match compositor {
            Compositor::Kwin => Box::new(kwin::KwinDisplay::new()?),
            Compositor::Mutter => Box::new(mutter::MutterDisplay::new()?),
            Compositor::Gamescope => Box::new(gamescope::GamescopeDisplay::new()?),
            Compositor::Wlroots => {
                anyhow::bail!("wlroots virtual-output backend not yet implemented")
            }
        };
        Ok(driver)
    }
    #[cfg(not(target_os = "linux"))]
    {
        // The backends are Linux-only; consume the argument so it is not flagged as unused.
        let _ = compositor;
        anyhow::bail!("virtual displays require Linux (Wayland compositor)")
    }
}
/// Path of the file where the gamescope backend relays the nested session's `LIBEI_SOCKET`
/// (gamescope's EIS server) for the input injector.
///
/// This re-exports the private `gamescope` module's `EI_SOCKET_FILE` constant so code outside
/// this module can locate the relay file. The file holds the socket *path* as text; it is not
/// the socket itself.
#[cfg(target_os = "linux")]
pub fn gamescope_ei_socket_file() -> &'static str {
gamescope::EI_SOCKET_FILE
}
#[cfg(target_os = "linux")]
mod gamescope;
#[cfg(target_os = "linux")]
mod kwin;
#[cfg(target_os = "linux")]
mod mutter;
@@ -0,0 +1,181 @@
//! gamescope virtual-display backend.
//!
//! Unlike KWin/Mutter (which create a virtual output at runtime via a protocol), gamescope is a
//! micro-compositor we *spawn*: `gamescope --backend headless -W w -H h -r hz -- <app>`. It runs
//! the app nested, composites at the requested size/refresh (so the source rate is the client's
//! rate natively — no separate refresh step), and exports a built-in PipeWire node named
//! `gamescope` (media.class `Video/Source`, BGRx/NV12, dmabuf or shm) on the user's PipeWire
//! daemon. We discover that node and capture it like any other; the gamescope *process* is the
//! keepalive — dropping the [`VirtualOutput`] kills it (tearing the output down).
//!
//! Requirements: gamescope built with PipeWire + libei input emulation (distro packages are);
//! a usable Vulkan device (the NVIDIA render node). Headless capture on the proprietary NVIDIA
//! driver is plausible-by-architecture but not a well-trodden path — validate empirically.
//! Input is a gamescope-specific libei/EIS socket (`LIBEI_SOCKET`), wired separately (TODO).
use super::{Mode, VirtualDisplay, VirtualOutput};
use anyhow::{anyhow, Context, Result};
use std::process::{Child, Command, Stdio};
use std::time::{Duration, Instant};
/// Driver for gamescope-backed virtual displays. It holds no state of its own: every
/// [`create`](VirtualDisplay::create) call spawns (or attaches to) a gamescope sized to the
/// requested mode.
pub struct GamescopeDisplay;

impl GamescopeDisplay {
    /// Build the driver. Currently infallible; the `Result` matches the other backends'
    /// constructors.
    pub fn new() -> Result<Self> {
        Ok(Self)
    }
}
impl VirtualDisplay for GamescopeDisplay {
    fn name(&self) -> &'static str {
        "gamescope"
    }

    /// Produce a gamescope-backed output for `mode`.
    ///
    /// With `PUNKTFUNK_GAMESCOPE_NODE=<pipewire node id>` set, attaches to that existing node
    /// (debug / Steam-launched session) and owns nothing. Otherwise spawns a headless gamescope
    /// and waits up to 15 s for its PipeWire node; the spawned process becomes the keepalive.
    ///
    /// # Errors
    /// Fails when the env override is not a valid node id, when gamescope cannot be spawned,
    /// or when no node appears in time (the spawned process is killed in that case).
    fn create(&mut self, mode: Mode) -> Result<VirtualOutput> {
        // Every path advertises the requested mode for the PipeWire format negotiation.
        let preferred_mode = Some((mode.width, mode.height, mode.refresh_hz));

        if let Ok(raw_id) = std::env::var("PUNKTFUNK_GAMESCOPE_NODE") {
            let node_id = raw_id
                .parse::<u32>()
                .context("PUNKTFUNK_GAMESCOPE_NODE must be a node id")?;
            tracing::info!(node_id, "gamescope: attaching to existing PipeWire node");
            return Ok(VirtualOutput {
                node_id,
                remote_fd: None,
                preferred_mode,
                // We did not start this gamescope, so there is nothing to tear down.
                keepalive: Box::new(()),
            });
        }

        // A refresh of 0 is bumped to 1 for the gamescope command line only.
        let child = spawn(mode.width, mode.height, mode.refresh_hz.max(1))?;
        let proc = GamescopeProc(child);

        // gamescope creates its PipeWire node a moment after start; poll for it. `proc` stays
        // alive meanwhile and is dropped (killing gamescope) if we give up.
        let Some(node_id) = wait_for_node(Duration::from_secs(15)) else {
            return Err(anyhow!(
                "gamescope PipeWire node did not appear within 15s — gamescope may have failed to \
                 start or headless capture is unsupported on this GPU/driver (see /tmp/punktfunk-gamescope.log)"
            ));
        };
        tracing::info!(
            node_id,
            w = mode.width,
            h = mode.height,
            hz = mode.refresh_hz,
            "gamescope virtual output ready"
        );

        Ok(VirtualOutput {
            node_id,
            remote_fd: None,
            preferred_mode,
            keepalive: Box::new(proc),
        })
    }
}
/// File where the wrapper below writes gamescope's `LIBEI_SOCKET` (its EIS server socket),
/// read by the libei injector to drive input into the nested app. See [`crate::inject`].
///
/// The file contains the socket path as plain text (written with `printf %s`, so without a
/// trailing newline). `spawn` removes any stale copy before launching gamescope.
pub const EI_SOCKET_FILE: &str = "/tmp/punktfunk-gamescope-ei";
/// Spawn `gamescope --backend headless -W w -H h -r hz -- <app>`. The app comes from
/// `PUNKTFUNK_GAMESCOPE_APP` (default a no-op that just keeps gamescope alive — set it to a real
/// game/GL app for actual content, e.g. `steam -gamepadui` for the SteamOS-like session).
/// stdout/stderr go to `/tmp/punktfunk-gamescope.log`. The app is launched through a tiny shell
/// wrapper that relays gamescope's `LIBEI_SOCKET` (set for its children) to [`EI_SOCKET_FILE`]
/// so the input injector can connect to gamescope's EIS server from outside.
fn spawn(w: u32, h: u32, hz: u32) -> Result<Child> {
let app =
std::env::var("PUNKTFUNK_GAMESCOPE_APP").unwrap_or_else(|_| "sleep infinity".to_string());
let _ = std::fs::remove_file(EI_SOCKET_FILE); // stale socket path from a previous session
let mut cmd = Command::new("gamescope");
cmd.args(["--backend", "headless"])
.args(["-W", &w.to_string()])
.args(["-H", &h.to_string()])
.args(["-r", &hz.to_string()])
.args(["--xwayland-count", "1", "--"])
.args([
"sh",
"-c",
&format!("printf %s \"$LIBEI_SOCKET\" > {EI_SOCKET_FILE}; exec \"$@\""),
"sh",
])
.args(app.split_whitespace())
// Prefer the NVIDIA GL vendor for the nested session (harmless on a pure-NVIDIA box).
.env("__GLX_VENDOR_LIBRARY_NAME", "nvidia");
if let Ok(log) = std::fs::File::create("/tmp/punktfunk-gamescope.log") {
if let Ok(log2) = log.try_clone() {
cmd.stdout(Stdio::from(log)).stderr(Stdio::from(log2));
}
} else {
cmd.stdout(Stdio::null()).stderr(Stdio::null());
}
tracing::info!(w, h, hz, %app, "spawning gamescope (headless)");
cmd.spawn()
.context("spawn gamescope (is it installed? `apt install gamescope`)")
}
/// Block until gamescope has announced its PipeWire node, or `timeout` has passed.
///
/// The authoritative source is gamescope's own log line `stream available on node ID: N`: its
/// node carries `node.name=gamescope` on TWO objects — the adapter and the inner stream — and
/// only the advertised id is the correct capture target. The log is re-read every 300 ms; once
/// the deadline has passed, a single `pw-dump` lookup is tried as a last resort.
fn wait_for_node(timeout: Duration) -> Option<u32> {
    const POLL_INTERVAL: Duration = Duration::from_millis(300);
    let give_up_at = Instant::now() + timeout;
    loop {
        match node_from_log() {
            Some(id) => break Some(id),
            // Deadline reached without a log line: last-resort fallback.
            None if Instant::now() >= give_up_at => break find_gamescope_node(),
            None => std::thread::sleep(POLL_INTERVAL),
        }
    }
}
/// Parse `stream available on node ID: N` from the spawned gamescope's log (ANSI-colored).
///
/// Lines are scanned newest-first. On a matching line the id is the **first contiguous run of
/// digits** after the marker: whitespace and ANSI CSI escapes (`ESC [ … <final byte>`) in front
/// of the number are skipped, and anything after it — a trailing colour reset such as
/// `\x1b[0m`, or further text containing digits — is ignored rather than folded into the id.
///
/// Returns `None` when the log cannot be read or no line yields a number that fits a `u32`.
fn node_from_log() -> Option<u32> {
    const MARKER: &str = "stream available on node ID:";
    let log = std::fs::read_to_string("/tmp/punktfunk-gamescope.log").ok()?;
    log.lines().rev().find_map(|line| {
        let (_, tail) = line.split_once(MARKER)?;
        let mut digits = String::new();
        let mut chars = tail.chars();
        while let Some(c) = chars.next() {
            if c.is_ascii_digit() {
                digits.push(c);
            } else if !digits.is_empty() {
                // The number has ended; whatever follows is not part of the id.
                break;
            } else if c == '\u{1b}' {
                // Skip a leading CSI sequence so its numeric parameters are not mistaken for
                // the id. A CSI sequence ends at the first byte in `@`..=`~`.
                let mut lookahead = chars.clone();
                if lookahead.next() == Some('[') {
                    chars = lookahead;
                    for param in chars.by_ref() {
                        if ('@'..='~').contains(&param) {
                            break;
                        }
                    }
                }
            }
        }
        // An empty or overflowing digit run fails to parse; keep looking at older lines.
        digits.parse().ok()
    })
}
/// Find the `gamescope` `Video/Source` node id in a `pw-dump` snapshot of the default daemon.
///
/// Runs `pw-dump`, parses its JSON output and returns the id of the first
/// `PipeWire:Interface:Node` object whose `node.name` is exactly `gamescope`, or whose
/// `media.class` is `Video/Source` and whose name contains `gamescope`.
///
/// Returns `None` when `pw-dump` cannot be run, its output is not a JSON array, no node
/// matches, or the matching node's id is missing or does not fit a `u32`.
fn find_gamescope_node() -> Option<u32> {
    let output = Command::new("pw-dump").output().ok()?;
    let dump: serde_json::Value = serde_json::from_slice(&output.stdout).ok()?;
    for obj in dump.as_array()? {
        if obj.get("type").and_then(|t| t.as_str()) != Some("PipeWire:Interface:Node") {
            continue;
        }
        // Missing properties read as "" so they simply fail the comparisons below.
        let props = obj.get("info").and_then(|i| i.get("props"));
        let name = props
            .and_then(|p| p.get("node.name"))
            .and_then(|n| n.as_str())
            .unwrap_or("");
        let class = props
            .and_then(|p| p.get("media.class"))
            .and_then(|n| n.as_str())
            .unwrap_or("");
        if name == "gamescope" || (class == "Video/Source" && name.contains("gamescope")) {
            // Checked conversion: an id outside the u32 range is rejected, not truncated.
            return obj
                .get("id")
                .and_then(|i| i.as_u64())
                .and_then(|id| u32::try_from(id).ok());
        }
    }
    None
}
/// RAII handle for the spawned gamescope process: dropping it kills the process (which tears
/// the virtual output down) and then reaps it.
struct GamescopeProc(Child);

impl Drop for GamescopeProc {
    fn drop(&mut self) {
        let Self(child) = self;
        // Both results are deliberately ignored: there is nothing useful to do with a failure
        // while dropping.
        let _ = child.kill();
        let _ = child.wait();
    }
}
+313
View File
@@ -0,0 +1,313 @@
//! KWin virtual-output backend via the privileged `zkde_screencast_unstable_v1` Wayland
//! protocol (the mechanism KRdp / krfb-virtualmonitor use).
//!
//! `stream_virtual_output(name, width, height, scale, pointer)` asks KWin to create a new output
//! sized to exactly `width`x`height`, rendered natively (no scaling), and hands back a PipeWire
//! node for it. The node lives on the user's default PipeWire daemon, so [`VirtualOutput::remote_fd`]
//! is `None` and capture connects to that daemon directly.
//!
//! Requirements: KWin must expose the privileged `zkde_screencast` global — a real Plasma session
//! authorizes it for its own clients; the headless test exposes it to bare clients via
//! `KWIN_WAYLAND_NO_PERMISSION_CHECKS=1`. The compositor backend must implement
//! `createVirtualOutput`: the **DRM backend** (any version) or the **VirtualBackend since KWin
//! 6.5.6** (`kwin_wayland --virtual`); on `--virtual` < 6.5.6 the request fails with
//! "Could not find output". We talk raw Wayland on `$WAYLAND_DISPLAY`, so the host must run inside
//! the KWin session's environment.
#![allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)]
use super::{Mode, VirtualDisplay, VirtualOutput};
use anyhow::{anyhow, bail, Context, Result};
use std::os::fd::{AsFd, AsRawFd};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::Sender;
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use wayland_client::protocol::wl_registry::{self, WlRegistry};
use wayland_client::{Connection, Dispatch, Proxy, QueueHandle};
// Generate the client bindings for the vendored protocol XML inline (no build.rs). Path is
// relative to CARGO_MANIFEST_DIR. See wayland-rs' "implementing a custom protocol" docs.
// The lints below are silenced because the module body is macro-generated code that we do not
// control.
#[allow(clippy::all, dead_code, non_camel_case_types, non_snake_case, unused)]
pub mod zkde {
// Brought into scope for the generated code, which refers to `wayland_client` and the core
// protocol types by these paths.
use wayland_client;
use wayland_client::protocol::*;
// Interface descriptors for the protocol; generated first and glob-imported below for
// `generate_client_code!`.
pub mod __interfaces {
use wayland_client::protocol::__interfaces::*;
wayland_scanner::generate_interfaces!("protocols/zkde-screencast-unstable-v1.xml");
}
use self::__interfaces::*;
// The client-side proxy types and request/event enums.
wayland_scanner::generate_client_code!("protocols/zkde-screencast-unstable-v1.xml");
}
use zkde::zkde_screencast_stream_unstable_v1::{
Event as StreamEvent, ZkdeScreencastStreamUnstableV1 as ScreencastStream,
};
use zkde::zkde_screencast_unstable_v1::ZkdeScreencastUnstableV1 as Screencast;
/// `pointer` attachment mode (the protocol enum): render the cursor into the stream so the
/// remote sees it move with injected input. Passed as the `pointer` argument of
/// `stream_virtual_output`.
const POINTER_EMBEDDED: u32 = 2;
/// The name we give the created output; KWin exposes it to output-management as `Virtual-<name>`.
/// `set_custom_refresh` relies on that naming to address the output through `kscreen-doctor`.
const VOUT_NAME: &str = "punktfunk";
/// Highest interface version we drive. KWin currently advertises 5; we rely on the `created`
/// event (deprecated only since v6) for the node id, so cap the bind at 5. The registry handler
/// binds `min(advertised version, MAX_VERSION)`.
const MAX_VERSION: u32 = 5;
/// Driver for KWin-backed virtual displays. It carries no state: every
/// [`create`](VirtualDisplay::create) call starts its own Wayland connection on a dedicated
/// thread, and that thread owns the resulting output.
pub struct KwinDisplay;

impl KwinDisplay {
    /// Build the driver. Currently infallible; the `Result` matches the other backends'
    /// constructors.
    pub fn new() -> Result<Self> {
        Ok(Self)
    }
}
impl VirtualDisplay for KwinDisplay {
fn name(&self) -> &'static str {
"kwin"
}
fn create(&mut self, mode: Mode) -> Result<VirtualOutput> {
let (setup_tx, setup_rx) = std::sync::mpsc::channel::<Result<u32, String>>();
let stop = Arc::new(AtomicBool::new(false));
let stop_thread = stop.clone();
let (width, height) = (mode.width, mode.height);
thread::Builder::new()
.name("punktfunk-kwin-vout".into())
.spawn(move || virtual_output_thread(width, height, setup_tx, stop_thread))
.context("spawn KWin virtual-output thread")?;
let node_id = match setup_rx.recv_timeout(Duration::from_secs(20)) {
Ok(Ok(v)) => v,
Ok(Err(e)) => bail!("KWin virtual output failed: {e}"),
Err(_) => bail!("timed out creating the KWin virtual output"),
};
tracing::info!(node_id, width, height, "KWin virtual output ready");
// KWin creates virtual outputs at a hardcoded 60 Hz and `stream_virtual_output` has no
// refresh argument, so when the client wants more we install + select a custom mode
// (supported on virtual outputs since KWin 6.6). Done before capture connects PipeWire so
// the stream negotiates at the higher rate. First cut shells out to kscreen-doctor; the
// in-process kde_output_management_v2 client is a follow-up.
if mode.refresh_hz > 60 {
set_custom_refresh(width, height, mode.refresh_hz);
}
Ok(VirtualOutput {
node_id,
remote_fd: None,
preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)),
keepalive: Box::new(StopGuard(stop)),
})
}
}
/// Best-effort bump of the freshly created virtual output's refresh rate above KWin's default
/// 60 Hz. Shells out to `kscreen-doctor` twice: first to register a custom mode on
/// `Virtual-<VOUT_NAME>` (refresh given in mHz), then to select it. Nothing is returned — on
/// failure the source simply stays at 60 Hz and a warning is logged.
fn set_custom_refresh(width: u32, height: u32, hz: u32) {
    /// Run `kscreen-doctor <arg>`; `true` only when it started and exited successfully.
    fn kscreen_doctor(arg: String) -> bool {
        match std::process::Command::new("kscreen-doctor").arg(arg).status() {
            Ok(status) => status.success(),
            Err(_) => false,
        }
    }

    let output = format!("Virtual-{VOUT_NAME}");
    let mhz = hz.saturating_mul(1000);
    let add_mode = format!("output.{output}.addCustomMode.{width}.{height}.{mhz}.full");
    let select_mode = format!("output.{output}.mode.{width}x{height}@{hz}");

    // Add the custom mode (a fresh output has none), then select it. Only the selection's
    // outcome decides success.
    let _ = kscreen_doctor(add_mode);
    if kscreen_doctor(select_mode) {
        tracing::info!(output, hz, "KWin virtual output: custom refresh applied");
    } else {
        tracing::warn!(
            output,
            hz,
            "kscreen-doctor refresh set failed — source stays 60 Hz (is kscreen-doctor installed?)"
        );
    }
}
/// RAII handle for the KWin virtual output. Dropping it raises the shared `stop` flag; the
/// keepalive thread then leaves its loop, drops the Wayland connection, and KWin reclaims the
/// output.
struct StopGuard(Arc<AtomicBool>);

impl Drop for StopGuard {
    fn drop(&mut self) {
        let Self(stop_flag) = self;
        stop_flag.store(true, Ordering::Relaxed);
    }
}
/// Event-dispatch state for the KWin worker thread's Wayland queue. The `Dispatch` impls below
/// fill it in; `run` reads it after each dispatch.
#[derive(Default)]
struct State {
/// The bound `zkde_screencast_unstable_v1` manager, once the registry has announced it.
screencast: Option<Screencast>,
/// PipeWire node id delivered by the stream's `created` event.
node_id: Option<u32>,
/// Error text delivered by the stream's `failed` event.
failed: Option<String>,
/// Set when the stream's `closed` event arrives.
closed: bool,
}
/// Registry listener: binds the screencast manager global when KWin announces it.
impl Dispatch<WlRegistry, ()> for State {
    fn event(
        state: &mut Self,
        registry: &WlRegistry,
        event: wl_registry::Event,
        _: &(),
        _: &Connection,
        qh: &QueueHandle<Self>,
    ) {
        // Only new-global announcements matter here.
        let wl_registry::Event::Global {
            name,
            interface,
            version,
        } = event
        else {
            return;
        };
        if interface != Screencast::interface().name {
            return;
        }
        // Never bind a newer version than this client drives.
        let bind_version = version.min(MAX_VERSION);
        state.screencast = Some(registry.bind::<Screencast, _, _>(name, bind_version, qh, ()));
    }
}
// The manager has no events. The impl is still needed so the manager proxy can be bound on
// this queue (the registry handler binds `Screencast` with `State` as the dispatch target);
// the handler body is intentionally empty.
impl Dispatch<Screencast, ()> for State {
fn event(
_: &mut Self,
_: &Screencast,
_: zkde::zkde_screencast_unstable_v1::Event,
_: &(),
_: &Connection,
_: &QueueHandle<Self>,
) {
}
}
impl Dispatch<ScreencastStream, ()> for State {
fn event(
state: &mut Self,
_: &ScreencastStream,
event: StreamEvent,
_: &(),
_: &Connection,
_: &QueueHandle<Self>,
) {
match event {
StreamEvent::Created { node } => state.node_id = Some(node),
StreamEvent::Failed { error } => state.failed = Some(error),
StreamEvent::Closed => state.closed = true,
// `serial` (v6) — we use the node id from `created`, so ignore.
_ => {}
}
}
}
/// Body of the KWin worker thread. Delegates to [`run`], which creates the `width`x`height`
/// virtual output, reports its PipeWire node id over `setup_tx`, and keeps the Wayland
/// connection alive (so the output isn't destroyed) until `stop` is set. Mirrors the portal
/// thread's "park to keep the session alive".
fn virtual_output_thread(
    width: u32,
    height: u32,
    setup_tx: Sender<Result<u32, String>>,
    stop: Arc<AtomicBool>,
) {
    match run(width, height, &setup_tx, &stop) {
        Ok(()) => {}
        Err(err) => {
            // Report the failure to the opener, with the full context chain. If it is no
            // longer listening, the send error is ignored.
            let _ = setup_tx.send(Err(format!("{err:#}")));
        }
    }
}
/// Create the virtual output and keep it alive; runs on the KWin worker thread.
///
/// Steps: connect to the compositor from the environment, bind the screencast manager via the
/// registry, request a `width`x`height` virtual output, wait for its PipeWire node id and send
/// it over `setup_tx`, then service the connection until `stop` is set or KWin closes the
/// stream.
///
/// # Errors
/// Returns an error when the Wayland connection cannot be made, the screencast global is
/// missing, KWin reports `failed`/`closed` before `created`, the opener has gone away, or a
/// dispatch/flush on the connection fails.
fn run(
width: u32,
height: u32,
setup_tx: &Sender<Result<u32, String>>,
stop: &AtomicBool,
) -> Result<()> {
let conn = Connection::connect_to_env()
.context("connect to KWin Wayland (is WAYLAND_DISPLAY set to the KWin socket?)")?;
let mut queue = conn.new_event_queue();
let qh = queue.handle();
// The registry proxy is only needed for its events; the roundtrip below delivers the globals
// to `State`'s registry handler.
let _registry = conn.display().get_registry(&qh, ());
let mut state = State::default();
queue.roundtrip(&mut state).context("registry roundtrip")?;
let screencast = state.screencast.clone().ok_or_else(|| {
anyhow!(
"KWin does not expose zkde_screencast_unstable_v1 (need a real KDE session, or run \
KWin with KWIN_WAYLAND_NO_PERMISSION_CHECKS=1 for the headless test)"
)
})?;
// Create the virtual output sized to the client, cursor composited into the stream.
// NOTE(review): `width`/`height` are cast u32 → i32 unchecked; values above i32::MAX would
// wrap — presumably never reached by real display modes.
let stream = screencast.stream_virtual_output(
VOUT_NAME.to_string(),
width as i32,
height as i32,
1.0, // scale (logical == physical)
POINTER_EMBEDDED,
&qh,
(),
);
tracing::info!(
width,
height,
"KWin: requested virtual output; awaiting PipeWire node"
);
// Pump events until KWin reports the node id (or an error).
// NOTE(review): this wait has no timeout of its own and does not look at `stop`.
let node_id = loop {
queue
.blocking_dispatch(&mut state)
.context("wayland dispatch (awaiting created)")?;
if let Some(node) = state.node_id {
break node;
}
if let Some(e) = state.failed.take() {
bail!("stream_virtual_output failed: {e}");
}
if state.closed {
bail!("KWin closed the stream before it was created");
}
};
// If the opener already gave up (receiver dropped), bail out: returning drops the connection.
setup_tx
.send(Ok(node_id))
.map_err(|_| anyhow!("virtual-output opener went away"))?;
// Keep the connection (and thus the virtual output) alive until told to stop, observing
// `closed`. blocking_dispatch can't be interrupted, so poll the connection fd with a short
// timeout so `stop` is honored within ~200 ms.
while !stop.load(Ordering::Relaxed) {
queue
.dispatch_pending(&mut state)
.context("dispatch_pending")?;
if state.closed {
tracing::warn!("KWin closed the virtual-output stream");
break;
}
conn.flush().context("wayland flush")?;
let Some(guard) = conn.prepare_read() else {
continue; // events already queued — loop dispatches them
};
let mut pfd = libc::pollfd {
fd: conn.as_fd().as_raw_fd(),
events: libc::POLLIN,
revents: 0,
};
// SAFETY: `pfd` is a live, exclusively borrowed `pollfd` on this stack frame and the count
// passed is exactly 1; the fd belongs to `conn`, which outlives the call.
let r = unsafe { libc::poll(&mut pfd, 1, 200) };
if r > 0 && (pfd.revents & libc::POLLIN) != 0 {
// A read error is ignored here.
let _ = guard.read();
} // else: timeout or signal — drop the guard, re-check `stop`
}
// Best-effort clean teardown; dropping the connection also makes KWin reclaim the output.
stream.close();
let _ = conn.flush();
Ok(())
}
@@ -0,0 +1,226 @@
//! GNOME/Mutter virtual-display backend via Mutter's *direct* D-Bus APIs (the same path
//! gnome-remote-desktop uses for headless sessions — not the xdg portal, which needs an
//! interactive grant):
//!
//! 1. `org.gnome.Mutter.RemoteDesktop.CreateSession()` → a remote-desktop session (read its
//! `SessionId`). The cast is anchored to it, and it's also the future input path.
//! 2. `org.gnome.Mutter.ScreenCast.CreateSession({"remote-desktop-session-id": id})`.
//! 3. `ScreenCast.Session.RecordVirtual({"cursor-mode": embedded})` → Mutter creates a **virtual
//! monitor** and returns a Stream object.
//! 4. `RemoteDesktop.Session.Start()` → the Stream signals `PipeWireStreamAdded(node_id)`.
//!
//! The virtual monitor's *size* follows the PipeWire format negotiation — Mutter adapts it to
//! what the consumer asks for — so the client's exact WxH is plumbed into our consumer's format
//! pod as the preferred size ([`VirtualOutput::preferred_mode`]) rather than passed here.
//! Sessions die with the D-Bus connection, so a keepalive thread owns it (RAII teardown).
//!
//! Requires a running Mutter (`gnome-shell` session, or `gnome-shell --headless` for the
//! headless host) on the session bus. GNOME is detected via `XDG_CURRENT_DESKTOP=GNOME` or
//! forced with `PUNKTFUNK_COMPOSITOR=mutter`.
use super::{Mode, VirtualDisplay, VirtualOutput};
use anyhow::{anyhow, bail, Context, Result};
use ashpd::zbus;
use futures_util::StreamExt;
use std::collections::HashMap;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::Sender;
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use zbus::zvariant::{OwnedObjectPath, Value};
/// D-Bus name of Mutter's RemoteDesktop service (step 1 of the handshake in the module docs).
const BUS_RD: &str = "org.gnome.Mutter.RemoteDesktop";
/// D-Bus name of Mutter's ScreenCast service (steps 2–3 of the handshake in the module docs).
const BUS_SC: &str = "org.gnome.Mutter.ScreenCast";
/// Mutter cursor mode: render the cursor into the stream (matches the KWin/gamescope backends).
/// This is the `"cursor-mode"` value for `RecordVirtual` described in the module docs.
const CURSOR_EMBEDDED: u32 = 1;
/// Driver for Mutter-backed virtual displays. It carries no state: every
/// [`create`](VirtualDisplay::create) call starts a keepalive thread that owns the D-Bus
/// sessions behind the virtual monitor.
pub struct MutterDisplay;

impl MutterDisplay {
    /// Build the driver. Currently infallible; the `Result` matches the other backends'
    /// constructors.
    pub fn new() -> Result<Self> {
        Ok(Self)
    }
}
impl VirtualDisplay for MutterDisplay {
    /// Short backend identifier.
    fn name(&self) -> &'static str {
        "mutter"
    }

    /// Create a Mutter virtual monitor and return its PipeWire node id.
    ///
    /// Spawns the keepalive thread, waits up to 20 s for it to report the node id, and hands
    /// back a [`VirtualOutput`] whose `keepalive` stops that thread when dropped. `mode` is not
    /// sent to Mutter; it is forwarded as `preferred_mode` for the consumer's format
    /// negotiation.
    ///
    /// # Errors
    /// Fails if the thread cannot be spawned, the handshake reports an error, or no result
    /// arrives within the timeout.
    fn create(&mut self, mode: Mode) -> Result<VirtualOutput> {
        let (setup_tx, setup_rx) = std::sync::mpsc::channel::<Result<u32, String>>();
        let stop = Arc::new(AtomicBool::new(false));
        let stop_thread = stop.clone();
        // Arm the guard before anything can fail. If we bail below (timeout or handshake error)
        // the guard drops and raises the stop flag, so a handshake that completes late tears
        // its sessions down instead of parking forever with a live virtual monitor.
        let keepalive = StopGuard(stop);
        thread::Builder::new()
            .name("punktfunk-mutter-vout".into())
            .spawn(move || session_thread(setup_tx, stop_thread))
            .context("spawn Mutter virtual-output thread")?;
        let node_id = match setup_rx.recv_timeout(Duration::from_secs(20)) {
            Ok(Ok(v)) => v,
            Ok(Err(e)) => bail!("Mutter virtual monitor failed: {e}"),
            Err(_) => bail!("timed out creating the Mutter virtual monitor"),
        };
        tracing::info!(
            node_id,
            w = mode.width,
            h = mode.height,
            "Mutter virtual monitor ready"
        );
        Ok(VirtualOutput {
            node_id,
            remote_fd: None,
            preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)),
            keepalive: Box::new(keepalive),
        })
    }
}
/// Stop handle for the keepalive thread. When it is dropped the shared flag flips to `true`;
/// the thread notices on its next poll, exits, and the D-Bus connection closes — Mutter then
/// tears the remote-desktop + screencast sessions (and the virtual monitor) down.
struct StopGuard(Arc<AtomicBool>);

impl Drop for StopGuard {
    fn drop(&mut self) {
        let Self(flag) = self;
        flag.store(true, Ordering::Relaxed);
    }
}
/// Keepalive thread: run the D-Bus handshake on a private tokio runtime, report the PipeWire
/// node id (or the error text) through `setup_tx`, then hold the connection until `stop` is set.
///
/// If the result cannot be delivered because the receiver is gone, the caller has already
/// given up, so the session is torn down immediately instead of being parked.
fn session_thread(setup_tx: Sender<Result<u32, String>>, stop: Arc<AtomicBool>) {
    let rt = match tokio::runtime::Builder::new_multi_thread()
        .worker_threads(1)
        .enable_all()
        .build()
    {
        Ok(rt) => rt,
        Err(e) => {
            let _ = setup_tx.send(Err(format!("build tokio runtime: {e}")));
            return;
        }
    };
    rt.block_on(async move {
        let session = match connect().await {
            Ok(s) => s,
            Err(e) => {
                let _ = setup_tx.send(Err(format!("{e:#}")));
                return;
            }
        };
        // A failed send means the receiver was dropped: nobody will ever hold the stop handle
        // for this session, so parking would leak the virtual monitor.
        let delivered = setup_tx.send(Ok(session.node_id)).is_ok();
        // Park, keeping `session` (and its zbus connection) alive until told to stop.
        while delivered && !stop.load(Ordering::Relaxed) {
            tokio::time::sleep(Duration::from_millis(200)).await;
        }
        // Best-effort explicit teardown before the connection drops.
        let _ = session.rd_session.call_method("Stop", &()).await;
    });
}
/// The live session objects (held for the stream's lifetime) + the PipeWire node id.
/// Built by `connect`; `session_thread` keeps it alive while parked.
struct MutterSession {
// Proxy for the RemoteDesktop session object; `Stop` is called on it at teardown.
rd_session: zbus::Proxy<'static>,
// Proxy for the ScreenCast session object; never read after construction in this file,
// presumably retained only so it lives as long as the session — TODO confirm.
_sc_session: zbus::Proxy<'static>,
// The session-bus connection the proxies were created on.
_conn: zbus::Connection,
// Node id taken from the `PipeWireStreamAdded` signal body.
node_id: u32,
}
/// Run the four-step handshake (see module docs) and return the live session objects together
/// with the PipeWire node id announced by Mutter.
///
/// # Errors
/// Every step carries its own context string, so a failure names the D-Bus call or proxy that
/// broke. Waiting for `PipeWireStreamAdded` is bounded to 10 s.
async fn connect() -> Result<MutterSession> {
    let conn = zbus::Connection::session()
        .await
        .context("connect session D-Bus")?;
    // 1. RemoteDesktop session (the anchor; also the future input path).
    let rd = zbus::Proxy::new(
        &conn,
        BUS_RD,
        "/org/gnome/Mutter/RemoteDesktop",
        "org.gnome.Mutter.RemoteDesktop",
    )
    .await
    .context("RemoteDesktop proxy (is gnome-shell / `gnome-shell --headless` running?)")?;
    let rd_path: OwnedObjectPath = rd
        .call("CreateSession", &())
        .await
        .context("RemoteDesktop.CreateSession")?;
    let rd_session = zbus::Proxy::new(
        &conn,
        BUS_RD,
        rd_path,
        "org.gnome.Mutter.RemoteDesktop.Session",
    )
    .await
    .context("RemoteDesktop.Session proxy")?;
    let session_id: String = rd_session
        .get_property("SessionId")
        .await
        .context("read SessionId")?;
    // 2. ScreenCast session anchored to it.
    let sc = zbus::Proxy::new(
        &conn,
        BUS_SC,
        "/org/gnome/Mutter/ScreenCast",
        "org.gnome.Mutter.ScreenCast",
    )
    .await
    .context("ScreenCast proxy")?;
    let mut props: HashMap<&str, Value> = HashMap::new();
    props.insert("remote-desktop-session-id", Value::from(session_id));
    let sc_path: OwnedObjectPath = sc
        .call("CreateSession", &(props,))
        .await
        .context("ScreenCast.CreateSession")?;
    let sc_session = zbus::Proxy::new(
        &conn,
        BUS_SC,
        sc_path,
        "org.gnome.Mutter.ScreenCast.Session",
    )
    .await
    .context("ScreenCast.Session proxy")?;
    // 3. The virtual monitor. Size/refresh follow the PipeWire format negotiation.
    let mut rec: HashMap<&str, Value> = HashMap::new();
    rec.insert("cursor-mode", Value::from(CURSOR_EMBEDDED));
    let stream_path: OwnedObjectPath = sc_session
        .call("RecordVirtual", &(rec,))
        .await
        .context("Session.RecordVirtual")?;
    let stream = zbus::Proxy::new(
        &conn,
        BUS_SC,
        stream_path,
        "org.gnome.Mutter.ScreenCast.Stream",
    )
    .await
    .context("ScreenCast.Stream proxy")?;
    // 4. Subscribe to the node-id signal BEFORE starting, then start the (combined) session.
    //    Subscribing first means the signal cannot be emitted before we are listening.
    let mut added = stream
        .receive_signal("PipeWireStreamAdded")
        .await
        .context("subscribe PipeWireStreamAdded")?;
    rd_session
        .call_method("Start", &())
        .await
        .context("RemoteDesktop.Session.Start")?;
    let msg = tokio::time::timeout(Duration::from_secs(10), added.next())
        .await
        .map_err(|_| anyhow!("PipeWireStreamAdded did not arrive within 10s"))?
        .ok_or_else(|| anyhow!("signal stream ended before PipeWireStreamAdded"))?;
    let (node_id,): (u32,) = msg
        .body()
        .deserialize()
        .context("PipeWireStreamAdded body")?;
    Ok(MutterSession {
        rd_session,
        _sc_session: sc_session,
        _conn: conn,
        node_id,
    })
}
+509
View File
@@ -0,0 +1,509 @@
//! Minimal CUDA Driver API FFI for the zero-copy path. No Rust crate exposes the GL-interop
//! driver calls we need (`cuGraphicsGLRegisterImage` & co.), so we hand-roll exactly those and
//! link `libcuda.so.1` (the driver library — NOT `libcudart`). Symbol names verified against
//! `cust_raw` + `cudaGL.h`: the context/mem ops use the `_v2` ABI suffix; the graphics-interop
//! ops are unsuffixed. (We use GL interop, not EGL interop: `cuGraphicsEGLRegisterImage` is
//! Tegra-only on the desktop driver — see [`super::egl`].)
//!
//! One process-wide `CUcontext` is created lazily and shared by the EGL importer (capture
//! thread) and ffmpeg's `hevc_nvenc` (encode thread); each thread makes it current before use.
#![allow(non_camel_case_types, non_snake_case)]
use anyhow::{bail, Result};
use std::os::raw::{c_int, c_uint, c_void};
use std::sync::{Arc, Mutex, OnceLock};
// Driver API handle and scalar aliases used by the FFI declarations in this file.
/// Status code returned by every driver call declared here; `ck` treats 0 as success.
pub type CUresult = c_uint; // CUDA_SUCCESS == 0
/// Device handle filled in by `cuDeviceGet`.
pub type CUdevice = c_int;
pub type CUcontext = *mut c_void; // opaque CUctx_st*
pub type CUstream = *mut c_void; // opaque CUstream_st*
/// Device address; 0 is used in this file as the "nothing allocated" value.
pub type CUdeviceptr = u64;
/// Handle produced by `cuGraphicsGLRegisterImage`.
pub type CUgraphicsResource = *mut c_void;
/// Array handle produced by `cuGraphicsSubResourceGetMappedArray`.
pub type CUarray = *mut c_void;
pub type CUexternalMemory = *mut c_void; // opaque CUextMemory_st*
/// `CUmemorytype` (cuda.h): HOST=1, DEVICE=2, ARRAY=3, UNIFIED=4.
pub const CU_MEMORYTYPE_DEVICE: c_uint = 2;
pub const CU_MEMORYTYPE_ARRAY: c_uint = 3;
/// `CUDA_MEMCPY2D` (cuda.h, `_v2` ABI). Field order is load-bearing.
///
/// Passed by pointer to `cuMemcpy2D_v2`. Callers in this file set only the fields for the
/// memory types they use and take the rest from `Default`.
#[repr(C)]
#[derive(Default)]
pub struct CUDA_MEMCPY2D {
// --- source description ---
pub srcXInBytes: usize,
pub srcY: usize,
// One of the `CU_MEMORYTYPE_*` constants.
pub srcMemoryType: c_uint,
pub srcHost: *const c_void,
pub srcDevice: CUdeviceptr,
pub srcArray: CUarray,
pub srcPitch: usize,
// --- destination description ---
pub dstXInBytes: usize,
pub dstY: usize,
// One of the `CU_MEMORYTYPE_*` constants.
pub dstMemoryType: c_uint,
pub dstHost: *mut c_void,
pub dstDevice: CUdeviceptr,
pub dstArray: CUarray,
pub dstPitch: usize,
// --- extent of the copy ---
pub WidthInBytes: usize,
pub Height: usize,
}
/// `CUDA_EXTERNAL_MEMORY_HANDLE_DESC` (cuda.h, 64-bit layout). `handle` is a union whose
/// largest member is the win32 two-pointer struct (16 bytes, align 8); for the OPAQUE_FD type
/// only the first 4 bytes (the `int fd`) are read.
///
/// The explicit `_pad`/`_pad2` fields spell out the padding the C compiler inserts. The
/// compile-time assertions below pin the resulting layout so an accidental edit fails the
/// build instead of corrupting the driver call.
#[repr(C)]
#[derive(Default)]
pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC {
    pub type_: c_uint, // CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1
    _pad: u32,
    pub handle: [u64; 2], // union { int fd; {void*,void*} win32; void* nvSciBufObject }
    pub size: u64,
    pub flags: c_uint,
    reserved: [c_uint; 16],
    _pad2: u32,
}

// Layout guard for the 64-bit ABI described above (4 + 4 pad + 16 + 8 + 4 + 64 + 4 pad = 104).
#[cfg(target_pointer_width = "64")]
const _: () = {
    assert!(std::mem::size_of::<CUDA_EXTERNAL_MEMORY_HANDLE_DESC>() == 104);
    assert!(std::mem::align_of::<CUDA_EXTERNAL_MEMORY_HANDLE_DESC>() == 8);
    assert!(std::mem::offset_of!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC, handle) == 8);
    assert!(std::mem::offset_of!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC, size) == 24);
    assert!(std::mem::offset_of!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC, flags) == 32);
};
/// `CUDA_EXTERNAL_MEMORY_BUFFER_DESC` (cuda.h, 64-bit layout).
///
/// Describes the byte range (`offset`, `size`) of an imported external memory object to map
/// with `cuExternalMemoryGetMappedBuffer`. The trailing `_pad` spells out the tail padding;
/// the compile-time assertions below pin the layout.
#[repr(C)]
#[derive(Default)]
pub struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC {
    pub offset: u64,
    pub size: u64,
    pub flags: c_uint,
    reserved: [c_uint; 16],
    _pad: u32,
}

// Layout guard for the 64-bit ABI (8 + 8 + 4 + 64 + 4 pad = 88).
#[cfg(target_pointer_width = "64")]
const _: () = {
    assert!(std::mem::size_of::<CUDA_EXTERNAL_MEMORY_BUFFER_DESC>() == 88);
    assert!(std::mem::align_of::<CUDA_EXTERNAL_MEMORY_BUFFER_DESC>() == 8);
    assert!(std::mem::offset_of!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC, size) == 8);
    assert!(std::mem::offset_of!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC, flags) == 16);
};
/// Value for `CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type_` when the handle is a file descriptor.
pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: c_uint = 1;
// Raw driver entry points. Every function returns a `CUresult`; the safe wrappers in this file
// route it through `ck` (or compare it against 0 directly).
#[link(name = "cuda")]
extern "C" {
// --- initialization / context ---
fn cuInit(flags: c_uint) -> CUresult;
fn cuDeviceGet(device: *mut CUdevice, ordinal: c_int) -> CUresult;
fn cuCtxCreate_v2(pctx: *mut CUcontext, flags: c_uint, dev: CUdevice) -> CUresult;
fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult;
// --- device memory ---
fn cuMemAllocPitch_v2(
dptr: *mut CUdeviceptr,
pitch: *mut usize,
width_bytes: usize,
height: usize,
element_size: c_uint,
) -> CUresult;
fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
fn cuMemcpy2D_v2(copy: *const CUDA_MEMCPY2D) -> CUresult;
fn cuCtxSynchronize() -> CUresult;
// GL interop (cudaGL.h) — these symbols have NO `_v2` suffix. `cuGraphicsEGLRegisterImage`
// is Tegra-only on the desktop driver, so we go EGLImage → GL texture → register the texture.
fn cuGraphicsGLRegisterImage(
resource: *mut CUgraphicsResource,
texture: c_uint, // GLuint
target: c_uint, // GL_TEXTURE_2D = 0x0DE1
flags: c_uint, // CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01
) -> CUresult;
// `stream` is passed as null by every caller in this file.
fn cuGraphicsMapResources(
count: c_uint,
resources: *mut CUgraphicsResource,
stream: *mut c_void,
) -> CUresult;
fn cuGraphicsUnmapResources(
count: c_uint,
resources: *mut CUgraphicsResource,
stream: *mut c_void,
) -> CUresult;
fn cuGraphicsSubResourceGetMappedArray(
array: *mut CUarray,
resource: CUgraphicsResource,
array_index: c_uint,
mip_level: c_uint,
) -> CUresult;
fn cuGraphicsUnregisterResource(resource: CUgraphicsResource) -> CUresult;
// External memory (cuda.h, no `_v2` suffix) — imports a (Vulkan-exported) dmabuf fd as
// device memory. Used for LINEAR dmabufs (gamescope), which EGL/GL interop can't sample.
fn cuImportExternalMemory(
ext_mem_out: *mut CUexternalMemory,
mem_handle_desc: *const CUDA_EXTERNAL_MEMORY_HANDLE_DESC,
) -> CUresult;
fn cuExternalMemoryGetMappedBuffer(
dev_ptr: *mut CUdeviceptr,
ext_mem: CUexternalMemory,
buffer_desc: *const CUDA_EXTERNAL_MEMORY_BUFFER_DESC,
) -> CUresult;
fn cuDestroyExternalMemory(ext_mem: CUexternalMemory) -> CUresult;
}
#[inline]
fn ck(r: CUresult, what: &str) -> Result<()> {
if r == 0 {
Ok(())
} else {
bail!("CUDA driver error {r} in {what}")
}
}
/// The shared process-wide CUDA context (created once). Wrapped so it's `Send`/`Sync` to live
/// in a `OnceLock`; the raw `CUcontext` is thread-safe to make current from any thread.
#[derive(Clone, Copy)]
pub struct Context(pub CUcontext);
// SAFETY (as stated above): the wrapped handle is only ever passed to `cuCtxSetCurrent` from
// other threads. NOTE(review): this relies on the driver allowing one context to be current on
// several threads — confirm against the CUDA driver API threading notes.
unsafe impl Send for Context {}
unsafe impl Sync for Context {}
// Holds the shared context once `context` has created it; also read by the `Drop` impls in
// this file so they can make it current before freeing.
static CONTEXT: OnceLock<Context> = OnceLock::new();
/// Get (lazily creating) the shared CUDA context on device 0.
///
/// First-time creation is serialized by a private lock, so two threads racing through here
/// cannot both create a context (the loser's would otherwise leak for the process lifetime).
/// Once the context exists the lock is never taken again.
///
/// # Errors
/// Fails if `cuInit`, `cuDeviceGet` or `cuCtxCreate_v2` reports an error. Nothing is cached in
/// that case, so a later call retries.
pub fn context() -> Result<CUcontext> {
    // Guards only the creation path below.
    static INIT: Mutex<()> = Mutex::new(());

    // Fast path: already created.
    if let Some(c) = CONTEXT.get() {
        return Ok(c.0);
    }
    // The lock protects no data, so a poisoned lock is still safe to reuse.
    let _init = INIT.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
    // Re-check: another thread may have finished creating it while we waited.
    if let Some(c) = CONTEXT.get() {
        return Ok(c.0);
    }
    // SAFETY: plain driver calls with valid out-pointers to locals.
    let ctx = unsafe {
        ck(cuInit(0), "cuInit")?;
        let mut dev: CUdevice = 0;
        ck(cuDeviceGet(&mut dev, 0), "cuDeviceGet")?;
        let mut ctx: CUcontext = std::ptr::null_mut();
        ck(cuCtxCreate_v2(&mut ctx, 0, dev), "cuCtxCreate_v2")?;
        ctx
    };
    Ok(CONTEXT.get_or_init(|| Context(ctx)).0)
}
/// Bind the shared context to the calling thread, creating it first if needed. Must run on a
/// thread before that thread issues any of the CUDA operations in this module.
pub fn make_current() -> Result<()> {
    let shared = context()?;
    let status = unsafe { cuCtxSetCurrent(shared) };
    ck(status, "cuCtxSetCurrent")
}
/// Allocate a single pitched device buffer sized for `width`x`height` pixels of 4 bytes each.
/// Returns the device pointer together with the row pitch the driver chose.
fn alloc_pitched(width: u32, height: u32) -> Result<(CUdeviceptr, usize)> {
    let row_bytes = width as usize * 4;
    let rows = height as usize;
    let mut dev_ptr: CUdeviceptr = 0;
    let mut row_pitch: usize = 0;
    // Last argument is the element-size hint handed to the driver (16 here).
    let status = unsafe { cuMemAllocPitch_v2(&mut dev_ptr, &mut row_pitch, row_bytes, rows, 16) };
    ck(status, "cuMemAllocPitch_v2")?;
    Ok((dev_ptr, row_pitch))
}
/// Recycled device allocations for one resolution. The list is shared through an `Arc` by the
/// capture thread (which takes buffers out) and the encode thread (where a dropped
/// [`DeviceBuffer`] puts its allocation back). Whatever is still on the list is freed in bulk
/// once the last reference goes away.
struct PoolInner {
    free: Vec<CUdeviceptr>,
}

impl Drop for PoolInner {
    fn drop(&mut self) {
        // Freeing needs a current context, and this may run on any thread.
        if let Some(shared) = CONTEXT.get() {
            let _ = unsafe { cuCtxSetCurrent(shared.0) };
        }
        self.free.iter().copied().for_each(|ptr| {
            let _ = unsafe { cuMemFree_v2(ptr) };
        });
    }
}
/// Reusable pitched device buffers for one fixed resolution. Recycling avoids a per-frame
/// `cuMemAllocPitch`/`cuMemFree` (a ~29 MB allocation at 5K), which takes the device allocator
/// lock and serializes against the GPU every frame.
#[derive(Clone)]
pub struct BufferPool {
    inner: Arc<Mutex<PoolInner>>,
    width: u32,
    height: u32,
    pitch: usize,
}

impl BufferPool {
    /// Build a pool of `width`x`height` 4-byte-pixel buffers. One buffer is allocated right
    /// away so the driver's pitch is known (it is constant for a given width); that buffer
    /// seeds the free list.
    pub fn new(width: u32, height: u32) -> Result<BufferPool> {
        let (seed, pitch) = alloc_pitched(width, height)?;
        let inner = Arc::new(Mutex::new(PoolInner { free: vec![seed] }));
        Ok(BufferPool {
            inner,
            width,
            height,
            pitch,
        })
    }

    /// Pixel width the pool was created for.
    pub fn width(&self) -> u32 {
        self.width
    }

    /// Pixel height the pool was created for.
    pub fn height(&self) -> u32 {
        self.height
    }

    /// Hand out a buffer: a recycled one when the free list is non-empty, otherwise a fresh
    /// allocation. Dropping the buffer returns it to this pool (after the consumer has
    /// synchronized, so the GPU is done with it).
    pub fn get(&self) -> Result<DeviceBuffer> {
        // The lock is released at the end of this statement, before any allocation.
        let recycled = self.inner.lock().unwrap().free.pop();
        let ptr = if let Some(existing) = recycled {
            existing
        } else {
            let (fresh, _pitch) = alloc_pitched(self.width, self.height)?;
            fresh
        };
        Ok(DeviceBuffer {
            ptr,
            pitch: self.pitch,
            width: self.width,
            height: self.height,
            pool: Some(Arc::clone(&self.inner)),
        })
    }
}
/// One captured frame in pitched device memory. It is filled by a copy out of the EGL-mapped
/// dmabuf (so the dmabuf can go back to the compositor immediately) and then read by the
/// encoder. A buffer that came from a [`BufferPool`] is recycled on drop; a standalone one is
/// freed.
pub struct DeviceBuffer {
    pub ptr: CUdeviceptr,
    pub pitch: usize,
    pub width: u32,
    pub height: u32,
    pool: Option<Arc<Mutex<PoolInner>>>,
}

impl DeviceBuffer {
    /// Allocate a buffer that belongs to no pool. Use [`BufferPool`] on the hot path instead.
    pub fn alloc(width: u32, height: u32) -> Result<DeviceBuffer> {
        alloc_pitched(width, height).map(|(ptr, pitch)| DeviceBuffer {
            ptr,
            pitch,
            width,
            height,
            pool: None,
        })
    }
}

impl Drop for DeviceBuffer {
    fn drop(&mut self) {
        // A zero pointer means there is nothing to release.
        if self.ptr == 0 {
            return;
        }
        match &self.pool {
            Some(owner) => {
                // Recycle (the consumer synchronized before dropping, so the GPU is done).
                owner.lock().unwrap().free.push(self.ptr);
            }
            None => {
                // This may run on the encode thread; cuMemFree needs a current context.
                if let Some(shared) = CONTEXT.get() {
                    let _ = unsafe { cuCtxSetCurrent(shared.0) };
                }
                let _ = unsafe { cuMemFree_v2(self.ptr) };
            }
        }
    }
}
/// A *persistent* GL-texture→CUDA registration. The desktop NVIDIA driver only supports CUDA
/// interop through GL textures (not dmabuf EGLImages directly), so the importer renders the
/// dmabuf into a reusable `GL_RGBA8` texture and registers *that* once — then each frame only
/// maps → copies the mapped array out → unmaps (the map/unmap pair is the GL↔CUDA sync point),
/// instead of registering/unregistering every frame. Unregisters on drop.
pub struct RegisteredTexture {
resource: CUgraphicsResource,
}
impl RegisteredTexture {
/// Register a `GL_TEXTURE_2D` once.
///
/// # Safety
/// The GL context and the shared CUDA context must both be current on this thread, and
/// `texture` must be a valid `GL_TEXTURE_2D`.
pub unsafe fn register_gl(texture: u32) -> Result<RegisteredTexture> {
const GL_TEXTURE_2D: c_uint = 0x0DE1;
const CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: c_uint = 0x01;
let mut resource: CUgraphicsResource = std::ptr::null_mut();
ck(
cuGraphicsGLRegisterImage(
&mut resource,
texture,
GL_TEXTURE_2D,
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY,
),
"cuGraphicsGLRegisterImage",
)?;
Ok(RegisteredTexture { resource })
}
/// Map the texture for this frame, copy its (already-linear RGBA8) array into `dst`, then
/// unmap. The `cuCtxSynchronize` ensures `dst` is ready before the source dmabuf is recycled.
pub fn copy_mapped_to(&mut self, dst: &DeviceBuffer) -> Result<()> {
unsafe {
ck(
cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()),
"cuGraphicsMapResources",
)?;
let mut array: CUarray = std::ptr::null_mut();
if cuGraphicsSubResourceGetMappedArray(&mut array, self.resource, 0, 0) != 0 {
let _ = cuGraphicsUnmapResources(1, &mut self.resource, std::ptr::null_mut());
bail!("cuGraphicsSubResourceGetMappedArray failed");
}
let copy = CUDA_MEMCPY2D {
srcMemoryType: CU_MEMORYTYPE_ARRAY,
srcArray: array,
dstMemoryType: CU_MEMORYTYPE_DEVICE,
dstDevice: dst.ptr,
dstPitch: dst.pitch,
WidthInBytes: dst.width as usize * 4, // 4 bytes/px (BGRx)
Height: dst.height as usize,
..Default::default()
};
let r = cuMemcpy2D_v2(&copy);
let s = cuCtxSynchronize();
let _ = cuGraphicsUnmapResources(1, &mut self.resource, std::ptr::null_mut());
ck(r, "cuMemcpy2D_v2")?;
ck(s, "cuCtxSynchronize")?;
}
Ok(())
}
}
/// Device→device copy of a pitched buffer into another device region, e.g. our imported
/// [`DeviceBuffer`] into a pooled CUDA surface NVENC owns. Both sides hold 4-byte (BGRx)
/// pixels; the extent copied is `src`'s width and height.
/// The caller must have the shared context current on this thread (see [`make_current`]).
pub fn copy_device_to_device(
    src: &DeviceBuffer,
    dst_ptr: CUdeviceptr,
    dst_pitch: usize,
) -> Result<()> {
    let row_bytes = src.width as usize * 4;
    let rows = src.height as usize;
    let desc = CUDA_MEMCPY2D {
        srcMemoryType: CU_MEMORYTYPE_DEVICE,
        srcDevice: src.ptr,
        srcPitch: src.pitch,
        dstMemoryType: CU_MEMORYTYPE_DEVICE,
        dstDevice: dst_ptr,
        dstPitch: dst_pitch,
        WidthInBytes: row_bytes,
        Height: rows,
        ..Default::default()
    };
    let copied = unsafe { cuMemcpy2D_v2(&desc) };
    ck(copied, "cuMemcpy2D_v2(dev->dev)")?;
    let synced = unsafe { cuCtxSynchronize() };
    ck(synced, "cuCtxSynchronize")
}
impl Drop for RegisteredTexture {
    /// Release the CUDA registration, if there is one. The driver's status is ignored.
    fn drop(&mut self) {
        if self.resource.is_null() {
            return;
        }
        let _ = unsafe { cuGraphicsUnregisterResource(self.resource) };
    }
}
/// A dmabuf fd imported as CUDA external memory and mapped to a device pointer — the LINEAR
/// path (gamescope): the buffer's bytes are directly addressable, no GL de-tiling needed.
/// Cached per PipeWire buffer (the fd pool is stable for a stream's life); destroyed on drop.
pub struct ExternalDmabuf {
    ext: CUexternalMemory,
    pub ptr: CUdeviceptr,
    pub size: u64,
}

// Raw driver handles; used from the single capture thread but moved with the importer.
unsafe impl Send for ExternalDmabuf {}

impl ExternalDmabuf {
    /// Import `fd` (NOT consumed — an internal `dup` is handed to the driver, which owns it
    /// from then on) and map its full `size` bytes to a device pointer. The shared context
    /// must be current.
    ///
    /// # Errors
    /// Fails if `dup` fails (the OS error is included in the message) or if the driver
    /// rejects the import or the mapping.
    pub fn import(fd: i32, size: u64) -> Result<ExternalDmabuf> {
        let dup = unsafe { libc::dup(fd) };
        if dup < 0 {
            // Read errno before anything else can overwrite it, so the caller can tell
            // fd exhaustion from a bad descriptor.
            let os_err = std::io::Error::last_os_error();
            bail!("dup(dmabuf fd) failed: {os_err}");
        }
        Self::import_owned_fd(dup, size)
    }

    /// Import an fd the caller hands over (e.g. a Vulkan-exported `OPAQUE_FD`) — consumed by
    /// the driver on success, closed by us on failure.
    pub fn import_owned_fd(dup: i32, size: u64) -> Result<ExternalDmabuf> {
        let mut desc = CUDA_EXTERNAL_MEMORY_HANDLE_DESC {
            type_: CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
            size,
            ..Default::default()
        };
        desc.handle[0] = dup as u32 as u64; // union member `int fd` (little-endian low bytes)
        let mut ext: CUexternalMemory = std::ptr::null_mut();
        let r = unsafe { cuImportExternalMemory(&mut ext, &desc) };
        if r != 0 {
            unsafe { libc::close(dup) }; // import failed → the driver did not take the fd
            bail!("cuImportExternalMemory failed ({r}) — LINEAR dmabuf import unsupported?");
        }
        let buf = CUDA_EXTERNAL_MEMORY_BUFFER_DESC {
            offset: 0,
            size,
            ..Default::default()
        };
        let mut ptr: CUdeviceptr = 0;
        let r = unsafe { cuExternalMemoryGetMappedBuffer(&mut ptr, ext, &buf) };
        if r != 0 {
            // Mapping failed: release the imported object before reporting.
            unsafe {
                let _ = cuDestroyExternalMemory(ext);
            }
            bail!("cuExternalMemoryGetMappedBuffer failed ({r})");
        }
        Ok(ExternalDmabuf { ext, ptr, size })
    }
}

impl Drop for ExternalDmabuf {
    fn drop(&mut self) {
        unsafe {
            // May run on a thread where the shared context is not current.
            if let Some(c) = CONTEXT.get() {
                let _ = cuCtxSetCurrent(c.0);
            }
            // Release the mapping first, then the external memory object it came from.
            if self.ptr != 0 {
                let _ = cuMemFree_v2(self.ptr); // mapped buffers are freed like device memory
            }
            if !self.ext.is_null() {
                let _ = cuDestroyExternalMemory(self.ext);
            }
        }
    }
}
/// Copy a pitched region that begins at `src_ptr` (e.g. an [`ExternalDmabuf`] mapping at the
/// chunk offset) into `dst`; the extent copied is `dst`'s width (4 bytes per pixel) and height.
/// The shared context must be current on this thread.
pub fn copy_pitched_to_buffer(
    src_ptr: CUdeviceptr,
    src_pitch: usize,
    dst: &DeviceBuffer,
) -> Result<()> {
    let row_bytes = dst.width as usize * 4;
    let rows = dst.height as usize;
    let desc = CUDA_MEMCPY2D {
        srcMemoryType: CU_MEMORYTYPE_DEVICE,
        srcDevice: src_ptr,
        srcPitch: src_pitch,
        dstMemoryType: CU_MEMORYTYPE_DEVICE,
        dstDevice: dst.ptr,
        dstPitch: dst.pitch,
        WidthInBytes: row_bytes,
        Height: rows,
        ..Default::default()
    };
    let copied = unsafe { cuMemcpy2D_v2(&desc) };
    ck(copied, "cuMemcpy2D_v2(ext->dev)")?;
    // The copy must finish before the dmabuf is requeued to the producer.
    let synced = unsafe { cuCtxSynchronize() };
    ck(synced, "cuCtxSynchronize")
}
+528
View File
@@ -0,0 +1,528 @@
//! EGL side of the zero-copy path: open a headless EGLDisplay on the NVIDIA GPU (GBM platform on
//! the render node) and import a PipeWire dmabuf as an `EGLImage` with `EGL_LINUX_DMA_BUF_EXT`.
//! The DRM format **modifier** is mandatory on NVIDIA (its buffers are tiled; importing without
//! the modifier yields a corrupt image or `EGL_BAD_MATCH`).
//!
//! Desktop NVIDIA can't register a dmabuf `EGLImage` with CUDA directly — `cuGraphicsEGLRegisterImage`
//! is Tegra-only and `cuGraphicsGLRegisterImage` rejects EGLImage-backed textures (their internal
//! format is opaque). So we follow OBS/Sunshine: bind the `EGLImage` to a GL texture
//! (`glEGLImageTargetTexture2DOES`), render it through a fullscreen-triangle shader into a plain
//! immutable `GL_RGBA8` texture (de-tiling and swizzling to the BGRx the encoder wants), then
//! register *that* texture with CUDA ([`cuda::RegisteredTexture`]) and copy it device-to-device into an
//! owned [`DeviceBuffer`] so the dmabuf can be returned to the compositor immediately.
#![allow(non_upper_case_globals)]
use super::cuda::{self, DeviceBuffer};
use anyhow::{bail, ensure, Context as _, Result};
use khronos_egl as egl;
use std::os::raw::{c_int, c_void};
// EGL_EXT_image_dma_buf_import / _modifiers + platform enums (not defined by khronos-egl).
const EGL_LINUX_DMA_BUF_EXT: egl::Enum = 0x3270;
const EGL_PLATFORM_GBM_KHR: egl::Enum = 0x31D7;
// Attribute keys for describing plane 0 of the dmabuf.
const EGL_LINUX_DRM_FOURCC_EXT: egl::Attrib = 0x3271;
const EGL_DMA_BUF_PLANE0_FD_EXT: egl::Attrib = 0x3272;
const EGL_DMA_BUF_PLANE0_OFFSET_EXT: egl::Attrib = 0x3273;
const EGL_DMA_BUF_PLANE0_PITCH_EXT: egl::Attrib = 0x3274;
// The 64-bit DRM format modifier is passed as two halves.
const EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT: egl::Attrib = 0x3443;
const EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT: egl::Attrib = 0x3444;
// GL enums used by the blit; the GL entry points are declared by hand in the extern block below.
const GL_TEXTURE_2D: u32 = 0x0DE1;
const GL_TEXTURE_MIN_FILTER: u32 = 0x2801;
const GL_TEXTURE_MAG_FILTER: u32 = 0x2800;
const GL_LINEAR: c_int = 0x2601;
const GL_NEAREST: c_int = 0x2600;
const GL_RGBA8: u32 = 0x8058;
const GL_FRAMEBUFFER: u32 = 0x8D40;
const GL_COLOR_ATTACHMENT0: u32 = 0x8CE0;
const GL_FRAMEBUFFER_COMPLETE: u32 = 0x8CD5;
const GL_TEXTURE0: u32 = 0x84C0;
const GL_TRIANGLES: u32 = 0x0004;
const GL_VERTEX_SHADER: u32 = 0x8B31;
const GL_FRAGMENT_SHADER: u32 = 0x8B30;
const GL_COMPILE_STATUS: u32 = 0x8B81;
const GL_LINK_STATUS: u32 = 0x8B82;
// libglvnd's libGL dispatches these to the NVIDIA driver based on the current EGL/GL context.
// Only the entry points this file needs are declared. There is no `glDeleteProgram`,
// `glDeleteFramebuffers` or `glDeleteVertexArrays` here, so those objects cannot be released
// from this file as written.
#[link(name = "GL")]
extern "C" {
// --- textures ---
fn glGenTextures(n: c_int, textures: *mut u32);
fn glBindTexture(target: u32, texture: u32);
fn glTexParameteri(target: u32, pname: u32, param: c_int);
fn glDeleteTextures(n: c_int, textures: *const u32);
fn glTexStorage2D(target: u32, levels: c_int, internalformat: u32, width: c_int, height: c_int);
fn glGetError() -> u32;
// --- framebuffer / draw ---
fn glGenFramebuffers(n: c_int, framebuffers: *mut u32);
fn glBindFramebuffer(target: u32, framebuffer: u32);
fn glFramebufferTexture2D(
target: u32,
attachment: u32,
textarget: u32,
texture: u32,
level: c_int,
);
fn glCheckFramebufferStatus(target: u32) -> u32;
fn glViewport(x: c_int, y: c_int, width: c_int, height: c_int);
fn glGenVertexArrays(n: c_int, arrays: *mut u32);
fn glBindVertexArray(array: u32);
fn glDrawArrays(mode: u32, first: c_int, count: c_int);
fn glActiveTexture(texture: u32);
fn glUseProgram(program: u32);
fn glFlush();
// --- shaders / programs ---
fn glCreateShader(shader_type: u32) -> u32;
fn glShaderSource(shader: u32, count: c_int, string: *const *const i8, length: *const c_int);
fn glCompileShader(shader: u32);
fn glGetShaderiv(shader: u32, pname: u32, params: *mut c_int);
fn glDeleteShader(shader: u32);
fn glCreateProgram() -> u32;
fn glAttachShader(program: u32, shader: u32);
fn glLinkProgram(program: u32);
fn glGetProgramiv(program: u32, pname: u32, params: *mut c_int);
fn glGetUniformLocation(program: u32, name: *const i8) -> c_int;
fn glUniform1i(location: c_int, v0: c_int);
}
// libgbm: the GBM device created on the render-node fd is what the EGLDisplay is opened on.
#[link(name = "gbm")]
extern "C" {
// Returns null on failure (checked by the caller).
fn gbm_create_device(fd: c_int) -> *mut c_void;
fn gbm_device_destroy(device: *mut c_void);
}
/// `glEGLImageTargetTexture2DOES(target, EGLImage)` — loaded via `eglGetProcAddress`.
type EglImageTargetFn = unsafe extern "system" fn(u32, *mut c_void);
// Fullscreen-triangle blit: sample the dmabuf EGLImage texture and write it (swizzled to BGRA,
// to match the BGRx the encoder expects) into a normal GL_RGBA8 texture that CUDA *can* register.
// The vertex shader derives the three corners from `gl_VertexID`, so no vertex buffer is bound.
// Both sources are passed to `glShaderSource` with an explicit length (no NUL terminator needed).
const VERT_SRC: &[u8] = b"#version 330 core\nout vec2 v_tex;\nvoid main(){vec2 p=vec2(float((gl_VertexID<<1)&2),float(gl_VertexID&2));v_tex=p;gl_Position=vec4(p*2.0-1.0,0.0,1.0);}\n";
const FRAG_SRC: &[u8] = b"#version 330 core\nuniform sampler2D image;\nin vec2 v_tex;\nout vec4 o_color;\nvoid main(){o_color=texture(image,v_tex).bgra;}\n";
/// Compile one shader stage from `src` and return its GL name. On a compile failure the
/// shader object is deleted before the error is returned.
///
/// # Safety
/// A GL context must be current on the calling thread.
unsafe fn compile_shader(kind: u32, src: &[u8]) -> Result<u32> {
    let shader = glCreateShader(kind);
    ensure!(shader != 0, "glCreateShader failed");

    // One source string with an explicit length, so `src` needs no NUL terminator.
    let text = src.as_ptr() as *const i8;
    let text_len = src.len() as c_int;
    glShaderSource(shader, 1, &text, &text_len);
    glCompileShader(shader);

    let mut status: c_int = 0;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &mut status);
    if status != 0 {
        return Ok(shader);
    }
    glDeleteShader(shader);
    bail!("GL shader compile failed")
}
/// Compile and link the blit program (vertex + fragment stage) and point its `image` sampler
/// at texture unit 0. Returns the GL program name.
///
/// The shader objects are deleted once linking has been requested. If the fragment stage
/// fails to compile, the already-compiled vertex shader is deleted too.
///
/// # Safety
/// A GL context must be current on the calling thread.
unsafe fn compile_program() -> Result<u32> {
    let vs = compile_shader(GL_VERTEX_SHADER, VERT_SRC)?;
    let fs = match compile_shader(GL_FRAGMENT_SHADER, FRAG_SRC) {
        Ok(fs) => fs,
        Err(e) => {
            // Don't leak the vertex shader when only the fragment stage fails.
            glDeleteShader(vs);
            return Err(e);
        }
    };
    let prog = glCreateProgram();
    glAttachShader(prog, vs);
    glAttachShader(prog, fs);
    glLinkProgram(prog);
    glDeleteShader(vs);
    glDeleteShader(fs);
    let mut ok: c_int = 0;
    glGetProgramiv(prog, GL_LINK_STATUS, &mut ok);
    // NOTE(review): on link failure the program object itself is not released — no
    // `glDeleteProgram` binding is declared in this file.
    ensure!(ok != 0, "GL program link failed");
    glUseProgram(prog);
    let loc = glGetUniformLocation(prog, c"image".as_ptr());
    if loc >= 0 {
        glUniform1i(loc, 0); // sampler -> texture unit 0
    }
    glUseProgram(0);
    Ok(prog)
}
/// Per-size GL machinery to blit a dmabuf EGLImage into a CUDA-registrable `GL_RGBA8` texture.
/// Everything here is created by `GlBlit::new` for one `width`x`height`.
struct GlBlit {
/// Linked blit program (see `compile_program`).
program: u32,
/// Vertex array object bound around the draw call.
vao: u32,
/// Framebuffer with `dst_tex` as its colour attachment 0.
fbo: u32,
/// CUDA-registrable destination (immutable GL_RGBA8).
dst_tex: u32,
/// Source texture re-targeted to each frame's EGLImage.
src_tex: u32,
/// Size of `dst_tex`, also used as the viewport.
width: u32,
height: u32,
/// `dst_tex` registered with CUDA once (not per frame); mapped+copied each frame.
registered: cuda::RegisteredTexture,
/// Recycled CUDA device buffers (the imported frames handed to the encoder).
pool: cuda::BufferPool,
}
impl GlBlit {
/// Build the blit objects for one frame size: program, VAO, FBO, the immutable `GL_RGBA8`
/// destination texture (attached to the FBO and registered with CUDA) and the source
/// texture, plus the device-buffer pool.
///
/// # Safety
/// The GL context and the shared CUDA context must be current on this thread.
///
/// NOTE(review): GL objects created before a failing step are not released on the error
/// paths of this function.
unsafe fn new(width: u32, height: u32) -> Result<GlBlit> {
let program = compile_program()?;
let mut vao = 0u32;
glGenVertexArrays(1, &mut vao); // core profile needs a bound VAO for glDrawArrays
let mut fbo = 0u32;
glGenFramebuffers(1, &mut fbo);
// Destination: single-level immutable storage, nearest filtering.
let mut dst_tex = 0u32;
glGenTextures(1, &mut dst_tex);
glBindTexture(GL_TEXTURE_2D, dst_tex);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, width as c_int, height as c_int);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Source: no storage here — `run` binds each frame's EGLImage to it.
let mut src_tex = 0u32;
glGenTextures(1, &mut src_tex);
glBindTexture(GL_TEXTURE_2D, src_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glBindTexture(GL_TEXTURE_2D, 0);
// Attach the destination to the FBO and verify completeness before going further.
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
glFramebufferTexture2D(
GL_FRAMEBUFFER,
GL_COLOR_ATTACHMENT0,
GL_TEXTURE_2D,
dst_tex,
0,
);
let status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
ensure!(
status == GL_FRAMEBUFFER_COMPLETE,
"blit FBO incomplete ({status:#x})"
);
// Register the (immutable, reused) destination texture with CUDA once, and stand up the
// device-buffer pool — both per-resolution, not per-frame. Requires the CUDA context to be
// current (the caller makes it current before constructing the blit).
let registered = cuda::RegisteredTexture::register_gl(dst_tex)?;
let pool = cuda::BufferPool::new(width, height)?;
Ok(GlBlit {
program,
vao,
fbo,
dst_tex,
src_tex,
width,
height,
registered,
pool,
})
}
/// Bind `image` to the source texture and render it into `dst_tex`.
///
/// # Safety
/// The GL context is current on this thread; `image` is a valid `EGLImage`.
unsafe fn run(&self, egl_image_target: EglImageTargetFn, image: *mut c_void) -> Result<()> {
glBindTexture(GL_TEXTURE_2D, self.src_tex);
// Clear any stale error so the check below reflects only the EGLImage bind.
let _ = glGetError();
egl_image_target(GL_TEXTURE_2D, image);
let e = glGetError();
glBindTexture(GL_TEXTURE_2D, 0);
ensure!(e == 0, "glEGLImageTargetTexture2DOES failed ({e:#x})");
// Draw the fullscreen triangle into the FBO (i.e. into `dst_tex`).
glBindFramebuffer(GL_FRAMEBUFFER, self.fbo);
glViewport(0, 0, self.width as c_int, self.height as c_int);
glUseProgram(self.program);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, self.src_tex);
glBindVertexArray(self.vao);
glDrawArrays(GL_TRIANGLES, 0, 3);
glBindVertexArray(0);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glFlush(); // submit GL work before CUDA maps the texture
Ok(())
}
}
/// One dmabuf plane as delivered by PipeWire (single-plane for BGRx).
#[derive(Clone, Copy, Debug)]
pub struct DmabufPlane {
// File descriptor of the dmabuf.
pub fd: i32,
// Start of the plane within the buffer — presumably in bytes; TODO confirm against the producer.
pub offset: u32,
// Row stride of the plane — presumably in bytes; TODO confirm against the producer.
pub stride: u32,
}
/// Dynamically loaded EGL 1.5 entry points (see `Egl::load_required` in `EglImporter::new`).
type Egl = egl::DynamicInstance<egl::EGL1_5>;
/// Headless EGLDisplay (GBM platform on the render node) + a surfaceless desktop-GL context
/// used to import dmabufs and bridge them to CUDA via a GL texture. Lives on the capture thread
/// (the GL context is made current there once).
pub struct EglImporter {
egl: Egl,
display: egl::Display,
/// `EGL_NO_CONTEXT` wrapped as an `egl::Context` (built once in `new`).
no_ctx: egl::Context,
/// Surfaceless GL context (current on the capture thread) for the EGLImage→texture bind.
_gl_ctx: egl::Context,
egl_image_target: EglImageTargetFn,
/// Lazily-created GL blit machinery (recreated if the frame size changes).
blit: Option<GlBlit>,
/// LINEAR-dmabuf path (gamescope): a Vulkan bridge (dmabuf → exportable OPAQUE_FD → CUDA),
/// created lazily on the first LINEAR frame, + the destination pool.
vk: Option<super::vulkan::VkBridge>,
linear_pool: Option<cuda::BufferPool>,
/// GBM device the EGLDisplay was opened on.
gbm: *mut c_void,
/// Render-node fd the GBM device was created from.
render_fd: c_int,
}
// The EGL handles are confined to the capture thread; the struct is moved there once.
unsafe impl Send for EglImporter {}
impl EglImporter {
/// Open a headless EGLDisplay via the GBM platform on `/dev/dri/renderD128`, create a
/// surfaceless desktop-GL context and make it current on the calling thread. Also forces the
/// shared CUDA context to exist (so a later `import` only touches the hot path).
///
/// # Errors
/// Fails if the render node or GBM device cannot be opened, libEGL cannot be loaded, either
/// dma_buf import extension is missing, no suitable config/context can be created, or the
/// CUDA context cannot be created.
///
/// NOTE(review): once the GBM device exists, the later `?`/`ensure!` exits return without
/// destroying it or closing `render_fd`. The render node path is also hard-coded — confirm
/// that is acceptable on multi-GPU hosts.
pub fn new() -> Result<EglImporter> {
// GBM platform on the NVIDIA render node: this ties the EGLDisplay (and its GL contexts)
// to the same DRM device CUDA-GL interop associates with, which the EGL device platform
// did not (cuGraphicsGLRegisterImage rejected device-platform GL textures).
let path = std::ffi::CString::new("/dev/dri/renderD128").unwrap();
let render_fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR | libc::O_CLOEXEC) };
ensure!(render_fd >= 0, "open /dev/dri/renderD128 for GBM");
let gbm = unsafe { gbm_create_device(render_fd) };
if gbm.is_null() {
unsafe { libc::close(render_fd) };
anyhow::bail!("gbm_create_device failed");
}
let egl: Egl =
unsafe { Egl::load_required() }.context("load libEGL (EGL 1.5 dynamic instance)")?;
let display = unsafe {
egl.get_platform_display(
EGL_PLATFORM_GBM_KHR,
gbm as egl::NativeDisplayType,
&[egl::ATTRIB_NONE],
)
}
.context("eglGetPlatformDisplay(GBM) on the NVIDIA render node")?;
egl.initialize(display).context("eglInitialize")?;
// Both dma_buf import extensions are required; bail early with a clear message otherwise.
let exts = egl
.query_string(Some(display), egl::EXTENSIONS)
.context("query EGL extensions")?
.to_string_lossy()
.into_owned();
ensure!(
exts.contains("EGL_EXT_image_dma_buf_import"),
"EGL lacks EGL_EXT_image_dma_buf_import"
);
ensure!(
exts.contains("EGL_EXT_image_dma_buf_import_modifiers"),
"EGL lacks EGL_EXT_image_dma_buf_import_modifiers (needed for NVIDIA tiled dmabufs)"
);
// A surfaceless desktop-GL context so we can bind the dmabuf EGLImage to a GL texture
// (cuGraphicsEGLRegisterImage is Tegra-only; desktop CUDA interop goes through GL).
egl.bind_api(egl::OPENGL_API)
.context("eglBindAPI(OpenGL)")?;
// The default EGL_SURFACE_TYPE in eglChooseConfig is WINDOW_BIT, which a headless
// display has none of — request a pbuffer-capable config (we run surfaceless anyway).
let config = egl
.choose_first_config(
display,
&[
egl::SURFACE_TYPE,
egl::PBUFFER_BIT,
egl::RENDERABLE_TYPE,
egl::OPENGL_BIT,
egl::NONE,
],
)
.context("eglChooseConfig")?
.context("no EGL config for OpenGL")?;
let gl_ctx = egl
.create_context(
display,
config,
None,
&[egl::CONTEXT_CLIENT_VERSION, 3, egl::NONE],
)
.context("eglCreateContext(OpenGL)")?;
// No draw/read surface: the context is made current surfaceless.
egl.make_current(display, None, None, Some(gl_ctx))
.context("eglMakeCurrent surfaceless (needs EGL_KHR_surfaceless_context)")?;
// The returned generic function pointer is reinterpreted as the extension's real signature.
let egl_image_target: EglImageTargetFn = unsafe {
std::mem::transmute(
egl.get_proc_address("glEGLImageTargetTexture2DOES")
.context("glEGLImageTargetTexture2DOES unavailable")?,
)
};
// Create the shared CUDA context up front so import() is pure hot path.
cuda::context().context("create CUDA context")?;
let no_ctx = unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) };
tracing::info!(
"zero-copy EGL importer ready (GBM platform + GL texture interop, dma_buf_import + modifiers)"
);
Ok(EglImporter {
egl,
display,
no_ctx,
_gl_ctx: gl_ctx,
egl_image_target,
blit: None,
vk: None,
linear_pool: None,
gbm,
render_fd,
})
}
/// Import a LINEAR dmabuf through the Vulkan bridge rather than EGL/GL (NVIDIA's EGL can't
/// sample LINEAR, and the CUDA driver rejects raw dmabuf fds; Vulkan imports the dmabuf,
/// GPU-copies into an exportable allocation, and CUDA reads that). See [`super::vulkan`].
///
/// The destination pool is rebuilt whenever `width`×`height` changes, and the bridge itself
/// is created on the first call. Returns the pooled CUDA buffer holding the frame.
pub fn import_linear(
    &mut self,
    plane: &DmabufPlane,
    width: u32,
    height: u32,
) -> Result<DeviceBuffer> {
    cuda::make_current()?;

    // Keep the existing pool only when it was built for exactly this frame size.
    let pool_matches = match self.linear_pool.as_ref() {
        Some(existing) => (existing.width(), existing.height()) == (width, height),
        None => false,
    };
    if !pool_matches {
        self.linear_pool = Some(cuda::BufferPool::new(width, height)?);
    }

    // Lazy bring-up: the bridge is only needed once a LINEAR frame actually shows up.
    if self.vk.is_none() {
        self.vk = Some(super::vulkan::VkBridge::new()?);
    }

    // Both options were populated just above, so the unwraps cannot fail.
    let bridge = self.vk.as_mut().unwrap();
    let pool = self.linear_pool.as_ref().unwrap();
    bridge.import_linear(plane.fd, plane.offset, plane.stride, height, pool)
}
/// List the DRM format modifiers this EGL display reports as importable for `fourcc`, using
/// `eglQueryDmaBufModifiersEXT`. We advertise these to PipeWire so the compositor allocates
/// a dmabuf in a layout we can import.
///
/// Returns an empty vector when the entry point is missing, either query fails, or the
/// driver reports no modifiers (caller falls back).
pub fn supported_modifiers(&self, fourcc: u32) -> Vec<u64> {
    type QueryFn = unsafe extern "system" fn(
        dpy: *mut c_void,
        format: i32,
        max_modifiers: i32,
        modifiers: *mut u64,
        external_only: *mut u32,
        num_modifiers: *mut i32,
    ) -> u32;

    let query: QueryFn = match self.egl.get_proc_address("eglQueryDmaBufModifiersEXT") {
        // SAFETY: the symbol was resolved by name; `QueryFn` mirrors the extension's prototype.
        Some(sym) => unsafe { std::mem::transmute(sym) },
        None => return Vec::new(),
    };
    let dpy = self.display.as_ptr();
    let format = fourcc as i32;

    // Pass 1: zero-capacity call that only asks how many modifiers exist.
    let mut available: i32 = 0;
    let counted = unsafe {
        query(
            dpy,
            format,
            0,
            std::ptr::null_mut(),
            std::ptr::null_mut(),
            &mut available,
        )
    };
    if counted == 0 || available <= 0 {
        return Vec::new();
    }

    // Pass 2: fetch the list into buffers of exactly that many entries.
    let capacity = available as usize;
    let mut modifiers = vec![0u64; capacity];
    let mut external_only = vec![0u32; capacity];
    let mut written: i32 = 0;
    let fetched = unsafe {
        query(
            dpy,
            format,
            available,
            modifiers.as_mut_ptr(),
            external_only.as_mut_ptr(),
            &mut written,
        )
    };
    if fetched == 0 {
        return Vec::new();
    }

    // Keep only the entries the driver filled in (the external-only flags are not used).
    modifiers.truncate(written.max(0) as usize);
    modifiers
}
/// Wrap one single-plane dmabuf in an `EGLImage` and copy it device-to-device into an owned
/// CUDA buffer. `fourcc` is the DRM FourCC; `modifier` is the explicit 64-bit DRM format
/// modifier when one was negotiated, or `None` to import with the buffer's implicit modifier
/// (base `EGL_EXT_image_dma_buf_import`, which the NVIDIA driver resolves for its own buffers).
///
/// The `EGLImage` only lives for the duration of the call; it is destroyed again whether or
/// not the copy succeeded.
pub fn import(
    &mut self,
    plane: &DmabufPlane,
    width: u32,
    height: u32,
    fourcc: u32,
    modifier: Option<u64>,
) -> Result<DeviceBuffer> {
    // 12 base entries + 4 optional modifier entries + the terminator.
    let mut attrs: Vec<egl::Attrib> = Vec::with_capacity(17);
    attrs.extend_from_slice(&[
        egl::WIDTH as egl::Attrib,
        width as egl::Attrib,
        egl::HEIGHT as egl::Attrib,
        height as egl::Attrib,
        EGL_LINUX_DRM_FOURCC_EXT,
        fourcc as egl::Attrib,
        EGL_DMA_BUF_PLANE0_FD_EXT,
        plane.fd as egl::Attrib,
        EGL_DMA_BUF_PLANE0_OFFSET_EXT,
        plane.offset as egl::Attrib,
        EGL_DMA_BUF_PLANE0_PITCH_EXT,
        plane.stride as egl::Attrib,
    ]);
    if let Some(m) = modifier {
        // The 64-bit modifier is passed as two 32-bit halves.
        let low_half = (m & 0xFFFF_FFFF) as egl::Attrib;
        let high_half = (m >> 32) as egl::Attrib;
        attrs.extend_from_slice(&[
            EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
            low_half,
            EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
            high_half,
        ]);
    }
    attrs.push(egl::ATTRIB_NONE);

    // The dmabuf target takes a null client buffer; the source is described by `attrs`.
    let null_client = unsafe { egl::ClientBuffer::from_ptr(std::ptr::null_mut()) };
    let image = self
        .egl
        .create_image(
            self.display,
            self.no_ctx,
            EGL_LINUX_DMA_BUF_EXT,
            null_client,
            &attrs,
        )
        .context("eglCreateImage(EGL_LINUX_DMA_BUF_EXT) — modifier mismatch?")?;

    // EGLImage → (sampled by a shader) → GL_RGBA8 texture → register *that* with CUDA → map
    // → array → copy out. Registering the EGLImage texture directly fails (its layout isn't a
    // CUDA-registrable format); the RGBA8 render target is.
    let copied = self.blit_and_copy(image.as_ptr(), width, height);
    // Destroy failures are deliberately ignored; the copy outcome is what the caller gets.
    let _ = self.egl.destroy_image(self.display, image);
    copied
}
/// Draw the dmabuf-backed `image` into the CUDA-registrable RGBA8 texture, then copy that
/// texture into a pooled, owned CUDA buffer. The per-size GL blit machinery is rebuilt
/// whenever `width`×`height` differs from what the current one was created for.
fn blit_and_copy(
    &mut self,
    image: *mut c_void,
    width: u32,
    height: u32,
) -> Result<DeviceBuffer> {
    cuda::make_current()?;

    let reusable = match self.blit.as_ref() {
        Some(current) => (current.width, current.height) == (width, height),
        None => false,
    };
    if !reusable {
        self.blit = Some(unsafe { GlBlit::new(width, height)? });
    }

    // Copy the fn pointer out first so the mutable borrow of `self.blit` stands alone.
    let target_fn = self.egl_image_target;
    let blit = self.blit.as_mut().unwrap();
    // SAFETY: GL + CUDA contexts current on this thread; `image` is a valid EGLImage.
    unsafe { blit.run(target_fn, image)? };

    // Persistent registration (mapped per frame) + a pooled buffer — no per-frame
    // cuGraphicsGLRegisterImage / cuMemAllocPitch.
    let out = blit.pool.get()?;
    blit.registered.copy_mapped_to(&out)?;
    Ok(out)
}
}
impl Drop for EglImporter {
    /// Destroy the GBM device and close the render-node fd that `new` opened.
    ///
    /// NOTE(review): the EGL display is not terminated here, and the remaining fields (GL blit
    /// machinery, Vulkan bridge, pools) are dropped only after this body returns, i.e. after
    /// the GBM device is gone. Confirm that ordering is acceptable for the GL side.
    fn drop(&mut self) {
        let (gbm, render_fd) = (self.gbm, self.render_fd);
        // SAFETY: both handles were produced by `new` and are released exactly once, here.
        unsafe {
            if !gbm.is_null() {
                gbm_device_destroy(gbm);
            }
            if render_fd >= 0 {
                libc::close(render_fd);
            }
        }
    }
}
+50
View File
@@ -0,0 +1,50 @@
//! Zero-copy capture→encode (plan §9): the PipeWire dmabuf is imported into CUDA via EGL and
//! handed straight to NVENC, eliminating the per-frame CPU copies (at 5K the CPU-copy path
//! moves ~3.5 GB/s). Opt in with `PUNKTFUNK_ZEROCOPY=1`; the CPU-copy path stays the default and
//! the runtime fallback (foreign-allocator / no-dmabuf / import failure).
//!
//! Pieces: [`cuda`] (driver-API FFI + the shared `CUcontext` + device buffers), [`egl`] (the
//! headless EGLDisplay + dmabuf→`EGLImage`→CUDA import), and [`vulkan`] (the bridge for LINEAR
//! dmabufs: dmabuf → exportable `OPAQUE_FD` memory → CUDA). The encoder's CUDA-frame path
//! lives in `encode/linux.rs`; the dmabuf negotiation lives in `capture/linux.rs`.
pub mod cuda;
pub mod egl;
pub mod vulkan;
pub use cuda::DeviceBuffer;
pub use egl::{DmabufPlane, EglImporter};
/// Whether the zero-copy path is opted in (`PUNKTFUNK_ZEROCOPY` truthy).
///
/// Truthy means `1`, `true`, `yes` or `on` after trimming surrounding whitespace, compared
/// ASCII-case-insensitively so that `TRUE` or `On` are not silently ignored. An unset,
/// non-Unicode or any other value leaves the path disabled.
pub fn enabled() -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    std::env::var("PUNKTFUNK_ZEROCOPY")
        .map(|raw| {
            let value = raw.trim();
            TRUTHY.iter().any(|t| value.eq_ignore_ascii_case(t))
        })
        .unwrap_or(false)
}
/// Pack a four-character format tag (e.g. `b"XR24"`) into its DRM FourCC code: the first
/// byte ends up in the low 8 bits, i.e. the tag read as a little-endian `u32`.
const fn fourcc(c: &[u8; 4]) -> u32 {
    u32::from_le_bytes(*c)
}
/// Map a SPA/our [`crate::capture::PixelFormat`] to the DRM FourCC EGL expects for import.
/// SPA byte order `BGRx` ⇒ DRM `XRGB8888` (memory B,G,R,X), etc.
pub fn drm_fourcc(format: crate::capture::PixelFormat) -> Option<u32> {
use crate::capture::PixelFormat::*;
Some(match format {
Bgrx => fourcc(b"XR24"), // DRM_FORMAT_XRGB8888
Bgra => fourcc(b"AR24"), // DRM_FORMAT_ARGB8888
Rgbx => fourcc(b"XB24"), // DRM_FORMAT_XBGR8888
Rgba => fourcc(b"AB24"), // DRM_FORMAT_ABGR8888
// 24-bit packed RGB/BGR have no straightforward dmabuf import here; use the CPU path.
Rgb | Bgr => return None,
})
}
/// Standalone probe (the `zerocopy-probe` subcommand): bring up the EGL importer and the
/// shared CUDA context, log success, and tear the importer down again. De-risks the
/// FFI/linking/GPU-access without needing a capture session.
///
/// # Errors
/// Returns whatever [`EglImporter::new`] or the CUDA context lookup reports.
pub fn probe() -> anyhow::Result<()> {
    let importer = EglImporter::new()?;
    let cuda_ctx = cuda::context()?;
    tracing::info!(cuda_ctx = ?cuda_ctx, "zero-copy probe OK — EGL display + CUDA context initialized");
    // The importer is only needed to prove bring-up works; release it explicitly.
    drop(importer);
    Ok(())
}
@@ -0,0 +1,366 @@
//! Vulkan bridge for LINEAR dmabufs (gamescope's only offer), completing zero-copy where the
//! other interops can't: NVIDIA's EGL won't sample LINEAR, and the CUDA driver rejects raw
//! dmabuf fds as external memory. Vulkan *does* import dmabufs (`VK_EXT_external_memory_dma_buf`)
//! and *does* export `OPAQUE_FD` memory that CUDA officially imports. So:
//!
//! ```text
//! dmabuf fd ──VkImportMemoryFdInfoKHR(DMA_BUF)──▶ VkBuffer (cached per fd)
//! │ vkCmdCopyBuffer (GPU, device-local)
//! ▼
//! exportable VkBuffer ──vkGetMemoryFdKHR(OPAQUE_FD)──▶ cuImportExternalMemory ──▶ CUdeviceptr
//! ```
//!
//! The exportable buffer + its CUDA mapping are created once per resolution; per frame it's one
//! GPU buffer copy (fence-waited) and one pitched CUDA copy into the encoder's pooled buffer.
//! No CPU ever touches pixels. Imports are cached per fd (PipeWire's buffer pool is stable for
//! a stream's life). Falls back cleanly: any init/import error disables the importer and the
//! CPU mmap path takes over.
use super::cuda::{self, DeviceBuffer};
use anyhow::{anyhow, bail, Context as _, Result};
use ash::vk;
use std::collections::HashMap;
/// Vulkan objects for one imported source dmabuf (cached per fd).
struct SrcBuf {
// Transfer-source buffer bound to the imported memory.
buffer: vk::Buffer,
// Device memory imported from a dup of the dmabuf fd (see `import_src`).
memory: vk::DeviceMemory,
// Size in bytes the buffer was created with; `import_linear` passes the dmabuf's `lseek` size.
size: u64,
}
/// The per-resolution destination: exportable Vulkan memory mapped into CUDA.
struct DstBuf {
// Transfer-destination buffer created with `OPAQUE_FD` export enabled.
buffer: vk::Buffer,
// Dedicated, device-local allocation backing `buffer`.
memory: vk::DeviceMemory,
// Capacity in bytes; `ensure_dst` keeps this destination while a requested size fits in it.
size: u64,
/// CUDA's view of the same memory (owns the exported OPAQUE_FD).
cuda: cuda::ExternalDmabuf,
}
/// Vulkan-side state of the LINEAR-dmabuf bridge: one device, one queue, one reusable command
/// buffer + fence, a cache of imported source dmabufs and the current exportable destination.
pub struct VkBridge {
// Never read after construction — presumably held so the loaded Vulkan library outlives
// `instance`/`device` (TODO confirm against ash's `Entry` semantics).
_entry: ash::Entry,
instance: ash::Instance,
device: ash::Device,
// Device-level function table used for vkGetMemoryFdKHR / vkGetMemoryFdPropertiesKHR.
ext_fd: ash::khr::external_memory_fd::Device,
// Queue 0 of the family chosen in `new`; every copy is submitted here.
queue: vk::Queue,
cmd_pool: vk::CommandPool,
// The single primary command buffer, re-recorded for each frame's copy.
cmd: vk::CommandBuffer,
// Signalled by each submit; waited on and reset before `import_linear` returns.
fence: vk::Fence,
// Memory properties of the chosen physical device, consulted by `memory_type`.
mem_props: vk::PhysicalDeviceMemoryProperties,
// Imported source buffers keyed by the caller's dmabuf fd number.
src_cache: HashMap<i32, SrcBuf>,
// Exportable destination + CUDA mapping; `None` until the first frame.
dst: Option<DstBuf>,
}
// SAFETY (as stated by the original author): the bridge is confined to the capture thread and
// moved there once, so its Vulkan handles are only ever used from a single thread.
// NOTE(review): nothing in this type enforces that confinement — it relies on the caller.
unsafe impl Send for VkBridge {}
impl VkBridge {
/// Bring up Vulkan on the NVIDIA GPU with the external-memory extensions.
pub fn new() -> Result<VkBridge> {
unsafe {
let entry = ash::Entry::load().context("load libvulkan")?;
let app = vk::ApplicationInfo::default().api_version(vk::API_VERSION_1_1);
let instance = entry
.create_instance(
&vk::InstanceCreateInfo::default().application_info(&app),
None,
)
.context("vkCreateInstance")?;
// Pick the NVIDIA GPU (matches CUDA device 0 on this single-dGPU host).
let phys = instance
.enumerate_physical_devices()
.context("enumerate GPUs")?
.into_iter()
.find(|&p| instance.get_physical_device_properties(p).vendor_id == 0x10DE)
.ok_or_else(|| anyhow!("no NVIDIA Vulkan device"))?;
let mem_props = instance.get_physical_device_memory_properties(phys);
// Any queue family supporting transfer (graphics/compute imply it).
let qf = instance
.get_physical_device_queue_family_properties(phys)
.iter()
.position(|q| {
q.queue_flags.intersects(
vk::QueueFlags::TRANSFER
| vk::QueueFlags::GRAPHICS
| vk::QueueFlags::COMPUTE,
)
})
.ok_or_else(|| anyhow!("no transfer-capable queue family"))?
as u32;
let exts = [
ash::khr::external_memory_fd::NAME.as_ptr(),
ash::ext::external_memory_dma_buf::NAME.as_ptr(),
];
let prio = [1.0f32];
let qci = [vk::DeviceQueueCreateInfo::default()
.queue_family_index(qf)
.queue_priorities(&prio)];
let device = instance
.create_device(
phys,
&vk::DeviceCreateInfo::default()
.queue_create_infos(&qci)
.enabled_extension_names(&exts),
None,
)
.context("vkCreateDevice (external-memory extensions supported?)")?;
let ext_fd = ash::khr::external_memory_fd::Device::new(&instance, &device);
let queue = device.get_device_queue(qf, 0);
let cmd_pool = device
.create_command_pool(
&vk::CommandPoolCreateInfo::default()
.queue_family_index(qf)
.flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER),
None,
)
.context("create command pool")?;
let cmd = device
.allocate_command_buffers(
&vk::CommandBufferAllocateInfo::default()
.command_pool(cmd_pool)
.level(vk::CommandBufferLevel::PRIMARY)
.command_buffer_count(1),
)
.context("allocate command buffer")?[0];
let fence = device
.create_fence(&vk::FenceCreateInfo::default(), None)
.context("create fence")?;
tracing::info!("Vulkan bridge ready (dmabuf import → OPAQUE_FD export → CUDA)");
Ok(VkBridge {
_entry: entry,
instance,
device,
ext_fd,
queue,
cmd_pool,
cmd,
fence,
mem_props,
src_cache: HashMap::new(),
dst: None,
})
}
}
/// Find the lowest memory-type index that is permitted by `type_bits` (bit `i` set ⇒ type
/// `i` allowed) and whose property flags include all of `flags`.
///
/// # Errors
/// Returns an error when no memory type of the device satisfies both conditions.
fn memory_type(&self, type_bits: u32, flags: vk::MemoryPropertyFlags) -> Result<u32> {
    for index in 0..self.mem_props.memory_type_count {
        let permitted = type_bits & (1 << index) != 0;
        if permitted
            && self.mem_props.memory_types[index as usize]
                .property_flags
                .contains(flags)
        {
            return Ok(index);
        }
    }
    Err(anyhow!("no compatible Vulkan memory type"))
}
/// Import `fd` (dup'd internally; Vulkan owns the dup) as a transfer-src buffer of `size`.
unsafe fn import_src(&mut self, fd: i32, size: u64) -> Result<()> {
let dup = libc::dup(fd);
if dup < 0 {
bail!("dup(dmabuf fd)");
}
let mut ext_info = vk::ExternalMemoryBufferCreateInfo::default()
.handle_types(vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT);
let buffer = self
.device
.create_buffer(
&vk::BufferCreateInfo::default()
.size(size)
.usage(vk::BufferUsageFlags::TRANSFER_SRC)
.push_next(&mut ext_info),
None,
)
.context("create import buffer")?;
let mut fd_props = vk::MemoryFdPropertiesKHR::default();
self.ext_fd
.get_memory_fd_properties(
vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT,
dup,
&mut fd_props,
)
.context("vkGetMemoryFdPropertiesKHR")?;
let reqs = self.device.get_buffer_memory_requirements(buffer);
let mem_type = self.memory_type(
reqs.memory_type_bits & fd_props.memory_type_bits,
vk::MemoryPropertyFlags::empty(),
)?;
let mut import = vk::ImportMemoryFdInfoKHR::default()
.handle_type(vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT)
.fd(dup); // Vulkan takes ownership of `dup` on success
let mut dedicated = vk::MemoryDedicatedAllocateInfo::default().buffer(buffer);
let memory = self
.device
.allocate_memory(
&vk::MemoryAllocateInfo::default()
.allocation_size(reqs.size.max(size))
.memory_type_index(mem_type)
.push_next(&mut import)
.push_next(&mut dedicated),
None,
)
.map_err(|e| {
libc::close(dup); // failed import does not consume the fd
anyhow!("import dmabuf memory: {e}")
})?;
self.device
.bind_buffer_memory(buffer, memory, 0)
.context("bind import memory")?;
self.src_cache.insert(
fd,
SrcBuf {
buffer,
memory,
size,
},
);
Ok(())
}
/// (Re)create the exportable destination of at least `size` bytes + its CUDA mapping.
unsafe fn ensure_dst(&mut self, size: u64) -> Result<()> {
if self.dst.as_ref().is_some_and(|d| d.size >= size) {
return Ok(());
}
if let Some(old) = self.dst.take() {
self.device.destroy_buffer(old.buffer, None);
self.device.free_memory(old.memory, None);
// old.cuda drops its mapping with it
}
let mut ext_info = vk::ExternalMemoryBufferCreateInfo::default()
.handle_types(vk::ExternalMemoryHandleTypeFlags::OPAQUE_FD);
let buffer = self
.device
.create_buffer(
&vk::BufferCreateInfo::default()
.size(size)
.usage(vk::BufferUsageFlags::TRANSFER_DST)
.push_next(&mut ext_info),
None,
)
.context("create export buffer")?;
let reqs = self.device.get_buffer_memory_requirements(buffer);
let mem_type =
self.memory_type(reqs.memory_type_bits, vk::MemoryPropertyFlags::DEVICE_LOCAL)?;
let mut export = vk::ExportMemoryAllocateInfo::default()
.handle_types(vk::ExternalMemoryHandleTypeFlags::OPAQUE_FD);
let mut dedicated = vk::MemoryDedicatedAllocateInfo::default().buffer(buffer);
let memory = self
.device
.allocate_memory(
&vk::MemoryAllocateInfo::default()
.allocation_size(reqs.size)
.memory_type_index(mem_type)
.push_next(&mut export)
.push_next(&mut dedicated),
None,
)
.context("allocate exportable memory")?;
self.device
.bind_buffer_memory(buffer, memory, 0)
.context("bind export memory")?;
let opaque_fd = self
.ext_fd
.get_memory_fd(
&vk::MemoryGetFdInfoKHR::default()
.memory(memory)
.handle_type(vk::ExternalMemoryHandleTypeFlags::OPAQUE_FD),
)
.context("vkGetMemoryFdKHR")?;
// CUDA imports (and on success owns) the exported fd. Size must match the allocation.
let cuda = cuda::ExternalDmabuf::import_owned_fd(opaque_fd, reqs.size)
.context("cuImportExternalMemory(OPAQUE_FD from Vulkan)")?;
tracing::info!(size, "Vulkan→CUDA exportable staging buffer ready");
self.dst = Some(DstBuf {
buffer,
memory,
size: reqs.size,
cuda,
});
Ok(())
}
/// Bridge one LINEAR dmabuf frame into a pooled CUDA buffer: GPU copy dmabuf→exportable,
/// then pitched CUDA copy exportable→`pool` buffer.
///
/// `offset`/`stride` describe the plane inside the dmabuf and `height` its row count; the
/// bytes `0 .. offset + stride * height` (the frame span) are copied. The import for `fd` is
/// cached; a cached import that is smaller than this frame's span is treated as stale (the fd
/// number now refers to a different buffer), dropped and re-imported, instead of silently
/// truncating the copy and letting CUDA read past it.
///
/// # Errors
/// Fails if the dmabuf is smaller than the frame span, or if any Vulkan/CUDA step fails.
pub fn import_linear(
    &mut self,
    fd: i32,
    offset: u32,
    stride: u32,
    height: u32,
    pool: &cuda::BufferPool,
) -> Result<DeviceBuffer> {
    unsafe {
        let span = offset as u64 + stride as u64 * height as u64;
        // Evict a cached import that cannot cover this frame. Earlier copies were
        // fence-waited before returning, so nothing on the GPU still uses it.
        if self.src_cache.get(&fd).is_some_and(|s| s.size < span) {
            if let Some(stale) = self.src_cache.remove(&fd) {
                self.device.destroy_buffer(stale.buffer, None);
                self.device.free_memory(stale.memory, None);
            }
        }
        if !self.src_cache.contains_key(&fd) {
            // A dmabuf reports its size through lseek(SEEK_END).
            let size = libc::lseek(fd, 0, libc::SEEK_END);
            anyhow::ensure!(size > 0, "lseek(dmabuf)");
            anyhow::ensure!(size as u64 >= span, "dmabuf smaller than frame span");
            self.import_src(fd, size as u64)?;
        }
        let src_buffer = self.src_cache[&fd].buffer;
        // Every cached import is now at least `span` bytes, so the whole span is copied.
        let copy_size = span;
        self.ensure_dst(copy_size)?;
        let dst = self.dst.as_ref().unwrap();
        // Record + submit the GPU copy, wait on the fence (GPU-GPU, sub-millisecond).
        self.device
            .begin_command_buffer(
                self.cmd,
                &vk::CommandBufferBeginInfo::default()
                    .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT),
            )
            .context("begin cmd")?;
        let region = vk::BufferCopy::default().size(copy_size);
        self.device
            .cmd_copy_buffer(self.cmd, src_buffer, dst.buffer, &[region]);
        self.device
            .end_command_buffer(self.cmd)
            .context("end cmd")?;
        let cmds = [self.cmd];
        let submit = vk::SubmitInfo::default().command_buffers(&cmds);
        self.device
            .queue_submit(self.queue, &[submit], self.fence)
            .context("queue submit")?;
        // One-second timeout (in nanoseconds).
        self.device
            .wait_for_fences(&[self.fence], true, 1_000_000_000)
            .context("fence wait")?;
        self.device
            .reset_fences(&[self.fence])
            .context("reset fence")?;
        // De-stride from the CUDA view of the exportable memory into a pooled buffer.
        cuda::make_current()?;
        let out = pool.get()?;
        cuda::copy_pitched_to_buffer(dst.cuda.ptr + offset as u64, stride as usize, &out)?;
        Ok(out)
    }
}
}
impl Drop for VkBridge {
fn drop(&mut self) {
unsafe {
let _ = self.device.device_wait_idle();
for (_, s) in self.src_cache.drain() {
self.device.destroy_buffer(s.buffer, None);
self.device.free_memory(s.memory, None);
}
if let Some(d) = self.dst.take() {
self.device.destroy_buffer(d.buffer, None);
self.device.free_memory(d.memory, None);
}
self.device.destroy_fence(self.fence, None);
self.device.destroy_command_pool(self.cmd_pool, None);
self.device.destroy_device(None);
self.instance.destroy_instance(None);
}
}
}