Merge remote-tracking branch 'origin/main'

# Conflicts: # docs-site/content/docs/meta.json
feat(host): Apollo-backlog hardening — cert gate, NVENC RFI, media QoS, async injector
2026-06-21 00:07:36 +00:00 · 2026-06-21 00:06:30 +00:00 · 2026-06-20 23:32:23 +00:00 · 2026-06-20 22:20:00 +00:00 · 2026-06-20 21:31:07 +00:00 · 2026-06-20 17:46:17 +00:00
34 changed files with 2080 additions and 434 deletions
@@ -296,7 +296,8 @@ or 8.x/libavcodec 62** — validated live on Ubuntu 26.04 (8) and Bazzite F43 (7
 FFI also link-needs `libGL`/`libgbm`/`libcuda` at build time). Env knobs: `PUNKTFUNK_VIDEO_SOURCE=virtual|portal`,
 `PUNKTFUNK_COMPOSITOR=kwin|gamescope|mutter`, `PUNKTFUNK_ZEROCOPY=1`, `PUNKTFUNK_GAMESCOPE_APP=...`,
 `PUNKTFUNK_INPUT_BACKEND=...`, `PUNKTFUNK_PERF=1` (per-stage timing), `PUNKTFUNK_VIDEO_DROP=N` (FEC
-test), `PUNKTFUNK_FEC_PCT=N`.
+test), `PUNKTFUNK_FEC_PCT=N`, `PUNKTFUNK_DSCP=1` (opt-in DSCP/SO_PRIORITY media QoS on the data +
+GameStream video/audio sockets; no-op on the wire on Windows without a qWAVE policy).

 ## Conventions

@@ -45,7 +45,8 @@ use punktfunk_core::config::Role;
 use punktfunk_core::input::{InputEvent, InputKind};
 use punktfunk_core::packet::FLAG_PROBE;
 use punktfunk_core::quic::{
-    endpoint, io, Hello, ProbeRequest, ProbeResult, Reconfigure, Reconfigured, Start, Welcome,
+    endpoint, io, window_loss_ppm, Hello, LossReport, ProbeRequest, ProbeResult, Reconfigure,
+    Reconfigured, Start, Welcome,
 };
 use punktfunk_core::transport::UdpTransport;
 use punktfunk_core::{CompositorPref, Mode, PunktfunkError, Session};
@@ -433,13 +434,15 @@ async fn session(args: Args) -> Result<()> {
        None => None,
    };

-    // Speed-test accumulators: the data-plane loop folds each FLAG_PROBE filler AU in here; the
-    // --speed-test reporter below reads them once the host's ProbeResult lands. first/last hold
-    // now_ns timestamps of the receive window (0 = unset).
-    let probe_recv_bytes = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
-    let probe_recv_packets = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
-    let probe_first_ns = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
-    let probe_last_ns = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
+    // Packet-level receive counters mirrored from `session.stats()` by the data-plane loop. The
+    // speed test reads their delta over the burst window so throughput/loss reflect every delivered
+    // wire packet (graceful past the FEC budget), not just fully-reassembled probe AUs.
+    let rx_wire_packets = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
+    let rx_wire_bytes = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
+    // Adaptive-FEC loss feedback: the data loop publishes a windowed loss estimate here; in normal
+    // stream mode (no speed test / remode) a control-stream task relays it to the host as a
+    // LossReport so it can size FEC to the link. u32::MAX = "no fresh sample this window".
+    let loss_ppm = std::sync::Arc::new(std::sync::atomic::AtomicU32::new(u32::MAX));

    // Mid-stream renegotiation test: after a delay, ask the host to switch modes on the
    // still-open control stream. The stream then carries new-mode AUs (IDR + in-band
@@ -470,19 +473,25 @@ async fn session(args: Args) -> Result<()> {
            }
        });
    } else if let Some((target_kbps, duration_ms)) = args.speed_test {
-        // Bandwidth probe: after the stream warms up, ask the host to burst FLAG_PROBE filler;
-        // measure what arrives vs. what it reports sending.
+        // Bandwidth probe: after the stream warms up, ask the host to burst FLAG_PROBE filler; measure
+        // delivered WIRE packets (session-stat delta) vs. what the host reports putting on the wire.
        let mut ss = send;
        let mut sr = recv;
-        let (pb, pp, pf, pl) = (
-            probe_recv_bytes.clone(),
-            probe_recv_packets.clone(),
-            probe_first_ns.clone(),
-            probe_last_ns.clone(),
-        );
+        let (rxp, rxb) = (rx_wire_packets.clone(), rx_wire_bytes.clone());
+        // Per-packet crypto overhead (0 when the session is unencrypted), used to express delivered
+        // bytes as link bytes (header + shard + crypto); every shard is zero-padded to
+        // shard_payload so all data packets are the same exact size.
+        let crypto_overhead = if welcome.encrypt {
+            punktfunk_core::packet::CRYPTO_OVERHEAD as u64
+        } else {
+            0
+        };
        tokio::spawn(async move {
            use std::sync::atomic::Ordering::Relaxed;
            tokio::time::sleep(std::time::Duration::from_secs(2)).await; // let the stream warm up
+            // Baseline the packet-level counters right before the burst (video is paused during it,
+            // so the delta is pure probe traffic plus a sliver of resumed video in the settle).
+            let base_pkts = rxp.load(Relaxed);
+            let base_bytes = rxb.load(Relaxed);
            tracing::info!(target_kbps, duration_ms, "requesting speed-test probe");
            if io::write_msg(
                &mut ss,
@@ -505,37 +514,65 @@ async fn session(args: Args) -> Result<()> {
                    return;
                }
            };
-            // The reliable result can beat the last UDP shards — let them reassemble.
-            tokio::time::sleep(std::time::Duration::from_millis(400)).await;
-            let recv_bytes = pb.load(Relaxed);
-            let recv_packets = pp.load(Relaxed);
-            let (first, last) = (pf.load(Relaxed), pl.load(Relaxed));
-            let window_ms = if first > 0 && last > first {
-                (last - first) / 1_000_000
+            // The reliable result can beat the last UDP shards — let the tail arrive before reading.
+            // Keep this short: video resumes the instant the burst ends, so a long settle counts
+            // resumed-video packets against the probe (inflating recv past the host's wire count).
+            tokio::time::sleep(std::time::Duration::from_millis(60)).await;
+            let recv_packets = rxp.load(Relaxed).saturating_sub(base_pkts);
+            // bytes_received counts plaintext (header + shard); add per-packet crypto back for the
+            // true on-wire byte count.
+            let recv_wire_bytes =
+                rxb.load(Relaxed).saturating_sub(base_bytes) + recv_packets * crypto_overhead;
+            // The host's burst duration is the rate denominator (it sent for this long).
+            let window_ms = res.duration_ms.max(1) as u64;
+            let throughput_kbps = recv_wire_bytes.saturating_mul(8) / window_ms;
+            // Link loss: wire packets the host put out that didn't arrive. host_drop: wire packets
+            // the host couldn't even hand to the kernel (send buffer too small / can't keep up).
+            let link_loss = if res.wire_packets_sent > 0 {
+                (res.wire_packets_sent as i64 - recv_packets as i64).max(0) as f64
+                    / res.wire_packets_sent as f64
+                    * 100.0
            } else {
-                0
+                0.0
            };
-            let throughput_kbps = recv_bytes
-                .saturating_mul(8)
-                .checked_div(window_ms)
-                .unwrap_or(0);
-            let loss_pct = if res.bytes_sent > 0 {
-                res.bytes_sent.saturating_sub(recv_bytes) as f64 / res.bytes_sent as f64 * 100.0
+            let offered_wire = res.wire_packets_sent + res.send_dropped;
+            let host_drop = if offered_wire > 0 {
+                res.send_dropped as f64 / offered_wire as f64 * 100.0
            } else {
                0.0
            };
            tracing::info!(
                target_kbps,
-                host_sent_bytes = res.bytes_sent,
-                host_sent_packets = res.packets_sent,
-                recv_bytes,
-                recv_packets,
-                window_ms,
-                throughput_kbps,
-                loss_pct = format!("{loss_pct:.1}%"),
+                target_mbps = target_kbps / 1000,
+                delivered_mbps = throughput_kbps / 1000,
+                link_loss_pct = format!("{link_loss:.1}%"),
+                host_drop_pct = format!("{host_drop:.1}%"),
+                wire_pkts_sent = res.wire_packets_sent,
+                wire_pkts_recv = recv_packets,
+                send_dropped = res.send_dropped,
                "SPEED TEST complete",
            );
        });
+    } else {
+        // Normal stream mode: relay the data loop's windowed loss estimate to the host as periodic
+        // LossReports, so it can size FEC to the link (adaptive FEC). The control stream is otherwise
+        // idle here (remode/speed-test own it in their modes).
+        let mut ls = send;
+        let lp = loss_ppm.clone();
+        tokio::spawn(async move {
+            use std::sync::atomic::Ordering::Relaxed;
+            loop {
+                tokio::time::sleep(std::time::Duration::from_millis(750)).await;
+                let v = lp.swap(u32::MAX, Relaxed);
+                if v != u32::MAX
+                    && io::write_msg(&mut ls, &LossReport { loss_ppm: v }.encode())
+                        .await
+                        .is_err()
+                {
+                    break; // control stream gone
+                }
+            }
+        });
    }

    // Input plane: scripted events as QUIC datagrams (mouse square + 'A' taps), proving the
@@ -810,12 +847,8 @@ async fn session(args: Args) -> Result<()> {
    let cfg = welcome.session_config(Role::Client);
    let expected = welcome.frames;
    let out_path = args.out.clone();
-    let (pb, pp, pf, pl) = (
-        probe_recv_bytes.clone(),
-        probe_recv_packets.clone(),
-        probe_first_ns.clone(),
-        probe_last_ns.clone(),
-    );
+    let (rxp_dt, rxb_dt) = (rx_wire_packets.clone(), rx_wire_bytes.clone());
+    let lp_dt = loss_ppm.clone();

    // Express our receive time in the host clock before differencing against the host-stamped
    // capture pts. 0 ⇒ same-host or an old host that didn't answer the skew handshake (the latency
@@ -850,7 +883,32 @@ async fn session(args: Args) -> Result<()> {
        let mut latencies_us: Vec<u64> = Vec::new();
        let mut last_rx = std::time::Instant::now();
        let started = std::time::Instant::now();
+        // Adaptive-FEC loss window: publish a fresh estimate every 750 ms for the LossReport task.
+        let mut last_loss_report = std::time::Instant::now();
+        let (mut last_recovered, mut last_received, mut last_dropped) = (0u64, 0u64, 0u64);
        loop {
+            // Mirror packet-level receive counters for the speed-test reporter (reads their delta),
+            // and publish a windowed loss estimate for the adaptive-FEC LossReport task.
+            {
+                use std::sync::atomic::Ordering::Relaxed;
+                let s = session.stats();
+                rxp_dt.store(s.packets_received, Relaxed);
+                rxb_dt.store(s.bytes_received, Relaxed);
+                if last_loss_report.elapsed() >= std::time::Duration::from_millis(750) {
+                    lp_dt.store(
+                        window_loss_ppm(
+                            s.fec_recovered_shards.wrapping_sub(last_recovered),
+                            s.packets_received.wrapping_sub(last_received),
+                            s.frames_dropped.wrapping_sub(last_dropped),
+                        ),
+                        Relaxed,
+                    );
+                    last_loss_report = std::time::Instant::now();
+                    last_recovered = s.fec_recovered_shards;
+                    last_received = s.packets_received;
+                    last_dropped = s.frames_dropped;
+                }
+            }
            if expected > 0 && ok + mismatched >= expected {
                break;
            }
@@ -867,15 +925,9 @@ async fn session(args: Args) -> Result<()> {
            match session.poll_frame() {
                Ok(frame) => {
                    last_rx = std::time::Instant::now();
-                    // Speed-test filler isn't video: fold it into the probe accumulators and skip
-                    // verification / the --out sink.
+                    // Speed-test filler isn't video: it's measured via the packet-level counters
+                    // mirrored at the loop head — skip verification / the --out sink.
                    if frame.flags & FLAG_PROBE as u32 != 0 {
-                        use std::sync::atomic::Ordering::Relaxed;
-                        let n = now_ns();
-                        let _ = pf.compare_exchange(0, n, Relaxed, Relaxed);
-                        pl.store(n, Relaxed);
-                        pb.fetch_add(frame.data.len() as u64, Relaxed);
-                        pp.fetch_add(1, Relaxed);
                        continue;
                    }
                    bytes += frame.data.len() as u64;
@@ -31,7 +31,9 @@ fec-rs = { path = "vendor/fec-rs" }
 aes-gcm = "0.10"               # AES-128-GCM session crypto, matches GameStream
 zerocopy = { version = "0.8", features = ["derive"] }
 bytes = "1"
-socket2 = "0.6"                # set SO_SNDBUF/SO_RCVBUF — default UDP buffers are too small for 4K/5K frame bursts
+socket2 = { version = "0.6", features = [
+    "all",
+] } # SO_SNDBUF/SO_RCVBUF growth (default UDP buffers too small for 4K/5K bursts) + DSCP/SO_PRIORITY media QoS
 thiserror = "2"
 tracing = { version = "0.1", default-features = false, features = ["std"] }
 rand = "0.9"
@@ -1525,24 +1525,31 @@ pub unsafe extern "C" fn punktfunk_connection_frames_dropped(

 /// A speed-test measurement, filled by [`punktfunk_connection_probe_result`]. `done` is 0 until
 /// the host's end-of-burst report lands, then 1 (the numbers are final). `throughput_kbps` is the
-/// measured goodput to drive a bitrate choice from; `loss_pct` is the delivery loss at that rate.
+/// delivered wire throughput to drive a bitrate choice from; `loss_pct` is the link loss and
+/// `host_drop_pct` the host-side send-buffer drop (raise `net.core.wmem_max`) — they're measured
+/// separately so a host that can't keep up reads differently from a lossy link.
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Default)]
 pub struct PunktfunkProbeResult {
    /// 1 once the host's end-of-burst report arrived (measurement final); else 0 (partial).
    pub done: u8,
-    /// Probe payload bytes / packets the client received.
+    /// Delivered wire bytes (header + shard) / packets the client received during the burst.
    pub recv_bytes: u64,
    pub recv_packets: u32,
-    /// Probe payload bytes / packets the host reported sending.
+    /// Application goodput bytes / access units the host offered.
    pub host_bytes: u64,
    pub host_packets: u32,
-    /// Client-measured receive window (first→last probe AU), milliseconds.
+    /// The host's measured burst duration, milliseconds (the throughput denominator).
    pub elapsed_ms: u32,
-    /// Measured goodput = `recv_bytes * 8 / elapsed_ms` (kilobits/second).
+    /// Delivered wire throughput = `recv_bytes * 8 / elapsed_ms` (kilobits/second).
    pub throughput_kbps: u32,
-    /// Delivery loss `(host_bytes - recv_bytes) / host_bytes` as a percentage (0 if unknown).
+    /// Link loss `(wire_packets_sent − recv_packets) / wire_packets_sent` as a percentage.
    pub loss_pct: f32,
+    /// Host-side send-buffer drop `send_dropped / (wire_packets_sent + send_dropped)`, percent.
+    pub host_drop_pct: f32,
+    /// Wire packets the host put on the link, and the ones its send buffer dropped (raw counts).
+    pub wire_packets_sent: u32,
+    pub send_dropped: u32,
 }

 /// Start a bandwidth speed test: ask the host to burst filler over the data plane at
@@ -1602,6 +1609,9 @@ pub unsafe extern "C" fn punktfunk_connection_probe_result(
                elapsed_ms: o.elapsed_ms,
                throughput_kbps: o.throughput_kbps,
                loss_pct: o.loss_pct,
+                host_drop_pct: o.host_drop_pct,
+                wire_packets_sent: o.wire_packets_sent,
+                send_dropped: o.send_dropped,
            };
        }
        PunktfunkStatus::Ok
@@ -16,8 +16,8 @@ use crate::error::{PunktfunkError, Result};
 use crate::input::InputEvent;
 use crate::packet::FLAG_PROBE;
 use crate::quic::{
-    endpoint, io, Hello, HidOutput, ProbeRequest, ProbeResult, Reconfigure, Reconfigured,
-    RequestKeyframe, RichInput, Start, Welcome,
+    endpoint, io, window_loss_ppm, Hello, HidOutput, LossReport, ProbeRequest, ProbeResult,
+    Reconfigure, Reconfigured, RequestKeyframe, RichInput, Start, Welcome,
 };
 use crate::session::{Frame, Session};
 use crate::transport::UdpTransport;
@@ -33,6 +33,7 @@ enum CtrlRequest {
    Mode(Mode),
    Probe(ProbeRequest),
    Keyframe,
+    Loss(LossReport),
 }

 /// What the worker reports to [`NativeClient::connect`] once the handshake lands: the negotiated
@@ -41,22 +42,35 @@ enum CtrlRequest {
 /// (ns, host minus client; 0 = no skew correction / an old host that didn't answer the handshake).
 type Negotiated = (Mode, CompositorPref, GamepadPref, [u8; 32], u32, i64);

-/// Accumulated state of an in-flight / finished speed test. The data-plane pump folds each
-/// received [`FLAG_PROBE`] access unit in; the control task records the host's [`ProbeResult`]
-/// when it lands. Read (and finalized into numbers) by [`NativeClient::probe_result`].
+/// Accumulated state of an in-flight / finished speed test. The data-plane pump mirrors the
+/// session's packet-level receive counters here; the control task finalizes the delivered figure
+/// and folds in the host's [`ProbeResult`] when it lands. Read by [`NativeClient::probe_result`].
+///
+/// Counting at the *packet* level (every delivered wire packet) — not whole reassembled probe AUs —
+/// is what makes the measurement degrade gracefully: once loss exceeds the FEC budget no AU
+/// completes, so the old AU-based count cliffed to zero even though most bytes still arrived.
 #[derive(Default)]
 struct ProbeState {
    /// A probe is in progress (set by `request_probe`, cleared by nothing — the latest one wins).
    active: bool,
-    /// Probe access-unit payload bytes the client received, and their count.
-    recv_bytes: u64,
-    recv_packets: u32,
-    /// First/last probe AU arrival — the measured receive window.
-    start: Option<Instant>,
-    last: Option<Instant>,
-    /// The host's report ([`ProbeResult`]); present once the burst finished.
-    host_bytes: u64,
-    host_packets: u32,
+    /// `session.stats()` receive counters at the burst's start (snapshotted by the pump on its first
+    /// tick while active) and latest, mirrored every pump iteration.
+    base_packets: Option<u64>,
+    base_bytes: Option<u64>,
+    rx_packets_now: u64,
+    rx_bytes_now: u64,
+    /// Delivered wire packets / plaintext bytes (header + shard), frozen when the host's report lands
+    /// (so resumed video after the burst can't inflate them).
+    delivered_packets: u64,
+    delivered_bytes: u64,
+    /// The host's end-of-burst report.
+    host_goodput_bytes: u64,
+    host_au: u32,
+    /// Wire packets the host actually put on the link, and the ones its send buffer dropped.
+    host_wire_packets: u32,
+    host_send_dropped: u32,
+    /// The host's measured burst duration (the throughput denominator).
+    host_duration_ms: u32,
    /// The host's `ProbeResult` arrived → the measurement is final.
    done: bool,
 }
@@ -66,19 +80,27 @@ struct ProbeState {
 pub struct ProbeOutcome {
    /// The host's end-of-burst report has arrived — the numbers below are final.
    pub done: bool,
-    /// Probe payload bytes / packets the client received.
+    /// Delivered wire bytes (header + shard) / packets the client received during the burst.
    pub recv_bytes: u64,
    pub recv_packets: u32,
-    /// Probe payload bytes / packets the host reported sending.
+    /// Application goodput bytes / access units the host offered.
    pub host_bytes: u64,
    pub host_packets: u32,
-    /// The client-measured receive window (first→last probe AU), in milliseconds.
+    /// The burst duration the host measured, in milliseconds (the throughput denominator).
    pub elapsed_ms: u32,
-    /// Measured goodput = `recv_bytes * 8 / elapsed_ms` (kilobits/second). This is the figure to
-    /// drive a [`Hello::bitrate_kbps`] choice from.
+    /// Delivered wire throughput = `recv_bytes * 8 / elapsed_ms` (kilobits/second). The figure to
+    /// drive a [`Hello::bitrate_kbps`] choice from (allow headroom for the FEC overhead + loss).
    pub throughput_kbps: u32,
-    /// Delivery loss = `(host_bytes - recv_bytes) / host_bytes`, as a percentage (0 if unknown).
+    /// Link loss = `(wire_packets_sent − received) / wire_packets_sent`, percent. Packets the host
+    /// put on the wire that didn't arrive.
    pub loss_pct: f32,
+    /// Host-side drop = `send_dropped / (wire_packets_sent + send_dropped)`, percent. Packets the
+    /// host's send buffer couldn't accept (raise `net.core.wmem_max` / lower the rate). Distinct
+    /// from `loss_pct`: this is the host failing to keep up, not the link dropping traffic.
+    pub host_drop_pct: f32,
+    /// Wire packets the host put on the link and the ones its send buffer dropped (raw counts).
+    pub wire_packets_sent: u32,
+    pub send_dropped: u32,
 }

 /// Frames buffered between the data-plane pump and the embedder. Small: the embedder
@@ -224,6 +246,7 @@ impl NativeClient {
        let mode_slot_w = mode_slot.clone();
        let probe_w = probe.clone();
        let frames_dropped_w = frames_dropped.clone();
+        let ctrl_tx_pump = ctrl_tx.clone(); // the data-plane pump sends adaptive-FEC LossReports
        let worker = std::thread::Builder::new()
            .name("punktfunk-client".into())
            .spawn(move || {
@@ -261,6 +284,7 @@ impl NativeClient {
                    mic_rx,
                    rich_input_rx,
                    ctrl_rx,
+                    ctrl_tx: ctrl_tx_pump,
                    ready_tx,
                    shutdown: shutdown_w,
                    mode_slot: mode_slot_w,
@@ -458,30 +482,52 @@ impl NativeClient {
    /// end-of-burst report lands). Derives delivered wire throughput, link loss and host-side drop
    /// from the packet-level receive counters and the host's report.
    pub fn probe_result(&self) -> ProbeOutcome {
        let p = self.probe.lock().unwrap();
-        let elapsed_ms = match (p.start, p.last) {
-            (Some(s), Some(l)) => l.duration_since(s).as_millis() as u32,
-            _ => 0,
+        // Delivered figures: live (rx_now − base) while the burst runs, frozen at the host's report.
+        let (delivered_packets, delivered_bytes) = if p.done {
+            (p.delivered_packets, p.delivered_bytes)
+        } else {
+            let base_p = p.base_packets.unwrap_or(p.rx_packets_now);
+            let base_b = p.base_bytes.unwrap_or(p.rx_bytes_now);
+            (
+                p.rx_packets_now.saturating_sub(base_p),
+                p.rx_bytes_now.saturating_sub(base_b),
+            )
        };
-        // bytes × 8 / ms = kilobits/second.
-        let throughput_kbps = if elapsed_ms > 0 {
-            (p.recv_bytes.saturating_mul(8) / elapsed_ms as u64) as u32
+        // The host's burst duration is the throughput denominator. bytes × 8 / ms = kilobits/second.
+        let window_ms = p.host_duration_ms;
+        let throughput_kbps = if window_ms > 0 {
+            (delivered_bytes.saturating_mul(8) / window_ms as u64) as u32
        } else {
            0
        };
-        let loss_pct = if p.host_bytes > 0 {
-            p.host_bytes.saturating_sub(p.recv_bytes) as f64 / p.host_bytes as f64 * 100.0
+        // Link loss: wire packets the host put out that didn't arrive. Packet-level, so it degrades
+        // smoothly past the FEC budget instead of cliffing to 100% the moment AUs stop completing.
+        let loss_pct = if p.host_wire_packets > 0 {
+            (p.host_wire_packets as i64 - delivered_packets as i64).max(0) as f64
+                / p.host_wire_packets as f64
+                * 100.0
+        } else {
+            0.0
+        } as f32;
+        // Host-side drop: what the send buffer couldn't even accept (the host-side ceiling).
+        let offered_wire = p.host_wire_packets + p.host_send_dropped;
+        let host_drop_pct = if offered_wire > 0 {
+            p.host_send_dropped as f64 / offered_wire as f64 * 100.0
        } else {
            0.0
        } as f32;
        ProbeOutcome {
            done: p.done,
-            recv_bytes: p.recv_bytes,
-            recv_packets: p.recv_packets,
-            host_bytes: p.host_bytes,
-            host_packets: p.host_packets,
-            elapsed_ms,
+            recv_bytes: delivered_bytes,
+            recv_packets: delivered_packets as u32,
+            host_bytes: p.host_goodput_bytes,
+            host_packets: p.host_au,
+            elapsed_ms: window_ms,
            throughput_kbps,
            loss_pct,
+            host_drop_pct,
+            wire_packets_sent: p.host_wire_packets,
+            send_dropped: p.host_send_dropped,
        }
    }

@@ -586,6 +632,7 @@ struct WorkerArgs {
    mic_rx: tokio::sync::mpsc::UnboundedReceiver<(u32, u64, Vec<u8>)>,
    rich_input_rx: tokio::sync::mpsc::UnboundedReceiver<RichInput>,
    ctrl_rx: tokio::sync::mpsc::UnboundedReceiver<CtrlRequest>,
+    ctrl_tx: tokio::sync::mpsc::UnboundedSender<CtrlRequest>,
    ready_tx: std::sync::mpsc::Sender<Result<Negotiated>>,
    shutdown: Arc<AtomicBool>,
    mode_slot: Arc<std::sync::Mutex<Mode>>,
@@ -615,6 +662,7 @@ async fn worker_main(args: WorkerArgs) {
        mut mic_rx,
        mut rich_input_rx,
        mut ctrl_rx,
+        ctrl_tx,
        ready_tx,
        shutdown,
        mode_slot,
@@ -808,6 +856,7 @@ async fn worker_main(args: WorkerArgs) {
                            CtrlRequest::Mode(m) => Reconfigure { mode: m }.encode(),
                            CtrlRequest::Probe(p) => p.encode(),
                            CtrlRequest::Keyframe => RequestKeyframe.encode(),
+                            CtrlRequest::Loss(r) => r.encode(),
                        };
                        if io::write_msg(&mut ctrl_send, &bytes).await.is_err() {
                            break;
@@ -824,13 +873,24 @@ async fn worker_main(args: WorkerArgs) {
                            }
                        } else if let Ok(result) = ProbeResult::decode(&msg) {
                            let mut p = probe.lock().unwrap();
-                            p.host_bytes = result.bytes_sent;
-                            p.host_packets = result.packets_sent;
+                            // Freeze the delivered figures now (the burst is done), before resumed
+                            // video can inflate the packet counters.
+                            let base_p = p.base_packets.unwrap_or(p.rx_packets_now);
+                            let base_b = p.base_bytes.unwrap_or(p.rx_bytes_now);
+                            p.delivered_packets = p.rx_packets_now.saturating_sub(base_p);
+                            p.delivered_bytes = p.rx_bytes_now.saturating_sub(base_b);
+                            p.host_goodput_bytes = result.bytes_sent;
+                            p.host_au = result.packets_sent;
+                            p.host_wire_packets = result.wire_packets_sent;
+                            p.host_send_dropped = result.send_dropped;
+                            p.host_duration_ms = result.duration_ms;
                            p.done = true;
                            tracing::info!(
-                                bytes_sent = result.bytes_sent,
-                                packets_sent = result.packets_sent,
+                                host_goodput_bytes = result.bytes_sent,
+                                wire_packets_sent = result.wire_packets_sent,
+                                send_dropped = result.send_dropped,
                                duration_ms = result.duration_ms,
+                                delivered_packets = p.delivered_packets,
                                "speed-test probe result"
                            );
                        } else {
@@ -890,23 +950,45 @@ async fn worker_main(args: WorkerArgs) {
    let pump_probe = probe.clone();
    let _ = tokio::task::spawn_blocking(move || {
        pin_thread_user_interactive(); // feeds frame_tx → the client's user-interactive video pump
+        // Adaptive-FEC loss reporting: every ADAPT_REPORT_INTERVAL, report the loss observed over the
+        // window (shards FEC recovered, plus a bump if any frame went unrecoverable) so the host can
+        // size FEC to the link. Suppressed during a speed test (its FLAG_PROBE filler would skew it).
+        // NOTE(review): the window baselines are not advanced while suppressed, so the first report
+        // after a probe still spans the probe's traffic — confirm whether that is intended.
+        const ADAPT_REPORT_INTERVAL: Duration = Duration::from_millis(750);
+        let mut last_report = Instant::now();
+        let (mut last_recovered, mut last_received, mut last_dropped) = (0u64, 0u64, 0u64);
        while !pump_shutdown.load(Ordering::SeqCst) {
            // Mirror the reassembler's unrecoverable-drop count for the client's keyframe-recovery
-            // loop. Updated every iteration (not just on a produced frame) so it stays current through
-            // a total-loss drought where no AU completes. Cheap: a few relaxed atomic loads.
-            frames_dropped.store(session.stats().frames_dropped, Ordering::Relaxed);
+            // loop, and (during a speed test) the packet-level receive counters for the throughput
+            // measurement. Updated every iteration (not just on a produced frame) so they stay current
+            // through a total-loss drought where no AU completes. Cheap: a few relaxed atomic loads.
+            let st = session.stats();
+            frames_dropped.store(st.frames_dropped, Ordering::Relaxed);
+            let probe_active = {
+                let mut p = pump_probe.lock().unwrap();
+                if p.active && !p.done {
+                    p.rx_packets_now = st.packets_received;
+                    p.rx_bytes_now = st.bytes_received;
+                    p.base_packets.get_or_insert(st.packets_received);
+                    p.base_bytes.get_or_insert(st.bytes_received);
+                }
+                p.active && !p.done
+            };
+            if !probe_active && last_report.elapsed() >= ADAPT_REPORT_INTERVAL {
+                let loss_ppm = window_loss_ppm(
+                    st.fec_recovered_shards.wrapping_sub(last_recovered),
+                    st.packets_received.wrapping_sub(last_received),
+                    st.frames_dropped.wrapping_sub(last_dropped),
+                );
+                let _ = ctrl_tx.send(CtrlRequest::Loss(LossReport { loss_ppm }));
+                last_report = Instant::now();
+                last_recovered = st.fec_recovered_shards;
+                last_received = st.packets_received;
+                last_dropped = st.frames_dropped;
+            }
            match session.poll_frame() {
                Ok(frame) => {
                    if frame.flags & FLAG_PROBE as u32 != 0 {
-                        let mut p = pump_probe.lock().unwrap();
-                        if p.active {
-                            let now = Instant::now();
-                            p.start.get_or_insert(now);
-                            p.last = Some(now);
-                            p.recv_bytes += frame.data.len() as u64;
-                            p.recv_packets += 1;
-                        }
-                        continue; // not video — never enqueue for the decoder
+                        continue; // speed-test filler, not video — measured via the counters above
                    }
                    let _ = frame_tx.try_send(frame);
                }
@@ -96,6 +96,18 @@ impl Packetizer {
        }
    }

+    /// Change the FEC recovery percentage on a live packetizer (adaptive FEC).
+    ///
+    /// The stored value is `pct` capped at 90. Because the wire is self-describing (each packet
+    /// carries its block's data/recovery counts) the receiver needs no notification; the new
+    /// percentage is picked up by the next [`packetize`](Self::packetize).
+    pub fn set_fec_percent(&mut self, pct: u8) {
+        let capped = if pct > 90 { 90 } else { pct };
+        self.fec.fec_percent = capped;
+    }
+
+    /// The current FEC recovery percentage, read straight from the packetizer's FEC settings
+    /// (`self.fec.fec_percent`).
+    pub fn fec_percent(&self) -> u8 {
+        self.fec.fec_percent
+    }
+
    /// Packetize one access unit into wire packets (header + shard payload each).
    pub fn packetize(
        &mut self,
@@ -167,6 +167,18 @@ pub struct Reconfigured {
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub struct RequestKeyframe;

+/// `client → host`, periodic: the client's observed data-plane loss, so the host can size FEC to
+/// the link instead of a flat percentage (adaptive FEC). `loss_ppm` is parts-per-million of shards
+/// that arrived missing-but-recovered (plus a bump when frames went unrecoverable) over the report
+/// window — i.e. the loss FEC is currently absorbing. The host maps it to a recovery percentage,
+/// clamped to a sane band, and applies it live; a clean link decays toward the floor (fewer packets,
+/// which directly helps a packet-rate-bound uplink like the Steam Deck's WiFi tx). Fire-and-forget.
+/// A host that predates this ignores it (unknown control message) and keeps its static FEC.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct LossReport {
+    /// Observed loss over the report window in parts-per-million (1_000_000 = 100 %). Encoded on
+    /// the wire as a little-endian `u32`.
+    pub loss_ppm: u32,
+}
+
 /// `client → host`, any time after [`Start`]: run a bandwidth speed test. The host bursts
 /// filler access units (flagged [`crate::packet::FLAG_PROBE`]) over the data plane at
 /// `target_kbps` of application goodput for `duration_ms`, *pausing video for the duration*, then
@@ -181,17 +193,30 @@ pub struct ProbeRequest {
    pub duration_ms: u32,
 }

-/// `host → client`: the probe burst is finished. Reports what the host actually sent so the
-/// client can compute delivery ratio (loss) = `received / bytes_sent` and throughput =
-/// `received_bytes * 8 / elapsed`.
+/// `host → client`: the probe burst is finished. Reports what the host actually put on the wire so
+/// the client can split the two failure modes apart: **host-side** drops (the send buffer couldn't
+/// keep up — raise `net.core.wmem_max`) vs **link** loss (wire packets the air dropped). The client
+/// measures delivered wire packets itself and computes:
+///
+/// - link loss   = `(wire_packets_sent − received) / wire_packets_sent`
+/// - host drop   = `send_dropped / (wire_packets_sent + send_dropped)`
+/// - throughput  = `received_wire_bytes * 8 / duration_ms` (bits per millisecond, i.e. kbit/s)
+///
+/// Counting delivered traffic at the *packet* level (not whole reassembled AUs) makes the figure
+/// degrade gracefully past the FEC budget instead of cliffing to zero.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub struct ProbeResult {
-    /// Total access-unit payload bytes the host emitted for the probe.
+    /// Total access-unit payload bytes the host emitted for the probe (application goodput offered).
    pub bytes_sent: u64,
    /// Number of probe access units the host emitted.
    pub packets_sent: u32,
    /// The burst's actual duration in milliseconds (the host clamps/measures the request).
    pub duration_ms: u32,
+    /// Wire packets the kernel ACCEPTED for transmission — what actually went on the link (offered
+    /// minus the send-buffer drops below). `0` from a pre-wire-stats host (back-compat decode).
+    pub wire_packets_sent: u32,
+    /// Wire packets the host could NOT hand to the kernel (send buffer full): the host-side ceiling.
+    /// Also `0` when decoded from a pre-wire-stats host's 21-byte message.
+    pub send_dropped: u32,
 }

 /// `client → host`, right after [`Start`]: one round of the wall-clock skew handshake. The client
@@ -238,6 +263,8 @@ pub const MSG_RECONFIGURE: u8 = 0x01;
 pub const MSG_RECONFIGURED: u8 = 0x02;
 /// Type byte of [`RequestKeyframe`].
 pub const MSG_REQUEST_KEYFRAME: u8 = 0x03;
+/// Type byte of [`LossReport`].
+pub const MSG_LOSS_REPORT: u8 = 0x04;
 /// Type byte of [`ProbeRequest`].
 pub const MSG_PROBE_REQUEST: u8 = 0x20;
 /// Type byte of [`ProbeResult`].
@@ -808,6 +835,43 @@ impl RequestKeyframe {
    }
 }

+impl LossReport {
+    /// Serialize to the 9-byte control frame: `CTL_MAGIC` (bytes 0..4), the `MSG_LOSS_REPORT`
+    /// type byte (byte 4), then `loss_ppm` as a little-endian `u32` (bytes 5..9).
+    pub fn encode(&self) -> Vec<u8> {
+        let mut frame = Vec::with_capacity(9);
+        frame.extend_from_slice(CTL_MAGIC);
+        frame.push(MSG_LOSS_REPORT);
+        frame.extend_from_slice(&self.loss_ppm.to_le_bytes());
+        frame
+    }
+
+    /// Parse a frame produced by [`encode`](Self::encode). The length must be exactly 9 and the
+    /// magic and type byte must match; anything else yields `InvalidArg("bad LossReport")`.
+    pub fn decode(b: &[u8]) -> Result<LossReport> {
+        // The length test comes first so the slice/index accesses after it cannot panic.
+        let well_formed = b.len() == 9 && &b[0..4] == CTL_MAGIC && b[4] == MSG_LOSS_REPORT;
+        if !well_formed {
+            return Err(PunktfunkError::InvalidArg("bad LossReport"));
+        }
+        let mut ppm_le = [0u8; 4];
+        ppm_le.copy_from_slice(&b[5..9]);
+        Ok(LossReport {
+            loss_ppm: u32::from_le_bytes(ppm_le),
+        })
+    }
+}
+
+/// Compute a [`LossReport`] `loss_ppm` from one window's session-stat deltas: shards FEC recovered
+/// (the loss it absorbed), shards received, and frames that went unrecoverable. Loss ≈ recovered /
+/// (received + recovered) — the fraction of shards that arrived missing. A frame drop means loss
+/// exceeded the current FEC budget (so `recovered` plateaus), so add a fixed bump to push the host's
+/// FEC up past the cap on the next adjustment. Returns parts-per-million, capped at 1e6.
+///
+/// The ratio is computed in `u128`, so it stays exact for any `u64` inputs. With 64-bit math a
+/// large delta (e.g. a `wrapping_sub` taken across a counter reset) saturated `recovered * 1e6`
+/// and collapsed the result toward 0. An empty window (`received + recovered == 0`) yields 0
+/// before the frame-drop bump is applied.
+pub fn window_loss_ppm(recovered: u64, received: u64, frames_dropped: u64) -> u32 {
+    let total = u128::from(received) + u128::from(recovered);
+    let ratio_ppm = if total == 0 {
+        0
+    } else {
+        u128::from(recovered) * 1_000_000 / total
+    };
+    // recovered ≤ total, so ratio_ppm ≤ 1_000_000 and the narrowing cast cannot truncate.
+    let mut ppm = ratio_ppm as u32;
+    if frames_dropped > 0 {
+        ppm = ppm.saturating_add(50_000); // +5%: unrecoverable loss → raise FEC past the current cap
+    }
+    ppm.min(1_000_000)
+}
+
 impl ProbeRequest {
    pub fn encode(&self) -> Vec<u8> {
        // magic[0..4] type[4] target_kbps[5..9] duration_ms[9..13]
@@ -834,23 +898,36 @@ impl ProbeRequest {
 impl ProbeResult {
+    /// Serialize to the 29-byte control frame; all integers are little-endian (layout below).
    pub fn encode(&self) -> Vec<u8> {
        // magic[0..4] type[4] bytes_sent[5..13] packets_sent[13..17] duration_ms[17..21]
-        let mut b = Vec::with_capacity(21);
+        // wire_packets_sent[21..25] send_dropped[25..29]
+        let mut b = Vec::with_capacity(29);
        b.extend_from_slice(CTL_MAGIC);
        b.push(MSG_PROBE_RESULT);
        b.extend_from_slice(&self.bytes_sent.to_le_bytes());
        b.extend_from_slice(&self.packets_sent.to_le_bytes());
        b.extend_from_slice(&self.duration_ms.to_le_bytes());
+        b.extend_from_slice(&self.wire_packets_sent.to_le_bytes());
+        b.extend_from_slice(&self.send_dropped.to_le_bytes());
        b
    }

+    /// Parse a `ProbeResult` frame. Accepts the legacy 21-byte form (wire stats decode as 0) and
+    /// the 29-byte form; bytes past offset 29 are ignored. A bad magic/type byte, a frame shorter
+    /// than 21 bytes, or one with a partial wire-stats tail (22..=28 bytes) yields
+    /// `InvalidArg("bad ProbeResult")`.
    pub fn decode(b: &[u8]) -> Result<ProbeResult> {
-        if b.len() != 21 || &b[0..4] != CTL_MAGIC || b[4] != MSG_PROBE_RESULT {
+        // Back-compat: 21 bytes (pre-wire-stats host, new fields default 0) or 29+ bytes (with the
+        // wire_packets_sent + send_dropped tail). A length strictly between the two is a truncated
+        // tail: reject it instead of silently decoding it as a legacy message with zeroed fields.
+        let len_ok = b.len() == 21 || b.len() >= 29;
+        if !len_ok || &b[0..4] != CTL_MAGIC || b[4] != MSG_PROBE_RESULT {
            return Err(PunktfunkError::InvalidArg("bad ProbeResult"));
        }
+        let u32at = |o: usize| u32::from_le_bytes([b[o], b[o + 1], b[o + 2], b[o + 3]]);
+        let (wire_packets_sent, send_dropped) = if b.len() >= 29 {
+            (u32at(21), u32at(25))
+        } else {
+            (0, 0)
+        };
        Ok(ProbeResult {
            bytes_sent: u64::from_le_bytes(b[5..13].try_into().unwrap()),
-            packets_sent: u32::from_le_bytes(b[13..17].try_into().unwrap()),
-            duration_ms: u32::from_le_bytes(b[17..21].try_into().unwrap()),
+            packets_sent: u32at(13),
+            duration_ms: u32at(17),
+            wire_packets_sent,
+            send_dropped,
        })
    }
 }
@@ -1851,6 +1928,35 @@ mod tests {
        assert!(RequestKeyframe::decode(&[bytes.as_slice(), &[0]].concat()).is_err());
    }

+    #[test]
+    fn loss_report_roundtrip() {
+        // Representative values survive encode → decode unchanged.
+        for &loss_ppm in &[0u32, 1, 12_345, 50_000, 1_000_000] {
+            let report = LossReport { loss_ppm };
+            let wire = report.encode();
+            assert_eq!(LossReport::decode(&wire).unwrap(), report);
+        }
+        // Disjoint from the other control messages (type byte + length).
+        let keyframe_wire = RequestKeyframe.encode();
+        let zero_wire = LossReport { loss_ppm: 0 }.encode();
+        assert!(LossReport::decode(&keyframe_wire).is_err());
+        assert!(RequestKeyframe::decode(&zero_wire).is_err());
+        // One trailing byte makes the frame the wrong length, so it must be rejected.
+        let mut padded = zero_wire;
+        padded.push(0);
+        assert!(LossReport::decode(&padded).is_err());
+    }
+
+    #[test]
+    fn window_loss_ppm_estimates_and_caps() {
+        // (recovered, received, frames_dropped, expected ppm)
+        let cases: [(u64, u64, u64, u32); 5] = [
+            (0, 0, 0, 0),          // no traffic
+            (0, 1000, 0, 0),       // clean window: nothing recovered
+            (50, 950, 0, 50_000),  // 50 recovered of 1000 total = 5%
+            (50, 950, 1, 100_000), // an unrecoverable frame adds the +5% bump
+            (0, 0, 3, 50_000),     // drops with nothing received still report the bump
+        ];
+        for &(recovered, received, dropped, want) in cases.iter() {
+            assert_eq!(window_loss_ppm(recovered, received, dropped), want);
+        }
+        // Extreme inputs never exceed the 1e6 cap.
+        assert!(window_loss_ppm(u64::MAX, 1, 9) <= 1_000_000);
+    }
+
    #[test]
    fn probe_messages_roundtrip() {
        let req = ProbeRequest {
@@ -1862,8 +1968,20 @@ mod tests {
            bytes_sent: 62_500_000,
            packets_sent: 480,
            duration_ms: 2003,
+            wire_packets_sent: 41_000,
+            send_dropped: 1_200,
        };
        assert_eq!(ProbeResult::decode(&res.encode()).unwrap(), res);
+        assert_eq!(res.encode().len(), 29);
+        // A pre-wire-stats host's 21-byte ProbeResult still decodes, with the new fields zeroed.
+        let legacy = {
+            let full = res.encode();
+            full[..21].to_vec()
+        };
+        let decoded = ProbeResult::decode(&legacy).unwrap();
+        assert_eq!(decoded.wire_packets_sent, 0);
+        assert_eq!(decoded.send_dropped, 0);
+        assert_eq!(decoded.bytes_sent, res.bytes_sent);
        // Type bytes keep the control messages disjoint from each other.
        assert!(ProbeRequest::decode(&res.encode()).is_err());
        assert!(Reconfigure::decode(&req.encode()).is_err());
@@ -201,6 +201,18 @@ impl Session {
        r.map(|_| ())
    }

+    /// Host: live-adjust the FEC recovery percentage (adaptive FEC). Affects the next
+    /// [`submit_frame`](Self::submit_frame)/[`seal_frame`](Self::seal_frame); the receiver needs no
+    /// notification (each packet's header carries its block's data/recovery shard counts).
+    ///
+    /// Forwards `pct` unchanged to the session's packetizer; no range check is done at this level.
+    pub fn set_fec_percent(&mut self, pct: u8) {
+        self.packetizer.set_fec_percent(pct);
+    }
+
+    /// The current FEC recovery percentage (host side), as reported by the session's packetizer.
+    pub fn fec_percent(&self) -> u8 {
+        self.packetizer.fec_percent()
+    }
+
    /// Host: drain one pending input event from the client, if any.
    pub fn poll_input(&mut self) -> Result<Option<InputEvent>> {
        if self.config.role != Role::Host {
@@ -2,9 +2,11 @@
 //! directly — no async runtime is involved.

 mod loopback;
+mod qos;
 mod udp;

 pub use loopback::{loopback_pair, LoopbackTransport};
+pub use qos::{grow_socket_buffers, set_media_qos, MediaClass};
 /// Windows-only: reusable USO (UDP Send Offload) batch send for callers that own their own connected
 /// socket (the GameStream video sender) rather than going through [`UdpTransport`].
 #[cfg(target_os = "windows")]
@@ -0,0 +1,145 @@
+//! Shared UDP socket tuning for the media planes: send/recv buffer growth + best-effort link-layer
+//! QoS.
+//!
+//! [`grow_socket_buffers`] is the `SO_SNDBUF`/`SO_RCVBUF` growth the native data plane applies; the
+//! GameStream video/audio sockets reuse it so they don't go ENOBUFS-bound at high bitrate.
+//!
+//! [`set_media_qos`] DSCP-tags the latency-sensitive video/audio traffic (+ Linux `SO_PRIORITY`) so a
+//! QoS-aware path (Wi-Fi WMM access categories, a managed switch, a shaped uplink) can prioritize it
+//! over bulk flows. Mirrors what Apollo/Sunshine tag — DSCP **CS5** for video, **CS6** for audio. It
+//! is **opt-in** (`PUNKTFUNK_DSCP=1`): DSCP can interact badly with some consumer ISPs/routers, and on
+//! Windows a plain `IP_TOS` is silently stripped unless a qWAVE policy is active (Apollo uses the
+//! qWAVE API there — that port is a follow-up; today this is a no-op on the wire on Windows).
+
+use std::net::UdpSocket;
+
+/// Target kernel socket-buffer size (`SO_SNDBUF`/`SO_RCVBUF`). A high-resolution frame is a burst (a
+/// 5120×1440 keyframe is ~130 packets the send thread hands to `sendmmsg` at once); the default UDP
+/// buffer (~208 KB on Linux) overflows on it, which EAGAINs the host send (dropping packets) or drops
+/// on the client recv — and with infinite-GOP a single lost frame freezes the decode until the next
+/// RFI refresh. Requested large; the OS clamps to `net.core.{wmem,rmem}_max` (Linux) /
+/// `kern.ipc.maxsockbuf` (macOS).
+///
+/// Sized for 1 Gbps+: at ~1.2 Gbps on the wire an 8 MB buffer is only ~49 ms of steady state, and a
+/// single multi-MB IDR keyframe (~4 MB ≈ 3300 packets) instantly fills most of it. 32 MB gives ~200 ms
+/// of headroom and absorbs a keyframe burst without EAGAIN/ENOBUFS drops. (Paced sending —
+/// `punktfunk1.rs::paced_submit` — spreads a big frame's overflow, so this buffer mostly absorbs the
+/// immediate microburst rather than a whole unpaced frame.)
+pub(crate) const TARGET_SOCKBUF: usize = 32 * 1024 * 1024;
+
+/// Ask the kernel to enlarge `socket`'s send and receive buffers to [`TARGET_SOCKBUF`].
+///
+/// Entirely best-effort: a failed `setsockopt` is ignored (the stream just runs lossier). The
+/// granted sizes are then read back, and if the smaller of the two is under a quarter of the
+/// target a warning is logged naming the sysctl to raise — the OS cap is too low for clean 4K/5K
+/// streaming.
+pub fn grow_socket_buffers(socket: &UdpSocket) {
+    let raw = socket2::SockRef::from(socket);
+    let _ = raw.set_send_buffer_size(TARGET_SOCKBUF);
+    let _ = raw.set_recv_buffer_size(TARGET_SOCKBUF);
+    // The kernel reports back the (possibly clamped, Linux-doubled) granted size; an unreadable
+    // size counts as 0 so it also triggers the warning.
+    let send_granted = raw.send_buffer_size().unwrap_or(0);
+    let recv_granted = raw.recv_buffer_size().unwrap_or(0);
+    let granted = std::cmp::min(send_granted, recv_granted);
+    if granted >= TARGET_SOCKBUF / 4 {
+        return;
+    }
+    tracing::warn!(
+        granted_kb = granted / 1024,
+        "UDP socket buffer capped well below target — high-resolution streaming may drop \
+         frames; raise net.core.wmem_max / net.core.rmem_max (Linux) for clean 4K/5K"
+    );
+}
+
+/// Media class of a socket — selects the DSCP code point (and Linux `SO_PRIORITY`), matching Apollo's
+/// mapping: video = CS5, audio = CS6.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum MediaClass {
+    /// Video traffic: DSCP CS5 (code point 40), Linux `SO_PRIORITY` 5.
+    Video,
+    /// Audio traffic: DSCP CS6 (code point 48), Linux `SO_PRIORITY` 6.
+    Audio,
+}
+
+impl MediaClass {
+    /// The 6-bit DSCP code point for this class. Callers shift it left by two to place it in the
+    /// high bits of the IPv4 TOS / IPv6 traffic-class byte.
+    const fn dscp(self) -> u32 {
+        const CS5: u32 = 40;
+        const CS6: u32 = 48;
+        match self {
+            MediaClass::Video => CS5,
+            MediaClass::Audio => CS6,
+        }
+    }
+}
+
+/// Reports whether DSCP/QoS marking has been opted into through the `PUNKTFUNK_DSCP` environment
+/// variable. Only the exact (case-sensitive) values `1`, `true` and `on` enable it; an unset,
+/// non-UTF-8 or any other value leaves it off, which is the default.
+pub(crate) fn dscp_enabled() -> bool {
+    match std::env::var("PUNKTFUNK_DSCP") {
+        Ok(value) => ["1", "true", "on"].contains(&value.as_str()),
+        Err(_) => false,
+    }
+}
+
+/// Opt-in QoS tagging of `socket`'s outgoing packets for its media `class`.
+///
+/// Does nothing unless `PUNKTFUNK_DSCP` enables marking. When enabled, every step is best-effort
+/// (failures logged at debug, never fatal) — QoS is a nicety, not required for correctness.
+///
+/// IPv4 only (all current media sockets bind `0.0.0.0`); a v6 socket simply isn't tagged. On Windows
+/// the `IP_TOS` set succeeds but the OS doesn't tag the wire without a qWAVE policy (follow-up).
+pub fn set_media_qos(socket: &UdpSocket, class: MediaClass) {
+    if !dscp_enabled() {
+        return;
+    }
+    apply_media_qos(socket, class);
+}
+
+/// Apply the QoS socket options for `class` unconditionally. Split out of [`set_media_qos`] so the
+/// enabled path can be tested without touching the process-global `PUNKTFUNK_DSCP` env. Each step
+/// is best-effort: a failure is logged at debug level and the function carries on.
+fn apply_media_qos(socket: &UdpSocket, class: MediaClass) {
+    let raw = socket2::SockRef::from(socket);
+    // DSCP occupies the high 6 bits of the TOS byte, hence the two-bit left shift.
+    let tos = class.dscp() << 2;
+    raw.set_tos_v4(tos).unwrap_or_else(|e| {
+        tracing::debug!(error = %e, ?class, "set IP_TOS (DSCP) failed — QoS marking skipped");
+    });
+    // Order matters: SO_PRIORITY goes AFTER IP_TOS (setting TOS resets SO_PRIORITY to 0 on Linux).
+    // Linux-only; 6 is the highest priority allowed without CAP_NET_ADMIN, so video=5 / audio=6
+    // (Apollo's scheme).
+    #[cfg(target_os = "linux")]
+    {
+        let prio = match class {
+            MediaClass::Audio => 6,
+            MediaClass::Video => 5,
+        };
+        raw.set_priority(prio).unwrap_or_else(|e| {
+            tracing::debug!(error = %e, "set SO_PRIORITY failed");
+        });
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn dscp_code_points_match_apollo() {
+        // (class, DSCP code point, TOS byte once shifted into the high 6 bits): CS5 / CS6.
+        let expected = [
+            (MediaClass::Video, 40, 0xA0),
+            (MediaClass::Audio, 48, 0xC0),
+        ];
+        for (class, code_point, tos_byte) in expected {
+            assert_eq!(class.dscp(), code_point);
+            assert_eq!(class.dscp() << 2, tos_byte);
+        }
+    }
+
+    #[test]
+    fn qos_and_buffer_growth_are_best_effort_and_never_panic() {
+        let sock = UdpSocket::bind("127.0.0.1:0").unwrap();
+        // With PUNKTFUNK_DSCP unset (the expected test environment) these return early; either
+        // way, neither call may panic.
+        for class in [MediaClass::Video, MediaClass::Audio] {
+            set_media_qos(&sock, class);
+        }
+        grow_socket_buffers(&sock);
+    }
+
+    #[test]
+    fn apply_qos_tags_the_socket() {
+        // Drive the enabled path directly (no env involved) and read the options back on Linux,
+        // the only platform where both are asserted.
+        let sock = UdpSocket::bind("127.0.0.1:0").unwrap();
+        apply_media_qos(&sock, MediaClass::Video);
+        #[cfg(target_os = "linux")]
+        {
+            let raw = socket2::SockRef::from(&sock);
+            assert_eq!(raw.tos_v4().unwrap(), 0xA0, "video → CS5 in the TOS byte");
+            assert_eq!(raw.priority().unwrap(), 5, "video → SO_PRIORITY 5");
+        }
+    }
+}
@@ -413,26 +413,15 @@ pub struct UdpTransport {
 }

 impl UdpTransport {
-    /// Target kernel socket-buffer size. A high-resolution frame is a burst (a 5120×1440
-    /// keyframe is ~130 packets the send thread hands to `sendmmsg` at once); the default
-    /// UDP buffer (~208 KB on Linux) overflows on it, which EAGAINs the host send (dropping
-    /// packets) or drops on the client recv — and with infinite-GOP a single lost frame
-    /// freezes the decode until the next RFI refresh. Requested large; the OS clamps to
-    /// `net.core.{wmem,rmem}_max` (Linux) / `kern.ipc.maxsockbuf` (macOS).
-    ///
-    /// Sized for 1 Gbps+: at ~1.2 Gbps on the wire an 8 MB buffer is only ~49 ms of steady state,
-    /// and a single multi-MB IDR keyframe (~4 MB ≈ 3300 packets) instantly fills most of it. 32 MB
-    /// gives ~200 ms of headroom and absorbs a keyframe burst without EAGAIN drops. (Paced sending
-    /// — `punktfunk1.rs::paced_submit` — now spreads a big frame's overflow, so this buffer mostly absorbs
-    /// the immediate microburst rather than a whole unpaced frame.)
-    const TARGET_SOCKBUF: usize = 32 * 1024 * 1024;
-
    /// Bind `local` and `connect` to `peer`, so `send`/`recv` need no address and the
    /// kernel filters to this peer. Non-blocking, matching the [`Transport`] contract.
+    ///
+    /// # Errors
+    /// Propagates the I/O error if binding `local`, connecting to `peer`, or switching the socket
+    /// to non-blocking mode fails. Buffer growth and QoS tagging return nothing and so cannot fail
+    /// this call.
    pub fn connect(local: &str, peer: &str) -> std::io::Result<Self> {
        let socket = UdpSocket::bind(local)?;
        socket.connect(peer)?;
-        Self::grow_buffers(&socket);
+        super::qos::grow_socket_buffers(&socket);
+        // The native data plane is video-dominant — tag it as the video class (opt-in via
+        // PUNKTFUNK_DSCP). Each end marks its own egress.
+        super::qos::set_media_qos(&socket, super::qos::MediaClass::Video);
        socket.set_nonblocking(true)?;
        Ok(UdpTransport { socket })
    }
@@ -481,7 +470,8 @@ impl UdpTransport {
        let target = observed.map(|s| s.to_string());
        socket.connect(target.as_deref().unwrap_or(fallback_peer))?;
        socket.set_read_timeout(None)?;
-        Self::grow_buffers(&socket);
+        super::qos::grow_socket_buffers(&socket);
+        super::qos::set_media_qos(&socket, super::qos::MediaClass::Video);
        socket.set_nonblocking(true)?;
        Ok((UdpTransport { socket }, punched))
    }
@@ -498,27 +488,6 @@ impl UdpTransport {
        self.socket.local_addr()
    }

-    /// Best-effort grow of SO_SNDBUF/SO_RCVBUF (see [`TARGET_SOCKBUF`]). A failure isn't fatal
-    /// (the stream just runs lossier); a grant far below the request means the OS cap is too
-    /// low for clean 4K/5K streaming, so warn once with the knob to raise.
-    fn grow_buffers(socket: &UdpSocket) {
-        let sock = socket2::SockRef::from(socket);
-        let _ = sock.set_send_buffer_size(Self::TARGET_SOCKBUF);
-        let _ = sock.set_recv_buffer_size(Self::TARGET_SOCKBUF);
-        // The kernel reports back the (possibly clamped, Linux-doubled) granted size.
-        let granted = sock
-            .send_buffer_size()
-            .unwrap_or(0)
-            .min(sock.recv_buffer_size().unwrap_or(0));
-        if granted < Self::TARGET_SOCKBUF / 4 {
-            tracing::warn!(
-                granted_kb = granted / 1024,
-                "UDP socket buffer capped well below target — high-resolution streaming may drop \
-                 frames; raise net.core.wmem_max / net.core.rmem_max (Linux) for clean 4K/5K"
-            );
-        }
-    }
-
    /// Apple batched receive via `recvmsg_x` — drains up to `out.len()` datagrams in one syscall into
    /// the caller's reused buffers (the recv counterpart of Linux `recvmmsg`, which Darwin lacks).
    /// SAFETY: each `MsghdrX` holds a raw pointer into `iovs`, which holds raw pointers into `out`'s
@@ -57,6 +57,16 @@ pub trait Encoder: Send {
    /// Force the next submitted frame to be an IDR keyframe (e.g. after a client
    /// reference-frame-invalidation request). Default: no-op.
    fn request_keyframe(&mut self) {}
+    /// Invalidate a contiguous range of previously-encoded reference frames (client frame numbers,
+    /// as reported in a loss-recovery request) so the encoder re-references an older still-valid
+    /// frame instead of emitting a full IDR.
+    ///
+    /// Returns `true` if a real reference invalidation was performed; `false` means the encoder
+    /// couldn't (range older than the DPB, or the backend has no RFI) and the caller should fall
+    /// back to [`request_keyframe`](Self::request_keyframe).
+    ///
+    /// Default: `false` — only the Windows direct-NVENC path implements true RFI; libavcodec
+    /// (Linux NVENC) and VAAPI can't express `nvEncInvalidateRefFrames`, so they keyframe. The
+    /// default ignores both arguments, hence the underscore-prefixed names.
+    fn invalidate_ref_frames(&mut self, _first_frame: i64, _last_frame: i64) -> bool {
+        false
+    }
    /// Pull the next encoded AU if one is ready.
    fn poll(&mut self) -> Result<Option<EncodedFrame>>;
    /// Signal end-of-stream. After this, drain the remaining AUs with [`poll`](Self::poll)
@@ -30,6 +30,11 @@ use nvidia_video_codec_sdk::ENCODE_API as API;
 // GPU-saturating game; this must be ≥ the helper's `PUNKTFUNK_ENCODE_DEPTH` (default 4, clamped ≤ 6).
 const POOL: usize = 8;

+/// Reference-frame DPB depth when RFI is supported (Apollo uses 5 for H.264/HEVC). A deeper DPB
+/// lets an invalidated reference fall back to an older still-valid frame instead of a full IDR;
+/// `numRefL0 = 1` keeps each P-frame single-reference for low latency.
+const RFI_DPB: u32 = 5;
+
 fn codec_guid(codec: Codec) -> nv::GUID {
    match codec {
        Codec::H264 => nv::NV_ENC_CODEC_H264_GUID,
@@ -40,6 +45,7 @@ fn codec_guid(codec: Codec) -> nv::GUID {

 pub struct NvencD3d11Encoder {
    encoder: *mut c_void,
+    codec: Codec,
    codec_guid: nv::GUID,
    width: u32,
    height: u32,
@@ -63,6 +69,14 @@ pub struct NvencD3d11Encoder {
    frame_idx: i64,
    force_kf: bool,
    inited: bool,
+    /// GPU capabilities probed once via `nvEncGetEncodeCaps` before configuring (Apollo's
+    /// `get_encoder_cap`): gates 10-bit/custom-VBV/RFI on what this card actually supports instead
+    /// of failing later as an opaque `InvalidParam`. Set by [`query_caps`](Self::query_caps).
+    rfi_supported: bool,
+    custom_vbv: bool,
+    /// The last reference-frame range we invalidated — dedupes repeated RFI requests for the same
+    /// loss event (the client resends until it sees recovery).
+    last_rfi_range: Option<(i64, i64)>,
    /// Raw ptr of the D3D11 device this session was initialized with. The capturer recreates the
    /// device on a desktop switch (normal ↔ Winlogon secure); when a frame carries a new device we
    /// tear down and re-init NVENC against it.
@@ -84,6 +98,7 @@ impl NvencD3d11Encoder {
    ) -> Result<Self> {
        Ok(Self {
            encoder: ptr::null_mut(),
+            codec,
            codec_guid: codec_guid(codec),
            width,
            height,
@@ -99,6 +114,9 @@ impl NvencD3d11Encoder {
            frame_idx: 0,
            force_kf: false,
            inited: false,
+            rfi_supported: false,
+            custom_vbv: false,
+            last_rfi_range: None,
            init_device: ptr::null_mut(),
        })
    }
@@ -128,6 +146,88 @@ impl NvencD3d11Encoder {
        self.encoder = ptr::null_mut();
        self.inited = false;
        self.next = 0;
+        // The new session starts with an empty DPB (its first frame is an IDR), so any prior
+        // invalidation range is meaningless against it.
+        self.last_rfi_range = None;
+    }
+
+    /// Read a single `NV_ENC_CAPS` value for this encoder's codec from the session `enc`. Any
+    /// query error is reported as 0, so an unqueryable cap reads as "unsupported" (the
+    /// conservative choice).
+    ///
+    /// # Safety
+    /// `enc` is passed straight to the NVENC API, so it must be a handle to an open encode
+    /// session.
+    unsafe fn get_cap(&self, enc: *mut c_void, which: nv::NV_ENC_CAPS) -> i32 {
+        let mut query = nv::NV_ENC_CAPS_PARAM {
+            version: nv::NV_ENC_CAPS_PARAM_VER,
+            capsToQuery: which,
+            reserved: [0; 62],
+        };
+        let mut value: i32 = 0;
+        let status = (API.get_encode_caps)(enc, self.codec_guid, &mut query, &mut value);
+        if status.result_without_string().is_ok() {
+            value
+        } else {
+            0
+        }
+    }
+
+    /// Probe this GPU's real capabilities once (Apollo's `get_encoder_cap`) before the bitrate-probe
+    /// loop configures the session: opens a throwaway session, queries the codec's max dimensions +
+    /// 10-bit / custom-VBV / ref-pic-invalidation support, destroys it. Rejects an out-of-range mode
+    /// up front with a clear error, downgrades 10-bit→8-bit when unsupported, and records the
+    /// RFI/custom-VBV flags the config + [`invalidate_ref_frames`](Encoder::invalidate_ref_frames)
+    /// gate on. Without this, an unsupported config surfaces only as an opaque `InvalidParam` that
+    /// the bitrate-clamp search misreads as "bitrate too high" and binary-searches into the floor.
+    ///
+    /// # Errors
+    /// Fails if the throwaway session can't be opened, or if the configured width/height exceeds
+    /// the maximum the GPU reports for this codec (only checked when both maxima are non-zero).
+    ///
+    /// # Safety
+    /// Calls the NVENC API with `device`'s raw pointer. NOTE(review): presumably `device` must be a
+    /// live D3D11 device and this must run on the encode thread like the other NVENC calls — confirm.
+    unsafe fn query_caps(&mut self, device: &ID3D11Device) -> Result<()> {
+        let mut params = nv::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS {
+            version: nv::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER,
+            deviceType: nv::NV_ENC_DEVICE_TYPE::NV_ENC_DEVICE_TYPE_DIRECTX,
+            device: device.as_raw(),
+            apiVersion: nv::NVENCAPI_VERSION,
+            ..Default::default()
+        };
+        let mut enc: *mut c_void = ptr::null_mut();
+        (API.open_encode_session_ex)(&mut params, &mut enc)
+            .result_without_string()
+            .map_err(|e| {
+                anyhow!("NVENC open_encode_session_ex (caps probe): {e:?} (no NVIDIA GPU?)")
+            })?;
+        // Each query yields 0 on failure, so an unqueryable cap is treated as unsupported below.
+        let wmax = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_WIDTH_MAX);
+        let hmax = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_HEIGHT_MAX);
+        let ten_bit = self.get_cap(enc, nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
+        let rfi = self.get_cap(
+            enc,
+            nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION,
+        );
+        let custom_vbv = self.get_cap(
+            enc,
+            nv::NV_ENC_CAPS::NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE,
+        );
+        // Done with the throwaway session. It is destroyed before any early return below, and a
+        // destroy failure is deliberately ignored.
+        let _ = (API.destroy_encoder)(enc);
+
+        // Reject an over-range mode with a clear message instead of an opaque InvalidParam.
+        if wmax > 0 && hmax > 0 && (self.width as i32 > wmax || self.height as i32 > hmax) {
+            bail!(
+                "this GPU's NVENC max encode size for {:?} is {wmax}x{hmax}; client requested \
+                 {}x{} (lower the client resolution or use a codec/GPU that supports it)",
+                self.codec,
+                self.width,
+                self.height
+            );
+        }
+        // Degrade gracefully rather than fail: no 10-bit encode on this card → 8-bit SDR.
+        if self.bit_depth >= 10 && ten_bit == 0 {
+            tracing::warn!("NVENC: this GPU can't 10-bit encode — falling back to 8-bit SDR");
+            self.bit_depth = 8;
+            self.hdr = false;
+        }
+        // Any non-zero cap value counts as "supported".
+        self.rfi_supported = rfi != 0;
+        self.custom_vbv = custom_vbv != 0;
+        tracing::info!(
+            rfi = self.rfi_supported,
+            custom_vbv = self.custom_vbv,
+            max = %format!("{wmax}x{hmax}"),
+            ten_bit = ten_bit != 0,
+            "NVENC capabilities probed"
+        );
+        Ok(())
    }

    /// Open + configure + initialize ONE NVENC session at `bitrate` (bps) and `split_mode`. Returns
@@ -181,10 +281,13 @@ impl NvencD3d11Encoder {
        let bps = bitrate.min(u32::MAX as u64) as u32;
        cfg.rcParams.averageBitRate = bps;
        cfg.rcParams.maxBitRate = bps;
-        // Shrink the VBV with the bitrate — NVENC validates it against the same level ceiling.
-        let vbv = (bitrate as f64 / self.fps.max(1) as f64) as u32;
-        cfg.rcParams.vbvBufferSize = vbv;
-        cfg.rcParams.vbvInitialDelay = vbv;
+        // Shrink the VBV with the bitrate — NVENC validates it against the same level ceiling. Only
+        // when the GPU advertises custom-VBV support (else leave the preset default, per the caps probe).
+        if self.custom_vbv {
+            let vbv = (bitrate as f64 / self.fps.max(1) as f64) as u32;
+            cfg.rcParams.vbvBufferSize = vbv;
+            cfg.rcParams.vbvInitialDelay = vbv;
+        }

        // HIGH tier + autoselect level. The codec's PER-LEVEL bitrate ceiling is otherwise the
        // MAIN-tier cap — for HEVC at 5K that's Level 6.2 Main ≈ 240 Mbps. HIGH tier lifts the HEVC
@@ -212,6 +315,27 @@ impl NvencD3d11Encoder {
            vui.colourMatrix = nv::NV_ENC_VUI_MATRIX_COEFFS::NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL;
        }

+        // Reference-frame invalidation: keep a deeper DPB so an invalidated reference can fall back
+        // to an older still-valid frame instead of a full IDR, while `numRefL0 = 1` keeps each
+        // P-frame single-reference for low latency. Only when this GPU supports RFI (else leave the
+        // preset default — `invalidate_ref_frames` then returns false and the caller forces an IDR).
+        if self.rfi_supported {
+            let one = nv::NV_ENC_NUM_REF_FRAMES::NV_ENC_NUM_REF_FRAMES_1;
+            match self.codec {
+                Codec::H264 => {
+                    cfg.encodeCodecConfig.h264Config.maxNumRefFrames = RFI_DPB;
+                    cfg.encodeCodecConfig.h264Config.numRefL0 = one;
+                }
+                Codec::H265 => {
+                    cfg.encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB = RFI_DPB;
+                    cfg.encodeCodecConfig.hevcConfig.numRefL0 = one;
+                }
+                Codec::Av1 => {
+                    cfg.encodeCodecConfig.av1Config.maxNumRefFramesInDPB = RFI_DPB;
+                }
+            }
+        }
+
        let mut init = nv::NV_ENC_INITIALIZE_PARAMS {
            version: nv::NV_ENC_INITIALIZE_PARAMS_VER,
            encodeGUID: self.codec_guid,
@@ -242,6 +366,10 @@ impl NvencD3d11Encoder {
    /// Lazily create the session on the first frame's D3D11 device (so capture + encode share it).
    fn init_session(&mut self, device: &ID3D11Device) -> Result<()> {
        unsafe {
+            // Probe real GPU caps first (max dims / 10-bit / custom-VBV / RFI) so the config below is
+            // gated on what this card supports and an out-of-range mode fails with a clear error
+            // rather than being misread as a too-high bitrate by the clamp search.
+            self.query_caps(device)?;
            // Bitrate clamp (see the search below): NVENC rejects `initialize_encoder` when the bitrate
            // exceeds the GPU's max codec level. We try the requested rate, then binary-search down to
            // the MAX the level accepts and clamp to it — so an over-asking client (e.g. 1 Gbps on HEVC)
@@ -521,6 +649,47 @@ impl Encoder for NvencD3d11Encoder {
        self.force_kf = true;
    }

+    /// Recover from client-reported loss of frames `first..=last` by invalidating those reference
+    /// frames in NVENC rather than emitting an IDR.
+    ///
+    /// Returns `true` when the loss is handled without an IDR; `false` tells the caller to force one.
+    fn invalidate_ref_frames(&mut self, first: i64, last: i64) -> bool {
+        // No IDR-free recovery without a live session, without RFI support on this GPU, or for a
+        // nonsense range. (NVENC handles are single-threaded; this runs on the encode thread, like
+        // submit/poll.)
+        let range_is_sane = 0 <= first && first <= last;
+        if self.encoder.is_null() || !self.rfi_supported || !range_is_sane {
+            return false;
+        }
+        // A covering range was already invalidated for this loss event — done, and no IDR needed.
+        if matches!(self.last_rfi_range, Some((pf, pl)) if pf <= first && last <= pl) {
+            return true;
+        }
+        // `frame_idx` is the NEXT timestamp to assign: the newest encoded frame is `frame_idx - 1`
+        // and the DPB holds `[frame_idx - RFI_DPB, frame_idx - 1]`. A frame lost before that window
+        // can't be invalidated any more, and a timestamp we never assigned must not be, so the
+        // range is capped at the newest frame and rejected when it starts too early.
+        let newest = self.frame_idx - 1;
+        let oldest_in_dpb = self.frame_idx - RFI_DPB as i64;
+        let last = last.min(newest);
+        if first < oldest_in_dpb || first > last {
+            return false;
+        }
+        // Every input is tagged `inputTimeStamp = frame_idx` (0,1,2,…), which is also the client's
+        // frame number (the packetizer numbers frames in submit order), so the client's lost-frame
+        // range maps 1:1 onto the timestamps handed to NVENC. `all` stops at the first failure.
+        let all_invalidated = (first..=last).all(|ts| unsafe {
+            (API.invalidate_ref_frames)(self.encoder, ts as u64)
+                .result_without_string()
+                .is_ok()
+        });
+        if !all_invalidated {
+            return false; // any failure → fall back to IDR
+        }
+        self.last_rfi_range = Some((first, last));
+        true
+    }
+
    fn poll(&mut self) -> Result<Option<EncodedFrame>> {
        let Some((bs, map, pts_ns)) = self.pending.pop_front() else {
            return Ok(None);
@@ -303,6 +303,9 @@ fn run(
    audio_cap: &std::sync::Mutex<Option<Box<dyn AudioCapturer>>>,
 ) -> Result<()> {
    let sock = UdpSocket::bind(("0.0.0.0", AUDIO_PORT)).context("bind audio UDP")?;
+    // Grow SO_SNDBUF/RCVBUF + opt-in DSCP/QoS-tag this as the audio class (PUNKTFUNK_DSCP=1).
+    punktfunk_core::transport::grow_socket_buffers(&sock);
+    punktfunk_core::transport::set_media_qos(&sock, punktfunk_core::transport::MediaClass::Audio);
    // The client pings the audio port (~every 500ms) so we learn where to send.
    sock.set_read_timeout(Some(Duration::from_secs(10)))?;
    tracing::info!(port = AUDIO_PORT, "audio: awaiting client ping");
@@ -24,10 +24,11 @@

 use super::{AppState, CONTROL_PORT};
 use crate::inject::gamepad::GamepadManager;
-use crate::inject::InputInjector;
 use anyhow::{anyhow, Context, Result};
+use punktfunk_core::input::InputEvent;
 use rusty_enet::{Event, Host, HostSettings, Packet, PeerID};
 use std::net::UdpSocket;
+use std::sync::mpsc::Sender;
 use std::sync::Arc;
 use std::time::Duration;

@@ -53,12 +54,14 @@ pub fn spawn(state: Arc<AppState>) -> Result<()> {
    std::thread::Builder::new()
        .name("punktfunk-control".into())
        .spawn(move || {
-            // Thread-local (the injector owns non-Send Wayland/xkb state, so it must be
-            // created and live here rather than be captured into the closure).
            // GCM scheme detected from the first authenticating packet; reused thereafter.
            let mut detected: Option<Scheme> = None;
-            // Lazily opened on the first input event (Sway's Wayland socket is up by then).
-            let mut injector: Option<Box<dyn InputInjector>> = None;
+            // Decoded keyboard/mouse is forwarded to a dedicated host-lifetime injector thread —
+            // NEVER injected inline, so a slow Wayland/libei/SendInput call can't head-block ENet
+            // keepalive/retransmit servicing on this thread. The injector owns non-Send compositor
+            // state and lives on its own thread (see crate::inject::InjectorService); the held
+            // `inj_tx` clone keeps it alive for the control thread's lifetime.
+            let inj_tx = crate::inject::InjectorService::start().sender();
            // Virtual gamepads (uinput) + the host→client rumble sequence counter.
            let mut pads = GamepadManager::new();
            let mut rumble_seq: u32 = 0;
@@ -86,7 +89,7 @@ pub fn spawn(state: Arc<AppState>) -> Result<()> {
                                    channel_id,
                                    packet.data(),
                                    &mut detected,
-                                    &mut injector,
+                                    &inj_tx,
                                    &mut pads,
                                );
                            }
@@ -128,6 +131,19 @@ pub fn spawn(state: Arc<AppState>) -> Result<()> {
    Ok(())
 }

+/// Parse the lost-frame range carried by an invalidate-reference-frames (0x0301) control
+/// message. The plaintext is a 4-byte `[u16 type][u16 length]` header followed by two
+/// little-endian `i64`s (firstFrame, lastFrame), as in Sunshine/Apollo's `IDX_INVALIDATE_REF_FRAMES`.
+/// Yields `None` for a truncated body or a nonsensical range so the caller falls back to a full IDR.
+fn decode_rfi_range(pt: &[u8]) -> Option<(i64, i64)> {
+    let le_i64 = |at: usize| -> Option<i64> {
+        let raw = pt.get(at..at + 8)?;
+        Some(i64::from_le_bytes(raw.try_into().ok()?))
+    };
+    let (first, last) = (le_i64(4)?, le_i64(12)?);
+    (0 <= first && first <= last).then_some((first, last))
+}
+
 /// Handle one received control packet: decrypt it (learning the GCM scheme on the first one),
 /// decode any input event, and inject it into the host session.
 fn on_receive(
@@ -135,7 +151,7 @@ fn on_receive(
    _channel_id: u8,
    d: &[u8],
    detected: &mut Option<Scheme>,
-    injector: &mut Option<Box<dyn InputInjector>>,
+    inj_tx: &Sender<InputEvent>,
    pads: &mut GamepadManager,
 ) {
    let Some(key) = state.launch.lock().unwrap().map(|s| s.gcm_key) else {
@@ -160,17 +176,38 @@ fn on_receive(
        }
    };

-    // Recovery requests after loss: invalidate-reference-frames (0x0301, Gen7) or request-IDR
-    // (0x0302, Gen7Enc). Force a keyframe so the client can resync without a multi-second stall.
+    // Recovery requests after loss. Invalidate-reference-frames (0x0301, Gen7) carries the lost
+    // frame range (two LE i64 after the [type][len] header, like Sunshine/Apollo's
+    // IDX_INVALIDATE_REF_FRAMES) — route it to the encoder, which invalidates those refs instead of
+    // a full IDR when it can (NVENC RFI). Request-IDR (0x0302 / 0x0305) and a malformed 0x0301 force
+    // a keyframe. The video thread drains rfi_range/force_idr and resyncs without a multi-second stall.
    if pt.len() >= 2 {
        let inner = u16::from_le_bytes([pt[0], pt[1]]);
-        if matches!(inner, 0x0301 | 0x0302 | 0x0305) {
+        if inner == 0x0301 {
+            if let Some((first, last)) = decode_rfi_range(&pt) {
+                // Widen, never overwrite, a range the video thread has not drained yet — otherwise
+                // the earlier loss would silently stay referenced until the next IDR.
+                let mut pending = state.rfi_range.lock().unwrap();
+                *pending = Some(match *pending {
+                    Some((f, l)) => (f.min(first), l.max(last)),
+                    None => (first, last),
+                });
+                tracing::info!(first, last, "control: RFI request → invalidate ref frames");
+            } else {
+                state
+                    .force_idr
+                    .store(true, std::sync::atomic::Ordering::SeqCst);
+                tracing::info!("control: RFI request (no range) → keyframe");
+            }
+            return;
+        }
+        if matches!(inner, 0x0302 | 0x0305) {
            state
                .force_idr
                .store(true, std::sync::atomic::Ordering::SeqCst);
            tracing::info!(
                ty = format!("{inner:#06x}"),
-                "control: IDR/RFI request → keyframe"
+                "control: IDR request → keyframe"
            );
            return;
        }
@@ -187,27 +218,11 @@ fn on_receive(
        return; // keepalive / QoS / unhandled input kind
    }

-    // Open the injector on demand — by the first input event the compositor session is up.
-    // Backend auto-selects per desktop (wlr on Sway, libei on KWin/GNOME); override with
-    // PUNKTFUNK_INPUT_BACKEND.
-    if injector.is_none() {
-        let backend = crate::inject::default_backend();
-        match crate::inject::open(backend) {
-            Ok(i) => {
-                tracing::info!(?backend, "input injection backend opened");
-                *injector = Some(i);
-            }
-            Err(e) => {
-                tracing::error!(error = %format!("{e:#}"), "input injection unavailable");
-                return;
-            }
-        }
-    }
-    let inj = injector.as_mut().unwrap();
+    // Forward to the dedicated injector thread (it opens the backend on the first event and
+    // coalesces redundant motion). A closed channel means the injector thread died at startup —
+    // input is lossy, so drop silently rather than spam.
    for ev in events {
-        if let Err(e) = inj.inject(&ev) {
-            tracing::warn!(error = %format!("{e:#}"), "inject failed");
-        }
+        let _ = inj_tx.send(ev);
    }
 }

@@ -426,3 +441,29 @@ fn gcm_open(key: &[u8; 16], nonce: &[u8], ct_tag: &[u8], aad: &[u8]) -> Option<V
        _ => None,
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::decode_rfi_range;
+
+    /// Build a 0x0301 invalidate-ref-frames plaintext: `[type LE][len LE][firstFrame i64 LE][last i64 LE]`.
+    fn rfi_msg(first: i64, last: i64) -> Vec<u8> {
+        let header: [u8; 4] = [0x01, 0x03, 0x10, 0x00]; // type 0x0301, length 16
+        [&header[..], &first.to_le_bytes()[..], &last.to_le_bytes()[..]].concat()
+    }
+
+    #[test]
+    fn decodes_a_valid_rfi_range() {
+        assert_eq!(decode_rfi_range(&rfi_msg(40, 47)), Some((40, 47)));
+        assert_eq!(decode_rfi_range(&rfi_msg(5, 5)), Some((5, 5))); // single frame
+    }
+
+    #[test]
+    fn rejects_short_or_nonsensical_ranges() {
+        assert_eq!(decode_rfi_range(&[0x01, 0x03, 0x00, 0x00]), None); // header only, no body
+        let full = rfi_msg(40, 47);
+        assert_eq!(decode_rfi_range(&full[..full.len() - 1]), None); // body one byte short
+        assert_eq!(decode_rfi_range(&rfi_msg(-1, 9)), None); // negative first
+        assert_eq!(decode_rfi_range(&rfi_msg(9, 4)), None); // last < first
+    }
+}
@@ -113,6 +113,10 @@ pub struct AppState {
    /// Set by the control stream when the client requests an IDR / invalidates reference
    /// frames (recovery after loss); the video thread forces a keyframe and clears it.
    pub force_idr: std::sync::Arc<std::sync::atomic::AtomicBool>,
+    /// A client reference-frame-invalidation request carrying the lost frame range (0x0301). The
+    /// video thread drains it and calls `Encoder::invalidate_ref_frames`, falling back to a full
+    /// IDR when the encoder can't invalidate (range too old / no NVENC RFI). `None` = nothing pending.
+    pub rfi_range: std::sync::Arc<std::sync::Mutex<Option<(i64, i64)>>>,
    /// Persistent screen capturer, reused across streams so reconnects don't spawn a second
    /// (conflicting) screencast session. The video thread borrows it for the stream's duration
    /// and returns it; `set_active` gates its cost while idle.
@@ -138,6 +142,7 @@ impl AppState {
            streaming: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
            audio_streaming: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
            force_idr: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
+            rfi_range: std::sync::Arc::new(std::sync::Mutex::new(None)),
            video_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
            audio_cap: std::sync::Arc::new(std::sync::Mutex::new(None)),
        }
@@ -293,18 +298,30 @@ fn load_paired() -> Vec<Vec<u8>> {
    }
 }

-/// Persist the paired-client allow-list (called after each successful pairing).
+/// Persist the paired-client allow-list (called after each successful pairing). Written
+/// atomically (temp file + rename) so a crash mid-write can't truncate `paired.json` — a partial
+/// write would otherwise lock out every paired client until they re-pair.
 pub(crate) fn save_paired(paired: &[Vec<u8>]) {
    let Some(path) = paired_path() else { return };
    if let Some(dir) = path.parent() {
        let _ = std::fs::create_dir_all(dir);
    }
-    match serde_json::to_vec(paired) {
-        Ok(bytes) => {
-            if let Err(e) = std::fs::write(&path, bytes) {
-                tracing::warn!(error = %e, "persisting pairings failed");
-            }
+    let bytes = match serde_json::to_vec(paired) {
+        Ok(b) => b,
+        Err(e) => {
+            tracing::warn!(error = %e, "serializing pairings failed");
+            return;
        }
-        Err(e) => tracing::warn!(error = %e, "serializing pairings failed"),
+    };
+    // Temp file + rename over the target (atomic replace on Unix and Windows); never write `path`
+    // in place. NOTE(review): no fsync before the rename, so power-loss durability is unverified.
+    let tmp = path.with_extension("json.tmp");
+    if let Err(e) = std::fs::write(&tmp, &bytes) {
+        tracing::warn!(error = %e, "persisting pairings failed (temp write)");
+        return;
+    }
+    if let Err(e) = std::fs::rename(&tmp, &path) {
+        tracing::warn!(error = %e, "persisting pairings failed (rename)");
+        let _ = std::fs::remove_file(&tmp);
    }
 }
@@ -3,6 +3,7 @@
 //! `/pin` endpoint to deliver the Moonlight-displayed PIN. Over HTTPS the client is
 //! mutual-TLS-authenticated, so `/serverinfo` reports `PairStatus=1` there.

+use super::tls::PeerCertFingerprint;
 use super::{serverinfo, AppState, LaunchSession, HTTPS_PORT, HTTP_PORT, RTSP_PORT};
 use anyhow::{anyhow, Context, Result};
 use axum::{
@@ -23,24 +24,36 @@ struct Https(bool);
 pub async fn run(state: Arc<AppState>) -> Result<()> {
    // Mutual-TLS: request + verify the client cert (Moonlight presents one for the
    // post-pairing pairchallenge + all post-pair endpoints).
-    let tls = axum_server::tls_rustls::RustlsConfig::from_config(super::tls::server_config(
-        &state.identity.cert_pem,
-        &state.identity.key_pem,
-    )?);
+    let tls = super::tls::server_config(&state.identity.cert_pem, &state.identity.key_pem)?;

    let http_addr = SocketAddr::from(([0, 0, 0, 0], HTTP_PORT));
    let https_addr = SocketAddr::from(([0, 0, 0, 0], HTTPS_PORT));
    tracing::info!(%http_addr, %https_addr, "nvhttp listening (serverinfo + pair + launch)");

    let http = axum_server::bind(http_addr).serve(router(state.clone(), false).into_make_service());
-    let https =
-        axum_server::bind_rustls(https_addr, tls).serve(router(state, true).into_make_service());
-    tokio::try_join!(async { http.await.context("nvhttp HTTP server") }, async {
-        https.await.context("nvhttp HTTPS server")
-    },)?;
+    // HTTPS runs the handshake itself (super::tls::serve_https) so handlers see the verified peer
+    // cert as a PeerCertFingerprint extension; the post-pair endpoints gate on the paired allow-list.
+    tokio::try_join!(
+        async { http.await.context("nvhttp HTTP server") },
+        super::tls::serve_https(https_addr, router(state, true), tls),
+    )?;
    Ok(())
 }

+/// Post-handshake authorization for the launch surface (Apollo's `get_verified_cert`): true iff
+/// the request arrived over HTTPS with a client cert whose SHA-256 fingerprint is pinned in the
+/// paired allow-list. Plain-HTTP requests carry no client cert, so they are never paired.
+fn peer_is_paired(peer: &Option<Extension<PeerCertFingerprint>>, st: &AppState) -> bool {
+    // The guard only runs once a presented fingerprint has been destructured out of `peer`.
+    matches!(
+        peer,
+        Some(Extension(PeerCertFingerprint(Some(presented))))
+            if st.paired.lock().unwrap().iter().any(|der| {
+                hex::encode(punktfunk_core::quic::endpoint::cert_fingerprint(der)) == *presented
+            })
+    )
+}
+
 fn router(state: Arc<AppState>, https: bool) -> Router {
    Router::new()
        .route("/serverinfo", get(h_serverinfo))
@@ -61,9 +74,12 @@ fn xml(body: String) -> impl IntoResponse {
 async fn h_serverinfo(
    State(st): State<Arc<AppState>>,
    Extension(Https(https)): Extension<Https>,
+    peer: Option<Extension<PeerCertFingerprint>>,
 ) -> impl IntoResponse {
-    // Over the mutual-TLS port the peer is an authenticated (paired) client → PairStatus=1.
-    xml(serverinfo::serverinfo_xml(&st.host, https))
+    // PairStatus=1 only when the HTTPS peer presented a *pinned* client cert; an unpaired client
+    // (or plain HTTP) sees 0 and is steered into the pairing flow.
+    let paired = https && peer_is_paired(&peer, &st);
+    xml(serverinfo::serverinfo_xml(&st.host, https, paired))
 }

 async fn h_pin(
@@ -79,15 +95,27 @@ async fn h_pin(
    }
 }

-async fn h_applist(State(_st): State<Arc<AppState>>) -> impl IntoResponse {
+async fn h_applist(
+    State(st): State<Arc<AppState>>,
+    peer: Option<Extension<PeerCertFingerprint>>,
+) -> impl IntoResponse {
+    if !peer_is_paired(&peer, &st) {
+        tracing::warn!("applist rejected — client is not paired");
+        return xml(error_xml());
+    }
    // One app for now: the headless desktop (the wlroots virtual output).
    xml(super::apps::applist_xml())
 }

 async fn h_launch(
    State(st): State<Arc<AppState>>,
+    peer: Option<Extension<PeerCertFingerprint>>,
    Query(q): Query<HashMap<String, String>>,
 ) -> impl IntoResponse {
+    if !peer_is_paired(&peer, &st) {
+        tracing::warn!("launch rejected — client is not paired");
+        return xml(error_xml());
+    }
    match launch(&st, &q) {
        Ok(session) => {
            *st.launch.lock().unwrap() = Some(session);
@@ -108,7 +136,14 @@ async fn h_launch(
    }
 }

-async fn h_resume(State(st): State<Arc<AppState>>) -> impl IntoResponse {
+async fn h_resume(
+    State(st): State<Arc<AppState>>,
+    peer: Option<Extension<PeerCertFingerprint>>,
+) -> impl IntoResponse {
+    if !peer_is_paired(&peer, &st) {
+        tracing::warn!("resume rejected — client is not paired");
+        return xml(error_xml());
+    }
    if st.launch.lock().unwrap().is_some() {
        xml(session_url_xml(&st, "resume"))
    } else {
@@ -116,7 +151,14 @@ async fn h_resume(State(st): State<Arc<AppState>>) -> impl IntoResponse {
    }
 }

-async fn h_cancel(State(st): State<Arc<AppState>>) -> impl IntoResponse {
+async fn h_cancel(
+    State(st): State<Arc<AppState>>,
+    peer: Option<Extension<PeerCertFingerprint>>,
+) -> impl IntoResponse {
+    if !peer_is_paired(&peer, &st) {
+        tracing::warn!("cancel rejected — client is not paired");
+        return xml(error_xml());
+    }
    *st.launch.lock().unwrap() = None;
    // Quit semantics: stop the running media threads (they observe these flags) so the session
    // actually ends — the virtual output/gamescope teardown follows via the capturer's RAII.
@@ -234,3 +276,56 @@ fn pair_error_xml() -> String {
 fn error_xml() -> String {
    "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<root status_code=\"400\"></root>\n".to_string()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::net::{IpAddr, Ipv4Addr};
+
+    /// Fresh `AppState` for a loopback host with an ephemeral server identity; nothing is pinned
+    /// by the test until it pushes onto `paired` itself.
+    fn test_state() -> Arc<AppState> {
+        let identity = super::super::cert::ServerIdentity::ephemeral().expect("ephemeral identity");
+        let host = super::super::Host {
+            hostname: "t".into(),
+            uniqueid: "id".into(),
+            local_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
+            http_port: HTTP_PORT,
+            https_port: HTTPS_PORT,
+        };
+        Arc::new(AppState::new(host, identity))
+    }
+
+    /// The request extension carried by a peer that presented the client cert `der`.
+    fn peer_for(der: &[u8]) -> Option<Extension<PeerCertFingerprint>> {
+        let fp = hex::encode(punktfunk_core::quic::endpoint::cert_fingerprint(der));
+        Some(Extension(PeerCertFingerprint(Some(fp))))
+    }
+
+    /// Launch/resume/applist/cancel must turn away every client whose cert fingerprint is not in
+    /// the paired allow-list, a certless (plain-HTTP) peer included.
+    #[test]
+    fn launch_gate_requires_a_pinned_client_cert() {
+        let st = test_state();
+        let der = b"a-client-cert-der".to_vec();
+        let known = peer_for(&der);
+        let certless = Some(Extension(PeerCertFingerprint(None)));
+
+        // Nothing pinned yet: a presented cert, a missing extension and an explicit `None` all fail.
+        assert!(!peer_is_paired(&known, &st), "unknown cert must be rejected");
+        assert!(!peer_is_paired(&None, &st), "no client cert must be rejected");
+        assert!(
+            !peer_is_paired(&certless, &st),
+            "certless HTTPS peer must be rejected"
+        );
+
+        // Pin `der`: its fingerprint now passes, while any other cert keeps being refused.
+        st.paired.lock().unwrap().push(der);
+        assert!(peer_is_paired(&known, &st), "pinned cert must be accepted");
+        let stranger = peer_for(b"different-der");
+        assert!(
+            !peer_is_paired(&stranger, &st),
+            "a non-pinned cert stays rejected"
+        );
+    }
+}
@@ -182,6 +182,7 @@ fn handle_request(req: &Request, state: &AppState) -> String {
                        app,
                        state.streaming.clone(),
                        state.force_idr.clone(),
+                        state.rfi_range.clone(),
                        state.video_cap.clone(),
                    );
                }
@@ -3,18 +3,19 @@
 use super::{Host, APP_VERSION, GFE_VERSION, SERVER_CODEC_MODE_SUPPORT};

 /// Build the `<root status_code="200">…</root>` serverinfo document. `https` selects the
-/// paired-HTTPS variant (real MAC). Element names are case-sensitive and match what
-/// moonlight-common-c parses.
-pub fn serverinfo_xml(host: &Host, https: bool) -> String {
-    // MAC is hidden over plain HTTP; PairStatus reflects the pairing store once the HTTPS
-    // path carries per-client identity (a hardening follow-up — 0 for now).
+/// paired-HTTPS variant (real MAC); `paired` is whether the HTTPS peer presented a client cert
+/// that is in the paired allow-list (drives `PairStatus`). Element names are case-sensitive and
+/// match what moonlight-common-c parses.
+pub fn serverinfo_xml(host: &Host, https: bool, paired: bool) -> String {
+    // MAC is hidden over plain HTTP (no per-client identity there).
    let mac = if https {
        "01:02:03:04:05:06"
    } else {
        "00:00:00:00:00:00"
    };
-    // Over the mutual-TLS HTTPS port the peer is an authenticated (paired) client.
-    let pair_status = u8::from(https);
+    // PairStatus reflects the real allow-list: 1 only when the HTTPS peer's client-cert
+    // fingerprint is pinned (the nvhttp handler computes `paired`); 0 otherwise (incl. plain HTTP).
+    let pair_status = u8::from(paired);
    let codec_mode_support = codec_mode_support();
    format!(
        r#"<?xml version="1.0" encoding="utf-8"?>
@@ -104,7 +105,7 @@ mod tests {
            http_port: 47989,
            https_port: 47984,
        };
-        let xml = serverinfo_xml(&host, false);
+        let xml = serverinfo_xml(&host, false, false);
        // The mask is the GPU-aware value (NVENC/no-GPU → the static 65793; a VAAPI host →
        // whatever it probes). Assert the XML embeds exactly what `codec_mode_support()` returns,
        // so the test is deterministic regardless of the build host's GPU.
@@ -31,6 +31,10 @@ pub struct StreamConfig {
 /// streams so a reconnect doesn't open a second (conflicting) screencast session.
 pub type CapturerSlot = Arc<std::sync::Mutex<Option<Box<dyn Capturer>>>>;

+/// A pending client reference-frame-invalidation range (lost `firstFrame..=lastFrame`), set by the
+/// control plane and drained by the video thread (see [`AppState::rfi_range`](super::AppState)).
+pub type RfiSlot = Arc<std::sync::Mutex<Option<(i64, i64)>>>;
+
 /// Spawn the video stream thread (idempotent via `running`). Stops when `running` clears.
 /// `force_idr` is set by the control stream on a client recovery request; `video_cap` holds
 /// the persistent capturer the thread borrows for the stream's duration.
@@ -39,13 +43,21 @@ pub fn start(
    app: Option<super::apps::AppEntry>,
    running: Arc<AtomicBool>,
    force_idr: Arc<AtomicBool>,
+    rfi_range: RfiSlot,
    video_cap: CapturerSlot,
 ) {
    let _ = std::thread::Builder::new()
        .name("punktfunk-video".into())
        .spawn(move || {
            tracing::info!(?cfg, "video stream starting");
-            if let Err(e) = run(cfg, app.as_ref(), &running, &force_idr, &video_cap) {
+            if let Err(e) = run(
+                cfg,
+                app.as_ref(),
+                &running,
+                &force_idr,
+                &rfi_range,
+                &video_cap,
+            ) {
                tracing::error!(error = %format!("{e:#}"), "video stream failed");
            }
            running.store(false, Ordering::SeqCst);
@@ -58,6 +70,7 @@ fn run(
    app: Option<&super::apps::AppEntry>,
    running: &Arc<AtomicBool>,
    force_idr: &AtomicBool,
+    rfi_range: &std::sync::Mutex<Option<(i64, i64)>>,
    video_cap: &std::sync::Mutex<Option<Box<dyn Capturer>>>,
 ) -> Result<()> {
    // GameStream capture/encode thread: apply Windows session tuning (no-op off Windows).
@@ -66,6 +79,10 @@ fn run(
    encode::validate_dimensions(cfg.codec, cfg.width, cfg.height)
        .context("client-requested video mode")?;
    let sock = UdpSocket::bind(("0.0.0.0", VIDEO_PORT)).context("bind video UDP")?;
+    // Grow SO_SNDBUF/RCVBUF (avoid host-side ENOBUFS at high bitrate) like the native plane, and
+    // opt-in DSCP/QoS-tag this as the video class (PUNKTFUNK_DSCP=1).
+    punktfunk_core::transport::grow_socket_buffers(&sock);
+    punktfunk_core::transport::set_media_qos(&sock, punktfunk_core::transport::MediaClass::Video);
    // The client pings the video port so we learn where to send; it re-pings until video
    // flows, so a missed early ping is fine.
    sock.set_read_timeout(Some(Duration::from_secs(10)))?;
@@ -115,7 +132,7 @@ fn run(
        let mut capturer =
            capture::capture_virtual_output(vout).context("capture virtual output")?;
        capturer.set_active(true);
-        return stream_body(&mut *capturer, &sock, cfg, running, force_idr);
+        return stream_body(&mut *capturer, &sock, cfg, running, force_idr, rfi_range);
    }

    // Reuse the persistent capturer (one screencast session → clean reconnect); create it on
@@ -135,7 +152,7 @@ fn run(
        }
    };
    capturer.set_active(true);
-    let result = stream_body(&mut *capturer, &sock, cfg, running, force_idr);
+    let result = stream_body(&mut *capturer, &sock, cfg, running, force_idr, rfi_range);
    capturer.set_active(false);
    *video_cap.lock().unwrap() = Some(capturer);
    result
@@ -275,6 +292,7 @@ fn stream_body(
    cfg: StreamConfig,
    running: &Arc<AtomicBool>,
    force_idr: &AtomicBool,
+    rfi_range: &std::sync::Mutex<Option<(i64, i64)>>,
 ) -> Result<()> {
    // The first frame establishes the authoritative size/format for the encoder.
    let mut frame = capturer.next_frame().context("capture first frame")?;
@@ -349,8 +367,16 @@ fn stream_body(
            uniq += 1;
        }
        let t_cap = tick.elapsed();
-        // Honor a client recovery request (RFI / request-IDR): force a keyframe so the client
-        // resyncs immediately instead of waiting for the next GOP boundary.
+        // Honor a client recovery request: prefer reference-frame invalidation (no costly IDR
+        // spike) and force a keyframe when the encoder returns false (range too old, or no NVENC
+        // RFI). The range is taken out first so the slot's lock is not held across the encoder call.
+        let lost_range = rfi_range.lock().unwrap().take();
+        if let Some((first, last)) = lost_range {
+            if !enc.invalidate_ref_frames(first, last) {
+                enc.request_keyframe();
+            }
+        }
+        // An explicit IDR request (or a rangeless RFI) forces a keyframe now, not at the next GOP.
        if force_idr.swap(false, Ordering::SeqCst) {
            enc.request_keyframe();
        }
@@ -1,17 +1,88 @@
-//! TLS for the HTTPS nvhttp port (47984). Moonlight does **mutual TLS** — it presents its
-//! client cert and expects the server to request one — so a plain server-auth config makes
-//! the post-pairing `pairchallenge` fail. This config requests the client cert and verifies
-//! the client owns its key, but (for now) accepts any well-formed cert; enforcing the
-//! paired allow-list (rejecting unpaired clients on /launch) is a follow-up hardening step.
+//! TLS for the HTTPS nvhttp port (47984) and the management API. Moonlight does **mutual TLS** —
+//! it presents its client cert and expects the server to request one — so a plain server-auth
+//! config makes the post-pairing `pairchallenge` fail. This config requests the client cert and
+//! verifies the client owns its key, but accepts any well-formed cert at the *handshake* (the
+//! pairing ceremony is the real proof of identity). Authorization against the paired allow-list is
+//! then enforced per-request: [`serve_https`] reads the verified peer cert and attaches its
+//! fingerprint ([`PeerCertFingerprint`]) to each request, and the nvhttp/mgmt handlers reject
+//! callers whose fingerprint is not pinned (mirroring Apollo's post-handshake `get_verified_cert`).

 use anyhow::{anyhow, Context, Result};
+use axum::Router;
 use rustls::client::danger::HandshakeSignatureValid;
 use rustls::crypto::{verify_tls12_signature, verify_tls13_signature, CryptoProvider};
 use rustls::pki_types::{CertificateDer, UnixTime};
 use rustls::server::danger::{ClientCertVerified, ClientCertVerifier};
 use rustls::{DigitallySignedStruct, DistinguishedName, ServerConfig, SignatureScheme};
+use std::net::SocketAddr;
 use std::sync::Arc;

+/// SHA-256 of the peer's client certificate (hex), injected per-connection into each request's
+/// extensions by [`serve_https`]; `None` when the peer presented no client cert (plain HTTP, or a
+/// browser falling back to a bearer token). Handlers authorize a request whose fingerprint is in
+/// the paired store.
+#[derive(Clone)]
+pub(crate) struct PeerCertFingerprint(pub Option<String>);
+
+/// HTTPS server that surfaces the verified client cert to handlers. `axum_server` can't expose the
+/// peer cert, so this runs the rustls handshake itself (tokio-rustls), reads the peer certificate,
+/// and serves the axum `Router` over hyper with the peer's fingerprint attached to every request as
+/// a [`PeerCertFingerprint`] extension. Shared by the nvhttp HTTPS listener and the management API.
+/// Returns `Err` only if the bind fails; accept errors are logged and retried after a short pause.
+pub(crate) async fn serve_https(
+    bind: SocketAddr,
+    app: Router,
+    tls: Arc<ServerConfig>,
+) -> Result<()> {
+    use tower::ServiceExt;
+    let acceptor = tokio_rustls::TlsAcceptor::from(tls);
+    let listener = tokio::net::TcpListener::bind(bind)
+        .await
+        .with_context(|| format!("bind HTTPS {bind}"))?;
+    loop {
+        let (tcp, _peer) = match listener.accept().await {
+            Ok(v) => v,
+            Err(e) => {
+                tracing::warn!(error = %e, "HTTPS accept failed");
+                tokio::time::sleep(std::time::Duration::from_millis(100)).await; // no hot loop
+                continue;
+            }
+        };
+        let acceptor = acceptor.clone();
+        let app = app.clone();
+        tokio::spawn(async move {
+            let tls_stream = match acceptor.accept(tcp).await {
+                Ok(s) => s,
+                // A failed handshake is routine (port scan, cert bail, hang-up) — not fatal.
+                Err(_) => return,
+            };
+            // Peer cert (the verifier accepts any well-formed one) → SHA-256 fingerprint.
+            let fp = tls_stream
+                .get_ref()
+                .1
+                .peer_certificates()
+                .and_then(|c| c.first())
+                .map(|c| hex::encode(punktfunk_core::quic::endpoint::cert_fingerprint(c.as_ref())));
+            let peer = PeerCertFingerprint(fp);
+            let svc =
+                hyper::service::service_fn(move |req: hyper::Request<hyper::body::Incoming>| {
+                    let app = app.clone();
+                    let peer = peer.clone();
+                    async move {
+                        let mut req = req.map(axum::body::Body::new);
+                        req.extensions_mut().insert(peer);
+                        app.oneshot(req).await // Router error is Infallible
+                    }
+                });
+            let io = hyper_util::rt::TokioIo::new(tls_stream);
+            let _ =
+                hyper_util::server::conn::auto::Builder::new(hyper_util::rt::TokioExecutor::new())
+                    .serve_connection_with_upgrades(io, svc)
+                    .await;
+        });
+    }
+}
+
 /// Requests + signature-checks the client cert but accepts any (the pairing handshake is
 /// the real proof). Pinning to the paired set is a hardening follow-up.
 #[derive(Debug)]
@@ -10,7 +10,7 @@
 //! keysyms correctly.

 use anyhow::Result;
-use punktfunk_core::input::InputEvent;
+use punktfunk_core::input::{InputEvent, InputKind};

 /// Injects input events into the host session. Not `Send`: an injector owns compositor
 /// resources (a Wayland connection, an xkb state) and lives entirely on the control thread
@@ -127,6 +127,133 @@ pub fn default_backend() -> Backend {
    }
 }

+/// Host-lifetime pointer/keyboard injector running on its OWN thread, fed over a clonable `Send`
+/// channel. The injector backend owns non-`Send` compositor state (a Wayland connection / xkb / EIS
+/// socket), so it must live on a single thread; both the GameStream control plane and the native
+/// punktfunk/1 plane forward their decoded keyboard/mouse events here instead of injecting inline, so
+/// a slow inject (a portal stall, a desktop switch) never head-blocks the network thread's
+/// keepalive/retransmit servicing.
+pub(crate) struct InjectorService {
+    tx: std::sync::mpsc::Sender<InputEvent>,
+}
+
+impl InjectorService {
+    pub(crate) fn start() -> InjectorService {
+        let (tx, rx) = std::sync::mpsc::channel::<InputEvent>();
+        if let Err(e) = std::thread::Builder::new()
+            .name("punktfunk-injector".into())
+            .spawn(move || injector_service_thread(rx))
+        {
+            tracing::error!(error = %e, "injector service thread spawn failed — pointer/keyboard input disabled");
+        }
+        InjectorService { tx }
+    }
+
+    /// A sender a session/plane forwards its pointer/keyboard events to. Cloned per caller; dropping a
+    /// clone does NOT stop the service (it runs while any sender — incl. the service's own — lives).
+    pub(crate) fn sender(&self) -> std::sync::mpsc::Sender<InputEvent> {
+        self.tx.clone()
+    }
+}
+
+/// Backoff between reopen attempts after the injector backend fails to open or its worker dies, so a
+/// persistently-unavailable portal isn't hammered once per event.
+const INJECTOR_REOPEN_BACKOFF: std::time::Duration = std::time::Duration::from_secs(2);
+
+/// The host-lifetime injector worker: lazily open the pointer/keyboard backend, then inject every
+/// forwarded event. Reopen (after [`INJECTOR_REOPEN_BACKOFF`]) on open failure, on a backend change
+/// (input follows the active session), or if the backend's worker dies mid-stream. Exits only when
+/// every sender has dropped (host shutdown), which drops the injector and closes its portal session.
+///
+/// Each wake drains the whole backlog and [`coalesce`]s redundant motion before injecting, so a slow
+/// backend never builds up a queue of stale relative-mouse/scroll events (latency) — while button,
+/// key, and absolute-move ordering is preserved exactly.
+fn injector_service_thread(rx: std::sync::mpsc::Receiver<InputEvent>) {
+    let mut injector: Option<Box<dyn InputInjector>> = None;
+    let mut open_backend: Option<Backend> = None;
+    let mut last_failed: Option<std::time::Instant> = None;
+    while let Ok(first) = rx.recv() {
+        // Drain everything already queued behind `first` so we coalesce a whole burst at once.
+        let mut batch = vec![first];
+        while let Ok(ev) = rx.try_recv() {
+            batch.push(ev);
+        }
+
+        // The resolved input backend (PUNKTFUNK_INPUT_BACKEND, set per connect / mid-stream session
+        // switch) may have changed since we opened. Reopen against it so input FOLLOWS the active
+        // session instead of injecting into a stale, still-warm backend (e.g. the managed gamescope's
+        // EIS socket after the user switched to the KDE desktop).
+        let want = default_backend();
+        if injector.is_some() && open_backend != Some(want) {
+            tracing::info!(
+                ?open_backend,
+                ?want,
+                "input: backend changed — reopening injector for the active session"
+            );
+            injector = None;
+            last_failed = None; // re-resolve immediately
+        }
+        if injector.is_none() {
+            // Open on the first event; after a failure wait out the backoff before retrying (a few
+            // events drop during setup — acceptable, input is lossy).
+            let ready = last_failed.is_none_or(|t| t.elapsed() >= INJECTOR_REOPEN_BACKOFF);
+            if ready {
+                match open(want) {
+                    Ok(i) => {
+                        tracing::info!(backend = ?want, "input injector ready (host-lifetime)");
+                        injector = Some(i);
+                        open_backend = Some(want);
+                        last_failed = None;
+                    }
+                    Err(e) => {
+                        tracing::error!(error = %format!("{e:#}"), "pointer/keyboard injection unavailable — will retry");
+                        last_failed = Some(std::time::Instant::now());
+                    }
+                }
+            }
+        }
+        if let Some(inj) = injector.as_mut() {
+            for ev in coalesce(batch) {
+                if let Err(e) = inj.inject(&ev) {
+                    // The backend's worker (portal session / EIS socket) died — drop it and reopen on
+                    // a later event (covers a gamescope EIS socket that respawns with its session).
+                    tracing::warn!(error = %format!("{e:#}"), "inject failed — reopening injector");
+                    injector = None;
+                    open_backend = None;
+                    last_failed = Some(std::time::Instant::now());
+                    break; // abandon the rest of this batch; the next one reopens
+                }
+            }
+        }
+    }
+    tracing::debug!("injector service stopped (host shutting down)");
+}
+
+/// Coalesce a drained burst: sum consecutive relative-mouse deltas and consecutive same-axis scroll
+/// deltas (identical net effect, far fewer injects); all other events pass through untouched and in
+/// order. Only *adjacent* events merge, so a button or key between two moves starts a new run.
+fn coalesce(events: Vec<InputEvent>) -> Vec<InputEvent> {
+    let mut out: Vec<InputEvent> = Vec::with_capacity(events.len());
+    for ev in events {
+        match out.last_mut() {
+            Some(last) if last.kind == InputKind::MouseMove && ev.kind == InputKind::MouseMove => {
+                last.x = last.x.saturating_add(ev.x);
+                last.y = last.y.saturating_add(ev.y);
+            }
+            Some(last)
+                if last.kind == InputKind::MouseScroll
+                    && ev.kind == InputKind::MouseScroll
+                    && last.code == ev.code =>
+            {
+                // NOTE(review): `ev.y` is discarded here — confirm scroll never uses `y`.
+                last.x = last.x.saturating_add(ev.x);
+            }
+            _ => out.push(ev),
+        }
+    }
+    out
+}
+
 /// How the libei backend reaches its EIS server. KWin goes through the `RemoteDesktop` *portal*
 /// (with a pre-seeded grant), but GNOME's portal `Start()` needs an interactive approval a
 /// headless host can't answer — so GNOME goes straight to Mutter's *direct* RemoteDesktop EIS
@@ -321,3 +448,57 @@ mod libei;
 mod sendinput;
 #[cfg(target_os = "linux")]
 mod wlr;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn mk(kind: InputKind, code: u32, x: i32, y: i32) -> InputEvent {
+        InputEvent {
+            kind,
+            _pad: [0; 3],
+            code,
+            x,
+            y,
+            flags: 0,
+        }
+    }
+
+    #[test]
+    fn coalesce_sums_adjacent_motion_and_preserves_order() {
+        let events = vec![
+            mk(InputKind::MouseMove, 0, 1, 2),
+            mk(InputKind::MouseMove, 0, 3, -1), // → summed with the previous move
+            mk(InputKind::KeyDown, 30, 0, 0),   // flushes the move, passes through verbatim
+            mk(InputKind::MouseMove, 0, 5, 5),  // a NEW run after the key (not merged across it)
+            mk(InputKind::MouseScroll, 0, 1, 0),
+            mk(InputKind::MouseScroll, 0, 2, 0), // same axis (code 0) → summed
+            mk(InputKind::MouseScroll, 1, 1, 0), // different axis (code 1) → separate
+        ];
+        let out = coalesce(events);
+        assert_eq!(out.len(), 5);
+        assert_eq!(
+            (out[0].kind, out[0].x, out[0].y),
+            (InputKind::MouseMove, 4, 1)
+        );
+        assert_eq!(out[1].kind, InputKind::KeyDown);
+        assert_eq!(
+            (out[2].kind, out[2].x, out[2].y),
+            (InputKind::MouseMove, 5, 5)
+        );
+        assert_eq!(
+            (out[3].kind, out[3].code, out[3].x),
+            (InputKind::MouseScroll, 0, 3)
+        );
+        assert_eq!(
+            (out[4].kind, out[4].code, out[4].x),
+            (InputKind::MouseScroll, 1, 1)
+        );
+    }
+
+    #[test]
+    fn coalesce_handles_empty_and_singleton() {
+        assert!(coalesce(vec![]).is_empty());
+        assert_eq!(coalesce(vec![mk(InputKind::MouseMove, 0, 7, 8)]).len(), 1);
+    }
+}
@@ -17,6 +17,7 @@

 use crate::encode::Codec;
 use crate::gamestream::{
+    tls::{serve_https, PeerCertFingerprint},
    AppState, APP_VERSION, AUDIO_PORT, CONTROL_PORT, GFE_VERSION, RTSP_PORT, VIDEO_PORT,
 };
 use anyhow::{Context, Result};
@@ -103,66 +104,6 @@ pub async fn run(
    serve_https(opts.bind, app, tls).await
 }

-/// SHA-256 of the peer's client certificate (hex), injected per-connection into each request's
-/// extensions by [`serve_https`]; `None` when the peer presented no client cert. `require_auth`
-/// authorizes a request whose fingerprint is in the paired store.
-#[derive(Clone)]
-struct PeerCertFingerprint(Option<String>);
-
-/// HTTPS server for the mgmt API. axum-server can't surface the client cert to a handler, so this
-/// runs the rustls handshake itself (via tokio-rustls), reads the verified peer certificate, and
-/// serves the axum `Router` over hyper with the peer's fingerprint attached to every request.
-async fn serve_https(bind: SocketAddr, app: Router, tls: Arc<rustls::ServerConfig>) -> Result<()> {
-    use tower::ServiceExt;
-    let acceptor = tokio_rustls::TlsAcceptor::from(tls);
-    let listener = tokio::net::TcpListener::bind(bind)
-        .await
-        .with_context(|| format!("bind management API {bind}"))?;
-    loop {
-        let (tcp, _peer) = match listener.accept().await {
-            Ok(v) => v,
-            Err(e) => {
-                tracing::warn!(error = %e, "management API accept failed");
-                continue;
-            }
-        };
-        let acceptor = acceptor.clone();
-        let app = app.clone();
-        tokio::spawn(async move {
-            let tls_stream = match acceptor.accept(tcp).await {
-                Ok(s) => s,
-                // A failed handshake is routine (port scan, a browser bailing on the self-signed
-                // cert, a client cert we'd still accept but the peer hung up) — not fatal.
-                Err(_) => return,
-            };
-            // The verified peer cert (the verifier accepts any well-formed one; we authorize by
-            // fingerprint in the auth layer) → its SHA-256, matched against the paired store.
-            let fp = tls_stream
-                .get_ref()
-                .1
-                .peer_certificates()
-                .and_then(|c| c.first())
-                .map(|c| hex::encode(punktfunk_core::quic::endpoint::cert_fingerprint(c.as_ref())));
-            let peer = PeerCertFingerprint(fp);
-            let svc =
-                hyper::service::service_fn(move |req: hyper::Request<hyper::body::Incoming>| {
-                    let app = app.clone();
-                    let peer = peer.clone();
-                    async move {
-                        let mut req = req.map(axum::body::Body::new);
-                        req.extensions_mut().insert(peer);
-                        app.oneshot(req).await // Router error is Infallible
-                    }
-                });
-            let io = hyper_util::rt::TokioIo::new(tls_stream);
-            let _ =
-                hyper_util::server::conn::auto::Builder::new(hyper_util::rt::TokioExecutor::new())
-                    .serve_connection_with_upgrades(io, svc)
-                    .await;
-        });
-    }
-}
-
 /// Compose the full management router (also used directly by the handler tests).
 fn app(
    state: Arc<AppState>,
@@ -27,13 +27,14 @@ use punktfunk_core::config::{CompositorPref, FecConfig, FecScheme, GamepadPref,
 use punktfunk_core::input::{InputEvent, InputKind};
 use punktfunk_core::packet::{FLAG_PIC, FLAG_PROBE, FLAG_SOF};
 use punktfunk_core::quic::{
-    endpoint, io, ClockEcho, ClockProbe, Hello, PairChallenge, PairProof, PairRequest, PairResult,
-    ProbeRequest, ProbeResult, Reconfigure, Reconfigured, RequestKeyframe, Start, Welcome,
+    endpoint, io, ClockEcho, ClockProbe, Hello, LossReport, PairChallenge, PairProof, PairRequest,
+    PairResult, ProbeRequest, ProbeResult, Reconfigure, Reconfigured, RequestKeyframe, Start,
+    Welcome,
 };
 use punktfunk_core::transport::UdpTransport;
 use punktfunk_core::Session;
 use rand::RngCore;
-use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
 use std::sync::Arc;

 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -199,7 +200,7 @@ pub(crate) async fn serve(opts: Punktfunk1Options, np: Arc<NativePairing>) -> Re
    // RemoteDesktop-portal grant is established ONCE and reused, instead of a CreateSession per
    // session — which, under rapid client reconnects, raced a prior session's portal teardown and
    // wedged KWin's EIS setup ("EIS setup timed out"). Gamepads stay per-session (uinput).
-    let injector = InjectorService::start();
+    let injector = crate::inject::InjectorService::start();
    // One virtual microphone for the whole host lifetime (see MicService): the client's mic uplink
    // (0xCB) is Opus-decoded and fed into a persistent virtual mic host apps record from (Linux
    // PipeWire Audio/Source; Windows a virtual audio device's render endpoint).
@@ -334,15 +335,41 @@ fn resolve_bitrate_kbps(requested: u32) -> u32 {
    }
 }

-/// FEC recovery percent for the session's Welcome. Default 20% (Sunshine's default too); a clean
-/// wired LAN can lower it (every recovery shard is wire bytes + packets), so `PUNKTFUNK_FEC_PCT`
-/// overrides it — e.g. `0` disables FEC entirely, `10` halves the overhead. Clamped to ≤ 90.
-fn fec_percent_from_env() -> u8 {
+/// Static FEC override: `PUNKTFUNK_FEC_PCT`, when set to a valid `u8`, PINS the recovery percent
+/// (clamped to ≤ 90; `0` disables FEC entirely) and DISABLES adaptive FEC — so a speed test /
+/// measurement keeps a fixed, known overhead. `None` (unset or unparsable) ⇒ adaptive FEC: the
+/// host sizes recovery to the loss the client reports.
+fn fec_static_override() -> Option<u8> {
    std::env::var("PUNKTFUNK_FEC_PCT")
        .ok()
        .and_then(|s| s.trim().parse::<u8>().ok())
        .map(|p| p.min(90))
-        .unwrap_or(20)
+}
+
+/// Adaptive-FEC band + starting point. Every recovery shard is extra wire bytes AND an extra
+/// packet, so on a clean link FEC decays toward [`FEC_MIN`] (fewer packets — the win for a
+/// packet-rate-bound uplink like the Steam Deck's WiFi tx); loss ramps it toward [`FEC_MAX`].
+/// Sessions start moderate so the first frames (before any loss report) are protected.
+const FEC_MIN: u8 = 1;
+const FEC_MAX: u8 = 50;
+const FEC_ADAPTIVE_START: u8 = 10;
+
+/// Map the client's reported data-plane loss (ppm of shards, see [`LossReport`]) to a recovery
+/// percentage. FEC must EXCEED the loss rate to recover a block, so target ≈ loss × 1.4 + 1 pt of
+/// margin, clamped to the band. A clean link (≈0 ppm) lands on [`FEC_MIN`].
+fn adapt_fec(loss_ppm: u32) -> u8 {
+    let loss_pct = loss_ppm as f64 / 10_000.0; // ppm → percent
+    let target = (loss_pct * 1.4).ceil() as u32 + 1;
+    target.clamp(FEC_MIN as u32, FEC_MAX as u32) as u8
+}
+
+/// Apply the latest adaptive-FEC target to the session if it changed (cheap relaxed load + compare),
+/// called once per frame on the data-plane send path.
+fn apply_fec_target(session: &mut Session, fec_target: &AtomicU8) {
+    let t = fec_target.load(Ordering::Relaxed);
+    if session.fec_percent() != t {
+        session.set_fec_percent(t);
+    }
 }

 /// Persistent audio-capturer slot, reused across sessions (same pattern as the GameStream
@@ -588,7 +615,9 @@ async fn serve_session(
            // The post-GameStream point of punktfunk/1: Leopard GF(2¹⁶) FEC + real encryption.
            fec: FecConfig {
                scheme: FecScheme::Gf16,
-                fec_percent: fec_percent_from_env(),
+                // Static override pins it; otherwise sessions start at `FEC_ADAPTIVE_START` and the
+                // host re-sizes FEC live from the client's LossReports (adaptive FEC).
+                fec_percent: fec_static_override().unwrap_or(FEC_ADAPTIVE_START),
                max_data_per_block: 4096,
            },
            // ~1452-byte payload keeps the IP datagram within a 1500 MTU (1452 + 40 header + 24
@@ -644,6 +673,12 @@ async fn serve_session(
    let (probe_tx, probe_rx) = std::sync::mpsc::channel::<ProbeRequest>();
    let (probe_result_tx, mut probe_result_rx) =
        tokio::sync::mpsc::unbounded_channel::<ProbeResult>();
+    // Adaptive FEC: the control task maps each client LossReport to a recovery percent and publishes
+    // it here; the data-plane send loop reads + applies it per frame. Disabled (pinned) when
+    // PUNKTFUNK_FEC_PCT is set. Seeded with the session's starting FEC so it's a no-op until a report.
+    let adaptive_fec = fec_static_override().is_none();
+    let fec_target = Arc::new(AtomicU8::new(welcome.fec.fec_percent));
+    let fec_target_ctl = fec_target.clone();
    tokio::spawn(async move {
        let mut active = hello.mode;
        loop {
@@ -679,6 +714,22 @@ async fn serve_session(
                        if keyframe_tx.send(()).is_err() {
                            break; // data plane gone
                        }
+                    } else if let Ok(rep) = LossReport::decode(&msg) {
+                        // Adaptive FEC: size recovery to the loss the client is seeing. The data-plane
+                        // send loop reads `fec_target_ctl` and applies it per frame. Ignored when FEC
+                        // is pinned via PUNKTFUNK_FEC_PCT.
+                        if adaptive_fec {
+                            let target = adapt_fec(rep.loss_ppm);
+                            let prev = fec_target_ctl.swap(target, Ordering::Relaxed);
+                            if prev != target {
+                                tracing::info!(
+                                    loss_ppm = rep.loss_ppm,
+                                    fec_pct = target,
+                                    prev_fec_pct = prev,
+                                    "adaptive FEC adjusted"
+                                );
+                            }
+                        }
                    } else if let Ok(req) = ProbeRequest::decode(&msg) {
                        tracing::info!(
                            target_kbps = req.target_kbps,
@@ -830,6 +881,7 @@ async fn serve_session(
    let bitrate_kbps = welcome.bitrate_kbps; // resolved encoder bitrate (Hello clamped, or default)
    let bit_depth = welcome.bit_depth; // resolved encode bit depth (8, or 10 when negotiated)
    let stop_stream = stop.clone();
+    let fec_target_dp = fec_target.clone(); // data-plane handle to the adaptive-FEC target
    let result: Result<()> = async {
        tokio::task::spawn_blocking(move || -> Result<()> {
            // Wait briefly for the client to hole-punch our data port, then stream to its OBSERVED
@@ -865,6 +917,7 @@ async fn serve_session(
                    &stop_stream,
                    &probe_rx,
                    &probe_result_tx,
+                    &fec_target_dp,
                ),
                Punktfunk1Source::Virtual => {
                    let compositor = compositor
@@ -881,6 +934,7 @@ async fn serve_session(
                        bit_depth,
                        probe_rx,
                        probe_result_tx,
+                        fec_target_dp,
                    )
                }
            }
@@ -974,103 +1028,10 @@ impl PadState {
 /// actual pad creation at its own MAX_PADS.
 const MAX_WIRE_PADS: usize = 16;

-/// Host-lifetime pointer/keyboard injector, shared across punktfunk/1 sessions.
-///
-/// The injector backend (libei/RemoteDesktop on KWin/GNOME, gamescope's EIS, wlr, uinput) owns
-/// compositor resources and is `!Send`, so — unlike the audio capturer — it can't be handed
-/// between per-session threads through a slot. Instead one host-lifetime thread *owns* it and
-/// injects events forwarded over a clonable `Send` channel. Opening it ONCE means the privileged
-/// RemoteDesktop-portal grant is established once and held for the whole run, eliminating the
-/// per-session `CreateSession` churn that wedged KWin's EIS setup (rapid client reconnects raced
-/// a prior session's portal teardown — "EIS setup timed out"). The service opens lazily on the
-/// first event and reopens, after a backoff, if injection fails — so a transient portal hiccup,
-/// or a gamescope EIS socket that respawns with its nested session, self-heals.
-struct InjectorService {
-    tx: std::sync::mpsc::Sender<InputEvent>,
-}
-
-impl InjectorService {
-    fn start() -> InjectorService {
-        let (tx, rx) = std::sync::mpsc::channel::<InputEvent>();
-        if let Err(e) = std::thread::Builder::new()
-            .name("punktfunk1-injector".into())
-            .spawn(move || injector_service_thread(rx))
-        {
-            tracing::error!(error = %e, "injector service thread spawn failed — pointer/keyboard input disabled");
-        }
-        InjectorService { tx }
-    }
-
-    /// A sender a session forwards its pointer/keyboard events to. Cloned per session; dropping a
-    /// clone does NOT stop the service (the service holds the original sender for the host life).
-    fn sender(&self) -> std::sync::mpsc::Sender<InputEvent> {
-        self.tx.clone()
-    }
-}
-
-/// Backoff between reopen attempts after the injector backend fails to open or its worker dies,
-/// so a persistently-unavailable portal isn't hammered once per event.
+/// Backoff between reopen attempts after a host-lifetime service's backend (the mic source, a
+/// capturer) fails to open or its worker dies, so a persistently-unavailable resource isn't hammered.
 const INJECTOR_REOPEN_BACKOFF: std::time::Duration = std::time::Duration::from_secs(2);

-/// The host-lifetime injector worker: lazily open the pointer/keyboard backend, then inject every
-/// forwarded event into it. Reopen (after [`INJECTOR_REOPEN_BACKOFF`]) on open failure or if the
-/// backend's worker dies mid-stream. Exits only when every session sender *and* the service's own
-/// sender have dropped (host shutdown), which drops the injector and closes its portal session.
-fn injector_service_thread(rx: std::sync::mpsc::Receiver<InputEvent>) {
-    let mut injector: Option<Box<dyn crate::inject::InputInjector>> = None;
-    let mut open_backend: Option<crate::inject::Backend> = None;
-    let mut last_failed: Option<std::time::Instant> = None;
-    for ev in rx {
-        // The resolved input backend (PUNKTFUNK_INPUT_BACKEND, set per connect by apply_input_env,
-        // also on a mid-stream session switch) may have changed since we opened. Reopen against it
-        // so input FOLLOWS the active session instead of injecting into a stale, still-warm backend
-        // (e.g. the managed gamescope's EIS socket after the user switched to the KDE desktop).
-        let want = crate::inject::default_backend();
-        if injector.is_some() && open_backend != Some(want) {
-            tracing::info!(
-                ?open_backend,
-                ?want,
-                "input: backend changed — reopening injector for the active session"
-            );
-            injector = None;
-            last_failed = None; // re-resolve immediately
-        }
-        if injector.is_none() {
-            // Open on the first event; after a failure wait out the backoff before retrying (a
-            // few events drop during setup — acceptable, input is lossy).
-            let ready = last_failed.is_none_or(|t| t.elapsed() >= INJECTOR_REOPEN_BACKOFF);
-            if ready {
-                match crate::inject::open(want) {
-                    Ok(i) => {
-                        tracing::info!(
-                            backend = ?want,
-                            "punktfunk/1 input injector ready (host-lifetime)"
-                        );
-                        injector = Some(i);
-                        open_backend = Some(want);
-                        last_failed = None;
-                    }
-                    Err(e) => {
-                        tracing::error!(error = %format!("{e:#}"), "pointer/keyboard injection unavailable — will retry");
-                        last_failed = Some(std::time::Instant::now());
-                    }
-                }
-            }
-        }
-        if let Some(inj) = injector.as_mut() {
-            if let Err(e) = inj.inject(&ev) {
-                // The backend's worker (portal session / EIS socket) died — drop it and reopen on
-                // a later event (covers a gamescope EIS socket that respawns with its session).
-                tracing::warn!(error = %format!("{e:#}"), "inject failed — reopening injector");
-                injector = None;
-                open_backend = None;
-                last_failed = Some(std::time::Instant::now());
-            }
-        }
-    }
-    tracing::debug!("injector service stopped (host shutting down)");
-}
-
 /// Mic is 48 kHz stereo — matches the Opus stereo decoder and the host→client audio layout.
 const MIC_CHANNELS: u32 = 2;

@@ -1498,12 +1459,14 @@ fn synthetic_stream(
    stop: &AtomicBool,
    probe_rx: &std::sync::mpsc::Receiver<ProbeRequest>,
    probe_result_tx: &tokio::sync::mpsc::UnboundedSender<ProbeResult>,
+    fec_target: &AtomicU8,
 ) -> Result<()> {
    let interval = std::time::Duration::from_millis(1000 / 60);
    for idx in 0..frames {
        if stop.load(Ordering::SeqCst) {
            break;
        }
+        apply_fec_target(session, fec_target);
        // Service speed-test probes between synthetic frames (loopback bandwidth tests).
        service_probes(session, stop, probe_rx, probe_result_tx);
        let data = test_frame(idx, 64 * 1024);
@@ -1667,26 +1630,35 @@ fn run_probe_burst(session: &mut Session, req: ProbeRequest, stop: &AtomicBool)
            bytes_sent: 0,
            packets_sent: 0,
            duration_ms: 0,
+            wire_packets_sent: 0,
+            send_dropped: 0,
        };
    }
    // kbps -> bytes/s (x1000/8).
    let bytes_per_sec = target_kbps as u64 * 125;
-    // ~240 AUs/s for smooth pacing, each capped so one submit_frame stays a bounded burst (a large
-    // AU fragments into many UDP shards via sendmmsg).
-    let chunk = (bytes_per_sec / 240).clamp(1200, 256 * 1024) as usize;
+    // Keep each AU a SMALL burst (~16 KB ≈ a dozen MTU shards) and let the byte budget below pace
+    // the rate finely. The old 256 KB cap blasted ~200 packets into the send buffer per submit, so
+    // a small buffer (e.g. the Deck's 416 KB) overflowed on a single AU and the test measured
+    // self-inflicted buffer overflow instead of the link — mirror how `paced_submit` spreads the
+    // real video path's frames so the probe stresses the same way a real stream does.
+    let chunk = (bytes_per_sec / 240).clamp(1200, 16 * 1024) as usize;
    let filler = vec![0u8; chunk];
-    // Host send-buffer drops over the burst — at high target rates this is where the native
-    // single-send()-per-packet path first loses, so report it alongside what we offered.
-    let send_dropped0 = session.stats().packets_send_dropped;
+    // Wire-packet accounting via session-stat deltas: `packets_sent` counts every sealed wire packet
+    // (seal_frame), `packets_send_dropped` every one the send buffer rejected (WouldBlock/ENOBUFS).
+    // Their delta over the burst is exact — and isolates host-side drops from link loss for the
+    // client. Video is paused for the burst (the data-plane loop is blocked here), so these deltas
+    // are pure probe traffic.
+    let wire0 = session.stats().packets_sent;
+    let drop0 = session.stats().packets_send_dropped;
    let start = std::time::Instant::now();
    let deadline = start + std::time::Duration::from_millis(duration_ms as u64);
    let mut bytes_sent = 0u64;
-    let mut packets_sent = 0u32;
+    let mut packets_sent = 0u32; // probe access-unit count (goodput chunks)
    while std::time::Instant::now() < deadline && !stop.load(Ordering::SeqCst) {
        let allowed = (start.elapsed().as_secs_f64() * bytes_per_sec as f64) as u64;
        if bytes_sent < allowed {
-            // A full send buffer drops on WouldBlock (UdpTransport returns Ok) — that loss is part
-            // of what the probe measures, so count what we offered and keep going.
+            // A full send buffer drops on WouldBlock/ENOBUFS (UdpTransport returns Ok) — that loss is
+            // part of what the probe measures (it surfaces as send_dropped), so keep going.
            let _ = session.submit_frame(&filler, now_ns(), FLAG_PROBE as u32);
            bytes_sent += chunk as u64;
            packets_sent += 1;
@@ -1695,12 +1667,16 @@ fn run_probe_burst(session: &mut Session, req: ProbeRequest, stop: &AtomicBool)
        }
    }
    let actual_ms = start.elapsed().as_millis() as u32;
-    let send_dropped = session.stats().packets_send_dropped - send_dropped0;
+    let wire_offered = (session.stats().packets_sent - wire0) as u32;
+    let send_dropped = (session.stats().packets_send_dropped - drop0) as u32;
+    let wire_packets_sent = wire_offered.saturating_sub(send_dropped);
    tracing::info!(
        target_kbps,
        duration_ms = actual_ms,
        bytes_sent,
-        packets_sent,
+        au_count = packets_sent,
+        wire_offered,
+        wire_packets_sent,
        send_dropped,
        "speed-test probe burst complete"
    );
@@ -1708,6 +1684,8 @@ fn run_probe_burst(session: &mut Session, req: ProbeRequest, stop: &AtomicBool)
        bytes_sent,
        packets_sent,
        duration_ms: actual_ms,
+        wire_packets_sent,
+        send_dropped,
    }
 }

@@ -1891,6 +1869,7 @@ pub(crate) fn boost_thread_priority(critical: bool) {
    }
 }

+#[allow(clippy::too_many_arguments)]
 fn send_loop(
    mut session: Session,
    frame_rx: std::sync::mpsc::Receiver<FrameMsg>,
@@ -1899,6 +1878,7 @@ fn send_loop(
    stop: Arc<AtomicBool>,
    perf: bool,
    burst_cap: usize,
+    fec_target: Arc<AtomicU8>,
 ) {
    boost_thread_priority(false); // transmit thread: above-normal (Apollo's encoder-thread level)
    let mut last_perf = std::time::Instant::now();
@@ -1914,6 +1894,8 @@ fn send_loop(
        // Probes run here (they need the Session); a burst pauses video — the encode thread blocks
        // on the full frame channel meanwhile, which is exactly the intended pause.
        service_probes(&mut session, &stop, &probe_rx, &probe_result_tx);
+        // Adaptive FEC: pick up any new recovery target the control task set from client LossReports.
+        apply_fec_target(&mut session, &fec_target);
        // Short timeout so we keep re-checking `stop` + probes when no frames are flowing.
        match frame_rx.recv_timeout(std::time::Duration::from_millis(50)) {
            Ok(msg) => match paced_submit(
@@ -2058,6 +2040,7 @@ fn virtual_stream(
    bit_depth: u8,
    probe_rx: std::sync::mpsc::Receiver<ProbeRequest>,
    probe_result_tx: tokio::sync::mpsc::UnboundedSender<ProbeResult>,
+    fec_target: Arc<AtomicU8>,
 ) -> Result<()> {
    // This thread runs the capture+encode loop (single-process: Linux / synthetic / NO_WGC DDA) — or
    // tail-calls the relay below. Elevate it so a CPU-heavy game can't deschedule our GPU submission.
@@ -2080,6 +2063,7 @@ fn virtual_stream(
            bit_depth,
            probe_rx,
            probe_result_tx,
+            fec_target,
        );
    }
    tracing::info!(
@@ -2134,6 +2118,7 @@ fn virtual_stream(
                    stop,
                    perf,
                    burst_cap,
+                    fec_target,
                )
            }
        })
@@ -2382,6 +2367,7 @@ fn virtual_stream_relay(
    bit_depth: u8,
    probe_rx: std::sync::mpsc::Receiver<ProbeRequest>,
    probe_result_tx: tokio::sync::mpsc::UnboundedSender<ProbeResult>,
+    fec_target: Arc<AtomicU8>,
 ) -> Result<()> {
    use crate::capture::dxgi::WinCaptureTarget;
    use crate::capture::wgc_relay::HelperRelay;
@@ -2507,6 +2493,7 @@ fn virtual_stream_relay(
                    stop,
                    perf,
                    burst_cap,
+                    fec_target,
                )
            }
        })
@@ -2904,6 +2891,20 @@ fn build_pipeline(
 mod tests {
    use super::*;

+    #[test]
+    fn adapt_fec_maps_loss_to_recovery_band() {
+        // Inputs are window loss in parts-per-million (50_000 = 5%, 1_000_000 = 100%).
+        // A perfectly clean window (0 loss) lands on the floor.
+        assert_eq!(adapt_fec(0), FEC_MIN);
+        // Any nonzero loss rounds up past the floor (ceil) — tiny but never below the cushion.
+        assert_eq!(adapt_fec(1), 2);
+        // FEC exceeds the loss it covers (×1.4 + 1pt headroom).
+        assert_eq!(adapt_fec(50_000), 8); // 5% loss → ceil(7)+1 = 8
+        assert_eq!(adapt_fec(100_000), 15); // 10% → ceil(14)+1 = 15
+
+        // Heavy loss saturates at the ceiling, never beyond.
+        assert_eq!(adapt_fec(1_000_000), FEC_MAX); // 100% → clamped
+        // Even a value far above 100% (u32::MAX ppm) must stay within the ceiling.
+        assert!(adapt_fec(u32::MAX) <= FEC_MAX);
+    }
+
    #[test]
    fn compositor_resolution_precedence() {
        use crate::vdisplay::Compositor::*;
@@ -14,7 +14,8 @@ On **Windows** (NVIDIA), the host ships as a signed installer instead — see [W
 |--------|-----------------|------------------------|-------|
 | **Ubuntu / Debian** | apt | `sudo apt install punktfunk-host` | [Ubuntu — GNOME](/docs/ubuntu-gnome) · [Ubuntu — KDE](/docs/ubuntu-kde) · [packaging/debian](https://git.unom.io/unom/punktfunk/src/branch/main/packaging/debian/README.md) |
 | **Fedora / Bazzite** | rpm-ostree | `rpm-ostree install punktfunk punktfunk-web` | [Fedora — KDE](/docs/fedora-kde) · [Bazzite](/docs/bazzite) · [packaging/rpm](https://git.unom.io/unom/punktfunk/src/branch/main/packaging/rpm/README.md) |
-| **Arch / Steam Deck** | PKGBUILD / sysext | `makepkg -si` (Arch) · sysext `.raw` (SteamOS/Deck) | [packaging/arch](https://git.unom.io/unom/punktfunk/src/branch/main/packaging/arch/README.md) |
+| **Arch** | PKGBUILD | `makepkg -si` | [packaging/arch](https://git.unom.io/unom/punktfunk/src/branch/main/packaging/arch/README.md) |
+| **Steam Deck (host)** | on-device script | `bash scripts/steamdeck/install.sh` | [Steam Deck (Host)](/docs/steam-deck-host) |

 Each registry is public — no auth, you just trust the repo's signing key. Adding the repo is a
 one-time step covered in the linked guide; after that, normal `apt upgrade` / `rpm-ostree upgrade`
@@ -11,6 +11,7 @@
    "ubuntu-kde",
    "fedora-kde",
    "bazzite",
+    "steam-deck-host",
    "windows-host",
    "running-as-a-service",
    "---Connecting---",
@@ -0,0 +1,108 @@
+---
+title: "Steam Deck (Host)"
+description: "Run a punktfunk host on a Steam Deck — stream its Game Mode (or desktop) to your other devices. One script, built on-device for SteamOS."
+---
+
+This is for using a **Steam Deck as the host** — streaming *from* it to a laptop, TV, phone, or
+another Deck. (For the usual case — streaming *to* a Deck — see [Install a Client](/docs/install-client),
+which uses the Flatpak + Decky plugin.)
+
+SteamOS is an immutable, read-only Arch base, so the host isn't a system package. Instead a single
+script builds the host **natively inside a Debian-trixie distrobox** (ABI-matched to SteamOS's
+FFmpeg/glibc — the binary then runs natively on SteamOS) and wires it up as systemd user services.
+Building on-device means a rebuild always matches the running OS, so a SteamOS update can't leave you
+with a binary linked against the wrong libraries. Encode is **VAAPI** on the Deck's AMD GPU
+(auto-detected; NVENC on NVIDIA).
+
+> **Heads up:** the Deck's WiFi *tx* tops out at roughly 250 Mbps of goodput regardless of band (it's a
+> hardware/driver packet-rate limit, not bandwidth) — plenty for 1080p/1440p60, not 4K. A wired dock
+> lifts that. See [Configuration](/docs/configuration) for bitrate guidance.
+
+## Prerequisites
+
+- A Steam Deck on **SteamOS 3** (LCD or OLED). Steady WiFi or, better, a wired dock.
+- **distrobox** installed (no root needed). If `distrobox` isn't found:
+  ```sh
+  curl -sfL https://raw.githubusercontent.com/89luca89/distrobox/main/install | sh -s -- --prefix ~/.local
+  ```
+  Make sure `~/.local/bin` is on your `PATH` (re-open the terminal).
+- The first build downloads a container image + toolchain (~1 GB) and takes ~10–15 minutes. Later
+  rebuilds are incremental.
+
+## 1. Get the source
+
+In Desktop Mode open **Konsole** (or ssh in), then:
+
+```sh
+git clone https://git.unom.io/unom/punktfunk ~/punktfunk
+```
+
+## 2. Run the installer
+
+```sh
+bash ~/punktfunk/scripts/steamdeck/install.sh
+```
+
+It is idempotent — safe to re-run. In one pass it:
+
+1. creates the `pf2` Debian-trixie distrobox and installs the build toolchain,
+2. builds `punktfunk-host` (and the web console),
+3. writes config to `~/.config/punktfunk/` (including a generated web-console login password),
+4. raises the UDP socket buffers to 32 MB and adds you to the `input` group (needs `sudo`; skipped
+   with a warning if unavailable),
+5. installs + starts the `punktfunk-host` and `punktfunk-web` **systemd user services** (with linger,
+   so they run without a login session).
+
+Useful flags:
+
+| Flag | Effect |
+|------|--------|
+| `--open` | Accept **unpaired** clients (trust-on-first-use) — convenient on a fully trusted LAN. Default is PIN pairing required. |
+| `--no-web` | Skip the management web console. |
+| `--src=DIR` | Build from source at `DIR` instead of `~/punktfunk`. |
+
+When it finishes it prints the web-console URL and how to pair.
+
+## 3. Pair a device
+
+By default the host **requires PIN pairing** (secure). Two ways to pair:
+
+- **Web console** (printed at the end of step 2): open `http://<deck-ip>:3000`, log in with the
+  generated password (in `~/.config/punktfunk/web.env`), go to **Devices → arm pairing**, and enter
+  the PIN on your client.
+- **From the client directly**: pick this Deck (it advertises over mDNS as `_punktfunk._udp`) and
+  enter the PIN the host shows.
+
+On a trusted home LAN you can instead install with `--open` and skip pairing entirely.
+
+## 4. Verify
+
+```sh
+systemctl --user status punktfunk-host          # active (running)
+journalctl --user -u punktfunk-host -f          # watch a client connect
+```
+
+Connect from any client ([Moonlight](/docs/moonlight) or a [native client](/docs/clients)). In Game
+Mode the host attaches to the running gamescope session and streams it at your client's resolution; in
+Desktop Mode it streams the KDE desktop. The host auto-detects which session is live per connection.
+
+## Updating
+
+After pulling new source, rebuild and restart in one step (config + pairings persist):
+
+```sh
+git -C ~/punktfunk pull          # or rsync new source in
+bash ~/punktfunk/scripts/steamdeck/update.sh
+```
+
+## Notes & limits
+
+- **Single session at a time** at custom resolutions — two clients requesting different modes will
+  thrash the managed session. Pick one mode per session.
+- **Keep the Deck awake.** Game Mode auto-suspends on idle, which drops the host off the network
+  mid-stream — disable auto-suspend (Settings → Power) for a headless host.
+- **It survives OS updates**, but a major SteamOS bump can move library versions; if the host fails to
+  start after an update, just re-run `update.sh` to rebuild against the new base.
+- Deeper reference (services, container, manual steps): [`scripts/steamdeck/README.md`](https://git.unom.io/unom/punktfunk/src/branch/main/scripts/steamdeck/README.md).
+
+Trouble? See [Troubleshooting](/docs/troubleshooting) and [Pairing](/docs/pairing).
@@ -1601,7 +1601,70 @@ adversarial-verify pass. *Area* is the investigation that surfaced it.
 >   re-copying the desktop and recompositing the cursor at its new position. `last_present` is repeated
 >   only on a genuine `WAIT_TIMEOUT` (nothing changed) or a rebuild gap — correct. No stutter from this
 >   cause. The only real (perf-only) delta is the redundant full-surface copy per pointer update; deferred.
+> - **2026-06-20 — re-verified the whole backlog against current code + landed the security & RFI
+>   chain.** A full re-verification (one agent per subsystem, checked against the live tree rather than
+>   this snapshot) found **22 of 96 items already done or obsolete since 2026-06-16** — the table below
+>   is the ORIGINAL snapshot and its blank ✓V cells do NOT reflect that; see **Re-verified status
+>   (2026-06-20)** immediately below for the authoritative current state.

+### Re-verified status (2026-06-20)
+
+The table further down is the 2026-06-16 snapshot. Re-verifying each item against the current tree
+(which shipped the in-binary Windows service, two-process secure desktop, DDA born-lost fixes, VAAPI
+host, adaptive FEC, etc. in between) gives the current state:
+
+**Done since the snapshot** (gap closed in current code — do not re-do): #1, #2, #4, #13, #16, #20,
+#21, #24, #25, #35, #37, #42, #47, #49, #55, #57, #64, #87.
+
+**Obsolete / not-a-bug** (premise no longer applies to punktfunk): #34 (idle dup-lock release), #53
+(NvEnc struct-version minimization — handled by the SDK crate), #90 (bitrate-derived pacing —
+Apollo paces to a fixed link ceiling, not negotiated bitrate, and punktfunk is pixel-rate-bound by
+design), #95 (expired-cert tolerance — n/a to the trust model).
+
+**Landed this pass (2026-06-20, working tree):**
+- **#5 + #92 + #26 — GameStream paired-cert allow-list + atomic store.** `gamestream/tls.rs` now
+  surfaces the verified peer cert to handlers (`serve_https` + `PeerCertFingerprint`, shared with the
+  mgmt API instead of duplicated); `nvhttp.rs` gates `/launch`/`/resume`/`/applist`/`/cancel` on the
+  `AppState.paired` fingerprint set and reports a real `PairStatus`; `mod.rs::save_paired` writes
+  atomically (temp + rename). Regression test `nvhttp::tests::launch_gate_requires_a_pinned_client_cert`.
+  Compiled + clippy-clean + tested on Linux. (Closes the "GameStream TLS accepts any client cert" hole.)
+- **#6 + #51 — NVENC capability query.** `encode/nvenc.rs::query_caps` probes `nvEncGetEncodeCaps`
+  (WIDTH/HEIGHT_MAX, 10-bit, custom-VBV, ref-pic-invalidation) once before configuring: rejects an
+  over-range mode with a clear error (instead of an opaque InvalidParam the bitrate-clamp search
+  misreads), downgrades 10-bit→8-bit when unsupported, gates custom VBV, and records the RFI flag.
+  Windows-only — adversarially reviewed against the SDK source (verdict SHIP); compile pending the RTX
+  box / Windows CI.
+- **#19 + #22 — reference-frame invalidation instead of always-IDR.** New
+  `Encoder::invalidate_ref_frames(first, last) -> bool` (default `false` → caller keyframes; only the
+  Windows NVENC path implements real RFI: a multi-ref DPB gated on caps + `nvEncInvalidateRefFrames`
+  with dedup + IDR-on-overflow). The GameStream control plane decodes the `0x0301` lost-frame range
+  (two LE i64, Apollo's `IDX_INVALIDATE_REF_FRAMES`) and routes it via `AppState.rfi_range` to the
+  encode loop, which prefers invalidation and falls back to a keyframe. Cross-platform wiring compiled
+  and tested on Linux (where it degrades to IDR — libavcodec/VAAPI can't express RFI); the NVENC
+  implementation is RTX-box/CI-pending. (Native punktfunk/1 RFI sites stay `request_keyframe` — the
+  protocol carries no frame range yet; the trait default keeps that correct.)
+- **#43 + #72 — media socket QoS + buffer growth.** New `punktfunk_core::transport::qos`:
+  `grow_socket_buffers` (the native plane's `SO_SNDBUF`/`SO_RCVBUF`=32 MB growth, factored out so the
+  GameStream sockets reuse it — kills host-side ENOBUFS at high bitrate) and `set_media_qos`
+  (opt-in `PUNKTFUNK_DSCP=1`: DSCP CS5 video / CS6 audio via `IP_TOS` + Linux `SO_PRIORITY` 5/6,
+  Apollo's scheme). Wired into the native `UdpTransport::connect`/`connect_via_punch` and the
+  GameStream video/audio sockets. Cross-platform; Linux readback test asserts `tos_v4()==0xA0` +
+  `priority()==5`. Windows note: plain `IP_TOS` is a no-op on the wire without a qWAVE policy (the
+  qWAVE port is the documented follow-up).
+- **#8 + #45 — GameStream input injection off the ENet service thread (+ coalescing).** `on_receive`
+  no longer injects inline (a slow Wayland/libei/SendInput call head-blocked ENet keepalive/retransmit);
+  it forwards decoded keyboard/mouse to a dedicated injector thread. The native plane's hardened
+  `InjectorService` (lazy open + backend-change reopen + failure backoff) was **moved from punktfunk1
+  into `crate::inject`** so both planes share one impl, and given a `coalesce` step (#45) that sums
+  adjacent relative-mouse + same-axis scroll deltas while preserving button/key/abs ordering — so a
+  slow backend never builds a backlog of stale motion. Cross-platform; unit-tested (`coalesce`) +
+  full native-plane regression suite green.
+
+**Still open / partial:** the remaining ~63 items (table rows not listed above). Highest-value next
+steps from this re-verification: **#23 / #89** (Windows DS4/DualSense ViGEm target, honoring the
+negotiated pad type), **#9** (actually launch the app on Windows via `CreateProcessAsUserW`), **#7 /
+#18** (WASAPI default-device-change + device-invalidated recovery). **#43 / #72** (media QoS/DSCP +
+GameStream `SO_SNDBUF`) and **#8** (move GameStream input injection off the ENet service thread) were
+also flagged as high-value by this re-verification but have since landed in this pass — see above.

 | # | Improvement | Area | Win | Sev | Eff | ✓V |
 |---|---|---|---|---|---|---|
@@ -1609,10 +1672,10 @@ adversarial-verify pass. *Area* is the investigation that surfaced it.
 | 2 | Detect resolution/format change on the acquire hot path, not only during rebuild | win:capture-dxgi-dd | Y | high | small |  |
 | 3 | Per-frame IsCurrent() check to catch HDR/GPU/mode changes | win:capture-wgc | Y | high | small |  |
 | 4 | ✅ **DONE** — Batched/GSO send for the GameStream video plane on Windows | cmp:protocol-streaming | Y | high | medium | ✓ |
-| 5 | Gate the GameStream HTTPS plane on the paired-cert allow-list | cmp:gamestream-http-pairing | Y | high | medium |  |
-| 6 | Query NVENC encode capabilities before init and degrade gracefully | cmp:video-encode | Y | high | medium |  |
+| 5 | ✅ **DONE** — Gate the GameStream HTTPS plane on the paired-cert allow-list | cmp:gamestream-http-pairing | Y | high | medium |  |
+| 6 | ✅ **DONE** (CI-pending) — Query NVENC encode capabilities before init and degrade gracefully | cmp:video-encode | Y | high | medium |  |
 | 7 | Detect default-render-device changes and reinit WASAPI capture | cmp:audio | Y | high | medium |  |
-| 8 | Move GameStream input injection off the ENet service thread | cmp:input | Y | high | medium |  |
+| 8 | ✅ **DONE** — Move GameStream input injection off the ENet service thread | cmp:input | Y | high | medium |  |
 | 9 | Actually launch the app/game on Windows (CreateProcessAsUserW into the user session) | cmp:process-launch | Y | high | medium |  |
 | 10 | Native system tray with state-driven icon + notifications | cmp:config-management | Y | high | medium |  |
 | 11 | Treat S_OK-with-no-change frames as timeouts via DXGI update flags | win:capture-dxgi-dd | Y | high | medium |  |
@@ -1623,14 +1686,14 @@ adversarial-verify pass. *Area* is the investigation that surfaced it.
 | 16 | Add SET_RENDER_ADAPTER (IOCTL 0x802) to bind the IDD render GPU to the capture/encode GPU | win:virtual-display-sudovda | Y | high | medium |  |
 | 17 | Add streaming_will_start/stop session-level latency tuning on Windows | win:critic | Y | high | medium |  |
 | 18 | Recover WASAPI loopback from default-device change and AUDCLNT_E_DEVICE_INVALIDATED | win:critic | Y | high | medium |  |
-| 19 | Implement true reference-frame invalidation with a multi-ref DPB instead of always-full-IDR | cmp:video-encode | Y | high | large |  |
+| 19 | ✅ **DONE** (CI-pending) — Implement true reference-frame invalidation with a multi-ref DPB instead of always-full-IDR | cmp:video-encode | Y | high | large |  |
 | 20 | In-binary Windows service install + interactive-session launch | cmp:config-management | Y | high | large |  |
 | 21 | ⊘ **ALREADY-HANDLED** — Composite the moved cursor onto a clean copy even when DDA returns no new desktop frame | win:cursor-compositing | Y | high | large |  |
-| 22 | Add real reference-frame invalidation (RFI) instead of always forcing IDR | win:nvenc-d3d11 | Y | high | large |  |
+| 22 | ✅ **DONE** (CI-pending) — Add real reference-frame invalidation (RFI) instead of always forcing IDR | win:nvenc-d3d11 | Y | high | large |  |
 | 23 | Add a DS4 (DualShock4) ViGEm target on Windows with type auto-selection, motion, touchpad, battery and timestamp pump | win:input-sendinput-vigem | Y | high | large |  |
 | 24 | Replace the PsExec scheduled-task launch with a real Windows service that relaunches the host on session change | win:system-secure-desktop | Y | high | large |  |
 | 25 | Elevate capture/encode/send thread priority on the host hot path | cmp:protocol-streaming | Y | medium | small | ✓ |
-| 26 | Atomic temp+rename persistence for the GameStream paired store | cmp:gamestream-http-pairing | Y | medium | small |  |
+| 26 | ✅ **DONE** — Atomic temp+rename persistence for the GameStream paired store | cmp:gamestream-http-pairing | Y | medium | small |  |
 | 27 | Always emit explicit SDR color VUI (primaries/transfer/matrix/range), not just HDR | cmp:video-encode | Y | medium | small |  |
 | 28 | Set repeatSPSPPS=1 and wire slicesPerFrame for the Windows NVENC config | cmp:video-encode | Y | medium | small |  |
 | 29 | Raise the WASAPI capture thread to MMCSS Pro Audio priority | cmp:audio | Y | medium | small |  |
@@ -1647,15 +1710,15 @@ adversarial-verify pass. *Area* is the investigation that surfaced it.
 | 40 | Gate on SudoVDA protocol-version compatibility instead of only logging it | win:virtual-display-sudovda | Y | medium | small |  |
 | 41 | Retry device open with exponential backoff | win:virtual-display-sudovda | Y | medium | small |  |
 | 42 | Add per-frame IDXGIFactory::IsCurrent reinit detection and switch the host clock to GetSystemTimePreciseAsFileTime | win:system-secure-desktop | Y | medium | small |  |
-| 43 | Socket QoS / DSCP marking on the media sockets | cmp:protocol-streaming | Y | medium | medium | ✓ |
+| 43 | ✅ **DONE** — Socket QoS / DSCP marking on the media sockets | cmp:protocol-streaming | Y | medium | medium | ✓ |
 | 44 | Plumb HDR10 static metadata (mastering display + MaxCLL/MaxFALL) | cmp:video-encode | Y | medium | medium |  |
-| 45 | Coalesce relative-mouse/scroll/controller spam before injection | cmp:input | Y | medium | medium |  |
+| 45 | ✅ **DONE** (mouse/scroll) — Coalesce relative-mouse/scroll/controller spam before injection | cmp:input | Y | medium | medium |  |
 | 46 | Display-config apply/revert with a retry scheduler and guaranteed revert on disconnect | cmp:process-launch | Y | medium | medium |  |
 | 47 | Harden GPU scheduling priority + SetMaximumFrameLatency + NVIDIA-HAGS NVENC-realtime avoidance | win:capture-dxgi-dd | Y | medium | medium |  |
 | 48 | Use SystemRelativeTime (QPC) as the frame timestamp | win:capture-wgc | Y | medium | medium |  |
 | 49 | Stop baking the cursor destructively into the repeated gpu_copy texture | win:cursor-compositing | Y | medium | medium |  |
 | 50 | Gate HDR on (client requested HDR) AND (desktop is actually HDR), and signal the result in Welcome | win:hdr-colorspace | Y | medium | medium |  |
-| 51 | Query nvEncGetEncodeCaps and gate config on real GPU capabilities | win:nvenc-d3d11 | Y | medium | medium |  |
+| 51 | ✅ **DONE** (CI-pending) — Query nvEncGetEncodeCaps and gate config on real GPU capabilities | win:nvenc-d3d11 | Y | medium | medium |  |
 | 52 | Use async encode with a Win32 completion event + timeout | win:nvenc-d3d11 | Y | medium | medium |  |
 | 53 | Minimize NvEnc API/struct versions per codec for older-driver compatibility | win:nvenc-d3d11 | Y | medium | medium |  |
 | 54 | Use a canonical US-English VK→scancode table for normalized keys, and fall back to VK when no scancode maps | win:input-sendinput-vigem | Y | medium | medium |  |
@@ -1676,7 +1739,7 @@ adversarial-verify pass. *Area* is the investigation that surfaced it.
 | 69 | Convert to P010 in a D3D11 shader and feed NVENC YUV instead of ABGR10 RGB | win:hdr-colorspace | Y | medium | large |  |
 | 70 | Add an NvAPI driver-settings manager (PREFERRED_PSTATE_MAX + OGL_CPL_PREFER_DXPRESENT) with a crash-safe undo file | win:system-secure-desktop | Y | medium | large |  |
 | 71 | Install/select a virtual audio sink so a headless Windows host has audio with no physical device | win:critic | Y | medium | large |  |
-| 72 | Grow SO_SNDBUF on the GameStream video/audio sockets | cmp:protocol-streaming | Y | low | small |  |
+| 72 | ✅ **DONE** — Grow SO_SNDBUF on the GameStream video/audio sockets | cmp:protocol-streaming | Y | low | small |  |
 | 73 | Decode NVENCSTATUS into readable names and detect InvalidParam structurally | cmp:video-encode | Y | low | small |  |
 | 74 | Surface WASAPI data-discontinuity as a glitch diagnostic | cmp:audio | Y | low | small |  |
 | 75 | Inject per-app launch env (client res/fps/HDR/audio + status) for launch scripts | cmp:process-launch | Y | low | small |  |
@@ -1696,7 +1759,7 @@ adversarial-verify pass. *Area* is the investigation that surfaced it.
 | 89 | Support DualSense/DS4 ViGEm target + feedback on Windows, honoring negotiated pad type | win:critic | Y | low | large |  |
 | 90 | Bitrate-derived rate-control pacing (vs frame-interval-only) | cmp:protocol-streaming |  | medium | medium | ✓ |
 | 91 | Named, permissioned paired-device records for the GameStream store | cmp:gamestream-http-pairing |  | medium | medium |  |
-| 92 | Actually reject unpaired GameStream client certs (close the unpair gap) | cmp:config-management |  | medium | medium |  |
+| 92 | ✅ **DONE** — Actually reject unpaired GameStream client certs (close the unpair gap) | cmp:config-management |  | medium | medium |  |
 | 93 | Persisted host config + read/write config API endpoint | cmp:config-management |  | medium | large |  |
 | 94 | Consume the GameStream client loss-stats report | cmp:protocol-streaming |  | low | small | ✓ |
 | 95 | Tolerate not-yet-valid/expired client certs during verification | cmp:gamestream-http-pairing |  | low | small |  |
@@ -0,0 +1,134 @@
+# Windows host — virtual DualSense scoping
+
+**Status:** scoping (2026-06-20). Decision pending the web-research pass (see *Open questions* — web
+search was unavailable when this was written, so the VHF API/signing specifics and the
+"existing-driver-to-vendor" survey are marked TO-CONFIRM).
+
+## TL;DR
+
+Apollo's backlog item #23/#89 ("DS4 ViGEm target on Windows") is the **wrong target** if the goal is
+*actual DualSense*. ViGEmBus emulates only **Xbox 360 (XUSB)** and **DualShock 4 (DS4)** — never a
+DualSense. Because this is a *host-side* virtual pad, the DualSense-defining features (adaptive
+triggers, the fine haptic actuators, DS5 identity) can only work end-to-end if the **game sees a real
+DualSense** and therefore drives them; a DS4 virtual pad means the game uses its DS4 code path and
+never emits those commands, so the client's adaptive-trigger rendering is never exercised. ViGEm DS4
+structurally **cannot** deliver adaptive triggers.
+
+The right path is the Windows analog of what the Linux host already does: present a **real virtual
+DualSense HID device** (Sony VID `054C` / PID `0CE6`, the inputtino PS5 report descriptor). On Windows
+that means a kernel-mode virtual-HID device via the **Virtual HID Framework (VHF)** — the UHID analog —
+which is a SudoVDA-class driver effort (vendored + signed, installed by the existing Inno installer).
+
+## Why this is the wrong place to copy Apollo
+
+Apollo (and all of Sunshine's lineage) **does DualSense only on Linux** (`inputtino`,
+`DualSenseWired`). Its Windows input path (`src/platform/windows/input.cpp`) is ViGEm
+`XUSB_REPORT` + `DS4_REPORT_EX` only — `MPS2_TO_DS4_ACCEL` motion conversion, inverse-ViGEmBus gyro
+calibration, DS4 touchpad packing. There is **zero** VHF / virtual-HID / DualSense code on Apollo's
+Windows side. So:
+
+- Copying Apollo on Windows gets us a **DS4**, with the adaptive-trigger ceiling baked in.
+- There is **no in-ecosystem upstream** (Sunshine/Apollo/Wolf) that already solved virtual DualSense
+  on Windows to vendor from. This would be novel work for the streaming-host space.
+
+## The parity target — and what's *already* done
+
+The Linux host (`crates/punktfunk-host/src/inject/dualsense.rs`) creates a **UHID** device presenting
+the genuine DualSense descriptor, so the kernel `hid-playstation` driver binds it and games see a real
+DualSense — gamepad + motion + touchpad + lightbar/player-LEDs + adaptive triggers. It writes HID
+**input** report `0x01` (controller state) and reads HID **output** report `0x02` (the game's
+rumble/LED/trigger feedback), which it forwards to the client as `punktfunk_core::quic::HidOutput`.
+
+Crucially, **everything except the host backend is already platform-agnostic and DualSense-complete:**
+
+| Layer | State | Where |
+|---|---|---|
+| Protocol planes (rich input `0xCC`, rumble `0xCA`, HID-output `0xCD`) | done | `punktfunk_core::quic` |
+| Feedback abstraction (`HidOutput::{Led,PlayerLeds,Trigger,…}`) | done | `punktfunk_core::quic` |
+| Pad-type negotiation (client pref > env > default), `GamepadPref::DualSense` | done | `punktfunk1.rs::resolve_gamepad` |
+| Backend dispatch (`enum PadBackend`) | done; `DualSense` arm is `#[cfg(target_os="linux")]` | `punktfunk1.rs:1229` |
+| Clients (capture + adaptive-trigger/lightbar/haptic rendering) | done, all platforms | `clients/*` |
+| C-ABI (`next_hidout` / `send_rich_input`) | done | `abi.rs` |
+| **Host virtual-DualSense backend** | **Linux only (UHID)** | `inject/dualsense.rs` |
+
+So a Windows DualSense backend needs **no protocol, client, or C-ABI change**. It must only: create a
+virtual DualSense HID device, translate our pad state → HID input report `0x01`, and surface the game's
+HID output report `0x02` as the same `HidOutput` events the Linux path already emits. That is a
+well-bounded host-side addition (driver + a `DualSenseManager`-shaped userspace bridge + a
+`PadBackend::DualSense` Windows arm).
+
+## The Windows mechanism — VHF (primary candidate)
+
+Windows has **no userspace HID-device creation** (unlike Linux UHID), so a real virtual DualSense
+requires a kernel component. The Microsoft-sanctioned one is the **Virtual HID Framework (VHF)**: a
+small KMDF driver creates a virtual HID device from an arbitrary report descriptor, submits **input**
+reports to the OS, and receives **output/feature** reports written by applications (our feedback hook).
+This is the structural twin of `/dev/uhid`.
+
+Sketch of the integration (TO-CONFIRM details in *Open questions*):
+
+```
+host process (Rust)  <--IOCTL/named-pipe-->  punktfunk-ds5.sys (KMDF + VHF)  <--HID-->  game / Steam / GameInput
+  PadState  ----------- input report 0x01 ----------->  VhfReadReportSubmit
+  HidOutput <-- output report 0x02 (write callback) ---  EvtVhf*WriteReport
+```
+
+- **Descriptor reuse:** the exact inputtino PS5 descriptor + feature-report replies we already ship for
+  Linux (`dualsense.rs` `DS_*` constants) — same bytes, same VID/PID, so Windows + games recognize it
+  as a DualSense.
+- **Userspace bridge:** a `DualSenseManager`-shaped struct mirroring the Linux one (same `RichInput` →
+  report `0x01` packing, same `HidOutput` parsing from report `0x02`), talking to the driver over an
+  IOCTL/pipe instead of `/dev/uhid`.
+- **Packaging:** vendor + sign the `.sys`/`.inf`/`.cat` and install via the existing
+  `packaging/windows/sudovda` machinery (`nefconc.exe` + an `install-*.ps1`, bundled in the Inno
+  `setup.exe`). The precedent is already in the repo.
+
+## Effort & risk
+
+| Piece | Rough size | Notes / risk |
+|---|---|---|
+| KMDF + VHF virtual-HID driver | large | KMDF (kernel) is a higher bar than SudoVDA's UMDF/IddCx; bulk of the work |
+| Driver signing + distribution | medium | EV cert + Microsoft attestation for production; test-signing for dev; SudoVDA precedent but it's pre-signed/vendored, not built here |
+| Userspace `DualSenseManager` (Windows) | small–medium | Mostly a port of the Linux report packing/parsing; reuses descriptors |
+| `PadBackend::DualSense` Windows arm + negotiation | small | Un-gate the existing dispatch for Windows |
+| HidHide-style hiding of a physical pad | small (maybe unneeded) | Headless host usually has no physical pad; only matters if one is attached |
+
+**Top risks:** (1) a KMDF/VHF driver is real kernel work + signing logistics; (2) whether VHF's
+output-report callback cleanly surfaces the DualSense `0x02` effect report we need for adaptive
+triggers; (3) whether games/Steam/`Windows.Gaming.Input`/GameInput accept a VHF-sourced DualSense the
+same as a physical one (descriptor + VID/PID should suffice, but unverified on Windows).
+
+## Decision matrix
+
+| Option | Adaptive triggers / DS5 identity | Effort | When it's right |
+|---|---|---|---|
+| **A. VHF virtual DualSense** (parity) | ✅ full | large (kernel driver) | the goal — matches the Linux host |
+| **B. ViGEm DS4** (interim) | ❌ never (DS4 ceiling) | small | quick PS-pad-on-Windows w/ touchpad/motion/lightbar/rumble, no adaptive triggers |
+| **C. Hybrid** | A for DS5 clients, B/Xbox360 fallback | A + small | belt-and-suspenders once A exists |
+| **D. Defer** | — | — | if a higher-ROI item (#9 launch, #7/#18 audio) wins the slot |
+
+Xbox 360 (XInput) is already implemented and covers most Windows games regardless.
+
+## Open questions — REQUIRES the web-research pass (search was down)
+
+1. **VHF specifics:** confirm VHF is the right/current mechanism (vs. a newer HID-injection API);
+   exact API (`VhfCreate`/`VhfStart`/`VhfReadReportSubmit`/the output-report `EvtVhf…WriteReport`
+   callback); KMDF-only or UMDF-capable; minimum Windows version; the MS `vhidmini`/VHF sample.
+2. **Existing driver to vendor:** is there a maintained virtual-HID / virtual-DualSense Windows driver
+   (Nefarius/community) we can vendor like SudoVDA, instead of writing a KMDF driver from scratch?
+3. **Recognition:** does a VHF device with VID `054C`/PID `0CE6` + the DualSense descriptor get
+   recognized as a DualSense by Windows.Gaming.Input / GameInput / Steam Input / native-DS5 games —
+   including adaptive triggers via the `0x02` output report?
+4. **Signing/distribution:** attestation vs. WHQL for a KMDF driver; can we test-sign for dev and ship
+   an attestation-signed driver via the Inno installer like SudoVDA?
+5. **HidHide:** needed at all on a (usually headless) host, or only when a physical pad is present?
+
+## Recommended plan
+
+1. **Web-research pass** (when search is back) to close the five questions above — especially #2
+   (vendor vs. build) and #1 (VHF feasibility + output-report support), which gate the whole effort.
+2. If VHF (or a vendorable driver) is confirmed feasible: build **Option A** — driver + Windows
+   `DualSenseManager` + un-gate `PadBackend::DualSense`, reusing the inputtino descriptor and the
+   existing `HidOutput` plane (no protocol/client/ABI change), packaged via the SudoVDA path.
+3. Keep **Xbox 360** as-is and treat **ViGEm DS4** only as an optional fallback (Option C), never as
+   the DualSense answer.
@@ -183,6 +183,11 @@
 #define MSG_REQUEST_KEYFRAME 3
 #endif

+#if defined(PUNKTFUNK_FEATURE_QUIC)
+// Type byte of [`LossReport`].
+#define MSG_LOSS_REPORT 4
+#endif
+
 #if defined(PUNKTFUNK_FEATURE_QUIC)
 // Type byte of [`ProbeRequest`].
 #define MSG_PROBE_REQUEST 32
@@ -468,22 +473,29 @@ typedef struct {

 // A speed-test measurement, filled by [`punktfunk_connection_probe_result`]. `done` is 0 until
 // the host's end-of-burst report lands, then 1 (the numbers are final). `throughput_kbps` is the
-// measured goodput to drive a bitrate choice from; `loss_pct` is the delivery loss at that rate.
+// delivered wire throughput to drive a bitrate choice from; `loss_pct` is the link loss and
+// `host_drop_pct` the host-side send-buffer drop (raise `net.core.wmem_max`) — they're measured
+// separately so a host that can't keep up reads differently from a lossy link.
 typedef struct {
    // 1 once the host's end-of-burst report arrived (measurement final); else 0 (partial).
    uint8_t done;
-    // Probe payload bytes / packets the client received.
+    // Delivered wire bytes (header + shard) / packets the client received during the burst.
    uint64_t recv_bytes;
    uint32_t recv_packets;
-    // Probe payload bytes / packets the host reported sending.
+    // Application goodput bytes / access units the host offered.
    uint64_t host_bytes;
    uint32_t host_packets;
-    // Client-measured receive window (first→last probe AU), milliseconds.
+    // The host's measured burst duration, milliseconds (the throughput denominator).
    uint32_t elapsed_ms;
-    // Measured goodput = `recv_bytes * 8 / elapsed_ms` (kilobits/second).
+    // Delivered wire throughput = `recv_bytes * 8 / elapsed_ms` (kilobits/second).
    uint32_t throughput_kbps;
-    // Delivery loss `(host_bytes - recv_bytes) / host_bytes` as a percentage (0 if unknown).
+    // Link loss `(wire_packets_sent − recv_packets) / wire_packets_sent` as a percentage.
    float loss_pct;
+    // Host-side send-buffer drop `send_dropped / (wire_packets_sent + send_dropped)`, percent.
+    float host_drop_pct;
+    // Wire packets the host put on the link, and the ones its send buffer dropped (raw counts).
+    uint32_t wire_packets_sent;
+    uint32_t send_dropped;
 } PunktfunkProbeResult;

 #ifdef __cplusplus
@@ -1,23 +1,26 @@
 # punktfunk on Arch Linux / SteamOS

-Packaging for punktfunk on Arch and Arch-derived immutable distros (SteamOS 3, etc.). The
-`PKGBUILD` is a **split package** producing **`punktfunk-host`** (the gaming-rig host) and
-**`punktfunk-client`** (the GTK4 couch/Deck client) — mirrors the rpm subpackages
-(`packaging/rpm/punktfunk.spec`) and the deb build scripts. On a **Steam Deck you want
-`punktfunk-client`** (it's what the [Decky plugin](../../clients/decky/) launches); on a gaming
-rig, `punktfunk-host`.
+Packaging for punktfunk on Arch and Arch-derived immutable distros. The `PKGBUILD` is a **split
+package** producing **`punktfunk-host`** (the gaming-rig host) and **`punktfunk-client`** (the GTK4
+couch/Deck client) — mirrors the rpm subpackages (`packaging/rpm/punktfunk.spec`) and the deb build
+scripts. On a **Steam Deck used as a client you want `punktfunk-client`** (it's what the
+[Decky plugin](../../clients/decky/) launches); on a gaming rig, `punktfunk-host`.
+
+> **Steam Deck as a HOST:** don't use this PKGBUILD — SteamOS's read-only root makes `makepkg`/sysext
+> awkward, and a prebuilt binary breaks on OS library bumps. Use the on-device build script instead:
+> **[`scripts/steamdeck/install.sh`](../../scripts/steamdeck/)** (it builds in a Debian-trixie distrobox
+> ABI-matched to SteamOS and uses **VAAPI** on the Deck's AMD GPU). VAAPI host encode is covered by
+> the host-encode note below.

 A third member, **`punktfunk-web`** (the browser management console — pairing + status), is
 **opt-in**: build it by setting `PF_WITH_WEB=1`, which requires **`bun`** at build time (`bun-bin`
 from the AUR if it isn't in your repos; the console then runs on plain `nodejs`). A default
 `makepkg` builds only host+client with no JS tooling — mirroring the RPM spec's `%bcond_with web`.

-> ⚠️ **Host encode is NVENC-only today.** `crates/punktfunk-host/src/encode/linux.rs` implements
-> `hevc_nvenc`/`av1_nvenc`/`h264_nvenc` + a CUDA zero-copy path — there is **no VAAPI encoder**. So
-> `punktfunk-host` works on **Arch + NVIDIA** (incl. `bazzite-deck-nvidia`); an **AMD Deck-as-host**
-> can't encode until a `hevc_vaapi` backend is added (a code change, not packaging). The **client
-> is unaffected** — `punktfunk-client` decodes via **VAAPI on AMD/Intel** (the Deck) with a software
-> fallback, so streaming *to* a Deck works today.
+> **Host encode: NVENC on NVIDIA, VAAPI on AMD/Intel** (`PUNKTFUNK_ENCODER=auto` picks one). The host
+> now has a VAAPI encoder + zero-copy dmabuf path alongside NVENC/CUDA, so `punktfunk-host` works on
+> Arch + NVIDIA **and** AMD/Intel (incl. the Steam Deck — see the on-device path above). The client
+> decodes via VAAPI on AMD/Intel with a software fallback.

 ## Arch Linux (mutable)

@@ -0,0 +1,82 @@
+# punktfunk host on a Steam Deck
+
+Run a punktfunk **host** on a Steam Deck — stream its Game Mode (or KDE desktop) *to* other devices.
+(Streaming *to* a Deck is the client; use the Flatpak + [Decky plugin](../../clients/decky/) instead.)
+
+User-facing guide: **docs-site → "Steam Deck (Host)"** (`docs-site/content/docs/steam-deck-host.md`).
+This README is the deep reference for what the scripts do and how to operate them by hand.
+
+## Why build on-device (not a package or prebuilt binary)
+
+SteamOS 3 is an **immutable, read-only Arch** base:
+
+- No `pacman -S` for system libs; `/usr` is read-only and reset on A/B updates.
+- A **prebuilt binary is fragile** — it links the system FFmpeg/glibc, and a SteamOS update can bump
+  those sonames out from under it (the same class of breakage as the NVIDIA-driver-after-update issue).
+- The host needs **unsandboxed** `/dev/uinput` + `/dev/uhid`, PipeWire, the compositor, and VAAPI — so
+  Flatpak (the normal Deck app channel) doesn't fit. Flatpak/Decky are for the *client*.
+
+So the host is built **natively inside a Debian-trixie distrobox** (`pf2`), chosen because its
+FFmpeg/glibc ABI matches SteamOS's — the resulting binary runs **natively on SteamOS** (the container
+is only the build environment; `punktfunk-host` is launched directly, not via `distrobox enter`). A
+rebuild always matches the running OS. Encode is **VAAPI** on the Deck's AMD GPU (NVENC on NVIDIA),
+auto-selected by `PUNKTFUNK_ENCODER=auto`.
+
+The web console is the one part that stays in the container at runtime: it's a Nitro **node-server**
+build (`bun` builds it; **`node` runs it** — bun mis-resolves Nitro's externalized server deps like
+`srvx` at request time), so its service does `distrobox enter pf2 -- … node .output/server/index.mjs`.
+Both `bun` and `nodejs` are provisioned in the container.
+
+## Scripts
+
+| Script | What it does |
+|--------|--------------|
+| `install.sh` | Idempotent installer: ensure the `pf2` distrobox + toolchain → build host (+web) → write config → tune sysctl + `input` group (sudo) → install + start `punktfunk-host` / `punktfunk-web` systemd **user** services with linger. |
+| `update.sh` | Rebuild from the current source and restart the services (config + pairings persist). `--pull` does `git pull` first. |
+
+```sh
+git clone https://git.unom.io/unom/punktfunk ~/punktfunk
+bash ~/punktfunk/scripts/steamdeck/install.sh            # PIN pairing required (secure default)
+bash ~/punktfunk/scripts/steamdeck/install.sh --open     # trusted LAN: accept unpaired clients
+bash ~/punktfunk/scripts/steamdeck/install.sh --no-web   # host only, no web console
+bash ~/punktfunk/scripts/steamdeck/update.sh             # after pulling new source
+```
+
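+Env overrides: `PUNKTFUNK_SRC` (source dir, default `~/punktfunk`; `install.sh` also accepts
+`--src=DIR`), `PUNKTFUNK_BOX` (container name, default `pf2`), and — `install.sh` only —
+`PUNKTFUNK_BOX_IMAGE` (container image, default `docker.io/library/debian:trixie`),
+`PUNKTFUNK_MGMT_PORT` (47990), `PUNKTFUNK_WEB_PORT` (3000).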
+
+## What gets installed
+
+- **Binary:** `~/punktfunk/target-steamos/release/punktfunk-host` (built in `pf2`, run natively).
+- **Config:** `~/.config/punktfunk/host.env` (encoder/compositor) and `web.env` (generated web login
+  password + session secret). Trust material (`cert.pem`, `mgmt-token`, `punktfunk1-paired.json`) lives
+  here too and persists across updates.
+- **Services:** `~/.config/systemd/user/punktfunk-host.service` (runs `serve --native --mgmt-bind
+  0.0.0.0:47990`, `+ --open` if chosen) and `punktfunk-web.service`. Linger is enabled so they run
+  without a login session.
+- **System tuning (sudo):** `/etc/sysctl.d/99-punktfunk-net.conf` (32 MB UDP buffers — the #1
+  high-bitrate lever), `/etc/udev/rules.d/60-punktfunk.rules`, and `$USER` in the `input` group.
+
+## Operating
+
+```sh
+systemctl --user status  punktfunk-host punktfunk-web
+journalctl --user -u punktfunk-host -f          # watch sessions / pairing PIN
+systemctl --user restart punktfunk-host         # after editing host.env
+```
+
+Pair from the web console (Devices → arm pairing) or directly from a client with the host's PIN. The
+host advertises over mDNS as `_punktfunk._udp`, so clients discover it automatically.
+
+## Gotchas
+
+- **distrobox required.** If missing: `curl -sfL https://raw.githubusercontent.com/89luca89/distrobox/main/install | sh -s -- --prefix ~/.local` (then ensure `~/.local/bin` is on PATH).
+- **First build is slow** (~10–15 min + ~1 GB toolchain/image). Incremental afterwards.
+- **No passwordless sudo** → the installer skips the sysctl/udev/input steps with a warning; high
+  bitrates will drop packets until you apply `99-punktfunk-net.conf` and join `input` yourself.
+- **Game Mode auto-suspend** drops the host off the network on idle — disable it (Settings → Power)
+  for a headless host.
+- **WiFi tx ceiling** ≈ 250 Mbps goodput (a Deck hardware/driver packet-rate limit, band-independent);
+  fine for 1080p/1440p60. A wired dock lifts it.
+- **After a major SteamOS update**, if the host won't start, run `update.sh` to rebuild against the new
+  base libraries.
@@ -0,0 +1,243 @@
+#!/usr/bin/env bash
+# punktfunk — Steam Deck HOST installer (stream FROM the Deck to other devices).
+#
+# SteamOS is an immutable, read-only Arch base, so the host can't be a system package and a
+# prebuilt binary would break on an OS library bump. Instead we build the host natively inside a
+# Debian-trixie distrobox (ABI-matched to SteamOS's FFmpeg/glibc) — the binary then runs natively
+# on SteamOS — and wire it up as proper systemd USER services. A rebuild always matches the
+# running OS. AMD encode uses VAAPI; NVIDIA uses NVENC (auto-detected).
+#
+# Run it on the Deck (Desktop Mode "Konsole", or over ssh). Idempotent — safe to re-run to update
+# config or pick up new options. To rebuild after pulling new source, use update.sh.
+#
+#   bash scripts/steamdeck/install.sh                 # secure default: PIN pairing required
+#   bash scripts/steamdeck/install.sh --open          # trusted LAN: accept unpaired clients (TOFU)
+#   bash scripts/steamdeck/install.sh --no-web        # skip the management web console
+#   PUNKTFUNK_SRC=~/src/punktfunk bash scripts/steamdeck/install.sh   # source elsewhere
+#
+set -euo pipefail
+
+log()  { printf '\033[1;36m==>\033[0m %s\n' "$*"; }
+ok()   { printf '\033[1;32m  ok\033[0m %s\n' "$*"; }
+warn() { printf '\033[1;33m  !!\033[0m %s\n' "$*" >&2; }
+die()  { printf '\033[1;31merror:\033[0m %s\n' "$*" >&2; exit 1; }
+have() { command -v "$1" >/dev/null 2>&1; }
+
+# --- options ---------------------------------------------------------------
+# Defaults come from the environment; command-line flags override them.
+SRC="${PUNKTFUNK_SRC:-$HOME/punktfunk}"
+BOX="${PUNKTFUNK_BOX:-pf2}"
+BOX_IMAGE="${PUNKTFUNK_BOX_IMAGE:-docker.io/library/debian:trixie}"
+MGMT_PORT="${PUNKTFUNK_MGMT_PORT:-47990}"
+WEB_PORT="${PUNKTFUNK_WEB_PORT:-3000}"
+OPEN=0
+WITH_WEB=1
+for arg in "$@"; do
+    case "$arg" in
+        --open) OPEN=1 ;;
+        --no-web) WITH_WEB=0 ;;
+        --src=*) SRC="${arg#--src=}" ;;
+        # Print this file's leading comment header (everything after the shebang up to the first
+        # non-comment line) with the "# " prefix stripped. A fixed line range would drift as the
+        # header is edited and leak script code into the help text.
+        -h|--help) awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"; exit 0 ;;
+        *) die "unknown option: $arg (try --help)" ;;
+    esac
+done
+# Derived paths. TARGET_DIR is a dedicated cargo target dir under the source tree.
+TARGET_DIR="$SRC/target-steamos"
+BIN="$TARGET_DIR/release/punktfunk-host"
+CONFIG="$HOME/.config/punktfunk"
+UNITS="$HOME/.config/systemd/user"
+# Runtime dir baked into the host unit; falls back to the conventional /run/user/<uid>.
+XRD="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}"
+
+# --- 0. preflight ----------------------------------------------------------
+log "Preflight"
+[ -f /etc/os-release ] && . /etc/os-release || true
+case "${ID:-}${ID_LIKE:-}" in
+    *steamos*|*arch*) ok "SteamOS / Arch base detected (${PRETTY_NAME:-unknown})" ;;
+    *) warn "This installer targets SteamOS; '${PRETTY_NAME:-unknown}' may differ — on a normal distro use the apt/rpm packages instead." ;;
+esac
+[ -d "$SRC/crates/punktfunk-host" ] || die "no punktfunk source at $SRC. Clone or rsync it there first, or pass --src=DIR (see scripts/steamdeck/README.md)."
+ok "source: $SRC"
+if ! have distrobox; then
+    die "distrobox not found. Install it once (no root needed):
+       curl -sfL https://raw.githubusercontent.com/89luca89/distrobox/main/install | sh -s -- --prefix ~/.local
+     then re-run this script (ensure ~/.local/bin is on PATH)."
+fi
+DISTROBOX="$(command -v distrobox)"   # baked into the web unit (may be /usr/bin or ~/.local/bin)
+ok "distrobox: $DISTROBOX"
+
+# --- 1. build container + toolchain ---------------------------------------
+log "Build container '$BOX' ($BOX_IMAGE)"
+if distrobox list 2>/dev/null | awk -F'|' '{gsub(/ /,"",$2); print $2}' | grep -qx "$BOX"; then
+    ok "container '$BOX' exists"
+else
+    log "creating '$BOX' (first time — pulls the image)…"
+    distrobox create --yes --name "$BOX" --image "$BOX_IMAGE" --home "$HOME"
+    ok "created '$BOX'"
+fi
+
+log "Provisioning build dependencies in '$BOX' (idempotent; apt + rustup + bun)"
+# One non-interactive provisioning pass. APT deps mirror the Linux host build (FFmpeg/PipeWire/
+# DRM/EGL/VAAPI dev libs); rustup + bun are per-user under the shared $HOME.
+distrobox enter "$BOX" -- bash -lc '
+set -e
+export DEBIAN_FRONTEND=noninteractive
+sudo apt-get update -qq
+sudo apt-get install -y -qq --no-install-recommends \
+    build-essential pkg-config clang curl git ca-certificates \
+    libavcodec-dev libavformat-dev libavutil-dev libavfilter-dev libswscale-dev libavdevice-dev \
+    libpipewire-0.3-dev libspa-0.2-dev \
+    libgbm-dev libegl-dev libgl-dev libdrm-dev libva-dev \
+    libxkbcommon-dev libudev-dev libssl-dev libopus-dev libsdl2-dev \
+    nodejs >/dev/null
+command -v rustc >/dev/null 2>&1 || command -v ~/.cargo/bin/rustc >/dev/null 2>&1 || \
+    curl --proto =https --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path >/dev/null
+# bun builds the web console; node runs it (the node-server preset; bun mis-resolves the Nitro
+# externalized server deps like srvx at request time).
+command -v bun >/dev/null 2>&1 || command -v ~/.bun/bin/bun >/dev/null 2>&1 || \
+    curl -fsSL https://bun.sh/install | bash >/dev/null
+'
+ok "build deps ready"
+
+# --- 2. build host (+ web) -------------------------------------------------
+log "Building punktfunk-host (release) — first build is slow (~10-15 min)"
+distrobox enter "$BOX" -- bash -lc "
+set -e
+export PATH=\$HOME/.cargo/bin:\$PATH CARGO_TARGET_DIR='$TARGET_DIR'
+cd '$SRC' && cargo build -r -p punktfunk-host
+"
+[ -x "$BIN" ] || die "build did not produce $BIN"
+ok "host binary: $BIN"
+
+if [ "$WITH_WEB" = 1 ]; then
+    log "Building the management web console (bun)"
+    distrobox enter "$BOX" -- bash -lc "
+set -e
+export PATH=\$HOME/.bun/bin:\$PATH
+cd '$SRC/web' && bun install --frozen-lockfile && bun run build
+"
+    [ -f "$SRC/web/.output/server/index.mjs" ] || die "web build did not produce web/.output/server/index.mjs"
+    ok "web console built"
+fi
+
+# --- 3. config -------------------------------------------------------------
+log "Configuration ($CONFIG)"
+mkdir -p "$CONFIG"
+if [ ! -f "$CONFIG/host.env" ]; then
+    cat > "$CONFIG/host.env" <<'EOF'
+# punktfunk Steam Deck host config (sourced by the punktfunk-host user service).
+# Auto encoder: VAAPI on the Deck's AMD GPU, NVENC on NVIDIA.
+PUNKTFUNK_ENCODER=auto
+# The host auto-detects the live session (Game Mode gamescope / Desktop KDE) per connect.
+# Override the compositor only if detection misbehaves: PUNKTFUNK_COMPOSITOR=gamescope
+EOF
+    ok "wrote host.env"
+else
+    ok "host.env exists (left as-is)"
+fi
+
+if [ "$WITH_WEB" = 1 ] && [ ! -f "$CONFIG/web.env" ]; then
+    # Random login password + session secret for the web console, generated once.
+    # `|| true` swallows the SIGPIPE `tr` takes when `head` closes the pipe (pipefail would abort).
+    WEB_PW="$(LC_ALL=C tr -dc 'a-z0-9' </dev/urandom 2>/dev/null | head -c 12 || true)"
+    WEB_SECRET="$(LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom 2>/dev/null | head -c 32 || true)"
+    cat > "$CONFIG/web.env" <<EOF
+PUNKTFUNK_UI_PASSWORD=$WEB_PW
+PUNKTFUNK_UI_SECRET=$WEB_SECRET
+EOF
+    chmod 600 "$CONFIG/web.env"
+    ok "wrote web.env (generated login password)"
+else
+    [ "$WITH_WEB" = 1 ] && ok "web.env exists (login password unchanged)"
+fi
+
+# --- 4. system tuning (needs sudo; skipped gracefully if unavailable) ------
+log "System tuning (UDP buffers + input group) — needs sudo"
+if sudo -n true 2>/dev/null; then
+    printf 'net.core.wmem_max=33554432\nnet.core.rmem_max=33554432\n' \
+        | sudo tee /etc/sysctl.d/99-punktfunk-net.conf >/dev/null
+    sudo sysctl -q -p /etc/sysctl.d/99-punktfunk-net.conf >/dev/null
+    ok "UDP socket buffers raised to 32 MB (persisted)"
+    if [ -f "$SRC/scripts/60-punktfunk.rules" ]; then
+        sudo install -m644 "$SRC/scripts/60-punktfunk.rules" /etc/udev/rules.d/60-punktfunk.rules
+        sudo udevadm control --reload-rules && sudo udevadm trigger || true
+        ok "installed udev rule (virtual gamepads)"
+    fi
+    id -nG "$USER" | grep -qw input || { sudo usermod -aG input "$USER"; warn "added $USER to 'input' group — log out/in (or reboot) for gamepad support"; }
+else
+    warn "passwordless sudo unavailable — skipping UDP-buffer + udev tuning."
+    warn "Without it, high-bitrate streaming drops packets. Apply manually later:"
+    warn "  echo -e 'net.core.wmem_max=33554432\\nnet.core.rmem_max=33554432' | sudo tee /etc/sysctl.d/99-punktfunk-net.conf && sudo sysctl --system"
+fi
+
+# --- 5. systemd user services ---------------------------------------------
+log "Installing systemd user services"
+mkdir -p "$UNITS"
+SERVE_ARGS="serve --native --mgmt-bind 0.0.0.0:$MGMT_PORT"
+[ "$OPEN" = 1 ] && SERVE_ARGS="$SERVE_ARGS --open"
+cat > "$UNITS/punktfunk-host.service" <<EOF
+# Generated by scripts/steamdeck/install.sh — punktfunk Steam Deck host (native binary).
+[Unit]
+Description=punktfunk host (GameStream + punktfunk/1)
+After=pipewire.service
+
+[Service]
+EnvironmentFile=%h/.config/punktfunk/host.env
+Environment=XDG_RUNTIME_DIR=$XRD
+Environment=DBUS_SESSION_BUS_ADDRESS=unix:path=$XRD/bus
+ExecStart=$BIN $SERVE_ARGS
+Restart=on-failure
+RestartSec=2
+
+[Install]
+WantedBy=default.target
+EOF
+ok "punktfunk-host.service ($SERVE_ARGS)"
+
+if [ "$WITH_WEB" = 1 ]; then
+    # The console is a Nitro node-server build: bun only BUILDS it, node RUNS it (bun mis-resolves
+    # Nitro's externalized server deps like srvx at request time). It runs inside the build
+    # container, where nodejs is provisioned, and is pointed at the host's loopback HTTPS mgmt API.
+    # NODE_TLS_REJECT_UNAUTHORIZED=0 disables TLS certificate verification for that connection.
+    cat > "$UNITS/punktfunk-web.service" <<EOF
+# Generated by scripts/steamdeck/install.sh — punktfunk web console (node in the '$BOX' distrobox).
+[Unit]
+Description=punktfunk management web console
+After=punktfunk-host.service
+
+[Service]
+ExecStart=$DISTROBOX enter $BOX -- bash -lc 'cd $SRC/web; set -a; . $CONFIG/mgmt-token; . $CONFIG/web.env; set +a; export PUNKTFUNK_MGMT_URL=https://127.0.0.1:$MGMT_PORT NODE_TLS_REJECT_UNAUTHORIZED=0 PORT=$WEB_PORT HOST=0.0.0.0 NITRO_PORT=$WEB_PORT NITRO_HOST=0.0.0.0; exec node .output/server/index.mjs'
+Restart=on-failure
+RestartSec=3
+
+[Install]
+WantedBy=default.target
+EOF
+    ok "punktfunk-web.service (port $WEB_PORT)"
+fi
+
+systemctl --user daemon-reload
+loginctl show-user "$USER" 2>/dev/null | grep -q 'Linger=yes' || { sudo loginctl enable-linger "$USER" 2>/dev/null && ok "enabled linger (services run without login)" || warn "could not enable linger — services stop when you log out (sudo loginctl enable-linger $USER)"; }
+# enable + restart (not `enable --now`): restart picks up unit-file changes on a re-run, where
+# `--now` would no-op against an already-running service.
+systemctl --user enable punktfunk-host.service 2>/dev/null
+systemctl --user restart punktfunk-host.service
+ok "punktfunk-host started"
+if [ "$WITH_WEB" = 1 ]; then
+    # The host writes the mgmt token on first start; give it a moment so the web unit finds it.
+    for _ in $(seq 1 10); do [ -f "$CONFIG/mgmt-token" ] && break; sleep 0.5; done
+    systemctl --user enable punktfunk-web.service 2>/dev/null
+    systemctl --user restart punktfunk-web.service
+    ok "punktfunk-web started"
+fi
+
+# --- 6. summary ------------------------------------------------------------
+IP="$(ip -4 route get 1.1.1.1 2>/dev/null | sed -n 's/.* src \([0-9.]*\).*/\1/p' | head -1 || true)"
+echo
+log "Done — punktfunk host is running on this Steam Deck"
+echo "  • Host status:   systemctl --user status punktfunk-host"
+if [ "$WITH_WEB" = 1 ]; then
+    echo "  • Web console:   http://${IP:-steamdeck.local}:$WEB_PORT   (login: see $CONFIG/web.env)"
+    echo "  • Pair a device: open the web console → Devices → arm pairing → enter the PIN on the client"
+fi
+if [ "$OPEN" = 1 ]; then
+    echo "  • Mode: --open (unpaired clients accepted — trusted LAN only)"
+else
+    echo "  • Pairing required (secure default). From a client, pick this host and enter the PIN the host shows."
+fi
+echo "  • Update later:  bash $SRC/scripts/steamdeck/update.sh"
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# punktfunk — Steam Deck HOST update: rebuild from the current source + restart the services.
+# Run on the Deck after pulling/rsyncing new source. Pairings, config, and the web login persist.
+#
+#   bash scripts/steamdeck/update.sh           # rebuild host (+web if installed) and restart
+#   bash scripts/steamdeck/update.sh --pull    # `git pull` first (if the source is a git checkout)
+#
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+log()  { printf '\033[1;36m==>\033[0m %s\n' "$*"; }  # step banner: bold-cyan "==>" + message on stdout
+ok()   { printf '\033[1;32m  ok\033[0m %s\n' "$*"; }  # success line: bold-green "ok" + message on stdout
+die()  { printf '\033[1;31merror:\033[0m %s\n' "$*" >&2; exit 1; }  # bold-red "error:" + message on stderr, then exit status 1
+
+SRC="${PUNKTFUNK_SRC:-$HOME/punktfunk}"  # source tree; defaults to ~/punktfunk, override via PUNKTFUNK_SRC
+BOX="${PUNKTFUNK_BOX:-pf2}"  # distrobox container used for the builds; override via PUNKTFUNK_BOX
+TARGET_DIR="$SRC/target-steamos"  # passed to cargo as CARGO_TARGET_DIR in the host build below
+[ -d "$SRC/crates/punktfunk-host" ] || die "no punktfunk source at $SRC (set PUNKTFUNK_SRC)"  # sanity check: the host crate must exist under $SRC
+WEB=0; [ -f "$HOME/.config/systemd/user/punktfunk-web.service" ] && WEB=1  # web console counts as installed iff its user unit file exists
+
+if [ "${1:-}" = "--pull" ]; then  # only the first argument is inspected; any other argument is silently ignored
+    if [ -d "$SRC/.git" ]; then log "git pull"; git -C "$SRC" pull --ff-only; ok "pulled"; else die "$SRC is not a git checkout — rsync new source then run without --pull"; fi  # fast-forward only; a non-git source tree is a hard error
+fi
+
+log "Rebuilding host (release)"
+distrobox enter "$BOX" -- bash -lc "set -e; export PATH=\$HOME/.cargo/bin:\$PATH CARGO_TARGET_DIR='$TARGET_DIR'; cd '$SRC' && cargo build -r -p punktfunk-host"  # \$HOME/\$PATH expand inside the box; $TARGET_DIR/$SRC expand here. NOTE(review): a path containing a single quote would break this inner quoting
+ok "host rebuilt"
+if [ "$WEB" = 1 ]; then
+    log "Rebuilding web console"
+    distrobox enter "$BOX" -- bash -lc "set -e; export PATH=\$HOME/.bun/bin:\$PATH; cd '$SRC/web' && bun install --frozen-lockfile && bun run build"  # install deps from the frozen lockfile, then build; same quoting caveat as the host build
+    ok "web rebuilt"
+fi
+
+log "Restarting services"
+systemctl --user restart punktfunk-host.service  # services are restarted only after every rebuild above has succeeded (set -e)
+ok "punktfunk-host restarted"
+if [ "$WEB" = 1 ]; then systemctl --user restart punktfunk-web.service; ok "punktfunk-web restarted"; fi
+echo
+log "Updated. Status: systemctl --user status punktfunk-host"
Author	SHA1	Message	Date
enricobuehler	22a9ce4229	Merge remote-tracking branch 'origin/main' apple / swift (push) Successful in 56s Details windows-host / package (push) Successful in 3m7s Details windows-msix / package (arm64, C:\Users\Public\ffmpeg-arm64, aarch64-pc-windows-msvc, C:\t-a64) (push) Successful in 1m18s Details android / android (push) Successful in 4m27s Details ci / rust (push) Successful in 4m43s Details ci / web (push) Successful in 31s Details ci / docs-site (push) Successful in 34s Details windows-msix / package (x64, C:\Users\Public\ffmpeg, x86_64-pc-windows-msvc, C:\t) (push) Successful in 1m18s Details windows / build (aarch64-pc-windows-msvc) (push) Successful in 1m1s Details deb / build-publish (push) Successful in 2m8s Details windows / build (x86_64-pc-windows-msvc) (push) Successful in 1m5s Details decky / build-publish (push) Successful in 24s Details docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 5s Details docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s Details docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s Details ci / bench (push) Successful in 4m43s Details docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 26s Details docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m11s Details flatpak / build-publish (push) Successful in 4m13s Details rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 8m6s Details docker / deploy-docs (push) Successful in 18s Details rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 7m41s Details # Conflicts: # docs-site/content/docs/meta.json	2026-06-21 00:07:36 +00:00
enricobuehler	450bcf1e7b	feat(host): Apollo-backlog hardening — cert gate, NVENC RFI, media QoS, async injector A pass over the apollo-comparison backlog (re-verified against current code). Lands four items end-to-end plus a Windows-DualSense scoping doc. - #5/#92/#26 — GameStream paired-cert allow-list. tls.rs surfaces the verified peer cert to handlers (serve_https + PeerCertFingerprint, now shared with the mgmt API instead of duplicated); nvhttp gates /launch /resume /applist /cancel on AppState.paired and reports a real PairStatus; save_paired writes atomically (temp+rename). Closes the "mTLS accepts any client cert" hole. + regression test. - #6/#51/#19/#22 — NVENC caps query -> reference-frame invalidation. nvenc.rs query_caps probes nvEncGetEncodeCaps (max dims / 10-bit / custom-VBV / RFI), rejecting over-range modes and degrading 10-bit->8-bit instead of an opaque InvalidParam. New Encoder::invalidate_ref_frames (default false -> caller keyframes); the Windows NVENC path implements real RFI (multi-ref DPB + nvEncInvalidateRefFrames, dedup + IDR-on-overflow). control.rs decodes the 0x0301 lost-frame range (Apollo's IDX_INVALIDATE_REF_FRAMES) -> AppState.rfi_range -> encode loop, falling back to a keyframe. NOTE: the Windows NVENC impl is RTX-box/CI-pending (can't compile on Linux); adversarially reviewed vs the SDK. - #43/#72 — media socket QoS + buffer growth. New punktfunk_core::transport::qos: grow_socket_buffers (factored out the native plane's 32MB SO_SNDBUF growth so the GameStream sockets reuse it) + set_media_qos (opt-in PUNKTFUNK_DSCP=1: DSCP CS5 video / CS6 audio + Linux SO_PRIORITY, Apollo's scheme). Wired into UdpTransport and the GameStream video/audio sockets. Windows IP_TOS needs qWAVE (follow-up). - #8/#45 — GameStream input injection off the ENet service thread. on_receive no longer injects inline (a slow inject head-blocked ENet keepalive/retransmit); it forwards to a dedicated injector thread. 
The hardened InjectorService moved from punktfunk1 into crate::inject (shared by both planes) + a coalesce step that sums adjacent relative-mouse/scroll deltas while preserving button/key/abs ordering. Docs: re-verified apollo-comparison.md status (22 items already done/obsolete since the snapshot) + windows-dualsense-scoping.md (ViGEm can't emulate a DualSense; real DS5 on Windows needs a VHF virtual-HID driver — web-research pass pending). fmt + clippy -D warnings clean; full workspace test suite green; no C-ABI/OpenAPI drift. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-21 00:06:30 +00:00
enricobuehler	a2a6b858f7	fix(steamdeck): run the web console with node, not bun (Nitro node-server preset) The management console is a Nitro `node-server` build (per web/vite.config.ts) — it must be run with `node`, not `bun`. Run under bun it 500s on every page render with "Cannot find package 'srvx'": bun mis-resolves Nitro's externalized server deps from the nested SSR chunk at request time. (This was pre-existing — the old manual pfweb.sh ran it with bun too.) - Provision `nodejs` in the pf2 distrobox; run the web service with `node .output/server/index.mjs`. - Use `enable` + `restart` (not `enable --now`) so re-running the installer actually applies unit-file changes instead of no-opping against the running service. Verified on the Deck: web `/login` now returns 200 (was 500), "Listening on http://0.0.0.0:3000", no srvx error. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-20 23:32:23 +00:00
enricobuehler	f85d51b9f9	feat(steamdeck): one-command host install + docs (build-on-device) SteamOS is immutable read-only Arch, and the Deck is AMD (VAAPI) — so none of the checked-in packaging (arch/sysext is NVENC-first + client-oriented, deb/rpm are soname-mismatched) actually installs a working host on a Steam Deck. The proven path (distrobox-built native binary + systemd-run units) was 100% manual. Make it one command. - scripts/steamdeck/install.sh — idempotent installer: ensure the pf2 Debian-trixie distrobox + toolchain → build host (+web console) → write config (generated web login password) → raise UDP buffers to 32 MB + udev + input group (sudo, skipped gracefully if unavailable) → install + start punktfunk-host / punktfunk-web systemd USER services with linger. Flags: --open (accept unpaired clients), --no-web, --src=DIR. Builds on-device so a rebuild always matches the running SteamOS (no prebuilt-binary fragility across OS updates); VAAPI on the Deck's AMD GPU. - scripts/steamdeck/update.sh — rebuild from current source + restart (config/pairings persist). - scripts/steamdeck/README.md — deep reference (why on-device, what's installed, gotchas). - docs-site: new "Steam Deck (Host)" guide + sidebar entry; install.md splits Arch from the Steam Deck host path; packaging/arch/README points Deck-host users here and corrects the stale "NVENC-only" note (VAAPI host encode landed). Live-validated on the Deck: installer runs clean, both services come up, host listens (QUIC :9777 + mgmt :47990), web serves (302→login); on a client connect it takes over the Game-Mode gamescope session at the client's mode, captures via PipeWire, and VAAPI-encodes (hevc_vaapi) — full pipeline confirmed in the host journal. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-20 22:20:00 +00:00
enricobuehler	516efcc3a3	feat(core/fec): adaptive FEC — size recovery to measured loss, not a flat 20% On a clean link the flat 20% FEC is pure waste: extra wire bytes AND extra packets. On a packet-rate-bound uplink (the Steam Deck's WiFi tx caps ~22k pps regardless of bitrate) those extra packets directly cost goodput — measured at 200 Mbps goodput, 20% FEC drove ~10% loss vs ~2.6% at 0% (it saturated the link). Adaptive FEC closes the loop: - Client measures the loss FEC is absorbing each ~750 ms window from session stats (recovered shards / received, + a bump when a frame went unrecoverable) and sends a periodic `LossReport { loss_ppm }` on the control stream (new message; `window_loss_ppm` helper, shared + unit-tested). Connector (Apple/Linux/Windows) and probe both report; suppressed during a speed test so its filler can't skew it. - Host maps loss → recovery % (`adapt_fec`: ≈ loss×1.4 + 1pt, clamped 1..50) and applies it live via `Session::set_fec_percent` (the wire is self-describing — each packet carries its block's data/recovery counts, so the receiver needs no notice). A clean link decays to ~1%; loss ramps it up and converges. - `PUNKTFUNK_FEC_PCT`, when set, now PINS FEC static (disables adaptation) so speed-test / measurement runs keep a fixed, known overhead. Unset ⇒ adaptive, starting at 10%. An older host ignores LossReport (unknown control message) and keeps static FEC; an older client simply never reports and the host holds its start value. Builds + clippy + fmt + tests green (adapt_fec / window_loss_ppm / loss_report unit tests). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-20 21:31:07 +00:00
enricobuehler	f37a304fba	fix(core/speed-test): packet-level throughput + paced burst (kill the 0/100% cliff) The punktfunk/1 speed test was unusable across every client/host: at the start of a burst a little data got through, then everything read as dropped (~10 MB total). Two compounding bugs: 1. Receive side measured throughput from fully-reassembled FLAG_PROBE access units only. The instant loss crossed the 20% FEC budget no AU completed, so the figure cliffed to 0 / 100% loss even though most bytes still arrived — a binary cliff, not a graded measurement. 2. Send side blasted each filler AU (up to 256 KB ≈ 200 packets) into the socket buffer in one unpaced batch, unlike the real video path which paces. On a small buffer (e.g. the Steam Deck's 416 KB) a single AU overflowed it, so the test measured self-inflicted buffer overflow instead of the link. Fixes: - Host `run_probe_burst` keeps each AU a small (~16 KB) burst and paces by the byte budget, mirroring `paced_submit`; reports the WIRE packets the kernel accepted and the ones the send buffer dropped (stat deltas), separating host-side drops from link loss. - `ProbeResult` gains `wire_packets_sent` + `send_dropped` (back-compat decode: a 21-byte pre-wire-stats result still decodes, new fields 0). - Clients (probe + connector) count delivered traffic at the packet level via `session.stats()` deltas over the burst window, so throughput/loss degrade gracefully. Connector freezes the delivered figure when the host report lands so resumed video can't inflate it. New `ProbeOutcome`/`PunktfunkProbeResult` fields: `host_drop_pct`, `wire_packets_sent`, `send_dropped`. Validated on loopback (graded 142→1391 Mbps, host_drop/link_loss split correctly, no cliff) and live against the Deck: clean to ~200 Mbps goodput / 273 Mbps wire at 0% link loss, host send buffer the wall above that (the lever-#1 target). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-20 17:46:17 +00:00