feat(core/fec): adaptive FEC — size recovery to measured loss, not a flat 20%

On a clean link the flat 20% FEC is pure waste: extra wire bytes AND extra
packets. On a packet-rate-bound uplink (the Steam Deck's WiFi tx caps ~22k pps
regardless of bitrate) those extra packets directly cost goodput — measured at
200 Mbps goodput, 20% FEC drove ~10% loss vs ~2.6% at 0% (it saturated the link).

Adaptive FEC closes the loop:
- Client measures the loss FEC is absorbing each ~750 ms window from session stats
  (recovered shards / received, + a bump when a frame went unrecoverable) and sends
  a periodic `LossReport { loss_ppm }` on the control stream (new message;
  `window_loss_ppm` helper, shared + unit-tested). Connector (Apple/Linux/Windows)
  and probe both report; suppressed during a speed test so its filler can't skew it.
- Host maps loss → recovery % (`adapt_fec`: ≈ loss×1.4 + 1pt, clamped 1..50) and
  applies it live via `Session::set_fec_percent` (the wire is self-describing — each
  packet carries its block's data/recovery counts, so the receiver needs no notice).
  A clean link decays to ~1%; loss ramps it up and converges.
- `PUNKTFUNK_FEC_PCT`, when set, now PINS FEC static (disables adaptation) so
  speed-test / measurement runs keep a fixed, known overhead. Unset ⇒ adaptive,
  starting at 10%.

An older host ignores LossReport (unknown control message) and keeps static FEC;
an older client simply never reports and the host holds its start value. Builds +
clippy + fmt + tests green (adapt_fec / window_loss_ppm / loss_report unit tests).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-20 21:31:07 +00:00
parent f37a304fba
commit 516efcc3a3
7 changed files with 272 additions and 15 deletions
+29 -4
View File
@@ -16,15 +16,15 @@ use crate::error::{PunktfunkError, Result};
use crate::input::InputEvent;
use crate::packet::FLAG_PROBE;
use crate::quic::{
endpoint, io, Hello, HidOutput, ProbeRequest, ProbeResult, Reconfigure, Reconfigured,
RequestKeyframe, RichInput, Start, Welcome,
endpoint, io, window_loss_ppm, Hello, HidOutput, LossReport, ProbeRequest, ProbeResult,
Reconfigure, Reconfigured, RequestKeyframe, RichInput, Start, Welcome,
};
use crate::session::{Frame, Session};
use crate::transport::UdpTransport;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::mpsc::{Receiver, RecvTimeoutError, SyncSender};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use std::time::{Duration, Instant};
/// A control-stream request the embedder makes on the open handshake stream: a mode switch or a
/// speed test. One outbound channel carries both so the worker's `select!` has a single writer
@@ -33,6 +33,7 @@ enum CtrlRequest {
Mode(Mode),
Probe(ProbeRequest),
Keyframe,
Loss(LossReport),
}
/// What the worker reports to [`NativeClient::connect`] once the handshake lands: the negotiated
@@ -245,6 +246,7 @@ impl NativeClient {
let mode_slot_w = mode_slot.clone();
let probe_w = probe.clone();
let frames_dropped_w = frames_dropped.clone();
let ctrl_tx_pump = ctrl_tx.clone(); // the data-plane pump sends adaptive-FEC LossReports
let worker = std::thread::Builder::new()
.name("punktfunk-client".into())
.spawn(move || {
@@ -282,6 +284,7 @@ impl NativeClient {
mic_rx,
rich_input_rx,
ctrl_rx,
ctrl_tx: ctrl_tx_pump,
ready_tx,
shutdown: shutdown_w,
mode_slot: mode_slot_w,
@@ -629,6 +632,7 @@ struct WorkerArgs {
mic_rx: tokio::sync::mpsc::UnboundedReceiver<(u32, u64, Vec<u8>)>,
rich_input_rx: tokio::sync::mpsc::UnboundedReceiver<RichInput>,
ctrl_rx: tokio::sync::mpsc::UnboundedReceiver<CtrlRequest>,
ctrl_tx: tokio::sync::mpsc::UnboundedSender<CtrlRequest>,
ready_tx: std::sync::mpsc::Sender<Result<Negotiated>>,
shutdown: Arc<AtomicBool>,
mode_slot: Arc<std::sync::Mutex<Mode>>,
@@ -658,6 +662,7 @@ async fn worker_main(args: WorkerArgs) {
mut mic_rx,
mut rich_input_rx,
mut ctrl_rx,
ctrl_tx,
ready_tx,
shutdown,
mode_slot,
@@ -851,6 +856,7 @@ async fn worker_main(args: WorkerArgs) {
CtrlRequest::Mode(m) => Reconfigure { mode: m }.encode(),
CtrlRequest::Probe(p) => p.encode(),
CtrlRequest::Keyframe => RequestKeyframe.encode(),
CtrlRequest::Loss(r) => r.encode(),
};
if io::write_msg(&mut ctrl_send, &bytes).await.is_err() {
break;
@@ -944,6 +950,12 @@ async fn worker_main(args: WorkerArgs) {
let pump_probe = probe.clone();
let _ = tokio::task::spawn_blocking(move || {
pin_thread_user_interactive(); // feeds frame_tx → the client's user-interactive video pump
// Adaptive-FEC loss reporting: every ADAPT_REPORT_INTERVAL, report the loss observed over the
// window (shards FEC recovered, plus a bump if any frame went unrecoverable) so the host can
// size FEC to the link. Suppressed during a speed test (its FLAG_PROBE filler would skew it).
const ADAPT_REPORT_INTERVAL: Duration = Duration::from_millis(750);
let mut last_report = Instant::now();
let (mut last_recovered, mut last_received, mut last_dropped) = (0u64, 0u64, 0u64);
while !pump_shutdown.load(Ordering::SeqCst) {
// Mirror the reassembler's unrecoverable-drop count for the client's keyframe-recovery
// loop, and (during a speed test) the packet-level receive counters for the throughput
@@ -951,7 +963,7 @@ async fn worker_main(args: WorkerArgs) {
// through a total-loss drought where no AU completes. Cheap: a few relaxed atomic loads.
let st = session.stats();
frames_dropped.store(st.frames_dropped, Ordering::Relaxed);
{
let probe_active = {
let mut p = pump_probe.lock().unwrap();
if p.active && !p.done {
p.rx_packets_now = st.packets_received;
@@ -959,6 +971,19 @@ async fn worker_main(args: WorkerArgs) {
p.base_packets.get_or_insert(st.packets_received);
p.base_bytes.get_or_insert(st.bytes_received);
}
p.active && !p.done
};
if !probe_active && last_report.elapsed() >= ADAPT_REPORT_INTERVAL {
let loss_ppm = window_loss_ppm(
st.fec_recovered_shards.wrapping_sub(last_recovered),
st.packets_received.wrapping_sub(last_received),
st.frames_dropped.wrapping_sub(last_dropped),
);
let _ = ctrl_tx.send(CtrlRequest::Loss(LossReport { loss_ppm }));
last_report = Instant::now();
last_recovered = st.fec_recovered_shards;
last_received = st.packets_received;
last_dropped = st.frames_dropped;
}
match session.poll_frame() {
Ok(frame) => {
+12
View File
@@ -96,6 +96,18 @@ impl Packetizer {
}
}
/// Live-adjust the FEC recovery percentage (adaptive FEC). Takes effect on the next
/// [`packetize`](Self::packetize); the wire is self-describing (each packet carries its block's
/// data/recovery counts), so the receiver needs no notification. Clamped to ≤ 90.
pub fn set_fec_percent(&mut self, pct: u8) {
self.fec.fec_percent = pct.min(90);
}
/// The current FEC recovery percentage.
pub fn fec_percent(&self) -> u8 {
self.fec.fec_percent
}
/// Packetize one access unit into wire packets (header + shard payload each).
pub fn packetize(
&mut self,
+80
View File
@@ -167,6 +167,18 @@ pub struct Reconfigured {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RequestKeyframe;
/// `client → host`, periodic: the client's observed data-plane loss, so the host can size FEC to
/// the link instead of a flat percentage (adaptive FEC). `loss_ppm` is parts-per-million of shards
/// that arrived missing-but-recovered (plus a bump when frames went unrecoverable) over the report
/// window — i.e. the loss FEC is currently absorbing. The host maps it to a recovery percentage,
/// clamped to a sane band, and applies it live; a clean link decays toward the floor (fewer packets,
/// which directly helps a packet-rate-bound uplink like the Steam Deck's WiFi tx). Fire-and-forget.
/// A host that predates this ignores it (unknown control message) and keeps its static FEC.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LossReport {
pub loss_ppm: u32,
}
/// `client → host`, any time after [`Start`]: run a bandwidth speed test. The host bursts
/// filler access units (flagged [`crate::packet::FLAG_PROBE`]) over the data plane at
/// `target_kbps` of application goodput for `duration_ms`, *pausing video for the duration*, then
@@ -251,6 +263,8 @@ pub const MSG_RECONFIGURE: u8 = 0x01;
pub const MSG_RECONFIGURED: u8 = 0x02;
/// Type byte of [`RequestKeyframe`].
pub const MSG_REQUEST_KEYFRAME: u8 = 0x03;
/// Type byte of [`LossReport`].
pub const MSG_LOSS_REPORT: u8 = 0x04;
/// Type byte of [`ProbeRequest`].
pub const MSG_PROBE_REQUEST: u8 = 0x20;
/// Type byte of [`ProbeResult`].
@@ -821,6 +835,43 @@ impl RequestKeyframe {
}
}
impl LossReport {
pub fn encode(&self) -> Vec<u8> {
// magic[0..4] type[4] loss_ppm[5..9]
let mut b = Vec::with_capacity(9);
b.extend_from_slice(CTL_MAGIC);
b.push(MSG_LOSS_REPORT);
b.extend_from_slice(&self.loss_ppm.to_le_bytes());
b
}
pub fn decode(b: &[u8]) -> Result<LossReport> {
if b.len() != 9 || &b[0..4] != CTL_MAGIC || b[4] != MSG_LOSS_REPORT {
return Err(PunktfunkError::InvalidArg("bad LossReport"));
}
Ok(LossReport {
loss_ppm: u32::from_le_bytes(b[5..9].try_into().unwrap()),
})
}
}
/// Compute a [`LossReport`] `loss_ppm` from one window's session-stat deltas: shards FEC recovered
/// (the loss it absorbed), shards received, and frames that went unrecoverable. Loss ≈ recovered /
/// (received + recovered) — the fraction of shards that arrived missing. A frame drop means loss
/// exceeded the current FEC budget (so `recovered` plateaus), so add a fixed bump to push the host's
/// FEC up past the cap on the next adjustment. Returns parts-per-million, capped at 1e6.
pub fn window_loss_ppm(recovered: u64, received: u64, frames_dropped: u64) -> u32 {
let denom = received.saturating_add(recovered);
let mut ppm = recovered
.saturating_mul(1_000_000)
.checked_div(denom)
.unwrap_or(0) as u32;
if frames_dropped > 0 {
ppm = ppm.saturating_add(50_000); // +5%: unrecoverable loss → raise FEC past the current cap
}
ppm.min(1_000_000)
}
impl ProbeRequest {
pub fn encode(&self) -> Vec<u8> {
// magic[0..4] type[4] target_kbps[5..9] duration_ms[9..13]
@@ -1877,6 +1928,35 @@ mod tests {
assert!(RequestKeyframe::decode(&[bytes.as_slice(), &[0]].concat()).is_err());
}
#[test]
fn loss_report_roundtrip() {
for loss_ppm in [0u32, 1, 12_345, 50_000, 1_000_000] {
let r = LossReport { loss_ppm };
assert_eq!(LossReport::decode(&r.encode()).unwrap(), r);
}
// Disjoint from the other control messages (type byte + length).
assert!(LossReport::decode(&RequestKeyframe.encode()).is_err());
assert!(RequestKeyframe::decode(&LossReport { loss_ppm: 0 }.encode()).is_err());
assert!(LossReport::decode(
&[LossReport { loss_ppm: 0 }.encode().as_slice(), &[0]].concat()
)
.is_err());
}
#[test]
fn window_loss_ppm_estimates_and_caps() {
// No traffic → 0. A clean window (nothing recovered) → 0.
assert_eq!(window_loss_ppm(0, 0, 0), 0);
assert_eq!(window_loss_ppm(0, 1000, 0), 0);
// 50 recovered of 1000 total (950 received + 50 recovered) = 5%.
assert_eq!(window_loss_ppm(50, 950, 0), 50_000);
// An unrecoverable frame adds the +5% bump (push FEC past the current cap).
assert_eq!(window_loss_ppm(50, 950, 1), 100_000);
// A total-loss window with a drop but nothing received still reports the bump, capped at 1e6.
assert_eq!(window_loss_ppm(0, 0, 3), 50_000);
assert!(window_loss_ppm(u64::MAX, 1, 9) <= 1_000_000);
}
#[test]
fn probe_messages_roundtrip() {
let req = ProbeRequest {
+12
View File
@@ -201,6 +201,18 @@ impl Session {
r.map(|_| ())
}
/// Host: live-adjust the FEC recovery percentage (adaptive FEC). Affects the next
/// [`submit_frame`](Self::submit_frame)/[`seal_frame`](Self::seal_frame); the receiver needs no
/// notification (each packet's header carries its block's data/recovery shard counts).
pub fn set_fec_percent(&mut self, pct: u8) {
self.packetizer.set_fec_percent(pct);
}
/// The current FEC recovery percentage (host side).
pub fn fec_percent(&self) -> u8 {
self.packetizer.fec_percent()
}
/// Host: drain one pending input event from the client, if any.
pub fn poll_input(&mut self) -> Result<Option<InputEvent>> {
if self.config.role != Role::Host {