feat(punktfunk/1): negotiable encoder bitrate + bandwidth speed-test probe
ci / rust (push) Has been cancelled

Two related additions to the native protocol, host-side (the client side of
each is exposed over the C ABI so the platform clients can wire it up).

Bitrate negotiation
- Hello/Welcome carry `bitrate_kbps` (appended trailing-byte field, back-compat:
  old peers decode 0 = host default). The client requests a rate; the host
  clamps it to [500 kbps, 500 Mbps] (or its 20 Mbps default when 0) and echoes
  the resolved value in Welcome. Replaces the hardcoded 20 Mbps NVENC bitrate in
  m3.rs — threaded through virtual_stream → build_pipeline → open_video, applied
  on the initial mode and every reconfigure rebuild.
- C ABI: punktfunk_connect_ex3(..., bitrate_kbps, ...) (ex2 delegates with 0);
  punktfunk_connection_bitrate() reads the resolved value.

Speed test (bandwidth probe)
- New typed control messages ProbeRequest{target_kbps,duration_ms} (0x20) /
  ProbeResult{bytes_sent,packets_sent,duration_ms} (0x21), plus a FLAG_PROBE
  packet flag. The client asks the host to burst zero-filled, FLAG_PROBE-tagged
  access units over the data plane at a target goodput for a given duration
  (clamped to ≤ 1 Gbps / ≤ 5 s), paced by a bytes-allowed budget; video pauses
  for the burst. The host reports what it actually sent; the client measures the
  bytes it received and the receive window, and derives goodput and loss from
  them. Probe filler is never fed to the decoder (diverted in the connector pump
  and the reference client's poll loop).
- The host control task now multiplexes Reconfigure + ProbeRequest (inbound)
  and ProbeResult (outbound) over select!; a probe channel reaches the
  data-plane thread (both virtual and synthetic sources).
- Connector: NativeClient::request_probe()/probe_result() with an internal
  accumulator; C ABI punktfunk_connection_speed_test() +
  punktfunk_connection_probe_result() → PunktfunkProbeResult.
- punktfunk-client-rs gains `--bitrate KBPS` and `--speed-test KBPS:MS` (its own
  loop measures + logs goodput/loss) for loopback verification.

Validated on loopback (synthetic source): a 20 Mbps / 2 s probe measured
20050 kbps at 0% loss, the bitrate was negotiated as expected (0→20000 and
50000→50000), and the interleaved probe AUs were correctly excluded from frame
verification (mismatched=0). Wire codecs + trailing-byte back-compat have unit
tests. C header regenerated.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-11 18:44:47 +00:00
parent dcb2850c7c
commit 74819b1be8
7 changed files with 906 additions and 89 deletions
+106 -2
View File
@@ -39,7 +39,10 @@ use anyhow::{anyhow, Context, Result};
use punktfunk_core::config::GamepadPref;
use punktfunk_core::config::Role;
use punktfunk_core::input::{InputEvent, InputKind};
use punktfunk_core::quic::{endpoint, io, Hello, Reconfigure, Reconfigured, Start, Welcome};
use punktfunk_core::packet::FLAG_PROBE;
use punktfunk_core::quic::{
endpoint, io, Hello, ProbeRequest, ProbeResult, Reconfigure, Reconfigured, Start, Welcome,
};
use punktfunk_core::transport::UdpTransport;
use punktfunk_core::{CompositorPref, Mode, PunktfunkError, Session};
use std::io::Write;
@@ -67,6 +70,11 @@ struct Args {
compositor: CompositorPref,
/// `--gamepad NAME` — request a host virtual-pad backend (auto|xbox360|dualsense).
gamepad: GamepadPref,
/// `--bitrate KBPS` — request this encoder bitrate (kilobits/s); 0 = host default.
bitrate_kbps: u32,
/// `--speed-test KBPS:MS` — after the stream starts, ask the host for a `MS`-millisecond
/// bandwidth probe burst at `KBPS`, then report measured throughput + loss.
speed_test: Option<(u32, u32)>,
}
fn parse_mode(m: &str) -> Option<Mode> {
@@ -178,6 +186,11 @@ fn parse_args() -> Args {
name: get("--name").unwrap_or("punktfunk-client-rs").to_string(),
compositor,
gamepad,
bitrate_kbps: get("--bitrate").and_then(|s| s.parse().ok()).unwrap_or(0),
speed_test: get("--speed-test").and_then(|s| {
let (kbps, ms) = s.split_once(':')?;
Some((kbps.parse().ok()?, ms.parse().ok()?))
}),
}
}
@@ -263,6 +276,7 @@ async fn session(args: Args) -> Result<()> {
mode: args.mode,
compositor: args.compositor,
gamepad: args.gamepad,
bitrate_kbps: args.bitrate_kbps,
}
.encode(),
)
@@ -292,9 +306,18 @@ async fn session(args: Args) -> Result<()> {
)
.await?;
// Speed-test accumulators: the data-plane loop folds each FLAG_PROBE filler AU in here; the
// --speed-test reporter below reads them once the host's ProbeResult lands. first/last hold
// now_ns timestamps of the receive window (0 = unset).
let probe_recv_bytes = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
let probe_recv_packets = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
let probe_first_ns = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
let probe_last_ns = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
// Mid-stream renegotiation test: after a delay, ask the host to switch modes on the
// still-open control stream. The stream then carries new-mode AUs (IDR + in-band
// parameter sets) — ffprobe the --out file to see both resolutions.
// parameter sets) — ffprobe the --out file to see both resolutions. Mutually exclusive with
// --speed-test (both own the control stream).
if let Some((new_mode, after_secs)) = args.remode {
let mut rs = send;
let mut rr = recv;
@@ -319,6 +342,70 @@ async fn session(args: Args) -> Result<()> {
other => tracing::error!(?other, "bad Reconfigured"),
}
});
} else if let Some((target_kbps, duration_ms)) = args.speed_test {
// Bandwidth probe: after the stream warms up, ask the host to burst FLAG_PROBE filler;
// measure what arrives vs. what it reports sending.
let mut ss = send;
let mut sr = recv;
let (pb, pp, pf, pl) = (
probe_recv_bytes.clone(),
probe_recv_packets.clone(),
probe_first_ns.clone(),
probe_last_ns.clone(),
);
tokio::spawn(async move {
use std::sync::atomic::Ordering::Relaxed;
tokio::time::sleep(std::time::Duration::from_secs(2)).await; // let the stream warm up
tracing::info!(target_kbps, duration_ms, "requesting speed-test probe");
if io::write_msg(
&mut ss,
&ProbeRequest {
target_kbps,
duration_ms,
}
.encode(),
)
.await
.is_err()
{
tracing::error!("ProbeRequest write failed");
return;
}
let res = match io::read_msg(&mut sr).await.map(|b| ProbeResult::decode(&b)) {
Ok(Ok(r)) => r,
other => {
tracing::error!(?other, "bad ProbeResult");
return;
}
};
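// The ProbeResult arrives on the reliable control stream and can beat the last UDP shards
// of the burst — wait briefly so they can reassemble before the accumulators are read.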
tokio::time::sleep(std::time::Duration::from_millis(400)).await;
let recv_bytes = pb.load(Relaxed);
let recv_packets = pp.load(Relaxed);
let (first, last) = (pf.load(Relaxed), pl.load(Relaxed));
let window_ms = if first > 0 && last > first {
(last - first) / 1_000_000
} else {
0
};
let throughput_kbps = recv_bytes.saturating_mul(8).checked_div(window_ms).unwrap_or(0);
let loss_pct = if res.bytes_sent > 0 {
res.bytes_sent.saturating_sub(recv_bytes) as f64 / res.bytes_sent as f64 * 100.0
} else {
0.0
};
tracing::info!(
target_kbps,
host_sent_bytes = res.bytes_sent,
host_sent_packets = res.packets_sent,
recv_bytes,
recv_packets,
window_ms,
throughput_kbps,
loss_pct = format!("{loss_pct:.1}%"),
"SPEED TEST complete",
);
});
}
// Input plane: scripted events as QUIC datagrams (mouse square + 'A' taps), proving the
@@ -581,6 +668,12 @@ async fn session(args: Args) -> Result<()> {
let cfg = welcome.session_config(Role::Client);
let expected = welcome.frames;
let out_path = args.out.clone();
let (pb, pp, pf, pl) = (
probe_recv_bytes.clone(),
probe_recv_packets.clone(),
probe_first_ns.clone(),
probe_last_ns.clone(),
);
// Data plane on a blocking thread (native threads only on the frame path).
let result = tokio::task::spawn_blocking(move || -> Result<()> {
@@ -619,6 +712,17 @@ async fn session(args: Args) -> Result<()> {
match session.poll_frame() {
Ok(frame) => {
last_rx = std::time::Instant::now();
// Speed-test filler isn't video: fold it into the probe accumulators and skip
// verification / the --out sink.
if frame.flags & FLAG_PROBE as u32 != 0 {
use std::sync::atomic::Ordering::Relaxed;
let n = now_ns();
let _ = pf.compare_exchange(0, n, Relaxed, Relaxed);
pl.store(n, Relaxed);
pb.fetch_add(frame.data.len() as u64, Relaxed);
pp.fetch_add(1, Relaxed);
continue;
}
bytes += frame.data.len() as u64;
// The host stamps pts with its capture wall clock; same-host runs share it.
let lat = now_ns().saturating_sub(frame.pts_ns);