From 71f26083a66966842e80f7cb881f977aa9c73222 Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Sat, 13 Jun 2026 19:18:40 +0000 Subject: [PATCH] bench(core): Tier-1 criterion microbenchmarks for the punktfunk/1 hot path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPU-free, so they run in normal CI. Two layers: crypto/{seal,seal_in_place,open} on one MTU shard, and pipeline/{gf8,gf16}/{64KB,1MB} — a whole frame through the real per-frame path end to end over the loopback transport (FEC encode → AES-GCM seal → packetize → reassemble → FEC decode → open). Baselines on the dev box (RTX 5070 Ti VM): AES-GCM ~1.57 GiB/s/shard; gf16 ~418 MiB/s at 1 MB vs gf8 ~23 MiB/s (the GF(2^8) O(n^2) ceiling the GF(2^16) Leopard wall-breaker removes — exactly the kind of regression this should catch). The GPU capture/NVENC path is out of scope here (Tier 3). Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 129 +++++++++++++++++++++- crates/punktfunk-core/Cargo.toml | 7 ++ crates/punktfunk-core/benches/pipeline.rs | 107 ++++++++++++++++++ 3 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 crates/punktfunk-core/benches/pipeline.rs diff --git a/Cargo.lock b/Cargo.lock index cc28907..8e8d598 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -46,6 +46,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "annotate-snippets" version = "0.11.5" @@ -371,7 +377,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools", + "itertools 0.13.0", "proc-macro2", "quote", "regex", @@ -454,6 +460,12 @@ dependencies = [ "system-deps", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + 
[[package]] name = "cbc" version = "0.1.2" @@ -531,6 +543,33 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -659,6 +698,40 @@ dependencies = [ "libc", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -684,6 +757,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] 
+name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -1434,6 +1513,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -1604,12 +1694,32 @@ dependencies = [ "generic-array", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -2045,6 +2155,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "opaque-debug" version = "0.3.1" @@ -2336,6 +2452,7 @@ dependencies = [ "aes-gcm", "bytes", "cbindgen", + "criterion", "fec-rs", "hmac", "libc", @@ -3293,6 +3410,16 @@ dependencies = 
[ "time-core", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.11.0" diff --git a/crates/punktfunk-core/Cargo.toml b/crates/punktfunk-core/Cargo.toml index 35422e0..2500604 100644 --- a/crates/punktfunk-core/Cargo.toml +++ b/crates/punktfunk-core/Cargo.toml @@ -56,6 +56,13 @@ libc = "0.2" [dev-dependencies] proptest = "1" +# Tier-1 microbenchmarks (benches/pipeline.rs). default-features off → no plotters/HTML (headless +# CI just needs the measurement + target/criterion/**/estimates.json for the regression compare). +criterion = { version = "0.5", default-features = false, features = ["cargo_bench_support"] } + +[[bench]] +name = "pipeline" +harness = false [build-dependencies] cbindgen = "0.29" diff --git a/crates/punktfunk-core/benches/pipeline.rs b/crates/punktfunk-core/benches/pipeline.rs new file mode 100644 index 0000000..4abc929 --- /dev/null +++ b/crates/punktfunk-core/benches/pipeline.rs @@ -0,0 +1,107 @@ +//! Tier-1 microbenchmarks for the punktfunk/1 hot path — GPU-free, so they run in normal CI. +//! +//! Two layers: +//! - `crypto/*` — the isolated AES-128-GCM primitives on one ~MTU shard. +//! - `pipeline/*`— a whole frame through the real per-frame path end to end over the in-process +//! loopback transport: FEC encode → AES-GCM seal → packetize → (loopback) → reassemble → +//! FEC decode → open. This is what a throughput/latency regression in the core would show up in. +//! +//! The GPU capture/NVENC encode path is deliberately out of scope here (no GPU in CI) — that's the +//! Tier-3 stream benchmark on a self-hosted GPU runner. Run locally with `cargo bench -p punktfunk-core`. 
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use punktfunk_core::config::{Config, FecConfig, FecScheme, ProtocolPhase, Role};
+use punktfunk_core::crypto::SessionCrypto;
+use punktfunk_core::session::Session;
+use punktfunk_core::transport::loopback_pair;
+
+const TAG_LEN: usize = 16; // AES-GCM authentication tag
+const SHARD: usize = 1452; // ~one MTU-sized data shard
+
+/// Session config for one side (`role`) of the benchmarked loopback pair, using `scheme` for FEC.
+fn cfg(role: Role, scheme: FecScheme) -> Config {
+    // GF(2^8) is capped at ≤255 shards/block (Moonlight-compatible); GF(2^16) Leopard goes far
+    // higher. Pick the matching protocol phase and a realistic, valid block size for each.
+    let (phase, max_data_per_block) = match scheme {
+        FecScheme::Gf8 => (ProtocolPhase::P1GameStream, 128),
+        FecScheme::Gf16 => (ProtocolPhase::P2Punktfunk, 4096),
+    };
+    Config {
+        role,
+        phase,
+        fec: FecConfig {
+            scheme,
+            fec_percent: 25,
+            max_data_per_block,
+        },
+        shard_payload: SHARD,
+        max_frame_bytes: 8 * 1024 * 1024,
+        encrypt: true, // bench the real path — crypto is always on for punktfunk/1
+        key: [7u8; 16],
+        salt: [1, 2, 3, 4],
+        loopback_drop_period: 0, // throughput run: no induced loss (loss-harness covers recovery)
+    }
+}
+
+/// `crypto/*`: the seal, seal-in-place and open primitives on one `SHARD`-sized shard.
+fn bench_crypto(c: &mut Criterion) {
+    let sealer = SessionCrypto::new(&[7u8; 16], [1, 2, 3, 4], Role::Host);
+    let opener = SessionCrypto::new(&[7u8; 16], [1, 2, 3, 4], Role::Client);
+    let plaintext = vec![0xABu8; SHARD];
+    let ciphertext = sealer.seal(0, &plaintext).unwrap();
+
+    let mut group = c.benchmark_group("crypto");
+    group.throughput(Throughput::Bytes(SHARD as u64));
+    group.bench_function("seal", |bencher| {
+        let mut next_seq = 0u64;
+        bencher.iter(|| {
+            let out = sealer.seal(next_seq, black_box(&plaintext)).unwrap();
+            next_seq += 1;
+            black_box(out)
+        })
+    });
+    group.bench_function("seal_in_place", |bencher| {
+        let mut next_seq = 0u64;
+        let mut scratch = vec![0xABu8; SHARD + TAG_LEN];
+        bencher.iter(|| {
+            sealer.seal_in_place(next_seq, black_box(&mut scratch)).unwrap();
+            next_seq += 1;
+        })
+    });
+    group.bench_function("open", |bencher| {
+        bencher.iter(|| black_box(opener.open(0, black_box(&ciphertext)).unwrap()))
+    });
+    group.finish();
+}
+
+/// `pipeline/*`: one whole frame per iteration, host `submit_frame` → client `poll_frame`.
+fn bench_pipeline(c: &mut Criterion) {
+    let mut group = c.benchmark_group("pipeline");
+    // Both FEC schemes: GF(2^8) GameStream-compat vs GF(2^16) Leopard, the wall-breaker.
+    let schemes = [(FecScheme::Gf8, "gf8"), (FecScheme::Gf16, "gf16")];
+    // 64 KB ≈ a steady-state P-frame; 1 MB ≈ a keyframe/scene-cut.
+    let frame_sizes = [64 * 1024usize, 1024 * 1024];
+    for (scheme, label) in schemes {
+        for frame_len in frame_sizes {
+            group.throughput(Throughput::Bytes(frame_len as u64));
+            let id = BenchmarkId::new(label, frame_len);
+            group.bench_with_input(id, &frame_len, |bencher, &len| {
+                let (host_io, client_io) = loopback_pair(0, 0);
+                let mut tx = Session::new(cfg(Role::Host, scheme), Box::new(host_io)).unwrap();
+                let mut rx = Session::new(cfg(Role::Client, scheme), Box::new(client_io)).unwrap();
+                let payload = vec![0x5Au8; len];
+                let mut next_seq = 0u64;
+                bencher.iter(|| {
+                    tx.submit_frame(black_box(&payload), next_seq, 0).unwrap();
+                    let received = rx.poll_frame().unwrap();
+                    next_seq += 1;
+                    black_box(received)
+                })
+            });
+        }
+    }
+    group.finish();
+}
+
+criterion_group!(benches, bench_crypto, bench_pipeline);
+criterion_main!(benches);