diff --git a/Cargo.lock b/Cargo.lock index 6e3fed7..9192bef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,17 +37,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "ahash" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" -dependencies = [ - "getrandom 0.2.17", - "once_cell", - "version_check", -] - [[package]] name = "aho-corasick" version = "1.1.4" @@ -357,7 +346,7 @@ version = "0.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" dependencies = [ - "bitflags 2.13.0", + "bitflags", "cexpr", "clang-sys", "itertools", @@ -376,7 +365,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ "annotate-snippets", - "bitflags 2.13.0", + "bitflags", "cexpr", "clang-sys", "itertools", @@ -403,12 +392,6 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.13.0" @@ -650,6 +633,25 @@ dependencies = [ "libc", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -838,13 +840,20 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" +[[package]] +name = "fec-rs" +version = "0.1.0" +dependencies = [ + "rayon", +] + [[package]] name = "ffmpeg-next" version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da02698288e0275e442a47fc12ca26d50daf0d48b15398ba5906f20ac2e2a9f9" dependencies = [ - "bitflags 2.13.0", + "bitflags", "ffmpeg-sys-next", "libc", ] @@ -1078,15 +1087,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash", -] - [[package]] name = "hashbrown" version = "0.15.5" @@ -1239,15 +1239,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1381,7 +1372,7 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6b8cfa2a7656627b4c92c6b9ef929433acd673d5ab3708cda1b18478ac00df4" dependencies = [ - "bitflags 2.13.0", + "bitflags", "cc", "convert_case", "cookie-factory", @@ -1431,15 +1422,6 @@ dependencies = [ "lumen-core", ] -[[package]] -name = "lru" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a" -dependencies = [ - "hashbrown 0.12.3", -] - [[package]] name = "lru-slab" version = "0.1.2" @@ -1462,10 +1444,10 @@ dependencies = [ "aes-gcm", "bytes", "cbindgen", + "fec-rs", "proptest", "quinn", "rand 0.9.4", - "reed-solomon-erasure", "reed-solomon-simd", "thiserror 2.0.18", "tokio", @@ -1593,7 +1575,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.13.0", + "bitflags", "cfg-if", "cfg_aliases", "libc", @@ -1757,17 +1739,6 @@ version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.5" @@ -1775,21 +1746,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", - "parking_lot_core 0.9.12", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -1800,7 +1757,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.18", + "redox_syscall", "smallvec", "windows-link", ] @@ -1843,7 +1800,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9688b89abf11d756499f7c6190711d6dbe5a3acdb30c8fbf001d6596d06a8d44" dependencies = [ "anyhow", - "bitflags 2.13.0", + "bitflags", "libc", "libspa", "libspa-sys", @@ -1954,7 +1911,7 @@ checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.13.0", + "bitflags", "num-traits", "rand 0.9.4", "rand_chacha 0.9.0", @@ -2126,6 +2083,26 @@ dependencies = [ "rand_core 0.9.5", ] +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rcgen" version = "0.13.2" @@ -2145,35 +2122,13 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08ad765b21a08b1a8e5cdce052719188a23772bcbefb3c439f0baaf62c56ceac" -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.13.0", -] - -[[package]] -name = "reed-solomon-erasure" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7263373d500d4d4f505d43a2a662d475a894aa94503a1ee28e9188b5f3960d4f" -dependencies = [ - "libm", - "lru", - "parking_lot 0.11.2", - "smallvec", - "spin", + "bitflags", ] [[package]] @@ -2278,7 +2233,7 @@ version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.13.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -2434,7 +2389,7 @@ version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags 2.13.0", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -2829,7 +2784,7 @@ dependencies = [ "bytes", "libc", "mio", - "parking_lot 0.12.5", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -3257,7 +3212,7 @@ version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags 2.13.0", + "bitflags", "hashbrown 0.15.5", "indexmap", "semver", @@ -3282,7 +3237,7 @@ version = "0.31.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "645c7c96bb74690c3189b5c9cb4ca1627062bb23693a4fad9d8c3de958260144" dependencies = [ - "bitflags 2.13.0", + "bitflags", "rustix", "wayland-backend", "wayland-scanner", @@ -3294,7 +3249,7 @@ version = "0.32.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "563a85523cade2429938e790815fd7319062103b9f4a2dc806e9b53b95982d8f" dependencies = [ - "bitflags 2.13.0", + "bitflags", "wayland-backend", "wayland-client", "wayland-scanner", @@ -3306,7 +3261,7 @@ version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e9567599ef23e09b8dad6e429e5738d4509dfc46b3b21f32841a304d16b29c8" dependencies = [ - "bitflags 2.13.0", + "bitflags", "wayland-backend", "wayland-client", "wayland-protocols", @@ -3319,7 +3274,7 @@ version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb04e52f7836d7c7976c78ca0250d61e33873c34156a2a1fc9474828ec268234" dependencies = [ - "bitflags 2.13.0", + "bitflags", "wayland-backend", "wayland-client", "wayland-protocols", @@ -3365,22 +3320,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.11" @@ -3390,12 +3329,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-link" version = "0.2.1" @@ -3703,7 +3636,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags 2.13.0", + "bitflags", "indexmap", "log", "serde", diff --git a/crates/lumen-core/Cargo.toml b/crates/lumen-core/Cargo.toml index 9a61c4f..0d1bf85 100644 --- a/crates/lumen-core/Cargo.toml +++ b/crates/lumen-core/Cargo.toml @@ -23,7 +23,11 @@ quic = ["dep:quinn", "dep:tokio"] [dependencies] reed-solomon-simd = "3.1" # GF(2^16) Leopard-RS, SIMD, O(n log n) — the wall-breaker (P2) -reed-solomon-erasure = "6.0" # GF(2^8) classic RS — GameStream/Moonlight compat (P1) +# Vendored fork of fec-rs: GF(2^8) classic RS with the *Cauchy* generator matrix +# (M[j][i] = inv[(m+i)^j]) — byte-identical to the `nanors` library Moonlight uses, so our +# parity is decodable by a stock Moonlight client. (reed-solomon-erasure is Vandermonde and is +# NOT interoperable.) See vendor/fec-rs/LICENSE (BSD-2-Clause). +fec-rs = { path = "vendor/fec-rs" } aes-gcm = "0.10" # AES-128-GCM session crypto, matches GameStream zerocopy = { version = "0.8", features = ["derive"] } bytes = "1" diff --git a/crates/lumen-core/src/fec/gf8.rs b/crates/lumen-core/src/fec/gf8.rs index 4443d0f..8792ea6 100644 --- a/crates/lumen-core/src/fec/gf8.rs +++ b/crates/lumen-core/src/fec/gf8.rs @@ -1,9 +1,12 @@ -//! GF(2⁸) classic Reed–Solomon backend (`reed-solomon-erasure`), equivalent to the -//! `nanors` library Moonlight uses. Hard ceiling: data + recovery ≤ 255 shards/block. +//! GF(2⁸) classic Reed–Solomon backend (vendored `fec-rs`). Uses the **Cauchy** generator +//! matrix `M[j][i] = inv[(m+i)^j]` over GF(2⁸) (poly 0x1d) — byte-identical to the `nanors` +//! library Moonlight uses, so the parity this produces is recoverable by a stock Moonlight +//! client (unlike Vandermonde RS, whose parity is not interoperable). Hard ceiling: data + +//! recovery ≤ 255 shards/block. use super::{validate_block_shape, validate_encode_shape, ErasureCoder, FecError}; use crate::config::FecScheme; -use reed_solomon_erasure::galois_8::ReedSolomon; +use fec_rs::ReedSolomon; pub struct Gf8Coder; @@ -21,7 +24,7 @@ impl ErasureCoder for Gf8Coder { let shard_len = data[0].len(); let rs = ReedSolomon::new(k, recovery_count) .map_err(|_| FecError::Config("invalid GF(2^8) shard counts"))?; - // reed-solomon-erasure fills parity in place: shards = data || zeroed parity. + // fec-rs fills parity in place: shards = data || zeroed parity. let mut shards: Vec> = Vec::with_capacity(k + recovery_count); shards.extend_from_slice(data); shards.resize_with(k + recovery_count, || vec![0u8; shard_len]); @@ -69,3 +72,69 @@ fn collect_originals( } Ok(out) } + +#[cfg(test)] +mod tests { + use super::*; + + /// Locks byte-exact compatibility with Moonlight's `nanors` (Cauchy matrix + /// `M[j][i] = inv[(m+i)^j]`, GF(2⁸) poly 0x1d). If the backend ever switched matrices, + /// these vectors would break and our parity would no longer be Moonlight-decodable. + #[test] + fn nanors_exact_parity_vectors() { + let coder = Gf8Coder; + // The definitive nanors vector (k=4, m=2): single-byte shards [10,20,30,40] → [136, 0]. + let data = vec![vec![10u8], vec![20], vec![30], vec![40]]; + let parity = coder.encode(&data, 2).unwrap(); + assert_eq!(parity, vec![vec![136u8], vec![0u8]]); + + // Cross-check independently from the Cauchy parity rows (proves the matrix, not just a + // memorized output): parity[j] = XOR_i M[j][i] · data[i] over GF(2⁸). + let rows = [[142u8, 244, 71, 167], [244, 142, 167, 71]]; + let din = [10u8, 20, 30, 40]; + for (j, row) in rows.iter().enumerate() { + let expect = row + .iter() + .zip(din) + .fold(0u8, |acc, (&m, d)| acc ^ gf_mul(m, d)); + assert_eq!(parity[j][0], expect, "parity row {j}"); + } + } + + /// Round-trip: erase `m` data shards and confirm reconstruction recovers the originals. + #[test] + fn recovers_erased_data_shards() { + let coder = Gf8Coder; + let data: Vec> = (0..6).map(|i| vec![i as u8; 8]).collect(); + let parity = coder.encode(&data, 3).unwrap(); + let mut received: Vec>> = data + .iter() + .cloned() + .map(Some) + .chain(parity.into_iter().map(Some)) + .collect(); + // Erase 3 data shards (the FEC budget) + nothing else. + received[1] = None; + received[3] = None; + received[5] = None; + let recovered = coder.reconstruct(6, 3, &mut received).unwrap(); + assert_eq!(recovered, data); + } + + /// GF(2⁸) multiply, reduction poly 0x1d — independent of the backend. + fn gf_mul(mut a: u8, mut b: u8) -> u8 { + let mut p = 0u8; + for _ in 0..8 { + if b & 1 != 0 { + p ^= a; + } + let hi = a & 0x80; + a <<= 1; + if hi != 0 { + a ^= 0x1d; + } + b >>= 1; + } + p + } +} diff --git a/crates/lumen-core/vendor/fec-rs/Cargo.toml b/crates/lumen-core/vendor/fec-rs/Cargo.toml new file mode 100644 index 0000000..1df6dfc --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "fec-rs" +version = "0.1.0" +edition = "2021" +description = "A pure Rust Reed-Solomon erasure coding library with runtime SIMD acceleration" +license = "BSD-2-Clause" +repository = "https://github.com/hgaiser/fec-rs" +keywords = ["reed-solomon", "erasure", "coding", "fec", "simd"] +categories = ["algorithms", "encoding"] +readme = "README.md" + +[dependencies] +rayon = { version = "1", optional = true } + +[features] +default = [] +parallel = ["rayon"] diff --git a/crates/lumen-core/vendor/fec-rs/LICENSE b/crates/lumen-core/vendor/fec-rs/LICENSE new file mode 100644 index 0000000..0300fa9 --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/LICENSE @@ -0,0 +1,24 @@ +BSD 2-Clause License + +Copyright (c) 2026, Hans Gaiser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/crates/lumen-core/vendor/fec-rs/README.md b/crates/lumen-core/vendor/fec-rs/README.md new file mode 100644 index 0000000..009eb18 --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/README.md @@ -0,0 +1,73 @@ +# fec-rs + +[![CI](https://github.com/hgaiser/fec-rs/workflows/CI/badge.svg)](https://github.com/hgaiser/fec-rs/actions) +[![Crates.io](https://img.shields.io/crates/v/fec-rs.svg)](https://crates.io/crates/fec-rs) +[![Documentation](https://docs.rs/fec-rs/badge.svg)](https://docs.rs/fec-rs) + +A pure Rust Reed-Solomon erasure coding library with runtime SIMD acceleration. + +## Features + +- **Pure Rust** — No C/C++ dependencies or FFI. Everything is implemented in safe Rust + (with targeted `unsafe` for SIMD intrinsics). +- **Runtime SIMD detection** — Automatically uses the fastest available instruction set + via `std::is_x86_feature_detected!`. A single binary works on all x86_64 systems. +- **GF(2^8)** — Operates over the Galois field GF(2^8) with generating polynomial 29 (0x1D), + compatible with the Moonlight streaming protocol. +- **Shard-by-shard encoding** — Incremental encoding via `ShardByShard` for streaming use cases. +- **Reconstruction** — Reconstruct missing data and/or parity shards from any sufficient subset. + +## SIMD Acceleration + +On x86_64, the library automatically detects CPU features at runtime and uses +the best available instruction set: + +- **GFNI + AVX2** — Single-instruction GF multiply on 32 bytes (Intel Alder Lake+, AMD Zen 4+) +- **AVX2** — VPSHUFB split-table nibble lookup on 32 bytes +- **GFNI + SSE** — Single-instruction GF multiply on 16 bytes +- **SSSE3** — VPSHUFB split-table nibble lookup on 16 bytes +- **Scalar** — Lookup table fallback + +## Parallel Encoding + +Enable the `parallel` feature for optional rayon-based parallel encoding: + +```toml +fec-rs = { version = "0.1", features = ["parallel"] } +``` + +When enabled, large encode workloads automatically distribute parity shard +computation across threads. Small workloads use the sequential path to avoid +overhead. + +## Usage + +```rust +use fec_rs::ReedSolomon; + +let rs = ReedSolomon::new(4, 2).unwrap(); + +let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], // parity shard 1 + vec![0, 0, 0, 0], // parity shard 2 +]; + +// Encode parity +rs.encode(&mut shards).unwrap(); + +// Verify +assert!(rs.verify(&shards).unwrap()); + +// Simulate loss of shard 0 +let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); +recovery[0] = None; + +// Reconstruct +rs.reconstruct(&mut recovery).unwrap(); +``` + +License: BSD-2-Clause diff --git a/crates/lumen-core/vendor/fec-rs/build.rs b/crates/lumen-core/vendor/fec-rs/build.rs new file mode 100644 index 0000000..d6d9a02 --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/build.rs @@ -0,0 +1,200 @@ +#![allow(clippy::needless_range_loop)] + +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::Path; + +const FIELD_SIZE: usize = 256; +const GENERATING_POLYNOMIAL: usize = 29; + +fn gen_log_table(polynomial: usize) -> [u8; FIELD_SIZE] { + let mut result = [0u8; FIELD_SIZE]; + let mut b: usize = 1; + + for log in 0..FIELD_SIZE - 1 { + result[b] = log as u8; + b <<= 1; + if FIELD_SIZE <= b { + b = (b - FIELD_SIZE) ^ polynomial; + } + } + + result +} + +const EXP_TABLE_SIZE: usize = FIELD_SIZE * 2 - 2; + +fn gen_exp_table(log_table: &[u8; FIELD_SIZE]) -> [u8; EXP_TABLE_SIZE] { + let mut result = [0u8; EXP_TABLE_SIZE]; + + for i in 1..FIELD_SIZE { + let log = log_table[i] as usize; + result[log] = i as u8; + result[log + FIELD_SIZE - 1] = i as u8; + } + + result +} + +fn multiply(log_table: &[u8; FIELD_SIZE], exp_table: &[u8; EXP_TABLE_SIZE], a: u8, b: u8) -> u8 { + if a == 0 || b == 0 { + 0 + } else { + let log_a = log_table[a as usize]; + let log_b = log_table[b as usize]; + let log_result = log_a as usize + log_b as usize; + exp_table[log_result] + } +} + +fn gen_mul_table( + log_table: &[u8; FIELD_SIZE], + exp_table: &[u8; EXP_TABLE_SIZE], +) -> [[u8; FIELD_SIZE]; FIELD_SIZE] { + let mut result = [[0u8; FIELD_SIZE]; FIELD_SIZE]; + + for a in 0..FIELD_SIZE { + for b in 0..FIELD_SIZE { + result[a][b] = multiply(log_table, exp_table, a as u8, b as u8); + } + } + + result +} + +fn gen_mul_table_half( + log_table: &[u8; FIELD_SIZE], + exp_table: &[u8; EXP_TABLE_SIZE], +) -> ([[u8; 16]; FIELD_SIZE], [[u8; 16]; FIELD_SIZE]) { + let mut low = [[0u8; 16]; FIELD_SIZE]; + let mut high = [[0u8; 16]; FIELD_SIZE]; + + for a in 0..FIELD_SIZE { + for b in 0..FIELD_SIZE { + let mut result = 0; + if a != 0 && b != 0 { + let log_a = log_table[a]; + let log_b = log_table[b]; + result = exp_table[log_a as usize + log_b as usize]; + } + if (b & 0x0F) == b { + low[a][b] = result; + } + if (b & 0xF0) == b { + high[a][b >> 4] = result; + } + } + } + (low, high) +} + +/// Generate the GFNI affine matrix table. +/// +/// For each constant `c` in GF(2^8), compute a u64-packed 8x8 binary matrix +/// such that `vgf2p8affineqb(x, matrix, 0)` produces `c * x` in our GF(2^8). +/// +/// vgf2p8affineqb semantics: +/// result_bit[i] = popcount(x AND qword_byte[7-i]) mod 2 +/// where i goes from 0 (LSB) to 7 (MSB). +/// +/// Matrix packing: qword byte[7] = row for output bit 7 (MSB), +/// qword byte[0] = row for output bit 0 (LSB). +fn gen_gfni_table( + log_table: &[u8; FIELD_SIZE], + exp_table: &[u8; EXP_TABLE_SIZE], +) -> [u64; FIELD_SIZE] { + let mut result = [0u64; FIELD_SIZE]; + + for c in 0..FIELD_SIZE { + // Build row bytes for each output bit. + // row_for_bit_i = mask where bit j is set iff input bit j contributes to output bit i. + // M[i][j] = bit_i(c * (1 << j)) + let mut rows = [0u8; 8]; + for j in 0..8u8 { + let basis = 1u8 << j; // input with only bit j set + let product = multiply(log_table, exp_table, c as u8, basis); + // product's bit i tells us M[i][j] + for i in 0..8u8 { + if (product >> i) & 1 == 1 { + rows[i as usize] |= 1 << j; + } + } + } + + // Pack into u64: byte[7-i] = rows[i] + // vgf2p8affineqb: result_bit[i] = popcount(x AND byte[7-i]) mod 2 + // We want result_bit[i] = bit i of (c*x), so byte[7-i] = rows[i]. + let mut matrix: u64 = 0; + for i in 0..8u32 { + matrix |= (rows[i as usize] as u64) << ((7 - i) * 8); + } + result[c] = matrix; + } + + result +} + +fn write_1d_table(f: &mut File, table: &[u8], name: &str) { + let len = table.len(); + write!(f, "pub static {name}: [u8; {len}] = [").unwrap(); + for v in table { + write!(f, "{v}, ").unwrap(); + } + writeln!(f, "];").unwrap(); +} + +fn write_2d_table(f: &mut File, table: &[[u8; 16]; FIELD_SIZE], name: &str) { + let rows = table.len(); + let cols = table[0].len(); + write!(f, "pub static {name}: [[u8; {cols}]; {rows}] = [").unwrap(); + for row in table { + write!(f, "[").unwrap(); + for v in row { + write!(f, "{v}, ").unwrap(); + } + writeln!(f, "],").unwrap(); + } + writeln!(f, "];").unwrap(); +} + +fn write_mul_table(f: &mut File, table: &[[u8; FIELD_SIZE]; FIELD_SIZE]) { + let rows = table.len(); + let cols = table[0].len(); + write!(f, "pub static MUL_TABLE: [[u8; {cols}]; {rows}] = [").unwrap(); + for row in table { + write!(f, "[").unwrap(); + for v in row { + write!(f, "{v}, ").unwrap(); + } + writeln!(f, "],").unwrap(); + } + writeln!(f, "];").unwrap(); +} + +fn write_gfni_table(f: &mut File, table: &[u64; FIELD_SIZE]) { + write!(f, "pub static GFNI_TABLE: [u64; {}] = [", FIELD_SIZE).unwrap(); + for v in table { + write!(f, "0x{v:016X}, ").unwrap(); + } + writeln!(f, "];").unwrap(); +} + +fn main() { + let log_table = gen_log_table(GENERATING_POLYNOMIAL); + let exp_table = gen_exp_table(&log_table); + let mul_table = gen_mul_table(&log_table, &exp_table); + let (mul_table_low, mul_table_high) = gen_mul_table_half(&log_table, &exp_table); + let gfni_table = gen_gfni_table(&log_table, &exp_table); + + let out_dir = env::var("OUT_DIR").unwrap(); + let dest_path = Path::new(&out_dir).join("tables.rs"); + let mut f = File::create(&dest_path).unwrap(); + + write_1d_table(&mut f, &log_table, "LOG_TABLE"); + write_1d_table(&mut f, &exp_table, "EXP_TABLE"); + write_mul_table(&mut f, &mul_table); + write_2d_table(&mut f, &mul_table_low, "MUL_TABLE_LOW"); + write_2d_table(&mut f, &mul_table_high, "MUL_TABLE_HIGH"); + write_gfni_table(&mut f, &gfni_table); +} diff --git a/crates/lumen-core/vendor/fec-rs/src/errors.rs b/crates/lumen-core/vendor/fec-rs/src/errors.rs new file mode 100644 index 0000000..d1681af --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/src/errors.rs @@ -0,0 +1,61 @@ +use core::fmt; + +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum Error { + TooFewShards, + TooManyShards, + TooFewDataShards, + TooManyDataShards, + TooFewParityShards, + TooManyParityShards, + TooFewBufferShards, + TooManyBufferShards, + IncorrectShardSize, + TooFewShardsPresent, + EmptyShard, + InvalidIndex, + InvalidParityMatrix, + SingularMatrix, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::TooFewShards => write!(f, "Too few shards"), + Error::TooManyShards => write!(f, "Too many shards"), + Error::TooFewDataShards => write!(f, "Too few data shards"), + Error::TooManyDataShards => write!(f, "Too many data shards"), + Error::TooFewParityShards => write!(f, "Too few parity shards"), + Error::TooManyParityShards => write!(f, "Too many parity shards"), + Error::TooFewBufferShards => write!(f, "Too few buffer shards"), + Error::TooManyBufferShards => write!(f, "Too many buffer shards"), + Error::IncorrectShardSize => write!(f, "Incorrect shard size"), + Error::TooFewShardsPresent => write!(f, "Too few shards present for reconstruction"), + Error::EmptyShard => write!(f, "Empty shard"), + Error::InvalidIndex => write!(f, "Invalid index"), + Error::InvalidParityMatrix => write!(f, "Invalid parity matrix"), + Error::SingularMatrix => write!(f, "Singular matrix"), + } + } +} + +impl std::error::Error for Error {} + +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum SBSError { + TooManyCalls, + LeftoverShards, + RSError(Error), +} + +impl fmt::Display for SBSError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SBSError::TooManyCalls => write!(f, "Too many calls"), + SBSError::LeftoverShards => write!(f, "Leftover shards"), + SBSError::RSError(e) => write!(f, "{e}"), + } + } +} + +impl std::error::Error for SBSError {} diff --git a/crates/lumen-core/vendor/fec-rs/src/galois.rs b/crates/lumen-core/vendor/fec-rs/src/galois.rs new file mode 100644 index 0000000..1245494 --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/src/galois.rs @@ -0,0 +1,636 @@ +include!(concat!(env!("OUT_DIR"), "/tables.rs")); + +/// Add two GF(2^8) elements (XOR). +#[inline(always)] +pub fn add(a: u8, b: u8) -> u8 { + a ^ b +} + +/// Multiply two GF(2^8) elements using lookup table. +#[inline(always)] +pub fn mul(a: u8, b: u8) -> u8 { + MUL_TABLE[a as usize][b as usize] +} + +/// Divide a by b in GF(2^8). Panics if b is 0. +#[inline(always)] +pub fn div(a: u8, b: u8) -> u8 { + if a == 0 { + return 0; + } + assert!(b != 0, "Division by zero in GF(2^8)"); + let log_a = LOG_TABLE[a as usize] as isize; + let log_b = LOG_TABLE[b as usize] as isize; + let mut log_result = log_a - log_b; + if log_result < 0 { + log_result += 255; + } + EXP_TABLE[log_result as usize] +} + +/// Compute a^n in GF(2^8). +#[inline(always)] +pub fn exp(a: u8, n: usize) -> u8 { + if n == 0 { + return 1; + } + if a == 0 { + return 0; + } + let log_a = LOG_TABLE[a as usize] as usize; + let log_result = log_a * (n % 255) % 255; + EXP_TABLE[log_result] +} + +/// Multiply each element of `input` by `c` and write to `out`. +/// +/// Uses SIMD acceleration when available: +/// - GFNI + AVX2 (best: single-instruction GF multiply on 32 bytes) +/// - AVX2 VPSHUFB (split-table nibble lookup on 32 bytes) +/// - GFNI + SSE (single-instruction GF multiply on 16 bytes) +/// - SSSE3 VPSHUFB (split-table nibble lookup on 16 bytes) +/// - Scalar fallback +#[inline] +pub fn mul_slice(c: u8, input: &[u8], out: &mut [u8]) { + assert_eq!(input.len(), out.len()); + if input.is_empty() || c == 0 { + out.iter_mut().for_each(|o| *o = 0); + return; + } + if c == 1 { + out.copy_from_slice(input); + return; + } + + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("gfni") && is_x86_feature_detected!("avx2") { + unsafe { + mul_slice_gfni_avx2(c, input, out); + } + return; + } + if is_x86_feature_detected!("avx2") { + unsafe { + mul_slice_avx2(c, input, out); + } + return; + } + if is_x86_feature_detected!("gfni") { + unsafe { + mul_slice_gfni_sse(c, input, out); + } + return; + } + if is_x86_feature_detected!("ssse3") { + unsafe { + mul_slice_ssse3(c, input, out); + } + return; + } + } + + mul_slice_scalar(c, input, out); +} + +/// Multiply each element of `input` by `c` and XOR into `out`. +/// +/// Uses SIMD acceleration when available (same priority as `mul_slice`). +#[inline] +pub fn mul_slice_xor(c: u8, input: &[u8], out: &mut [u8]) { + assert_eq!(input.len(), out.len()); + if input.is_empty() || c == 0 { + return; + } + if c == 1 { + for (o, i) in out.iter_mut().zip(input.iter()) { + *o ^= *i; + } + return; + } + + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("gfni") && is_x86_feature_detected!("avx2") { + unsafe { + mul_slice_xor_gfni_avx2(c, input, out); + } + return; + } + if is_x86_feature_detected!("avx2") { + unsafe { + mul_slice_xor_avx2(c, input, out); + } + return; + } + if is_x86_feature_detected!("gfni") { + unsafe { + mul_slice_xor_gfni_sse(c, input, out); + } + return; + } + if is_x86_feature_detected!("ssse3") { + unsafe { + mul_slice_xor_ssse3(c, input, out); + } + return; + } + } + + mul_slice_xor_scalar(c, input, out); +} + +/// Function pointer types for bulk GF(2^8) operations. +pub type MulSliceFn = fn(u8, &[u8], &mut [u8]); + +/// Pair of (mul_slice, mul_slice_xor) function pointers for the best available SIMD path. +/// +/// Unlike `mul_slice`/`mul_slice_xor`, these skip runtime feature detection on every call. +/// The caller checks once and stores the result. +/// +/// Note: These raw dispatch functions do NOT handle the c==0 or c==1 special cases. +/// The caller must handle those before calling through the function pointer. +pub fn detect_mul_slice() -> (MulSliceFn, MulSliceFn) { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("gfni") && is_x86_feature_detected!("avx2") { + return ( + wrap_mul_slice_gfni_avx2 as MulSliceFn, + wrap_mul_slice_xor_gfni_avx2 as MulSliceFn, + ); + } + if is_x86_feature_detected!("avx2") { + return ( + wrap_mul_slice_avx2 as MulSliceFn, + wrap_mul_slice_xor_avx2 as MulSliceFn, + ); + } + if is_x86_feature_detected!("gfni") { + return ( + wrap_mul_slice_gfni_sse as MulSliceFn, + wrap_mul_slice_xor_gfni_sse as MulSliceFn, + ); + } + if is_x86_feature_detected!("ssse3") { + return ( + wrap_mul_slice_ssse3 as MulSliceFn, + wrap_mul_slice_xor_ssse3 as MulSliceFn, + ); + } + } + ( + mul_slice_scalar as MulSliceFn, + mul_slice_xor_scalar as MulSliceFn, + ) +} + +// Safe wrappers for SIMD functions (used as function pointer targets) +#[cfg(target_arch = "x86_64")] +fn wrap_mul_slice_gfni_avx2(c: u8, input: &[u8], out: &mut [u8]) { + unsafe { mul_slice_gfni_avx2(c, input, out) } +} +#[cfg(target_arch = "x86_64")] +fn wrap_mul_slice_xor_gfni_avx2(c: u8, input: &[u8], out: &mut [u8]) { + unsafe { mul_slice_xor_gfni_avx2(c, input, out) } +} +#[cfg(target_arch = "x86_64")] +fn wrap_mul_slice_avx2(c: u8, input: &[u8], out: &mut [u8]) { + unsafe { mul_slice_avx2(c, input, out) } +} +#[cfg(target_arch = "x86_64")] +fn wrap_mul_slice_xor_avx2(c: u8, input: &[u8], out: &mut [u8]) { + unsafe { mul_slice_xor_avx2(c, input, out) } +} +#[cfg(target_arch = "x86_64")] +fn wrap_mul_slice_gfni_sse(c: u8, input: &[u8], out: &mut [u8]) { + unsafe { mul_slice_gfni_sse(c, input, out) } +} +#[cfg(target_arch = "x86_64")] +fn wrap_mul_slice_xor_gfni_sse(c: u8, input: &[u8], out: &mut [u8]) { + unsafe { mul_slice_xor_gfni_sse(c, input, out) } +} +#[cfg(target_arch = "x86_64")] +fn wrap_mul_slice_ssse3(c: u8, input: &[u8], out: &mut [u8]) { + unsafe { mul_slice_ssse3(c, input, out) } +} +#[cfg(target_arch = "x86_64")] +fn wrap_mul_slice_xor_ssse3(c: u8, input: &[u8], out: &mut [u8]) { + unsafe { mul_slice_xor_ssse3(c, input, out) } +} + +// ── Scalar fallback ────────────────────────────────────────────────────── + +fn mul_slice_scalar(c: u8, input: &[u8], out: &mut [u8]) { + let mt = &MUL_TABLE[c as usize]; + for (o, &i) in out.iter_mut().zip(input.iter()) { + *o = mt[i as usize]; + } +} + +fn mul_slice_xor_scalar(c: u8, input: &[u8], out: &mut [u8]) { + let mt = &MUL_TABLE[c as usize]; + for (o, &i) in out.iter_mut().zip(input.iter()) { + *o ^= mt[i as usize]; + } +} + +// ── x86_64 SIMD implementations ───────────────────────────────────────── + +// ── GFNI + AVX2 (best path: 32 bytes per vgf2p8affineqb) ────────────── + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "gfni,avx2")] +unsafe fn mul_slice_gfni_avx2(c: u8, input: &[u8], out: &mut [u8]) { + use core::arch::x86_64::*; + + let matrix = GFNI_TABLE[c as usize] as i64; + let mat_vec = _mm256_set1_epi64x(matrix); + + let len = input.len(); + let mut i = 0; + + while i + 32 <= len { + let data = _mm256_loadu_si256(input.as_ptr().add(i) as *const _); + let result = _mm256_gf2p8affine_epi64_epi8(data, mat_vec, 0); + _mm256_storeu_si256(out.as_mut_ptr().add(i) as *mut _, result); + i += 32; + } + + let mt = &MUL_TABLE[c as usize]; + while i < len { + *out.get_unchecked_mut(i) = mt[*input.get_unchecked(i) as usize]; + i += 1; + } +} + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "gfni,avx2")] +unsafe fn mul_slice_xor_gfni_avx2(c: u8, input: &[u8], out: &mut [u8]) { + use core::arch::x86_64::*; + + let matrix = GFNI_TABLE[c as usize] as i64; + let mat_vec = _mm256_set1_epi64x(matrix); + + let len = input.len(); + let mut i = 0; + + while i + 32 <= len { + let data = _mm256_loadu_si256(input.as_ptr().add(i) as *const _); + let existing = _mm256_loadu_si256(out.as_ptr().add(i) as *const _); + let mul_result = _mm256_gf2p8affine_epi64_epi8(data, mat_vec, 0); + let result = _mm256_xor_si256(mul_result, existing); + _mm256_storeu_si256(out.as_mut_ptr().add(i) as *mut _, result); + i += 32; + } + + let mt = &MUL_TABLE[c as usize]; + while i < len { + *out.get_unchecked_mut(i) ^= mt[*input.get_unchecked(i) as usize]; + i += 1; + } +} + +// ── GFNI + SSE (16 bytes per vgf2p8affineqb) ────────────────────────── + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "gfni")] +unsafe fn mul_slice_gfni_sse(c: u8, input: &[u8], out: &mut [u8]) { + use core::arch::x86_64::*; + + let matrix = GFNI_TABLE[c as usize] as i64; + let mat_vec = _mm_set1_epi64x(matrix); + + let len = input.len(); + let mut i = 0; + + while i + 16 <= len { + let data = _mm_loadu_si128(input.as_ptr().add(i) as *const _); + let result = _mm_gf2p8affine_epi64_epi8(data, mat_vec, 0); + _mm_storeu_si128(out.as_mut_ptr().add(i) as *mut _, result); + i += 16; + } + + let mt = &MUL_TABLE[c as usize]; + while i < len { + *out.get_unchecked_mut(i) = mt[*input.get_unchecked(i) as usize]; + i += 1; + } +} + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "gfni")] +unsafe fn mul_slice_xor_gfni_sse(c: u8, input: &[u8], out: &mut [u8]) { + use core::arch::x86_64::*; + + let matrix = GFNI_TABLE[c as usize] as i64; + let mat_vec = _mm_set1_epi64x(matrix); + + let len = input.len(); + let mut i = 0; + + while i + 16 <= len { + let data = _mm_loadu_si128(input.as_ptr().add(i) as *const _); + let existing = _mm_loadu_si128(out.as_ptr().add(i) as *const _); + let mul_result = _mm_gf2p8affine_epi64_epi8(data, mat_vec, 0); + let result = _mm_xor_si128(mul_result, existing); + _mm_storeu_si128(out.as_mut_ptr().add(i) as *mut _, result); + i += 16; + } + + let mt = &MUL_TABLE[c as usize]; + while i < len { + *out.get_unchecked_mut(i) ^= mt[*input.get_unchecked(i) as usize]; + i += 1; + } +} + +// ── AVX2 VPSHUFB (32 bytes, split-table nibble lookup) ───────────────── + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "avx2")] +unsafe fn mul_slice_avx2(c: u8, input: &[u8], out: &mut [u8]) { + use core::arch::x86_64::*; + + let low = &MUL_TABLE_LOW[c as usize]; + let high = &MUL_TABLE_HIGH[c as usize]; + + // Broadcast the 16-byte low/high tables to 256-bit registers by duplicating + let low_vec = _mm256_broadcastsi128_si256(_mm_loadu_si128(low.as_ptr() as *const _)); + let high_vec = _mm256_broadcastsi128_si256(_mm_loadu_si128(high.as_ptr() as *const _)); + let mask = _mm256_set1_epi8(0x0F); + + let len = input.len(); + let mut i = 0; + + // Process 32 bytes at a time + while i + 32 <= len { + let data = _mm256_loadu_si256(input.as_ptr().add(i) as *const _); + let lo_nibble = _mm256_and_si256(data, mask); + let hi_nibble = _mm256_and_si256(_mm256_srli_epi64(data, 4), mask); + let lo_result = _mm256_shuffle_epi8(low_vec, lo_nibble); + let hi_result = _mm256_shuffle_epi8(high_vec, hi_nibble); + let result = _mm256_xor_si256(lo_result, hi_result); + _mm256_storeu_si256(out.as_mut_ptr().add(i) as *mut _, result); + i += 32; + } + + // Handle remaining bytes with scalar + let mt = &MUL_TABLE[c as usize]; + while i < len { + *out.get_unchecked_mut(i) = mt[*input.get_unchecked(i) as usize]; + i += 1; + } +} + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "avx2")] +unsafe fn mul_slice_xor_avx2(c: u8, input: &[u8], out: &mut [u8]) { + use core::arch::x86_64::*; + + let low = &MUL_TABLE_LOW[c as usize]; + let high = &MUL_TABLE_HIGH[c as usize]; + + let low_vec = _mm256_broadcastsi128_si256(_mm_loadu_si128(low.as_ptr() as *const _)); + let high_vec = _mm256_broadcastsi128_si256(_mm_loadu_si128(high.as_ptr() as *const _)); + let mask = _mm256_set1_epi8(0x0F); + + let len = input.len(); + let mut i = 0; + + while i + 32 <= len { + let data = _mm256_loadu_si256(input.as_ptr().add(i) as *const _); + let existing = _mm256_loadu_si256(out.as_ptr().add(i) as *const _); + let lo_nibble = _mm256_and_si256(data, mask); + let hi_nibble = _mm256_and_si256(_mm256_srli_epi64(data, 4), mask); + let lo_result = _mm256_shuffle_epi8(low_vec, lo_nibble); + let hi_result = _mm256_shuffle_epi8(high_vec, hi_nibble); + let result = _mm256_xor_si256(_mm256_xor_si256(lo_result, hi_result), existing); + _mm256_storeu_si256(out.as_mut_ptr().add(i) as *mut _, result); + i += 32; + } + + let mt = &MUL_TABLE[c as usize]; + while i < len { + *out.get_unchecked_mut(i) ^= mt[*input.get_unchecked(i) as usize]; + i += 1; + } +} + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "ssse3")] +unsafe fn mul_slice_ssse3(c: u8, input: &[u8], out: &mut [u8]) { + use core::arch::x86_64::*; + + let low = &MUL_TABLE_LOW[c as usize]; + let high = &MUL_TABLE_HIGH[c as usize]; + + let low_vec = _mm_loadu_si128(low.as_ptr() as *const _); + let high_vec = _mm_loadu_si128(high.as_ptr() as *const _); + let mask = _mm_set1_epi8(0x0F); + + let len = input.len(); + let mut i = 0; + + while i + 16 <= len { + let data = _mm_loadu_si128(input.as_ptr().add(i) as *const _); + let lo_nibble = _mm_and_si128(data, mask); + let hi_nibble = _mm_and_si128(_mm_srli_epi64(data, 4), mask); + let lo_result = _mm_shuffle_epi8(low_vec, lo_nibble); + let hi_result = _mm_shuffle_epi8(high_vec, hi_nibble); + let result = _mm_xor_si128(lo_result, hi_result); + _mm_storeu_si128(out.as_mut_ptr().add(i) as *mut _, result); + i += 16; + } + + let mt = &MUL_TABLE[c as usize]; + while i < len { + *out.get_unchecked_mut(i) = mt[*input.get_unchecked(i) as usize]; + i += 1; + } +} + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "ssse3")] +unsafe fn mul_slice_xor_ssse3(c: u8, input: &[u8], out: &mut [u8]) { + use core::arch::x86_64::*; + + let low = &MUL_TABLE_LOW[c as usize]; + let high = &MUL_TABLE_HIGH[c as usize]; + + let low_vec = _mm_loadu_si128(low.as_ptr() as *const _); + let high_vec = _mm_loadu_si128(high.as_ptr() as *const _); + let mask = _mm_set1_epi8(0x0F); + + let len = input.len(); + let mut i = 0; + + while i + 16 <= len { + let data = _mm_loadu_si128(input.as_ptr().add(i) as *const _); + let existing = _mm_loadu_si128(out.as_ptr().add(i) as *const _); + let lo_nibble = _mm_and_si128(data, mask); + let hi_nibble = _mm_and_si128(_mm_srli_epi64(data, 4), mask); + let lo_result = _mm_shuffle_epi8(low_vec, lo_nibble); + let hi_result = _mm_shuffle_epi8(high_vec, hi_nibble); + let result = _mm_xor_si128(_mm_xor_si128(lo_result, hi_result), existing); + _mm_storeu_si128(out.as_mut_ptr().add(i) as *mut _, result); + i += 16; + } + + let mt = &MUL_TABLE[c as usize]; + while i < len { + *out.get_unchecked_mut(i) ^= mt[*input.get_unchecked(i) as usize]; + i += 1; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_gfni_table() { + // Verify GFNI_TABLE by emulating vgf2p8affineqb in software: + // result_bit[i] = popcount(x AND qword_byte[7-i]) mod 2 + for c in 0u16..256 { + let matrix = GFNI_TABLE[c as usize]; + for b in 0u16..256 { + let expected = MUL_TABLE[c as usize][b as usize]; + let x = b as u8; + let mut result: u8 = 0; + for i in 0..8u32 { + let row_byte = ((matrix >> ((7 - i) * 8)) & 0xFF) as u8; + let dot = (row_byte & x).count_ones() % 2; + result |= (dot as u8) << i; + } + assert_eq!( + result, expected, + "GFNI table mismatch: c={c}, b={b}, got={result}, expected={expected}" + ); + } + } + } + + #[test] + fn test_add() { + assert_eq!(add(0, 0), 0); + assert_eq!(add(1, 0), 1); + assert_eq!(add(0, 1), 1); + assert_eq!(add(1, 1), 0); + assert_eq!(add(0xFF, 0xFF), 0); + assert_eq!(add(0xAA, 0x55), 0xFF); + } + + #[test] + fn test_mul() { + assert_eq!(mul(0, 0), 0); + assert_eq!(mul(1, 0), 0); + assert_eq!(mul(0, 1), 0); + assert_eq!(mul(1, 1), 1); + // a * 1 = a + for a in 0u8..=255 { + assert_eq!(mul(a, 1), a); + assert_eq!(mul(1, a), a); + } + // a * 0 = 0 + for a in 0u8..=255 { + assert_eq!(mul(a, 0), 0); + } + } + + #[test] + fn test_div() { + // a / 1 = a + for a in 0u8..=255 { + assert_eq!(div(a, 1), a); + } + // a / a = 1 (for a != 0) + for a in 1u8..=255 { + assert_eq!(div(a, a), 1); + } + // (a * b) / b = a + for a in 1u8..=255 { + for b in 1u8..=255 { + assert_eq!(div(mul(a, b), b), a); + } + } + } + + #[test] + fn test_exp() { + assert_eq!(exp(0, 0), 1); + assert_eq!(exp(1, 0), 1); + assert_eq!(exp(5, 0), 1); + assert_eq!(exp(0, 1), 0); + assert_eq!(exp(0, 100), 0); + // a^1 = a + for a in 0u8..=255 { + assert_eq!(exp(a, 1), a); + } + // a^2 = a * a + for a in 0u8..=255 { + assert_eq!(exp(a, 2), mul(a, a)); + } + } + + #[test] + fn test_mul_slice_basic() { + let input = [1u8, 2, 3, 4, 5, 6, 7, 8]; + let mut out = [0u8; 8]; + mul_slice(3, &input, &mut out); + for i in 0..input.len() { + assert_eq!(out[i], mul(3, input[i])); + } + } + + #[test] + fn test_mul_slice_xor_basic() { + let input = [1u8, 2, 3, 4, 5, 6, 7, 8]; + let mut out = [10u8; 8]; + let original = out; + mul_slice_xor(3, &input, &mut out); + for i in 0..input.len() { + assert_eq!(out[i], original[i] ^ mul(3, input[i])); + } + } + + #[test] + fn test_mul_slice_large() { + // Test with a buffer large enough to exercise SIMD paths + let input: Vec = (0..256).map(|i| i as u8).collect(); + let mut out = vec![0u8; 256]; + let mut expected = vec![0u8; 256]; + + for c in [2u8, 7, 42, 128, 255] { + mul_slice_scalar(c, &input, &mut expected); + mul_slice(c, &input, &mut out); + assert_eq!(out, expected, "mul_slice mismatch for c={c}"); + } + } + + #[test] + fn test_mul_slice_xor_large() { + let input: Vec = (0..256).map(|i| i as u8).collect(); + + for c in [2u8, 7, 42, 128, 255] { + let mut out_expected = vec![0xABu8; 256]; + let mut out_simd = out_expected.clone(); + mul_slice_xor_scalar(c, &input, &mut out_expected); + mul_slice_xor(c, &input, &mut out_simd); + assert_eq!(out_simd, out_expected, "mul_slice_xor mismatch for c={c}"); + } + } + + #[test] + fn test_mul_slice_unaligned_sizes() { + // Test sizes that don't align to SIMD width + for size in [1, 7, 15, 16, 17, 31, 32, 33, 63, 64, 65, 100] { + let input: Vec = (0..size).map(|i| i as u8).collect(); + let mut out = vec![0u8; size]; + let mut expected = vec![0u8; size]; + + mul_slice_scalar(42, &input, &mut expected); + mul_slice(42, &input, &mut out); + assert_eq!(out, expected, "mul_slice mismatch for size={size}"); + } + } +} diff --git a/crates/lumen-core/vendor/fec-rs/src/lib.rs b/crates/lumen-core/vendor/fec-rs/src/lib.rs new file mode 100644 index 0000000..a0d4a5f --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/src/lib.rs @@ -0,0 +1,73 @@ +//! A pure Rust Reed-Solomon erasure coding library with runtime SIMD acceleration. +//! +//! # Features +//! +//! - **Pure Rust** — No C/C++ dependencies or FFI. Everything is implemented in safe Rust +//! (with targeted `unsafe` for SIMD intrinsics). +//! - **Runtime SIMD detection** — Automatically uses the fastest available instruction set +//! via `std::is_x86_feature_detected!`. A single binary works on all x86_64 systems. +//! - **GF(2^8)** — Operates over the Galois field GF(2^8) with generating polynomial 29 (0x1D), +//! compatible with the Moonlight streaming protocol. +//! - **Shard-by-shard encoding** — Incremental encoding via `ShardByShard` for streaming use cases. +//! - **Reconstruction** — Reconstruct missing data and/or parity shards from any sufficient subset. +//! +//! # SIMD Acceleration +//! +//! On x86_64, the library automatically detects CPU features at runtime and uses +//! the best available instruction set: +//! +//! - **GFNI + AVX2** — Single-instruction GF multiply on 32 bytes (Intel Alder Lake+, AMD Zen 4+) +//! - **AVX2** — VPSHUFB split-table nibble lookup on 32 bytes +//! - **GFNI + SSE** — Single-instruction GF multiply on 16 bytes +//! - **SSSE3** — VPSHUFB split-table nibble lookup on 16 bytes +//! - **Scalar** — Lookup table fallback +//! +//! # Parallel Encoding +//! +//! Enable the `parallel` feature for optional rayon-based parallel encoding: +//! +//! ```toml +//! fec-rs = { version = "0.1", features = ["parallel"] } +//! ``` +//! +//! When enabled, large encode workloads automatically distribute parity shard +//! computation across threads. Small workloads use the sequential path to avoid +//! overhead. +//! +//! # Usage +//! +//! ``` +//! use fec_rs::ReedSolomon; +//! +//! let rs = ReedSolomon::new(4, 2).unwrap(); +//! +//! let mut shards: Vec> = vec![ +//! vec![0, 1, 2, 3], +//! vec![4, 5, 6, 7], +//! vec![8, 9, 10, 11], +//! vec![12, 13, 14, 15], +//! vec![0, 0, 0, 0], // parity shard 1 +//! vec![0, 0, 0, 0], // parity shard 2 +//! ]; +//! +//! // Encode parity +//! rs.encode(&mut shards).unwrap(); +//! +//! // Verify +//! assert!(rs.verify(&shards).unwrap()); +//! +//! // Simulate loss of shard 0 +//! let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); +//! recovery[0] = None; +//! +//! // Reconstruct +//! rs.reconstruct(&mut recovery).unwrap(); +//! ``` + +mod errors; +pub mod galois; +mod matrix; +mod reed_solomon; + +pub use errors::{Error, SBSError}; +pub use reed_solomon::{ReconstructShard, ReedSolomon, ShardByShard}; diff --git a/crates/lumen-core/vendor/fec-rs/src/matrix.rs b/crates/lumen-core/vendor/fec-rs/src/matrix.rs new file mode 100644 index 0000000..0bf1309 --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/src/matrix.rs @@ -0,0 +1,251 @@ +use crate::galois; + +#[derive(PartialEq, Debug, Clone)] +pub struct Matrix { + pub row_count: usize, + pub col_count: usize, + pub data: Vec, +} + +impl Matrix { + pub fn new(rows: usize, cols: usize) -> Self { + Self { + row_count: rows, + col_count: cols, + data: vec![0u8; rows * cols], + } + } + + pub fn identity(size: usize) -> Self { + let mut m = Self::new(size, size); + for i in 0..size { + m.data[i * size + i] = 1; + } + m + } + + pub fn vandermonde(rows: usize, cols: usize) -> Self { + let mut m = Self::new(rows, cols); + for r in 0..rows { + let r_a = r as u8; + for c in 0..cols { + m.data[r * cols + c] = galois::exp(r_a, c); + } + } + m + } + + #[inline] + pub fn get(&self, r: usize, c: usize) -> u8 { + self.data[r * self.col_count + c] + } + + #[inline] + pub fn set(&mut self, r: usize, c: usize, val: u8) { + self.data[r * self.col_count + c] = val; + } + + pub fn get_row(&self, row: usize) -> &[u8] { + let start = row * self.col_count; + &self.data[start..start + self.col_count] + } + + pub fn sub_matrix(&self, rmin: usize, cmin: usize, rmax: usize, cmax: usize) -> Self { + let new_rows = rmax - rmin; + let new_cols = cmax - cmin; + let mut m = Self::new(new_rows, new_cols); + for r in rmin..rmax { + for c in cmin..cmax { + m.data[(r - rmin) * new_cols + (c - cmin)] = self.get(r, c); + } + } + m + } + + pub fn multiply(&self, rhs: &Matrix) -> Self { + assert_eq!( + self.col_count, rhs.row_count, + "Matrix dimensions incompatible for multiply" + ); + let mut result = Self::new(self.row_count, rhs.col_count); + for r in 0..self.row_count { + for c in 0..rhs.col_count { + let mut val = 0u8; + for i in 0..self.col_count { + val = galois::add(val, galois::mul(self.get(r, i), rhs.get(i, c))); + } + result.set(r, c, val); + } + } + result + } + + pub fn augment(&self, rhs: &Matrix) -> Self { + assert_eq!( + self.row_count, rhs.row_count, + "Matrix row counts must match for augment" + ); + let new_cols = self.col_count + rhs.col_count; + let mut m = Self::new(self.row_count, new_cols); + for r in 0..self.row_count { + for c in 0..self.col_count { + m.set(r, c, self.get(r, c)); + } + for c in 0..rhs.col_count { + m.set(r, self.col_count + c, rhs.get(r, c)); + } + } + m + } + + fn swap_rows(&mut self, r1: usize, r2: usize) { + if r1 == r2 { + return; + } + let s1 = r1 * self.col_count; + let s2 = r2 * self.col_count; + for i in 0..self.col_count { + self.data.swap(s1 + i, s2 + i); + } + } + + fn gaussian_elim(&mut self) -> Result<(), &'static str> { + for r in 0..self.row_count { + // Pivot search + if self.get(r, r) == 0 { + for r_below in r + 1..self.row_count { + if self.get(r_below, r) != 0 { + self.swap_rows(r, r_below); + break; + } + } + } + if self.get(r, r) == 0 { + return Err("Singular matrix"); + } + // Scale to 1 + if self.get(r, r) != 1 { + let scale = galois::div(1, self.get(r, r)); + for c in 0..self.col_count { + let val = galois::mul(scale, self.get(r, c)); + self.set(r, c, val); + } + } + // Eliminate below + for r_below in r + 1..self.row_count { + if self.get(r_below, r) != 0 { + let scale = self.get(r_below, r); + for c in 0..self.col_count { + let val = + galois::add(self.get(r_below, c), galois::mul(scale, self.get(r, c))); + self.set(r_below, c, val); + } + } + } + } + + // Back substitution + for d in 0..self.row_count { + for r_above in 0..d { + if self.get(r_above, d) != 0 { + let scale = self.get(r_above, d); + for c in 0..self.col_count { + let val = + galois::add(self.get(r_above, c), galois::mul(scale, self.get(d, c))); + self.set(r_above, c, val); + } + } + } + } + Ok(()) + } + + pub fn invert(&self) -> Result { + assert!( + self.row_count == self.col_count, + "Cannot invert non-square matrix" + ); + let mut work = self.augment(&Self::identity(self.row_count)); + work.gaussian_elim()?; + Ok(work.sub_matrix(0, self.row_count, self.col_count, self.col_count * 2)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn mat(data: Vec>) -> Matrix { + let rows = data.len(); + let cols = data[0].len(); + let flat: Vec = data.into_iter().flatten().collect(); + Matrix { + row_count: rows, + col_count: cols, + data: flat, + } + } + + #[test] + fn test_identity() { + let m = Matrix::identity(3); + let expected = mat(vec![vec![1, 0, 0], vec![0, 1, 0], vec![0, 0, 1]]); + assert_eq!(m, expected); + } + + #[test] + fn test_multiply() { + let m1 = mat(vec![vec![1, 2], vec![3, 4]]); + let m2 = mat(vec![vec![5, 6], vec![7, 8]]); + let result = m1.multiply(&m2); + let expected = mat(vec![vec![11, 22], vec![19, 42]]); + assert_eq!(result, expected); + } + + #[test] + fn test_invert() { + let m = mat(vec![ + vec![56, 23, 98], + vec![3, 100, 200], + vec![45, 201, 123], + ]); + let inv = m.invert().unwrap(); + let expected = mat(vec![ + vec![175, 133, 33], + vec![130, 13, 245], + vec![112, 35, 126], + ]); + assert_eq!(inv, expected); + } + + #[test] + fn test_invert_identity() { + let m = Matrix::identity(4); + let inv = m.invert().unwrap(); + assert_eq!(inv, m); + } + + #[test] + fn test_multiply_identity() { + let m = mat(vec![ + vec![56, 23, 98], + vec![3, 100, 200], + vec![45, 201, 123], + ]); + let id = Matrix::identity(3); + assert_eq!(m.multiply(&id), m); + assert_eq!(id.multiply(&m), m); + } + + #[test] + fn test_invert_times_original_is_identity() { + let m = mat(vec![ + vec![56, 23, 98], + vec![3, 100, 200], + vec![45, 201, 123], + ]); + let inv = m.invert().unwrap(); + let product = m.multiply(&inv); + assert_eq!(product, Matrix::identity(3)); + } +} diff --git a/crates/lumen-core/vendor/fec-rs/src/reed_solomon.rs b/crates/lumen-core/vendor/fec-rs/src/reed_solomon.rs new file mode 100644 index 0000000..eccb188 --- /dev/null +++ b/crates/lumen-core/vendor/fec-rs/src/reed_solomon.rs @@ -0,0 +1,1263 @@ +use std::collections::HashMap; +use std::sync::Mutex; + +use crate::errors::{Error, SBSError}; +use crate::galois::{self, EXP_TABLE, LOG_TABLE}; +use crate::matrix::Matrix; + +const DATA_DECODE_MATRIX_CACHE_CAPACITY: usize = 254; + +/// Reed-Solomon erasure code encoder/decoder. +/// +/// Operates over GF(2^8) with generating polynomial 29, compatible +/// with the Moonlight streaming protocol. +/// +/// # Example +/// +/// ``` +/// use fec_rs::ReedSolomon; +/// +/// let rs = ReedSolomon::new(4, 2).unwrap(); +/// +/// let mut shards: Vec> = vec![ +/// vec![0, 1, 2, 3], +/// vec![4, 5, 6, 7], +/// vec![8, 9, 10, 11], +/// vec![12, 13, 14, 15], +/// vec![0, 0, 0, 0], +/// vec![0, 0, 0, 0], +/// ]; +/// +/// rs.encode(&mut shards).unwrap(); +/// assert!(rs.verify(&shards).unwrap()); +/// ``` +#[derive(Debug)] +pub struct ReedSolomon { + data_shard_count: usize, + parity_shard_count: usize, + total_shard_count: usize, + matrix: Matrix, + mul_slice_fn: galois::MulSliceFn, + mul_slice_xor_fn: galois::MulSliceFn, + data_decode_matrix_cache: Mutex, Matrix>>, +} + +impl Clone for ReedSolomon { + fn clone(&self) -> Self { + ReedSolomon { + data_shard_count: self.data_shard_count, + parity_shard_count: self.parity_shard_count, + total_shard_count: self.total_shard_count, + matrix: self.matrix.clone(), + mul_slice_fn: self.mul_slice_fn, + mul_slice_xor_fn: self.mul_slice_xor_fn, + data_decode_matrix_cache: Mutex::new(HashMap::new()), + } + } +} + +impl PartialEq for ReedSolomon { + fn eq(&self, rhs: &ReedSolomon) -> bool { + self.data_shard_count == rhs.data_shard_count + && self.parity_shard_count == rhs.parity_shard_count + && self.matrix == rhs.matrix + } +} + +impl ReedSolomon { + fn build_matrix(data_shards: usize, total_shards: usize) -> Matrix { + let vandermonde = Matrix::vandermonde(total_shards, data_shards); + let top = vandermonde.sub_matrix(0, 0, data_shards, data_shards); + let mut result = vandermonde.multiply(&top.invert().unwrap()); + + let parity_shards = total_shards - data_shards; + let mut inverse = vec![0u8; 256]; + inverse[0] = 0; + inverse[1] = 1; + for i in 2..256 { + inverse[i] = EXP_TABLE[(255 - LOG_TABLE[i]) as usize]; + } + + for j in 0..parity_shards { + for i in 0..data_shards { + result.data[(data_shards + j) * data_shards + i] = inverse[(parity_shards + i) ^ j]; + } + } + + result + } + + /// Creates a new Reed-Solomon encoder/decoder. + /// + /// # Errors + /// + /// Returns an error if `data_shards` or `parity_shards` is 0, + /// or if `data_shards + parity_shards > 256`. + pub fn new(data_shards: usize, parity_shards: usize) -> Result { + if data_shards == 0 { + return Err(Error::TooFewDataShards); + } + if parity_shards == 0 { + return Err(Error::TooFewParityShards); + } + if data_shards + parity_shards > 256 { + return Err(Error::TooManyShards); + } + + let total_shards = data_shards + parity_shards; + let matrix = Self::build_matrix(data_shards, total_shards); + let (mul_slice_fn, mul_slice_xor_fn) = galois::detect_mul_slice(); + + Ok(ReedSolomon { + data_shard_count: data_shards, + parity_shard_count: parity_shards, + total_shard_count: total_shards, + matrix, + mul_slice_fn, + mul_slice_xor_fn, + data_decode_matrix_cache: Mutex::new(HashMap::new()), + }) + } + + /// Directly set the parity rows of the encoding matrix. + /// + /// `parity` must have exactly `parity_shard_count * data_shard_count` elements. + /// This is used for compatibility with the Moonlight audio protocol, + /// where the parity matrix values are hardcoded from OpenFEC. + pub fn set_parity_matrix(&mut self, parity: &[u8]) -> Result<(), Error> { + let expected = self.parity_shard_count * self.data_shard_count; + if parity.len() != expected { + return Err(Error::InvalidParityMatrix); + } + let offset = self.data_shard_count * self.data_shard_count; + self.matrix.data[offset..offset + expected].copy_from_slice(parity); + self.data_decode_matrix_cache + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .clear(); + Ok(()) + } + + pub fn data_shard_count(&self) -> usize { + self.data_shard_count + } + + pub fn parity_shard_count(&self) -> usize { + self.parity_shard_count + } + + pub fn total_shard_count(&self) -> usize { + self.total_shard_count + } + + #[inline] + fn get_parity_rows(&self) -> Vec<&[u8]> { + (self.data_shard_count..self.total_shard_count) + .map(|i| self.matrix.get_row(i)) + .collect() + } + + /// Multiply `input` by `c` in GF(2^8), write to `out`. Uses pre-detected SIMD path. + #[inline(always)] + fn mul_slice(&self, c: u8, input: &[u8], out: &mut [u8]) { + assert_eq!(input.len(), out.len()); + if c == 0 { + out.iter_mut().for_each(|o| *o = 0); + return; + } + if c == 1 { + out.copy_from_slice(input); + return; + } + (self.mul_slice_fn)(c, input, out); + } + + /// Multiply `input` by `c` in GF(2^8), XOR into `out`. Uses pre-detected SIMD path. + #[inline(always)] + fn mul_slice_xor(&self, c: u8, input: &[u8], out: &mut [u8]) { + assert_eq!(input.len(), out.len()); + if c == 0 { + return; + } + if c == 1 { + for (o, i) in out.iter_mut().zip(input.iter()) { + *o ^= *i; + } + return; + } + (self.mul_slice_xor_fn)(c, input, out); + } + + fn code_some_slices(&self, matrix_rows: &[&[u8]], inputs: &[&[u8]], outputs: &mut [&mut [u8]]) { + for (i_input, input) in inputs.iter().enumerate() { + self.code_single_slice(matrix_rows, i_input, input, outputs); + } + } + + #[inline] + fn code_single_slice( + &self, + matrix_rows: &[&[u8]], + i_input: usize, + input: &[u8], + outputs: &mut [&mut [u8]], + ) { + for (i_row, output) in outputs.iter_mut().enumerate() { + let c = matrix_rows[i_row][i_input]; + if i_input == 0 { + self.mul_slice(c, input, output); + } else { + self.mul_slice_xor(c, input, output); + } + } + } + + /// Constructs the parity shards. + /// + /// The parity shard slots will be overwritten. + /// + /// With the `parallel` feature enabled, parity shards are computed in parallel + /// using rayon when the workload is large enough. + #[cfg(feature = "parallel")] + pub fn encode + AsMut<[u8]> + Send>( + &self, + shards: &mut [T], + ) -> Result<(), Error> { + if shards.len() < self.total_shard_count { + return Err(Error::TooFewShards); + } + if shards.len() > self.total_shard_count { + return Err(Error::TooManyShards); + } + Self::check_slices_uniform(shards)?; + + let (data, parity) = shards.split_at_mut(self.data_shard_count); + let parity_rows = self.get_parity_rows(); + + // Use rayon for large workloads where parallelization overhead is worthwhile. + // Threshold: parity_count * data_count * shard_size > ~1MB of work. + let shard_size = data[0].as_ref().len(); + let work = self.parity_shard_count * self.data_shard_count * shard_size; + if work > 1_000_000 { + use rayon::prelude::*; + + // Collect data shard references that we can share across threads. + let data_refs: Vec<&[u8]> = data.iter().map(|d| d.as_ref()).collect(); + let mul_fn = self.mul_slice_fn; + let mul_xor_fn = self.mul_slice_xor_fn; + + parity + .par_iter_mut() + .enumerate() + .for_each(|(i_row, p): (usize, &mut T)| { + let output = p.as_mut(); + let row = parity_rows[i_row]; + for (i_input, &input) in data_refs.iter().enumerate() { + let c = row[i_input]; + if c == 0 { + if i_input == 0 { + output.iter_mut().for_each(|o| *o = 0); + } + } else if c == 1 { + if i_input == 0 { + output.copy_from_slice(input); + } else { + for (o, i) in output.iter_mut().zip(input.iter()) { + *o ^= *i; + } + } + } else if i_input == 0 { + mul_fn(c, input, output); + } else { + mul_xor_fn(c, input, output); + } + } + }); + + return Ok(()); + } + + Self::encode_sequential( + data, + parity, + &parity_rows, + self.mul_slice_fn, + self.mul_slice_xor_fn, + ); + Ok(()) + } + + #[cfg(not(feature = "parallel"))] + pub fn encode + AsMut<[u8]>>(&self, shards: &mut [T]) -> Result<(), Error> { + if shards.len() < self.total_shard_count { + return Err(Error::TooFewShards); + } + if shards.len() > self.total_shard_count { + return Err(Error::TooManyShards); + } + Self::check_slices_uniform(shards)?; + + let (data, parity) = shards.split_at_mut(self.data_shard_count); + let parity_rows = self.get_parity_rows(); + + Self::encode_sequential( + data, + parity, + &parity_rows, + self.mul_slice_fn, + self.mul_slice_xor_fn, + ); + Ok(()) + } + + fn encode_sequential + AsMut<[u8]>>( + data: &[T], + parity: &mut [T], + parity_rows: &[&[u8]], + mul_slice_fn: galois::MulSliceFn, + mul_slice_xor_fn: galois::MulSliceFn, + ) { + for i_input in 0..data.len() { + let input = data[i_input].as_ref(); + for (i_row, p) in parity.iter_mut().enumerate() { + let c = parity_rows[i_row][i_input]; + let output = p.as_mut(); + if c == 0 { + if i_input == 0 { + output.iter_mut().for_each(|o| *o = 0); + } + } else if c == 1 { + if i_input == 0 { + output.copy_from_slice(input); + } else { + for (o, i) in output.iter_mut().zip(input.iter()) { + *o ^= *i; + } + } + } else if i_input == 0 { + mul_slice_fn(c, input, output); + } else { + mul_slice_xor_fn(c, input, output); + } + } + } + } + + /// Constructs the parity shards using separate data and parity references. + pub fn encode_sep, U: AsRef<[u8]> + AsMut<[u8]>>( + &self, + data: &[T], + parity: &mut [U], + ) -> Result<(), Error> { + if data.len() != self.data_shard_count { + return Err(if data.len() < self.data_shard_count { + Error::TooFewDataShards + } else { + Error::TooManyDataShards + }); + } + if parity.len() != self.parity_shard_count { + return Err(if parity.len() < self.parity_shard_count { + Error::TooFewParityShards + } else { + Error::TooManyParityShards + }); + } + + let data_refs: Vec<&[u8]> = data.iter().map(|s| s.as_ref()).collect(); + let mut parity_refs: Vec<&mut [u8]> = parity.iter_mut().map(|s| s.as_mut()).collect(); + + // Ensure all slices (data + parity) have the same non-zero length. + let shard_len = data_refs[0].len(); + if shard_len == 0 { + return Err(Error::EmptyShard); + } + for d in &data_refs[1..] { + if d.len() != shard_len { + return Err(Error::IncorrectShardSize); + } + } + for p in parity_refs.iter() { + if p.len() != shard_len { + return Err(Error::IncorrectShardSize); + } + } + + let parity_rows = self.get_parity_rows(); + self.code_some_slices(&parity_rows, &data_refs, &mut parity_refs); + + Ok(()) + } + + /// Constructs the parity shards incrementally using a single data shard. + /// + /// Must be called in order from `i_data = 0` to `data_shard_count - 1`. + /// When `i_data == 0`, parity shards are overwritten. Otherwise, results + /// are XOR-accumulated. + pub fn encode_single + AsMut<[u8]>>( + &self, + i_data: usize, + shards: &mut [T], + ) -> Result<(), Error> { + if i_data >= self.data_shard_count { + return Err(Error::InvalidIndex); + } + if shards.len() != self.total_shard_count { + return Err(if shards.len() < self.total_shard_count { + Error::TooFewShards + } else { + Error::TooManyShards + }); + } + Self::check_slices_uniform(shards)?; + + let (data_part, parity_part) = shards.split_at_mut(self.data_shard_count); + let input = data_part[i_data].as_ref(); + let mut parity_refs: Vec<&mut [u8]> = parity_part.iter_mut().map(|s| s.as_mut()).collect(); + + let parity_rows = self.get_parity_rows(); + self.code_single_slice(&parity_rows, i_data, input, &mut parity_refs); + + Ok(()) + } + + /// Constructs the parity shards incrementally using a single data shard (separated). + pub fn encode_single_sep + AsMut<[u8]>>( + &self, + i_data: usize, + single_data: &[u8], + parity: &mut [U], + ) -> Result<(), Error> { + if i_data >= self.data_shard_count { + return Err(Error::InvalidIndex); + } + if parity.len() != self.parity_shard_count { + return Err(if parity.len() < self.parity_shard_count { + Error::TooFewParityShards + } else { + Error::TooManyParityShards + }); + } + if single_data.is_empty() { + return Err(Error::EmptyShard); + } + for p in parity.iter() { + if p.as_ref().len() != single_data.len() { + return Err(Error::IncorrectShardSize); + } + } + + let mut parity_refs: Vec<&mut [u8]> = parity.iter_mut().map(|s| s.as_mut()).collect(); + let parity_rows = self.get_parity_rows(); + self.code_single_slice(&parity_rows, i_data, single_data, &mut parity_refs); + + Ok(()) + } + + /// Checks if the parity shards are correct. + pub fn verify>(&self, shards: &[T]) -> Result { + if shards.len() != self.total_shard_count { + return Err(if shards.len() < self.total_shard_count { + Error::TooFewShards + } else { + Error::TooManyShards + }); + } + Self::check_slices_uniform(shards)?; + + let slice_len = shards[0].as_ref().len(); + let mut buffer: Vec> = (0..self.parity_shard_count) + .map(|_| vec![0u8; slice_len]) + .collect(); + + let data = &shards[0..self.data_shard_count]; + let to_check = &shards[self.data_shard_count..]; + + let data_refs: Vec<&[u8]> = data.iter().map(|s| s.as_ref()).collect(); + let mut buf_refs: Vec<&mut [u8]> = buffer.iter_mut().map(|s| s.as_mut_slice()).collect(); + + let parity_rows = self.get_parity_rows(); + self.code_some_slices(&parity_rows, &data_refs, &mut buf_refs); + + for (computed, expected) in buffer.iter().zip(to_check.iter()) { + if computed.as_slice() != expected.as_ref() { + return Ok(false); + } + } + + Ok(true) + } + + /// Reconstructs all missing shards. + /// + /// Shards that are `None` will be reconstructed. The number of present + /// shards must be >= `data_shard_count`. + pub fn reconstruct(&self, shards: &mut [T]) -> Result<(), Error> { + self.reconstruct_internal(shards, false) + } + + /// Reconstructs only missing data shards. + pub fn reconstruct_data(&self, shards: &mut [T]) -> Result<(), Error> { + self.reconstruct_internal(shards, true) + } + + fn get_data_decode_matrix( + &self, + valid_indices: &[usize], + invalid_indices: &[usize], + ) -> Result { + { + let cache = self + .data_decode_matrix_cache + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + if let Some(m) = cache.get(invalid_indices) { + return Ok(m.clone()); + } + } + + let mut sub_matrix = Matrix::new(self.data_shard_count, self.data_shard_count); + for (sub_row, &valid_index) in valid_indices.iter().enumerate() { + for c in 0..self.data_shard_count { + sub_matrix.set(sub_row, c, self.matrix.get(valid_index, c)); + } + } + + let data_decode_matrix = sub_matrix.invert().map_err(|_| Error::SingularMatrix)?; + + { + let mut cache = self + .data_decode_matrix_cache + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + if cache.len() >= DATA_DECODE_MATRIX_CACHE_CAPACITY { + // Simple eviction: clear the cache when full + cache.clear(); + } + cache.insert(invalid_indices.to_vec(), data_decode_matrix.clone()); + } + + Ok(data_decode_matrix) + } + + fn reconstruct_internal( + &self, + shards: &mut [T], + data_only: bool, + ) -> Result<(), Error> { + if shards.len() != self.total_shard_count { + return Err(if shards.len() < self.total_shard_count { + Error::TooFewShards + } else { + Error::TooManyShards + }); + } + + // Count present shards and find shard length + let mut number_present = 0usize; + let mut shard_len: Option = None; + + for shard in shards.iter() { + if let Some(len) = shard.len() { + if len == 0 { + return Err(Error::EmptyShard); + } + number_present += 1; + if let Some(old_len) = shard_len { + if len != old_len { + return Err(Error::IncorrectShardSize); + } + } + shard_len = Some(len); + } + } + + if number_present == self.total_shard_count { + return Ok(()); + } + + if number_present < self.data_shard_count { + return Err(Error::TooFewShardsPresent); + } + + let shard_len = shard_len.expect("at least one shard present"); + + // Categorize shards + let mut valid_indices: Vec = Vec::with_capacity(self.data_shard_count); + let mut invalid_indices: Vec = Vec::with_capacity(self.parity_shard_count); + + for (i, shard) in shards.iter().enumerate() { + if shard.len().is_some() { + if valid_indices.len() < self.data_shard_count { + valid_indices.push(i); + } + } else { + invalid_indices.push(i); + } + } + + // Initialize missing shards + for &i in &invalid_indices { + if i < self.data_shard_count || !data_only { + shards[i].initialize(shard_len); + } + } + + let data_decode_matrix = self.get_data_decode_matrix(&valid_indices, &invalid_indices)?; + + // Reconstruct missing data shards. + // We need to read from valid shards and write to invalid shards simultaneously. + // Since the index sets are disjoint, we use raw pointers to avoid borrow conflicts. + let missing_data_indices: Vec = invalid_indices + .iter() + .copied() + .filter(|&i| i < self.data_shard_count) + .collect(); + + if !missing_data_indices.is_empty() { + let matrix_rows: Vec<&[u8]> = missing_data_indices + .iter() + .map(|&i| data_decode_matrix.get_row(i)) + .collect(); + + // For each data shard input, encode into each missing data output + for (i_input, &valid_idx) in valid_indices.iter().enumerate() { + // SAFETY: valid_idx and missing indices are disjoint sets, + // so we can safely read from valid_idx while writing to missing indices. + let input_ptr = shards[valid_idx].get().unwrap().as_ptr(); + let input_slice = unsafe { std::slice::from_raw_parts(input_ptr, shard_len) }; + + for (i_out, &missing_idx) in missing_data_indices.iter().enumerate() { + let c = matrix_rows[i_out][i_input]; + let output = shards[missing_idx].get_mut().unwrap(); + if i_input == 0 { + self.mul_slice(c, input_slice, output); + } else { + self.mul_slice_xor(c, input_slice, output); + } + } + } + } + + if data_only { + return Ok(()); + } + + // Reconstruct missing parity shards from all data shards + let missing_parity_indices: Vec = invalid_indices + .iter() + .copied() + .filter(|&i| i >= self.data_shard_count) + .collect(); + + if !missing_parity_indices.is_empty() { + let parity_rows = self.get_parity_rows(); + let matrix_rows: Vec<&[u8]> = missing_parity_indices + .iter() + .map(|&i| parity_rows[i - self.data_shard_count]) + .collect(); + + for i_input in 0..self.data_shard_count { + // SAFETY: data shards (0..data_shard_count) are disjoint from parity shards. + let input_ptr = shards[i_input].get().unwrap().as_ptr(); + let input_slice = unsafe { std::slice::from_raw_parts(input_ptr, shard_len) }; + + for (i_out, &missing_idx) in missing_parity_indices.iter().enumerate() { + let c = matrix_rows[i_out][i_input]; + let output = shards[missing_idx].get_mut().unwrap(); + if i_input == 0 { + self.mul_slice(c, input_slice, output); + } else { + self.mul_slice_xor(c, input_slice, output); + } + } + } + } + + Ok(()) + } + + fn check_slices_uniform>(slices: &[T]) -> Result<(), Error> { + if slices.is_empty() { + return Ok(()); + } + let size = slices[0].as_ref().len(); + if size == 0 { + return Err(Error::EmptyShard); + } + for slice in slices.iter().skip(1) { + if slice.as_ref().len() != size { + return Err(Error::IncorrectShardSize); + } + } + Ok(()) + } +} + +/// A trait for types that can hold optional shard data for reconstruction. +/// +/// # Safety +/// +/// Implementations must guarantee that distinct indices in a `&mut [T]` slice +/// yield non-overlapping memory regions from `get()`/`get_mut()`. Specifically: +/// - `get()` on element `i` must not alias `get_mut()` on element `j` when `i != j`. +/// - The returned slices must remain valid and not be moved/reallocated while borrows are active. +/// +/// This is required because `reconstruct_internal` uses raw pointers to read from +/// some shard indices while writing to others simultaneously. +pub unsafe trait ReconstructShard { + /// Returns the length of the shard data, or `None` if absent. + fn len(&self) -> Option; + /// Returns `true` if the shard data is absent. + fn is_empty(&self) -> bool { + self.len().is_none() + } + /// Get an immutable reference to the shard data. + fn get(&self) -> Option<&[u8]>; + /// Get a mutable reference to the shard data. + fn get_mut(&mut self) -> Option<&mut [u8]>; + /// Initialize the shard data to the given length (zeroed). + fn initialize(&mut self, len: usize); +} + +// SAFETY: `Option>` stores each shard in its own heap allocation, +// so distinct indices in a slice always yield non-overlapping memory. +unsafe impl ReconstructShard for Option> { + fn len(&self) -> Option { + self.as_ref().map(|v| v.len()) + } + + fn get(&self) -> Option<&[u8]> { + self.as_ref().map(|v| v.as_slice()) + } + + fn get_mut(&mut self) -> Option<&mut [u8]> { + self.as_mut().map(|v| v.as_mut_slice()) + } + + fn initialize(&mut self, len: usize) { + if self.is_none() { + *self = Some(vec![0u8; len]); + } + } +} + +/// Bookkeeper for shard-by-shard encoding. +/// +/// Useful for streaming use cases where data shards arrive one at a time. +/// +/// # Example +/// +/// ``` +/// use fec_rs::{ReedSolomon, ShardByShard}; +/// +/// let rs = ReedSolomon::new(3, 2).unwrap(); +/// let mut sbs = ShardByShard::new(&rs); +/// +/// let mut shards: Vec> = vec![ +/// vec![0, 1, 2, 3, 4], +/// vec![5, 6, 7, 8, 9], +/// vec![0, 0, 0, 0, 0], // placeholder +/// vec![0, 0, 0, 0, 0], // parity 1 +/// vec![0, 0, 0, 0, 0], // parity 2 +/// ]; +/// +/// // Encode first two data shards +/// sbs.encode(&mut shards).unwrap(); +/// sbs.encode(&mut shards).unwrap(); +/// +/// // Fill in third data shard +/// shards[2] = vec![10, 11, 12, 13, 14]; +/// +/// // Encode third data shard +/// sbs.encode(&mut shards).unwrap(); +/// +/// assert!(rs.verify(&shards).unwrap()); +/// ``` +pub struct ShardByShard<'a> { + codec: &'a ReedSolomon, + cur_input: usize, +} + +impl<'a> ShardByShard<'a> { + pub fn new(codec: &'a ReedSolomon) -> Self { + ShardByShard { + codec, + cur_input: 0, + } + } + + /// Checks if all data shards have been encoded. + pub fn parity_ready(&self) -> bool { + self.cur_input == self.codec.data_shard_count + } + + /// Resets the bookkeeping state. + /// + /// Returns `SBSError::LeftoverShards` if shards were encoded but parity is not ready. + pub fn reset(&mut self) -> Result<(), SBSError> { + if self.cur_input > 0 && !self.parity_ready() { + return Err(SBSError::LeftoverShards); + } + self.cur_input = 0; + Ok(()) + } + + /// Resets unconditionally. + pub fn reset_force(&mut self) { + self.cur_input = 0; + } + + /// Returns the current data shard index to be encoded. + pub fn cur_input_index(&self) -> usize { + self.cur_input + } + + /// Encodes the current data shard into the parity shards. + pub fn encode + AsMut<[u8]>>( + &mut self, + shards: &mut [T], + ) -> Result<(), SBSError> { + if self.parity_ready() { + return Err(SBSError::TooManyCalls); + } + self.codec + .encode_single(self.cur_input, shards) + .map_err(SBSError::RSError)?; + self.cur_input += 1; + Ok(()) + } + + /// Encodes the current data shard using separate data and parity references. + pub fn encode_sep + AsMut<[u8]>>( + &mut self, + data: &[&[u8]], + parity: &mut [U], + ) -> Result<(), SBSError> { + if self.parity_ready() { + return Err(SBSError::TooManyCalls); + } + if data.len() != self.codec.data_shard_count { + return Err(SBSError::RSError( + if data.len() < self.codec.data_shard_count { + Error::TooFewDataShards + } else { + Error::TooManyDataShards + }, + )); + } + self.codec + .encode_single_sep(self.cur_input, data[self.cur_input], parity) + .map_err(SBSError::RSError)?; + self.cur_input += 1; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_basic() { + let rs = ReedSolomon::new(4, 2).unwrap(); + assert_eq!(rs.data_shard_count(), 4); + assert_eq!(rs.parity_shard_count(), 2); + assert_eq!(rs.total_shard_count(), 6); + } + + #[test] + fn test_new_errors() { + assert_eq!(ReedSolomon::new(0, 1), Err(Error::TooFewDataShards)); + assert_eq!(ReedSolomon::new(1, 0), Err(Error::TooFewParityShards)); + assert_eq!(ReedSolomon::new(128, 129), Err(Error::TooManyShards)); + } + + #[test] + fn test_set_parity_matrix_rejects_wrong_length() { + let mut rs = ReedSolomon::new(4, 2).unwrap(); + assert_eq!( + rs.set_parity_matrix(&[1, 2, 3]), + Err(Error::InvalidParityMatrix) + ); + } + + #[test] + fn test_partial_eq_reflects_parity_matrix_changes() { + let mut lhs = ReedSolomon::new(4, 2).unwrap(); + let rhs = ReedSolomon::new(4, 2).unwrap(); + + assert_eq!(lhs, rhs); + + lhs.set_parity_matrix(&[0x77, 0x40, 0x38, 0x0e, 0xc7, 0xa7, 0x0d, 0x6c]) + .unwrap(); + + assert_ne!(lhs, rhs); + } + + #[test] + fn test_encode_verify() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], + vec![0, 0, 0, 0], + ]; + + rs.encode(&mut shards).unwrap(); + assert!(rs.verify(&shards).unwrap()); + + // Corrupt parity + shards[4][0] ^= 0xFF; + assert!(!rs.verify(&shards).unwrap()); + } + + #[test] + fn test_reconstruct_missing_data() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], + vec![0, 0, 0, 0], + ]; + rs.encode(&mut shards).unwrap(); + + let original = shards.clone(); + + // Lose shard 0 + let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); + recovery[0] = None; + + rs.reconstruct(&mut recovery).unwrap(); + + for (i, shard) in recovery.iter().enumerate() { + assert_eq!(shard.as_ref().unwrap(), &original[i]); + } + } + + #[test] + fn test_reconstruct_missing_parity() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], + vec![0, 0, 0, 0], + ]; + rs.encode(&mut shards).unwrap(); + + let original = shards.clone(); + + // Lose both parity shards + let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); + recovery[4] = None; + recovery[5] = None; + + rs.reconstruct(&mut recovery).unwrap(); + + for (i, shard) in recovery.iter().enumerate() { + assert_eq!(shard.as_ref().unwrap(), &original[i]); + } + } + + #[test] + fn test_reconstruct_mixed() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], + vec![0, 0, 0, 0], + ]; + rs.encode(&mut shards).unwrap(); + + let original = shards.clone(); + + // Lose one data + one parity + let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); + recovery[1] = None; + recovery[4] = None; + + rs.reconstruct(&mut recovery).unwrap(); + + for (i, shard) in recovery.iter().enumerate() { + assert_eq!(shard.as_ref().unwrap(), &original[i]); + } + } + + #[test] + fn test_reconstruct_too_few_shards() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], + vec![0, 0, 0, 0], + ]; + rs.encode(&mut shards).unwrap(); + + let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); + recovery[0] = None; + recovery[1] = None; + recovery[2] = None; + + assert_eq!( + rs.reconstruct(&mut recovery), + Err(Error::TooFewShardsPresent) + ); + } + + #[test] + fn test_shard_by_shard() { + let rs = ReedSolomon::new(3, 2).unwrap(); + let mut sbs = ShardByShard::new(&rs); + + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3, 4], + vec![5, 6, 7, 8, 9], + vec![10, 11, 12, 13, 14], + vec![0, 0, 0, 0, 0], + vec![0, 0, 0, 0, 0], + ]; + + let mut shards_batch = shards.clone(); + rs.encode(&mut shards_batch).unwrap(); + + sbs.encode(&mut shards).unwrap(); + sbs.encode(&mut shards).unwrap(); + sbs.encode(&mut shards).unwrap(); + + assert!(sbs.parity_ready()); + assert_eq!(shards, shards_batch); + } + + #[test] + fn test_encode_sep() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let data: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + ]; + let mut parity = vec![vec![0u8; 4]; 2]; + rs.encode_sep(&data, &mut parity).unwrap(); + + // Verify against encode + let shards: Vec> = data.iter().cloned().chain(parity.iter().cloned()).collect(); + assert!(rs.verify(&shards).unwrap()); + + // Also verify with direct encode + let mut shards2: Vec> = data.iter().cloned().chain(vec![vec![0u8; 4]; 2]).collect(); + rs.encode(&mut shards2).unwrap(); + assert_eq!(shards, shards2); + } + + #[test] + fn test_encode_single_sep_matches_batch_encode() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let data: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + ]; + let mut parity = vec![vec![0u8; 4]; 2]; + + for (i, shard) in data.iter().enumerate() { + rs.encode_single_sep(i, shard, &mut parity).unwrap(); + } + + let mut expected: Vec> = + data.iter().cloned().chain(vec![vec![0u8; 4]; 2]).collect(); + rs.encode(&mut expected).unwrap(); + + assert_eq!(parity[0], expected[4]); + assert_eq!(parity[1], expected[5]); + } + + #[test] + fn test_shard_by_shard_encode_sep_matches_batch_encode() { + let rs = ReedSolomon::new(3, 2).unwrap(); + let data: Vec> = vec![ + vec![0, 1, 2, 3, 4], + vec![5, 6, 7, 8, 9], + vec![10, 11, 12, 13, 14], + ]; + let data_refs: Vec<&[u8]> = data.iter().map(Vec::as_slice).collect(); + let mut parity = vec![vec![0u8; 5]; 2]; + let mut sbs = ShardByShard::new(&rs); + + while !sbs.parity_ready() { + sbs.encode_sep(&data_refs, &mut parity).unwrap(); + } + + let mut expected: Vec> = + data.iter().cloned().chain(vec![vec![0u8; 5]; 2]).collect(); + rs.encode(&mut expected).unwrap(); + + assert_eq!(parity[0], expected[3]); + assert_eq!(parity[1], expected[4]); + } + + #[test] + fn test_reconstruct_returns_singular_matrix_for_invalid_parity_matrix() { + let mut rs = ReedSolomon::new(4, 2).unwrap(); + rs.set_parity_matrix(&[1, 0, 0, 0, 1, 0, 0, 0]).unwrap(); + + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], + vec![0, 0, 0, 0], + ]; + rs.encode(&mut shards).unwrap(); + + let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); + recovery[0] = None; + recovery[1] = None; + + assert_eq!(rs.reconstruct(&mut recovery), Err(Error::SingularMatrix)); + } + + #[test] + fn test_set_parity_matrix_invalidates_decode_matrix_cache() { + let mut rs = ReedSolomon::new(4, 2).unwrap(); + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], + vec![0, 0, 0, 0], + ]; + rs.encode(&mut shards).unwrap(); + + let mut initial_recovery: Vec>> = shards.iter().cloned().map(Some).collect(); + initial_recovery[0] = None; + initial_recovery[1] = None; + rs.reconstruct(&mut initial_recovery).unwrap(); + + rs.set_parity_matrix(&[1, 0, 0, 0, 1, 0, 0, 0]).unwrap(); + + let mut recovery_after_update: Vec>> = + shards.into_iter().map(Some).collect(); + recovery_after_update[0] = None; + recovery_after_update[1] = None; + + assert_eq!( + rs.reconstruct(&mut recovery_after_update), + Err(Error::SingularMatrix) + ); + } + + #[cfg(feature = "parallel")] + #[test] + fn test_parallel_encode_matches_sequential_path() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let shard_len = 200_000; + let mut shards: Vec> = (0..6) + .map(|i| { + if i < 4 { + (0..shard_len).map(|j| ((i * 17 + j) % 251) as u8).collect() + } else { + vec![0u8; shard_len] + } + }) + .collect(); + + let parity_rows = rs.get_parity_rows(); + let mut expected = vec![vec![0u8; shard_len]; 2]; + + { + let (data, _) = shards.split_at_mut(4); + ReedSolomon::encode_sequential( + data, + &mut expected, + &parity_rows, + rs.mul_slice_fn, + rs.mul_slice_xor_fn, + ); + } + + rs.encode(&mut shards).unwrap(); + + assert_eq!(shards[4], expected[0]); + assert_eq!(shards[5], expected[1]); + } + + #[test] + fn test_various_shard_counts() { + for data in [1, 2, 5, 10, 50, 127] { + for parity in [1, 2, 3, 5] { + if data + parity > 256 { + continue; + } + let rs = ReedSolomon::new(data, parity).unwrap(); + let mut shards: Vec> = (0..data + parity) + .map(|i| { + if i < data { + vec![i as u8; 64] + } else { + vec![0u8; 64] + } + }) + .collect(); + + rs.encode(&mut shards).unwrap(); + assert!( + rs.verify(&shards).unwrap(), + "Verify failed for data={data}, parity={parity}" + ); + + // Reconstruct with one missing data shard + let original = shards.clone(); + let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); + recovery[0] = None; + rs.reconstruct(&mut recovery).unwrap(); + assert_eq!( + recovery[0].as_ref().unwrap(), + &original[0], + "Reconstruct failed for data={data}, parity={parity}" + ); + } + } + } + + #[test] + fn test_reconstruct_data_only() { + let rs = ReedSolomon::new(4, 2).unwrap(); + let mut shards: Vec> = vec![ + vec![0, 1, 2, 3], + vec![4, 5, 6, 7], + vec![8, 9, 10, 11], + vec![12, 13, 14, 15], + vec![0, 0, 0, 0], + vec![0, 0, 0, 0], + ]; + rs.encode(&mut shards).unwrap(); + + let original_data = shards[0].clone(); + + let mut recovery: Vec>> = shards.into_iter().map(Some).collect(); + recovery[0] = None; + + rs.reconstruct_data(&mut recovery).unwrap(); + + assert_eq!(recovery[0].as_ref().unwrap(), &original_data); + } +} diff --git a/crates/lumen-host/src/gamestream/stream.rs b/crates/lumen-host/src/gamestream/stream.rs index bbd1faf..f55f7f9 100644 --- a/crates/lumen-host/src/gamestream/stream.rs +++ b/crates/lumen-host/src/gamestream/stream.rs @@ -8,6 +8,7 @@ use super::VIDEO_PORT; use crate::capture::{self, Capturer, FastSyntheticCapturer}; use crate::encode::{self, Codec}; use anyhow::{Context, Result}; +use rand::Rng; use std::net::UdpSocket; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -82,7 +83,12 @@ fn run(cfg: StreamConfig, running: &AtomicBool) -> Result<()> { cfg.bitrate_kbps as u64 * 1000, ) .context("open NVENC for stream")?; - let mut pk = VideoPacketizer::new(cfg.packet_size); + // FEC overhead percent (Sunshine default 20). Override with LUMEN_FEC_PCT (0 = data-only). + let fec_pct: u8 = std::env::var("LUMEN_FEC_PCT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(20); + let mut pk = VideoPacketizer::new(cfg.packet_size, fec_pct); // Pace at a steady rate (capped at 60fps), re-encoding the last captured frame when the // compositor produced no new one. wlroots only emits frames on damage, so a static or @@ -94,6 +100,13 @@ fn run(cfg: StreamConfig, running: &AtomicBool) -> Result<()> { let mut fps_count: u32 = 0; let mut fps_t = Instant::now(); let stream_start = Instant::now(); + // Test knob: drop this % of outbound packets to exercise FEC recovery (0 = off). + let drop_pct: u32 = std::env::var("LUMEN_VIDEO_DROP") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(0); + let mut rng = rand::thread_rng(); + let mut dropped: u64 = 0; while running.load(Ordering::SeqCst) { let tick = Instant::now(); @@ -113,6 +126,11 @@ fn run(cfg: StreamConfig, running: &AtomicBool) -> Result<()> { FrameType::P }; for pkt in pk.packetize(&au.data, ft, ts) { + // Simulated network loss: build the packet (advances seq) but skip the send. + if drop_pct > 0 && rng.gen_range(0..100) < drop_pct { + dropped += 1; + continue; + } if sock.send(&pkt).is_err() { client_gone = true; break; @@ -130,7 +148,7 @@ fn run(cfg: StreamConfig, running: &AtomicBool) -> Result<()> { fps_count += 1; if fps_t.elapsed() >= Duration::from_secs(1) { - tracing::info!(fps = fps_count, sent_pkts, "video: streaming"); + tracing::info!(fps = fps_count, sent_pkts, dropped, "video: streaming"); fps_count = 0; fps_t = Instant::now(); } diff --git a/crates/lumen-host/src/gamestream/video.rs b/crates/lumen-host/src/gamestream/video.rs index f43dc82..4441ebc 100644 --- a/crates/lumen-host/src/gamestream/video.rs +++ b/crates/lumen-host/src/gamestream/video.rs @@ -1,15 +1,21 @@ //! GameStream video wire packetization: an encoded access unit → UDP datagrams a stock -//! Moonlight client decodes. Each datagram is +//! Moonlight client decodes (and recovers under loss). Each datagram is //! `RTP_PACKET(12, big-endian) + reserved[4] + NV_VIDEO_PACKET(16, little-endian) + payload` //! and the frame's bitstream is prefixed with an 8-byte `video_short_frame_header_t`, then -//! striped into ≤4 FEC blocks of ≤255 data shards. Byte-exact spec: +//! striped into ≤4 FEC blocks of ≤255 shards. Byte-exact spec: //! `docs/research/gamestream-protocol-research.json` (video plane). //! -//! P1.3 sends **data shards only** (`fecPercentage = 0`): on a clean LAN the client has -//! every data shard and never runs Reed–Solomon recovery, so we get a decodable frame -//! without matching Moonlight's `nanors` parity matrix (that interop work is P1.5). Plaintext -//! only (encryption negotiated off for now). This lives in lumen-host for fast iteration; -//! the wire codec moves into lumen-core (the P1 wire mode) once proven. +//! FEC (P1.5): each block carries `m = ⌈k·pct/100⌉` Reed–Solomon parity shards generated by +//! `lumen_core::fec::Gf8Coder` (the nanors-compatible Cauchy GF(2⁸) coder). Crucially, RS runs +//! over the **whole `blocksize` shard** — Moonlight decodes over `packetSize + 16` bytes from +//! the datagram start (`RtpVideoQueue.c`), and rejects a recovered shard whose reconstructed +//! `flags` byte isn't valid — so the NV header fields RS must reproduce (streamPacketIndex, +//! frameIndex, flags, multiFec*) are written into the data shards **before** encoding, and only +//! the transport fields (RTP header/seq/timestamp + fecInfo) are stamped **after**, matching +//! Sunshine `stream.cpp`. `pct = 0` falls back to data-shards-only. Plaintext (AES-GCM video +//! encryption is negotiated off for now). + +use lumen_core::fec::{ErasureCoder, Gf8Coder}; /// RTP `header` byte: version 2 (0x80) | extension (0x10) — Moonlight keys on the extension. const RTP_HEADER_BYTE: u8 = 0x80 | 0x10; @@ -28,28 +34,32 @@ pub enum FrameType { P, } -/// Splits encoded access units into GameStream video datagrams. +/// Splits encoded access units into GameStream video datagrams (data + FEC parity shards). pub struct VideoPacketizer { /// Negotiated `packetSize` (ANNOUNCE `x-nv-video[0].packetSize`). packet_size: usize, /// Per-shard payload bytes = `blocksize - SHARD_HEADER`, `blocksize = packetSize + 16`. payload_per_shard: usize, + /// Requested FEC overhead percent (0 = data shards only). The wire carries the recomputed + /// per-block `(100·m)/k` so Moonlight derives the same parity count. + fec_percentage: usize, frame_index: u32, /// Monotonic per-stream packet counter (the RTP sequence / streamPacketIndex source). seq: u32, } impl VideoPacketizer { - pub fn new(packet_size: usize) -> Self { + pub fn new(packet_size: usize, fec_percentage: u8) -> Self { VideoPacketizer { packet_size, payload_per_shard: packet_size + 16 - SHARD_HEADER, + fec_percentage: fec_percentage as usize, frame_index: 0, seq: 0, } } - /// Packetize one encoded AU into wire datagrams (ready for UDP send). + /// Packetize one encoded AU into wire datagrams (data shards + Cauchy RS parity shards). pub fn packetize( &mut self, au: &[u8], @@ -59,6 +69,8 @@ impl VideoPacketizer { let frame_index = self.frame_index; self.frame_index = self.frame_index.wrapping_add(1); let pps = self.payload_per_shard; + let blocksize = SHARD_HEADER + pps; // = packet_size + 16 + let pct = self.fec_percentage; // frame payload = 8-byte short frame header + the AU bitstream. let total_len = 8 + au.len(); @@ -71,53 +83,120 @@ impl VideoPacketizer { fp.extend_from_slice(au); let total_data = total_len.div_ceil(pps).max(1); - let n_blocks = total_data - .div_ceil(MAX_DATA_SHARDS_PER_BLOCK) - .clamp(1, MAX_FEC_BLOCKS); + // With parity, cap per-block data so k + m ≤ 255 (the GF(2⁸) ceiling): parity for k + // data shards is ⌈k·pct/100⌉, so k ≤ 255·100/(100+pct). + let max_data = if pct > 0 { + (255 * 100) / (100 + pct) + } else { + MAX_DATA_SHARDS_PER_BLOCK + }; + let n_blocks = total_data.div_ceil(max_data).clamp(1, MAX_FEC_BLOCKS); let per_block = total_data.div_ceil(n_blocks); - let mut packets = Vec::with_capacity(total_data); + let mut packets = Vec::with_capacity(total_data + total_data * pct / 100 + n_blocks); for b in 0..n_blocks { let first = b * per_block; let last = ((b + 1) * per_block).min(total_data); if first >= last { break; } - let block_data_count = last - first; - for (fec_index, shard) in (first..last).enumerate() { - let start = shard * pps; - let end = (start + pps).min(fp.len()); - let mut payload = vec![0u8; pps]; // last shard zero-padded - payload[..end - start].copy_from_slice(&fp[start..end]); + let k = last - first; + let block_seq_base = self.seq; + let multi_fec_blocks = ((b as u8) << 4) | (((n_blocks - 1) as u8) << 6); + // 1. Build this block's k data-shard datagrams (full `blocksize`), writing the NV + // header fields RS must reproduce on recovery (streamPacketIndex, frameIndex, + // flags, multiFec*). The RTP header + fecInfo are left zero (stamped post-RS). + let mut shards: Vec> = Vec::with_capacity(k); + for i in 0..k { + let global = first + i; + let seq = block_seq_base + i as u32; + let mut buf = vec![0u8; blocksize]; let mut flags = FLAG_PIC; - if shard == 0 { + if global == 0 { flags |= FLAG_SOF; } - if shard == total_data - 1 { + if global == total_data - 1 { flags |= FLAG_EOF; } - let multi_fec_blocks = ((b as u8) << 4) | (((n_blocks - 1) as u8) << 6); - // fecInfo: dataShards<<22 | fecIndex<<12 | fecPercentage<<4 (pct = 0). - let fec_info: u32 = ((block_data_count as u32) << 22) | ((fec_index as u32) << 12); - let seq = self.seq; - self.seq = self.seq.wrapping_add(1); + buf[16..20].copy_from_slice(&(seq << 8).to_le_bytes()); // streamPacketIndex + buf[20..24].copy_from_slice(&frame_index.to_le_bytes()); // frameIndex + buf[24] = flags; + buf[26] = MULTI_FEC_FLAGS; + buf[27] = multi_fec_blocks; + let ps = global * pps; + let pe = (ps + pps).min(fp.len()); + buf[SHARD_HEADER..SHARD_HEADER + (pe - ps)].copy_from_slice(&fp[ps..pe]); + shards.push(buf); + } - packets.push(build_packet( + // 2. m = ⌈k·pct/100⌉ parity shards over the full datagrams. The wire percentage is + // recomputed from m so the client derives the same parity count. + let m = if pct > 0 { (k * pct).div_ceil(100) } else { 0 }; + let wire_pct = if m > 0 { (100 * m) / k } else { 0 }; + let parity = if m > 0 { + Gf8Coder.encode(&shards, m).unwrap_or_default() + } else { + Vec::new() + }; + + // 3. Stamp transport headers (RTP + fecInfo) on every shard. We do NOT touch the + // flags/streamPacketIndex bytes, so a recovered data shard's RS-reconstructed + // NV header stays valid. + self.seq = block_seq_base + k as u32; + for (i, mut buf) in shards.into_iter().enumerate() { + let seq = block_seq_base + i as u32; + finalize( + &mut buf, seq, timestamp_90k, frame_index, - flags, multi_fec_blocks, - fec_info, - &payload, - )); + fec_info(k, i, wire_pct), + ); + packets.push(buf); + } + for (j, mut buf) in parity.into_iter().enumerate() { + let seq = self.seq; + self.seq = self.seq.wrapping_add(1); + finalize( + &mut buf, + seq, + timestamp_90k, + frame_index, + multi_fec_blocks, + fec_info(k, k + j, wire_pct), + ); + packets.push(buf); } } packets } } +/// `fecInfo` (u32, little-endian): `dataShards<<22 | fecIndex<<12 | fecPercentage<<4`. +fn fec_info(k: usize, fec_index: usize, pct: usize) -> u32 { + ((k as u32) << 22) | ((fec_index as u32) << 12) | ((pct as u32) << 4) +} + +/// Stamp the post-RS transport fields into a shard datagram (in place). Leaves the NV +/// `flags`/`streamPacketIndex`/`multiFecFlags` bytes untouched (RS-covered). +fn finalize( + buf: &mut [u8], + seq: u32, + ts_90k: u32, + frame_index: u32, + multi_fec_blocks: u8, + fec_info: u32, +) { + buf[0] = RTP_HEADER_BYTE; // header (version 2 + extension) + buf[2..4].copy_from_slice(&(seq as u16).to_be_bytes()); // sequenceNumber (BE) + buf[4..8].copy_from_slice(&ts_90k.to_be_bytes()); // timestamp (90 kHz, BE) + buf[20..24].copy_from_slice(&frame_index.to_le_bytes()); // frameIndex (re-affirm for parity) + buf[27] = multi_fec_blocks; // re-affirm for parity + buf[28..32].copy_from_slice(&fec_info.to_le_bytes()); // fecInfo (LE) +} + /// 8-byte `video_short_frame_header_t` (little-endian), prefixed to the AU bitstream. fn short_frame_header(frame_type: FrameType, last_payload_len: u16) -> [u8; 8] { let mut h = [0u8; 8]; @@ -132,55 +211,21 @@ fn short_frame_header(frame_type: FrameType, last_payload_len: u16) -> [u8; 8] { h } -/// Build one wire datagram: RTP(BE) + reserved + NV_VIDEO_PACKET(LE) + payload. -fn build_packet( - seq: u32, - timestamp_90k: u32, - frame_index: u32, - flags: u8, - multi_fec_blocks: u8, - fec_info: u32, - payload: &[u8], -) -> Vec { - let mut p = Vec::with_capacity(SHARD_HEADER + payload.len()); - // --- RTP_PACKET (12 bytes, big-endian) --- - p.push(RTP_HEADER_BYTE); // header - p.push(0); // packetType (unused for video) - p.extend_from_slice(&(seq as u16).to_be_bytes()); // sequenceNumber - p.extend_from_slice(×tamp_90k.to_be_bytes()); // timestamp (90 kHz) - p.extend_from_slice(&0u32.to_be_bytes()); // ssrc - // --- reserved[4] --- - p.extend_from_slice(&[0u8; 4]); - // --- NV_VIDEO_PACKET (16 bytes, little-endian) --- - p.extend_from_slice(&(seq << 8).to_le_bytes()); // streamPacketIndex (low byte 0) - p.extend_from_slice(&frame_index.to_le_bytes()); // frameIndex - p.push(flags); - p.push(0); // extraFlags - p.push(MULTI_FEC_FLAGS); - p.push(multi_fec_blocks); - p.extend_from_slice(&fec_info.to_le_bytes()); // fecInfo - // --- payload --- - p.extend_from_slice(payload); - p -} - #[cfg(test)] mod tests { use super::*; #[test] fn single_block_layout() { - let mut pk = VideoPacketizer::new(1392); // payload_per_shard = 1392+16-32 = 1376 + let mut pk = VideoPacketizer::new(1392, 0); // data-only; pps = 1392+16-32 = 1376 assert_eq!(pk.payload_per_shard, 1376); let au = vec![0xABu8; 4000]; // 8+4000 = 4008 → ceil(4008/1376) = 3 data shards let pkts = pk.packetize(&au, FrameType::Idr, 90_000); assert_eq!(pkts.len(), 3); - // Every datagram is SHARD_HEADER + payload_per_shard. for p in &pkts { assert_eq!(p.len(), SHARD_HEADER + 1376); assert_eq!(p[0], 0x90); // RTP header byte } - // First packet: SOF set, fecIndex 0, frameIndex 0. let first = &pkts[0]; assert_eq!(first[24] & FLAG_SOF, FLAG_SOF); assert_eq!(first[24] & FLAG_PIC, FLAG_PIC); @@ -189,12 +234,10 @@ mod tests { let fec_info = u32::from_le_bytes(first[28..32].try_into().unwrap()); assert_eq!(fec_info >> 22, 3); // dataShards = 3 assert_eq!((fec_info >> 12) & 0x3ff, 0); // fecIndex 0 - // Last packet: EOF set, fecIndex 2. let last = &pkts[2]; assert_eq!(last[24] & FLAG_EOF, FLAG_EOF); let fec_info_last = u32::from_le_bytes(last[28..32].try_into().unwrap()); assert_eq!((fec_info_last >> 12) & 0x3ff, 2); - // RTP sequence numbers are 0,1,2. for (i, p) in pkts.iter().enumerate() { assert_eq!(u16::from_be_bytes(p[2..4].try_into().unwrap()), i as u16); } @@ -202,15 +245,59 @@ mod tests { #[test] fn multi_block_split() { - let mut pk = VideoPacketizer::new(1392); - // Need > 255 data shards → multi-block. 255*1376 ≈ 351 KB; use 600 KB. + let mut pk = VideoPacketizer::new(1392, 0); // data-only let au = vec![0u8; 600_000]; let pkts = pk.packetize(&au, FrameType::P, 0); let total = (8 + au.len()).div_ceil(1376); assert_eq!(pkts.len(), total); - // n_blocks = ceil(total/255), clamped to 4; check multiFecBlocks lastBlock nibble. let n_blocks = total.div_ceil(255).clamp(1, 4); let last_block = ((pkts.last().unwrap()[27]) >> 6) & 0x3; assert_eq!(last_block as usize, n_blocks - 1); } + + #[test] + fn emits_parity_shards() { + let mut pk = VideoPacketizer::new(1392, 20); // pps = 1376, 20% FEC + let au = vec![0xABu8; 4000]; // 8+4000 = 4008 → 3 data shards (k=3) + let pkts = pk.packetize(&au, FrameType::Idr, 0); + // m = ceil(3*20/100) = 1 parity shard → 4 packets; wire_pct = 100*1/3 = 33. + assert_eq!(pkts.len(), 4); + for p in &pkts { + let fec_info = u32::from_le_bytes(p[28..32].try_into().unwrap()); + assert_eq!(fec_info >> 22, 3); // dataShards = k = 3 + assert_eq!((fec_info >> 4) & 0xff, 33); // wire fecPercentage + } + // The parity shard is last: fecIndex = k = 3. + let parity = &pkts[3]; + let fec_info = u32::from_le_bytes(parity[28..32].try_into().unwrap()); + assert_eq!((fec_info >> 12) & 0x3ff, 3); + // Data shards keep SOF (first) / EOF (last data shard) / PIC. + assert_eq!(pkts[0][24] & FLAG_SOF, FLAG_SOF); + assert_eq!(pkts[2][24] & FLAG_EOF, FLAG_EOF); + // RTP sequence numbers are contiguous across data + parity (0,1,2,3). + for (i, p) in pkts.iter().enumerate() { + assert_eq!(u16::from_be_bytes(p[2..4].try_into().unwrap()), i as u16); + } + } + + /// End-to-end recovery: parity over the full datagram reconstructs a dropped data shard's + /// payload AND its NV `flags` byte (the byte Moonlight validates), proving the layout. + #[test] + fn parity_recovers_full_datagram_incl_flags() { + let mut pk = VideoPacketizer::new(1392, 50); // high pct → plenty of parity + let au = vec![0x5Au8; 4000]; // k = 3 + let pkts = pk.packetize(&au, FrameType::Idr, 0); + let k = 3usize; + let m = pkts.len() - k; + assert!(m >= 1); + // Drop data shard 1; reconstruct from the rest via the same Cauchy coder. + let mut received: Vec>> = pkts.iter().map(|p| Some(p.clone())).collect(); + received[1] = None; + let recovered = Gf8Coder.reconstruct(k, m, &mut received).unwrap(); + // The recovered shard equals the original data shard's RS-covered bytes: its flags + // byte (offset 24) is PIC (middle shard), proving the NV header recovers correctly. + assert_eq!(recovered[1][24], FLAG_PIC); + // ...and the payload region matches the original. + assert_eq!(recovered[1][SHARD_HEADER..], pkts[1][SHARD_HEADER..]); + } }