From 4d26f61e40d69e1c73166e69b68ba669a14a932d Mon Sep 17 00:00:00 2001 From: enricobuehler Date: Sun, 14 Jun 2026 01:06:41 +0000 Subject: [PATCH] fix(net/gso): fall back to sendmmsg on EMSGSIZE instead of tearing down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling PUNKTFUNK_GSO on a host whose egress MTU is below our UDP segment size made every GSO send return EMSGSIZE (code 90, "Message too long") — the kernel validates each GSO segment against the device MTU at send time, which plain sendmmsg does not. EMSGSIZE wasn't in gso_unsupported() (nor is_transient_io), so it propagated as a fatal "send failed — stopping stream" and instantly killed every session the moment GSO was on (observed live: connection fails instantly / speed-test 0 Mbps). Add EMSGSIZE to gso_unsupported() so it latches GSO off for the process and finishes via sendmmsg — the standard "GSO not usable on this path" fallback. Measured after: the same host+path does 1 Gbps at 0.0% loss over the real LAN via sendmmsg (and the host send path sustains a 2 Gbps probe with send_dropped=0), so GSO is a >2 Gbps optimization, not required for 1 Gbps. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/punktfunk-core/src/transport/udp.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/punktfunk-core/src/transport/udp.rs b/crates/punktfunk-core/src/transport/udp.rs index e029a45..041b7f3 100644 --- a/crates/punktfunk-core/src/transport/udp.rs +++ b/crates/punktfunk-core/src/transport/udp.rs @@ -72,13 +72,21 @@ mod gso { } } -/// True if the send error means UDP GSO isn't supported here (vs a transient/real failure) — so we -/// latch GSO off and fall back to `sendmmsg` rather than tear the stream down. +/// True if the send error means UDP GSO isn't usable on this kernel/NIC/path (vs a transient/real +/// failure) — so we latch GSO off and fall back to `sendmmsg` rather than tear the stream down. +/// `EMSGSIZE` is the important one in practice: a NIC/egress path whose effective MTU is below our +/// segment size rejects the whole GSO super-buffer at send time (the kernel validates each segment +/// against the device MTU, which plain `sendmmsg` does not) — observed live as a code-90 +/// "Message too long" that instantly killed the stream. Treat it as "no GSO here" and fall back. #[cfg(target_os = "linux")] fn gso_unsupported(e: &std::io::Error) -> bool { matches!( e.raw_os_error(), - Some(libc::ENOPROTOOPT) | Some(libc::EOPNOTSUPP) | Some(libc::EINVAL) | Some(libc::EIO) + Some(libc::ENOPROTOOPT) + | Some(libc::EOPNOTSUPP) + | Some(libc::EINVAL) + | Some(libc::EIO) + | Some(libc::EMSGSIZE) ) }