feat(host/windows): openh264 software H.264 encoder (GPU-less path)
apple / swift (push) Successful in 53s
android / android (push) Failing after 1m31s
ci / rust (push) Failing after 45s
ci / web (push) Successful in 27s
ci / docs-site (push) Successful in 29s
ci / bench (push) Successful in 1m37s
decky / build-publish (push) Successful in 11s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 5s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 3s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 4s
flatpak / build-publish (push) Failing after 2s
deb / build-publish (push) Successful in 3m6s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 1m21s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Failing after 1m46s
docker / deploy-docs (push) Successful in 18s

Windows Encoder impl via the openh264 crate (statically-bundled, BSD-2): low-latency screen-content config (Baseline/no-B-frames, bitrate RC, BT.709 limited, near-infinite GOP + forced-IDR recovery via request_keyframe), packed CPU pixels (BGRx/BGRA/RGB/RGBA/RGBx/BGR) -> I420 -> AnnexB with in-band SPS/PPS each IDR. Synchronous: submit encodes immediately, poll hands back the one AU, flush is a no-op. Windows open_video factory selects it (PUNKTFUNK_ENCODER=software|nvenc|auto; NVENC arm lands later), H.264-only with a clear error otherwise, SW bitrate ceiling. Unit-tested live on the VM: synthetic BGRx -> AnnexB IDR + SPS NAL. Unblocks the GPU-less capture->encode->FEC->send pipeline. Compiles clean on Windows + Linux.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 00:43:19 +00:00
parent cce2eb60f6
commit cbbeaa5c29
4 changed files with 319 additions and 2 deletions
Generated
+56
View File
@@ -448,6 +448,12 @@ version = "3.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
[[package]]
name = "bytemuck"
version = "1.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.11.1" version = "1.11.1"
@@ -2049,6 +2055,15 @@ dependencies = [
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
[[package]]
name = "nasm-rs"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "706bf8a5e8c8ddb99128c3291d31bd21f4bcde17f0f4c20ec678d85c74faa149"
dependencies = [
"log",
]
[[package]] [[package]]
name = "ndk" name = "ndk"
version = "0.9.0" version = "0.9.0"
@@ -2240,6 +2255,27 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381"
[[package]]
name = "openh264"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a12b82c14f702c2cece4e0fc28896c6a6bed5317dc13448c86ac41df91a6f82"
dependencies = [
"openh264-sys2",
"wide",
]
[[package]]
name = "openh264-sys2"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa9e072e9b270f3b291c80488dc160abc31ecc214ab3bfde937213cfd8c83b32"
dependencies = [
"cc",
"nasm-rs",
"walkdir",
]
[[package]] [[package]]
name = "openssl-probe" name = "openssl-probe"
version = "0.2.1" version = "0.2.1"
@@ -2579,6 +2615,7 @@ dependencies = [
"khronos-egl", "khronos-egl",
"libc", "libc",
"mdns-sd", "mdns-sd",
"openh264",
"opus", "opus",
"pipewire", "pipewire",
"punktfunk-core", "punktfunk-core",
@@ -3071,6 +3108,15 @@ version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
[[package]]
name = "safe_arch"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96b02de82ddbe1b636e6170c21be622223aea188ef2e139be0a5b219ec215323"
dependencies = [
"bytemuck",
]
[[package]] [[package]]
name = "same-file" name = "same-file"
version = "1.0.6" version = "1.0.6"
@@ -4124,6 +4170,16 @@ dependencies = [
"rustls-pki-types", "rustls-pki-types",
] ]
[[package]]
name = "wide"
version = "0.7.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce5da8ecb62bcd8ec8b7ea19f69a51275e91299be594ea5cc6ef7819e16cd03"
dependencies = [
"bytemuck",
"safe_arch",
]
[[package]] [[package]]
name = "winapi-util" name = "winapi-util"
version = "0.1.11" version = "0.1.11"
+3
View File
@@ -117,3 +117,6 @@ windows = { version = "0.62", features = [
"Win32_UI_WindowsAndMessaging", "Win32_UI_WindowsAndMessaging",
"Win32_System_StationsAndDesktops", "Win32_System_StationsAndDesktops",
] } ] }
# Software H.264 encoder (GPU-less path + NVENC fallback). The default `source` feature statically
# compiles OpenH264 (BSD-2) — no system lib, builds on MSVC; nasm on PATH adds the SIMD fast path.
openh264 = "0.9"
+27 -2
View File
@@ -155,15 +155,40 @@ pub fn open_video(
} }
Err(last.unwrap_or_else(|| anyhow::anyhow!("encoder open failed at every probed bitrate"))) Err(last.unwrap_or_else(|| anyhow::anyhow!("encoder open failed at every probed bitrate")))
} }
#[cfg(not(target_os = "linux"))] #[cfg(target_os = "windows")]
{
let _ = cuda; // always false on Windows (no Cuda payload)
let pref = std::env::var("PUNKTFUNK_ENCODER")
.unwrap_or_default()
.to_ascii_lowercase();
if matches!(pref.as_str(), "nvenc" | "hw" | "nvidia") {
anyhow::bail!(
"NVENC hardware encode is not yet implemented on Windows — omit PUNKTFUNK_ENCODER \
or set it to 'software' to use the openh264 encoder"
);
}
anyhow::ensure!(
codec == Codec::H264,
"the Windows software encoder supports H.264 only; client negotiated {codec:?} \
(request H264, or use a GPU host once NVENC lands)"
);
// Software H.264 realistically caps far below the negotiated hardware rates.
const SW_BITRATE_CEIL: u64 = 100_000_000;
let enc =
sw::OpenH264Encoder::open(format, width, height, fps, bitrate_bps.min(SW_BITRATE_CEIL))?;
Ok(Box::new(enc) as Box<dyn Encoder>)
}
#[cfg(not(any(target_os = "linux", target_os = "windows")))]
{ {
let _ = (codec, format, width, height, fps, bitrate_bps, cuda); let _ = (codec, format, width, height, fps, bitrate_bps, cuda);
anyhow::bail!("NVENC encode requires Linux (FFmpeg + NVIDIA driver)") anyhow::bail!("video encode requires Linux or Windows")
} }
} }
#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
mod linux; mod linux;
#[cfg(target_os = "windows")]
mod sw;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
+233
View File
@@ -0,0 +1,233 @@
//! Software H.264 encoder (openh264) — the GPU-less encode path for the Windows host (and a
//! fallback when NVENC is unavailable). Low-latency screen-content config: single-reference,
//! no B-frames (Baseline), bitrate rate-control, in-band SPS/PPS each IDR, BT.709 limited range.
//! Synchronous: `submit` encodes immediately and stashes the AU for `poll` (no internal queue).
use super::{EncodedFrame, Encoder};
use crate::capture::{CapturedFrame, FramePayload, PixelFormat};
use anyhow::{bail, ensure, Context, Result};
use openh264::encoder::{
BitRate, Complexity, Encoder as Oh264, EncoderConfig, FrameRate, FrameType, IntraFramePeriod,
Profile, RateControlMode, SpsPpsStrategy, UsageType,
};
use openh264::formats::{BgraSliceU8, RgbSliceU8, YUVBuffer};
use openh264::OpenH264API;
/// Synchronous software H.264 encoder backed by openh264.
///
/// Each `submit` converts one packed CPU frame into `yuv`, encodes it immediately and parks the
/// resulting access unit in `pending` until the next `poll`.
pub struct OpenH264Encoder {
/// The openh264 encoder instance, configured once in `open`.
enc: Oh264,
/// Reused YUV conversion target (`width` x `height`), refilled from the captured pixels on
/// every `submit` before encoding.
yuv: YUVBuffer,
/// Frame width in pixels; every captured frame must match it.
width: u32,
/// Frame height in pixels; every captured frame must match it.
height: u32,
/// Nominal frame rate; `submit` uses it to derive `pts_ns` from `frame_idx`.
fps: u32,
/// Packed pixel layout every captured frame must arrive in.
src_format: PixelFormat,
/// BGRA staging buffer for the layouts that are repacked before conversion: 3-byte `Bgr` and
/// the R/B-swapped `Rgba`/`Rgbx`. Starts empty, is sized on first use and reused across frames.
scratch: Vec<u8>,
/// Number of frames encoded so far (incremented after each successful encode).
frame_idx: i64,
/// Latched by `request_keyframe`; the next `submit` forces an intra frame and clears it.
force_kf: bool,
/// At most one AU per submit (no lookahead), handed back by the next `poll`. A later submit
/// that produces output overwrites an AU that was never polled.
pending: Option<EncodedFrame>,
}
// SAFETY: openh264's `Encoder` wraps a raw C handle and is therefore not auto-`Send`. The
// encoder is intended to be moved onto the single encode thread and used only there, never
// shared between threads.
// NOTE(review): this relies on the underlying C encoder having no thread affinity (safe to
// create on one thread and drive from another) — confirm against the openh264 documentation.
unsafe impl Send for OpenH264Encoder {}
impl OpenH264Encoder {
    /// Creates a software H.264 encoder for `width` x `height` frames delivered as `format`.
    ///
    /// The encoder is configured for low-latency screen content: Baseline profile, bitrate
    /// rate control, no frame skipping, no scene-change IDRs and low complexity. The IDR
    /// interval and thread count come from `intra_period_frames` / `num_threads` (both
    /// overridable through the environment).
    ///
    /// * `bitrate_bps` — target bitrate; values above `u32::MAX` are clamped to `u32::MAX`.
    /// * `fps` — nominal frame rate; `0` is treated as `1` for the encoder's frame-rate setting.
    ///
    /// # Errors
    /// Returns an error if openh264 fails to create an encoder from the configuration.
    pub fn open(
        format: PixelFormat,
        width: u32,
        height: u32,
        fps: u32,
        bitrate_bps: u64,
    ) -> Result<Self> {
        // validate_dimensions() ran in open_video: even, non-zero, <= 4096.
        let bps: u32 = bitrate_bps.try_into().unwrap_or(u32::MAX);
        let cfg = EncoderConfig::new()
            .usage_type(UsageType::ScreenContentRealTime)
            .max_frame_rate(FrameRate::from_hz(fps.max(1) as f32))
            .rate_control_mode(RateControlMode::Bitrate)
            .bitrate(BitRate::from_bps(bps))
            .skip_frames(false)
            .intra_frame_period(IntraFramePeriod::from_num_frames(intra_period_frames(fps)))
            .sps_pps_strategy(SpsPpsStrategy::ConstantId) // SPS/PPS in-band on every IDR
            .num_threads(num_threads())
            .scene_change_detect(false) // no surprise IDRs (bitrate spikes / freeze)
            .adaptive_quantization(true)
            .complexity(Complexity::Low) // latency over BD-rate
            .profile(Profile::Baseline); // no B-frames; BT.709 limited is the crate default VUI
        let api = OpenH264API::from_source(); // statically-bundled build (default `source` feature)
        let enc = Oh264::with_api_config(api, cfg).context("openh264 Encoder::with_api_config")?;
        let yuv = YUVBuffer::new(width as usize, height as usize);
        tracing::info!(
            "openh264 software encoder: {width}x{height}@{fps} {} Mbps (Baseline, screen-content)",
            bps / 1_000_000
        );
        Ok(Self {
            enc,
            yuv,
            width,
            height,
            fps,
            src_format: format,
            // Sized lazily by `normalize_to_bgra`; stays empty for formats that never need it.
            scratch: Vec::new(),
            frame_idx: 0,
            force_kf: false,
            pending: None,
        })
    }

    /// Repacks a tightly packed source buffer into the reused BGRA `scratch`
    /// (`[B, G, R, 0xff]` per pixel).
    ///
    /// * `src` — at least `width * height * src_bpp` bytes; panics if it is shorter.
    /// * `src_bpp` — bytes per source pixel (3 or 4); only the first three bytes of each
    ///   pixel are read, the output alpha is always `0xff`.
    /// * `rgb_order` — `true` when the source is R,G,B (R and B are swapped on the way in);
    ///   `false` when it is already B,G,R.
    fn normalize_to_bgra(&mut self, src: &[u8], src_bpp: usize, rgb_order: bool) {
        debug_assert!(src_bpp >= 3, "source pixels must carry at least three channels");
        let pixels = self.width as usize * self.height as usize;
        self.scratch.resize(pixels * 4, 0);
        // One up-front slice keeps the panic-on-short-input contract while replacing the two
        // range-slice bounds checks the loop used to perform for every pixel.
        let src = &src[..pixels * src_bpp];
        // Channel order is loop-invariant: decide once which source bytes feed B and R.
        let (b_at, r_at) = if rgb_order { (2, 0) } else { (0, 2) };
        for (dst, px) in self
            .scratch
            .chunks_exact_mut(4)
            .zip(src.chunks_exact(src_bpp))
        {
            dst[0] = px[b_at];
            dst[1] = px[1];
            dst[2] = px[r_at];
            dst[3] = 0xff;
        }
    }
}
impl Encoder for OpenH264Encoder {
    /// Converts one captured CPU frame to YUV, encodes it right away and parks the resulting
    /// access unit for the next `poll`.
    ///
    /// # Errors
    /// Fails when the frame's dimensions or pixel format differ from what the encoder was
    /// opened with, when the payload is not CPU memory, when the buffer is shorter than
    /// `width * height * bytes_per_pixel`, or when openh264 reports an encode error.
    fn submit(&mut self, captured: &CapturedFrame) -> Result<()> {
        if captured.width != self.width || captured.height != self.height {
            bail!(
                "captured {}x{} != encoder {}x{}",
                captured.width,
                captured.height,
                self.width,
                self.height
            );
        }
        if captured.format != self.src_format {
            bail!(
                "captured format {:?} != encoder source {:?}",
                captured.format,
                self.src_format
            );
        }
        // `Cpu` is currently the only payload variant off Linux, which makes this pattern
        // irrefutable for now; it turns refutable once the capture backend grows
        // `FramePayload::D3d11`.
        #[allow(irrefutable_let_patterns)]
        let FramePayload::Cpu(pixels) = &captured.payload
        else {
            bail!("openh264 backend requires a CPU frame payload");
        };
        let (w, h) = (self.width as usize, self.height as usize);
        let area = w * h;
        if pixels.len() < area * self.src_format.bytes_per_pixel() {
            bail!(
                "captured buffer {} bytes too small for {w}x{h} {:?}",
                pixels.len(),
                self.src_format
            );
        }

        // Fill the YUV buffer: layouts openh264 can wrap are read in place, the rest are
        // repacked into the BGRA scratch buffer first.
        match self.src_format {
            PixelFormat::Bgra | PixelFormat::Bgrx => {
                self.yuv.read_rgb(BgraSliceU8::new(&pixels[..area * 4], (w, h)));
            }
            PixelFormat::Rgb => {
                self.yuv.read_rgb(RgbSliceU8::new(&pixels[..area * 3], (w, h)));
            }
            PixelFormat::Bgr => {
                self.normalize_to_bgra(pixels, 3, false);
                self.yuv.read_rgb(BgraSliceU8::new(&self.scratch, (w, h)));
            }
            PixelFormat::Rgba | PixelFormat::Rgbx => {
                self.normalize_to_bgra(pixels, 4, true);
                self.yuv.read_rgb(BgraSliceU8::new(&self.scratch, (w, h)));
            }
        }

        // Consume a latched keyframe request (the flag is cleared whether or not the encode
        // below succeeds).
        if std::mem::take(&mut self.force_kf) {
            self.enc.force_intra_frame();
        }

        let bitstream = self.enc.encode(&self.yuv).context("openh264 encode")?;
        let mut annexb = Vec::new();
        bitstream.write_vec(&mut annexb); // AnnexB start codes; SPS/PPS prepended on IDR
        if !annexb.is_empty() {
            self.pending = Some(EncodedFrame {
                keyframe: matches!(bitstream.frame_type(), FrameType::IDR | FrameType::I),
                // Presentation time is synthesised from the frame counter and nominal fps.
                pts_ns: self.frame_idx as u64 * 1_000_000_000 / u64::from(self.fps.max(1)),
                data: annexb,
            });
        }
        // Counted only after a successful encode, even when no bytes were produced.
        self.frame_idx += 1;
        Ok(())
    }

    /// Latches a keyframe request; the next `submit` forces an intra frame.
    fn request_keyframe(&mut self) {
        self.force_kf = true;
    }

    /// Hands back the access unit stashed by the last `submit`, if any, leaving the slot empty.
    fn poll(&mut self) -> Result<Option<EncodedFrame>> {
        let ready = self.pending.take();
        Ok(ready)
    }

    /// No-op: encoding is synchronous, so nothing is buffered inside this wrapper.
    fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}
/// IDR interval in frames, approximating an infinite GOP: automatic IDRs are rare because
/// recovery is expected to come from `request_keyframe`.
///
/// If the environment variable `PUNKTFUNK_OH264_GOP` holds a valid `u32` (surrounding
/// whitespace ignored) that value is returned verbatim — per the original note, `0` leaves the
/// period to the encoder. Otherwise the interval is `600 * max(fps, 1)` frames (~10 minutes),
/// saturating at `u32::MAX`.
fn intra_period_frames(fps: u32) -> u32 {
    std::env::var("PUNKTFUNK_OH264_GOP")
        .ok()
        .and_then(|raw| raw.trim().parse::<u32>().ok())
        .unwrap_or_else(|| fps.max(1).saturating_mul(600))
}
/// Number of encoder threads.
///
/// Reads `PUNKTFUNK_OH264_THREADS` and returns it when it parses as a `u16` (surrounding
/// whitespace ignored); an unset, non-numeric or out-of-range value falls back to 2, favouring
/// latency over throughput.
fn num_threads() -> u16 {
    const DEFAULT_THREADS: u16 = 2;
    match std::env::var("PUNKTFUNK_OH264_THREADS") {
        Ok(raw) => raw.trim().parse::<u16>().unwrap_or(DEFAULT_THREADS),
        Err(_) => DEFAULT_THREADS,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::capture::{CapturedFrame, FramePayload, PixelFormat};

    /// Smoke test: one synthetic BGRx frame must come back from `poll` as an AnnexB keyframe
    /// that carries an SPS NAL in-band.
    #[test]
    fn encodes_synthetic_frame_to_annexb_idr() {
        let (w, h, fps) = (1280u32, 720u32, 60u32);
        let mut enc =
            OpenH264Encoder::open(PixelFormat::Bgrx, w, h, fps, 8_000_000).expect("open openh264");
        // A flat gray BGRx frame.
        let frame = CapturedFrame {
            width: w,
            height: h,
            pts_ns: 0,
            format: PixelFormat::Bgrx,
            payload: FramePayload::Cpu(vec![0x80u8; (w * h * 4) as usize]),
        };
        enc.submit(&frame).expect("submit");
        let au = enc.poll().expect("poll").expect("an AU");
        assert!(au.keyframe, "first frame must be an IDR");
        // AnnexB start code + an SPS NAL (type 7) somewhere in the first frame.
        assert!(
            au.data.starts_with(&[0, 0, 0, 1]) || au.data.starts_with(&[0, 0, 1]),
            "expected AnnexB start code"
        );
        // Scan for `00 00 01` followed by a NAL header of type 7. A 4-byte start code ends in
        // the 3-byte one, so this accepts both prefix lengths — the same set the start-code
        // assertion above allows.
        let has_sps = au
            .data
            .windows(4)
            .any(|nal| nal[0] == 0 && nal[1] == 0 && nal[2] == 1 && (nal[3] & 0x1f) == 7);
        assert!(has_sps, "IDR must carry an SPS NAL (type 7)");
    }
}