fix(encode/windows): AMF latency — honor the loop's blocking-poll contract + preset polish
apple / swift (push) Successful in 1m6s
windows-drivers / driver-build (push) Successful in 1m34s
windows-drivers / probe-and-proto (push) Successful in 20s
ci / rust (push) Failing after 47s
ci / web (push) Successful in 52s
windows-host / package (push) Failing after 11s
ci / docs-site (push) Successful in 1m6s
android / android (push) Successful in 3m20s
deb / build-publish (push) Failing after 46s
decky / build-publish (push) Successful in 12s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 13s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 43s
apple / screenshots (push) Successful in 5m11s
docker / deploy-docs (push) Successful in 19s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Failing after 3m27s
ci / bench (push) Successful in 4m43s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 3m24s
apple / swift (push) Successful in 1m6s
windows-drivers / driver-build (push) Successful in 1m34s
windows-drivers / probe-and-proto (push) Successful in 20s
ci / rust (push) Failing after 47s
ci / web (push) Successful in 52s
windows-host / package (push) Failing after 11s
ci / docs-site (push) Successful in 1m6s
android / android (push) Successful in 3m20s
deb / build-publish (push) Failing after 46s
decky / build-publish (push) Successful in 12s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 13s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 43s
apple / screenshots (push) Successful in 5m11s
docker / deploy-docs (push) Successful in 19s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Failing after 3m27s
ci / bench (push) Successful in 4m43s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 3m24s
The session loop's pipeline deferral was designed around direct NVENC, whose poll() BLOCKS in lock_bitstream; libavcodec's AMF wrapper is truly async (EAGAIN until the ASIC finishes), so a single non-blocking receive quantized AU retrieval to the submit cadence: a flat +1–2 frame periods of added latency (~43 ms p50 at 720p60 on the Ryzen iGPU vs ~3.5 ms of actual encode). FfmpegWinEncoder now tracks in-flight frames and, while an AU is owed, spin-polls with short sleeps bounded to ~2 frame periods (an overloaded encoder degrades to next-tick pickup instead of stalling capture).

Also: quality=speed (latency-first, iGPU-class VCN), explicit bf=0 (h264_amf defaults >0 on RDNA3+), AMF low-latency submission mode (FFmpeg ≥6.1, ignored on older versions).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -217,9 +217,17 @@ unsafe fn open_win_encoder(
|
||||
WinVendor::Amf => {
|
||||
opts.set("usage", "ultralowlatency");
|
||||
opts.set("rc", "cbr");
|
||||
opts.set("quality", "balanced");
|
||||
// Streaming is latency-first: `speed` trims per-frame motion-estimation depth — the
|
||||
// difference between ~encode-time and ~frame-budget on iGPU-class VCN (matches the
|
||||
// low-latency preset choice on the NVENC path).
|
||||
opts.set("quality", "speed");
|
||||
opts.set("preanalysis", "false");
|
||||
opts.set("enforce_hrd", "true");
|
||||
// AMF low-latency submission mode (FFmpeg ≥ 6.1; unknown-option-ignored on older).
|
||||
opts.set("latency", "true");
|
||||
// Never B-frames: h264_amf defaults >0 on RDNA3+ HW that supports them, and each
|
||||
// B-frame is a full frame period of added latency. (HEVC VCN has none; ignored there.)
|
||||
opts.set("bf", "0");
|
||||
// VPS/SPS/PPS on each IDR (clean mid-stream join) — HEVC/AV1 only; ignored elsewhere.
|
||||
opts.set("header_insertion_mode", "idr");
|
||||
}
|
||||
@@ -292,14 +300,22 @@ pub fn probe_can_encode(vendor: WinVendor, codec: Codec) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
/// One `receive_packet` attempt, with the not-ready states kept distinct so the blocking poll
|
||||
/// below can tell "still encoding" (retry) from "stream over" (stop).
|
||||
enum PollOutcome {
|
||||
Packet(EncodedFrame),
|
||||
Again,
|
||||
Eof,
|
||||
}
|
||||
|
||||
/// Drain the encoder for one packet (shared poll logic, identical to the VAAPI/NVENC paths).
|
||||
fn poll_encoder(enc: &mut encoder::video::Encoder, fps: u32) -> Result<Option<EncodedFrame>> {
|
||||
fn poll_encoder(enc: &mut encoder::video::Encoder, fps: u32) -> Result<PollOutcome> {
|
||||
let mut pkt = Packet::empty();
|
||||
match enc.receive_packet(&mut pkt) {
|
||||
Ok(()) => {
|
||||
let data = pkt.data().map(|d| d.to_vec()).unwrap_or_default();
|
||||
let pts = pkt.pts().unwrap_or(0).max(0) as u64;
|
||||
Ok(Some(EncodedFrame {
|
||||
Ok(PollOutcome::Packet(EncodedFrame {
|
||||
data,
|
||||
pts_ns: pts * 1_000_000_000 / fps as u64,
|
||||
keyframe: pkt.is_key(),
|
||||
@@ -309,9 +325,9 @@ fn poll_encoder(enc: &mut encoder::video::Encoder, fps: u32) -> Result<Option<En
|
||||
if errno == ffmpeg::util::error::EAGAIN
|
||||
|| errno == ffmpeg::util::error::EWOULDBLOCK =>
|
||||
{
|
||||
Ok(None)
|
||||
Ok(PollOutcome::Again)
|
||||
}
|
||||
Err(ffmpeg::Error::Eof) => Ok(None),
|
||||
Err(ffmpeg::Error::Eof) => Ok(PollOutcome::Eof),
|
||||
Err(e) => Err(e).context("receive_packet"),
|
||||
}
|
||||
}
|
||||
@@ -1100,6 +1116,9 @@ pub struct FfmpegWinEncoder {
|
||||
bound_device: isize,
|
||||
frame_idx: i64,
|
||||
force_kf: bool,
|
||||
/// Frames sent to libavcodec whose AUs haven't been received yet. `poll` blocks (bounded)
|
||||
/// while this is non-zero — see the poll-contract note on [`Encoder::poll`] below.
|
||||
in_flight: usize,
|
||||
}
|
||||
|
||||
// Raw FFI pointers + COM objects; the encoder lives on a single thread (same contract as NVENC/VAAPI).
|
||||
@@ -1161,6 +1180,7 @@ impl FfmpegWinEncoder {
|
||||
bound_device: 0,
|
||||
frame_idx: 0,
|
||||
force_kf: false,
|
||||
in_flight: 0,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1231,7 +1251,7 @@ impl Encoder for FfmpegWinEncoder {
|
||||
self.frame_idx += 1;
|
||||
let idr = self.force_kf;
|
||||
self.force_kf = false;
|
||||
match &captured.payload {
|
||||
let submitted = match &captured.payload {
|
||||
FramePayload::D3d11(f) => {
|
||||
self.ensure_inner_d3d11(&f.device)?;
|
||||
// If zero-copy is active but the capturer fell back to a format the NV12/P010 pool
|
||||
@@ -1271,18 +1291,53 @@ impl Encoder for FfmpegWinEncoder {
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
if submitted.is_ok() {
|
||||
self.in_flight += 1;
|
||||
}
|
||||
submitted
|
||||
}
|
||||
|
||||
fn request_keyframe(&mut self) {
|
||||
self.force_kf = true;
|
||||
}
|
||||
|
||||
/// Poll-contract note: the session encode loop's pipelining treats a `None` from `poll` as
|
||||
/// "come back next tick" and was designed around direct NVENC, whose poll BLOCKS in
|
||||
/// `lock_bitstream` until the owed AU is done. libavcodec's AMF wrapper is truly async
|
||||
/// (EAGAIN until the ASIC finishes), so a single non-blocking try quantizes AU retrieval to
|
||||
/// the submit cadence — measured +1–2 frame periods (~43 ms p50 at 720p60 on the Ryzen iGPU,
|
||||
/// vs ~3.5 ms of actual encode). While an AU is owed (`in_flight > 0`), spin-poll with short
|
||||
/// sleeps like NVENC's blocking wait, bounded to ~2 frame periods so an overloaded encoder
|
||||
/// degrades back to next-tick pickup instead of stalling capture.
|
||||
fn poll(&mut self) -> Result<Option<EncodedFrame>> {
|
||||
match &mut self.inner {
|
||||
Some(Inner::System(s)) => poll_encoder(&mut s.enc, self.fps),
|
||||
Some(Inner::ZeroCopy(z)) => poll_encoder(&mut z.enc, self.fps),
|
||||
None => Ok(None),
|
||||
let fps = self.fps;
|
||||
let enc = match &mut self.inner {
|
||||
Some(Inner::System(s)) => &mut s.enc,
|
||||
Some(Inner::ZeroCopy(z)) => &mut z.enc,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let deadline = (self.in_flight > 0).then(|| {
|
||||
std::time::Instant::now()
|
||||
+ std::time::Duration::from_micros((2_000_000 / fps.max(1) as u64).max(10_000))
|
||||
});
|
||||
loop {
|
||||
match poll_encoder(enc, fps)? {
|
||||
PollOutcome::Packet(au) => {
|
||||
self.in_flight = self.in_flight.saturating_sub(1);
|
||||
return Ok(Some(au));
|
||||
}
|
||||
PollOutcome::Eof => {
|
||||
self.in_flight = 0; // flushed: nothing further is owed
|
||||
return Ok(None);
|
||||
}
|
||||
PollOutcome::Again => match deadline {
|
||||
Some(d) if std::time::Instant::now() < d => {
|
||||
std::thread::sleep(std::time::Duration::from_micros(250));
|
||||
}
|
||||
_ => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user