fix(encode/windows): AMF latency — honor the loop's blocking-poll contract + preset polish
apple / swift (push) Successful in 1m6s
windows-drivers / driver-build (push) Successful in 1m34s
windows-drivers / probe-and-proto (push) Successful in 20s
ci / rust (push) Failing after 47s
ci / web (push) Successful in 52s
windows-host / package (push) Failing after 11s
ci / docs-site (push) Successful in 1m6s
android / android (push) Successful in 3m20s
deb / build-publish (push) Failing after 46s
decky / build-publish (push) Successful in 12s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 13s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 43s
apple / screenshots (push) Successful in 5m11s
docker / deploy-docs (push) Successful in 19s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Failing after 3m27s
ci / bench (push) Successful in 4m43s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 3m24s
apple / swift (push) Successful in 1m6s
windows-drivers / driver-build (push) Successful in 1m34s
windows-drivers / probe-and-proto (push) Successful in 20s
ci / rust (push) Failing after 47s
ci / web (push) Successful in 52s
windows-host / package (push) Failing after 11s
ci / docs-site (push) Successful in 1m6s
android / android (push) Successful in 3m20s
deb / build-publish (push) Failing after 46s
decky / build-publish (push) Successful in 12s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 13s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 43s
apple / screenshots (push) Successful in 5m11s
docker / deploy-docs (push) Successful in 19s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Failing after 3m27s
ci / bench (push) Successful in 4m43s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 3m24s
The session loop's pipeline deferral was designed around direct NVENC, whose poll() BLOCKS in lock_bitstream until the owed access unit (AU) is done. libavcodec's AMF wrapper is truly async (receive_packet returns EAGAIN until the ASIC finishes), so a single non-blocking receive quantized AU retrieval to the submit cadence, adding a flat +1–2 frame periods of latency (~43 ms p50 at 720p60 on the Ryzen iGPU vs ~3.5 ms of actual encode).

FfmpegWinEncoder now tracks in-flight frames and, while an AU is owed, spin-polls with short sleeps bounded to ~2 frame periods, with a 10 ms minimum (an overloaded encoder degrades to next-tick pickup instead of stalling capture).

Also: quality=speed (latency-first, iGPU-class VCN), explicit bf=0 (h264_amf defaults >0 on RDNA3+), AMF low-latency submission mode (FFmpeg ≥6.1, ignored on older).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -217,9 +217,17 @@ unsafe fn open_win_encoder(
|
|||||||
WinVendor::Amf => {
|
WinVendor::Amf => {
|
||||||
opts.set("usage", "ultralowlatency");
|
opts.set("usage", "ultralowlatency");
|
||||||
opts.set("rc", "cbr");
|
opts.set("rc", "cbr");
|
||||||
opts.set("quality", "balanced");
|
// Streaming is latency-first: `speed` trims per-frame motion-estimation depth — the
|
||||||
|
// difference between ~encode-time and ~frame-budget on iGPU-class VCN (matches the
|
||||||
|
// low-latency preset choice on the NVENC path).
|
||||||
|
opts.set("quality", "speed");
|
||||||
opts.set("preanalysis", "false");
|
opts.set("preanalysis", "false");
|
||||||
opts.set("enforce_hrd", "true");
|
opts.set("enforce_hrd", "true");
|
||||||
|
// AMF low-latency submission mode (FFmpeg ≥ 6.1; older builds ignore the unknown option).
|
||||||
|
opts.set("latency", "true");
|
||||||
|
// Never B-frames: h264_amf defaults >0 on RDNA3+ HW that supports them, and each
|
||||||
|
// B-frame is a full frame period of added latency. (HEVC VCN has none; ignored there.)
|
||||||
|
opts.set("bf", "0");
|
||||||
// VPS/SPS/PPS on each IDR (clean mid-stream join) — HEVC/AV1 only; ignored elsewhere.
|
// VPS/SPS/PPS on each IDR (clean mid-stream join) — HEVC/AV1 only; ignored elsewhere.
|
||||||
opts.set("header_insertion_mode", "idr");
|
opts.set("header_insertion_mode", "idr");
|
||||||
}
|
}
|
||||||
@@ -292,14 +300,22 @@ pub fn probe_can_encode(vendor: WinVendor, codec: Codec) -> bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// One `receive_packet` attempt, with the not-ready states kept distinct so the blocking poll
|
||||||
|
/// below can tell "still encoding" (retry) from "stream over" (stop).
|
||||||
|
enum PollOutcome {
|
||||||
|
Packet(EncodedFrame),
|
||||||
|
Again,
|
||||||
|
Eof,
|
||||||
|
}
|
||||||
|
|
||||||
/// Drain the encoder for one packet (shared poll logic, identical to the VAAPI/NVENC paths).
|
/// Drain the encoder for one packet (shared poll logic, identical to the VAAPI/NVENC paths).
|
||||||
fn poll_encoder(enc: &mut encoder::video::Encoder, fps: u32) -> Result<Option<EncodedFrame>> {
|
fn poll_encoder(enc: &mut encoder::video::Encoder, fps: u32) -> Result<PollOutcome> {
|
||||||
let mut pkt = Packet::empty();
|
let mut pkt = Packet::empty();
|
||||||
match enc.receive_packet(&mut pkt) {
|
match enc.receive_packet(&mut pkt) {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
let data = pkt.data().map(|d| d.to_vec()).unwrap_or_default();
|
let data = pkt.data().map(|d| d.to_vec()).unwrap_or_default();
|
||||||
let pts = pkt.pts().unwrap_or(0).max(0) as u64;
|
let pts = pkt.pts().unwrap_or(0).max(0) as u64;
|
||||||
Ok(Some(EncodedFrame {
|
Ok(PollOutcome::Packet(EncodedFrame {
|
||||||
data,
|
data,
|
||||||
pts_ns: pts * 1_000_000_000 / fps as u64,
|
pts_ns: pts * 1_000_000_000 / fps as u64,
|
||||||
keyframe: pkt.is_key(),
|
keyframe: pkt.is_key(),
|
||||||
@@ -309,9 +325,9 @@ fn poll_encoder(enc: &mut encoder::video::Encoder, fps: u32) -> Result<Option<En
|
|||||||
if errno == ffmpeg::util::error::EAGAIN
|
if errno == ffmpeg::util::error::EAGAIN
|
||||||
|| errno == ffmpeg::util::error::EWOULDBLOCK =>
|
|| errno == ffmpeg::util::error::EWOULDBLOCK =>
|
||||||
{
|
{
|
||||||
Ok(None)
|
Ok(PollOutcome::Again)
|
||||||
}
|
}
|
||||||
Err(ffmpeg::Error::Eof) => Ok(None),
|
Err(ffmpeg::Error::Eof) => Ok(PollOutcome::Eof),
|
||||||
Err(e) => Err(e).context("receive_packet"),
|
Err(e) => Err(e).context("receive_packet"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1100,6 +1116,9 @@ pub struct FfmpegWinEncoder {
|
|||||||
bound_device: isize,
|
bound_device: isize,
|
||||||
frame_idx: i64,
|
frame_idx: i64,
|
||||||
force_kf: bool,
|
force_kf: bool,
|
||||||
|
/// Frames sent to libavcodec whose AUs haven't been received yet. `poll` blocks (bounded)
|
||||||
|
/// while this is non-zero — see the poll-contract note on [`Encoder::poll`] below.
|
||||||
|
in_flight: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Raw FFI pointers + COM objects; the encoder lives on a single thread (same contract as NVENC/VAAPI).
|
// Raw FFI pointers + COM objects; the encoder lives on a single thread (same contract as NVENC/VAAPI).
|
||||||
@@ -1161,6 +1180,7 @@ impl FfmpegWinEncoder {
|
|||||||
bound_device: 0,
|
bound_device: 0,
|
||||||
frame_idx: 0,
|
frame_idx: 0,
|
||||||
force_kf: false,
|
force_kf: false,
|
||||||
|
in_flight: 0,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1231,7 +1251,7 @@ impl Encoder for FfmpegWinEncoder {
|
|||||||
self.frame_idx += 1;
|
self.frame_idx += 1;
|
||||||
let idr = self.force_kf;
|
let idr = self.force_kf;
|
||||||
self.force_kf = false;
|
self.force_kf = false;
|
||||||
match &captured.payload {
|
let submitted = match &captured.payload {
|
||||||
FramePayload::D3d11(f) => {
|
FramePayload::D3d11(f) => {
|
||||||
self.ensure_inner_d3d11(&f.device)?;
|
self.ensure_inner_d3d11(&f.device)?;
|
||||||
// If zero-copy is active but the capturer fell back to a format the NV12/P010 pool
|
// If zero-copy is active but the capturer fell back to a format the NV12/P010 pool
|
||||||
@@ -1271,18 +1291,53 @@ impl Encoder for FfmpegWinEncoder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
if submitted.is_ok() {
|
||||||
|
self.in_flight += 1;
|
||||||
}
|
}
|
||||||
|
submitted
|
||||||
}
|
}
|
||||||
|
|
||||||
fn request_keyframe(&mut self) {
|
fn request_keyframe(&mut self) {
|
||||||
self.force_kf = true;
|
self.force_kf = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Poll-contract note: the session encode loop's pipelining treats a `None` from `poll` as
|
||||||
|
/// "come back next tick" and was designed around direct NVENC, whose poll BLOCKS in
|
||||||
|
/// `lock_bitstream` until the owed AU is done. libavcodec's AMF wrapper is truly async
|
||||||
|
/// (EAGAIN until the ASIC finishes), so a single non-blocking try quantizes AU retrieval to
|
||||||
|
/// the submit cadence — measured +1–2 frame periods (~43 ms p50 at 720p60 on the Ryzen iGPU,
|
||||||
|
/// vs ~3.5 ms of actual encode). While an AU is owed (`in_flight > 0`), spin-poll with short
|
||||||
|
/// sleeps like NVENC's blocking wait, bounded to ~2 frame periods so an overloaded encoder
|
||||||
|
/// degrades back to next-tick pickup instead of stalling capture.
|
||||||
fn poll(&mut self) -> Result<Option<EncodedFrame>> {
|
fn poll(&mut self) -> Result<Option<EncodedFrame>> {
|
||||||
match &mut self.inner {
|
let fps = self.fps;
|
||||||
Some(Inner::System(s)) => poll_encoder(&mut s.enc, self.fps),
|
let enc = match &mut self.inner {
|
||||||
Some(Inner::ZeroCopy(z)) => poll_encoder(&mut z.enc, self.fps),
|
Some(Inner::System(s)) => &mut s.enc,
|
||||||
None => Ok(None),
|
Some(Inner::ZeroCopy(z)) => &mut z.enc,
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
|
let deadline = (self.in_flight > 0).then(|| {
|
||||||
|
std::time::Instant::now()
|
||||||
|
+ std::time::Duration::from_micros((2_000_000 / fps.max(1) as u64).max(10_000))
|
||||||
|
});
|
||||||
|
loop {
|
||||||
|
match poll_encoder(enc, fps)? {
|
||||||
|
PollOutcome::Packet(au) => {
|
||||||
|
self.in_flight = self.in_flight.saturating_sub(1);
|
||||||
|
return Ok(Some(au));
|
||||||
|
}
|
||||||
|
PollOutcome::Eof => {
|
||||||
|
self.in_flight = 0; // flushed: nothing further is owed
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
PollOutcome::Again => match deadline {
|
||||||
|
Some(d) if std::time::Instant::now() < d => {
|
||||||
|
std::thread::sleep(std::time::Duration::from_micros(250));
|
||||||
|
}
|
||||||
|
_ => return Ok(None),
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user