feat(client): request a recovery keyframe on unrecoverable loss
apple / swift (push) Successful in 54s
windows-msix / package (push) Successful in 1m0s
windows / build (push) Successful in 54s
android / android (push) Successful in 2m30s
ci / web (push) Successful in 37s
ci / docs-site (push) Successful in 38s
ci / rust (push) Successful in 4m24s
deb / build-publish (push) Successful in 2m5s
decky / build-publish (push) Successful in 25s
ci / bench (push) Successful in 4m25s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 16s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 2m38s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 2m24s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 22s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m15s
flatpak / build-publish (push) Failing after 5m13s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 4m37s
docker / deploy-docs (push) Successful in 18s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 7m26s
apple / swift (push) Successful in 54s
windows-msix / package (push) Successful in 1m0s
windows / build (push) Successful in 54s
android / android (push) Successful in 2m30s
ci / web (push) Successful in 37s
ci / docs-site (push) Successful in 38s
ci / rust (push) Successful in 4m24s
deb / build-publish (push) Successful in 2m5s
decky / build-publish (push) Successful in 25s
ci / bench (push) Successful in 4m25s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 16s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 2m38s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 2m24s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 22s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m15s
flatpak / build-publish (push) Failing after 5m13s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Failing after 4m37s
docker / deploy-docs (push) Successful in 18s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 7m26s
Under infinite GOP the punktfunk/1 plane has no periodic IDR — the only recovery keyframe is one the client requests. But the reassembler drops unrecoverable AUs silently (counting them in frames_dropped) and hands the decoder reference-missing delta frames, which libavcodec conceals and returns Ok for, so keying recovery off a decode error almost never fires under real loss, and the result is a long or permanent freeze. Surface the data-plane pump's Session.frames_dropped to NativeClient via a shared atomic (NativeClient::frames_dropped()), updated every pump iteration so it stays current through a total-loss drought. The Linux and Windows client video loops watch it and call request_keyframe() when it climbs, throttled to one request per 100 ms (the decode stays wedged for several frames until the IDR lands). macOS already does this; client-rs doesn't decode. Resolves reliability backlog #2. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -157,6 +157,9 @@ fn pump(
|
|||||||
let mut decode_us_sum = 0u64;
|
let mut decode_us_sum = 0u64;
|
||||||
let mut lat_us: Vec<u64> = Vec::with_capacity(256);
|
let mut lat_us: Vec<u64> = Vec::with_capacity(256);
|
||||||
let mut pcm = vec![0f32; 5760 * 2]; // decode scratch: max Opus frame (120 ms stereo)
|
let mut pcm = vec![0f32; 5760 * 2]; // decode scratch: max Opus frame (120 ms stereo)
|
||||||
|
// Loss recovery: watch the host→client unrecoverable-drop count and ask for an IDR when it climbs.
|
||||||
|
let mut last_dropped = connector.frames_dropped();
|
||||||
|
let mut last_kf_req: Option<Instant> = None;
|
||||||
|
|
||||||
let end: Option<String> = loop {
|
let end: Option<String> = loop {
|
||||||
if stop.load(Ordering::SeqCst) {
|
if stop.load(Ordering::SeqCst) {
|
||||||
@@ -197,6 +200,22 @@ fn pump(
|
|||||||
Err(e) => break Some(format!("session: {e:?}")),
|
Err(e) => break Some(format!("session: {e:?}")),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loss recovery: under infinite GOP the only recovery keyframe is one we request. The
|
||||||
|
// reassembler drops unrecoverable AUs (frames_dropped); the decoder then conceals the
|
||||||
|
// reference-missing delta frames that follow and returns Ok, so keying off a decode error
|
||||||
|
// rarely fires. Request an IDR when the drop count climbs, throttled — the decode stays
|
||||||
|
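// wedged for several frames until the IDR lands, so requesting every frame would flood. NOTE(review): a climb seen inside the 100 ms window still advances `last_dropped`, so it never triggers a request by itself; if the requested IDR is the frame that was dropped and no later loss follows, nothing re-requests it — confirm this is intended.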
|
||||||
|
let dropped = connector.frames_dropped();
|
||||||
|
if dropped > last_dropped {
|
||||||
|
last_dropped = dropped;
|
||||||
|
let now = Instant::now();
|
||||||
|
if last_kf_req.is_none_or(|t| now.duration_since(t) >= Duration::from_millis(100)) {
|
||||||
|
last_kf_req = Some(now);
|
||||||
|
let _ = connector.request_keyframe();
|
||||||
|
tracing::debug!(dropped, "requested keyframe (loss recovery)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Drain audio between frames (packets land every 5 ms; the queue holds 320 ms).
|
// Drain audio between frames (packets land every 5 ms; the queue holds 320 ms).
|
||||||
while let Ok(pkt) = connector.next_audio(Duration::ZERO) {
|
while let Ok(pkt) = connector.next_audio(Duration::ZERO) {
|
||||||
if let (Some(player), Some(dec)) = (&player, opus_dec.as_mut()) {
|
if let (Some(player), Some(dec)) = (&player, opus_dec.as_mut()) {
|
||||||
|
|||||||
@@ -160,6 +160,9 @@ fn pump(
|
|||||||
let mut decode_us_sum = 0u64;
|
let mut decode_us_sum = 0u64;
|
||||||
let mut lat_us: Vec<u64> = Vec::with_capacity(256);
|
let mut lat_us: Vec<u64> = Vec::with_capacity(256);
|
||||||
let mut pcm = vec![0f32; 5760 * 2]; // decode scratch: max Opus frame (120 ms stereo)
|
let mut pcm = vec![0f32; 5760 * 2]; // decode scratch: max Opus frame (120 ms stereo)
|
||||||
|
// Loss recovery: watch the host→client unrecoverable-drop count and ask for an IDR when it climbs.
|
||||||
|
let mut last_dropped = connector.frames_dropped();
|
||||||
|
let mut last_kf_req: Option<Instant> = None;
|
||||||
|
|
||||||
let end: Option<String> = loop {
|
let end: Option<String> = loop {
|
||||||
if stop.load(Ordering::SeqCst) {
|
if stop.load(Ordering::SeqCst) {
|
||||||
@@ -202,6 +205,21 @@ fn pump(
|
|||||||
Err(e) => break Some(format!("session: {e:?}")),
|
Err(e) => break Some(format!("session: {e:?}")),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loss recovery: under infinite GOP the only recovery keyframe is one we request. The
|
||||||
|
// reassembler drops unrecoverable AUs (frames_dropped); the decoder conceals the
|
||||||
|
// reference-missing delta frames that follow and returns Ok, so keying off a decode error
|
||||||
|
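// rarely fires. Request an IDR when the drop count climbs, throttled to one request per 100 ms. NOTE(review): a climb seen inside the throttle window still advances `last_dropped`, so it never triggers a request by itself; if the requested IDR is the frame that was dropped and no later loss follows, nothing re-requests it — confirm this is intended.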
|
||||||
|
let dropped = connector.frames_dropped();
|
||||||
|
if dropped > last_dropped {
|
||||||
|
last_dropped = dropped;
|
||||||
|
let now = Instant::now();
|
||||||
|
if last_kf_req.is_none_or(|t| now.duration_since(t) >= Duration::from_millis(100)) {
|
||||||
|
last_kf_req = Some(now);
|
||||||
|
let _ = connector.request_keyframe();
|
||||||
|
tracing::debug!(dropped, "requested keyframe (loss recovery)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Drain audio between frames (packets land every 5 ms; the queue holds 320 ms).
|
// Drain audio between frames (packets land every 5 ms; the queue holds 320 ms).
|
||||||
while let Ok(pkt) = connector.next_audio(Duration::ZERO) {
|
while let Ok(pkt) = connector.next_audio(Duration::ZERO) {
|
||||||
if let (Some(player), Some(dec)) = (&player, opus_dec.as_mut()) {
|
if let (Some(player), Some(dec)) = (&player, opus_dec.as_mut()) {
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ use crate::quic::{
|
|||||||
};
|
};
|
||||||
use crate::session::{Frame, Session};
|
use crate::session::{Frame, Session};
|
||||||
use crate::transport::UdpTransport;
|
use crate::transport::UdpTransport;
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||||
use std::sync::mpsc::{Receiver, RecvTimeoutError, SyncSender};
|
use std::sync::mpsc::{Receiver, RecvTimeoutError, SyncSender};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
@@ -128,6 +128,11 @@ pub struct NativeClient {
|
|||||||
/// Speed-test accumulator, shared with the data-plane pump + control task.
|
/// Speed-test accumulator, shared with the data-plane pump + control task.
|
||||||
probe: Arc<Mutex<ProbeState>>,
|
probe: Arc<Mutex<ProbeState>>,
|
||||||
shutdown: Arc<AtomicBool>,
|
shutdown: Arc<AtomicBool>,
|
||||||
|
/// Cumulative count of access units the reassembler gave up on (FEC couldn't recover), mirrored
|
||||||
|
/// from the data-plane pump's `Session`. A client video loop watches this for increases to request
|
||||||
|
/// a recovery keyframe under infinite GOP — the correct loss trigger, since unrecoverable loss
|
||||||
|
/// yields reference-missing frames the decoder silently conceals (a decode-error trigger misses them).
|
||||||
|
frames_dropped: Arc<AtomicU64>,
|
||||||
worker: Option<std::thread::JoinHandle<()>>,
|
worker: Option<std::thread::JoinHandle<()>>,
|
||||||
/// The currently active session mode (the Welcome's, then updated by every accepted
|
/// The currently active session mode (the Welcome's, then updated by every accepted
|
||||||
/// [`NativeClient::request_mode`]).
|
/// [`NativeClient::request_mode`]).
|
||||||
@@ -208,11 +213,13 @@ impl NativeClient {
|
|||||||
let shutdown = Arc::new(AtomicBool::new(false));
|
let shutdown = Arc::new(AtomicBool::new(false));
|
||||||
let mode_slot = Arc::new(std::sync::Mutex::new(mode));
|
let mode_slot = Arc::new(std::sync::Mutex::new(mode));
|
||||||
let probe = Arc::new(Mutex::new(ProbeState::default()));
|
let probe = Arc::new(Mutex::new(ProbeState::default()));
|
||||||
|
let frames_dropped = Arc::new(AtomicU64::new(0));
|
||||||
|
|
||||||
let host = host.to_string();
|
let host = host.to_string();
|
||||||
let shutdown_w = shutdown.clone();
|
let shutdown_w = shutdown.clone();
|
||||||
let mode_slot_w = mode_slot.clone();
|
let mode_slot_w = mode_slot.clone();
|
||||||
let probe_w = probe.clone();
|
let probe_w = probe.clone();
|
||||||
|
let frames_dropped_w = frames_dropped.clone();
|
||||||
let worker = std::thread::Builder::new()
|
let worker = std::thread::Builder::new()
|
||||||
.name("punktfunk-client".into())
|
.name("punktfunk-client".into())
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
@@ -253,6 +260,7 @@ impl NativeClient {
|
|||||||
shutdown: shutdown_w,
|
shutdown: shutdown_w,
|
||||||
mode_slot: mode_slot_w,
|
mode_slot: mode_slot_w,
|
||||||
probe: probe_w,
|
probe: probe_w,
|
||||||
|
frames_dropped: frames_dropped_w,
|
||||||
}));
|
}));
|
||||||
})
|
})
|
||||||
.map_err(PunktfunkError::Io)?;
|
.map_err(PunktfunkError::Io)?;
|
||||||
@@ -285,6 +293,7 @@ impl NativeClient {
|
|||||||
probe,
|
probe,
|
||||||
shutdown,
|
shutdown,
|
||||||
worker: Some(worker),
|
worker: Some(worker),
|
||||||
|
frames_dropped,
|
||||||
mode: mode_slot,
|
mode: mode_slot,
|
||||||
host_fingerprint: fingerprint,
|
host_fingerprint: fingerprint,
|
||||||
resolved_compositor,
|
resolved_compositor,
|
||||||
@@ -412,6 +421,15 @@ impl NativeClient {
|
|||||||
.map_err(|_| PunktfunkError::Closed)
|
.map_err(|_| PunktfunkError::Closed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cumulative access units the host→client reassembler dropped as unrecoverable (FEC couldn't
|
||||||
|
/// rebuild them). A video loop polls this and calls [`request_keyframe`](Self::request_keyframe)
|
||||||
|
/// when it increases — the correct loss trigger under infinite GOP, where unrecoverable loss
|
||||||
|
/// produces reference-missing delta frames the decoder silently conceals (so a decode-error
|
||||||
|
/// trigger would rarely fire). Monotonic for the session; compare against the last observed value.
|
||||||
|
pub fn frames_dropped(&self) -> u64 {
|
||||||
|
self.frames_dropped.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
/// Start a bandwidth speed test: ask the host to burst filler over the data plane at
|
/// Start a bandwidth speed test: ask the host to burst filler over the data plane at
|
||||||
/// `target_kbps` of goodput for `duration_ms`, *briefly pausing video*. Non-blocking — the
|
/// `target_kbps` of goodput for `duration_ms`, *briefly pausing video*. Non-blocking — the
|
||||||
/// measurement accumulates in the background; poll [`NativeClient::probe_result`] until its
|
/// measurement accumulates in the background; poll [`NativeClient::probe_result`] until its
|
||||||
@@ -566,6 +584,7 @@ struct WorkerArgs {
|
|||||||
shutdown: Arc<AtomicBool>,
|
shutdown: Arc<AtomicBool>,
|
||||||
mode_slot: Arc<std::sync::Mutex<Mode>>,
|
mode_slot: Arc<std::sync::Mutex<Mode>>,
|
||||||
probe: Arc<Mutex<ProbeState>>,
|
probe: Arc<Mutex<ProbeState>>,
|
||||||
|
frames_dropped: Arc<AtomicU64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The worker: QUIC handshake, then the input/datagram/control tasks + the blocking
|
/// The worker: QUIC handshake, then the input/datagram/control tasks + the blocking
|
||||||
@@ -593,6 +612,7 @@ async fn worker_main(args: WorkerArgs) {
|
|||||||
shutdown,
|
shutdown,
|
||||||
mode_slot,
|
mode_slot,
|
||||||
probe,
|
probe,
|
||||||
|
frames_dropped,
|
||||||
} = args;
|
} = args;
|
||||||
let setup = async {
|
let setup = async {
|
||||||
let remote: std::net::SocketAddr = format!("{host}:{port}")
|
let remote: std::net::SocketAddr = format!("{host}:{port}")
|
||||||
@@ -864,6 +884,10 @@ async fn worker_main(args: WorkerArgs) {
|
|||||||
let _ = tokio::task::spawn_blocking(move || {
|
let _ = tokio::task::spawn_blocking(move || {
|
||||||
pin_thread_user_interactive(); // feeds frame_tx → the client's user-interactive video pump
|
pin_thread_user_interactive(); // feeds frame_tx → the client's user-interactive video pump
|
||||||
while !pump_shutdown.load(Ordering::SeqCst) {
|
while !pump_shutdown.load(Ordering::SeqCst) {
|
||||||
|
// Mirror the reassembler's unrecoverable-drop count for the client's keyframe-recovery
|
||||||
|
// loop. Updated every iteration (not just on a produced frame) so it stays current through
|
||||||
|
// a total-loss drought where no AU completes. Cheap: a few relaxed atomic loads.
|
||||||
|
frames_dropped.store(session.stats().frames_dropped, Ordering::Relaxed);
|
||||||
match session.poll_frame() {
|
match session.poll_frame() {
|
||||||
Ok(frame) => {
|
Ok(frame) => {
|
||||||
if frame.flags & FLAG_PROBE as u32 != 0 {
|
if frame.flags & FLAG_PROBE as u32 != 0 {
|
||||||
|
|||||||
Reference in New Issue
Block a user