feat(android): live stats HUD + low-latency decode tuning
apple / swift (push) Successful in 54s
windows-msix / package (push) Successful in 1m1s
decky / build-publish (push) Has been cancelled
deb / build-publish (push) Has been cancelled
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Has been cancelled
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Has been cancelled
windows / build (push) Successful in 55s
audit / cargo-audit (push) Failing after 1m8s
android / android (push) Failing after 2m12s
ci / web (push) Successful in 31s
ci / docs-site (push) Successful in 31s
ci / bench (push) Successful in 4m31s
ci / rust (push) Successful in 6m31s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 35s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 2m44s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m25s
flatpak / build-publish (push) Successful in 5m5s
docker / deploy-docs (push) Successful in 20s
apple / swift (push) Successful in 54s
windows-msix / package (push) Successful in 1m1s
decky / build-publish (push) Has been cancelled
deb / build-publish (push) Has been cancelled
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Has been cancelled
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Has been cancelled
windows / build (push) Successful in 55s
audit / cargo-audit (push) Failing after 1m8s
android / android (push) Failing after 2m12s
ci / web (push) Successful in 31s
ci / docs-site (push) Successful in 31s
ci / bench (push) Successful in 4m31s
ci / rust (push) Successful in 6m31s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 35s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 2m44s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 2m25s
flatpak / build-publish (push) Successful in 5m5s
docker / deploy-docs (push) Successful in 20s
Stats HUD (mirrors the Apple client): the decode thread accumulates FPS, receive throughput, and capture->client latency (p50/p95, skew-corrected) in Rust (clients/android/native/src/stats.rs); nativeVideoStats drains a snapshot ~1 Hz over JNI as a DoubleArray. StreamScreen renders a Compose overlay (W*H@Hz / fps / Mb/s / latency, + dropped-under-loss), toggled by a Settings switch (persisted, default on) or a 3-finger tap. Performance (decode.rs): - ANativeWindow_setFrameRate(refresh_hz): align display vsync to the stream rate (no 60-in-120 judder); safe since minSdk 31 >= API 30. - Raise the decode thread toward URGENT_DISPLAY (best-effort setpriority) so background work can't preempt it under load. - Codec low-latency hints KEY_PRIORITY=0 (realtime) + KEY_OPERATING_RATE. Verified host-side: cargo build/clippy/fmt --workspace (the ungated stats + JNI accessor). The android-gated decode.rs (NDK) and the Kotlin build only in CI (android.yml: gradle + cargo-ndk) -- APIs verified against crate sources. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -29,6 +29,8 @@ android_logger = "0.14"
|
||||
# Pure-Rust FFI to libmediandk/libnativewindow/libaaudio — no C++/libc++_shared to bundle. Decode +
|
||||
# audio run entirely in Rust on native threads (the "no async on the hot path" invariant).
|
||||
ndk = { version = "0.9", features = ["media", "audio"] }
|
||||
# setpriority/gettid to raise the decode thread toward URGENT_DISPLAY (see decode::boost_thread_priority).
|
||||
libc = "0.2"
|
||||
# Opus decode for the host→client audio plane (0xC9: 48 kHz stereo, 5 ms frames). Same crate the
|
||||
# host + Linux client use. audiopus_sys vendors libopus (pure C) and builds it static via cmake —
|
||||
# the cargo-ndk build sets LIBOPUS_STATIC=1/LIBOPUS_NO_PKG=1 so it links the bundled lib, not the host's.
|
||||
|
||||
@@ -10,7 +10,7 @@ use ndk::media::media_codec::{
|
||||
DequeuedInputBufferResult, DequeuedOutputBufferInfoResult, MediaCodec, MediaCodecDirection,
|
||||
};
|
||||
use ndk::media::media_format::MediaFormat;
|
||||
use ndk::native_window::NativeWindow;
|
||||
use ndk::native_window::{FrameRateCompatibility, NativeWindow};
|
||||
use punktfunk_core::client::NativeClient;
|
||||
use punktfunk_core::error::PunktfunkError;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
@@ -18,7 +18,13 @@ use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// The decode loop. Runs on the `pf-decode` thread until `shutdown` is set or the session closes.
|
||||
pub fn run(client: Arc<NativeClient>, window: NativeWindow, shutdown: Arc<AtomicBool>) {
|
||||
pub fn run(
|
||||
client: Arc<NativeClient>,
|
||||
window: NativeWindow,
|
||||
shutdown: Arc<AtomicBool>,
|
||||
stats: Arc<crate::stats::VideoStats>,
|
||||
) {
|
||||
boost_thread_priority();
|
||||
let mode = client.mode();
|
||||
let codec = match MediaCodec::from_decoder_type("video/hevc") {
|
||||
Some(c) => c,
|
||||
@@ -39,6 +45,11 @@ pub fn run(client: Arc<NativeClient>, window: NativeWindow, shutdown: Arc<Atomic
|
||||
);
|
||||
// Ask for the low-latency decode path where the decoder supports it (no reordering buffer).
|
||||
format.set_i32("low-latency", 1);
|
||||
// Advisory low-latency hints (KEY_PRIORITY / KEY_OPERATING_RATE), ignored where unsupported:
|
||||
// realtime priority + the target frame rate, so vendor decoders (e.g. Qualcomm) run at full
|
||||
// clocks instead of a power-saving cadence that adds dequeue latency.
|
||||
format.set_i32("priority", 0); // 0 = realtime
|
||||
format.set_i32("operating-rate", mode.refresh_hz as i32);
|
||||
|
||||
if let Err(e) = codec.configure(&format, Some(&window), MediaCodecDirection::Decoder) {
|
||||
log::error!("decode: configure failed: {e}");
|
||||
@@ -53,6 +64,15 @@ pub fn run(client: Arc<NativeClient>, window: NativeWindow, shutdown: Arc<Atomic
|
||||
mode.width,
|
||||
mode.height
|
||||
);
|
||||
// Tell the display the stream's refresh so Android can pick a matching display mode and align
|
||||
// vsync (no 60-in-120 judder on high-refresh panels). minSdk 31 ≥ API 30, so the underlying
|
||||
// ANativeWindow_setFrameRate is always present; non-fatal if the platform declines.
|
||||
if let Err(e) = window.set_frame_rate(mode.refresh_hz as f32, FrameRateCompatibility::Default) {
|
||||
log::warn!(
|
||||
"decode: set_frame_rate({} Hz) failed (non-fatal): {e}",
|
||||
mode.refresh_hz
|
||||
);
|
||||
}
|
||||
|
||||
let mut fed: u64 = 0;
|
||||
let mut rendered: u64 = 0;
|
||||
@@ -60,6 +80,9 @@ pub fn run(client: Arc<NativeClient>, window: NativeWindow, shutdown: Arc<Atomic
|
||||
// climbs.
|
||||
let mut last_dropped = client.frames_dropped();
|
||||
let mut last_kf_req: Option<Instant> = None;
|
||||
// Capture→client-receipt latency uses the negotiated host-minus-client clock offset (0 if the
|
||||
// host didn't answer the skew handshake — then the HUD flags it "same-host").
|
||||
let clock_offset = client.clock_offset_ns;
|
||||
while !shutdown.load(Ordering::Relaxed) {
|
||||
match client.next_frame(Duration::from_millis(5)) {
|
||||
Ok(frame) => {
|
||||
@@ -72,6 +95,11 @@ pub fn run(client: Arc<NativeClient>, window: NativeWindow, shutdown: Arc<Atomic
|
||||
);
|
||||
}
|
||||
fed += 1;
|
||||
// HUD stat: capture→client-receipt latency = client_now + (host−client) − capture_pts.
|
||||
let lat_ns = now_realtime_ns() + clock_offset as i128 - frame.pts_ns as i128;
|
||||
let lat_us =
|
||||
(lat_ns > 0 && lat_ns < 10_000_000_000).then_some((lat_ns / 1000) as u64);
|
||||
stats.note(frame.data.len(), lat_us, clock_offset != 0);
|
||||
feed(&codec, &frame.data, frame.pts_ns / 1000);
|
||||
}
|
||||
Err(PunktfunkError::NoFrame) => {} // timeout — still drain output below
|
||||
@@ -105,6 +133,33 @@ pub fn run(client: Arc<NativeClient>, window: NativeWindow, shutdown: Arc<Atomic
|
||||
log::info!("decode: stopped (fed={fed} rendered={rendered})");
|
||||
}
|
||||
|
||||
/// Wall-clock now in nanoseconds (CLOCK_REALTIME basis), to compare against the host-stamped
|
||||
/// capture `pts_ns` after the skew offset is applied.
|
||||
fn now_realtime_ns() -> i128 {
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos() as i128)
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Best-effort: raise the decode thread toward Android's URGENT_DISPLAY band so background work
|
||||
/// can't preempt it under load (which shows up as late/dropped frames). Non-fatal if the platform
|
||||
/// refuses (foreground apps may set their own threads; the exact floor is policy-dependent).
|
||||
fn boost_thread_priority() {
|
||||
// SAFETY: `gettid`/`setpriority` on the calling thread are always-safe syscalls. PRIO_PROCESS
|
||||
// with a TID targets that one task on Linux — the same idiom `Process.setThreadPriority` uses.
|
||||
unsafe {
|
||||
let tid = libc::gettid();
|
||||
if libc::setpriority(libc::PRIO_PROCESS, tid as libc::id_t, -10) != 0 {
|
||||
log::warn!(
|
||||
"decode: setpriority(-10) failed (non-fatal): {}",
|
||||
std::io::Error::last_os_error()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy one access unit into a codec input buffer and queue it.
|
||||
fn feed(codec: &MediaCodec, au: &[u8], pts_us: u64) {
|
||||
match codec.dequeue_input_buffer(Duration::from_millis(10)) {
|
||||
|
||||
@@ -29,6 +29,7 @@ mod feedback;
|
||||
#[cfg(target_os = "android")]
|
||||
mod mic;
|
||||
mod session;
|
||||
mod stats;
|
||||
|
||||
/// Initialize `android_logger` once when the JVM loads the library. Logs land in logcat under the
|
||||
/// `punktfunk` tag. Android-only — there is no JVM (and no logcat) on the host build.
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
//! renegotiation. Port the remaining orchestration from `clients/linux`.
|
||||
|
||||
use jni::objects::{JObject, JString};
|
||||
use jni::sys::{jboolean, jint, jlong};
|
||||
use jni::sys::{jboolean, jdoubleArray, jint, jlong, jsize};
|
||||
use jni::JNIEnv;
|
||||
use punktfunk_core::client::NativeClient;
|
||||
use punktfunk_core::config::{CompositorPref, GamepadPref, Mode};
|
||||
@@ -40,6 +40,8 @@ pub(crate) struct SessionHandle {
|
||||
struct VideoThread {
|
||||
shutdown: Arc<AtomicBool>,
|
||||
join: Option<JoinHandle<()>>,
|
||||
/// Live decode stats, written by the decode thread and drained ~1 Hz by `nativeVideoStats`.
|
||||
stats: Arc<crate::stats::VideoStats>,
|
||||
}
|
||||
|
||||
impl SessionHandle {
|
||||
@@ -331,13 +333,19 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeStartVideo(
|
||||
}
|
||||
};
|
||||
let shutdown = Arc::new(AtomicBool::new(false));
|
||||
let stats = Arc::new(crate::stats::VideoStats::new());
|
||||
let client = h.client.clone();
|
||||
let sd = shutdown.clone();
|
||||
let st = stats.clone();
|
||||
let join = std::thread::Builder::new()
|
||||
.name("pf-decode".into())
|
||||
.spawn(move || crate::decode::run(client, window, sd))
|
||||
.spawn(move || crate::decode::run(client, window, sd, st))
|
||||
.ok();
|
||||
*guard = Some(VideoThread { shutdown, join });
|
||||
*guard = Some(VideoThread {
|
||||
shutdown,
|
||||
join,
|
||||
stats,
|
||||
});
|
||||
}
|
||||
|
||||
/// `NativeBridge.nativeStopVideo(handle)` — stop + join the decode thread (without closing the
|
||||
@@ -355,6 +363,50 @@ pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeStopVideo(
|
||||
}
|
||||
}
|
||||
|
||||
/// `NativeBridge.nativeVideoStats(handle): DoubleArray?` — drain ~1 s of decode stats for the HUD.
|
||||
/// Returns 10 doubles
|
||||
/// `[fps, mbps, latP50Ms, latP95Ms, latValid, skewCorrected, width, height, refreshHz, framesDropped]`
|
||||
/// (the two flags are 1.0/0.0), or `null` when no decode thread is running. Poll ~1 Hz from the UI;
|
||||
/// each call resets the measurement window. Not android-gated — pure `jni` + connector reads, so it
|
||||
/// links on the host build too (Kotlin only ever calls it on device).
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_io_unom_punktfunk_kit_NativeBridge_nativeVideoStats(
|
||||
env: JNIEnv,
|
||||
_this: JObject,
|
||||
handle: jlong,
|
||||
) -> jdoubleArray {
|
||||
if handle == 0 {
|
||||
return std::ptr::null_mut();
|
||||
}
|
||||
// SAFETY: live handle per the nativeConnect/nativeClose contract.
|
||||
let h = unsafe { &*(handle as *const SessionHandle) };
|
||||
let snap = match h.video.lock().unwrap().as_ref() {
|
||||
Some(vt) => vt.stats.drain(),
|
||||
None => return std::ptr::null_mut(), // not streaming → no stats
|
||||
};
|
||||
let mode = h.client.mode();
|
||||
let buf: [f64; 10] = [
|
||||
snap.fps,
|
||||
snap.mbps,
|
||||
snap.lat_p50_ms,
|
||||
snap.lat_p95_ms,
|
||||
if snap.lat_valid { 1.0 } else { 0.0 },
|
||||
if snap.skew_corrected { 1.0 } else { 0.0 },
|
||||
mode.width as f64,
|
||||
mode.height as f64,
|
||||
mode.refresh_hz as f64,
|
||||
h.client.frames_dropped() as f64,
|
||||
];
|
||||
let arr = match env.new_double_array(buf.len() as jsize) {
|
||||
Ok(a) => a,
|
||||
Err(_) => return std::ptr::null_mut(),
|
||||
};
|
||||
if env.set_double_array_region(&arr, 0, &buf).is_err() {
|
||||
return std::ptr::null_mut();
|
||||
}
|
||||
arr.into_raw()
|
||||
}
|
||||
|
||||
/// `NativeBridge.nativeStartAudio(handle)` — start the Opus→AAudio playback thread. No-op if already
|
||||
/// started or on a `0` handle. Best-effort: a failure leaves video streaming.
|
||||
#[cfg(target_os = "android")]
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
//! Live decode stats for the on-stream HUD (mirrors the Apple client's stats overlay): FPS,
|
||||
//! receive throughput, and capture→client-receipt latency (p50/p95). The decode thread is the sole
|
||||
//! writer (`note` per access unit); the JNI accessor `nativeVideoStats` drains a snapshot ~1 Hz and
|
||||
//! resets the window. Pure `std` so it compiles on the host build too (the decode thread is
|
||||
//! android-only, but `VideoThread` holds the shared handle unconditionally).
|
||||
|
||||
use std::sync::Mutex;
|
||||
use std::time::Instant;
|
||||
|
||||
/// Rolling per-window accumulator. Rates are computed over the actual elapsed wall-time at drain
|
||||
/// (robust to poll jitter), so a poll that lands at 0.9 s or 1.1 s still reports the right FPS.
|
||||
pub struct VideoStats {
|
||||
inner: Mutex<Inner>,
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
window_start: Instant,
|
||||
frames: u64,
|
||||
bytes: u64,
|
||||
/// capture→client-receipt latency samples for this window, in microseconds.
|
||||
lat_us: Vec<u64>,
|
||||
/// Whether the host answered the clock-skew handshake (latency is cross-machine valid).
|
||||
skew_corrected: bool,
|
||||
}
|
||||
|
||||
/// A drained, computed view of one window. `lat_valid` is false when no in-range latency sample
|
||||
/// landed (then p50/p95 are 0 and the HUD hides the latency line, exactly like the Apple client).
|
||||
pub struct Snapshot {
|
||||
pub fps: f64,
|
||||
pub mbps: f64,
|
||||
pub lat_p50_ms: f64,
|
||||
pub lat_p95_ms: f64,
|
||||
pub lat_valid: bool,
|
||||
pub skew_corrected: bool,
|
||||
}
|
||||
|
||||
impl VideoStats {
|
||||
// `new`/`note` are driven only by the android-only decode thread; `drain` (the JNI accessor) is
|
||||
// ungated, so on the host build these two are unreferenced — that's expected, not dead code.
|
||||
#[cfg_attr(not(target_os = "android"), allow(dead_code))]
|
||||
pub fn new() -> VideoStats {
|
||||
VideoStats {
|
||||
inner: Mutex::new(Inner {
|
||||
window_start: Instant::now(),
|
||||
frames: 0,
|
||||
bytes: 0,
|
||||
lat_us: Vec::with_capacity(256),
|
||||
skew_corrected: false,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Record one decoded access unit: its wire size and (if in range) its capture→client latency.
|
||||
#[cfg_attr(not(target_os = "android"), allow(dead_code))]
|
||||
pub fn note(&self, bytes: usize, lat_us: Option<u64>, skew_corrected: bool) {
|
||||
let mut g = self.inner.lock().unwrap();
|
||||
g.frames += 1;
|
||||
g.bytes += bytes as u64;
|
||||
g.skew_corrected = skew_corrected;
|
||||
if let Some(l) = lat_us {
|
||||
g.lat_us.push(l);
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the window's rates + latency percentiles, then reset for the next window.
|
||||
pub fn drain(&self) -> Snapshot {
|
||||
let mut g = self.inner.lock().unwrap();
|
||||
let elapsed = g.window_start.elapsed().as_secs_f64().max(1e-3);
|
||||
let fps = g.frames as f64 / elapsed;
|
||||
let mbps = g.bytes as f64 * 8.0 / 1_000_000.0 / elapsed;
|
||||
let (p50, p95, valid) = if g.lat_us.is_empty() {
|
||||
(0.0, 0.0, false)
|
||||
} else {
|
||||
g.lat_us.sort_unstable();
|
||||
let n = g.lat_us.len();
|
||||
let at = |p: f64| g.lat_us[((n as f64 * p) as usize).min(n - 1)] as f64 / 1000.0;
|
||||
(at(0.50), at(0.95), true)
|
||||
};
|
||||
let skew = g.skew_corrected;
|
||||
g.window_start = Instant::now();
|
||||
g.frames = 0;
|
||||
g.bytes = 0;
|
||||
g.lat_us.clear();
|
||||
Snapshot {
|
||||
fps,
|
||||
mbps,
|
||||
lat_p50_ms: p50,
|
||||
lat_p95_ms: p95,
|
||||
lat_valid: valid,
|
||||
skew_corrected: skew,
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user