perf(host/windows): elevate capture/encode/send thread CPU priority (Apollo-parity)
apple / swift (push) Successful in 54s
ci / rust (push) Successful in 1m36s
android / android (push) Successful in 2m5s
ci / web (push) Successful in 29s
ci / docs-site (push) Successful in 29s
deb / build-publish (push) Successful in 2m31s
decky / build-publish (push) Successful in 15s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 4s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 3s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 4s
ci / bench (push) Successful in 4m28s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 8m20s
docker / deploy-docs (push) Successful in 17s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 7m58s
apple / swift (push) Successful in 54s
ci / rust (push) Successful in 1m36s
android / android (push) Successful in 2m5s
ci / web (push) Successful in 29s
ci / docs-site (push) Successful in 29s
deb / build-publish (push) Successful in 2m31s
decky / build-publish (push) Successful in 15s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 4s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 4s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 3s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 4s
ci / bench (push) Successful in 4m28s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Successful in 8m20s
docker / deploy-docs (push) Successful in 17s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Successful in 7m58s
Apollo runs its capture thread at CRITICAL and its encoder thread at ABOVE_NORMAL; we set none. Our GPU work is already HIGH priority, but the GPU scheduler can only favour commands we've SUBMITTED — a normal-priority thread descheduled by a CPU-heavy game submits the convert/encode late, so the HIGH GPU priority never bites (consistent with the measured "NVENC engine idle yet the encode waits ~15 ms"). Raise the WGC helper's capture+encode loop and the single-process capture+encode loop to THREAD_PRIORITY_HIGHEST, and the transmit thread to ABOVE_NORMAL, via a cross-platform boost_thread_priority() (Windows-only effect — the Linux host caps the game via gamescope so its threads aren't starved). Not yet built/validated on the GPU box (it's down); the cross-platform side compiles (cargo check) and the Windows calls are cross-checked against the windows-0.62 API. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1828,6 +1828,38 @@ struct FrameMsg {
|
||||
/// speed-test probe bursts (which also need the Session). Decoupling the paced send from encoding
|
||||
/// lets the encode of frame N+1 overlap the transmit of frame N instead of waiting behind its tail.
|
||||
/// Runs until the encode thread drops the frame channel (end of stream) or `stop` is set.
|
||||
/// Raise the calling thread's OS scheduling priority so a CPU-heavy game can't deschedule our
/// capture/encode/send threads.
///
/// This matters even though our GPU work is already HIGH priority: the GPU scheduler can only
/// favour commands we've actually SUBMITTED, so if a normal-priority thread is descheduled by the
/// game it submits the convert/encode late and the GPU priority never bites. Apollo elevates its
/// threads for the same reason (capture thread CRITICAL, encoder ABOVE_NORMAL).
///
/// * `critical == true`  → `THREAD_PRIORITY_HIGHEST` (used for the capture+encode loop). Note this
///   is a thread priority *level* relative to the process's priority class, and it sits one step
///   below `THREAD_PRIORITY_TIME_CRITICAL`.
/// * `critical == false` → `THREAD_PRIORITY_ABOVE_NORMAL` (used for the send/relay thread).
///
/// Best-effort: a failure is only logged, never propagated. Has an effect on Windows only — the
/// Linux host caps the game via gamescope, so its threads aren't starved; on every other target
/// this function is a no-op.
pub(crate) fn boost_thread_priority(critical: bool) {
    #[cfg(target_os = "windows")]
    {
        use windows::Win32::System::Threading::{
            GetCurrentThread, SetThreadPriority, THREAD_PRIORITY_ABOVE_NORMAL,
            THREAD_PRIORITY_HIGHEST,
        };

        let prio = if critical {
            THREAD_PRIORITY_HIGHEST
        } else {
            THREAD_PRIORITY_ABOVE_NORMAL
        };

        // SAFETY: `GetCurrentThread` returns a pseudo-handle to the calling thread; it is always
        // valid and must not be closed. `SetThreadPriority` only reads the handle and the
        // by-value priority, so no memory-safety invariant can be violated here.
        let outcome = unsafe { SetThreadPriority(GetCurrentThread(), prio) };

        match outcome {
            Ok(()) => tracing::debug!(critical, "thread priority raised"),
            // `?e` records the Debug representation directly (no intermediate String).
            Err(e) => tracing::debug!(critical, error = ?e, "SetThreadPriority failed"),
        }
    }
    #[cfg(not(target_os = "windows"))]
    {
        // Nothing to do off Windows; consume the argument so it isn't flagged as unused.
        let _ = critical;
    }
}
|
||||
|
||||
fn send_loop(
|
||||
mut session: Session,
|
||||
frame_rx: std::sync::mpsc::Receiver<FrameMsg>,
|
||||
@@ -1837,6 +1869,7 @@ fn send_loop(
|
||||
perf: bool,
|
||||
burst_cap: usize,
|
||||
) {
|
||||
boost_thread_priority(false); // transmit thread: above-normal (Apollo's encoder-thread level)
|
||||
let mut last_perf = std::time::Instant::now();
|
||||
let mut last_bytes = 0u64;
|
||||
let mut last_send_dropped = 0u64;
|
||||
@@ -1995,6 +2028,9 @@ fn virtual_stream(
|
||||
probe_rx: std::sync::mpsc::Receiver<ProbeRequest>,
|
||||
probe_result_tx: tokio::sync::mpsc::UnboundedSender<ProbeResult>,
|
||||
) -> Result<()> {
|
||||
// This thread runs the capture+encode loop (single-process: Linux / synthetic / NO_WGC DDA) — or
|
||||
// tail-calls the relay below. Elevate it so a CPU-heavy game can't deschedule our GPU submission.
|
||||
boost_thread_priority(true);
|
||||
// Windows two-process secure-desktop path: when the host runs as SYSTEM (required for the secure
|
||||
// desktop + SendInput), WGC can't activate in-process, so we capture the normal desktop via a
|
||||
// helper spawned in the user session and relay its AUs. (Single-process WGC/DDA is used as the
|
||||
|
||||
@@ -46,6 +46,12 @@ pub fn run(opts: HelperOptions) -> Result<()> {
|
||||
"WGC helper starting (user session)"
|
||||
);
|
||||
|
||||
// This thread does WGC capture + video-processor convert + NVENC submit — the GPU-submitting hot
|
||||
// path. Elevate its OS priority so a CPU-heavy game can't deschedule it and delay submission (which
|
||||
// would leave our HIGH GPU priority with nothing queued to prioritise). Apollo's capture thread is
|
||||
// likewise elevated (CRITICAL there; `boost_thread_priority(true)` maps to HIGHEST here).
|
||||
crate::m3::boost_thread_priority(true);
|
||||
|
||||
// Capture the EXISTING SudoVDA output by GDI name / target id — do NOT create one (the host owns
|
||||
// the virtual output + its isolate/restore; a second topology owner breaks DDA recovery).
|
||||
let target = WinCaptureTarget {
|
||||
|
||||
Reference in New Issue
Block a user