fix(host/windows): retry DuplicateOutput1 to ride out the old-dup teardown race
User's insight, and it fits the evidence exactly: in duplicate_output the FIRST
DuplicateOutput1 (called microseconds after the caller releases the old
duplication via self.dupl=None) returns E_ACCESSDENIED, but the legacy
DuplicateOutput a beat later SUCCEEDS — the only difference is TIMING. The
kernel-side teardown of the just-released duplication is async, so the immediate
DuplicateOutput1 races it ('output still duplicated' -> E_ACCESSDENIED). We then
fell straight through to legacy DuplicateOutput, which 'succeeds' into a FRAGILE
dup that churns ACCESS_LOST/MODE_CHANGE every few ms on this cross-GPU IDD
(causing the post-login freeze + UAC-confirm drop).
Fix: attempt DuplicateOutput1 up to 5 times (the initial call plus 4 retries), with
escalating 2/4/8/16 ms waits between attempts, before falling back to legacy, so the
teardown finishes and the ROBUST DuplicateOutput1 dup succeeds (no churn). Bounded
(~30 ms of added waiting in the worst case) so a genuine failure still falls back
quickly. This is the same rationale as Apollo's 2x/200ms retry.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -186,12 +186,38 @@ unsafe fn duplicate_output(
|
|||||||
// wrongly tripping the HDR path. Real HDR capture (FP16 first + IDXGIOutput6 colorspace
|
// wrongly tripping the HDR path. Real HDR capture (FP16 first + IDXGIOutput6 colorspace
|
||||||
// detection to pick the path) is the follow-up once the churn is settled.
|
// detection to pick the path) is the follow-up once the churn is settled.
|
||||||
let formats = [DXGI_FORMAT_B8G8R8A8_UNORM];
|
let formats = [DXGI_FORMAT_B8G8R8A8_UNORM];
|
||||||
match output5.DuplicateOutput1(device, 0, &formats) {
|
// RETRY DuplicateOutput1. The caller releases the OLD duplication (self.dupl = None) immediately
|
||||||
Ok(d) => return Ok(d),
|
// before calling us, and the kernel-side teardown of that duplication is ASYNC — the FIRST
|
||||||
Err(e) => tracing::warn!(
|
// DuplicateOutput1 right after can race it and return E_ACCESSDENIED ("output still duplicated")
|
||||||
|
// even though we dropped our only reference. A few short retries let the teardown finish so the
|
||||||
|
// ROBUST DuplicateOutput1 dup succeeds, instead of falling through to legacy DuplicateOutput,
|
||||||
|
// which "succeeds" into a fragile dup that churns ACCESS_LOST/MODE_CHANGE every few ms on this
|
||||||
|
// cross-GPU IDD. (This is why DuplicateOutput1 failed but the legacy call a beat later
|
||||||
|
// succeeded — pure timing. Apollo retries DuplicateOutput1 2x/200ms for the same reason.)
|
||||||
|
let mut last_err = None;
|
||||||
|
for attempt in 0..5u64 {
|
||||||
|
match output5.DuplicateOutput1(device, 0, &formats) {
|
||||||
|
Ok(d) => {
|
||||||
|
if attempt > 0 {
|
||||||
|
tracing::info!(attempt, "DuplicateOutput1 succeeded on retry (raced old-dup teardown)");
|
||||||
|
}
|
||||||
|
return Ok(d);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
last_err = Some(e);
|
||||||
|
// Escalating brief waits: 2,4,8,16 ms (skip after the last attempt). Bounded so a
|
||||||
|
// GENUINE failure still falls back to legacy quickly (~30 ms worst case).
|
||||||
|
if attempt < 4 {
|
||||||
|
std::thread::sleep(Duration::from_millis(2u64 << attempt));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(e) = last_err {
|
||||||
|
tracing::warn!(
|
||||||
error = %format!("{e:?}"),
|
error = %format!("{e:?}"),
|
||||||
"DuplicateOutput1 failed — falling back to legacy DuplicateOutput"
|
"DuplicateOutput1 failed after retries — falling back to legacy DuplicateOutput (will churn)"
|
||||||
),
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output.DuplicateOutput(device).context("DuplicateOutput")
|
output.DuplicateOutput(device).context("DuplicateOutput")
|
||||||
|
|||||||
Reference in New Issue
Block a user