feat(host): KDE-reliability phase 2 — pipeline retry, graceful capture teardown, refresh reconcile
Hardens the virtual-display → capture → encode bring-up against the transient failures that surfaced as black screens / wrong refresh on cold KDE sessions.

- m3: build_pipeline_with_retry wraps the initial vd.create() + first-frame with bounded exponential backoff (4 attempts, 500ms→2s). is_permanent_build_error classifies config/version/missing-tool failures so they fail fast instead of burning the retry budget. Encoder + frame clock now pace to the *achieved* refresh reported in VirtualOutput::preferred_mode, not the requested rate.
- capture/linux: PortalCapturer::Drop sends a pipewire channel quit and joins the thread, so a dropped/failed/retried capturer releases its PipeWire thread + EGL/CUDA context promptly instead of leaking it to process exit. First-frame timeout now reports the node id and distinguishes "format never negotiated" from "negotiated but no buffers arrived" via a negotiated flag set in param_changed.
- vdisplay/kwin: set_custom_refresh reads back the active mode from kscreen-doctor and returns the refresh KWin actually gave us (a rejected custom mode silently leaves the output at 60Hz); create() carries it into preferred_mode.
- vdisplay/gamescope: find_gamescope_node requires the Video/Source object (the node.name=gamescope tag is on two objects; the other wedges the link); a version check warns on <3.16.22 (the PipeWire-1.6 capture-deadlock signature).

Live-validated against headless KWin: 720p120 build with requested=120 achieved=120, zero-copy CUDA frames, and no per-session thread accumulation across back-to-back sessions.

Tests: +3 unit (retry classifier, gamescope version parse); 49 host tests green, clippy/fmt clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -48,6 +48,7 @@ impl VirtualDisplay for GamescopeDisplay {
|
||||
keepalive: Box::new(()),
|
||||
});
|
||||
}
|
||||
check_gamescope_version(); // diagnostic only — warns on known-deadlock-prone versions
|
||||
let proc = GamescopeProc(spawn(mode.width, mode.height, mode.refresh_hz.max(1))?);
|
||||
// gamescope creates its PipeWire node a moment after start; poll for it (the proc is held
|
||||
// alive meanwhile, and killed if we give up).
|
||||
@@ -147,24 +148,92 @@ fn node_from_log() -> Option<u32> {
|
||||
}
|
||||
|
||||
/// Find the `gamescope` `Video/Source` node id in a `pw-dump` snapshot of the default daemon.
|
||||
///
|
||||
/// `node.name=gamescope` appears on TWO objects (the adapter *and* the inner stream node); only
|
||||
/// the one whose `media.class` is `Video/Source` is a valid capture target — connecting to the
|
||||
/// other wedges the link. So we require `Video/Source` first and fall back to a bare name match
|
||||
/// only if no class-tagged node is present (older gamescope that doesn't set media.class).
|
||||
fn find_gamescope_node() -> Option<u32> {
|
||||
let out = Command::new("pw-dump").output().ok()?;
|
||||
let dump: serde_json::Value = serde_json::from_slice(&out.stdout).ok()?;
|
||||
for obj in dump.as_array()? {
|
||||
let nodes = dump.as_array()?;
|
||||
let node_props = |obj: &serde_json::Value| -> Option<(u32, String, String)> {
|
||||
if obj.get("type").and_then(|t| t.as_str()) != Some("PipeWire:Interface:Node") {
|
||||
continue;
|
||||
return None;
|
||||
}
|
||||
let id = obj.get("id").and_then(|i| i.as_u64())? as u32;
|
||||
let props = obj.get("info").and_then(|i| i.get("props"));
|
||||
let name = props
|
||||
.and_then(|p| p.get("node.name"))
|
||||
.and_then(|n| n.as_str())
|
||||
.unwrap_or("");
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let class = props
|
||||
.and_then(|p| p.get("media.class"))
|
||||
.and_then(|n| n.as_str())
|
||||
.unwrap_or("");
|
||||
if name == "gamescope" || (class == "Video/Source" && name.contains("gamescope")) {
|
||||
return obj.get("id").and_then(|i| i.as_u64()).map(|x| x as u32);
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
Some((id, name, class))
|
||||
};
|
||||
// Preferred: a Video/Source node named (or containing) "gamescope".
|
||||
for obj in nodes {
|
||||
if let Some((id, name, class)) = node_props(obj) {
|
||||
if class == "Video/Source" && (name == "gamescope" || name.contains("gamescope")) {
|
||||
return Some(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback: a node literally named "gamescope" with no usable class tag.
|
||||
for obj in nodes {
|
||||
if let Some((id, name, _)) = node_props(obj) {
|
||||
if name == "gamescope" {
|
||||
tracing::warn!(
|
||||
node_id = id,
|
||||
"gamescope node has no media.class=Video/Source tag — capturing it anyway"
|
||||
);
|
||||
return Some(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Minimum gamescope that captures reliably: below 3.16.22, headless PipeWire capture deadlocks
|
||||
/// against PipeWire ≥ 1.6 (a loop-lock bug) and a stuck link head-blocks the whole daemon.
|
||||
const MIN_GAMESCOPE: (u32, u32, u32) = (3, 16, 22);
|
||||
|
||||
/// Best-effort: warn loudly if the installed gamescope is older than [`MIN_GAMESCOPE`]. Parsing
|
||||
/// failures are silent (don't block a possibly-fine custom build) — this is a diagnostic, not a
|
||||
/// gate. Returns the parsed version when it could read one.
|
||||
fn check_gamescope_version() -> Option<(u32, u32, u32)> {
|
||||
let out = Command::new("gamescope").arg("--version").output().ok()?;
|
||||
// gamescope prints the version banner to stderr on some builds, stdout on others.
|
||||
let text = format!(
|
||||
"{}{}",
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
);
|
||||
let ver = parse_version(&text)?;
|
||||
if ver < MIN_GAMESCOPE {
|
||||
tracing::warn!(
|
||||
found = %format!("{}.{}.{}", ver.0, ver.1, ver.2),
|
||||
min = %format!("{}.{}.{}", MIN_GAMESCOPE.0, MIN_GAMESCOPE.1, MIN_GAMESCOPE.2),
|
||||
"gamescope is older than the minimum for reliable headless capture — expect a \
|
||||
capture deadlock against PipeWire ≥ 1.6 (a wedged link head-blocks the daemon); \
|
||||
upgrade gamescope or use PUNKTFUNK_COMPOSITOR=kwin|mutter"
|
||||
);
|
||||
}
|
||||
Some(ver)
|
||||
}
|
||||
|
||||
/// Return the first `X.Y.Z` triple found in `text` (e.g. `gamescope version 3.16.22`).
///
/// The text is cut into runs of ASCII digits and dots; the first run whose leading three
/// dot-separated fields all parse as `u32` wins. Fields beyond the third are ignored, and a run
/// with fewer than three fields (or an empty / overflowing field) is skipped.
fn parse_version(text: &str) -> Option<(u32, u32, u32)> {
    text.split(|ch: char| !ch.is_ascii_digit() && ch != '.')
        .find_map(|candidate| {
            let mut fields = candidate.split('.');
            let major = fields.next()?.parse::<u32>().ok()?;
            let minor = fields.next()?.parse::<u32>().ok()?;
            let patch = fields.next()?.parse::<u32>().ok()?;
            Some((major, minor, patch))
        })
}
|
||||
@@ -179,3 +248,28 @@ impl Drop for GamescopeProc {
|
||||
let _ = self.0.wait();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{parse_version, MIN_GAMESCOPE};

    /// Each banner must yield the listed triple, or `None` when no full `X.Y.Z` is present.
    #[test]
    fn parses_version_banner() {
        let cases: [(&str, Option<(u32, u32, u32)>); 5] = [
            ("gamescope version 3.16.22", Some((3, 16, 22))),
            ("gamescope: version v3.15.9 (no PipeWire)", Some((3, 15, 9))),
            ("3.16.20-1.fc41", Some((3, 16, 20))),
            ("no version here", None),
            // Two fields are not enough — a full triple is required.
            ("only 3.16 here", None),
        ];
        for (banner, expected) in cases {
            assert_eq!(parse_version(banner), expected);
        }
    }

    /// Versions compare against `MIN_GAMESCOPE` as ordered `(major, minor, patch)` tuples.
    #[test]
    fn flags_known_bad_versions() {
        let below_min = |banner: &str| parse_version(banner).unwrap() < MIN_GAMESCOPE;

        // The 26.04-shipped 3.16.20 is below the minimum (PipeWire 1.6 deadlock).
        assert!(below_min("gamescope version 3.16.20"));
        assert!(!below_min("gamescope version 3.16.22"));
        assert!(!below_min("gamescope version 3.17.0"));
    }
}
|
||||
|
||||
@@ -91,17 +91,22 @@ impl VirtualDisplay for KwinDisplay {
|
||||
};
|
||||
tracing::info!(node_id, width, height, "KWin virtual output ready");
|
||||
// KWin creates virtual outputs at a hardcoded 60 Hz and `stream_virtual_output` has no
|
||||
// refresh argument, so when the client wants more we install + select a custom mode
|
||||
// (supported on virtual outputs since KWin 6.6). Done before capture connects PipeWire so
|
||||
// the stream negotiates at the higher rate. First cut shells out to kscreen-doctor; the
|
||||
// in-process kde_output_management_v2 client is a follow-up.
|
||||
if mode.refresh_hz > 60 {
|
||||
set_custom_refresh(width, height, mode.refresh_hz);
|
||||
}
|
||||
// refresh argument, so above 60 Hz we install + select a custom mode (supported on virtual
|
||||
// outputs since KWin 6.6) before capture connects PipeWire, so the stream negotiates at the
|
||||
// higher rate. First cut shells out to kscreen-doctor; the in-process
|
||||
// kde_output_management_v2 client is a follow-up. `set_custom_refresh` reads back and
|
||||
// returns what KWin *actually* achieved so the encoder paces to the real source rate (a
|
||||
// rejected custom mode leaves the output at 60 Hz). At ≤60 Hz there's nothing to install —
|
||||
// the source runs 60 Hz and the encoder downsamples — so carry the requested rate through.
|
||||
let achieved_hz = if mode.refresh_hz > 60 {
|
||||
set_custom_refresh(width, height, mode.refresh_hz)
|
||||
} else {
|
||||
mode.refresh_hz
|
||||
};
|
||||
Ok(VirtualOutput {
|
||||
node_id,
|
||||
remote_fd: None,
|
||||
preferred_mode: Some((mode.width, mode.height, mode.refresh_hz)),
|
||||
preferred_mode: Some((mode.width, mode.height, achieved_hz)),
|
||||
keepalive: Box::new(StopGuard(stop)),
|
||||
})
|
||||
}
|
||||
@@ -109,8 +114,11 @@ impl VirtualDisplay for KwinDisplay {
|
||||
|
||||
/// Best-effort: raise the just-created virtual output's refresh above KWin's default 60 Hz by
|
||||
/// installing + selecting a custom mode via `kscreen-doctor` (the output is `Virtual-<VOUT_NAME>`,
|
||||
/// refresh given in mHz). Failure leaves the source at 60 Hz — the stream still works, just capped.
|
||||
fn set_custom_refresh(width: u32, height: u32, hz: u32) {
|
||||
/// refresh given in mHz), then **read back the active mode** and return the refresh KWin actually
|
||||
/// gave us. The apply command can report success yet leave the output at 60 Hz (mode rejected),
|
||||
/// and a silent rate mismatch surfaces downstream as judder / duplicated frames — so the caller
|
||||
/// paces the encoder to the *achieved* rate, not the requested one.
|
||||
fn set_custom_refresh(width: u32, height: u32, hz: u32) -> u32 {
|
||||
let output = format!("Virtual-{VOUT_NAME}");
|
||||
let mhz = hz.saturating_mul(1000);
|
||||
let run = |arg: String| {
|
||||
@@ -124,17 +132,70 @@ fn set_custom_refresh(width: u32, height: u32, hz: u32) {
|
||||
let _ = run(format!(
|
||||
"output.{output}.addCustomMode.{width}.{height}.{mhz}.full"
|
||||
));
|
||||
if run(format!("output.{output}.mode.{width}x{height}@{hz}")) {
|
||||
tracing::info!(output, hz, "KWin virtual output: custom refresh applied");
|
||||
} else {
|
||||
tracing::warn!(
|
||||
output,
|
||||
hz,
|
||||
"kscreen-doctor refresh set failed — source stays 60 Hz (is kscreen-doctor installed?)"
|
||||
);
|
||||
let applied = run(format!("output.{output}.mode.{width}x{height}@{hz}"));
|
||||
match read_active_refresh(&output) {
|
||||
Some(achieved) if achieved >= hz => {
|
||||
tracing::info!(
|
||||
output,
|
||||
requested = hz,
|
||||
achieved,
|
||||
"KWin virtual output: custom refresh applied"
|
||||
);
|
||||
achieved
|
||||
}
|
||||
Some(achieved) => {
|
||||
tracing::warn!(
|
||||
output,
|
||||
requested = hz,
|
||||
achieved,
|
||||
applied,
|
||||
"KWin virtual output refresh below requested — pacing the encoder to the achieved \
|
||||
rate (custom-mode install rejected? is kscreen-doctor up to date?)"
|
||||
);
|
||||
achieved.max(1)
|
||||
}
|
||||
None => {
|
||||
tracing::warn!(
|
||||
output,
|
||||
requested = hz,
|
||||
applied,
|
||||
"could not read back KWin virtual output refresh — assuming 60 Hz (is \
|
||||
kscreen-doctor installed?)"
|
||||
);
|
||||
60
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the active refresh (Hz, rounded) of `output` from `kscreen-doctor -j`. `None` if the
|
||||
/// tool, the output, or its current mode can't be found. Mode/output ids come through as either
|
||||
/// JSON strings or numbers depending on the KWin version, so both are accepted.
|
||||
fn read_active_refresh(output: &str) -> Option<u32> {
|
||||
let out = std::process::Command::new("kscreen-doctor")
|
||||
.arg("-j")
|
||||
.output()
|
||||
.ok()?;
|
||||
let doc: serde_json::Value = serde_json::from_slice(&out.stdout).ok()?;
|
||||
let as_id = |v: &serde_json::Value| -> Option<String> {
|
||||
v.as_str()
|
||||
.map(|s| s.to_string())
|
||||
.or_else(|| v.as_u64().map(|n| n.to_string()))
|
||||
};
|
||||
let o = doc
|
||||
.get("outputs")?
|
||||
.as_array()?
|
||||
.iter()
|
||||
.find(|o| o.get("name").and_then(|n| n.as_str()) == Some(output))?;
|
||||
let current = o.get("currentModeId").and_then(as_id)?;
|
||||
let mode = o
|
||||
.get("modes")?
|
||||
.as_array()?
|
||||
.iter()
|
||||
.find(|m| m.get("id").and_then(as_id).as_deref() == Some(current.as_str()))?;
|
||||
let hz = mode.get("refreshRate").and_then(|r| r.as_f64())?;
|
||||
Some(hz.round() as u32)
|
||||
}
|
||||
|
||||
/// Dropping this releases the KWin virtual output: it flips the keepalive thread's `stop`, which
|
||||
/// drops the Wayland connection and makes KWin reclaim the output.
|
||||
struct StopGuard(Arc<AtomicBool>);
|
||||
|
||||
Reference in New Issue
Block a user