diff --git a/crates/punktfunk-host/src/main.rs b/crates/punktfunk-host/src/main.rs
index ff2d30f..043c432 100644
--- a/crates/punktfunk-host/src/main.rs
+++ b/crates/punktfunk-host/src/main.rs
@@ -27,7 +27,7 @@ mod vdisplay;
 #[cfg(target_os = "linux")]
 mod zerocopy;
 
-use anyhow::{bail, Result};
+use anyhow::{bail, Context, Result};
 use encode::Codec;
 use m0::{Options, Source};
 use std::path::PathBuf;
@@ -68,6 +68,15 @@ fn real_main() -> Result<()> {
         // Zero-copy FFI/GPU probe: init the EGL importer + CUDA context (no capture needed).
         #[cfg(target_os = "linux")]
         Some("zerocopy-probe") => zerocopy::probe(),
+        // Compositor readiness probe: exit 0 iff the (detected or PUNKTFUNK_COMPOSITOR-forced)
+        // compositor is up and able to create a virtual output *now*. A session-bringup
+        // script polls this to gate on real readiness instead of a blind `sleep`.
+        Some("probe-compositor") => {
+            let compositor = vdisplay::detect()?;
+            vdisplay::probe(compositor).with_context(|| format!("{compositor:?} not ready"))?;
+            println!("{compositor:?} ready");
+            Ok(())
+        }
         // M0 pipeline spike.
         Some("m0") => m0::run(parse_m0(&args[1..])?),
         // M3: native punktfunk/1 host (QUIC control plane + UDP data plane).
@@ -308,6 +317,7 @@ USAGE:
                                          + the management REST API
   punktfunk-host openapi                 print the management API's OpenAPI document (codegen)
   punktfunk-host m3-host [OPTIONS]       native punktfunk/1 host (QUIC control plane + UDP data plane)
+  punktfunk-host probe-compositor        exit 0 iff the compositor is up + ready (session-bringup gate)
   punktfunk-host m0 [OPTIONS]            M0 capture→encode→file pipeline spike
 
 SERVE OPTIONS:
diff --git a/crates/punktfunk-host/src/vdisplay.rs b/crates/punktfunk-host/src/vdisplay.rs
index c771b02..b5a63cb 100644
--- a/crates/punktfunk-host/src/vdisplay.rs
+++ b/crates/punktfunk-host/src/vdisplay.rs
@@ -111,6 +111,30 @@ pub fn open(compositor: Compositor) -> Result> {
     }
 }
 
+/// Readiness probe for `compositor`: is it up and able to create a virtual output *right
+/// now*? A session-bringup script polls this (via `punktfunk-host probe-compositor`) to gate
+/// on actual readiness instead of racing the compositor with a blind sleep.
+///
+/// KWin gets a real check (the privileged `zkde_screencast` global must be advertised). The
+/// others are spawn/D-Bus/portal-based and have no equivalent pre-flight global, so a probe
+/// just confirms the backend opens — `Ok(())` means "go ahead and try `create`".
+pub fn probe(compositor: Compositor) -> Result<()> {
+    #[cfg(target_os = "linux")]
+    {
+        match compositor {
+            Compositor::Kwin => kwin::probe(),
+            // gamescope spawns its own nested session per `create`; Mutter is D-Bus on demand;
+            // wlroots creates the output on demand — nothing to pre-check beyond "Linux".
+            Compositor::Gamescope | Compositor::Mutter | Compositor::Wlroots => Ok(()),
+        }
+    }
+    #[cfg(not(target_os = "linux"))]
+    {
+        let _ = compositor;
+        anyhow::bail!("virtual displays require Linux (Wayland compositor)")
+    }
+}
+
 /// Path of the file where the gamescope backend relays the nested session's `LIBEI_SOCKET`
 /// (gamescope's EIS server) for the input injector.
 #[cfg(target_os = "linux")]
diff --git a/crates/punktfunk-host/src/vdisplay/kwin.rs b/crates/punktfunk-host/src/vdisplay/kwin.rs
index 8d2a85e..931a153 100644
--- a/crates/punktfunk-host/src/vdisplay/kwin.rs
+++ b/crates/punktfunk-host/src/vdisplay/kwin.rs
@@ -223,6 +223,29 @@ fn virtual_output_thread(
     }
 }
 
+/// Readiness probe: connect to the KWin Wayland socket, roundtrip the registry, and confirm
+/// the privileged `zkde_screencast` global is actually advertised. This is exactly what
+/// [`run`] needs before it can create a virtual output, so a session-bringup script can poll
+/// this to gate on the compositor being *ready* (not merely the socket existing) instead of
+/// racing it with a blind sleep. `Ok(())` = ready; `Err` = not ready / no global yet.
+pub fn probe() -> Result<()> {
+    let conn = Connection::connect_to_env()
+        .context("connect to KWin Wayland (is WAYLAND_DISPLAY set to the KWin socket?)")?;
+    let mut queue = conn.new_event_queue();
+    let qh = queue.handle();
+    let _registry = conn.display().get_registry(&qh, ());
+    let mut state = State::default();
+    queue.roundtrip(&mut state).context("registry roundtrip")?;
+    if state.screencast.is_none() {
+        bail!(
+            "KWin is up but does not (yet) expose zkde_screencast_unstable_v1 — needs a real \
+             KDE session (or KWIN_WAYLAND_NO_PERMISSION_CHECKS=1), and KWin ≥ 6.5.6 for the \
+             headless virtual output"
+        );
+    }
+    Ok(())
+}
+
 fn run(
     width: u32,
     height: u32,
diff --git a/scripts/headless/run-headless-kde.sh b/scripts/headless/run-headless-kde.sh
index bba4443..04cbeab 100755
--- a/scripts/headless/run-headless-kde.sh
+++ b/scripts/headless/run-headless-kde.sh
@@ -1,12 +1,11 @@
 #!/usr/bin/env bash
 # Headless KDE Plasma session for the punktfunk host (no KMS scanout → kwin --virtual).
 #
-# Brings up the full desktop, not just the compositor. The env matters: without
+# Brings up the full desktop, not just the compositor, and waits for it to be *actually
+# ready* before starting the portals/plasma — no blind `sleep`. The env matters: without
 # XDG_MENU_PREFIX=plasma- the launcher resolves ${XDG_MENU_PREFIX}applications.menu →
 # "applications.menu", which doesn't exist on KDE installs (it ships
-# plasma-applications.menu) — plasmashell runs fine but the menu shows NO applications
-# and no System Settings entry. kded6/krunner/kglobalacceld are D-Bus-activated once
-# plasmashell starts in the right session env.
+# plasma-applications.menu) — plasmashell runs fine but the menu shows NO applications.
 #
 #   bash scripts/headless/run-headless-kde.sh [WxH]   # default 1920x1080
 #
@@ -29,18 +28,57 @@ export DESKTOP_SESSION=plasma
 export WAYLAND_DISPLAY=wayland-kde
 export KWIN_WAYLAND_NO_PERMISSION_CHECKS=1
 
-kwin_wayland --virtual --width "$W" --height "$H" --no-lockscreen \
-  --socket "$WAYLAND_DISPLAY" &
-KWIN_PID=$!
-sleep 2
+# The probe binary (gates readiness on KWin actually exposing zkde_screencast — not merely
+# the socket existing). Use a release build if present, else fall back to `cargo run`.
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+if [[ -x "$ROOT/target/release/punktfunk-host" ]]; then
+  PROBE=("$ROOT/target/release/punktfunk-host" probe-compositor)
+elif [[ -x "$ROOT/target/debug/punktfunk-host" ]]; then
+  PROBE=("$ROOT/target/debug/punktfunk-host" probe-compositor)
+else
+  PROBE=(cargo run -q --manifest-path "$ROOT/Cargo.toml" -p punktfunk-host -- probe-compositor)
+fi
 
-# The xdg-desktop-portal processes bind to the compositor that existed when THEY started;
-# after a kwin restart the stale instances point at a dead socket and RemoteDesktop/EIS
-# (mouse/keyboard injection) times out. Restart them against the fresh compositor.
-systemctl --user try-restart plasma-xdg-desktop-portal-kde.service xdg-desktop-portal.service 2>/dev/null || true
+# kwin to its own log (so its EGL/GPU-init errors are captured, not lost to the terminal).
+KWIN_LOG="${TMPDIR:-/tmp}/punktfunk-kwin.log"
+kwin_wayland --virtual --width "$W" --height "$H" --no-lockscreen \
+  --socket "$WAYLAND_DISPLAY" >"$KWIN_LOG" 2>&1 &
+KWIN_PID=$!
+
+# Active readiness wait: poll until KWin is up AND advertises the zkde_screencast global
+# (what the virtual-output backend needs), or fail fast with a useful message. kwin can also
+# exit immediately if EGL/GPU init fails — catch that.
+echo "waiting for KWin ($RES) to become ready…"
+DEADLINE=$(( SECONDS + 30 ))
+until "${PROBE[@]}" >/dev/null 2>&1; do
+  if ! kill -0 "$KWIN_PID" 2>/dev/null; then
+    echo "ERROR: kwin_wayland exited during startup — see $KWIN_LOG:" >&2
+    tail -n 20 "$KWIN_LOG" >&2 || true
+    exit 1
+  fi
+  if (( SECONDS >= DEADLINE )); then
+    echo "ERROR: KWin did not become ready within 30s. Last probe:" >&2
+    "${PROBE[@]}" >&2 || true
+    # Don't orphan a half-started compositor: it would keep holding $WAYLAND_DISPLAY and
+    # collide with the next bring-up attempt.
+    kill "$KWIN_PID" 2>/dev/null || true
+    exit 1
+  fi
+  sleep 0.5
+done
+echo "KWin ready."
+
+# Only NOW restart the portals, and against the correct env: the xdg-desktop-portal chain
+# binds the compositor that existed when it started, so a stale portal points at a dead
+# socket and RemoteDesktop/EIS (input injection) times out. Import the session env into the
+# systemd/D-Bus activation environment FIRST (the missing piece — the Sway script does this;
+# without it the restarted portal can inherit an empty WAYLAND_DISPLAY), then restart.
+systemctl --user import-environment WAYLAND_DISPLAY XDG_CURRENT_DESKTOP DBUS_SESSION_BUS_ADDRESS XDG_RUNTIME_DIR 2>/dev/null || true
+dbus-update-activation-environment --systemd WAYLAND_DISPLAY XDG_CURRENT_DESKTOP DBUS_SESSION_BUS_ADDRESS 2>/dev/null || true
+systemctl --user try-restart plasma-xdg-desktop-portal-kde.service xdg-desktop-portal-kde.service xdg-desktop-portal.service 2>/dev/null || true
 
 kbuildsycoca6 >/dev/null 2>&1 || true   # rebuild the menu cache under the correct env
 plasmashell &
 
-echo "headless KDE up on $WAYLAND_DISPLAY ($RES), kwin pid $KWIN_PID"
+echo "headless KDE up on $WAYLAND_DISPLAY ($RES), kwin pid $KWIN_PID (log: $KWIN_LOG)"
 wait "$KWIN_PID"