feat(headless-kde): reliable bring-up — readiness probe, fix portal ordering/env (roadmap #1 phase 1)
ci / rust (push) Has been cancelled
ci / rust (push) Has been cancelled
Headless KDE startup was a chain of timing-sensitive handoffs gated by a blind `sleep 2`, the dominant source of black screens. Phase-1 fixes: - New `punktfunk-host probe-compositor` subcommand: exits 0 iff the detected compositor is up AND ready to create a virtual output now. KWin gets a real check (connect + registry roundtrip + the privileged zkde_screencast global must be advertised — what the backend needs); gamescope/Mutter/wlroots create on demand so the probe just confirms Linux. (vdisplay::probe dispatcher + kwin::probe; reuses kwin.rs's existing roundtrip path.) - run-headless-kde.sh: replace `sleep 2` with an active readiness wait (poll probe-compositor until ready, 30s deadline, and bail with kwin's log if kwin_wayland exits during init). Move the portal restart to AFTER readiness, and precede it with `systemctl --user import-environment` + `dbus-update-activation-environment` (the missing env import — the Sway script does this; without it a restarted portal inherits a stale/empty WAYLAND_DISPLAY, which is the "streams but eats no input/audio" failure). kwin's stderr → a log file. Validated: probe-compositor exits 0 "Kwin ready" against the live session, exit 1 with a clear diagnostic when the compositor is absent. 114 tests green, clippy/fmt clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -27,7 +27,7 @@ mod vdisplay;
|
||||
#[cfg(target_os = "linux")]
|
||||
mod zerocopy;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use encode::Codec;
|
||||
use m0::{Options, Source};
|
||||
use std::path::PathBuf;
|
||||
@@ -68,6 +68,15 @@ fn real_main() -> Result<()> {
|
||||
// Zero-copy FFI/GPU probe: init the EGL importer + CUDA context (no capture needed).
|
||||
#[cfg(target_os = "linux")]
|
||||
Some("zerocopy-probe") => zerocopy::probe(),
|
||||
// Compositor readiness probe: exit 0 iff the (detected or PUNKTFUNK_COMPOSITOR-forced)
|
||||
// compositor is up and able to create a virtual output *now*. A session-bringup
|
||||
// script polls this to gate on real readiness instead of a blind `sleep`.
|
||||
Some("probe-compositor") => {
|
||||
let compositor = vdisplay::detect()?;
|
||||
vdisplay::probe(compositor).with_context(|| format!("{compositor:?} not ready"))?;
|
||||
println!("{compositor:?} ready");
|
||||
Ok(())
|
||||
}
|
||||
// M0 pipeline spike.
|
||||
Some("m0") => m0::run(parse_m0(&args[1..])?),
|
||||
// M3: native punktfunk/1 host (QUIC control plane + UDP data plane).
|
||||
@@ -308,6 +317,7 @@ USAGE:
|
||||
+ the management REST API
|
||||
punktfunk-host openapi print the management API's OpenAPI document (codegen)
|
||||
punktfunk-host m3-host [OPTIONS] native punktfunk/1 host (QUIC control plane + UDP data plane)
|
||||
punktfunk-host probe-compositor exit 0 iff the compositor is up + ready (session-bringup gate)
|
||||
punktfunk-host m0 [OPTIONS] M0 capture→encode→file pipeline spike
|
||||
|
||||
SERVE OPTIONS:
|
||||
|
||||
@@ -111,6 +111,30 @@ pub fn open(compositor: Compositor) -> Result<Box<dyn VirtualDisplay>> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Readiness probe for `compositor`: is it up and able to create a virtual output *right
|
||||
/// now*? A session-bringup script polls this (via `punktfunk-host probe-compositor`) to gate
|
||||
/// on actual readiness instead of racing the compositor with a blind sleep.
|
||||
///
|
||||
/// KWin gets a real check (the privileged `zkde_screencast` global must be advertised). The
|
||||
/// others are spawn/D-Bus/portal-based and have no equivalent pre-flight global, so a probe
|
||||
/// just confirms the backend opens — `Ok(())` means "go ahead and try `create`".
|
||||
pub fn probe(compositor: Compositor) -> Result<()> {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
match compositor {
|
||||
Compositor::Kwin => kwin::probe(),
|
||||
// gamescope spawns its own nested session per `create`; Mutter is D-Bus on demand;
|
||||
// wlroots creates the output on demand — nothing to pre-check beyond "Linux".
|
||||
Compositor::Gamescope | Compositor::Mutter | Compositor::Wlroots => Ok(()),
|
||||
}
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
let _ = compositor;
|
||||
anyhow::bail!("virtual displays require Linux (Wayland compositor)")
|
||||
}
|
||||
}
|
||||
|
||||
/// Path of the file where the gamescope backend relays the nested session's `LIBEI_SOCKET`
|
||||
/// (gamescope's EIS server) for the input injector.
|
||||
#[cfg(target_os = "linux")]
|
||||
|
||||
@@ -223,6 +223,29 @@ fn virtual_output_thread(
|
||||
}
|
||||
}
|
||||
|
||||
/// Readiness probe: connect to the KWin Wayland socket, roundtrip the registry, and confirm
|
||||
/// the privileged `zkde_screencast` global is actually advertised. This is exactly what
|
||||
/// [`run`] needs before it can create a virtual output, so a session-bringup script can poll
|
||||
/// this to gate on the compositor being *ready* (not merely the socket existing) instead of
|
||||
/// racing it with a blind sleep. `Ok(())` = ready; `Err` = not ready / no global yet.
|
||||
pub fn probe() -> Result<()> {
|
||||
let conn = Connection::connect_to_env()
|
||||
.context("connect to KWin Wayland (is WAYLAND_DISPLAY set to the KWin socket?)")?;
|
||||
let mut queue = conn.new_event_queue();
|
||||
let qh = queue.handle();
|
||||
let _registry = conn.display().get_registry(&qh, ());
|
||||
let mut state = State::default();
|
||||
queue.roundtrip(&mut state).context("registry roundtrip")?;
|
||||
if state.screencast.is_none() {
|
||||
bail!(
|
||||
"KWin is up but does not (yet) expose zkde_screencast_unstable_v1 — needs a real \
|
||||
KDE session (or KWIN_WAYLAND_NO_PERMISSION_CHECKS=1), and KWin ≥ 6.5.6 for the \
|
||||
headless virtual output"
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run(
|
||||
width: u32,
|
||||
height: u32,
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
#!/usr/bin/env bash
|
||||
# Headless KDE Plasma session for the punktfunk host (no KMS scanout → kwin --virtual).
|
||||
#
|
||||
# Brings up the full desktop, not just the compositor. The env matters: without
|
||||
# Brings up the full desktop, not just the compositor, and waits for it to be *actually
|
||||
# ready* before starting the portals/plasma — no blind `sleep`. The env matters: without
|
||||
# XDG_MENU_PREFIX=plasma- the launcher resolves ${XDG_MENU_PREFIX}applications.menu →
|
||||
# "applications.menu", which doesn't exist on KDE installs (it ships
|
||||
# plasma-applications.menu) — plasmashell runs fine but the menu shows NO applications
|
||||
# and no System Settings entry. kded6/krunner/kglobalacceld are D-Bus-activated once
|
||||
# plasmashell starts in the right session env.
|
||||
# plasma-applications.menu) — plasmashell runs fine but the menu shows NO applications.
|
||||
#
|
||||
# bash scripts/headless/run-headless-kde.sh [WxH] # default 1920x1080
|
||||
#
|
||||
@@ -29,18 +28,54 @@ export DESKTOP_SESSION=plasma
|
||||
export WAYLAND_DISPLAY=wayland-kde
|
||||
export KWIN_WAYLAND_NO_PERMISSION_CHECKS=1
|
||||
|
||||
kwin_wayland --virtual --width "$W" --height "$H" --no-lockscreen \
|
||||
--socket "$WAYLAND_DISPLAY" &
|
||||
KWIN_PID=$!
|
||||
sleep 2
|
||||
# The probe binary (gates readiness on KWin actually exposing zkde_screencast — not merely
|
||||
# the socket existing). Use a release build if present, else fall back to `cargo run`.
|
||||
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
|
||||
if [[ -x "$ROOT/target/release/punktfunk-host" ]]; then
|
||||
PROBE=("$ROOT/target/release/punktfunk-host" probe-compositor)
|
||||
elif [[ -x "$ROOT/target/debug/punktfunk-host" ]]; then
|
||||
PROBE=("$ROOT/target/debug/punktfunk-host" probe-compositor)
|
||||
else
|
||||
PROBE=(cargo run -q --manifest-path "$ROOT/Cargo.toml" -p punktfunk-host -- probe-compositor)
|
||||
fi
|
||||
|
||||
# The xdg-desktop-portal processes bind to the compositor that existed when THEY started;
|
||||
# after a kwin restart the stale instances point at a dead socket and RemoteDesktop/EIS
|
||||
# (mouse/keyboard injection) times out. Restart them against the fresh compositor.
|
||||
systemctl --user try-restart plasma-xdg-desktop-portal-kde.service xdg-desktop-portal.service 2>/dev/null || true
|
||||
# kwin to its own log (so its EGL/GPU-init errors are captured, not lost to the terminal).
|
||||
KWIN_LOG="${TMPDIR:-/tmp}/punktfunk-kwin.log"
|
||||
kwin_wayland --virtual --width "$W" --height "$H" --no-lockscreen \
|
||||
--socket "$WAYLAND_DISPLAY" >"$KWIN_LOG" 2>&1 &
|
||||
KWIN_PID=$!
|
||||
|
||||
# Active readiness wait: poll until KWin is up AND advertises the zkde_screencast global
|
||||
# (what the virtual-output backend needs), or fail fast with a useful message. kwin can also
|
||||
# exit immediately if EGL/GPU init fails — catch that.
|
||||
echo "waiting for KWin ($RES) to become ready…"
|
||||
# Poll the probe until it succeeds or the deadline passes. The combined stdout/stderr of
# each attempt is captured, so on timeout the diagnostic printed is the one from the attempt
# that actually failed. Re-running the probe just to get a message could race with KWin
# becoming ready and print a success line under an ERROR header.
READY_TIMEOUT_S=30   # single source of truth for both the deadline and the error text
DEADLINE=$(( SECONDS + READY_TIMEOUT_S ))
PROBE_OUT=""
until PROBE_OUT="$("${PROBE[@]}" 2>&1)"; do
    # kwin_wayland can exit right away (e.g. EGL/GPU init failure); waiting is pointless
    # then, so show the tail of its log and stop.
    if ! kill -0 "$KWIN_PID" 2>/dev/null; then
        echo "ERROR: kwin_wayland exited during startup — see $KWIN_LOG:" >&2
        tail -n 20 "$KWIN_LOG" >&2 || true
        exit 1
    fi
    if (( SECONDS >= DEADLINE )); then
        echo "ERROR: KWin did not become ready within ${READY_TIMEOUT_S}s. Last probe:" >&2
        printf '%s\n' "$PROBE_OUT" >&2
        exit 1
    fi
    sleep 0.5
done
|
||||
echo "KWin ready."
|
||||
|
||||
# Only NOW restart the portals, and against the correct env: the xdg-desktop-portal chain
|
||||
# binds the compositor that existed when it started, so a stale portal points at a dead
|
||||
# socket and RemoteDesktop/EIS (input injection) times out. Import the session env into the
|
||||
# systemd/D-Bus activation environment FIRST (the missing piece — the Sway script does this;
|
||||
# without it the restarted portal can inherit an empty WAYLAND_DISPLAY), then restart.
|
||||
systemctl --user import-environment WAYLAND_DISPLAY XDG_CURRENT_DESKTOP DBUS_SESSION_BUS_ADDRESS XDG_RUNTIME_DIR 2>/dev/null || true
|
||||
dbus-update-activation-environment --systemd WAYLAND_DISPLAY XDG_CURRENT_DESKTOP DBUS_SESSION_BUS_ADDRESS 2>/dev/null || true
|
||||
systemctl --user try-restart plasma-xdg-desktop-portal-kde.service xdg-desktop-portal-kde.service xdg-desktop-portal.service 2>/dev/null || true
|
||||
|
||||
kbuildsycoca6 >/dev/null 2>&1 || true # rebuild the menu cache under the correct env
|
||||
plasmashell &
|
||||
|
||||
echo "headless KDE up on $WAYLAND_DISPLAY ($RES), kwin pid $KWIN_PID"
|
||||
echo "headless KDE up on $WAYLAND_DISPLAY ($RES), kwin pid $KWIN_PID (log: $KWIN_LOG)"
|
||||
wait "$KWIN_PID"
|
||||
|
||||
Reference in New Issue
Block a user