feat(headless-kde): reliable bring-up — readiness probe, fix portal ordering/env (roadmap #1 phase 1)
ci / rust (push) Has been cancelled
ci / rust (push) Has been cancelled
Headless KDE startup was a chain of timing-sensitive handoffs gated by a blind `sleep 2`, the dominant source of black screens.

Phase-1 fixes:

- New `punktfunk-host probe-compositor` subcommand: exits 0 iff the detected compositor is up AND ready to create a virtual output now. KWin gets a real check (connect + registry roundtrip + the privileged zkde_screencast global must be advertised — what the backend needs); gamescope/Mutter/wlroots create on demand, so the probe just confirms Linux. (vdisplay::probe dispatcher + kwin::probe; reuses kwin.rs's existing roundtrip path.)
- run-headless-kde.sh: replace `sleep 2` with an active readiness wait (poll probe-compositor until ready, 30s deadline, and bail with kwin's log if kwin_wayland exits during init). Move the portal restart to AFTER readiness, and precede it with `systemctl --user import-environment` + `dbus-update-activation-environment` (the missing env import — the Sway script does this; without it a restarted portal inherits a stale/empty WAYLAND_DISPLAY, which is the "streams but eats no input/audio" failure). kwin's stdout/stderr → a log file.

Validated: probe-compositor exits 0 "Kwin ready" against the live session, exits 1 with a clear diagnostic when the compositor is absent. 114 tests green, clippy/fmt clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -27,7 +27,7 @@ mod vdisplay;
|
|||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
mod zerocopy;
|
mod zerocopy;
|
||||||
|
|
||||||
use anyhow::{bail, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
use encode::Codec;
|
use encode::Codec;
|
||||||
use m0::{Options, Source};
|
use m0::{Options, Source};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
@@ -68,6 +68,15 @@ fn real_main() -> Result<()> {
|
|||||||
// Zero-copy FFI/GPU probe: init the EGL importer + CUDA context (no capture needed).
|
// Zero-copy FFI/GPU probe: init the EGL importer + CUDA context (no capture needed).
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
Some("zerocopy-probe") => zerocopy::probe(),
|
Some("zerocopy-probe") => zerocopy::probe(),
|
||||||
|
// Compositor readiness probe: exit 0 iff the (detected or PUNKTFUNK_COMPOSITOR-forced)
|
||||||
|
// compositor is up and able to create a virtual output *now*. A session-bringup
|
||||||
|
// script polls this to gate on real readiness instead of a blind `sleep`.
|
||||||
|
Some("probe-compositor") => {
|
||||||
|
let compositor = vdisplay::detect()?;
|
||||||
|
vdisplay::probe(compositor).with_context(|| format!("{compositor:?} not ready"))?;
|
||||||
|
println!("{compositor:?} ready");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
// M0 pipeline spike.
|
// M0 pipeline spike.
|
||||||
Some("m0") => m0::run(parse_m0(&args[1..])?),
|
Some("m0") => m0::run(parse_m0(&args[1..])?),
|
||||||
// M3: native punktfunk/1 host (QUIC control plane + UDP data plane).
|
// M3: native punktfunk/1 host (QUIC control plane + UDP data plane).
|
||||||
@@ -308,6 +317,7 @@ USAGE:
|
|||||||
+ the management REST API
|
+ the management REST API
|
||||||
punktfunk-host openapi print the management API's OpenAPI document (codegen)
|
punktfunk-host openapi print the management API's OpenAPI document (codegen)
|
||||||
punktfunk-host m3-host [OPTIONS] native punktfunk/1 host (QUIC control plane + UDP data plane)
|
punktfunk-host m3-host [OPTIONS] native punktfunk/1 host (QUIC control plane + UDP data plane)
|
||||||
|
punktfunk-host probe-compositor exit 0 iff the compositor is up + ready (session-bringup gate)
|
||||||
punktfunk-host m0 [OPTIONS] M0 capture→encode→file pipeline spike
|
punktfunk-host m0 [OPTIONS] M0 capture→encode→file pipeline spike
|
||||||
|
|
||||||
SERVE OPTIONS:
|
SERVE OPTIONS:
|
||||||
|
|||||||
@@ -111,6 +111,30 @@ pub fn open(compositor: Compositor) -> Result<Box<dyn VirtualDisplay>> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Readiness probe for `compositor`: is it up and able to create a virtual output *right
|
||||||
|
/// now*? A session-bringup script polls this (via `punktfunk-host probe-compositor`) to gate
|
||||||
|
/// on actual readiness instead of racing the compositor with a blind sleep.
|
||||||
|
///
|
||||||
|
/// KWin gets a real check (the privileged `zkde_screencast` global must be advertised). The
|
||||||
|
/// others are spawn/D-Bus/portal-based and have no equivalent pre-flight global, so the probe
|
||||||
|
/// performs no check for them — `Ok(())` there only means "go ahead and try `create`".
|
||||||
|
pub fn probe(compositor: Compositor) -> Result<()> {
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
{
|
||||||
|
match compositor {
|
||||||
|
Compositor::Kwin => kwin::probe(),
|
||||||
|
// gamescope spawns its own nested session per `create`; Mutter is D-Bus on demand;
|
||||||
|
// wlroots creates the output on demand — nothing to pre-check beyond "Linux".
|
||||||
|
Compositor::Gamescope | Compositor::Mutter | Compositor::Wlroots => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[cfg(not(target_os = "linux"))]
|
||||||
|
{
|
||||||
|
let _ = compositor;
|
||||||
|
anyhow::bail!("virtual displays require Linux (Wayland compositor)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Path of the file where the gamescope backend relays the nested session's `LIBEI_SOCKET`
|
/// Path of the file where the gamescope backend relays the nested session's `LIBEI_SOCKET`
|
||||||
/// (gamescope's EIS server) for the input injector.
|
/// (gamescope's EIS server) for the input injector.
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
|
|||||||
@@ -223,6 +223,29 @@ fn virtual_output_thread(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Readiness probe: connect to the KWin Wayland socket, roundtrip the registry, and confirm
|
||||||
|
/// the privileged `zkde_screencast` global is actually advertised. This is exactly what
|
||||||
|
/// [`run`] needs before it can create a virtual output, so a session-bringup script can poll
|
||||||
|
/// this to gate on the compositor being *ready* (not merely the socket existing) instead of
|
||||||
|
/// racing it with a blind sleep. `Ok(())` = ready; `Err` = not ready / no global yet.
|
||||||
|
pub fn probe() -> Result<()> {
|
||||||
|
let conn = Connection::connect_to_env()
|
||||||
|
.context("connect to KWin Wayland (is WAYLAND_DISPLAY set to the KWin socket?)")?;
|
||||||
|
let mut queue = conn.new_event_queue();
|
||||||
|
let qh = queue.handle();
|
||||||
|
let _registry = conn.display().get_registry(&qh, ());
|
||||||
|
let mut state = State::default();
|
||||||
|
queue.roundtrip(&mut state).context("registry roundtrip")?;
|
||||||
|
if state.screencast.is_none() {
|
||||||
|
bail!(
|
||||||
|
"KWin is up but does not (yet) expose zkde_screencast_unstable_v1 — needs a real \
|
||||||
|
KDE session (or KWIN_WAYLAND_NO_PERMISSION_CHECKS=1), and KWin ≥ 6.5.6 for the \
|
||||||
|
headless virtual output"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn run(
|
fn run(
|
||||||
width: u32,
|
width: u32,
|
||||||
height: u32,
|
height: u32,
|
||||||
|
|||||||
@@ -1,12 +1,11 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# Headless KDE Plasma session for the punktfunk host (no KMS scanout → kwin --virtual).
|
# Headless KDE Plasma session for the punktfunk host (no KMS scanout → kwin --virtual).
|
||||||
#
|
#
|
||||||
# Brings up the full desktop, not just the compositor. The env matters: without
|
# Brings up the full desktop, not just the compositor, and waits for it to be *actually
|
||||||
|
# ready* before starting the portals/plasma — no blind `sleep`. The env matters: without
|
||||||
# XDG_MENU_PREFIX=plasma- the launcher resolves ${XDG_MENU_PREFIX}applications.menu →
|
# XDG_MENU_PREFIX=plasma- the launcher resolves ${XDG_MENU_PREFIX}applications.menu →
|
||||||
# "applications.menu", which doesn't exist on KDE installs (it ships
|
# "applications.menu", which doesn't exist on KDE installs (it ships
|
||||||
# plasma-applications.menu) — plasmashell runs fine but the menu shows NO applications
|
# plasma-applications.menu) — plasmashell runs fine but the menu shows NO applications.
|
||||||
# and no System Settings entry. kded6/krunner/kglobalacceld are D-Bus-activated once
|
|
||||||
# plasmashell starts in the right session env.
|
|
||||||
#
|
#
|
||||||
# bash scripts/headless/run-headless-kde.sh [WxH] # default 1920x1080
|
# bash scripts/headless/run-headless-kde.sh [WxH] # default 1920x1080
|
||||||
#
|
#
|
||||||
@@ -29,18 +28,54 @@ export DESKTOP_SESSION=plasma
|
|||||||
export WAYLAND_DISPLAY=wayland-kde
|
export WAYLAND_DISPLAY=wayland-kde
|
||||||
export KWIN_WAYLAND_NO_PERMISSION_CHECKS=1
|
export KWIN_WAYLAND_NO_PERMISSION_CHECKS=1
|
||||||
|
|
||||||
kwin_wayland --virtual --width "$W" --height "$H" --no-lockscreen \
|
# The probe binary (gates readiness on KWin actually exposing zkde_screencast — not merely
|
||||||
--socket "$WAYLAND_DISPLAY" &
|
# the socket existing). Use a release build if present, then a debug build, else fall back to `cargo run`.
|
||||||
KWIN_PID=$!
|
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
|
||||||
sleep 2
|
if [[ -x "$ROOT/target/release/punktfunk-host" ]]; then
|
||||||
|
PROBE=("$ROOT/target/release/punktfunk-host" probe-compositor)
|
||||||
|
elif [[ -x "$ROOT/target/debug/punktfunk-host" ]]; then
|
||||||
|
PROBE=("$ROOT/target/debug/punktfunk-host" probe-compositor)
|
||||||
|
else
|
||||||
|
PROBE=(cargo run -q --manifest-path "$ROOT/Cargo.toml" -p punktfunk-host -- probe-compositor)
|
||||||
|
fi
|
||||||
|
|
||||||
# The xdg-desktop-portal processes bind to the compositor that existed when THEY started;
|
# Send kwin's stdout/stderr to its own log (so its EGL/GPU-init errors are captured, not lost to the terminal).
|
||||||
# after a kwin restart the stale instances point at a dead socket and RemoteDesktop/EIS
|
KWIN_LOG="${TMPDIR:-/tmp}/punktfunk-kwin.log"
|
||||||
# (mouse/keyboard injection) times out. Restart them against the fresh compositor.
|
kwin_wayland --virtual --width "$W" --height "$H" --no-lockscreen \
|
||||||
systemctl --user try-restart plasma-xdg-desktop-portal-kde.service xdg-desktop-portal.service 2>/dev/null || true
|
--socket "$WAYLAND_DISPLAY" >"$KWIN_LOG" 2>&1 &
|
||||||
|
KWIN_PID=$!
|
||||||
|
|
||||||
|
# Active readiness wait: poll until KWin is up AND advertises the zkde_screencast global
|
||||||
|
# (what the virtual-output backend needs), or fail fast with a useful message. kwin can also
|
||||||
|
# exit immediately if EGL/GPU init fails — catch that.
|
||||||
|
echo "waiting for KWin ($RES) to become ready…"
|
||||||
|
DEADLINE=$(( SECONDS + 30 ))
|
||||||
|
until "${PROBE[@]}" >/dev/null 2>&1; do
|
||||||
|
if ! kill -0 "$KWIN_PID" 2>/dev/null; then
|
||||||
|
echo "ERROR: kwin_wayland exited during startup — see $KWIN_LOG:" >&2
|
||||||
|
tail -n 20 "$KWIN_LOG" >&2 || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if (( SECONDS >= DEADLINE )); then
|
||||||
|
echo "ERROR: KWin did not become ready within 30s. Last probe:" >&2
|
||||||
|
"${PROBE[@]}" >&2 || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 0.5
|
||||||
|
done
|
||||||
|
echo "KWin ready."
|
||||||
|
|
||||||
|
# Only NOW restart the portals, and against the correct env: the xdg-desktop-portal chain
|
||||||
|
# binds the compositor that existed when it started, so a stale portal points at a dead
|
||||||
|
# socket and RemoteDesktop/EIS (input injection) times out. Import the session env into the
|
||||||
|
# systemd/D-Bus activation environment FIRST (the missing piece — the Sway script does this;
|
||||||
|
# without it the restarted portal can inherit an empty WAYLAND_DISPLAY), then restart.
|
||||||
|
systemctl --user import-environment WAYLAND_DISPLAY XDG_CURRENT_DESKTOP DBUS_SESSION_BUS_ADDRESS XDG_RUNTIME_DIR 2>/dev/null || true
|
||||||
|
dbus-update-activation-environment --systemd WAYLAND_DISPLAY XDG_CURRENT_DESKTOP DBUS_SESSION_BUS_ADDRESS 2>/dev/null || true
|
||||||
|
systemctl --user try-restart plasma-xdg-desktop-portal-kde.service xdg-desktop-portal-kde.service xdg-desktop-portal.service 2>/dev/null || true
|
||||||
|
|
||||||
kbuildsycoca6 >/dev/null 2>&1 || true # rebuild the menu cache under the correct env
|
kbuildsycoca6 >/dev/null 2>&1 || true # rebuild the menu cache under the correct env
|
||||||
plasmashell &
|
plasmashell &
|
||||||
|
|
||||||
echo "headless KDE up on $WAYLAND_DISPLAY ($RES), kwin pid $KWIN_PID"
|
echo "headless KDE up on $WAYLAND_DISPLAY ($RES), kwin pid $KWIN_PID (log: $KWIN_LOG)"
|
||||||
wait "$KWIN_PID"
|
wait "$KWIN_PID"
|
||||||
|
|||||||
Reference in New Issue
Block a user