feat(apple): explicit input-capture state machine — no more cursor grabs on window chrome
ci / rust (push) Has been cancelled
ci / rust (push) Has been cancelled
Capture used to engage whenever the app became active, so the click that activates the window — on the title bar (a drag) or a resize edge — got the cursor warped away mid-gesture, and raw deltas kept streaming to the host while the user fought the window.

Reworked Moonlight-style, with capture as a deliberate, reversible state owned by StreamLayerView:

- Engage: automatically once when the stream starts / trust is confirmed (one-shot, can never fire surprisingly later), or by clicking into the video (that click's press/release are suppressed toward the host; acceptsFirstMouse makes it one click from another app). NEVER on app re-activation.
- Release: ⌘⎋ (toggles, key-window-scoped), focus loss — now including same-app window switches (⌘, / ⌘N / ⌘M resign key without resigning the app; previously the new window inherited a hidden frozen cursor and its typing was double-delivered to the host) — and disconnect.
- While released: nothing is forwarded (InputCapture.forwarding gates the GC handlers; held keys/buttons are flushed host-side so nothing sticks), the cursor is free, and the HUD (now showing the capture state) is clickable.
- The no-beep behavior moved from the NSEvent monitor to first-responder key consumption — swallowing at the monitor risked starving GC's own delivery (the "input broken altogether" report). The monitor now only intercepts ⌘⎋.
- Adversarial-review fixes: a second session preempts the previous one cleanly instead of leaving it captured with dead GC handlers (onPreempted); the engage click's suppression latch can't outlive the click (mouseUp backstop); ⌘⎋'s physical Esc can't type into the host in either toggle direction (suppressedVK latch + Esc-while-⌘ guard); capture callbacks defer out of the SwiftUI update pass.

Validated live against the box: 16185 input datagrams injected during a captured session (gamescope EIS), title-bar drag/resize free while released, and visible cursor + typing on a streamed KWin desktop, all user-confirmed.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -5,6 +5,13 @@
|
||||
// zero-copy on Apple silicon. Stage 2 (explicit VTDecompressionSession + CAMetalLayer)
|
||||
// replaces this when we start tuning frame pacing / measuring glass-to-glass.
|
||||
//
|
||||
// The view also owns the input-capture state machine (Moonlight-style): capture is a
|
||||
// deliberate, reversible state — engaged when the stream starts and when the user clicks
|
||||
// into the video, released by ⌘⎋ or focus loss, and NEVER engaged by mere app
|
||||
// activation (the click that activates the window may be a title-bar drag or a resize —
|
||||
// warping the cursor there is exactly the intrusiveness this design removes). While
|
||||
// released, nothing is forwarded to the host and the local cursor is free.
|
||||
//
|
||||
// macOS-first (NSViewRepresentable); the iOS variant is the same layer under
|
||||
// UIViewRepresentable.
|
||||
|
||||
@@ -13,10 +20,10 @@ import AppKit
|
||||
import AVFoundation
|
||||
import SwiftUI
|
||||
|
||||
/// Hides the LOCAL cursor while streaming. The host renders its own cursor, and the local
|
||||
/// Hides the LOCAL cursor while captured. The host renders its own cursor, and the local
|
||||
/// one both diverges from it (the host applies acceleration/clamping to our raw deltas)
|
||||
/// and can wander out of the window — a click there would focus another app. So while the
|
||||
/// stream has focus we do what Moonlight does: warp the cursor into the view, freeze it
|
||||
/// and can wander out of the window — a click there would focus another app. So while
|
||||
/// captured we do what Moonlight does: warp the cursor into the view, freeze it
|
||||
/// (`CGAssociateMouseAndMouseCursorPosition(false)` — GCMouse still delivers raw HID
|
||||
/// deltas), and hide it. hide/unhide and associate are balanced via `captured`.
|
||||
private final class CursorCapture {
|
||||
@@ -44,34 +51,41 @@ private final class CursorCapture {
|
||||
|
||||
public struct StreamView: NSViewRepresentable {
|
||||
private let connection: PunktfunkConnection
|
||||
private let capturesCursor: Bool
|
||||
private let captureEnabled: Bool
|
||||
private let onCaptureChange: ((Bool) -> Void)?
|
||||
private let onFrame: (@Sendable (AccessUnit) -> Void)?
|
||||
private let onSessionEnd: (@Sendable () -> Void)?
|
||||
|
||||
/// `onFrame`/`onSessionEnd` fire on the pump thread — hop to the main actor for UI.
|
||||
/// `capturesCursor: false` keeps the local cursor usable while UI (e.g. a trust
|
||||
/// prompt) is layered over the stream; flip it to true to enter capture.
|
||||
/// `captureEnabled: false` disables input capture entirely while UI (e.g. a trust
|
||||
/// prompt) is layered over the stream; flipping it to true auto-engages capture
|
||||
/// once. `onCaptureChange` (main thread) reports engage/release — drive the HUD's
|
||||
/// "click to capture" / "⌘⎋ releases" hint with it.
|
||||
public init(
|
||||
connection: PunktfunkConnection,
|
||||
capturesCursor: Bool = true,
|
||||
captureEnabled: Bool = true,
|
||||
onCaptureChange: ((Bool) -> Void)? = nil,
|
||||
onFrame: (@Sendable (AccessUnit) -> Void)? = nil,
|
||||
onSessionEnd: (@Sendable () -> Void)? = nil
|
||||
) {
|
||||
self.connection = connection
|
||||
self.capturesCursor = capturesCursor
|
||||
self.captureEnabled = captureEnabled
|
||||
self.onCaptureChange = onCaptureChange
|
||||
self.onFrame = onFrame
|
||||
self.onSessionEnd = onSessionEnd
|
||||
}
|
||||
|
||||
public func makeNSView(context: Context) -> StreamLayerView {
|
||||
let view = StreamLayerView()
|
||||
view.capturesCursor = capturesCursor
|
||||
view.onCaptureChange = onCaptureChange
|
||||
view.captureEnabled = captureEnabled
|
||||
view.start(connection: connection, onFrame: onFrame, onSessionEnd: onSessionEnd)
|
||||
return view
|
||||
}
|
||||
|
||||
public func updateNSView(_ view: StreamLayerView, context: Context) {
|
||||
view.capturesCursor = capturesCursor
|
||||
view.onCaptureChange = onCaptureChange
|
||||
view.captureEnabled = captureEnabled
|
||||
// SwiftUI reuses the NSView across state changes — repoint the pump only when the
|
||||
// connection identity actually changed.
|
||||
if view.connection !== connection {
|
||||
@@ -106,16 +120,30 @@ public final class StreamLayerView: NSView {
|
||||
private var token: PumpToken?
|
||||
public private(set) var connection: PunktfunkConnection?
|
||||
private let cursorCapture = CursorCapture()
|
||||
private var inputCapture: InputCapture?
|
||||
private var appObservers: [NSObjectProtocol] = []
|
||||
private var windowObservers: [NSObjectProtocol] = []
|
||||
|
||||
/// Main-thread only. False = leave the local cursor alone (UI layered over the
|
||||
/// stream); switching back to true re-enters capture immediately.
|
||||
public var capturesCursor = true {
|
||||
/// Whether input capture is currently engaged (cursor hidden+frozen, mouse/keyboard
|
||||
/// forwarded). Main-thread only.
|
||||
public private(set) var captured = false
|
||||
/// One-shot auto-engage request (stream start, trust confirmed) — attempted as soon
|
||||
/// as the view is in a window with real bounds, then dropped, so it can never fire
|
||||
/// surprisingly later (e.g. on a resize).
|
||||
private var pendingAutoCapture = false
|
||||
|
||||
/// Reports engage/release on the main thread.
|
||||
public var onCaptureChange: ((Bool) -> Void)?
|
||||
|
||||
/// Main-thread only. False = input capture disabled outright (UI layered over the
|
||||
/// stream); flipping to true auto-engages once.
|
||||
public var captureEnabled = true {
|
||||
didSet {
|
||||
if capturesCursor {
|
||||
captureCursorIfStreaming()
|
||||
guard captureEnabled != oldValue else { return }
|
||||
if captureEnabled {
|
||||
requestAutoCapture()
|
||||
} else {
|
||||
cursorCapture.release()
|
||||
releaseCapture()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -125,17 +153,13 @@ public final class StreamLayerView: NSView {
|
||||
displayLayer.videoGravity = .resizeAspect
|
||||
layer = displayLayer // layer-hosting: assign before wantsLayer
|
||||
wantsLayer = true
|
||||
// The cursor comes back whenever the app loses focus (Cmd+Tab is the escape
|
||||
// hatch) and is re-captured when the stream regains it.
|
||||
// Focus loss releases capture. Becoming active does NOT re-engage: the click
|
||||
// that activates the window may be on the title bar (a drag) or a resize edge —
|
||||
// the user clicks into the video (or hits ⌘⎋) when they want capture back.
|
||||
appObservers.append(NotificationCenter.default.addObserver(
|
||||
forName: NSApplication.didResignActiveNotification, object: nil, queue: .main
|
||||
) { [weak self] _ in
|
||||
self?.cursorCapture.release()
|
||||
})
|
||||
appObservers.append(NotificationCenter.default.addObserver(
|
||||
forName: NSApplication.didBecomeActiveNotification, object: nil, queue: .main
|
||||
) { [weak self] _ in
|
||||
self?.captureCursorIfStreaming()
|
||||
self?.releaseCapture()
|
||||
})
|
||||
}
|
||||
|
||||
@@ -143,18 +167,111 @@ public final class StreamLayerView: NSView {
|
||||
|
||||
public override func viewDidMoveToWindow() {
|
||||
super.viewDidMoveToWindow()
|
||||
if window == nil {
|
||||
cursorCapture.release()
|
||||
} else {
|
||||
captureCursorIfStreaming()
|
||||
windowObservers.forEach(NotificationCenter.default.removeObserver(_:))
|
||||
windowObservers.removeAll()
|
||||
guard let window else {
|
||||
releaseCapture()
|
||||
return
|
||||
}
|
||||
// ⌘-key-equivalents stay live while captured, so Settings (⌘,), a new window
|
||||
// (⌘N), or Minimize (⌘M) can take key status without the APP resigning active —
|
||||
// capture must release then too, or the new window inherits a hidden, frozen
|
||||
// cursor and its local typing is double-delivered to the host.
|
||||
for name in [NSWindow.didResignKeyNotification, NSWindow.didMiniaturizeNotification] {
|
||||
windowObservers.append(NotificationCenter.default.addObserver(
|
||||
forName: name, object: window, queue: .main
|
||||
) { [weak self] _ in
|
||||
self?.releaseCapture()
|
||||
})
|
||||
}
|
||||
attemptPendingCapture()
|
||||
}
|
||||
|
||||
private func captureCursorIfStreaming() {
|
||||
guard capturesCursor, token != nil, NSApp.isActive else { return }
|
||||
cursorCapture.capture(in: self)
|
||||
public override func layout() {
|
||||
super.layout()
|
||||
attemptPendingCapture() // bounds become real here on first presentation
|
||||
}
|
||||
|
||||
// MARK: - Capture state machine
|
||||
|
||||
/// Clicking into the video engages capture; that click is local (engagement), so
|
||||
/// InputCapture suppresses its press/release toward the host. Clicks while captured
|
||||
/// are the host's (GC forwards them) — nothing to do here.
|
||||
public override func mouseDown(with event: NSEvent) {
|
||||
if captureEnabled, !captured {
|
||||
engageCapture(fromClick: true)
|
||||
return
|
||||
}
|
||||
super.mouseDown(with: event)
|
||||
}
|
||||
|
||||
/// A click from another app counts (one click into the video captures, not two).
|
||||
public override func acceptsFirstMouse(for event: NSEvent?) -> Bool { true }
|
||||
|
||||
/// The engage click is complete — drop its suppression latch (see InputCapture;
|
||||
/// guards against GC delivering both halves of the click before our mouseDown).
|
||||
public override func mouseUp(with event: NSEvent) {
|
||||
inputCapture?.endClickSuppression()
|
||||
super.mouseUp(with: event)
|
||||
}
|
||||
|
||||
// While captured, the view is first responder and consumes key events — GC delivers
|
||||
// them to the host independently, and consuming here stops the responder chain's
|
||||
// "unhandled keyDown" beep without touching the event stream GC may rely on.
|
||||
// ⌘-combos arrive via performKeyEquivalent instead and stay fully functional (⌘D).
|
||||
public override var acceptsFirstResponder: Bool { true }
|
||||
public override func keyDown(with event: NSEvent) {
|
||||
if captured { return }
|
||||
super.keyDown(with: event)
|
||||
}
|
||||
public override func keyUp(with event: NSEvent) {
|
||||
if captured { return }
|
||||
super.keyUp(with: event)
|
||||
}
|
||||
|
||||
private func requestAutoCapture() {
|
||||
pendingAutoCapture = true
|
||||
attemptPendingCapture()
|
||||
}
|
||||
|
||||
private func attemptPendingCapture() {
|
||||
guard pendingAutoCapture, window != nil, bounds.width > 0 else { return }
|
||||
pendingAutoCapture = false // one shot, even if the engage below is refused
|
||||
engageCapture(fromClick: false)
|
||||
}
|
||||
|
||||
private func engageCapture(fromClick: Bool) {
|
||||
// A click is explicit intent AND may arrive mid-activation (acceptsFirstMouse:
|
||||
// NSApp.isActive / isKeyWindow are still false for the click coming in from
|
||||
// another app) — only the auto-engage paths require already-held key status.
|
||||
guard captureEnabled, !captured, token != nil, window != nil,
|
||||
fromClick || (NSApp.isActive && window?.isKeyWindow == true)
|
||||
else { return }
|
||||
cursorCapture.capture(in: self)
|
||||
inputCapture?.setForwarding(true, suppressClick: fromClick)
|
||||
captured = true
|
||||
window?.makeFirstResponder(self)
|
||||
notifyCaptureChange(true)
|
||||
}
|
||||
|
||||
private func releaseCapture() {
|
||||
guard captured else { return }
|
||||
cursorCapture.release()
|
||||
inputCapture?.setForwarding(false)
|
||||
captured = false
|
||||
notifyCaptureChange(false)
|
||||
}
|
||||
|
||||
/// Engage/release can run inside a SwiftUI update pass (captureEnabled flips in
|
||||
/// updateNSView; release in dismantleNSView) — publishing model state synchronously
|
||||
/// there is undefined behavior, so the callback is deferred a runloop turn.
|
||||
private func notifyCaptureChange(_ captured: Bool) {
|
||||
guard let onCaptureChange else { return }
|
||||
DispatchQueue.main.async { onCaptureChange(captured) }
|
||||
}
|
||||
|
||||
// MARK: - Pump
|
||||
|
||||
/// Pump thread: pull AUs from the connection, wrap, enqueue. The first IDR yields the
|
||||
/// format description; non-IDR AUs before it are dropped (the host opens with an IDR).
|
||||
public func start(
|
||||
@@ -169,6 +286,27 @@ public final class StreamLayerView: NSView {
|
||||
let layer = displayLayer
|
||||
layer.flush() // drop any frames a previous connection left queued
|
||||
|
||||
// The view owns the session's input capture: handlers attach now, but nothing is
|
||||
// forwarded until capture engages (captureEnabled + auto-engage or a click).
|
||||
let capture = InputCapture(connection: connection)
|
||||
capture.onToggleCapture = { [weak self] in
|
||||
// The ⌘⎋ monitor is app-wide — only the key window's stream owns the toggle
|
||||
// (two stream windows would otherwise flip each other's capture).
|
||||
guard let self, self.window?.isKeyWindow == true else { return }
|
||||
if self.captured {
|
||||
self.releaseCapture()
|
||||
} else {
|
||||
self.engageCapture(fromClick: false)
|
||||
}
|
||||
}
|
||||
capture.onPreempted = { [weak self] in
|
||||
// A newer session took the GC handler slots — staying "captured" here would
|
||||
// be a cursor trap with dead input.
|
||||
self?.releaseCapture()
|
||||
}
|
||||
capture.start()
|
||||
inputCapture = capture
|
||||
|
||||
let thread = Thread {
|
||||
var format: CMVideoFormatDescription?
|
||||
while token.isLive {
|
||||
@@ -203,13 +341,15 @@ public final class StreamLayerView: NSView {
|
||||
thread.name = "punktfunk-pump"
|
||||
thread.qualityOfService = .userInteractive
|
||||
thread.start()
|
||||
captureCursorIfStreaming()
|
||||
requestAutoCapture() // entering a session is the deliberate "capture me" moment
|
||||
}
|
||||
|
||||
/// Stop pumping (≤ one poll timeout). Does not close the connection — that stays with
|
||||
/// whoever owns it (PunktfunkConnection.close() is safe alongside a draining pump).
|
||||
public func stop() {
|
||||
cursorCapture.release()
|
||||
releaseCapture()
|
||||
inputCapture?.stop()
|
||||
inputCapture = nil
|
||||
token?.cancel()
|
||||
token = nil
|
||||
connection = nil
|
||||
@@ -217,6 +357,7 @@ public final class StreamLayerView: NSView {
|
||||
|
||||
deinit {
|
||||
appObservers.forEach(NotificationCenter.default.removeObserver(_:))
|
||||
windowObservers.forEach(NotificationCenter.default.removeObserver(_:))
|
||||
token?.cancel()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user