feat: M4 stage 1 — the SwiftUI client is real: compiles, tested, first light on glass
ci / rust (push) Has been cancelled

The clients/apple scaffold is now a working macOS client, validated live against this
repo's host across the LAN: gamescope virtual output → NVENC HEVC → lumen/1 (GF(2¹⁶) FEC +
AES-GCM over UDP, QUIC control) → VideoToolbox → AVSampleBufferDisplayLayer at 720p60,
mouse/keyboard flowing back as QUIC datagrams into the host's gamescope EIS injector
(~3.7k events injected in one session).

LumenKit:
- LumenConnection: the predicted cbindgen compile fixes (C17 header spells the typedefs as
  integers while the enum constants import as a distinct Swift type — bridge by rawValue);
  close() is now safe from any thread (a close flag + pumpLock held across the blocking
  poll enforce the C contract "never close with a next_au in flight"; flag prevents
  lock-starvation by back-to-back polls).
- StreamView: per-pump cancellation token (reconnects can't double-pump), flush + re-gate
  on the next in-band parameter sets when the layer fails, no stale enqueue after restart.
- InputCapture: fractional-delta accumulation (sub-pixel motion isn't truncated away),
  pressed-state tracking with release-all on focus loss and stop() (nothing sticks down
  host-side), global-singleton ownership guard (GC has one handler slot per process),
  X1/X2 buttons, horizontal scroll, full keypad/CapsLock/ISO-102nd/PrintScreen/Menu VKs.
- LumenClient app shell (swift run LumenClient): connect form, fps/Mb-s HUD,
  LUMEN_AUTOCONNECT/LUMEN_MODE for scripted first-light runs.
- Tests: Annex-B byte-level units; real-codec round trip (VTCompressionSession-encoded
  HEVC rebuilt as the host's wire shape → AnnexB → VTDecompressionSession → pixels);
  test-loopback.sh (Swift client vs a real local m3-host over loopback — the Swift twin of
  c_abi_connection_roundtrip); RemoteFirstLightTests (full pipeline over the LAN).

Host/build fixes that fell out:
- The workspace builds on non-Linux again: gamestream audio (opus) and sendmmsg batching
  are now platform-gated with stubs/fallback, per the crate's "compiles everywhere" rule.
- Horizontal scroll was inverted end-to-end: the injectors negated BOTH axes onto the
  ei/wl axes, but GameStream's horizontal convention is positive = right
  (moonlight-qt/Sunshine pass it through unnegated) — only vertical flips now. This also
  un-inverts real Moonlight clients.
- AnnexB drops all zeros preceding a start code (trailing_zero_8bits padding), ffmpeg's
  policy, instead of leaking them into the preceding NAL.
- build-xcframework.sh: deployment targets pinned to the package floor + an otool guard —
  cargo does not fingerprint MACOSX_DEPLOYMENT_TARGET, so warm caches can silently ship
  too-new minos objects.

Adversarially reviewed (5-dimension multi-agent pass, every finding refutation-verified):
14 confirmed findings, all fixed above; the send-while-polling core-contract gap flagged
here is closed by the lumen/1 session-planes work (&self pulls + per-plane borrow slots).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 14:38:01 +02:00
parent 520d7342dd
commit bf8a974e8b
23 changed files with 1212 additions and 180 deletions
@@ -0,0 +1,79 @@
// Unit tests for the Annex-B → AVCC plumbing (pure byte-level; no codec involved —
// VideoToolboxRoundTripTests covers the real-bitstream path).
import XCTest
@testable import LumenKit
final class AnnexBTests: XCTestCase {
    private let start4: [UInt8] = [0, 0, 0, 1]
    private let start3: [UInt8] = [0, 0, 1]

    /// Builds a test NAL unit: the first header byte is `type << 1` (HEVC type in
    /// bits 1..6), the second is 0x01, followed by `payload`.
    private func nal(type: UInt8, payload: [UInt8]) -> Data {
        var unit = Data([type << 1, 0x01])
        unit.append(contentsOf: payload)
        return unit
    }

    /// Concatenates the given pieces, in order, into one contiguous buffer.
    private func stream(_ pieces: [Data]) -> Data {
        pieces.reduce(into: Data()) { $0.append($1) }
    }

    /// 3-byte and 4-byte start codes may be mixed within one access unit.
    func testSplitMixedStartCodes() {
        let first = nal(type: 32, payload: [0xAA])
        let second = nal(type: 33, payload: [0xBB, 0xBC])
        let third = nal(type: 19, payload: [0xCC, 0xCD, 0xCE])

        let au = stream([Data(start4), first, Data(start3), second, Data(start4), third])

        let units = AnnexB.nalUnits(in: au)
        XCTAssertEqual(units, [first, second, third])
        XCTAssertEqual(units.map(AnnexB.hevcNalType), [32, 33, 19])
    }

    /// A lone NAL with no start code after it is still returned.
    func testSplitSingleNalNoTrailingCode() {
        let only = nal(type: 34, payload: [1, 2, 3])
        let au = stream([Data(start3), only])
        XCTAssertEqual(AnnexB.nalUnits(in: au), [only])
    }

    /// Empty input and input without any start code both yield no NALs.
    func testSplitEmptyAndGarbage() {
        let empty = Data()
        XCTAssertEqual(AnnexB.nalUnits(in: empty), [])

        // No start code at all → no NALs.
        let garbage = Data([9, 8, 7, 6])
        XCTAssertEqual(AnnexB.nalUnits(in: garbage), [])
    }

    /// trailing_zero_8bits between NALs (and >2 zeros forming a long separator) must
    /// not leak into the preceding NAL.
    func testSplitDropsTrailingZeroPadding() {
        let before = nal(type: 33, payload: [0xAA])
        let after = nal(type: 19, payload: [0xBB])
        let paddedSeparator = Data([0, 0, 0, 0, 0, 1])  // padding + start code

        let au = stream([Data(start4), before, paddedSeparator, after])

        XCTAssertEqual(AnnexB.nalUnits(in: au), [before, after])
    }

    /// The AVCC re-pack drops VPS/SPS/PPS and length-prefixes what remains.
    func testAvccDropsParameterSetsAndPrefixesLengths() {
        let vps = nal(type: 32, payload: [0xAA])
        let sps = nal(type: 33, payload: [0xBB])
        let pps = nal(type: 34, payload: [0xCC])
        let idr = nal(type: 19, payload: [0xDD, 0xDE, 0xDF, 0xE0])

        // Every NAL is preceded by a 4-byte start code.
        let au = [vps, sps, pps, idr].reduce(into: Data()) { buffer, unit in
            buffer.append(contentsOf: start4)
            buffer.append(unit)
        }

        // Only the IDR survives: 4-byte BE length, then the NAL bytes.
        let lengthPrefix = Data([0, 0, 0, UInt8(idr.count)])
        let expected = stream([lengthPrefix, idr])

        XCTAssertEqual(AnnexB.avcc(from: au), expected)
    }

    /// An AU that carries only an IDR slice (no in-band parameter sets) yields no
    /// format description.
    func testFormatDescriptionNilWithoutParameterSets() {
        let idr = nal(type: 19, payload: [0xDD])
        let au = stream([Data(start4), idr])
        XCTAssertNil(AnnexB.formatDescription(fromIDR: au))
    }
}
@@ -0,0 +1,65 @@
// Integration: the Swift wrapper against a real lumen/1 host over QUIC + UDP on loopback —
// the Swift twin of lumen-host's m3.rs::c_abi_connection_roundtrip, this time through the
// statically linked xcframework. Driven by clients/apple/test-loopback.sh, which builds and
// starts `lumen-host m3-host --source synthetic` and sets LUMEN_LOOPBACK_PORT.
import XCTest
@testable import LumenKit
final class LoopbackIntegrationTests: XCTestCase {
    /// Connects to the loopback host named by `LUMEN_LOOPBACK_PORT`, byte-verifies 25
    /// synthetic frames, sends a few input events, and checks that polling after
    /// `close()` throws `.closed`.
    ///
    /// Skipped when `LUMEN_LOOPBACK_PORT` is unset or not a valid port number.
    func testSyntheticStreamRoundTrip() throws {
        guard let portStr = ProcessInfo.processInfo.environment["LUMEN_LOOPBACK_PORT"],
            let port = UInt16(portStr)
        else {
            throw XCTSkip("needs a running m3-host — use clients/apple/test-loopback.sh")
        }

        let conn = try LumenConnection(
            host: "127.0.0.1", port: port, width: 1280, height: 720, refreshHz: 60)
        // Close on every early exit (timeout, thrown error) without closing twice on
        // the happy path, where the explicit close below is part of what is tested.
        var didClose = false
        defer { if !didClose { conn.close() } }

        XCTAssertEqual(conn.width, 1280)
        XCTAssertEqual(conn.height, 720)
        XCTAssertEqual(conn.refreshHz, 60)

        // Pull 25 synthetic frames and byte-verify the documented pattern:
        // u32 LE frame index, then data[i] = (idx as u8) &+ (i as u8).
        var got = 0
        var lastIndex: UInt32 = 0
        let deadline = Date().addingTimeInterval(30)
        while got < 25 {
            // A plain XCTAssert here would not stop the loop (continueAfterFailure
            // defaults to true), so a stalled host would spin forever. Bail out instead.
            guard Date() < deadline else {
                XCTFail("timed out after \(got) frames")
                return
            }
            guard let au = try conn.nextAU(timeoutMs: 2000) else { continue }

            // Little-endian u32: fold the first four bytes from most to least significant.
            let idx = au.data.prefix(4).reversed().reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
            for (i, byte) in au.data.enumerated().dropFirst(4) {
                let expected = UInt8(truncatingIfNeeded: idx) &+ UInt8(truncatingIfNeeded: i)
                if byte != expected {
                    // One report per frame is enough; stop at the first bad byte.
                    XCTFail("frame \(idx) corrupt at offset \(i)")
                    break
                }
            }
            XCTAssertGreaterThan(au.ptsNs, 0)
            lastIndex = idx
            got += 1
        }
        XCTAssertGreaterThanOrEqual(lastIndex, 24)

        // Input goes the other way (enqueue-only; the host logs the count on close).
        conn.send(.mouseMove(dx: 1, dy: 2))
        conn.send(.key(0x41, down: true))
        conn.send(.key(0x41, down: false))

        conn.close()
        didClose = true
        XCTAssertThrowsError(try conn.nextAU(timeoutMs: 10)) { error in
            guard case LumenClientError.closed = error else {
                return XCTFail("expected .closed, got \(error)")
            }
        }
    }

    /// Connecting to a port with no listener must throw rather than hang.
    func testConnectFailureThrows() {
        // Nothing listens on this port; connect must fail within its timeout, not hang.
        XCTAssertThrowsError(
            try LumenConnection(
                host: "127.0.0.1", port: 9, width: 640, height: 480, refreshHz: 30,
                timeoutMs: 2000))
    }
}
@@ -0,0 +1,80 @@
// First light, headless: the full client pipeline against a REAL remote host — QUIC
// handshake over the LAN, NVENC HEVC AUs through FEC + AES-GCM, AnnexB conversion, and a
// real VTDecompressionSession turning them into pixels. Everything the GUI does except
// putting the layer on glass.
//
// Run (host side, on the Linux box):
// LUMEN_COMPOSITOR=gamescope LUMEN_GAMESCOPE_APP=vkcube LUMEN_ZEROCOPY=1 \
// lumen-host m3-host --source virtual --seconds 120
// Then here:
// LUMEN_REMOTE_HOST=192.168.1.70 swift test --filter RemoteFirstLightTests
import CoreMedia
import VideoToolbox
import XCTest
@testable import LumenKit
final class RemoteFirstLightTests: XCTestCase {
    /// Streams from the host named by `LUMEN_REMOTE_HOST` and decodes access units
    /// until 60 frames have produced pixels or 30 seconds have elapsed.
    ///
    /// Skipped when `LUMEN_REMOTE_HOST` is not set.
    func testRemoteStreamDecodesToPixels() throws {
        let environment = ProcessInfo.processInfo.environment
        guard let host = environment["LUMEN_REMOTE_HOST"] else {
            throw XCTSkip("set LUMEN_REMOTE_HOST (and start m3-host --source virtual there)")
        }

        let width: UInt32 = 1280
        let height: UInt32 = 720
        let conn = try LumenConnection(
            host: host, width: width, height: height, refreshHz: 60)
        defer { conn.close() }
        XCTAssertEqual(conn.width, width)
        XCTAssertEqual(conn.height, height)

        var format: CMVideoFormatDescription?
        var decoder: VTDecompressionSession?
        // Registered after the close above, so the decoder is invalidated first.
        defer {
            if let live = decoder {
                VTDecompressionSessionInvalidate(live)
            }
        }

        var received = 0
        var decoded = 0
        var firstPtsNs: UInt64 = 0
        var lastPtsNs: UInt64 = 0
        let deadline = Date().addingTimeInterval(30)

        while decoded < 60 && Date() < deadline {
            guard let au = try conn.nextAU(timeoutMs: 2000) else { continue }
            received += 1
            if firstPtsNs == 0 {
                firstPtsNs = au.ptsNs
            }
            lastPtsNs = au.ptsNs

            // An AU with in-band parameter sets refreshes the format; the decoder is
            // created from the first such AU and kept afterwards.
            if let fresh = AnnexB.formatDescription(fromIDR: au.data) {
                format = fresh
                if decoder == nil {
                    let dims = CMVideoFormatDescriptionGetDimensions(fresh)
                    XCTAssertEqual(UInt32(dims.width), width)
                    XCTAssertEqual(UInt32(dims.height), height)
                    decoder = makeDecoder(for: fresh)
                }
            }

            // Nothing can be decoded until a format and a decoder exist.
            guard let activeFormat = format, let activeDecoder = decoder,
                let sample = AnnexB.sampleBuffer(au: au, format: activeFormat)
            else { continue }

            var gotPixels = false
            VTDecompressionSessionDecodeFrame(
                activeDecoder, sampleBuffer: sample, flags: [], infoFlagsOut: nil
            ) { status, _, imageBuffer, _, _ in
                gotPixels = status == noErr && imageBuffer != nil
            }
            if gotPixels {
                decoded += 1
            }
        }

        XCTAssertGreaterThanOrEqual(decoded, 60, "decoded \(decoded)/\(received) received AUs")

        // The host stamps pts with its capture wall clock — 60 frames should span ~1 s.
        let spanMs = Double(lastPtsNs &- firstPtsNs) / 1_000_000
        print("first light: \(decoded) frames decoded, \(received) received, pts span \(Int(spanMs)) ms")
    }

    /// Creates a decompression session for `format`, recording a test failure when
    /// creation does not return `noErr`. Returns whatever session was produced.
    private func makeDecoder(for format: CMVideoFormatDescription) -> VTDecompressionSession? {
        var session: VTDecompressionSession?
        let status = VTDecompressionSessionCreate(
            allocator: nil, formatDescription: format, decoderSpecification: nil,
            imageBufferAttributes: nil, outputCallback: nil,
            decompressionSessionOut: &session)
        XCTAssertEqual(status, noErr)
        return session
    }
}
@@ -0,0 +1,176 @@
// Real-bitstream proof of the decode-prep path: VTCompressionSession encodes HEVC, we
// rebuild the host's wire shape (Annex-B AU with in-band VPS/SPS/PPS — exactly what
// lumen-host emits on every IDR), run it through AnnexB, and hand the result to a real
// VTDecompressionSession. Pixels out = the whole client decode path is sound.
import AVFoundation
import CoreMedia
import VideoToolbox
import XCTest
@testable import LumenKit
final class VideoToolboxRoundTripTests: XCTestCase {
    private let width = 320
    private let height = 240

    /// Encodes one HEVC keyframe, rebuilds it as an Annex-B access unit with in-band
    /// parameter sets, and checks that `AnnexB` turns it back into a format
    /// description, the encoder's AVCC bytes, and a sample a real decoder accepts.
    ///
    /// Skipped when no HEVC encoder session can be created.
    func testEncodeAnnexBDecodeRoundTrip() throws {
        let (formatDesc, avccSample) = try encodeOneHEVCKeyframe()

        // Rebuild the host's wire format: Annex-B AU, parameter sets in-band before the VCL.
        let annexB = try annexBAU(formatDesc: formatDesc, avccSample: avccSample)

        // 1) Parameter-set extraction → format description.
        let rebuilt = try XCTUnwrap(
            AnnexB.formatDescription(fromIDR: annexB),
            "in-band VPS/SPS/PPS should yield a format description")
        let dims = CMVideoFormatDescriptionGetDimensions(rebuilt)
        XCTAssertEqual(Int(dims.width), width)
        XCTAssertEqual(Int(dims.height), height)

        // 2) Annex-B → AVCC re-pack must reproduce the encoder's own sample bytes.
        XCTAssertEqual(AnnexB.avcc(from: annexB), avccSample)

        // 3) Sample buffer → real decoder → pixels.
        let au = AccessUnit(data: annexB, ptsNs: 1_000_000, frameIndex: 0, flags: 0)
        let sample = try XCTUnwrap(AnnexB.sampleBuffer(au: au, format: rebuilt))

        var session: VTDecompressionSession?
        XCTAssertEqual(
            VTDecompressionSessionCreate(
                allocator: nil, formatDescription: rebuilt, decoderSpecification: nil,
                imageBufferAttributes: nil, outputCallback: nil,
                decompressionSessionOut: &session),
            noErr)
        let decoder = try XCTUnwrap(session)
        defer { VTDecompressionSessionInvalidate(decoder) }

        var decoded: CVImageBuffer?
        var decodeStatus: OSStatus = -1
        // No async flag → the handler runs before DecodeFrame returns.
        VTDecompressionSessionDecodeFrame(
            decoder, sampleBuffer: sample, flags: [], infoFlagsOut: nil
        ) { status, _, imageBuffer, _, _ in
            decodeStatus = status
            decoded = imageBuffer
        }
        XCTAssertEqual(decodeStatus, noErr)
        let pixels = try XCTUnwrap(decoded)  // CVImageBuffer and CVPixelBuffer are the same CF type
        XCTAssertEqual(CVPixelBufferGetWidth(pixels), width)
        XCTAssertEqual(CVPixelBufferGetHeight(pixels), height)
    }

    // MARK: - encode helpers

    /// One forced-IDR HEVC frame; returns its format description and raw AVCC sample bytes.
    ///
    /// - Throws: `XCTSkip` when the encoder session cannot be created; an `NSError`
    ///   when frame submission or the byte copy reports a non-`noErr` status; an
    ///   unwrap failure when the encoder delivers no usable sample.
    private func encodeOneHEVCKeyframe() throws -> (CMVideoFormatDescription, Data) {
        var session: VTCompressionSession?
        let rc = VTCompressionSessionCreate(
            allocator: nil, width: Int32(width), height: Int32(height),
            codecType: kCMVideoCodecType_HEVC, encoderSpecification: nil,
            imageBufferAttributes: nil, compressedDataAllocator: nil,
            outputCallback: nil, refcon: nil, compressionSessionOut: &session)
        guard rc == noErr, let encoder = session else {
            throw XCTSkip("no HEVC encoder available (\(rc))")
        }
        defer { VTCompressionSessionInvalidate(encoder) }

        VTSessionSetProperty(encoder, key: kVTCompressionPropertyKey_RealTime, value: kCFBooleanTrue)
        VTSessionSetProperty(
            encoder, key: kVTCompressionPropertyKey_AllowFrameReordering, value: kCFBooleanFalse)

        // Build the input before the expectation exists, so a failure here does not
        // leave an expectation behind that nobody waits on.
        let pixelBuffer = try gradientPixelBuffer()

        let lock = NSLock()
        var output: CMSampleBuffer?
        let done = expectation(description: "encoded")
        let submitStatus = VTCompressionSessionEncodeFrame(
            encoder, imageBuffer: pixelBuffer,
            presentationTimeStamp: CMTime(value: 0, timescale: 30),
            duration: CMTime(value: 1, timescale: 30),
            frameProperties: [kVTEncodeFrameOptionKey_ForceKeyFrame: kCFBooleanTrue] as CFDictionary,
            infoFlagsOut: nil
        ) { status, _, sample in
            XCTAssertEqual(status, noErr)
            // The handler may run on another thread; guard the hand-off.
            lock.lock()
            output = sample
            lock.unlock()
            done.fulfill()
        }
        // Fail fast on a rejected submission instead of sitting out the 10 s wait below.
        guard submitStatus == noErr else {
            throw NSError(domain: "VTCompressionSession", code: Int(submitStatus))
        }
        XCTAssertEqual(
            VTCompressionSessionCompleteFrames(encoder, untilPresentationTimeStamp: .invalid),
            noErr)
        wait(for: [done], timeout: 10)

        lock.lock()
        defer { lock.unlock() }
        let sample = try XCTUnwrap(output)
        let desc = try XCTUnwrap(CMSampleBufferGetFormatDescription(sample))
        let block = try XCTUnwrap(CMSampleBufferGetDataBuffer(sample))

        var bytes = Data(count: CMBlockBufferGetDataLength(block))
        try bytes.withUnsafeMutableBytes { raw in
            // Report a missing destination as a test failure rather than trapping.
            let destination = try XCTUnwrap(raw.baseAddress, "encoder produced an empty sample")
            let copyStatus = CMBlockBufferCopyDataBytes(
                block, atOffset: 0, dataLength: raw.count,
                destination: destination)
            if copyStatus != noErr { throw NSError(domain: "CMBlockBuffer", code: Int(copyStatus)) }
        }
        return (desc, bytes)
    }

    /// The host's wire shape: 4-byte start codes, VPS/SPS/PPS in-band, then the VCL NALs.
    ///
    /// - Parameters:
    ///   - formatDesc: Format description whose HEVC parameter sets are emitted first.
    ///   - avccSample: Length-prefixed (4-byte big-endian) NALs to append after them.
    /// - Returns: The Annex-B access unit.
    private func annexBAU(formatDesc: CMVideoFormatDescription, avccSample: Data) throws -> Data {
        var au = Data()

        // First call only asks how many parameter sets there are and the length-prefix size.
        var psCount = 0
        var nalHeaderLen: Int32 = 0
        XCTAssertEqual(
            CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
                formatDesc, parameterSetIndex: 0, parameterSetPointerOut: nil,
                parameterSetSizeOut: nil, parameterSetCountOut: &psCount,
                nalUnitHeaderLengthOut: &nalHeaderLen),
            noErr)
        XCTAssertEqual(nalHeaderLen, 4, "AnnexB.avcc assumes 4-byte NAL length prefixes")

        for i in 0..<psCount {
            var ptr: UnsafePointer<UInt8>?
            var size = 0
            XCTAssertEqual(
                CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
                    formatDesc, parameterSetIndex: i, parameterSetPointerOut: &ptr,
                    parameterSetSizeOut: &size, parameterSetCountOut: nil,
                    nalUnitHeaderLengthOut: nil),
                noErr)
            au.append(contentsOf: [0, 0, 0, 1])
            au.append(Data(bytes: try XCTUnwrap(ptr), count: size))
        }

        // AVCC sample (4-byte BE length per NAL) → start codes.
        var i = avccSample.startIndex
        while i + 4 <= avccSample.endIndex {
            let len = avccSample[i..<i + 4].reduce(0) { ($0 << 8) | Int($1) }
            let body = avccSample.index(i, offsetBy: 4)
            guard let end = avccSample.index(body, offsetBy: len, limitedBy: avccSample.endIndex)
            else { break }
            au.append(contentsOf: [0, 0, 0, 1])
            au.append(avccSample[body..<end])
            i = end
        }
        // Stopping short means a NAL length ran past the buffer or stray bytes trail
        // the last NAL; report it here rather than as a confusing mismatch later.
        XCTAssertEqual(i, avccSample.endIndex, "AVCC sample is truncated or has trailing bytes")
        return au
    }

    /// A 32BGRA pixel buffer of `width` x `height` filled with a deterministic
    /// per-pixel pattern (B = x, G = y, R = x ^ y, A = 0xFF, each masked to 8 bits).
    private func gradientPixelBuffer() throws -> CVPixelBuffer {
        var pb: CVPixelBuffer?
        let attrs = [kCVPixelBufferIOSurfacePropertiesKey: [:]] as CFDictionary
        XCTAssertEqual(
            CVPixelBufferCreate(nil, width, height, kCVPixelFormatType_32BGRA, attrs, &pb),
            kCVReturnSuccess)
        let buf = try XCTUnwrap(pb)

        CVPixelBufferLockBaseAddress(buf, [])
        defer { CVPixelBufferUnlockBaseAddress(buf, []) }
        let base = try XCTUnwrap(CVPixelBufferGetBaseAddress(buf))
        // Rows are addressed by the buffer's reported stride, not width * 4.
        let stride = CVPixelBufferGetBytesPerRow(buf)
        for y in 0..<height {
            let row = base.advanced(by: y * stride).assumingMemoryBound(to: UInt8.self)
            for x in 0..<width {
                row[x * 4 + 0] = UInt8(x & 0xFF)  // B
                row[x * 4 + 1] = UInt8(y & 0xFF)  // G
                row[x * 4 + 2] = UInt8((x ^ y) & 0xFF)  // R
                row[x * 4 + 3] = 0xFF
            }
        }
        return buf
    }
}