feat(apple): gamepad UI v2 — controller settings + add host, aurora, macOS
Sources reorganized (client: Home/Session/Settings/Stores/Support/Trust; kit: Audio/Connection/Gamepad/Input/Support/Video/Views) with the big files split along the same seams. The gamepad mode is couch-complete, and now on macOS too (the living-room Mac case), not just iOS/iPadOS: - GamepadSettingsView: a console-style, fully controller-navigable settings screen (X from the launcher) — up/down moves focus, left/right steps values (clamped, boundary thud), A cycles/toggles, B closes; the focused row shows a one-line description. Backed by GamepadMenuList, the vertical sibling of GamepadCarousel, and SettingsOptions — the option lists hoisted out of SettingsView statics and shared by the touch, tvOS and gamepad settings. - GamepadAddHostView + GamepadKeyboard: register a host end to end with a pad — field rows open an on-screen controller keyboard (dpad grid, A types, X backspaces, B done); the launcher carousel ends in an Add Host tile, so the dead-end "add one with touch first" empty state is gone. - Launcher polish: contextual hint bar with the pad's real button glyphs, controller name + battery chip, one shared console chrome. - GamepadScreenBackground: an animated aurora (TimelineView-driven drifting blobs in the brand's violet family, breathing radii, slow hue shift, legibility scrim; freezes under Reduce Motion). Pure SwiftUI on purpose — a .metal library only bundles reliably in one of the two build systems (SPM vs the xcodeproj's synced folders) these sources compile under. 
- macOS port: settings/add-host/library present as sized sheets (a macOS sheet takes its content's IDEAL size, and the GeometryReader-driven screens collapsed to nothing), NSScreen-based mode lists, scroll indicators .never (the "always show scroll bars" setting overrides .hidden), tray scrims so scrolled rows dim under the pinned title/hints, extra title clearance, and a PUNKTFUNK_FORCE_GAMEPAD_UI=1 dev hook — launcher/settings/add-host/keyboard/library render-verified live on a real Mac + LAN hosts. - GamepadMenuInput: X button support, and (re)start now snapshots held buttons so a controller handoff press never fires twice (the B that closed the keyboard no longer also cancels the screen underneath). - Cleanups: one "Connection failed" alert in ContentView instead of one per home screen; HostDiscovery.advertises/unsaved shared by both home screens. - host: can_encode_444 stub for the non-Linux/Windows host build (the macOS synthetic-source loopback used by the Swift tests). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,265 @@
|
||||
// Annex-B (HEVC / H.264) → CoreMedia plumbing.
|
||||
//
|
||||
// The punktfunk host emits Annex-B access units with in-band parameter sets on every IDR
|
||||
// (deliberately — the client needs no out-of-band extradata). VideoToolbox wants the AVCC
|
||||
// flavor instead: a CMVideoFormatDescription built from the parameter sets, and sample
|
||||
// buffers whose NALs are 4-byte-length-prefixed. This file converts between the two, for
|
||||
// the codec the host resolved in the Welcome (`connection.videoCodec`) — HEVC and H.264
|
||||
// differ only in NAL-header layout and which parameter sets exist (HEVC adds a VPS). AV1
|
||||
// is not an Annex-B/NAL codec and isn't handled here (hosts don't emit it on the native
|
||||
// path yet).
|
||||
//
|
||||
// HOT PATH: both pumps run `formatDescription(fromIDR:codec:)` + `sampleBuffer(au:format:codec:)`
|
||||
// once per AU, so the conversion is built on `forEachNAL` — a zero-copy scan over the AU's bytes
|
||||
// (ranges, not materialized Datas) — and `sampleBuffer` packs the AVCC form straight into
|
||||
// the CMBlockBuffer's own allocation. Per AU that leaves exactly one copy here (source →
|
||||
// block buffer) instead of the naive scan-copy-slice-repack chain.
|
||||
|
||||
import CoreMedia
|
||||
import Foundation
|
||||
|
||||
/// Codec of the host's elementary video stream. It is negotiated in the Welcome and read
/// back via `punktfunk_connection_codec`.
public enum VideoCodec: Equatable {
    case h264
    case hevc

    /// Maps the wire `Welcome.codec` byte (`PUNKTFUNK_CODEC_*`) to a codec.
    ///
    /// Only `0x01` (`PUNKTFUNK_CODEC_H264`) selects H.264; every other value, unknown ones
    /// included, resolves to HEVC.
    public init(wire: UInt8) {
        switch wire {
        case 0x01:
            self = .h264
        default:
            self = .hevc
        }
    }

    /// Extracts the NAL unit type from the first byte of a NAL.
    /// HEVC stores it in bits 1..6, H.264 in bits 0..4.
    fileprivate func nalType(_ first: UInt8) -> UInt8 {
        switch self {
        case .hevc:
            return (first >> 1) & 0x3F
        case .h264:
            return first & 0x1F
        }
    }

    /// Whether the NAL starting with `first` is a parameter set (dropped from AVCC; kept for
    /// the format description). HEVC: VPS 32 / SPS 33 / PPS 34. H.264: SPS 7 / PPS 8 (no VPS).
    fileprivate func isParameterSet(_ first: UInt8) -> Bool {
        let type = nalType(first)
        switch self {
        case .hevc:
            return type >= 32 && type <= 34
        case .h264:
            return type == 7 || type == 8
        }
    }

    /// Whether the NAL starting with `first` is a VCL (slice) NAL. In a conforming AU no
    /// parameter set follows the first slice, so the format-description scan can stop there.
    fileprivate func isVCL(_ first: UInt8) -> Bool {
        let type = nalType(first)
        switch self {
        case .hevc:
            return type <= 31
        case .h264:
            return type >= 1 && type <= 5
        }
    }
}
|
||||
|
||||
public enum AnnexB {
|
||||
/// Walk the NAL units of `data` without copying.
///
/// `body` receives the buffer base and each NAL's byte range (start codes 00 00 01 /
/// 00 00 00 01 excluded), and returns false to stop the walk early (e.g. at the first VCL
/// NAL). Zero bytes at a NAL's trailing edge are never reported as payload: zeros
/// immediately preceding a start code are either the 4-byte-code prefix or
/// `trailing_zero_8bits` padding, and zeros at the very end of `data` are padding after the
/// last NAL (emulation prevention keeps 00 00 0x out of conforming NAL bytes, and a
/// conforming NAL never ends in 00) — same policy as ffmpeg. Bytes before the first start
/// code are skipped and empty ranges are never reported.
///
/// - Parameters:
///   - data: The Annex-B byte stream to scan.
///   - body: Called once per NAL with the buffer base pointer and the NAL's range relative
///     to it. The base pointer is only valid inside `body`.
static func forEachNAL(
    in data: Data, _ body: (_ base: UnsafePointer<UInt8>, _ range: Range<Int>) -> Bool
) {
    data.withUnsafeBytes { (raw: UnsafeRawBufferPointer) in
        guard let base = raw.bindMemory(to: UInt8.self).baseAddress else { return }
        let count = raw.count
        var i = 0
        // First payload byte of the NAL currently being scanned; -1 until a start code is seen.
        var start = -1
        while i + 2 < count {
            guard base[i] == 0, base[i + 1] == 0, base[i + 2] == 1 else {
                i += 1
                continue
            }
            // Back up over every zero in front of 00 00 01: the 4-byte-code prefix and any
            // padding belong to the boundary, not to the previous NAL.
            var codeStart = i
            while codeStart > 0, base[codeStart - 1] == 0 {
                codeStart -= 1
            }
            if start >= 0, start < codeStart, !body(base, start..<codeStart) { return }
            start = i + 3
            i += 3
        }
        guard start >= 0 else { return }
        // The last NAL runs to the end of the buffer; trim zero padding there as well so it
        // follows the same boundary policy as every other NAL.
        var end = count
        while end > start, base[end - 1] == 0 {
            end -= 1
        }
        if start < end {
            _ = body(base, start..<end)
        }
    }
}
|
||||
|
||||
/// Split an Annex-B stream into its NAL units with the start codes stripped (`forEachNAL`
/// defines the boundary policy). Each NAL is copied into its own `Data`; the streaming
/// paths call `forEachNAL` directly to avoid those copies.
///
/// - Parameter data: The Annex-B byte stream.
/// - Returns: One `Data` per NAL, in stream order.
public static func nalUnits(in data: Data) -> [Data] {
    var units = [Data]()
    forEachNAL(in: data) { origin, span in
        let bytes = UnsafeBufferPointer(start: origin + span.lowerBound, count: span.count)
        units.append(Data(buffer: bytes))
        return true
    }
    return units
}
|
||||
|
||||
/// HEVC NAL unit type: bits 1..6 of the NAL's first byte.
///
/// - Parameter nal: One NAL unit without its start code.
/// - Returns: The 6-bit type, or `0xFF` when `nal` is empty.
public static func hevcNalType(_ nal: Data) -> UInt8 {
    nal.first.map { ($0 >> 1) & 0x3F } ?? 0xFF
}
|
||||
|
||||
/// H.264 NAL unit type: bits 0..4 of the NAL's first byte.
///
/// - Parameter nal: One NAL unit without its start code.
/// - Returns: The 5-bit type, or `0xFF` when `nal` is empty.
public static func h264NalType(_ nal: Data) -> UInt8 {
    nal.first.map { $0 & 0x1F } ?? 0xFF
}
|
||||
|
||||
/// Build a format description from the in-band parameter sets of an IDR AU (HEVC:
/// VPS/SPS/PPS; H.264: SPS/PPS).
///
/// Runs per AU on the pump thread. Parameter sets precede the first VCL NAL in a conforming
/// AU, so the scan stops at the first slice — a delta frame (no leading parameter sets)
/// costs a few byte compares and no copies. When a parameter-set type occurs more than once
/// before the first slice, the last occurrence is used.
///
/// - Parameters:
///   - au: One Annex-B access unit.
///   - codec: Codec of the stream; selects the NAL-header layout and the required sets.
/// - Returns: The format description, or nil when a required parameter set is missing (e.g.
///   a non-IDR AU) or CoreMedia rejects the sets.
public static func formatDescription(
    fromIDR au: Data, codec: VideoCodec
) -> CMVideoFormatDescription? {
    var vps: Data?
    var sps: Data?
    var pps: Data?
    forEachNAL(in: au) { base, range in
        let header = base[range.lowerBound]
        // Copies the current NAL out of the scanned buffer; only called for parameter sets.
        let payload = { Data(bytes: base + range.lowerBound, count: range.count) }
        switch (codec, codec.nalType(header)) {
        case (.hevc, 32):
            vps = payload()
        case (.hevc, 33), (.h264, 7):
            sps = payload()
        case (.hevc, 34), (.h264, 8):
            pps = payload()
        default:
            // A slice ends the scan (no parameter sets can follow); AUD/SEI/… may precede
            // the slices, so anything else keeps scanning.
            return !codec.isVCL(header)
        }
        return true
    }
    guard let spsData = sps, let ppsData = pps else { return nil }

    // In the order VideoToolbox wants them: HEVC VPS,SPS,PPS (VPS required); H.264 SPS,PPS.
    let sets: [Data]
    switch (codec, vps) {
    case (.h264, _):
        sets = [spsData, ppsData]
    case (.hevc, let vpsData?):
        sets = [vpsData, spsData, ppsData]
    case (.hevc, nil):
        return nil
    }

    // Pack the sets back to back into one contiguous buffer so a single pinned allocation
    // backs every pointer handed to CoreMedia for the duration of the create call.
    let sizes = sets.map(\.count)
    var packed: [UInt8] = []
    packed.reserveCapacity(sizes.reduce(0, +))
    sets.forEach { packed.append(contentsOf: $0) }

    var format: CMVideoFormatDescription?
    let status = packed.withUnsafeBufferPointer { buffer -> OSStatus in
        guard let origin = buffer.baseAddress else { return -1 }
        // Parallel pointer/size arrays: pointer k is the start of set k inside `packed`.
        var pointers: [UnsafePointer<UInt8>] = []
        var offset = 0
        for size in sizes {
            pointers.append(origin + offset)
            offset += size
        }
        switch codec {
        case .hevc:
            return CMVideoFormatDescriptionCreateFromHEVCParameterSets(
                allocator: kCFAllocatorDefault,
                parameterSetCount: pointers.count,
                parameterSetPointers: pointers,
                parameterSetSizes: sizes,
                nalUnitHeaderLength: 4,
                extensions: nil,
                formatDescriptionOut: &format)
        case .h264:
            return CMVideoFormatDescriptionCreateFromH264ParameterSets(
                allocator: kCFAllocatorDefault,
                parameterSetCount: pointers.count,
                parameterSetPointers: pointers,
                parameterSetSizes: sizes,
                nalUnitHeaderLength: 4,
                formatDescriptionOut: &format)
        }
    }
    return status == noErr ? format : nil
}
|
||||
|
||||
/// Re-pack an Annex-B AU as AVCC: every NAL is preceded by its length as a 4-byte
/// big-endian integer. Parameter-set NALs are dropped (they live in the format description).
///
/// - Parameters:
///   - au: One Annex-B access unit.
///   - codec: Codec of the stream; decides which NALs count as parameter sets.
/// - Returns: The AVCC bytes; empty when the AU holds no non-parameter-set NAL.
public static func avcc(from au: Data, codec: VideoCodec) -> Data {
    var packed = Data(capacity: au.count + 16)
    forEachNAL(in: au) { base, range in
        let nalStart = base + range.lowerBound
        guard !codec.isParameterSet(nalStart.pointee) else { return true }
        let prefix = UInt32(range.count).bigEndian
        withUnsafeBytes(of: prefix) { packed.append(contentsOf: $0) }
        packed.append(nalStart, count: range.count)
        return true
    }
    return packed
}
|
||||
|
||||
/// Wrap one AU as a decode-ready CMSampleBuffer. The AVCC form is packed directly into
/// the CMBlockBuffer's allocation (sized by a first cheap scan) — no intermediate Data.
///
/// - Parameters:
///   - au: The access unit; its `data` is scanned as Annex-B and its `ptsNs` becomes the
///     presentation timestamp (nanosecond timescale). The pts is clamped into `Int64`
///     rather than trapping on an out-of-range value (assumes `ptsNs` is an integer type —
///     TODO confirm against `AccessUnit`).
///   - format: Format description attached to the sample.
///   - codec: Codec of the stream; decides which NALs are parameter sets and get dropped.
/// - Returns: The sample buffer, tagged display-immediately, or nil when the AU holds no
///   non-parameter-set NAL or any CoreMedia call fails.
public static func sampleBuffer(
    au: AccessUnit, format: CMVideoFormatDescription, codec: VideoCodec
) -> CMSampleBuffer? {
    // Pass 1: byte scan only — total AVCC size of the payload (non-parameter-set) NALs.
    var total = 0
    forEachNAL(in: au.data) { base, range in
        if !codec.isParameterSet(base[range.lowerBound]) { total += 4 + range.count }
        return true
    }
    // Nothing decodable (a parameter-set-only AU — our host never sends one): drop it
    // rather than hand the decoder an empty sample.
    guard total > 0 else { return nil }

    var blockBuffer: CMBlockBuffer?
    guard CMBlockBufferCreateWithMemoryBlock(
        allocator: kCFAllocatorDefault, memoryBlock: nil,
        blockLength: total, blockAllocator: kCFAllocatorDefault,
        customBlockSource: nil, offsetToData: 0, dataLength: total,
        flags: kCMBlockBufferAssureMemoryNowFlag, blockBufferOut: &blockBuffer) == noErr,
        let block = blockBuffer
    else { return nil }

    var dstLen = 0
    var dstPtr: UnsafeMutablePointer<CChar>?
    guard CMBlockBufferGetDataPointer(
        block, atOffset: 0, lengthAtOffsetOut: nil, totalLengthOut: &dstLen,
        dataPointerOut: &dstPtr) == noErr,
        dstLen == total, let dstBase = dstPtr
    else { return nil }

    // Pass 2: the single copy — length prefix + payload per NAL, straight into the block.
    // Both passes run the same scan over the same bytes, so exactly `total` bytes are written.
    let dst = UnsafeMutableRawPointer(dstBase)
    var off = 0
    forEachNAL(in: au.data) { base, range in
        if codec.isParameterSet(base[range.lowerBound]) { return true }
        let len = UInt32(range.count).bigEndian
        withUnsafeBytes(of: len) { prefix in
            UnsafeMutableRawBufferPointer(start: dst + off, count: 4).copyMemory(from: prefix)
        }
        (dst + off + 4).copyMemory(from: base + range.lowerBound, byteCount: range.count)
        off += 4 + range.count
        return true
    }

    var timing = CMSampleTimingInfo(
        duration: .invalid,
        // Clamp instead of trapping: a garbage pts must not take the pump down.
        presentationTimeStamp: CMTime(
            value: Int64(clamping: au.ptsNs), timescale: 1_000_000_000),
        decodeTimeStamp: .invalid)
    var sampleSize = total
    var sample: CMSampleBuffer?
    guard CMSampleBufferCreate(
        allocator: kCFAllocatorDefault, dataBuffer: block, dataReady: true,
        makeDataReadyCallback: nil, refcon: nil, formatDescription: format,
        sampleCount: 1, sampleTimingEntryCount: 1, sampleTimingArray: &timing,
        sampleSizeEntryCount: 1, sampleSizeArray: &sampleSize,
        sampleBufferOut: &sample) == noErr,
        let created = sample
    else { return nil }

    // Low-latency display: render on arrival, don't wait for a clock.
    if let attachments = CMSampleBufferGetSampleAttachmentsArray(
        created, createIfNecessary: true),
        CFArrayGetCount(attachments) > 0
    {
        let dict = unsafeBitCast(
            CFArrayGetValueAtIndex(attachments, 0), to: CFMutableDictionary.self)
        CFDictionarySetValue(
            dict,
            Unmanaged.passUnretained(kCMSampleAttachmentKey_DisplayImmediately).toOpaque(),
            Unmanaged.passUnretained(kCFBooleanTrue).toOpaque())
    }
    return created
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
// Throttled host keyframe requests for decode recovery, shared by both pumps (StreamPump /
|
||||
// Stage2Pipeline). Wedge signals arrive from several threads — the decoder's async error callback
|
||||
// (a VT thread), a submit failure on the pump thread, the framesDropped poll — and the decode stays
|
||||
// stalled for several frames until the requested IDR lands, so requests are coalesced (100 ms, the
|
||||
// throttle the working Android path uses: fast enough that a lost recovery IDR is re-requested
|
||||
// promptly, bounded so a sustained freeze can't flood the control stream). Bound to the live
|
||||
// connection at pump start, unbound on stop.
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Coalesces keyframe requests to the bound connection: a request is forwarded only when it
/// is the first since `bind` or more than 100 ms after the previously forwarded one. All
/// state is guarded by `lock`, so `bind` and `request` may be called from any thread.
final class KeyframeRecovery: @unchecked Sendable {
    /// Minimum spacing between two forwarded requests, in nanoseconds (100 ms).
    private static let minIntervalNs: UInt64 = 100_000_000

    private let lock = NSLock()
    private var connection: PunktfunkConnection?
    /// Uptime (ns) of the last forwarded request; 0 means none since the last `bind`.
    private var lastNs: UInt64 = 0

    /// Bind to a connection (or unbind with nil) and reset the throttle window.
    func bind(_ c: PunktfunkConnection?) {
        lock.lock()
        defer { lock.unlock() }
        connection = c
        lastNs = 0
    }

    /// Ask the bound connection for a keyframe unless a request went out within the last
    /// 100 ms. The connection call is made after the lock has been released.
    func request() {
        claimTarget()?.requestKeyframe()
    }

    /// Under the lock: decide whether a request is due and, if so, stamp the window and
    /// return the connection to call (nil when throttled or unbound).
    private func claimTarget() -> PunktfunkConnection? {
        lock.lock()
        defer { lock.unlock() }
        let now = DispatchTime.now().uptimeNanoseconds
        guard lastNs == 0 || now &- lastNs > Self.minIntervalNs else { return nil }
        lastNs = now
        return connection
    }
}
|
||||
@@ -0,0 +1,78 @@
|
||||
// Per-frame latency sampler for the live HUD: records capture->client-receipt latency and drains
|
||||
// percentiles on demand. NSLock rather than an actor — the writer is the non-async pump/arrival
|
||||
// path (same pattern as the app's FrameMeter).
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Samples the **capture->client-receipt** latency of each access unit and reports percentiles.
///
/// The latency is `now - pts_ns`, where `pts_ns` is the host's capture wall clock (the AU's pts) and
/// `now` is the client's `CLOCK_REALTIME` instant the AU was received, shifted by the connect-time
/// **clock-skew offset** (`PunktfunkConnection.clockOffsetNs`, host minus client) so the difference
/// is valid across machines. `offsetNs == 0` means an old host that didn't answer the skew handshake
/// (or genuinely synced clocks) — the number is then only meaningful same-host.
///
/// SCOPE (stage-1 presenter): this covers host capture -> encode -> FEC -> network -> reassembly ->
/// decrypt -> handed to the presenter. It does **not** include the on-device VideoToolbox decode or
/// the `AVSampleBufferDisplayLayer` present — that layer decodes and presents compressed samples
/// internally with no per-frame callback. True decode->present (the full glass-to-glass) needs the
/// stage-2 presenter (`VTDecompressionSession` decode-completion + `CAMetalLayer`/display-link
/// present); this meter is the substrate it will extend.
public final class LatencyMeter: @unchecked Sendable {
    private let lock = NSLock()
    /// Latencies of the current window in microseconds, in arrival order. Guarded by `lock`.
    private var samplesUs: [Int64] = []
    /// Whether any sample of the current window carried a non-zero offset. Guarded by `lock`.
    private var skewCorrected = false

    public init() {}

    /// Record one frame received right now (`CLOCK_REALTIME`).
    ///
    /// - Parameters:
    ///   - ptsNs: Host capture clock (the AU's pts), in nanoseconds.
    ///   - offsetNs: Host-client clock offset from the skew handshake (0 = uncorrected).
    public func record(ptsNs: UInt64, offsetNs: Int64) {
        var now = timespec()
        clock_gettime(CLOCK_REALTIME, &now)
        let nowNs = Int64(now.tv_sec) * 1_000_000_000 + Int64(now.tv_nsec)
        record(ptsNs: ptsNs, atNs: nowNs, offsetNs: offsetNs)
    }

    /// Record one frame whose latency is `atNs + offsetNs - ptsNs` — an EXPLICIT client
    /// instant rather than now. The stage-2 presenter uses this to stamp capture→present at
    /// the display link's target present time (not the moment the present call ran). All in
    /// `CLOCK_REALTIME`. Latencies that are not strictly between 0 and 10 s are discarded.
    public func record(ptsNs: UInt64, atNs: Int64, offsetNs: Int64) {
        let latencyNs = atNs &+ offsetNs &- Int64(bitPattern: ptsNs)
        // Absurd values (a clock step, a wildly wrong offset, or garbage pts) are dropped.
        if latencyNs <= 0 || latencyNs >= 10_000_000_000 { return }
        lock.lock()
        defer { lock.unlock() }
        samplesUs.append(latencyNs / 1000)
        skewCorrected = skewCorrected || offsetNs != 0
    }

    public struct Stats: Sendable {
        public let p50Ms: Double
        public let p95Ms: Double
        public let p99Ms: Double
        public let count: Int
        /// True if the skew offset was applied (a host that answered the handshake) — i.e. the
        /// numbers are cross-machine valid, not just same-host.
        public let skewCorrected: Bool
    }

    /// Percentiles over the samples accumulated since the last drain; the window is reset.
    ///
    /// - Returns: The window's statistics, or `nil` when no samples arrived in the interval.
    public func drain() -> Stats? {
        let (ordered, corrected) = takeWindow()
        guard let lastIndex = ordered.indices.last else { return nil }
        // Sample at rank floor(count * p), capped at the largest sample; us -> ms.
        let percentileMs = { (p: Double) -> Double in
            let rank = min(Int(Double(ordered.count) * p), lastIndex)
            return Double(ordered[rank]) / 1000.0
        }
        return Stats(
            p50Ms: percentileMs(0.50),
            p95Ms: percentileMs(0.95),
            p99Ms: percentileMs(0.99),
            count: ordered.count,
            skewCorrected: corrected)
    }

    /// Under the lock: sort the current window, capture its skew flag, then reset both.
    private func takeWindow() -> (ordered: [Int64], corrected: Bool) {
        lock.lock()
        defer { lock.unlock() }
        let window = (ordered: samplesUs.sorted(), corrected: skewCorrected)
        samplesUs.removeAll(keepingCapacity: true)
        skewCorrected = false
        return window
    }
}
|
||||
@@ -0,0 +1,413 @@
|
||||
// Stage-2 presenter, present half: draw a decoded NV12 / P010 / 4:4:4 CVPixelBuffer into a CAMetalLayer
|
||||
// drawable with a Y′CbCr→RGB shader. The hosting view's CADisplayLink drives `render` once per vsync
|
||||
// (via Stage2Pipeline.renderTick) with the target present time, so a present can be stamped and the
|
||||
// present tail hand-paced. See docs apple-stage2-presenter.md.
|
||||
//
|
||||
// Main-thread only: created during view setup, `render`/`configure` called from the view's CADisplayLink
|
||||
// (which fires on the main runloop). The Metal objects + texture cache are touched only here. The one
|
||||
// exception is `setHdrMeta`, called from the pump thread — it hops the layer write to main so every
|
||||
// CALayer mutation stays on one thread.
|
||||
|
||||
#if canImport(Metal) && canImport(QuartzCore)
|
||||
import CoreGraphics
|
||||
import CoreVideo
|
||||
import Metal
|
||||
import QuartzCore
|
||||
import os
|
||||
|
||||
/// Unified-logging handle for this file's presenter diagnostics (subsystem
/// `io.unom.punktfunk`, category `presenter`).
private let presenterLog = Logger(subsystem: "io.unom.punktfunk", category: "presenter")

/// HDR reference white (BT.2408 "HDR Reference White"): the absolute luminance, in nits, that the
/// PQ signal's diffuse white sits at. Passed to `CAEDRMetadata.hdr10(opticalOutputScale:)`, it anchors
/// 203-nit diffuse white at EDR 1.0 (the display's SDR-white level) and lets the system tone-map the
/// brighter highlights into the panel's headroom. This is the missing anchor that made the old HDR path
/// render "way too bright" (no `edrMetadata` → no reference-white anchoring); a LARGER value renders
/// dimmer. Matches the host's standard PQ reference white.
private let hdrReferenceWhiteNits: Float = 203.0
|
||||
|
||||
/// Runtime-compiled (no metallib build step needed in SwiftPM): a fullscreen triangle and BT.709 SDR
/// and BT.2020-PQ HDR Y′CbCr→RGB fragment shaders. uv.y is flipped (1 - p.y) so the top-left-origin
/// texture presents upright (NDC y is up). The HDR shader outputs PQ-encoded R′G′B′ as-is — the
/// CAMetalLayer's `itur_2100_PQ` colour space + `edrMetadata` tell the system compositor the samples
/// are PQ and how to tone-map them (no EOTF here, matching the host's BT.2020 PQ emission).
///
/// Entry points, looked up by name in `MetalVideoPresenter.make()`: `pf_vtx` (vertex), `pf_frag`
/// (SDR fragment) and `pf_frag_hdr` (HDR fragment). Both fragment shaders read luma from
/// texture slot 0 and chroma from texture slot 1.
private let shaderSource = """
#include <metal_stdlib>
using namespace metal;

struct VOut { float4 pos [[position]]; float2 uv; };

vertex VOut pf_vtx(uint vid [[vertex_id]]) {
float2 p = float2(float((vid << 1) & 2), float(vid & 2));
VOut o;
o.pos = float4(p * 2.0 - 1.0, 0.0, 1.0);
o.uv = float2(p.x, 1.0 - p.y);
return o;
}

// Bicubic (Catmull-Rom) sampling of the single-channel luma plane. The drawable is sized to the
// LAYER's pixels (see `render`), so this kernel performs the decoded→on-screen scale: when the
// window/view is bigger than the host's fixed mode a bilinear upscale looks soft; Catmull-Rom
// keeps edges crisp — matching AVSampleBufferDisplayLayer's (stage-1) scaler — and reduces to the
// exact texel at 1:1, so a native-resolution present stays pixel-exact.
// Nine bilinear taps (TheRealMJP's optimisation of the 16-tap kernel); `s` MUST be a linear
// sampler. Luma carries the perceived detail, so only it gets bicubic; chroma stays bilinear.
float catmullRomLuma(texture2d<float> tex, sampler s, float2 uv) {
float2 texSize = float2(tex.get_width(), tex.get_height());
float2 samplePos = uv * texSize;
float2 tc1 = floor(samplePos - 0.5) + 0.5;
float2 f = samplePos - tc1;
float2 w0 = f * (-0.5 + f * (1.0 - 0.5 * f));
float2 w1 = 1.0 + f * f * (-2.5 + 1.5 * f);
float2 w2 = f * (0.5 + f * (2.0 - 1.5 * f));
float2 w3 = f * f * (-0.5 + 0.5 * f);
float2 w12 = w1 + w2;
float2 off12 = w2 / w12;
float2 tc0 = (tc1 - 1.0) / texSize;
float2 tc3 = (tc1 + 2.0) / texSize;
float2 tc12 = (tc1 + off12) / texSize;
float r = 0.0;
r += tex.sample(s, float2(tc0.x, tc0.y)).r * (w0.x * w0.y);
r += tex.sample(s, float2(tc12.x, tc0.y)).r * (w12.x * w0.y);
r += tex.sample(s, float2(tc3.x, tc0.y)).r * (w3.x * w0.y);
r += tex.sample(s, float2(tc0.x, tc12.y)).r * (w0.x * w12.y);
r += tex.sample(s, float2(tc12.x, tc12.y)).r * (w12.x * w12.y);
r += tex.sample(s, float2(tc3.x, tc12.y)).r * (w3.x * w12.y);
r += tex.sample(s, float2(tc0.x, tc3.y)).r * (w0.x * w3.y);
r += tex.sample(s, float2(tc12.x, tc3.y)).r * (w12.x * w3.y);
r += tex.sample(s, float2(tc3.x, tc3.y)).r * (w3.x * w3.y);
return r;
}

// 4:2:0 chroma is left-cosited horizontally (H.273 chroma_loc type 0 — the MPEG convention the
// host encodes and VideoToolbox decodes as-is), but sampling the half-res plane at the luma UV
// assumes CENTER siting — a ~0.5-luma-px rightward chroma shift on hard colored edges. Offset the
// sample by +0.25 chroma texels to re-align (libplacebo/mpv's correction). Vertical siting for
// type 0 is centered, which plain sampling already matches. A full-size 4:4:4 plane has no
// subsampling to correct — the offset self-disables when the plane widths match.
float2 chromaUV(texture2d<float> lumaTex, texture2d<float> chromaTex, float2 uv) {
if (chromaTex.get_width() < lumaTex.get_width()) {
uv.x += 0.25 / float(chromaTex.get_width());
}
return uv;
}

// SDR: 8-bit NV12 / 4:4:4 (BT.709, limited/video range) → full-range RGB. Chroma is sampled at the
// (siting-corrected) luma UV, so a full-size 4:4:4 chroma plane needs no shader change vs 4:2:0.
fragment float4 pf_frag(VOut in [[stage_in]],
texture2d<float> lumaTex [[texture(0)]],
texture2d<float> chromaTex [[texture(1)]]) {
constexpr sampler s(filter::linear, address::clamp_to_edge);
float y = catmullRomLuma(lumaTex, s, in.uv);
float2 c = chromaTex.sample(s, chromaUV(lumaTex, chromaTex, in.uv)).rg;
// BT.709, 8-bit limited (video) range → full-range RGB.
y = (y - 16.0/255.0) * (255.0/219.0);
float u = (c.x - 128.0/255.0) * (255.0/224.0);
float v = (c.y - 128.0/255.0) * (255.0/224.0);
float r = y + 1.5748 * v;
float g = y - 0.1873 * u - 0.4681 * v;
float b = y + 1.8556 * u;
return float4(saturate(float3(r, g, b)), 1.0);
}

// HDR: 10-bit P010 / 4:4:4 (BT.2020, limited range), Y′CbCr that is PQ-encoded. We apply the BT.2020
// matrix to get PQ-encoded R′G′B′ and output it as-is — the CAMetalLayer's itur_2100_PQ colour space
// + edrMetadata tell the compositor the samples are PQ, so it does the PQ→display tone-map. No EOTF
// here. P010/x444 store the 10-bit code in the high bits of each 16-bit sample, so an .r16Unorm sample
// reads ~code/1023 (the /1024 vs /1023 error is < 0.1%).
fragment float4 pf_frag_hdr(VOut in [[stage_in]],
texture2d<float> lumaTex [[texture(0)]],
texture2d<float> chromaTex [[texture(1)]]) {
constexpr sampler s(filter::linear, address::clamp_to_edge);
float y = catmullRomLuma(lumaTex, s, in.uv);
float2 c = chromaTex.sample(s, chromaUV(lumaTex, chromaTex, in.uv)).rg;
// BT.2020 10-bit limited (video) range → full-range PQ R′G′B′.
y = (y - 64.0/1023.0) * (1023.0/876.0);
float u = (c.x - 512.0/1023.0) * (1023.0/896.0);
float v = (c.y - 512.0/1023.0) * (1023.0/896.0);
float r = y + 1.4746 * v;
float g = y - 0.16455 * u - 0.57135 * v;
float b = y + 1.8814 * u;
return float4(saturate(float3(r, g, b)), 1.0);
}
"""
|
||||
|
||||
public final class MetalVideoPresenter {
|
||||
/// The layer the hosting view installs (as a sublayer) and sizes to its bounds.
public let layer: CAMetalLayer

/// Metal device that `make()` also hands to the layer, both pipelines and the texture cache.
private let device: MTLDevice
/// Command queue created from `device` in `make()`.
private let queue: MTLCommandQueue
/// SDR (BT.709 8-bit → bgra8) and HDR (BT.2020 PQ 10-bit → rgba16Float) pipelines. Selected per
/// frame in `render`; the layer is reconfigured to match when the session flips (HDR toggle).
private let pipelineSDR: MTLRenderPipelineState
private let pipelineHDR: MTLRenderPipelineState
/// CoreVideo Metal texture cache created in `make()`. Declared optional, although `make()` only
/// constructs a presenter once the cache exists.
private var textureCache: CVMetalTextureCache?

/// Current layer configuration — switched in `configure(hdr:)` when a frame's HDR-ness differs.
/// Main-thread only (read + written from `render`/`configure`, all on the display-link runloop).
private var hdrActive = false
/// Last HDR mastering grade received via `setHdrMeta` (the host's 0xCE). Cached so a mid-session
/// SDR→HDR flip's `configureColor` re-applies the real grade instead of clobbering it back to the
/// bare reference-white anchor (an out-of-order race otherwise: `setHdrMeta` and the flip both write
/// `edrMetadata`). Main-thread only.
private var lastHdrMeta: PunktfunkConnection.HdrMeta?

#if DEBUG
/// Last logged "decoded→drawable" signature, so the diagnostic logs only on a size/HDR change.
private var lastSizeSig = ""
#endif
|
||||
|
||||
/// Creates a presenter with its Metal device, command queue, SDR/HDR pipelines, texture cache
/// and a configured `CAMetalLayer` (initially 8-bit `.bgra8Unorm`).
///
/// - Returns: nil if Metal is unavailable (no GPU / a headless CI), a shader fails to compile,
///   a pipeline cannot be built, or the texture cache cannot be created — the caller falls
///   back to stage-1. Every failure is reported through `presenterLog` so the fallback is
///   diagnosable.
public static func make() -> MetalVideoPresenter? {
    guard let device = MTLCreateSystemDefaultDevice(),
          let queue = device.makeCommandQueue()
    else {
        presenterLog.notice("Metal presenter unavailable: no Metal device or command queue")
        return nil
    }

    let pipelineSDR: MTLRenderPipelineState
    let pipelineHDR: MTLRenderPipelineState
    do {
        let library = try device.makeLibrary(source: shaderSource, options: nil)
        let vertex = library.makeFunction(name: "pf_vtx")
        // Both pipelines share the fullscreen-triangle vertex stage and differ only in the
        // fragment shader and the drawable pixel format they render into.
        func makePipeline(
            fragment: String, format: MTLPixelFormat
        ) throws -> MTLRenderPipelineState {
            let descriptor = MTLRenderPipelineDescriptor()
            descriptor.vertexFunction = vertex
            descriptor.fragmentFunction = library.makeFunction(name: fragment)
            descriptor.colorAttachments[0].pixelFormat = format
            return try device.makeRenderPipelineState(descriptor: descriptor)
        }
        pipelineSDR = try makePipeline(fragment: "pf_frag", format: .bgra8Unorm)
        pipelineHDR = try makePipeline(fragment: "pf_frag_hdr", format: .rgba16Float)  // EDR-capable
    } catch {
        presenterLog.error(
            "Metal presenter unavailable: shader/pipeline setup failed: \(String(describing: error), privacy: .public)")
        return nil
    }

    var cache: CVMetalTextureCache?
    let cacheStatus = CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, device, nil, &cache)
    guard let textureCache = cache else {
        presenterLog.error(
            "Metal presenter unavailable: CVMetalTextureCacheCreate failed (\(cacheStatus))")
        return nil
    }

    let layer = CAMetalLayer()
    layer.device = device
    layer.pixelFormat = .bgra8Unorm
    layer.framebufferOnly = true
    layer.isOpaque = true
    #if os(macOS)
    // The display link already paces exactly one present per vsync. Leaving the layer's own vsync
    // wait on means `commandBuffer.present` ALSO blocks for the hardware vsync, so `nextDrawable()`
    // stalls the MAIN thread until a drawable frees — windowed, the WindowServer's looser
    // compositing hides it; FULLSCREEN's tighter path serializes the main thread to the display and
    // the stall surfaces as bad judder. Disabling the layer-level sync lets present return promptly
    // (the display link is the pacing source) — the fix for the fullscreen stutter. macOS-only.
    layer.displaySyncEnabled = false
    #endif
    // The drawable is rendered at the LAYER's pixel size (set per-frame in `render`), so the
    // shader — not the compositor — performs the decoded→on-screen scale (bicubic luma; the
    // compositor's contentsGravity path is plain bilinear). The gravity stays aspect-fit as a
    // transient fallback: during a live resize the compositor may composite a drawable from
    // the previous layout before the next render catches up.
    layer.contentsGravity = .resizeAspect
    // Triple-buffer: more in-flight drawables before `nextDrawable()` (called on the display-link /
    // MAIN thread) has to block waiting for one to free.
    layer.maximumDrawableCount = 3

    return MetalVideoPresenter(
        device: device, queue: queue, pipelineSDR: pipelineSDR, pipelineHDR: pipelineHDR,
        textureCache: textureCache, layer: layer)
}
|
||||
|
||||
/// Memberwise initializer. Private: instances are built by the factory above, which passes in
/// the fully prepared Metal objects, so this only stores them.
private init(
    device: MTLDevice,
    queue: MTLCommandQueue,
    pipelineSDR: MTLRenderPipelineState,
    pipelineHDR: MTLRenderPipelineState,
    textureCache: CVMetalTextureCache,
    layer: CAMetalLayer
) {
    // GPU objects.
    self.device = device
    self.queue = queue
    // One render pipeline per output path (8-bit SDR / 16-bit-float HDR).
    self.pipelineSDR = pipelineSDR
    self.pipelineHDR = pipelineHDR
    // Presentation surface + the cache that wraps decoded pixel-buffer planes as textures.
    self.layer = layer
    self.textureCache = textureCache
}
|
||||
|
||||
/// Switch the layer and the active pipeline between the SDR and HDR configurations.
/// MAIN THREAD ONLY.
///
/// Invoked once when a session starts and then again for every frame from `render`. A call whose
/// `hdr` already matches `hdrActive` returns immediately, so the per-frame call is cheap and a
/// mid-session HDR toggle (the host re-inits its encoder; the decoded `frame.isHDR` flips) is
/// picked up automatically. HDR selects an rgba16Float drawable + BT.2020 PQ colour space + EDR
/// anchored at the reference white; SDR selects the plain 8-bit sRGB path.
public func configure(hdr: Bool) {
    if hdr == hdrActive { return }
    hdrActive = hdr
    configureColor(hdr: hdr)
}
|
||||
|
||||
/// Apply the pixel format and colour configuration for SDR or HDR to the layer.
/// MAIN THREAD ONLY.
///
/// EDR is requested on every platform except tvOS, which has no EDR API
/// (`wantsExtendedDynamicRangeContent` / `edrMetadata` / `CAEDRMetadata` are unavailable there);
/// tvOS therefore only gets the PQ pixel format + colour space and leaves tone-mapping to its
/// compositor.
private func configureColor(hdr: Bool) {
    guard hdr else {
        // SDR: gamma-encoded BT.709 [0,1] in an 8-bit drawable. A nil colorspace tags the layer
        // device/sRGB — the proven SDR path (the "too bright" issue was HDR-only).
        layer.pixelFormat = .bgra8Unorm
        layer.colorspace = nil
        #if !os(tvOS)
        layer.wantsExtendedDynamicRangeContent = false
        layer.edrMetadata = nil
        #endif
        return
    }

    // HDR: 16-bit float drawable tagged BT.2100 PQ.
    layer.pixelFormat = .rgba16Float
    layer.colorspace = CGColorSpace(name: CGColorSpace.itur_2100_PQ)
    #if !os(tvOS)
    layer.wantsExtendedDynamicRangeContent = true
    // Anchor reference white. If a real grade already arrived before this flip it is re-applied;
    // otherwise only the bare anchor is set. Without the anchor the PQ signal is too bright.
    layer.edrMetadata = makeEDR(lastHdrMeta)
    #endif
}
|
||||
|
||||
#if !os(tvOS)
|
||||
/// Build the HDR10 EDR metadata object for the layer.
///
/// - Parameter meta: The host's mastering grade, or nil when none has arrived yet; with nil both
///   info arguments are nil and only the optical output scale (`hdrReferenceWhiteNits`) is set.
/// - Returns: The `CAEDRMetadata` to assign to the layer's `edrMetadata`.
private func makeEDR(_ meta: PunktfunkConnection.HdrMeta?) -> CAEDRMetadata {
    let masteringDisplay = meta?.masteringDisplayColorVolume()
    let contentLightLevel = meta?.contentLightLevelInfo()
    return CAEDRMetadata.hdr10(
        displayInfo: masteringDisplay,
        contentInfo: contentLightLevel,
        opticalOutputScale: hdrReferenceWhiteNits)
}
|
||||
#endif
|
||||
|
||||
/// Record new HDR mastering metadata (drained from the host's 0xCE datagram) so the system
/// tone-map can be refined from the real grade.
///
/// Called from the PUMP thread; the whole update is hopped to MAIN so every CALayer mutation
/// stays on one thread. The grade is always cached so a later SDR→HDR `configureColor` can
/// re-apply it. The layer's `edrMetadata` is only written while `hdrActive` is set (writing it on
/// an SDR layer would be harmless but pointless, and the flip applies it anyway).
public func setHdrMeta(_ meta: PunktfunkConnection.HdrMeta) {
    DispatchQueue.main.async { [weak self] in
        guard let presenter = self else { return }
        presenter.lastHdrMeta = meta
        // tvOS has no edrMetadata: the grade stays cached (harmless) but cannot be applied to the
        // layer there. macOS/iOS push it to the layer when HDR is live.
        #if !os(tvOS)
        guard presenter.hdrActive else { return }
        presenter.layer.edrMetadata = presenter.makeEDR(meta)
        #endif
    }
}
|
||||
|
||||
/// Draw one decoded frame into the next drawable and present it. MAIN THREAD (the display link).
///
/// - Parameters:
///   - pixelBuffer: The decoded bi-planar frame (luma in plane 0, chroma in plane 1).
///   - isHDR: Selects the 10-bit BT.2020 PQ path vs the 8-bit BT.709 path; reconciled with the
///     layer configuration through `configure`.
///   - onPresented: Fires once the drawable reached glass, with the `CLOCK_REALTIME` instant
///     derived from the drawable's `presentedTime`, or nil when the system reports none (a
///     dropped drawable). Runs on a Metal callback thread — keep the handler thread-safe.
/// - Returns: true when the frame was committed for presentation; false when there is no
///   drawable yet, a texture could not be made, or Metal failed — the caller then does not stamp
///   a present and can requeue the frame.
@discardableResult
public func render(
    _ pixelBuffer: CVPixelBuffer, isHDR: Bool = false,
    onPresented: ((Int64?) -> Void)? = nil
) -> Bool {
    // Bring the layer in line with this frame's HDR-ness (covers a mid-session SDR↔HDR flip).
    configure(hdr: isHDR)

    // P010/x444 keep 10-bit samples in 16-bit containers → R16/RG16; NV12/444v is 8-bit → R8/RG8.
    // Decided from the actual decoded buffer so a 4:4:4 (full chroma plane) frame just works.
    let tenBit: Bool
    switch CVPixelBufferGetPixelFormatType(pixelBuffer) {
    case kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange,
        kCVPixelFormatType_420YpCbCr10BiPlanarFullRange,
        kCVPixelFormatType_444YpCbCr10BiPlanarVideoRange,
        kCVPixelFormatType_444YpCbCr10BiPlanarFullRange:
        tenBit = true
    default:
        tenBit = false
    }
    let lumaFormat: MTLPixelFormat = tenBit ? .r16Unorm : .r8Unorm
    let chromaFormat: MTLPixelFormat = tenBit ? .rg16Unorm : .rg8Unorm

    guard let textureCache else { return false }
    guard
        let luma = makeTexture(pixelBuffer, plane: 0, format: lumaFormat, cache: textureCache)
    else { return false }
    guard
        let chroma = makeTexture(pixelBuffer, plane: 1, format: chromaFormat, cache: textureCache)
    else { return false }

    // The drawable is sized to the LAYER's pixels (bounds × contentsScale, both set by the hosting
    // view's layout) so the shader does the decoded→on-screen scale in a single pass: a
    // native-mode session stays exactly 1:1 and a larger window gets bicubic luma rather than the
    // compositor's bilinear. With empty bounds (before the first layout) the decoded size is used.
    // drawableSize does NOT follow bounds, so it must be set BEFORE nextDrawable; it is only
    // written when it actually changes.
    let decodedSize = CGSize(
        width: CVPixelBufferGetWidth(pixelBuffer),
        height: CVPixelBufferGetHeight(pixelBuffer))
    let scale = layer.contentsScale
    let boundsSize = layer.bounds.size
    var targetSize = decodedSize
    if boundsSize.width > 0 && boundsSize.height > 0 {
        targetSize = CGSize(
            width: (boundsSize.width * scale).rounded(),
            height: (boundsSize.height * scale).rounded())
    }
    if layer.drawableSize != targetSize { layer.drawableSize = targetSize }
    #if DEBUG
    logSizeIfChanged(decoded: decodedSize, drawable: targetSize)
    #endif

    guard let drawable = layer.nextDrawable() else { return false }
    guard let commandBuffer = queue.makeCommandBuffer() else { return false }

    // Single colour attachment: clear to opaque black, draw, keep the result.
    let pass = MTLRenderPassDescriptor()
    pass.colorAttachments[0].texture = drawable.texture
    pass.colorAttachments[0].loadAction = .clear
    pass.colorAttachments[0].clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 1)
    pass.colorAttachments[0].storeAction = .store
    guard let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: pass) else {
        return false
    }

    // One full-screen triangle sampling luma (index 0) + chroma (index 1).
    encoder.setRenderPipelineState(hdrActive ? pipelineHDR : pipelineSDR)
    encoder.setFragmentTexture(CVMetalTextureGetTexture(luma), index: 0)
    encoder.setFragmentTexture(CVMetalTextureGetTexture(chroma), index: 1)
    encoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 3)
    encoder.endEncoding()

    if let onPresented {
        #if targetEnvironment(simulator)
        // The simulator SDK has neither addPresentedHandler nor presentedTime: report nil so the
        // caller stamps with its display-link estimate (simulator numbers are indicative only).
        onPresented(nil)
        #else
        // Must be registered BEFORE present. presentedTime is CACurrentMediaTime-based; 0 means
        // the drawable never reached glass (dropped) — report nil and let the caller fall back to
        // its display-link estimate.
        drawable.addPresentedHandler { presented in
            let onGlass = presented.presentedTime
            if onGlass > 0 {
                onPresented(Stage2Pipeline.realtimeNs(forDisplayLinkTimestamp: onGlass))
            } else {
                onPresented(nil)
            }
        }
        #endif
    }

    commandBuffer.present(drawable)  // present at the next vsync — lowest latency
    // Keep the CVMetalTextures and the source pixel buffer (its IOSurface) alive until the GPU is
    // done sampling; releasing them at scope exit could free the backing mid-read.
    commandBuffer.addCompletedHandler { _ in _ = (luma, chroma, pixelBuffer) }
    commandBuffer.commit()
    return true
}
|
||||
|
||||
/// Wrap one plane of `pixelBuffer` as a Metal texture through the texture cache.
///
/// The CVMetalTexture itself is returned (not just its MTLTexture) so the caller can retain it
/// past the draw — the MTLTexture is only valid while its CVMetalTexture is alive.
///
/// - Returns: The wrapper, or nil when the cache call fails or yields no backing MTLTexture.
private func makeTexture(
    _ pixelBuffer: CVPixelBuffer, plane: Int, format: MTLPixelFormat, cache: CVMetalTextureCache
) -> CVMetalTexture? {
    let planeWidth = CVPixelBufferGetWidthOfPlane(pixelBuffer, plane)
    let planeHeight = CVPixelBufferGetHeightOfPlane(pixelBuffer, plane)
    var wrapped: CVMetalTexture?
    let result = CVMetalTextureCacheCreateTextureFromImage(
        kCFAllocatorDefault, cache, pixelBuffer, nil, format, planeWidth, planeHeight, plane,
        &wrapped)
    if result != kCVReturnSuccess { return nil }
    guard let wrapped else { return nil }
    // A wrapper without a backing MTLTexture is useless to the encoder — treat it as a failure.
    return CVMetalTextureGetTexture(wrapped) == nil ? nil : wrapped
}
|
||||
|
||||
#if DEBUG
|
||||
/// DEBUG-only: log the decoded→drawable size pair and HDR state, but only when the combination
/// differs from the last one logged (tracked in `lastSizeSig`), so per-frame calls stay quiet.
private func logSizeIfChanged(decoded: CGSize, drawable: CGSize) {
    let sig = "\(Int(decoded.width))x\(Int(decoded.height))→\(Int(drawable.width))x\(Int(drawable.height))|hdr\(hdrActive ? 1 : 0)"
    guard sig != lastSizeSig else { return }
    lastSizeSig = sig
    let msg =
        "stage2: decoded \(Int(decoded.width))x\(Int(decoded.height)) → drawable \(Int(drawable.width))x\(Int(drawable.height)) hdr=\(hdrActive)"
    presenterLog.info("\(msg, privacy: .public)")
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,36 @@
|
||||
// Synthetic 4:4:4 HEVC keyframes used only by `Stage444Probe` to probe decode capability.
|
||||
//
|
||||
// Each is the first IDR access unit (VPS + SPS + PPS + IDR slice, Annex-B) of a 256×256 HEVC
|
||||
// Range-Extensions clip — `chroma_format_idc = 3` — generated offline with libx265:
|
||||
// ffmpeg -f lavfi -i color=c=gray:s=256x256:r=30:d=0.1 -frames:v 3 \
|
||||
// -pix_fmt yuv444p[10le] -c:v libx265 \
|
||||
// -x265-params keyint=1:min-keyint=1:no-info=1:repeat-headers=1:aud=0 -f hevc out.hevc
|
||||
// 256×256 clears the hardware decoder's minimum-dimension floor (a 16×16 clip is rejected for every
|
||||
// chroma format). Validated to hardware-decode to `444v`/`x444` on Apple Silicon (M3).
|
||||
/// Namespace (caseless enum) holding the two embedded probe bitstreams as raw byte arrays.
///
/// Layout of each array, readable from the bytes themselves: four Annex-B NAL units, each
/// introduced by the 4-byte start code `00 00 00 01`. The first NAL header byte of each is
/// 0x40, 0x42, 0x44, 0x28, i.e. nal_unit_type 32 (VPS), 33 (SPS), 34 (PPS) and 20 (IDR_N_LP).
/// The `00 00 03` sequences inside the payloads are emulation-prevention bytes, not start codes.
enum Probe444Blobs {
|
||||
/// 256×256 HEVC Range-Extensions 4:4:4 keyframe (Annex-B): 134 bytes.
/// 8-bit variant (8 rows of 16 bytes + 6).
|
||||
static let au444_8bit: [UInt8] = [
|
||||
0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00,
|
||||
0x9e, 0x28, 0x00, 0x00, 0x03, 0x00, 0x00, 0x3c, 0xba, 0x02, 0x40, 0x00, 0x00, 0x00, 0x01, 0x42,
|
||||
0x01, 0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9e, 0x28, 0x00, 0x00, 0x03, 0x00, 0x00, 0x3c,
|
||||
0x90, 0x01, 0x01, 0x00, 0x80, 0xb2, 0xdd, 0x49, 0x26, 0x57, 0x80, 0xb4, 0x04, 0x00, 0x00, 0x03,
|
||||
0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x78, 0x20, 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72,
|
||||
0x86, 0x0c, 0x06, 0x24, 0x00, 0x00, 0x00, 0x01, 0x28, 0x01, 0xaf, 0x72, 0x15, 0xe8, 0x34, 0xeb,
|
||||
0xae, 0xfb, 0xfe, 0x75, 0x57, 0xca, 0xc1, 0x71, 0x43, 0x16, 0xf5, 0xc2, 0x40, 0xbd, 0x80, 0xa6,
|
||||
0x65, 0x35, 0x20, 0x28, 0x81, 0xa2, 0x5e, 0xc0, 0x93, 0x04, 0x10, 0x9b, 0x00, 0x34, 0xe0, 0x87,
|
||||
0x00, 0x00, 0x03, 0x00, 0x5b, 0x40,
|
||||
]
|
||||
|
||||
/// 256×256 HEVC Range-Extensions 4:4:4 10-bit keyframe (Annex-B): 133 bytes.
/// 10-bit variant (8 rows of 16 bytes + 5).
|
||||
static let au444_10bit: [UInt8] = [
|
||||
0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00,
|
||||
0x9c, 0x28, 0x00, 0x00, 0x03, 0x00, 0x00, 0x3c, 0xba, 0x02, 0x40, 0x00, 0x00, 0x00, 0x01, 0x42,
|
||||
0x01, 0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9c, 0x28, 0x00, 0x00, 0x03, 0x00, 0x00, 0x3c,
|
||||
0x90, 0x01, 0x01, 0x00, 0x80, 0x9b, 0x2d, 0xd4, 0x92, 0x65, 0x78, 0x0b, 0x40, 0x40, 0x00, 0x00,
|
||||
0x03, 0x00, 0x40, 0x00, 0x00, 0x07, 0x82, 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0x86,
|
||||
0x0c, 0x06, 0x24, 0x00, 0x00, 0x00, 0x01, 0x28, 0x01, 0xaf, 0x72, 0x15, 0xe8, 0x34, 0xeb, 0xae,
|
||||
0xfb, 0xfe, 0x75, 0x57, 0xca, 0xc1, 0x71, 0x43, 0x16, 0xf5, 0xc2, 0x40, 0xbd, 0x80, 0xa6, 0x65,
|
||||
0x35, 0x20, 0x28, 0x81, 0xa2, 0x5e, 0xc0, 0x93, 0x04, 0x10, 0x9b, 0x00, 0x34, 0xe0, 0x87, 0x00,
|
||||
0x00, 0x03, 0x00, 0x5b, 0x40,
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,153 @@
|
||||
// Per-session presenter stack shared by the macOS and iOS/tvOS stream views: stage-2 (explicit
|
||||
// VTDecompressionSession decode → CAMetalLayer, driven by the hosting view's CADisplayLink) is the
|
||||
// default; stage-1 (StreamPump → AVSampleBufferDisplayLayer) is the Metal-unavailable / DEBUG
|
||||
// fallback. The views own the platform bits — capture, window/scale tracking, and constructing the
|
||||
// display link — and delegate the shared presenter lifecycle here.
|
||||
//
|
||||
// Main-thread only: start/layout/stop and the display-link tick all run on the main runloop.
|
||||
|
||||
#if canImport(Metal) && canImport(QuartzCore)
|
||||
import AVFoundation
|
||||
import Foundation
|
||||
import QuartzCore
|
||||
|
||||
/// Weak-target trampoline for `CADisplayLink`.
///
/// A display link retains its target. Pointing it straight at a view or presenter therefore forms
/// an `owner → link → owner` cycle that only `invalidate()` breaks: miss one teardown and the
/// owner leaks and keeps ticking. The link retains this proxy instead; the handler closure is
/// expected to capture the owner `[weak]`, so the owner can deallocate and invalidate the link
/// from its `deinit`.
public final class DisplayLinkProxy: NSObject {
    /// Invoked on every display-link tick with the firing link.
    private let handler: (CADisplayLink) -> Void

    /// - Parameter onTick: Closure run for each tick; capture the owner weakly inside it.
    public init(_ onTick: @escaping (CADisplayLink) -> Void) {
        handler = onTick
        super.init()
    }

    /// Selector target for the display link; forwards the tick to the stored closure.
    @objc public func tick(_ link: CADisplayLink) {
        handler(link)
    }
}
|
||||
|
||||
/// Owns whichever presenter is active for one session — the stage-2 pipeline (with its metal
/// sublayer + display link) or the stage-1 `StreamPump` fallback — and its teardown.
/// Main-thread only.
final class SessionPresenter {
    /// Stage-1 fallback pump; nil while stage-2 is active or nothing is running.
    private var pump: StreamPump?
    /// Stage-2 pipeline; nil while stage-1 is active or nothing is running.
    private var stage2: Stage2Pipeline?
    /// Display link driving `stage2.renderTick`.
    private var stage2Link: CADisplayLink?
    /// The stage-2 layer installed as a sublayer of the view's base layer.
    private var metalLayer: CAMetalLayer?
    /// Connection of the running session (read by `layout` for the current mode).
    private var connection: PunktfunkConnection?

    /// Start presenting `connection`, stopping anything already running first.
    ///
    /// `baseLayer` is the view's AVSampleBufferDisplayLayer: stage-1 enqueues into it; stage-2
    /// leaves it idle and composites an opaque CAMetalLayer sublayer over it. `makeDisplayLink`
    /// supplies the platform link (macOS `NSView.displayLink` tracks the view's display; iOS/tvOS
    /// uses the plain `CADisplayLink` init) and is only called when stage-2 engages. Call
    /// `layout(in:contentsScale:)` right afterwards so the sublayer has a frame before the first
    /// tick.
    func start(
        connection: PunktfunkConnection,
        baseLayer: AVSampleBufferDisplayLayer,
        presentMeter: LatencyMeter?,
        presentTailMeter: LatencyMeter? = nil,
        makeDisplayLink: (AnyObject, Selector) -> CADisplayLink,
        onFrame: (@Sendable (AccessUnit) -> Void)?,
        onSessionEnd: (@Sendable () -> Void)?
    ) {
        stop()
        self.connection = connection

        // Stage-2 (explicit VTDecompressionSession decode + CAMetalLayer/display-link present) is
        // the DEFAULT: it can detect and recover a wedged decoder, where stage-1's
        // AVSampleBufferDisplayLayer freezes hard on a lost HEVC reference. Stage-1 can only be
        // forced through the DEBUG presenter toggle; release builds always try stage-2 first and
        // use the stage-1 pump purely as the automatic fallback when Metal is missing.
        #if DEBUG
        let forceStage1 = UserDefaults.standard.string(forKey: DefaultsKey.presenter) == "stage1"
        #else
        let forceStage1 = false
        #endif

        guard !forceStage1,
            let pipeline = Stage2Pipeline(
                presentMeter: presentMeter, presentTailMeter: presentTailMeter)
        else {
            // Stage-1: the pump enqueues straight into the base layer.
            let fallback = StreamPump()
            fallback.start(
                connection: connection, layer: baseLayer,
                onFrame: onFrame, onSessionEnd: onSessionEnd)
            pump = fallback
            return
        }

        // The opaque metal layer composites OVER the base layer, which stays idle (nothing is
        // enqueued into it) in stage-2. Its contentsScale + frame are set in layout().
        let metal = pipeline.layer
        baseLayer.addSublayer(metal)
        metalLayer = metal
        stage2 = pipeline

        // The link retains the proxy, never `self`, so a missed teardown cannot leak us.
        let proxy = DisplayLinkProxy { [weak self] link in
            let targetNs = Stage2Pipeline.realtimeNs(forDisplayLinkTimestamp: link.targetTimestamp)
            self?.stage2?.renderTick(targetPresentNs: targetNs)
        }
        let link = makeDisplayLink(proxy, #selector(DisplayLinkProxy.tick(_:)))
        link.add(to: .main, forMode: .common)
        stage2Link = link
        syncFrameRate(hz: connection.currentMode().refreshHz)
        pipeline.start(connection: connection, onFrame: onFrame, onSessionEnd: onSessionEnd)
    }

    /// Ask the display link for the stream's own cadence. iOS/tvOS-only: without an explicit
    /// range, ProMotion devices cap CADisplayLink at 60 Hz (iPhones additionally need
    /// `CADisableMinimumFrameDurationOnPhone` in Info.plist), so a 120 fps stream would present at
    /// half rate with the ring silently dropping every other frame. `maximum` allows up to 120 so
    /// the system MAY tick faster than a sub-120 stream (each extra tick is a near-free empty
    /// `renderTick`, and a denser grid shortens the decode→glass wait). The macOS NSView link
    /// already tracks its display and must NOT be capped to the stream rate, hence the
    /// compile-time exclusion. Also called from `layout`, so a mid-session `Reconfigure` picks up
    /// a new refresh.
    private func syncFrameRate(hz: UInt32) {
        #if !os(macOS)
        guard hz > 0, let link = stage2Link else { return }
        let wanted = Float(hz)
        // Only write the range when the preferred rate actually changes.
        guard link.preferredFrameRateRange.preferred != wanted else { return }
        link.preferredFrameRateRange = CAFrameRateRange(
            minimum: min(30, wanted), maximum: max(wanted, 120), preferred: wanted)
        #endif
    }

    /// Place the stage-2 metal sublayer aspect-fit inside the hosting view (the host streams at
    /// the client's native mode, so this is usually the full bounds; a resized window gets
    /// letterboxed). The FRAME + contentsScale set here are what the presenter sizes its drawable
    /// from (frame × scale); the shader then does the decoded→on-screen scale (bicubic luma), so
    /// a native-mode session stays pixel-exact 1:1 and a mismatched window beats the compositor's
    /// bilinear. Does nothing for stage-1 or before `start`.
    func layout(in bounds: CGRect, contentsScale: CGFloat) {
        guard let metalLayer, let connection else { return }
        let mode = connection.currentMode()
        syncFrameRate(hz: mode.refreshHz)  // follow a mid-session Reconfigure's new refresh

        var fit = bounds
        if mode.width > 0 && mode.height > 0 {
            fit = AVMakeRect(
                aspectRatio: CGSize(width: Int(mode.width), height: Int(mode.height)),
                insideRect: bounds)
        }

        // Suppress the implicit resize animation; contentsScale follows the view's
        // backing/display scale.
        CATransaction.begin()
        CATransaction.setDisableActions(true)
        metalLayer.contentsScale = contentsScale
        metalLayer.frame = fit
        CATransaction.commit()
    }

    /// Stop the active pump/pipeline (≤ one poll timeout; stage-2 joins its pump) and detach the
    /// stage-2 layer + link. The connection is NOT closed — that stays with its owner.
    /// Idempotent.
    func stop() {
        // Stage-1.
        pump?.stop()
        pump = nil

        // Stage-2: silence the ticks first, then stop the pipeline.
        stage2Link?.invalidate()
        stage2Link = nil
        stage2?.stop()  // stops the pump (synchronous join) + drops the decode session
        stage2 = nil

        metalLayer?.removeFromSuperlayer()
        metalLayer = nil
        connection = nil
    }

    deinit {
        // Normally the owning view already called stop(); this is the safety net for a missed
        // teardown so the display link cannot keep ticking a deallocated pipeline.
        stage2Link?.invalidate()
        stage2?.stop()
        pump?.stop()
    }
}
|
||||
#endif
|
||||
@@ -0,0 +1,217 @@
|
||||
// Stage-2 presenter orchestrator: a pump thread pulls AUs → VideoDecoder; the decoder's async output
|
||||
// drops the newest decoded frame into a 1-slot ring; the hosting view's display link calls `renderTick`
|
||||
// once per vsync to draw + present the newest ready frame and stamp capture→present. Mirrors
|
||||
// StreamPump's lifecycle (one per start; cancel is permanent).
|
||||
//
|
||||
// Threading: the pump runs on its own thread; the decoder callback on a VT thread; `renderTick` +
|
||||
// `start`/`stop` on the MAIN thread (the view's CADisplayLink fires there). Only the ring (lock-guarded)
|
||||
// and the decoder/presenter (internally locked / main-hopped) cross threads.
|
||||
|
||||
#if canImport(Metal) && canImport(QuartzCore)
|
||||
import AVFoundation
|
||||
import Foundation
|
||||
import QuartzCore
|
||||
|
||||
/// One-slot "newest ready frame" mailbox between the decoder and the display link.
///
/// The decoder overwrites the slot (an older, undisplayed frame is dropped — lowest latency, no
/// smoothing buffer); the display link takes the frame and clears the slot. Every access is
/// serialized by `lock`, which is what backs the `@unchecked Sendable` conformance.
private final class ReadyRing: @unchecked Sendable {
    private let lock = NSLock()
    private var frame: ReadyFrame?

    /// Store `f`, replacing whatever frame was waiting.
    func submit(_ f: ReadyFrame) {
        lock.lock()
        defer { lock.unlock() }
        frame = f
    }

    /// Remove and return the waiting frame, or nil when the slot is empty.
    func take() -> ReadyFrame? {
        lock.lock()
        let taken = frame
        frame = nil
        lock.unlock()
        return taken
    }

    /// Hand back a frame the display link took but could not present (a transient
    /// `nextDrawable` failure). It is kept only if the slot is still empty — a newer decoded
    /// frame wins, preserving newest-ready ordering. Without this a failed render would silently
    /// LOSE the frame, and under the host's infinite GOP a static scene sends no replacement
    /// until the next damage, so the stale picture would persist.
    func putBack(_ f: ReadyFrame) {
        lock.lock()
        defer { lock.unlock() }
        guard frame == nil else { return }
        frame = f
    }
}
|
||||
|
||||
public final class Stage2Pipeline {
|
||||
private let ring = ReadyRing()
|
||||
private let presenter: MetalVideoPresenter
|
||||
private let decoder: VideoDecoder
|
||||
private let presentMeter: LatencyMeter?
|
||||
private let presentTailMeter: LatencyMeter?
|
||||
private let recovery = KeyframeRecovery()
|
||||
private var token = StopFlag()
|
||||
private var offsetNs: Int64 = 0
|
||||
/// Signalled when the pump thread exits, so `stop()` can join it (bounded) before `decoder.reset()`
|
||||
/// — otherwise a pump iteration already past its `token.isStopped` check can rebuild a decode session
|
||||
/// right after the reset (a brief orphan session). `pumpJoinable` is armed by `start`, consumed by
|
||||
/// the first `stop` (so the idempotent second `stop`/deinit doesn't block on an already-drained
|
||||
/// semaphore). start/stop are sequential lifecycle calls, so the plain flag is safe.
|
||||
private let pumpStopped = DispatchSemaphore(value: 0)
|
||||
private var pumpJoinable = false
|
||||
|
||||
/// The Metal layer the hosting view installs + sizes.
|
||||
public var layer: CAMetalLayer { presenter.layer }
|
||||
|
||||
/// `presentMeter` records capture→present (the glass-to-glass term); `presentTailMeter`
|
||||
/// records decode-completion→present (the ring wait + render — the tail stage-2 exists to
|
||||
/// shorten). Both optional: metering never gates the presenter choice. Returns nil if Metal
|
||||
/// can't be set up (headless / no GPU) — caller falls back to the stage-1 presenter.
|
||||
public init?(presentMeter: LatencyMeter?, presentTailMeter: LatencyMeter? = nil) {
|
||||
guard let presenter = MetalVideoPresenter.make() else { return nil }
|
||||
self.presenter = presenter
|
||||
self.presentMeter = presentMeter
|
||||
self.presentTailMeter = presentTailMeter
|
||||
let ring = ring
|
||||
let recovery = recovery
|
||||
self.decoder = VideoDecoder(
|
||||
onDecoded: { ring.submit($0) },
|
||||
// Async decode failure (a bad P-frame referencing a lost/corrupt IDR): the pump resets to
|
||||
// re-gate on the next IDR, and we ask the host to send one now (infinite GOP — it wouldn't
|
||||
// otherwise come soon). Throttled in KeyframeRecovery.
|
||||
onDecodeError: { _ in recovery.request() })
|
||||
}
|
||||
|
||||
/// Start pulling AUs into the decoder. MAIN THREAD. `onFrame` fires per AU at receipt (capture→client
|
||||
/// meter, exactly as stage-1); `onSessionEnd` on close. `clockOffsetNs` (host minus client) makes the
|
||||
/// present stamp cross-machine valid.
|
||||
public func start(
|
||||
connection: PunktfunkConnection,
|
||||
onFrame: (@Sendable (AccessUnit) -> Void)?,
|
||||
onSessionEnd: (@Sendable () -> Void)?
|
||||
) {
|
||||
offsetNs = connection.clockOffsetNs
|
||||
recovery.bind(connection) // arm host-keyframe recovery for this session
|
||||
token = StopFlag() // fresh token per start — a stop is permanent (like StreamPump)
|
||||
|
||||
// Configure the decoder's chroma + the layer's initial colorimetry before the first frame. The
|
||||
// chroma subsampling drives only the decode pixel format (orthogonal to HDR/depth); the HDR
|
||||
// config is the Welcome's latched value, which a mid-session flip then overrides per-frame.
|
||||
decoder.setChroma444(connection.isChroma444)
|
||||
decoder.setCodec(connection.videoCodec)
|
||||
presenter.configure(hdr: connection.isHDR)
|
||||
|
||||
let token = token
|
||||
let decoder = decoder
|
||||
let recovery = recovery
|
||||
let presenter = presenter
|
||||
let pumpStopped = pumpStopped
|
||||
let thread = Thread {
|
||||
defer { pumpStopped.signal() } // let stop() join the pump (bounded) before decoder.reset()
|
||||
var format: CMVideoFormatDescription?
|
||||
var lastFramesDropped = connection.framesDropped()
|
||||
// Persistent recovery WANT, not a one-shot edge (see StreamPump for the full rationale):
|
||||
// keep asking until an IDR lands so a request swallowed by the throttle is re-sent.
|
||||
var awaitingIDR = false
|
||||
// 4:4:4 backstop: a run of decode/create failures in a 4:4:4 session means this device can't
|
||||
// decode 4:4:4 at the negotiated resolution (the HW probe clears the common case but not a
|
||||
// resolution-ceiling miss). End cleanly instead of looping on a black screen.
|
||||
var decodeFailRun = 0
|
||||
while !token.isStopped {
|
||||
do {
|
||||
// Loss recovery (the primary path). The reassembler drops unrecoverable AUs and the
|
||||
// decoder conceals the reference-missing deltas — often WITHOUT an error callback —
|
||||
// so key off the drop count climbing, then keep asking (awaitingIDR) until a fresh
|
||||
// IDR re-anchors decode.
|
||||
let dropped = connection.framesDropped()
|
||||
if dropped > lastFramesDropped {
|
||||
lastFramesDropped = dropped
|
||||
awaitingIDR = true
|
||||
}
|
||||
if awaitingIDR { recovery.request() }
|
||||
// Drain HDR mastering metadata (0xCE) and hand it to the PRESENTER (→ CAEDRMetadata).
|
||||
// Polled UNCONDITIONALLY (not gated on connection.isHDR, the fixed Welcome flag): the
|
||||
// host sends 0xCE only for HDR, INCLUDING a mid-session SDR→HDR transition (a game
|
||||
// entering HDR — the host re-inits its encoder) the Welcome flag would never reflect.
|
||||
// Non-blocking; nil for an SDR stream.
|
||||
if let meta = try? connection.nextHdrMeta(timeoutMs: 0) {
|
||||
presenter.setHdrMeta(meta)
|
||||
}
|
||||
guard let au = try connection.nextAU(timeoutMs: 100) else { continue }
|
||||
onFrame?(au)
|
||||
if let f = AnnexB.formatDescription(fromIDR: au.data, codec: connection.videoCodec) {
|
||||
format = f // refreshed on every IDR (mode changes included)
|
||||
awaitingIDR = false // a fresh IDR re-anchored decode — recovery complete
|
||||
}
|
||||
guard let f = format, !token.isStopped else { continue }
|
||||
if decoder.decode(au: au, format: f) {
|
||||
decodeFailRun = 0
|
||||
} else {
|
||||
// Submit/decoder error: drop the session and re-gate on the next IDR's in-band
|
||||
// parameter sets (a delta frame can't recover) and keep asking for that IDR.
|
||||
decoder.reset()
|
||||
awaitingIDR = true
|
||||
decodeFailRun += 1
|
||||
// ~3 s of solid failure in a 4:4:4 session (and only there — a 4:2:0 loss
|
||||
// recovers within a GOP) ⇒ 4:4:4 isn't decodable here; end the session.
|
||||
if connection.isChroma444, decodeFailRun >= 180 {
|
||||
if !token.isStopped { onSessionEnd?() }
|
||||
break
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
if !token.isStopped { onSessionEnd?() }
|
||||
break // session closed
|
||||
}
|
||||
}
|
||||
}
|
||||
thread.name = "punktfunk-stage2-pump"
|
||||
thread.qualityOfService = .userInteractive
|
||||
pumpJoinable = true
|
||||
thread.start()
|
||||
}
|
||||
|
||||
/// MAIN thread, once per vsync. Present the newest ready frame (if any). The latency stamps
|
||||
/// use the drawable's ACTUAL on-glass instant (`addPresentedHandler`/`presentedTime` — the
|
||||
/// handler fires on a Metal callback thread; the meters are thread-safe), falling back to
|
||||
/// `targetPresentNs` — the display link's target present instant, already converted to
|
||||
/// `CLOCK_REALTIME` (see `realtimeNs(forDisplayLinkTimestamp:)`) — when the system reports
|
||||
/// no presented time (a dropped drawable). A frame that could not be rendered (no drawable
|
||||
/// yet) goes back into the ring so the next tick retries it.
|
||||
public func renderTick(targetPresentNs: Int64) {
|
||||
guard let frame = ring.take() else { return }
|
||||
let offsetNs = offsetNs
|
||||
let presentMeter = presentMeter
|
||||
let presentTailMeter = presentTailMeter
|
||||
let rendered = presenter.render(frame.pixelBuffer, isHDR: frame.isHDR) { presentedNs in
|
||||
let atNs = presentedNs ?? targetPresentNs
|
||||
presentMeter?.record(ptsNs: frame.ptsNs, atNs: atNs, offsetNs: offsetNs)
|
||||
// Present tail = decode-completion → on-glass. Both instants are client
|
||||
// CLOCK_REALTIME, so no skew offset applies.
|
||||
presentTailMeter?.record(ptsNs: UInt64(frame.decodedNs), atNs: atNs, offsetNs: 0)
|
||||
}
|
||||
if !rendered { ring.putBack(frame) }
|
||||
}
|
||||
|
||||
/// Permanently stop the pump and drop the decode session. MAIN THREAD; idempotent.
///
/// The connection is left open. The stop cannot be undone — a restart needs a fresh
/// `Stage2Pipeline`.
public func stop() {
    token.stop()

    // Join the pump before resetting the decoder so it cannot rebuild a session right after
    // the reset. The join is bounded (≤ one nextAU poll + an in-flight decode, capped at
    // 0.5 s here) and happens only on the first stop: a repeated stop must not wait on the
    // already-drained semaphore.
    let shouldJoin = pumpJoinable
    if shouldJoin {
        pumpJoinable = false
        _ = pumpStopped.wait(timeout: .now() + 0.5)
    }

    decoder.reset()
    // Stop requesting keyframes once the session is torn down.
    recovery.bind(nil)
}
|
||||
|
||||
/// Raise the stop flag on deallocation so a still-running pump thread exits its loop.
deinit {
    token.stop()
}
|
||||
|
||||
/// Convert a `CADisplayLink.targetTimestamp` (CACurrentMediaTime basis) to a `CLOCK_REALTIME`
/// nanosecond instant — the present clock the AU pts + skew offset live in. Projects to the target
/// present time (when the frame is actually on glass), not the moment we drew.
///
/// - Parameter t: A timestamp on the `CACurrentMediaTime()` timebase, in seconds.
/// - Returns: The same instant on `CLOCK_REALTIME`, in nanoseconds. If `t` is not finite, or
///   the projection does not fit in an `Int64`, the current `CLOCK_REALTIME` instant is
///   returned instead of trapping.
public static func realtimeNs(forDisplayLinkTimestamp t: CFTimeInterval) -> Int64 {
    // Sample both clocks back to back so the two "now"s describe (nearly) the same instant.
    let caNow = CACurrentMediaTime()
    var ts = timespec()
    clock_gettime(CLOCK_REALTIME, &ts)
    let realtimeNow = Int64(ts.tv_sec) * 1_000_000_000 + Int64(ts.tv_nsec)

    // `Int64(Double)` traps on NaN / ±infinity / out-of-range values. Truncate toward zero
    // (what that initializer does for representable values) and convert with the failable
    // initializer so a bogus timestamp degrades to "now" rather than crashing.
    let deltaNs = ((t - caNow) * 1_000_000_000).rounded(.towardZero)
    guard let delta = Int64(exactly: deltaNs) else { return realtimeNow }

    let (projected, overflowed) = realtimeNow.addingReportingOverflow(delta)
    return overflowed ? realtimeNow : projected
}
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,83 @@
|
||||
// Runtime 4:4:4 HEVC decode-capability probe.
|
||||
//
|
||||
// We advertise `VIDEO_CAP_444` (so the host upgrades to a full-chroma 4:4:4 stream) ONLY when this
|
||||
// device can decode 4:4:4 HEVC *in hardware* — software 4:4:4 decode works but is far too slow for a
|
||||
// real-time stream at the negotiated resolution, so a software-only device must keep 4:2:0.
|
||||
//
|
||||
// `VTIsHardwareDecodeSupported(HEVC)` and the HEVC-decoder-capabilities dictionary report HEVC HW
|
||||
// decode but expose nothing about `chroma_format_idc`, so the only reliable signal is to actually
|
||||
// create a *hardware-required* `VTDecompressionSession` for a tiny synthetic 4:4:4 keyframe and
|
||||
// confirm it both creates and decodes to the expected biplanar 4:4:4 pixel format. Validated on an
|
||||
// Apple M3 (HW 4:4:4 8- and 10-bit decode to `444v`/`x444`); a software-only decoder fails the
|
||||
// hardware-required create and we fall back to 4:2:0.
|
||||
//
|
||||
// The probe blobs are 256×256 (above the hardware decoder's minimum-dimension floor — a 16×16 clip
|
||||
// is rejected for ALL chroma formats, including 4:2:0) HEVC Range-Extensions keyframes generated
|
||||
// offline with libx265; see scripts notes. Results are cached (device-static) in lazy statics.
|
||||
|
||||
import CoreMedia
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import VideoToolbox
|
||||
|
||||
public enum Stage444Probe {
    /// Whether this device decodes 8-bit 4:4:4 HEVC in hardware (the host's current 4:4:4 path —
    /// BT.709 limited `yuv444p`). Evaluated once on first access, then cached.
    public static let hwDecode444_8bit: Bool = probeHardware444(
        au: Probe444Blobs.au444_8bit,
        want: kCVPixelFormatType_444YpCbCr8BiPlanarVideoRange,
        fullRangeSibling: kCVPixelFormatType_444YpCbCr8BiPlanarFullRange)

    /// Whether this device decodes 10-bit 4:4:4 HEVC in hardware (the 4:4:4 ∩ HDR/10-bit
    /// intersection). Evaluated once on first access, then cached.
    public static let hwDecode444_10bit: Bool = probeHardware444(
        au: Probe444Blobs.au444_10bit,
        want: kCVPixelFormatType_444YpCbCr10BiPlanarVideoRange,
        fullRangeSibling: kCVPixelFormatType_444YpCbCr10BiPlanarFullRange)

    /// Decode one synthetic 4:4:4 keyframe through a hardware-REQUIRED `VTDecompressionSession`.
    ///
    /// - Parameters:
    ///   - auBytes: The Annex-B keyframe to decode.
    ///   - want: The video-range biplanar 4:4:4 pixel format requested from the decoder.
    ///   - fullRangeSibling: The full-range variant, also accepted as a pass.
    /// - Returns: `true` only when the frame decodes to `want` or `fullRangeSibling`. Every
    ///   failure (no format description, no hardware session, submit/decode error, any other
    ///   output format) yields `false`, i.e. we keep 4:2:0.
    private static func probeHardware444(
        au auBytes: [UInt8], want: OSType, fullRangeSibling: OSType
    ) -> Bool {
        let keyframe = Data(auBytes)
        guard let description = AnnexB.formatDescription(fromIDR: keyframe, codec: .hevc) else {
            return false
        }

        // Insist on a hardware decoder: a software false-positive would make us advertise
        // 4:4:4 and then decode every real frame on the CPU, blowing the latency budget.
        let decoderSpec: [CFString: Any] = [
            kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder: true,
        ]
        let bufferAttrs: [CFString: Any] = [
            kCVPixelBufferPixelFormatTypeKey: want,
            kCVPixelBufferMetalCompatibilityKey: true,
        ]

        var maybeSession: VTDecompressionSession?
        let createStatus = VTDecompressionSessionCreate(
            allocator: kCFAllocatorDefault,
            formatDescription: description,
            decoderSpecification: decoderSpec as CFDictionary,
            imageBufferAttributes: bufferAttrs as CFDictionary,
            outputCallback: nil,
            decompressionSessionOut: &maybeSession)
        guard createStatus == noErr, let session = maybeSession else { return false }
        defer { VTDecompressionSessionInvalidate(session) }

        let unit = AccessUnit(data: keyframe, ptsNs: 0, frameIndex: 0, flags: 0)
        guard let sample = AnnexB.sampleBuffer(au: unit, format: description, codec: .hevc) else {
            return false
        }

        // Stays 0 unless the output handler sees a successfully decoded image.
        var decodedFormat: OSType = 0
        let handlerFired = DispatchSemaphore(value: 0)
        let submitStatus = VTDecompressionSessionDecodeFrame(
            session,
            sampleBuffer: sample,
            flags: [._EnableAsynchronousDecompression],
            infoFlagsOut: nil
        ) { decodeStatus, _, imageBuffer, _, _ in
            defer { handlerFired.signal() }
            guard decodeStatus == noErr, let imageBuffer else { return }
            decodedFormat = CVPixelBufferGetPixelFormatType(imageBuffer)
        }
        guard submitStatus == noErr else { return false }

        // Drain the asynchronous decode, then give the handler a bounded (1 s) grace period.
        VTDecompressionSessionWaitForAsynchronousFrames(session)
        _ = handlerFired.wait(timeout: .now() + 1.0)
        return [want, fullRangeSibling].contains(decodedFormat)
    }
}
|
||||
@@ -0,0 +1,121 @@
|
||||
// The platform-independent heart of the presenters: one thread pulling AUs from the
|
||||
// connection into an AVSampleBufferDisplayLayer, with the format description refreshed
|
||||
// on every IDR (the host opens with an IDR carrying in-band parameter sets; recovery
|
||||
// keyframes re-send them — there is no out-of-band extradata, ever). Shared by the
|
||||
// macOS StreamLayerView and the iOS/iPadOS stream view.
|
||||
|
||||
import AVFoundation
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
private let pumpLog = Logger(subsystem: "io.unom.punktfunk", category: "video")
|
||||
|
||||
/// One pump per instance; create a fresh StreamPump per start (the stop is permanent —
/// a restart hands the old pump its own token, so it can never be revived by a newer start()).
final class StreamPump {
    /// Raised by `stop()` / `deinit`; the pump thread checks it on every loop iteration.
    private let token = StopFlag()

    /// Start the pump thread: pull AUs from `connection`, wrap them as sample buffers and enqueue
    /// them on `layer`. Non-IDR AUs before the first format description are dropped.
    ///
    /// - Parameters:
    ///   - connection: Source of access units and of the dropped-frame counter.
    ///   - layer: Display layer the samples are enqueued on; flushed once before pumping.
    ///   - onFrame: Called on the pump thread for every AU received (before it is enqueued).
    ///   - onSessionEnd: Called on the pump thread when `nextAU` throws and the pump was not
    ///     stopped.
    func start(
        connection: PunktfunkConnection,
        layer: AVSampleBufferDisplayLayer,
        onFrame: (@Sendable (AccessUnit) -> Void)?,
        onSessionEnd: (@Sendable () -> Void)?
    ) {
        let token = token
        // Coalesced host keyframe requests (100 ms throttle — see KeyframeRecovery).
        let recovery = KeyframeRecovery()
        recovery.bind(connection)
        // The layer is non-Sendable but its enqueue/flush are documented thread-safe, and after
        // this point only the pump thread drives it — assert that so the @Sendable Thread closure
        // may capture it.
        nonisolated(unsafe) let layer = layer
        layer.flush()  // drop any frames a previous connection left queued

        let thread = Thread {
            var format: CMVideoFormatDescription?
            var lastFramesDropped = connection.framesDropped()
            // Recovery is a persistent WANT, not a one-shot edge: set it on detected loss (or a
            // decoder reset), retry the throttled request EVERY iteration, and clear it only when a
            // fresh IDR actually re-anchors decode. The old code advanced `lastFramesDropped` on the
            // same edge it fired the throttled request — so a request swallowed by the throttle (a
            // second drop within the window, e.g. the lost recovery IDR itself being pruned) was
            // never re-sent: the counter went flat, the climb never re-fired, and the picture stayed
            // frozen for good while audio kept playing. The iPhone's lossy Wi-Fi hits this where the
            // Mac's Ethernet never does.
            var awaitingIDR = false
            // When the current recovery began (for the resume log). Must be stamped on EVERY
            // false→true transition of `awaitingIDR`, whichever path triggers it.
            var awaitingSince = Date.distantPast
            var wasFailed = false

            while !token.isStopped {
                do {
                    // Loss recovery (the primary path). Under the host's infinite GOP the only
                    // recovery keyframe is one we request. The reassembler drops unrecoverable AUs
                    // (framesDropped); the decoder then *conceals* the reference-missing deltas — a
                    // frozen / garbage picture that never flips the layer to .failed — so key off the
                    // drop count climbing, then keep asking (awaitingIDR) until an IDR lands. Polled
                    // every iteration so a total-loss drought still recovers when packets resume.
                    let dropped = connection.framesDropped()
                    if dropped > lastFramesDropped {
                        // Log only on the false→true transition (once per recovery cycle), not per
                        // dropped AU, so heavy loss doesn't spam the log.
                        if !awaitingIDR {
                            awaitingSince = Date()
                            pumpLog.notice(
                                "video: unrecoverable drop (framesDropped=\(dropped, privacy: .public)) — requesting recovery IDR")
                        }
                        lastFramesDropped = dropped
                        awaitingIDR = true
                    }
                    if awaitingIDR { recovery.request() }

                    guard let au = try connection.nextAU(timeoutMs: 100) else { continue }
                    onFrame?(au)

                    let idrFormat = AnnexB.formatDescription(fromIDR: au.data, codec: connection.videoCodec)
                    if let f = idrFormat {
                        format = f  // refreshed on every IDR (mode changes included)
                        if awaitingIDR {
                            let ms = Int(Date().timeIntervalSince(awaitingSince) * 1000)
                            pumpLog.notice("video: recovery IDR received — resumed after \(ms, privacy: .public) ms")
                        }
                        awaitingIDR = false  // a fresh IDR re-anchored decode — recovery complete
                    }

                    let failed = layer.status == .failed
                    if failed {
                        // Decode wedged hard (the cold-first-connect case — a lost/corrupt opening
                        // IDR): flush and, unless THIS AU is the recovering IDR (re-anchored above),
                        // re-gate on the next in-band parameter sets and keep asking — enqueuing a
                        // delta into a failed layer can't recover it.
                        if !wasFailed { pumpLog.warning("video: display layer .failed — flushing + re-anchoring") }
                        layer.flush()
                        if idrFormat == nil {
                            format = nil
                            // This path can start a recovery on its own (no drop was counted), so
                            // stamp its start here too — otherwise the resume log would measure
                            // from `distantPast` or from an earlier, unrelated recovery.
                            if !awaitingIDR { awaitingSince = Date() }
                            awaitingIDR = true
                        }
                    }
                    wasFailed = failed

                    guard let f = format,
                        let sample = AnnexB.sampleBuffer(au: au, format: f, codec: connection.videoCodec),
                        !token.isStopped  // don't enqueue a stale frame after a restart
                    else { continue }
                    layer.enqueue(sample)
                } catch {
                    if !token.isStopped {
                        onSessionEnd?()
                    }
                    break  // session closed
                }
            }
        }
        thread.name = "punktfunk-pump"
        thread.qualityOfService = .userInteractive
        thread.start()
    }

    /// Stop pumping (≤ one poll timeout). Does not close the connection.
    func stop() {
        token.stop()
    }

    deinit { token.stop() }
}
|
||||
@@ -0,0 +1,247 @@
|
||||
// Stage-2 presenter, decode half: explicit VideoToolbox decode of the host's HEVC AUs.
|
||||
//
|
||||
// Stage-1 hands compressed samples to AVSampleBufferDisplayLayer, which decodes AND presents
|
||||
// internally with no per-frame callback — so neither decode-completion nor present can be
|
||||
// stamped, and frames can't be hand-paced. Here we drive VTDecompressionSession ourselves: the
|
||||
// output callback delivers a decoded CVPixelBuffer, we stamp decode-completion, and push it into
|
||||
// a ready ring the presenter's display link drains. See docs apple-stage2-presenter.md.
|
||||
|
||||
import CoreMedia
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import VideoToolbox
|
||||
|
||||
/// One decoded frame waiting to be presented. Owns a retained `CVPixelBuffer` until shown.
|
||||
public struct ReadyFrame: @unchecked Sendable {
|
||||
/// Host capture clock (the AU's pts), in nanoseconds.
|
||||
public let ptsNs: UInt64
|
||||
/// Client `CLOCK_REALTIME` instant decode completed, in nanoseconds.
|
||||
public let decodedNs: Int64
|
||||
/// The decoded image — biplanar and Metal-compatible: 8-bit NV12 / 4:4:4 `444v` (SDR) or 10-bit P010 / 4:4:4 `x444` (HDR).
|
||||
public let pixelBuffer: CVPixelBuffer
|
||||
/// True when the stream is HDR (BT.2020 PQ/HLG): the buffer is 10-bit (P010 or `x444`) and the presenter must
|
||||
/// configure EDR + BT.2020 output. Derived from the decoded buffer's pixel format.
|
||||
public let isHDR: Bool
|
||||
}
|
||||
|
||||
/// C-convention output callback shared by every `VideoDecoder` session. A C callback cannot
/// capture context, so VideoToolbox passes back the refcon registered at session creation —
/// an unretained pointer to the owning `VideoDecoder` — and the result is forwarded to it.
private let decoderOutputCallback: VTDecompressionOutputCallback = {
    refcon, _, status, _, imageBuffer, pts, _ in
    // Without a refcon there is no decoder to deliver to.
    guard let refcon else { return }
    let owner = Unmanaged<VideoDecoder>.fromOpaque(refcon).takeUnretainedValue()
    owner.handleDecoded(status: status, imageBuffer: imageBuffer, pts: pts)
}
|
||||
|
||||
/// Owns a `VTDecompressionSession` rebuilt whenever the format description changes (every IDR /
/// mode change, the same trigger stage-1 uses). Thread-safe: `decode` runs on the pump thread,
/// the output callback on a VT-managed thread; the only shared mutable state is the session +
/// format, guarded by `lock`. `@unchecked Sendable` — the lock enforces the contract.
public final class VideoDecoder: @unchecked Sendable {
    private let lock = NSLock()
    private var session: VTDecompressionSession?
    /// The format description the live session was last confirmed to handle. `lock` held.
    private var format: CMVideoFormatDescription?

    /// Called on the VT thread for each successfully decoded frame — stamp + enqueue, don't block.
    private let onDecoded: @Sendable (ReadyFrame) -> Void
    /// Called on the VT thread when a frame fails to decode (bad data / decoder reset) so the
    /// pump can re-gate on the next IDR. Also called from `decode` when submission fails.
    private let onDecodeError: @Sendable (OSStatus) -> Void

    /// Whether the negotiated stream is full-chroma 4:4:4 (`connection.isChroma444`), set once at
    /// session start before any decode. Selects the 4:4:4 decode pixel format (orthogonal to bit
    /// depth / HDR). Read inside `createSessionLocked` under `lock`.
    private var chroma444 = false

    /// The negotiated codec (`connection.videoCodec`), set once at session start. Drives the AnnexB
    /// NAL parsing (H.264 vs HEVC parameter sets). Read under `lock`.
    private var codec: VideoCodec = .hevc

    /// - Parameters:
    ///   - onDecoded: Receives every successfully decoded frame.
    ///   - onDecodeError: Receives the status of every failed decode; defaults to a no-op.
    public init(
        onDecoded: @escaping @Sendable (ReadyFrame) -> Void,
        onDecodeError: @escaping @Sendable (OSStatus) -> Void = { _ in }
    ) {
        self.onDecoded = onDecoded
        self.onDecodeError = onDecodeError
    }

    deinit { teardown() }

    /// Select the chroma subsampling of the decode output (4:2:0 vs full-chroma 4:4:4). Call once at
    /// session start, before decoding, from `connection.isChroma444`. Takes effect on the next
    /// session (re)build. Thread-safe.
    public func setChroma444(_ on: Bool) {
        lock.lock()
        chroma444 = on
        lock.unlock()
    }

    /// Select the negotiated codec (H.264 vs HEVC). Call once at session start, before decoding,
    /// from `connection.videoCodec`. Thread-safe.
    public func setCodec(_ c: VideoCodec) {
        lock.lock()
        codec = c
        lock.unlock()
    }

    /// Submit one AU for asynchronous decode, (re)creating the session if `format` changed. The
    /// caller resolves `format` from the IDR exactly as stage-1 does (`AnnexB.formatDescription`).
    ///
    /// - Returns: `false` if the session couldn't be created or the frame couldn't be submitted
    ///   (a failed submission is also reported through `onDecodeError`).
    @discardableResult
    public func decode(au: AccessUnit, format newFormat: CMVideoFormatDescription) -> Bool {
        lock.lock()
        let needsNew: Bool = {
            guard let session, let format else { return true }
            if CMFormatDescriptionEqual(format, otherFormatDescription: newFormat) { return false }
            // The output bit depth (8- vs 10-bit) is fixed at session creation from the transfer
            // function, so an SDR↔HDR flip must rebuild even if VideoToolbox would accept the new
            // description on the live session — otherwise frames keep the old depth and are
            // misclassified by `handleDecoded`.
            if Self.isHDRFormat(format) != Self.isHDRFormat(newFormat) { return true }
            // A new desc that the live session can still accept (rare for HEVC) avoids a rebuild.
            return !VTDecompressionSessionCanAcceptFormatDescription(session, formatDescription: newFormat)
        }()
        if needsNew {
            guard createSessionLocked(format: newFormat) else {
                lock.unlock()
                return false
            }
        } else {
            // Remember the description the live session accepted so the equality fast path
            // applies to the following AUs instead of re-querying the session each time.
            format = newFormat
        }
        // Submit WHILE holding the lock so a concurrent reset()/teardown (main thread) can't
        // invalidate the session between here and DecodeFrame. The VT output callback takes the
        // ring lock, not this one, so there's no re-entrancy. DecodeFrame is async — non-blocking.
        guard let session,
            let sample = AnnexB.sampleBuffer(au: au, format: newFormat, codec: codec)
        else {
            lock.unlock()
            return false
        }
        var infoOut = VTDecodeInfoFlags()
        let status = VTDecompressionSessionDecodeFrame(
            session,
            sampleBuffer: sample,
            flags: [._EnableAsynchronousDecompression],
            frameRefcon: nil,
            infoFlagsOut: &infoOut)
        lock.unlock()
        if status != noErr {
            onDecodeError(status)
            return false
        }
        return true
    }

    /// Drop the session — the next `decode` rebuilds it. Used on stop and to recover from a
    /// wedged decoder (re-gates on the next in-band parameter sets, like stage-1's flush).
    public func reset() {
        lock.lock()
        teardownLocked()
        lock.unlock()
    }

    /// Locked wrapper around `teardownLocked()`, used by `deinit`.
    private func teardown() {
        lock.lock()
        teardownLocked()
        lock.unlock()
    }

    /// `lock` held. Drain in-flight frames, invalidate the session and forget the format.
    private func teardownLocked() {
        if let session {
            VTDecompressionSessionWaitForAsynchronousFrames(session)
            VTDecompressionSessionInvalidate(session)
        }
        session = nil
        format = nil
    }

    /// True when `newFormat` carries a PQ (SMPTE ST 2084) or HLG transfer function — i.e. the host
    /// is sending HDR (BT.2020). VideoToolbox populates the transfer-function extension from the
    /// HEVC VUI, so this picks the decode bit depth (10-bit P010/x444 vs 8-bit NV12/444v) from the
    /// stream — and can flip mid-session (a game entering HDR re-inits the host encoder). The
    /// presenter follows the decoded frame's resulting `isHDR`, not the Welcome's latched flag
    /// (`render` reconciles the layer per frame via the idempotent `configure(hdr:)`).
    static func isHDRFormat(_ format: CMVideoFormatDescription) -> Bool {
        guard
            let tf = CMFormatDescriptionGetExtension(
                format, extensionKey: kCMFormatDescriptionExtension_TransferFunction)
        else { return false }
        let s = tf as? String
        return s == (kCMFormatDescriptionTransferFunction_SMPTE_ST_2084_PQ as String)
            || s == (kCMFormatDescriptionTransferFunction_ITU_R_2100_HLG as String)
    }

    /// `lock` held. Replace the session with one for `newFormat`. The output pixel format follows
    /// (chroma, HDR): 8-bit NV12 / `444v` for SDR, 10-bit P010 / `x444` for HDR (PQ/HLG) so the
    /// presenter can drive EDR.
    ///
    /// - Returns: `false` when VideoToolbox could not create the session; the previous session
    ///   has already been torn down at that point.
    private func createSessionLocked(format newFormat: CMVideoFormatDescription) -> Bool {
        if let session {
            VTDecompressionSessionWaitForAsynchronousFrames(session)
            VTDecompressionSessionInvalidate(session)
        }
        session = nil
        format = nil

        // Decode pixel format is a 2×2 of (chroma, depth/HDR), both biplanar so the presenter binds
        // plane 0 = luma, plane 1 = interleaved chroma uniformly — 4:4:4 just delivers a full-size
        // chroma plane. 10-bit (P010 / `x444`) for HDR (PQ/HLG), 8-bit (NV12 / `444v`) otherwise.
        let hdr = Self.isHDRFormat(newFormat)
        let pixelFormat: OSType = {
            switch (chroma444, hdr) {
            case (false, false): return kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange  // NV12
            case (false, true): return kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange  // P010
            case (true, false): return kCVPixelFormatType_444YpCbCr8BiPlanarVideoRange  // 444v
            case (true, true): return kCVPixelFormatType_444YpCbCr10BiPlanarVideoRange  // x444
            }
        }()
        let imageAttrs: [CFString: Any] = [
            kCVPixelBufferMetalCompatibilityKey: true,
            kCVPixelBufferPixelFormatTypeKey: pixelFormat,
        ]
        // Unretained refcon: `deinit` tears the session down (draining in-flight frames) before
        // the decoder goes away.
        var callback = VTDecompressionOutputCallbackRecord(
            decompressionOutputCallback: decoderOutputCallback,
            decompressionOutputRefCon: Unmanaged.passUnretained(self).toOpaque())
        // 4:4:4 sessions REQUIRE a hardware decoder: we only advertise 4:4:4 when the hardware probe
        // passed, so a hardware-incapable mode (e.g. a resolution past the HW 4:4:4 ceiling) must fail
        // HERE, synchronously, letting the pump's backstop end the session — rather than silently
        // falling back to a software 4:4:4 decoder far too slow for a real-time stream. 4:2:0 keeps the
        // software fallback (nil spec) as a robustness net.
        let spec: CFDictionary? =
            chroma444
            ? [kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder: true] as CFDictionary
            : nil
        var newSession: VTDecompressionSession?
        let status = VTDecompressionSessionCreate(
            allocator: kCFAllocatorDefault,
            formatDescription: newFormat,
            decoderSpecification: spec,
            imageBufferAttributes: imageAttrs as CFDictionary,
            outputCallback: &callback,
            decompressionSessionOut: &newSession)
        guard status == noErr, let newSession else { return false }
        // Real-time hint: schedule this session for live-streaming latency rather than
        // throughput/efficiency. Best-effort — decoders that don't support the property
        // return an error, which is fine to ignore.
        VTSessionSetProperty(
            newSession, key: kVTDecompressionPropertyKey_RealTime, value: kCFBooleanTrue)
        session = newSession
        format = newFormat
        return true
    }

    /// VT thread. Stamp decode-completion and enqueue, or report the error.
    fileprivate func handleDecoded(status: OSStatus, imageBuffer: CVImageBuffer?, pts: CMTime) {
        guard status == noErr, let imageBuffer else {
            onDecodeError(status)
            return
        }
        var ts = timespec()
        clock_gettime(CLOCK_REALTIME, &ts)
        let decodedNs = Int64(ts.tv_sec) * 1_000_000_000 + Int64(ts.tv_nsec)
        // pts was stamped at timescale 1e9 (AnnexB.sampleBuffer); normalize defensively.
        let p = CMTimeConvertScale(pts, timescale: 1_000_000_000, method: .default)
        let ptsNs = p.value > 0 ? UInt64(p.value) : 0
        // HDR iff the decoder produced a 10-bit buffer (we only request a 10-bit format for PQ/HLG
        // streams). Covers 4:2:0 (P010) and 4:4:4 (`x444`), video- and full-range, so a 10-bit 4:4:4
        // HDR frame isn't misclassified as SDR. (The mastering metadata is applied to the presenter's
        // CAMetalLayer via CAEDRMetadata, not to this source buffer — a separate-drawable presenter
        // never composites the source buffer's attachments, so attaching them here would be dead.)
        let fmt = CVPixelBufferGetPixelFormatType(imageBuffer)
        let isHDR =
            fmt == kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange
            || fmt == kCVPixelFormatType_420YpCbCr10BiPlanarFullRange
            || fmt == kCVPixelFormatType_444YpCbCr10BiPlanarVideoRange
            || fmt == kCVPixelFormatType_444YpCbCr10BiPlanarFullRange
        onDecoded(
            ReadyFrame(ptsNs: ptsNs, decodedNs: decodedNs, pixelBuffer: imageBuffer, isHDR: isHDR))
    }
}
|
||||
Reference in New Issue
Block a user