// Real-bitstream proof of the decode-prep path: VTCompressionSession encodes HEVC, we
// rebuild the host's wire shape (Annex-B AU with in-band VPS/SPS/PPS — exactly what
// punktfunk-host emits on every IDR), run it through AnnexB, and hand the result to a real
// VTDecompressionSession. Pixels out = the whole client decode path is sound.

import AVFoundation
import CoreMedia
import VideoToolbox
import XCTest

@testable import PunktfunkKit

/// Sendable holder for the values the (background-thread) decode callback writes.
///
/// `@unchecked`: the compiler does not verify thread-safety here. The test that uses this
/// box takes `lock` around every read and write of `frame` and `error`.
private final class FrameBox: @unchecked Sendable {
    /// Guards `frame` and `error`.
    let lock = NSLock()
    /// The decoded frame handed to the success callback, if any.
    var frame: ReadyFrame?
    /// The status handed to the error callback, if any.
    var error: OSStatus?
}

final class VideoToolboxRoundTripTests: XCTestCase {
    /// Dimensions of the synthetic frame that is encoded and expected back from the decoder.
    private let width = 320
    private let height = 240

    /// Encodes one HEVC keyframe, rewrites it as an Annex-B access unit, and checks each
    /// `AnnexB` step in turn: parameter-set extraction, AVCC re-packing, sample-buffer
    /// construction, and finally a real `VTDecompressionSession` producing pixels.
    ///
    /// - Throws: `XCTSkip` (from the encode helper) when no HEVC encoder is available, or an
    ///   `XCTUnwrap` failure when an intermediate result is unexpectedly `nil`.
    func testEncodeAnnexBDecodeRoundTrip() throws {
        let (formatDesc, avccSample) = try encodeOneHEVCKeyframe()
        // Rebuild the host's wire format: Annex-B AU, parameter sets in-band before the VCL.
        let annexB = try annexBAU(formatDesc: formatDesc, avccSample: avccSample)

        // 1) Parameter-set extraction → format description.
        let rebuilt = try XCTUnwrap(
            AnnexB.formatDescription(fromIDR: annexB),
            "in-band VPS/SPS/PPS should yield a format description")
        let dims = CMVideoFormatDescriptionGetDimensions(rebuilt)
        XCTAssertEqual(Int(dims.width), width)
        XCTAssertEqual(Int(dims.height), height)

        // 2) Annex-B → AVCC re-pack must reproduce the encoder's own sample bytes.
        XCTAssertEqual(AnnexB.avcc(from: annexB), avccSample)

        // 3) Sample buffer → real decoder → pixels.
        let au = AccessUnit(data: annexB, ptsNs: 1_000_000, frameIndex: 0, flags: 0)
        let sample = try XCTUnwrap(AnnexB.sampleBuffer(au: au, format: rebuilt))

        var session: VTDecompressionSession?
        XCTAssertEqual(
            VTDecompressionSessionCreate(
                allocator: nil, formatDescription: rebuilt, decoderSpecification: nil,
                imageBufferAttributes: nil, outputCallback: nil,
                decompressionSessionOut: &session),
            noErr)
        let decoder = try XCTUnwrap(session)
        defer { VTDecompressionSessionInvalidate(decoder) }

        var decoded: CVImageBuffer?
        // Sentinel: stays -1 if the output handler is never invoked.
        var decodeStatus: OSStatus = -1
        // No async flag → the handler runs before DecodeFrame returns.
        let submitStatus = VTDecompressionSessionDecodeFrame(
            decoder, sampleBuffer: sample, flags: [], infoFlagsOut: nil
        ) { status, _, imageBuffer, _, _ in
            decodeStatus = status
            decoded = imageBuffer
        }
        // Check the call's own result too, so a rejected submission is reported with its
        // real OSStatus instead of only surfacing as the -1 sentinel below.
        XCTAssertEqual(submitStatus, noErr, "VTDecompressionSessionDecodeFrame should accept the sample")
        XCTAssertEqual(decodeStatus, noErr, "the output handler should report a successful decode")

        let pixels = try XCTUnwrap(decoded)  // CVImageBuffer and CVPixelBuffer are the same CF type
        XCTAssertEqual(CVPixelBufferGetWidth(pixels), width)
        XCTAssertEqual(CVPixelBufferGetHeight(pixels), height)
    }

    /// Stage-2 decode half: the same known IDR through `VideoDecoder` — assert its async output
    /// callback fires with a CVPixelBuffer of the right dimensions, the pts round-trips, and
    /// decode-completion is stamped.
func testVideoDecoderAsyncCallbackDeliversPixels() throws {
        let (formatDesc, avccSample) = try encodeOneHEVCKeyframe()
        let annexB = try annexBAU(formatDesc: formatDesc, avccSample: avccSample)
        let format = try XCTUnwrap(AnnexB.formatDescription(fromIDR: annexB))
        let au = AccessUnit(data: annexB, ptsNs: 42_000_000, frameIndex: 0, flags: 0)

        let box = FrameBox()
        // Signalled once by whichever callback fires first (success or error).
        let done = DispatchSemaphore(value: 0)
        let decoder = VideoDecoder(
            onDecoded: { frame in
                box.lock.lock(); box.frame = frame; box.lock.unlock()
                done.signal()
            },
            onDecodeError: { status in
                box.lock.lock(); box.error = status; box.lock.unlock()
                done.signal()
            })

        XCTAssertTrue(decoder.decode(au: au, format: format), "frame submit should succeed")
        XCTAssertEqual(done.wait(timeout: .now() + 10), .success, "the decode callback must fire")
        decoder.reset()

        // Snapshot the callback results under the lock before asserting on them.
        box.lock.lock()
        let frame = box.frame
        let error = box.error
        box.lock.unlock()

        // Mapping to a string makes the failure message show the status code.
        XCTAssertNil(error.map { "decode error \($0)" })
        let ready = try XCTUnwrap(frame, "the async output callback must deliver a ReadyFrame")
        XCTAssertEqual(CVPixelBufferGetWidth(ready.pixelBuffer), width)
        XCTAssertEqual(CVPixelBufferGetHeight(ready.pixelBuffer), height)
        XCTAssertEqual(ready.ptsNs, 42_000_000, "pts round-trips through the decoder")
        XCTAssertGreaterThan(ready.decodedNs, 0, "decode-completion is stamped")
    }

    // MARK: - encode helpers

    /// One forced-IDR HEVC frame; returns its format description and raw AVCC sample bytes.
    ///
    /// - Returns: The encoder's format description for the frame and a copy of the sample's
    ///   data-buffer bytes.
    /// - Throws: `XCTSkip` when the HEVC compression session cannot be created; an `NSError`
    ///   carrying the `OSStatus` when the frame submission or the byte copy fails; an
    ///   `XCTUnwrap` failure when the encoder delivers no sample, format description, or
    ///   data buffer.
    private func encodeOneHEVCKeyframe() throws -> (CMVideoFormatDescription, Data) {
        var session: VTCompressionSession?
        let rc = VTCompressionSessionCreate(
            allocator: nil, width: Int32(width), height: Int32(height),
            codecType: kCMVideoCodecType_HEVC, encoderSpecification: nil,
            imageBufferAttributes: nil, compressedDataAllocator: nil,
            outputCallback: nil, refcon: nil, compressionSessionOut: &session)
        guard rc == noErr, let encoder = session else {
            throw XCTSkip("no HEVC encoder available (\(rc))")
        }
        defer { VTCompressionSessionInvalidate(encoder) }

        // The return values of these two property sets are not checked.
        VTSessionSetProperty(encoder, key: kVTCompressionPropertyKey_RealTime, value: kCFBooleanTrue)
        VTSessionSetProperty(
            encoder, key: kVTCompressionPropertyKey_AllowFrameReordering, value: kCFBooleanFalse)

        let source = try gradientPixelBuffer()
        let lock = NSLock()
        var output: CMSampleBuffer?
        let done = expectation(description: "encoded")
        let submitStatus = VTCompressionSessionEncodeFrame(
            encoder, imageBuffer: source,
            presentationTimeStamp: CMTime(value: 0, timescale: 30),
            duration: CMTime(value: 1, timescale: 30),
            frameProperties: [kVTEncodeFrameOptionKey_ForceKeyFrame: kCFBooleanTrue] as CFDictionary,
            infoFlagsOut: nil
        ) { status, _, sample in
            XCTAssertEqual(status, noErr)
            lock.lock()
            output = sample
            lock.unlock()
            done.fulfill()
        }
        // Fail fast with the real status rather than waiting out the full timeout below
        // for an output handler that may never be called after a rejected submission.
        guard submitStatus == noErr else {
            throw NSError(domain: "VTCompressionSessionEncodeFrame", code: Int(submitStatus))
        }
        // Force the pending frame out of the encoder before waiting on the handler.
        let flushStatus = VTCompressionSessionCompleteFrames(
            encoder, untilPresentationTimeStamp: .invalid)
        XCTAssertEqual(flushStatus, noErr, "flushing the encoder should succeed")
        wait(for: [done], timeout: 10)

        // Hold the lock only while reading the handler's result.
        lock.lock()
        let encoded = output
        lock.unlock()

        let sample = try XCTUnwrap(encoded)
        let desc = try XCTUnwrap(CMSampleBufferGetFormatDescription(sample))
        let block = try XCTUnwrap(CMSampleBufferGetDataBuffer(sample))
        var bytes = Data(count: CMBlockBufferGetDataLength(block))
        try bytes.withUnsafeMutableBytes { raw in
            // XCTUnwrap instead of `!`: a nil base address fails the test rather than
            // crashing the whole test process.
            let destination = try XCTUnwrap(raw.baseAddress, "encoded sample should not be empty")
            let rc = CMBlockBufferCopyDataBytes(
                block, atOffset: 0, dataLength: raw.count, destination: destination)
            if rc != noErr { throw NSError(domain: "CMBlockBuffer", code: Int(rc)) }
        }
        return (desc, bytes)
    }

    /// The host's wire shape: 4-byte start codes, VPS/SPS/PPS in-band, then the VCL NALs.
private func annexBAU(formatDesc: CMVideoFormatDescription, avccSample: Data) throws -> Data { var au = Data() var psCount = 0 var nalHeaderLen: Int32 = 0 XCTAssertEqual( CMVideoFormatDescriptionGetHEVCParameterSetAtIndex( formatDesc, parameterSetIndex: 0, parameterSetPointerOut: nil, parameterSetSizeOut: nil, parameterSetCountOut: &psCount, nalUnitHeaderLengthOut: &nalHeaderLen), noErr) XCTAssertEqual(nalHeaderLen, 4, "AnnexB.avcc assumes 4-byte NAL length prefixes") for i in 0..? var size = 0 XCTAssertEqual( CMVideoFormatDescriptionGetHEVCParameterSetAtIndex( formatDesc, parameterSetIndex: i, parameterSetPointerOut: &ptr, parameterSetSizeOut: &size, parameterSetCountOut: nil, nalUnitHeaderLengthOut: nil), noErr) au.append(contentsOf: [0, 0, 0, 1]) au.append(Data(bytes: try XCTUnwrap(ptr), count: size)) } // AVCC sample (4-byte BE length per NAL) → start codes. var i = avccSample.startIndex while i + 4 <= avccSample.endIndex { let len = avccSample[i.. CVPixelBuffer { var pb: CVPixelBuffer? let attrs = [kCVPixelBufferIOSurfacePropertiesKey: [:]] as CFDictionary XCTAssertEqual( CVPixelBufferCreate(nil, width, height, kCVPixelFormatType_32BGRA, attrs, &pb), kCVReturnSuccess) let buf = try XCTUnwrap(pb) CVPixelBufferLockBaseAddress(buf, []) defer { CVPixelBufferUnlockBaseAddress(buf, []) } let base = try XCTUnwrap(CVPixelBufferGetBaseAddress(buf)) let stride = CVPixelBufferGetBytesPerRow(buf) for y in 0..