// Real-bitstream proof of the decode-prep path: VTCompressionSession encodes HEVC, we
// rebuild the host's wire shape (Annex-B AU with in-band VPS/SPS/PPS — exactly what
// punktfunk-host emits on every IDR), run it through AnnexB, and hand the result to a real
// VTDecompressionSession. Pixels out = the whole client decode path is sound.

import AVFoundation
import CoreMedia
import VideoToolbox
import XCTest

@testable import PunktfunkKit

final class VideoToolboxRoundTripTests: XCTestCase {
    private let width = 320
    private let height = 240

    /// Encodes one HEVC keyframe, repackages it as an Annex-B access unit, and checks each
    /// client-side step in turn: format-description extraction, AVCC re-pack, and a real
    /// VideoToolbox decode that must yield a `width` × `height` image.
    ///
    /// - Throws: `XCTSkip` (propagated from `encodeOneHEVCKeyframe`) when no HEVC encoder
    ///   session can be created, or an `XCTUnwrap` failure when an intermediate value is nil.
    func testEncodeAnnexBDecodeRoundTrip() throws {
        let (formatDesc, avccSample) = try encodeOneHEVCKeyframe()
        // Rebuild the host's wire format: Annex-B AU, parameter sets in-band before the VCL.
        let annexB = try annexBAU(formatDesc: formatDesc, avccSample: avccSample)

        // 1) Parameter-set extraction → format description.
        let rebuilt = try XCTUnwrap(
            AnnexB.formatDescription(fromIDR: annexB),
            "in-band VPS/SPS/PPS should yield a format description")
        let dims = CMVideoFormatDescriptionGetDimensions(rebuilt)
        XCTAssertEqual(Int(dims.width), width)
        XCTAssertEqual(Int(dims.height), height)

        // 2) Annex-B → AVCC re-pack must reproduce the encoder's own sample bytes.
        XCTAssertEqual(AnnexB.avcc(from: annexB), avccSample)

        // 3) Sample buffer → real decoder → pixels.
        let au = AccessUnit(data: annexB, ptsNs: 1_000_000, frameIndex: 0, flags: 0)
        let sample = try XCTUnwrap(AnnexB.sampleBuffer(au: au, format: rebuilt))

        var session: VTDecompressionSession?
        XCTAssertEqual(
            VTDecompressionSessionCreate(
                allocator: nil, formatDescription: rebuilt, decoderSpecification: nil,
                imageBufferAttributes: nil, outputCallback: nil,
                decompressionSessionOut: &session),
            noErr)
        let decoder = try XCTUnwrap(session)
        defer { VTDecompressionSessionInvalidate(decoder) }

        var decoded: CVImageBuffer?
        var decodeStatus: OSStatus = -1
        // No async flag → the handler runs before DecodeFrame returns.
        let submitStatus = VTDecompressionSessionDecodeFrame(
            decoder, sampleBuffer: sample, flags: [], infoFlagsOut: nil
        ) { status, _, imageBuffer, _, _ in
            decodeStatus = status
            decoded = imageBuffer
        }
        // Check the call's own result as well: if the frame is rejected before the handler
        // runs, `decodeStatus` would still hold the -1 sentinel and hide the real error code.
        XCTAssertEqual(submitStatus, noErr, "VTDecompressionSessionDecodeFrame rejected the frame")
        XCTAssertEqual(decodeStatus, noErr)
        let pixels = try XCTUnwrap(decoded)  // CVImageBuffer and CVPixelBuffer are the same CF type
        XCTAssertEqual(CVPixelBufferGetWidth(pixels), width)
        XCTAssertEqual(CVPixelBufferGetHeight(pixels), height)
    }

    // MARK: - encode helpers

    /// One forced-IDR HEVC frame; returns its format description and raw AVCC sample bytes.
    ///
    /// - Returns: The encoded sample's format description and a copy of its block-buffer bytes.
    /// - Throws: `XCTSkip` when the compression session cannot be created; an `XCTUnwrap`
    ///   failure when the encoder delivers no sample, format description, or data buffer;
    ///   `NSError` (domain `"CMBlockBuffer"`) when copying the bytes out fails.
    private func encodeOneHEVCKeyframe() throws -> (CMVideoFormatDescription, Data) {
        var session: VTCompressionSession?
        let rc = VTCompressionSessionCreate(
            allocator: nil, width: Int32(width), height: Int32(height),
            codecType: kCMVideoCodecType_HEVC, encoderSpecification: nil,
            imageBufferAttributes: nil, compressedDataAllocator: nil,
            outputCallback: nil, refcon: nil, compressionSessionOut: &session)
        guard rc == noErr, let encoder = session else {
            throw XCTSkip("no HEVC encoder available (\(rc))")
        }
        defer { VTCompressionSessionInvalidate(encoder) }

        // Session properties are set best-effort; their status codes are deliberately not checked.
        VTSessionSetProperty(encoder, key: kVTCompressionPropertyKey_RealTime, value: kCFBooleanTrue)
        VTSessionSetProperty(
            encoder, key: kVTCompressionPropertyKey_AllowFrameReordering, value: kCFBooleanFalse)

        // `output` is written inside the output handler and read after the wait; the lock
        // guards both accesses (presumably the handler can run off the test thread).
        let lock = NSLock()
        var output: CMSampleBuffer?
        let done = expectation(description: "encoded")
        let frame = try gradientPixelBuffer()
        let submitStatus = VTCompressionSessionEncodeFrame(
            encoder, imageBuffer: frame,
            presentationTimeStamp: CMTime(value: 0, timescale: 30),
            duration: CMTime(value: 1, timescale: 30),
            frameProperties: [kVTEncodeFrameOptionKey_ForceKeyFrame: kCFBooleanTrue] as CFDictionary,
            infoFlagsOut: nil
        ) { status, _, sample in
            XCTAssertEqual(status, noErr)
            lock.lock()
            output = sample
            lock.unlock()
            done.fulfill()
        }
        // Report a rejected frame or a failed flush by its status code rather than leaving
        // the expectation timeout below as the only symptom.
        XCTAssertEqual(submitStatus, noErr, "VTCompressionSessionEncodeFrame rejected the frame")
        let flushStatus = VTCompressionSessionCompleteFrames(
            encoder, untilPresentationTimeStamp: .invalid)
        XCTAssertEqual(flushStatus, noErr, "VTCompressionSessionCompleteFrames failed")
        wait(for: [done], timeout: 10)

        lock.lock()
        defer { lock.unlock() }
        let sample = try XCTUnwrap(output)
        let desc = try XCTUnwrap(CMSampleBufferGetFormatDescription(sample))
        let block = try XCTUnwrap(CMSampleBufferGetDataBuffer(sample))
        var bytes = Data(count: CMBlockBufferGetDataLength(block))
        try bytes.withUnsafeMutableBytes { raw in
            // An empty buffer has no base address; fail the test instead of force-unwrapping.
            let destination = try XCTUnwrap(raw.baseAddress, "encoded sample has no bytes")
            let rc = CMBlockBufferCopyDataBytes(
                block, atOffset: 0, dataLength: raw.count, destination: destination)
            if rc != noErr { throw NSError(domain: "CMBlockBuffer", code: Int(rc)) }
        }
        return (desc, bytes)
    }

    /// The host's wire shape: 4-byte start codes, VPS/SPS/PPS in-band, then the VCL NALs.
    private func annexBAU(formatDesc: CMVideoFormatDescription, avccSample: Data) throws -> Data {
        var au = Data()
        var psCount = 0
        var nalHeaderLen: Int32 = 0
        XCTAssertEqual(
            CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
                formatDesc, parameterSetIndex: 0, parameterSetPointerOut: nil,
                parameterSetSizeOut: nil, parameterSetCountOut: &psCount,
                nalUnitHeaderLengthOut: &nalHeaderLen),
            noErr)
        XCTAssertEqual(nalHeaderLen, 4, "AnnexB.avcc assumes 4-byte NAL length prefixes")
        // NOTE(review): the next line is incomplete as received (the loop bound and the `ptr`
        // declaration are missing); it is left untouched — restore it before building.
        for i in 0..? 
var size = 0 XCTAssertEqual( CMVideoFormatDescriptionGetHEVCParameterSetAtIndex( formatDesc, parameterSetIndex: i, parameterSetPointerOut: &ptr, parameterSetSizeOut: &size, parameterSetCountOut: nil, nalUnitHeaderLengthOut: nil), noErr) au.append(contentsOf: [0, 0, 0, 1]) au.append(Data(bytes: try XCTUnwrap(ptr), count: size)) } // AVCC sample (4-byte BE length per NAL) → start codes. var i = avccSample.startIndex while i + 4 <= avccSample.endIndex { let len = avccSample[i.. CVPixelBuffer { var pb: CVPixelBuffer? let attrs = [kCVPixelBufferIOSurfacePropertiesKey: [:]] as CFDictionary XCTAssertEqual( CVPixelBufferCreate(nil, width, height, kCVPixelFormatType_32BGRA, attrs, &pb), kCVReturnSuccess) let buf = try XCTUnwrap(pb) CVPixelBufferLockBaseAddress(buf, []) defer { CVPixelBufferUnlockBaseAddress(buf, []) } let base = try XCTUnwrap(CVPixelBufferGetBaseAddress(buf)) let stride = CVPixelBufferGetBytesPerRow(buf) for y in 0..