Apple Vision Pro Spatial Video Decoding: A Deep Dive into the MV-HEVC Format

2024-09-12 15:34:16



1. Decoding Spatial Video with VideoToolbox


Step 1: Create a Decompression Session


1. The session-creation API


OSStatus VTDecompressionSessionCreate(
    CM_NULLABLE CFAllocatorRef allocator,
    CM_NONNULL CMVideoFormatDescriptionRef videoFormatDescription,
    CM_NULLABLE CFDictionaryRef videoDecoderSpecification,
    CM_NULLABLE CFDictionaryRef destinationImageBufferAttributes,
    const VTDecompressionOutputCallbackRecord * CM_NULLABLE outputCallback,
    CM_RETURNS_RETAINED_PARAMETER CM_NULLABLE VTDecompressionSessionRef * CM_NONNULL decompressionSessionOut) API_AVAILABLE(macosx(10.8), ios(8.0), tvos(10.2));


1. allocator:
- Type: CM_NULLABLE CFAllocatorRef
- Description: The memory allocator used for the decompression session and its associated data structures. Pass NULL to use the default allocator.

2. videoFormatDescription:
- Type: CM_NONNULL CMVideoFormatDescriptionRef
- Description: A CoreMedia object describing the format of the video data, usually derived from the video stream's metadata. It tells the session what kind of video it will be decompressing.

3. videoDecoderSpecification:
- Type: CM_NULLABLE CFDictionaryRef
- Description: Optional. Lets you request a specific decoder for the video. Pass NULL to use the default decoder.

4. destinationImageBufferAttributes:
- Type: CM_NULLABLE CFDictionaryRef
- Description: Optional. Specifies attributes for the output image buffers, such as the pixel format.

5. outputCallback:
- Type: const VTDecompressionOutputCallbackRecord * CM_NULLABLE
- Description: Optional. If non-NULL, this callback is invoked after each frame is decompressed. Pass NULL if no output callback is needed (for example, when supplying an output handler per decode call).

6. decompressionSessionOut:
- Type: CM_RETURNS_RETAINED_PARAMETER CM_NULLABLE VTDecompressionSessionRef * CM_NONNULL
- Description: A pointer that receives the newly created decompression session on success. The caller is responsible for releasing this reference.
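For readers working in Swift, here is a minimal sketch of the same call (an illustration, not this post's original code). It assumes formatDescription was obtained from the source video track, as shown in section 2 below, and passes nil for the callback record because a per-frame output handler is supplied at decode time instead:

import VideoToolbox

// Minimal Swift sketch of VTDecompressionSessionCreate.
func makeDecompressionSession(formatDescription: CMVideoFormatDescription) -> VTDecompressionSession? {
    var session: VTDecompressionSession?
    let status = VTDecompressionSessionCreate(
        allocator: kCFAllocatorDefault,        // default allocator
        formatDescription: formatDescription,  // describes the MV-HEVC stream
        decoderSpecification: nil,             // nil = default decoder
        imageBufferAttributes: nil,            // nil = decoder-chosen pixel format
        outputCallback: nil,                   // we use a per-frame output handler instead
        decompressionSessionOut: &session)
    return status == noErr ? session : nil
}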


2. Obtaining the CMFormatDescriptionRef parameter

- (CMFormatDescriptionRef)getVideoFormatDescriptionFromURL:(NSURL *)videoURL {
    AVURLAsset *videoAsset = [AVURLAsset assetWithURL:videoURL];
    AVAssetTrack *videoTrack = [[videoAsset tracksWithMediaType:AVMediaTypeVideo] firstObject];
    if (!videoTrack) {
        NSLog(@"Video track not found in the asset.");
        return NULL;
    }

    CMFormatDescriptionRef formatDescription = (__bridge CMFormatDescriptionRef)videoTrack.formatDescriptions.firstObject;
    if (!formatDescription) {
        NSLog(@"Format description not found for the video track.");
        return NULL;
    }

    return formatDescription;
}
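The synchronous tracksWithMediaType: accessor used above blocks while the asset loads. On iOS 15 and later, an async Swift variant of the same lookup might look like this (a sketch, not part of the original post):

import AVFoundation

// Async sketch: load the first video track and return its first format
// description (nil if the asset has no video track).
func videoFormatDescription(for url: URL) async throws -> CMFormatDescription? {
    let asset = AVURLAsset(url: url)
    guard let videoTrack = try await asset.loadTracks(withMediaType: .video).first else {
        return nil
    }
    return try await videoTrack.load(.formatDescriptions).first
}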


3. Building the CFDictionaryRef decoder specification

- (NSDictionary *)createDecoderSpecification {
    NSDictionary *decoderSpecification = @{
        (__bridge NSString *)kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder : @YES,
        (__bridge NSString *)kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder : @YES,
        // Add other attributes here as needed...
    };

    return decoderSpecification;
}
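A Swift version of the same dictionary is sketched below. Note the semantics: the Enable key merely prefers hardware decoding, while the Require key makes session creation fail outright on devices without a hardware decoder for the format, instead of falling back to software. The availability of these keys varies by OS version, so treat this as an assumption to check against your SDK:

import VideoToolbox

// Sketch: the same decoder specification expressed in Swift.
// With the Require key set, VTDecompressionSessionCreate fails rather than
// falling back to a software decoder.
let decoderSpecification = [
    kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder: true,
    kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder: true
] as CFDictionary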


4. Creating the decoder


- (VTDecompressionSessionRef)createDecompressionSessionWithFormatDescription:(CMFormatDescriptionRef)videoFormatDescription {
    if (!videoFormatDescription) {
        NSLog(@"Invalid video format description.");
        return NULL;
    }
    NSDictionary *decoderSpecification = [self createDecoderSpecification];
    VTDecompressionSessionRef decompressionSession;
    OSStatus status = VTDecompressionSessionCreate(NULL,
                                                   videoFormatDescription,
                                                   (__bridge CFDictionaryRef)decoderSpecification,
                                                   NULL,
                                                   NULL,
                                                   &decompressionSession);

    NSLog(@"status: %d", (int)status);
    if (status != noErr) {
        NSLog(@"Failed to create decompression session");
        return NULL;
    }
    return decompressionSession;
}


Step 2: Set the Decoder Properties


NSArray *requestedLayerIDs = @[@0, @1];

// Request both MV-HEVC video layers (base view and additional view)
OSStatus status = VTSessionSetProperty(sessionRef, kVTDecompressionPropertyKey_RequestedMVHEVCVideoLayerIDs, (__bridge CFArrayRef)requestedLayerIDs);

if (status != noErr) {
    NSLog(@"Failed to set kVTDecompressionPropertyKey_RequestedMVHEVCVideoLayerIDs, error code: %d", (int)status);
} else {
    NSLog(@"Successfully set kVTDecompressionPropertyKey_RequestedMVHEVCVideoLayerIDs");
}
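The same property call in Swift, as a sketch (assuming session is the VTDecompressionSession from Step 1; [0, 1] is the usual base-view/additional-view layout for two-view MV-HEVC):

import VideoToolbox

// Request that the decoder emit both MV-HEVC layers for each frame.
func requestBothEyeLayers(on session: VTDecompressionSession) -> OSStatus {
    let layerIDs = [0, 1] as CFArray
    return VTSessionSetProperty(session,
                                key: kVTDecompressionPropertyKey_RequestedMVHEVCVideoLayerIDs,
                                value: layerIDs)
}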


Step 3: Read and Decode the Video

// Decode a complete video frame by frame from its URL
- (void)processVideoWithURLV2:(NSURL *)url {

    AVAsset *asset = [AVAsset assetWithURL:url];
    NSError *error = nil;
    AVAssetReader *assetReader = [[AVAssetReader alloc] initWithAsset:asset error:&error];

    if (error) {
        NSLog(@"Error creating asset reader: %@", [error localizedDescription]);
        return;
    }
    AVAssetTrack *videoTrack = [[asset tracksWithMediaType:AVMediaTypeVideo] firstObject];
    AVAssetReaderTrackOutput *output = [[AVAssetReaderTrackOutput alloc] initWithTrack:videoTrack outputSettings:nil];
    [assetReader addOutput:output];

    [assetReader startReading];

    while (assetReader.status == AVAssetReaderStatusReading) {
        CMSampleBufferRef sampleBuffer = [output copyNextSampleBuffer];

        if (sampleBuffer) {
            // Configure the decode flags, e.g.:
            VTDecodeFrameFlags decodeFrameFlags = 0;
            decodeFrameFlags |= kVTDecodeFrame_EnableAsynchronousDecompression; // allow asynchronous decompression
            decodeFrameFlags |= kVTDecodeFrame_1xRealTimePlayback;              // decode at real-time playback rate

            // VTDecodeInfoFlags is an *output* parameter: the decoder fills it in
            // (e.g. kVTDecodeInfo_Asynchronous, kVTDecodeInfo_FrameDropped) to
            // report how the frame was handled.
            VTDecodeInfoFlags decodeInfoFlags = 0;

            // Output handler invoked with the decoded image(s) for each frame
            VTDecompressionMultiImageCapableOutputHandler multiImageCapableOutputHandler = ^(OSStatus status, VTDecodeInfoFlags infoFlags, CVImageBufferRef _Nullable imageBuffer, CMTaggedBufferGroupRef _Nullable taggedBufferGroup, CMTime presentationTimeStamp, CMTime presentationDuration) {
                // Forward the decoded output to our handler method
                [self processMultiImageBufferGroup:status
                                         infoFlags:infoFlags
                                       imageBuffer:imageBuffer
                                 taggedBufferGroup:taggedBufferGroup
                             presentationTimeStamp:presentationTimeStamp
                              presentationDuration:presentationDuration];
            };

            // Submit the sample buffer for decoding
            OSStatus status = VTDecompressionSessionDecodeFrameWithMultiImageCapableOutputHandler(_decomSession, sampleBuffer, decodeFrameFlags, &decodeInfoFlags, multiImageCapableOutputHandler);

            NSLog(@"OutputHandler status: %d", (int)status);

            CFRelease(sampleBuffer);
        }
    }

    if (assetReader.status == AVAssetReaderStatusCompleted) {
        NSLog(@"Finished reading asset.");
    } else {
        NSLog(@"Error reading asset: %@", [assetReader.error localizedDescription]);
    }

    [assetReader cancelReading];
}
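One detail worth adding: with kVTDecodeFrame_EnableAsynchronousDecompression set, frames may still be in flight after the read loop finishes. A teardown sketch (shown in Swift; assumes session is the decompression session from Step 1):

import VideoToolbox

// Drain any asynchronously pending frames, then invalidate the session.
func tearDown(session: VTDecompressionSession) {
    _ = VTDecompressionSessionWaitForAsynchronousFrames(session) // blocks until pending frames emit
    VTDecompressionSessionInvalidate(session)                    // release decoder resources
}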


Step 4: Retrieve the Left and Right Views


- (void)processMultiImageBufferGroup:(OSStatus)status infoFlags:(VTDecodeInfoFlags)infoFlags imageBuffer:(CVImageBufferRef)imageBuffer taggedBufferGroup:(CMTaggedBufferGroupRef)taggedBufferGroup presentationTimeStamp:(CMTime)presentationTimeStamp presentationDuration:(CMTime)presentationDuration {
    // Handle the decoded images here
    NSLog(@"Multi-image frame callback, taggedBufferGroup: %@", taggedBufferGroup);
    if (status == noErr && taggedBufferGroup) {
        // For two-view MV-HEVC, the group holds the two eye views
        CVPixelBufferRef pixelBfLef = CMTaggedBufferGroupGetCVPixelBufferAtIndex(taggedBufferGroup, 0);
        CVPixelBufferRef pixelBfRit = CMTaggedBufferGroupGetCVPixelBufferAtIndex(taggedBufferGroup, 1);
        // ... render or process pixelBfLef / pixelBfRit here ...
    } else {
        NSLog(@"Error decoding image, error code: %d", (int)status);
    }
}
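The same extraction in Swift, as a sketch. Note that indexes 0 and 1 mirror the hard-coded layer order requested in Step 2; matching the buffers' stereo-view tags (as the AVPlayer example in the next section does) is more robust:

import CoreMedia

// Pull the two eye views out of a decoded CMTaggedBufferGroup by index.
func eyeBuffers(from group: CMTaggedBufferGroup) -> (left: CVPixelBuffer, right: CVPixelBuffer)? {
    guard CMTaggedBufferGroupGetCount(group) >= 2,
          let left = CMTaggedBufferGroupGetCVPixelBufferAtIndex(group, 0),
          let right = CMTaggedBufferGroupGetCVPixelBufferAtIndex(group, 1) else {
        return nil
    }
    return (left, right)
}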

2. Using AVPlayer

1. Play the video and obtain the left-eye and right-eye frames


@IBAction func check(_ sender: UIButton) {
    // Add the imageView to the view hierarchy
    view.addSubview(imageView)

    // Get the URL of the video file
    guard let videoUrl = Bundle.main.url(forResource: "IMG_0056", withExtension: "MOV") else { return }

    // Create the AVAsset
    let asset = AVAsset(url: videoUrl)

    // Create an AVPlayer initialized with an AVPlayerItem
    let player = AVPlayer(playerItem: AVPlayerItem(asset: asset))

    // Create an AVVideoOutputSpecification describing the desired output
    let outputSpecification = AVVideoOutputSpecification(tagCollections: [.stereoscopicForVideoOutput()])

    // Create the AVPlayerVideoOutput from that specification
    let videoOutput = AVPlayerVideoOutput(specification: outputSpecification)

    // Attach the video output to the player
    player.videoOutput = videoOutput

    // Create an AVPlayerLayer for on-screen playback
    let playerLayer = AVPlayerLayer(player: player)

    // Set the playerLayer's frame
    playerLayer.frame = CGRect(x: 0, y: 0, width: 100, height: 100)

    // Add the playerLayer to the view's layer
    view.layer.addSublayer(playerLayer)

    // Observe playback progress with a periodic time observer
    player.addPeriodicTimeObserver(forInterval: CMTime(value: 1, timescale: 60), queue: .main) { time in
        // Get the current frame's tagged buffers, presentation time and active configuration
        guard let (taggedBuffers, presentationTime, activeConfiguration) = videoOutput.taggedBuffers(forHostTime: CMClockGetTime(.hostTimeClock)) else {
            return
        }

        // Extract the left- and right-eye images by matching the stereo-view tags
        guard let leftEyeBuffer = taggedBuffers.first(where: { $0.tags.first(matchingCategory: .stereoView) == .stereoView(.leftEye) })?.buffer,
              let rightEyeBuffer = taggedBuffers.first(where: { $0.tags.first(matchingCategory: .stereoView) == .stereoView(.rightEye) })?.buffer,
              case let .pixelBuffer(leftEyePixelBuffer) = leftEyeBuffer,
              case let .pixelBuffer(rightEyePixelBuffer) = rightEyeBuffer else {
            return
        }

        // Both eye images are available from here on;
        // do something interesting with them, e.g. display them

        // Convert the CVPixelBuffers to UIImages
        let leftEyeImage = self.imageFromPixelBuffer(pixelBuffer: leftEyePixelBuffer)
        let rightEyeImage = self.imageFromPixelBuffer(pixelBuffer: rightEyePixelBuffer)

        // Combine the left- and right-eye images side by side
        let combinedImage = self.combineImages(leftEyeImage: leftEyeImage, rightEyeImage: rightEyeImage)

        // Show the combined image in the imageView
        self.imageView.image = combinedImage

        if !self.isWritingStarted {
            self.isWritingStarted = true
            // Start writing as soon as the first frame is available
            self.startVideoWriting()
        }

        // Append the combined frame to the output video
        self.writeFrame(image: combinedImage!)
    }

    // Start playback
    player.play()
}


2. Converting a CVPixelBuffer to a UIImage

// Convert a CVPixelBuffer to a UIImage
func imageFromPixelBuffer(pixelBuffer: CVPixelBuffer) -> UIImage? {
    let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
    let context = CIContext()

    if let cgImage = context.createCGImage(ciImage, from: ciImage.extent) {
        return UIImage(cgImage: cgImage)
    }

    return nil
}
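Creating a CIContext per frame is expensive, and this conversion runs once per decoded frame, so the context can be hoisted into a shared property. A sketch of the same conversion with a reusable context:

import CoreImage
import UIKit

// Shared, reusable Core Image context (contexts are costly to create).
let sharedCIContext = CIContext()

func imageFromPixelBufferShared(_ pixelBuffer: CVPixelBuffer) -> UIImage? {
    let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
    guard let cgImage = sharedCIContext.createCGImage(ciImage, from: ciImage.extent) else {
        return nil
    }
    return UIImage(cgImage: cgImage)
}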


3. Converting a CGImage to a CVPixelBuffer

private func pixelBufferFromCGImage(cgImage: CGImage) -> CVPixelBuffer? {
    let options: [String: Any] = [
        kCVPixelBufferCGImageCompatibilityKey as String: true,
        kCVPixelBufferCGBitmapContextCompatibilityKey as String: true
    ]

    var pixelBuffer: CVPixelBuffer?
    let status = CVPixelBufferCreate(
        kCFAllocatorDefault,
        cgImage.width,
        cgImage.height,
        kCVPixelFormatType_32ARGB,
        options as CFDictionary,
        &pixelBuffer
    )

    guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
        print("Error: Failed to create pixel buffer.")
        return nil
    }

    CVPixelBufferLockBaseAddress(buffer, CVPixelBufferLockFlags(rawValue: 0))
    let context = CGContext(
        data: CVPixelBufferGetBaseAddress(buffer),
        width: cgImage.width,
        height: cgImage.height,
        bitsPerComponent: cgImage.bitsPerComponent,
        bytesPerRow: CVPixelBufferGetBytesPerRow(buffer),
        space: CGColorSpaceCreateDeviceRGB(),
        bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue
    )

    context?.draw(cgImage, in: CGRect(origin: .zero, size: CGSize(width: cgImage.width, height: cgImage.height)))
    CVPixelBufferUnlockBaseAddress(buffer, CVPixelBufferLockFlags(rawValue: 0))

    return buffer
}
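Allocating a fresh CVPixelBuffer per frame also has a cost. Once writing has started, the adaptor created in section 5 exposes a pixel buffer pool that can be reused instead; a sketch (assuming adaptor is the AVAssetWriterInputPixelBufferAdaptor defined below):

import AVFoundation

// Borrow a reusable pixel buffer from the writer adaptor's pool.
// The pool is only non-nil after startWriting()/startSession(atSourceTime:).
func pooledPixelBuffer(from adaptor: AVAssetWriterInputPixelBufferAdaptor) -> CVPixelBuffer? {
    guard let pool = adaptor.pixelBufferPool else { return nil }
    var buffer: CVPixelBuffer?
    let status = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool, &buffer)
    return status == kCVReturnSuccess ? buffer : nil
}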

4. Combining the left and right eyes

func combineImages(leftEyeImage: UIImage?, rightEyeImage: UIImage?) -> UIImage? {
    let size = CGSize(width: 1080, height: 720) // choose a size appropriate for your content
    UIGraphicsBeginImageContext(size)

    // Draw the left-eye image into the left half
    leftEyeImage?.draw(in: CGRect(x: 0, y: 0, width: size.width / 2, height: size.height))

    // Draw the right-eye image into the right half
    rightEyeImage?.draw(in: CGRect(x: size.width / 2, y: 0, width: size.width / 2, height: size.height))

    // Grab the combined image
    let combinedImage = UIGraphicsGetImageFromCurrentImageContext()
    UIGraphicsEndImageContext()

    return combinedImage
}
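UIGraphicsBeginImageContext works, but it is a legacy API that renders at 1x scale. An equivalent sketch using the modern UIGraphicsImageRenderer:

import UIKit

// Same side-by-side composition with the modern renderer API.
func combineImagesModern(leftEyeImage: UIImage?, rightEyeImage: UIImage?) -> UIImage {
    let size = CGSize(width: 1080, height: 720)
    let renderer = UIGraphicsImageRenderer(size: size)
    return renderer.image { _ in
        leftEyeImage?.draw(in: CGRect(x: 0, y: 0, width: size.width / 2, height: size.height))
        rightEyeImage?.draw(in: CGRect(x: size.width / 2, y: 0, width: size.width / 2, height: size.height))
    }
}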


5. Initializing the MP4 file writer

var videoWriter: AVAssetWriter?
var videoWriterInput: AVAssetWriterInput?
var adaptor: AVAssetWriterInputPixelBufferAdaptor?

func startVideoWriting() {
    let outputURL = FileManager.default.temporaryDirectory.appendingPathComponent("zyb103.mp4")
    do {
        videoWriter = try AVAssetWriter(outputURL: outputURL, fileType: .mp4)
    } catch {
        print("Error initializing AVAssetWriter: \(error)")
        return
    }

    // Initialize the AVAssetWriterInput
    let videoSettings: [String: Any] = [
        AVVideoCodecKey: AVVideoCodecType.h264,
        AVVideoWidthKey: 1080,
        AVVideoHeightKey: 720
    ]

    videoWriterInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoSettings)

    videoWriterInput?.expectsMediaDataInRealTime = true

    let pixelBufferAttributes: [String: Any] = [
        kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA
    ]
    adaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoWriterInput!, sourcePixelBufferAttributes: pixelBufferAttributes)

    if let videoWriter = videoWriter, videoWriter.canAdd(videoWriterInput!) {
        videoWriter.add(videoWriterInput!)
    } else {
        print("Error: Cannot add asset writer input.")
        return
    }

    // Start the AVAssetWriter
    videoWriter?.startWriting()
    videoWriter?.startSession(atSourceTime: CMTime.zero)
}


6. Writing frames to the MP4 file

var frameIndex: Int64 = 0

func writeFrame(image: UIImage) {
    guard let cgImage = image.cgImage else {
        print("Error: Failed to get CGImage from UIImage.")
        return
    }
    let presentationTime = CMTimeMake(value: frameIndex, timescale: 30)

    if videoWriterInput?.isReadyForMoreMediaData == true {
        if let pixelBuffer = pixelBufferFromCGImage(cgImage: cgImage) {
            adaptor?.append(pixelBuffer, withPresentationTime: presentationTime)
            frameIndex += 1
        }
    }
}
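The fixed timescale of 30 assumes the source plays at a constant 30 fps. If the source frame rate differs, a variant that passes through the presentation time reported by AVPlayerVideoOutput keeps the timestamps faithful; a sketch (presentationTime is the CMTime returned alongside the tagged buffers in section 1, and this is not the post's original code):

// Variant that stamps each frame with the player-reported presentation time
// instead of a synthetic 30 fps clock.
func writeFrame(image: UIImage, at presentationTime: CMTime) {
    guard let cgImage = image.cgImage,
          videoWriterInput?.isReadyForMoreMediaData == true,
          let pixelBuffer = pixelBufferFromCGImage(cgImage: cgImage) else { return }
    adaptor?.append(pixelBuffer, withPresentationTime: presentationTime)
}

When doing this, startSession(atSourceTime:) should be given the first frame's presentation time rather than .zero.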

7. Observing playback completion and finishing the file

// Register for the playback-finished notification
NotificationCenter.default.addObserver(self,
                                       selector: #selector(playerDidFinishPlaying),
                                       name: .AVPlayerItemDidPlayToEndTime,
                                       object: nil)

// Handler for the playback-finished notification
@objc func playerDidFinishPlaying() {
    // Stop writing the video
    finishWriting()
}

func finishWriting() {
    videoWriterInput?.markAsFinished()
    videoWriter?.finishWriting {
        print("Video writing completed.")

        if let outputURL = self.videoWriter?.outputURL {
            if FileManager.default.fileExists(atPath: outputURL.path) {
                // Save the finished file to the photo library
                self.saveVideoToPhotosLibrary(videoURL: outputURL)
            } else {
                print("Error: Video file does not exist at path \(outputURL.path)")
            }
        }
    }
}
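The post does not show saveVideoToPhotosLibrary(videoURL:). A typical implementation using the Photos framework might look like this (an assumption, not the author's code):

import Photos

// Hypothetical implementation of the helper referenced above: request add-only
// access, then create a video asset from the finished file.
func saveVideoToPhotosLibrary(videoURL: URL) {
    PHPhotoLibrary.requestAuthorization(for: .addOnly) { status in
        guard status == .authorized else {
            print("Photo library access not granted.")
            return
        }
        PHPhotoLibrary.shared().performChanges({
            PHAssetChangeRequest.creationRequestForAssetFromVideo(atFileURL: videoURL)
        }) { success, error in
            print(success ? "Saved to Photos." : "Save failed: \(String(describing: error))")
        }
    }
}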


8. Merging the audio with the silent video

func addAudioToVideo(_ temp: String, _ finalSource: String) {
    do {
        print("temp:\(temp) finalSource:\(finalSource)")
        // Input video file (the silent MP4)
        // let videoUrl = Bundle.main.url(forResource: "temp", withExtension: "mp4")!
        let videoUrl = URL(fileURLWithPath: temp)
        let videoAsset = AVAsset(url: videoUrl)

        // Input audio file (the audio extracted from the original MOV)
        let audioUrl = Bundle.main.url(forResource: "IMG_0056", withExtension: "MOV")!
        let audioAsset = AVAsset(url: audioUrl)

        // Create a mutable audio/video composition
        let mixComposition = AVMutableComposition()

        // Add the video track
        let videoTrack = mixComposition.addMutableTrack(withMediaType: .video, preferredTrackID: kCMPersistentTrackID_Invalid)
        try videoTrack?.insertTimeRange(CMTimeRangeMake(start: CMTime.zero, duration: videoAsset.duration), of: videoAsset.tracks(withMediaType: .video)[0], at: CMTime.zero)

        // Add the audio track
        let audioTrack = mixComposition.addMutableTrack(withMediaType: .audio, preferredTrackID: kCMPersistentTrackID_Invalid)
        try audioTrack?.insertTimeRange(CMTimeRangeMake(start: CMTime.zero, duration: audioAsset.duration), of: audioAsset.tracks(withMediaType: .audio)[0], at: CMTime.zero)

        // Build an output path for the final composed video
        let outputPath = NSTemporaryDirectory() + finalSource + ".mp4"
        let outputUrl = URL(fileURLWithPath: outputPath)

        // Create an AVAssetExportSession to export the composition to a file
        guard let exportSession = AVAssetExportSession(asset: mixComposition, presetName: AVAssetExportPresetHighestQuality) else {
            print("Error creating export session")
            return
        }

        exportSession.outputFileType = .mp4
        exportSession.outputURL = outputUrl

        // Export the composed audio/video
        exportSession.exportAsynchronously {
            if exportSession.status == .completed {
                print("Export successful: \(outputUrl)")
            } else if let error = exportSession.error {
                print("Export failed with error: \(error)")
            }
        }
    } catch {
        print("Error: \(error)")
    }
}
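Putting it together: after finishWriting completes, the silent MP4 from section 5 sits in the temporary directory, so the merge could be kicked off like this (an illustrative call; "final_spatial" is an arbitrary output name, not from the original post):

let silentVideoPath = FileManager.default.temporaryDirectory
    .appendingPathComponent("zyb103.mp4").path
addAudioToVideo(silentVideoPath, "final_spatial")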

