Support RGB frames in RTCCVPixelBuffer
In addition to NV12 frames, also support cropping/scaling RGB frames and
converting RGB frames to I420.
This CL also removes the hardcoding of pixel format in
RTCCameraVideoCapturer. Instead, use the first available format for the
output device that our pipeline supports.
Bug: webrtc:8351
Change-Id: If479b4934c47cd2994936913f55e60fbbee3893b
Reviewed-on: https://webrtc-review.googlesource.com/8920
Commit-Queue: Anders Carlsson <andersc@webrtc.org>
Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
Reviewed-by: Daniela Jovanoska Petrenko <denicija@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20396}
diff --git a/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm
index 5b08e74..3d4ae71 100644
--- a/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm
+++ b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm
@@ -60,6 +60,8 @@
const int kLowH264QpThreshold = 28;
const int kHighH264QpThreshold = 39;
+const OSType kNV12PixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
+
// Struct that we pass to the encoder per frame to encode. We receive it again
// in the encoder callback.
struct RTCFrameEncodeParams {
@@ -90,10 +92,9 @@
// We receive I420Frames as input, but we need to feed CVPixelBuffers into the
// encoder. This performs the copy and format conversion.
// TODO(tkchin): See if encoder will accept i420 frames and compare performance.
-bool CopyVideoFrameToPixelBuffer(id<RTCI420Buffer> frameBuffer, CVPixelBufferRef pixelBuffer) {
+bool CopyVideoFrameToNV12PixelBuffer(id<RTCI420Buffer> frameBuffer, CVPixelBufferRef pixelBuffer) {
RTC_DCHECK(pixelBuffer);
- RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer),
- kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
+ RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), kNV12PixelFormat);
RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), frameBuffer.height);
RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), frameBuffer.width);
@@ -286,7 +287,7 @@
RTCVideoCodecMode _mode;
webrtc::H264BitstreamParser _h264BitstreamParser;
- std::vector<uint8_t> _nv12ScaleBuffer;
+ std::vector<uint8_t> _frameScaleBuffer;
}
// .5 is set as a mininum to prevent overcompensating for large temporary
@@ -333,7 +334,7 @@
// TODO(tkchin): Try setting payload size via
// kVTCompressionPropertyKey_MaxH264SliceBytes.
- return [self resetCompressionSession];
+ return [self resetCompressionSessionWithPixelFormat:kNV12PixelFormat];
}
- (NSInteger)encode:(RTCVideoFrame *)frame
@@ -356,20 +357,10 @@
// Get a pixel buffer from the pool and copy frame data over.
CVPixelBufferPoolRef pixelBufferPool =
VTCompressionSessionGetPixelBufferPool(_compressionSession);
-
-#if defined(WEBRTC_IOS)
- if (!pixelBufferPool) {
- // Kind of a hack. On backgrounding, the compression session seems to get
- // invalidated, which causes this pool call to fail when the application
- // is foregrounded and frames are being sent for encoding again.
- // Resetting the session when this happens fixes the issue.
- // In addition we request a keyframe so video can recover quickly.
- [self resetCompressionSession];
+ if ([self resetCompressionSessionIfNeededForPool:pixelBufferPool withFrame:frame]) {
pixelBufferPool = VTCompressionSessionGetPixelBufferPool(_compressionSession);
isKeyframeRequired = YES;
- LOG(LS_INFO) << "Resetting compression session due to invalid pool.";
}
-#endif
CVPixelBufferRef pixelBuffer = nullptr;
if ([frame.buffer isKindOfClass:[RTCCVPixelBuffer class]]) {
@@ -393,12 +384,12 @@
if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) {
int size =
[rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth height:dstHeight];
- _nv12ScaleBuffer.resize(size);
+ _frameScaleBuffer.resize(size);
} else {
- _nv12ScaleBuffer.clear();
+ _frameScaleBuffer.clear();
}
- _nv12ScaleBuffer.shrink_to_fit();
- if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer withTempBuffer:_nv12ScaleBuffer.data()]) {
+ _frameScaleBuffer.shrink_to_fit();
+ if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer withTempBuffer:_frameScaleBuffer.data()]) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
}
@@ -411,7 +402,7 @@
return WEBRTC_VIDEO_CODEC_ERROR;
}
RTC_DCHECK(pixelBuffer);
- if (!CopyVideoFrameToPixelBuffer([frame.buffer toI420], pixelBuffer)) {
+ if (!CopyVideoFrameToNV12PixelBuffer([frame.buffer toI420], pixelBuffer)) {
LOG(LS_ERROR) << "Failed to copy frame data.";
CVBufferRelease(pixelBuffer);
return WEBRTC_VIDEO_CODEC_ERROR;
@@ -491,7 +482,56 @@
return WEBRTC_VIDEO_CODEC_OK;
}
-- (int)resetCompressionSession {
+- (BOOL)resetCompressionSessionIfNeededForPool:(CVPixelBufferPoolRef)pixelBufferPool
+ withFrame:(RTCVideoFrame *)frame {
+ BOOL resetCompressionSession = NO;
+
+#if defined(WEBRTC_IOS)
+ if (!pixelBufferPool) {
+ // Kind of a hack. On backgrounding, the compression session seems to get
+ // invalidated, which causes this pool call to fail when the application
+ // is foregrounded and frames are being sent for encoding again.
+ // Resetting the session when this happens fixes the issue.
+ // In addition we request a keyframe so video can recover quickly.
+ resetCompressionSession = YES;
+ LOG(LS_INFO) << "Resetting compression session due to invalid pool.";
+ }
+#endif
+
+ // If incoming native frames use a pixel format other than the one the compression
+ // session was configured with, reset the session using the frame's pixel format.
+ OSType framePixelFormat = kNV12PixelFormat;
+ if (pixelBufferPool && [frame.buffer isKindOfClass:[RTCCVPixelBuffer class]]) {
+ RTCCVPixelBuffer *rtcPixelBuffer = (RTCCVPixelBuffer *)frame.buffer;
+ framePixelFormat = CVPixelBufferGetPixelFormatType(rtcPixelBuffer.pixelBuffer);
+
+ // `kCVPixelBufferPixelFormatTypeKey` may hold an array of pixel formats or a single one.
+ // NOTE(review): a missing key leaves `pixelFormats` nil — `@[ nil ]` below would throw.
+ NSDictionary *poolAttributes =
+ (__bridge NSDictionary *)CVPixelBufferPoolGetPixelBufferAttributes(pixelBufferPool);
+ id pixelFormats =
+ [poolAttributes objectForKey:(__bridge NSString *)kCVPixelBufferPixelFormatTypeKey];
+ NSArray<NSNumber *> *compressionSessionPixelFormats = nil;
+ if ([pixelFormats isKindOfClass:[NSArray class]]) {
+ compressionSessionPixelFormats = (NSArray *)pixelFormats;
+ } else {
+ compressionSessionPixelFormats = @[ (NSNumber *)pixelFormats ];
+ }
+
+ if (![compressionSessionPixelFormats
+ containsObject:[NSNumber numberWithLong:framePixelFormat]]) {
+ resetCompressionSession = YES;
+ LOG(LS_INFO) << "Resetting compression session due to non-matching pixel format.";
+ }
+ }
+
+ if (resetCompressionSession) {
+ [self resetCompressionSessionWithPixelFormat:framePixelFormat];
+ }
+ return resetCompressionSession;
+}
+
+- (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat {
[self destroyCompressionSession];
// Set source image buffer attributes. These attributes will be present on
@@ -507,8 +547,8 @@
kCVPixelBufferPixelFormatTypeKey
};
CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0);
- int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
- CFNumberRef pixelFormat = CFNumberCreate(nullptr, kCFNumberLongType, &nv12type);
+ int64_t pixelFormatType = framePixelFormat;
+ CFNumberRef pixelFormat = CFNumberCreate(nullptr, kCFNumberLongType, &pixelFormatType);
CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, pixelFormat};
CFDictionaryRef sourceAttributes = CreateCFTypeDictionary(keys, values, attributesSize);
if (ioSurfaceValue) {