Adding simulcast/spatial layering support to VideoProcessor.
Encoded frames are preserved and decoded after all layers are
encoded.
Each spatial layer is decoded with a separate decoder.
For quality evaluation of low-resolution layers, the original input
frame is downscaled with bilinear interpolation.
Encoded and decoded frames are dumped into separate files.
For async codecs, encoded frames are passed to the decoder in the encode
callback, as before.
Bug: webrtc:8524
Change-Id: Idb0c92c7274c1915cff9a011a2794f1cf4bc8cb1
Reviewed-on: https://webrtc-review.googlesource.com/43381
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Rasmus Brandt <brandtr@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#21844}
diff --git a/modules/video_coding/codecs/test/videoprocessor.cc b/modules/video_coding/codecs/test/videoprocessor.cc
index 7e1047f..6c06087 100644
--- a/modules/video_coding/codecs/test/videoprocessor.cc
+++ b/modules/video_coding/codecs/test/videoprocessor.cc
@@ -24,6 +24,7 @@
#include "rtc_base/checks.h"
#include "rtc_base/timeutils.h"
#include "test/gtest.h"
+#include "third_party/libyuv/include/libyuv/scale.h"
namespace webrtc {
namespace test {
@@ -96,57 +97,70 @@
} // namespace
VideoProcessor::VideoProcessor(webrtc::VideoEncoder* encoder,
- webrtc::VideoDecoder* decoder,
- FrameReader* analysis_frame_reader,
+ VideoDecoderList* decoders,
+ FrameReader* input_frame_reader,
const TestConfig& config,
- Stats* stats,
- IvfFileWriter* encoded_frame_writer,
- FrameWriter* decoded_frame_writer)
+ std::vector<Stats>* stats,
+ IvfFileWriterList* encoded_frame_writers,
+ FrameWriterList* decoded_frame_writers)
: config_(config),
+ num_simulcast_or_spatial_layers_(
+ std::max(config_.NumberOfSimulcastStreams(),
+ config_.NumberOfSpatialLayers())),
encoder_(encoder),
- decoder_(decoder),
+ decoders_(decoders),
bitrate_allocator_(CreateBitrateAllocator(&config_)),
encode_callback_(this),
decode_callback_(this),
- analysis_frame_reader_(analysis_frame_reader),
- encoded_frame_writer_(encoded_frame_writer),
- decoded_frame_writer_(decoded_frame_writer),
+ input_frame_reader_(input_frame_reader),
+ encoded_frame_writers_(encoded_frame_writers),
+ decoded_frame_writers_(decoded_frame_writers),
last_inputed_frame_num_(0),
last_encoded_frame_num_(0),
+ last_encoded_simulcast_svc_idx_(0),
last_decoded_frame_num_(0),
num_encoded_frames_(0),
num_decoded_frames_(0),
- last_decoded_frame_buffer_(analysis_frame_reader->FrameLength()),
stats_(stats) {
- RTC_DCHECK(encoder);
- RTC_DCHECK(decoder);
- RTC_DCHECK(analysis_frame_reader);
- RTC_DCHECK(stats);
+ RTC_CHECK(encoder);
+ RTC_CHECK(decoders && decoders->size() == num_simulcast_or_spatial_layers_);
+ RTC_CHECK(input_frame_reader);
+ RTC_CHECK(stats);
+ RTC_CHECK(!encoded_frame_writers ||
+ encoded_frame_writers->size() == num_simulcast_or_spatial_layers_);
+ RTC_CHECK(!decoded_frame_writers ||
+ decoded_frame_writers->size() == num_simulcast_or_spatial_layers_);
- // Setup required callbacks for the encoder and decoder.
+ // Setup required callbacks for the encoder and decoder and initialize them.
RTC_CHECK_EQ(encoder_->RegisterEncodeCompleteCallback(&encode_callback_),
WEBRTC_VIDEO_CODEC_OK);
- RTC_CHECK_EQ(decoder_->RegisterDecodeCompleteCallback(&decode_callback_),
- WEBRTC_VIDEO_CODEC_OK);
- // Initialize the encoder and decoder.
RTC_CHECK_EQ(encoder_->InitEncode(&config_.codec_settings,
static_cast<int>(config_.NumberOfCores()),
config_.max_payload_size_bytes),
WEBRTC_VIDEO_CODEC_OK);
- RTC_CHECK_EQ(decoder_->InitDecode(&config_.codec_settings,
- static_cast<int>(config_.NumberOfCores())),
- WEBRTC_VIDEO_CODEC_OK);
+
+ for (auto& decoder : *decoders_) {
+ RTC_CHECK_EQ(decoder->InitDecode(&config_.codec_settings,
+ static_cast<int>(config_.NumberOfCores())),
+ WEBRTC_VIDEO_CODEC_OK);
+ RTC_CHECK_EQ(decoder->RegisterDecodeCompleteCallback(&decode_callback_),
+ WEBRTC_VIDEO_CODEC_OK);
+ }
}
VideoProcessor::~VideoProcessor() {
RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_);
RTC_CHECK_EQ(encoder_->Release(), WEBRTC_VIDEO_CODEC_OK);
- RTC_CHECK_EQ(decoder_->Release(), WEBRTC_VIDEO_CODEC_OK);
-
encoder_->RegisterEncodeCompleteCallback(nullptr);
- decoder_->RegisterDecodeCompleteCallback(nullptr);
+
+ for (auto& decoder : *decoders_) {
+ RTC_CHECK_EQ(decoder->Release(), WEBRTC_VIDEO_CODEC_OK);
+ decoder->RegisterDecodeCompleteCallback(nullptr);
+ }
+
+ RTC_CHECK(last_encoded_frames_.empty());
}
void VideoProcessor::ProcessFrame() {
@@ -155,7 +169,7 @@
// Get frame from file.
rtc::scoped_refptr<I420BufferInterface> buffer(
- analysis_frame_reader_->ReadFrame());
+ input_frame_reader_->ReadFrame());
RTC_CHECK(buffer) << "Tried to read too many frames from the file.";
size_t rtp_timestamp =
@@ -170,14 +184,69 @@
std::vector<FrameType> frame_types = config_.FrameTypeForFrame(frame_number);
- // Create frame statistics object used for aggregation at end of test run.
- FrameStatistic* frame_stat = stats_->AddFrame(rtp_timestamp);
+ // Create a frame statistics object for each simulcast/spatial layer.
+ for (size_t simulcast_svc_idx = 0;
+ simulcast_svc_idx < num_simulcast_or_spatial_layers_;
+ ++simulcast_svc_idx) {
+ stats_->at(simulcast_svc_idx).AddFrame(rtp_timestamp);
+ }
// For the highest measurement accuracy of the encode time, the start/stop
// time recordings should wrap the Encode call as tightly as possible.
- frame_stat->encode_start_ns = rtc::TimeNanos();
- frame_stat->encode_return_code =
+ const int64_t encode_start_ns = rtc::TimeNanos();
+ for (size_t simulcast_svc_idx = 0;
+ simulcast_svc_idx < num_simulcast_or_spatial_layers_;
+ ++simulcast_svc_idx) {
+ FrameStatistic* frame_stat =
+ stats_->at(simulcast_svc_idx).GetFrame(frame_number);
+ frame_stat->encode_start_ns = encode_start_ns;
+ }
+
+ const int encode_return_code =
encoder_->Encode(*input_frames_[frame_number], nullptr, &frame_types);
+
+ for (size_t simulcast_svc_idx = 0;
+ simulcast_svc_idx < num_simulcast_or_spatial_layers_;
+ ++simulcast_svc_idx) {
+ FrameStatistic* frame_stat =
+ stats_->at(simulcast_svc_idx).GetFrame(frame_number);
+ frame_stat->encode_return_code = encode_return_code;
+ }
+
+ // For async codecs frame decoding is done in frame encode callback.
+ if (!config_.IsAsyncCodec()) {
+ for (size_t simulcast_svc_idx = 0;
+ simulcast_svc_idx < num_simulcast_or_spatial_layers_;
+ ++simulcast_svc_idx) {
+ if (last_encoded_frames_.find(simulcast_svc_idx) !=
+ last_encoded_frames_.end()) {
+ EncodedImage& encoded_image = last_encoded_frames_[simulcast_svc_idx];
+
+ FrameStatistic* frame_stat =
+ stats_->at(simulcast_svc_idx).GetFrame(frame_number);
+
+ if (encoded_frame_writers_) {
+ RTC_CHECK(encoded_frame_writers_->at(simulcast_svc_idx)
+ ->WriteFrame(encoded_image,
+ config_.codec_settings.codecType));
+ }
+
+ // For the highest measurement accuracy of the decode time, the
+ // start/stop time recordings should wrap the Decode call as tightly as
+ // possible.
+ frame_stat->decode_start_ns = rtc::TimeNanos();
+ frame_stat->decode_return_code =
+ decoders_->at(simulcast_svc_idx)
+ ->Decode(encoded_image, false, nullptr);
+
+ RTC_CHECK(encoded_image._buffer);
+ delete[] encoded_image._buffer;
+ encoded_image._buffer = nullptr;
+
+ last_encoded_frames_.erase(simulcast_svc_idx);
+ }
+ }
+ }
}
void VideoProcessor::SetRates(size_t bitrate_kbps, size_t framerate_fps) {
@@ -192,49 +261,91 @@
<< "Failed to update encoder with new rate " << bitrate_kbps << ".";
}
-void VideoProcessor::FrameEncoded(webrtc::VideoCodecType codec,
- const EncodedImage& encoded_image) {
+void VideoProcessor::FrameEncoded(
+ const webrtc::EncodedImage& encoded_image,
+ const webrtc::CodecSpecificInfo& codec_specific) {
RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_);
// For the highest measurement accuracy of the encode time, the start/stop
// time recordings should wrap the Encode call as tightly as possible.
int64_t encode_stop_ns = rtc::TimeNanos();
+ const VideoCodecType codec = codec_specific.codecType;
if (config_.encoded_frame_checker) {
config_.encoded_frame_checker->CheckEncodedFrame(codec, encoded_image);
}
- FrameStatistic* frame_stat =
- stats_->GetFrameWithTimestamp(encoded_image._timeStamp);
+ size_t simulcast_svc_idx = 0;
+ size_t temporal_idx = 0;
- // Ensure strict monotonicity.
+ if (codec == kVideoCodecVP8) {
+ simulcast_svc_idx = codec_specific.codecSpecific.VP8.simulcastIdx;
+ temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
+ } else if (codec == kVideoCodecVP9) {
+ simulcast_svc_idx = codec_specific.codecSpecific.VP9.spatial_idx;
+ temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
+ }
+
+ if (simulcast_svc_idx == kNoSpatialIdx) {
+ simulcast_svc_idx = 0;
+ }
+
+ if (temporal_idx == kNoTemporalIdx) {
+ temporal_idx = 0;
+ }
+
+ const size_t frame_wxh =
+ encoded_image._encodedWidth * encoded_image._encodedHeight;
+ frame_wxh_to_simulcast_svc_idx_[frame_wxh] = simulcast_svc_idx;
+
+ FrameStatistic* frame_stat =
+ stats_->at(simulcast_svc_idx)
+ .GetFrameWithTimestamp(encoded_image._timeStamp);
const size_t frame_number = frame_stat->frame_number;
- if (num_encoded_frames_ > 0) {
- RTC_CHECK_GT(frame_number, last_encoded_frame_num_);
+
+ // Reordering is unexpected. Frames of different layers have the same value
+ // of frame_number. VP8 multi-res delivers frames starting from hires layer.
+ RTC_CHECK_GE(frame_number, last_encoded_frame_num_);
+
+ // Ensure SVC spatial layers are delivered in ascending order.
+ if (config_.NumberOfSpatialLayers() > 1) {
+ RTC_CHECK(simulcast_svc_idx > last_encoded_simulcast_svc_idx_ ||
+ frame_number != last_encoded_frame_num_ ||
+ num_encoded_frames_ == 0);
}
last_encoded_frame_num_ = frame_number;
+ last_encoded_simulcast_svc_idx_ = simulcast_svc_idx;
// Update frame statistics.
+ frame_stat->encoding_successful = true;
frame_stat->encode_time_us =
GetElapsedTimeMicroseconds(frame_stat->encode_start_ns, encode_stop_ns);
- frame_stat->encoding_successful = true;
+
+ // TODO(ssilkin): Implement bitrate allocation for VP9 SVC. For now set
+ // target for base layers equal to total target to avoid division by zero
+ // at analysis.
+ frame_stat->target_bitrate_kbps =
+ bitrate_allocation_.GetSpatialLayerSum(
+ codec == kVideoCodecVP9 ? 0 : simulcast_svc_idx) /
+ 1000;
frame_stat->encoded_frame_size_bytes = encoded_image._length;
frame_stat->frame_type = encoded_image._frameType;
- frame_stat->temporal_layer_idx = config_.TemporalLayerForFrame(frame_number);
- frame_stat->qp = encoded_image.qp_;
- frame_stat->target_bitrate_kbps =
- bitrate_allocation_.GetSpatialLayerSum(0) / 1000;
+ frame_stat->temporal_layer_idx = temporal_idx;
+ frame_stat->simulcast_svc_idx = simulcast_svc_idx;
frame_stat->max_nalu_size_bytes = GetMaxNaluSizeBytes(encoded_image, config_);
+ frame_stat->qp = encoded_image.qp_;
- // For the highest measurement accuracy of the decode time, the start/stop
- // time recordings should wrap the Decode call as tightly as possible.
- frame_stat->decode_start_ns = rtc::TimeNanos();
- frame_stat->decode_return_code =
- decoder_->Decode(encoded_image, false, nullptr);
-
- if (encoded_frame_writer_) {
- RTC_CHECK(encoded_frame_writer_->WriteFrame(encoded_image, codec));
+ if (!config_.IsAsyncCodec()) {
+ // Store encoded frame. It will be decoded after all layers are encoded.
+ CopyEncodedImage(encoded_image, codec, frame_number, simulcast_svc_idx);
+ } else {
+ const size_t simulcast_idx =
+ codec == kVideoCodecVP8 ? codec_specific.codecSpecific.VP8.simulcastIdx
+ : 0;
+ frame_stat->decode_start_ns = rtc::TimeNanos();
+ frame_stat->decode_return_code =
+ decoders_->at(simulcast_idx)->Decode(encoded_image, false, nullptr);
}
++num_encoded_frames_;
@@ -247,41 +358,49 @@
// time recordings should wrap the Decode call as tightly as possible.
int64_t decode_stop_ns = rtc::TimeNanos();
- // Update frame statistics.
+ RTC_CHECK(frame_wxh_to_simulcast_svc_idx_.find(decoded_frame.size()) !=
+ frame_wxh_to_simulcast_svc_idx_.end());
+ const size_t simulcast_svc_idx =
+ frame_wxh_to_simulcast_svc_idx_[decoded_frame.size()];
+
FrameStatistic* frame_stat =
- stats_->GetFrameWithTimestamp(decoded_frame.timestamp());
- frame_stat->decoded_width = decoded_frame.width();
- frame_stat->decoded_height = decoded_frame.height();
- frame_stat->decode_time_us =
- GetElapsedTimeMicroseconds(frame_stat->decode_start_ns, decode_stop_ns);
- frame_stat->decoding_successful = true;
-
- // Ensure strict monotonicity.
+ stats_->at(simulcast_svc_idx)
+ .GetFrameWithTimestamp(decoded_frame.timestamp());
const size_t frame_number = frame_stat->frame_number;
- if (num_decoded_frames_ > 0) {
- RTC_CHECK_GT(frame_number, last_decoded_frame_num_);
- }
- // Check if the codecs have resized the frame since previously decoded frame.
- if (frame_number > 0) {
- if (decoded_frame_writer_ && num_decoded_frames_ > 0) {
- // For dropped/lost frames, write out the last decoded frame to make it
- // look like a freeze at playback.
- const size_t num_dropped_frames =
- frame_number - last_decoded_frame_num_ - 1;
- for (size_t i = 0; i < num_dropped_frames; i++) {
- WriteDecodedFrameToFile(&last_decoded_frame_buffer_);
+ // Reordering is unexpected. Frames of different layers have the same value
+ // of frame_number.
+ RTC_CHECK_GE(frame_number, last_decoded_frame_num_);
+
+ if (decoded_frame_writers_ && num_decoded_frames_ > 0) {
+ // For dropped frames, write out the last decoded frame to make it look like
+ // a freeze at playback.
+ for (size_t num_dropped_frames = 0; num_dropped_frames < frame_number;
+ ++num_dropped_frames) {
+ const FrameStatistic* prev_frame_stat =
+ stats_->at(simulcast_svc_idx)
+ .GetFrame(frame_number - num_dropped_frames - 1);
+ if (prev_frame_stat->decoding_successful) {
+ break;
}
+ WriteDecodedFrameToFile(&last_decoded_frame_buffers_[simulcast_svc_idx],
+ simulcast_svc_idx);
}
}
+
last_decoded_frame_num_ = frame_number;
+ // Update frame statistics.
+ frame_stat->decoding_successful = true;
+ frame_stat->decode_time_us =
+ GetElapsedTimeMicroseconds(frame_stat->decode_start_ns, decode_stop_ns);
+ frame_stat->decoded_width = decoded_frame.width();
+ frame_stat->decoded_height = decoded_frame.height();
+
// Skip quality metrics calculation to not affect CPU usage.
if (!config_.measure_cpu) {
- frame_stat->psnr =
- I420PSNR(input_frames_[frame_number].get(), &decoded_frame);
- frame_stat->ssim =
- I420SSIM(input_frames_[frame_number].get(), &decoded_frame);
+ CalculateFrameQuality(*input_frames_[frame_number], decoded_frame,
+ frame_stat);
}
// Delay erasing of input frames by one frame. The current frame might
@@ -291,19 +410,96 @@
input_frames_.erase(input_frames_.begin(), input_frame_erase_to);
}
- if (decoded_frame_writer_) {
+ if (decoded_frame_writers_) {
ExtractBufferWithSize(decoded_frame, config_.codec_settings.width,
config_.codec_settings.height,
- &last_decoded_frame_buffer_);
- WriteDecodedFrameToFile(&last_decoded_frame_buffer_);
+ &last_decoded_frame_buffers_[simulcast_svc_idx]);
+ WriteDecodedFrameToFile(&last_decoded_frame_buffers_[simulcast_svc_idx],
+ simulcast_svc_idx);
}
++num_decoded_frames_;
}
-void VideoProcessor::WriteDecodedFrameToFile(rtc::Buffer* buffer) {
- RTC_DCHECK_EQ(buffer->size(), decoded_frame_writer_->FrameLength());
- RTC_CHECK(decoded_frame_writer_->WriteFrame(buffer->data()));
+// Stores a deep copy of |encoded_image| in |last_encoded_frames_| under
+// |simulcast_svc_idx| so that it can be decoded after all layers have been
+// encoded. When more than one spatial layer is configured and this is not the
+// base layer, the stored payload is the payload already stored for the layer
+// below followed by this layer's payload, because each spatial layer is
+// decoded with a dedicated decoder.
+// |codec| selects the amount of trailing padding appended to the buffer.
+// |frame_number| is currently unused.
+// The copied buffer is allocated with new[]; ProcessFrame releases it with
+// delete[] once the frame has been passed to the decoder.
+void VideoProcessor::CopyEncodedImage(const EncodedImage& encoded_image,
+                                      const VideoCodecType codec,
+                                      size_t frame_number,
+                                      size_t simulcast_svc_idx) {
+  RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_);
+
+  EncodedImage base_image;
+  RTC_CHECK_EQ(base_image._length, 0);
+
+  // Each SVC layer is decoded with a dedicated decoder. Prepend the data of
+  // the lower layers to the current coded frame buffer.
+  if (config_.NumberOfSpatialLayers() > 1 && simulcast_svc_idx > 0) {
+    const auto base_it = last_encoded_frames_.find(simulcast_svc_idx - 1);
+    RTC_CHECK(base_it != last_encoded_frames_.end());
+    base_image = base_it->second;
+  }
+
+  const size_t payload_size_bytes = base_image._length + encoded_image._length;
+  const size_t buffer_size_bytes =
+      payload_size_bytes + EncodedImage::GetBufferPaddingBytes(codec);
+
+  uint8_t* copied_buffer = new uint8_t[buffer_size_bytes];
+  RTC_CHECK(copied_buffer);
+
+  if (base_image._length) {
+    memcpy(copied_buffer, base_image._buffer, base_image._length);
+  }
+
+  memcpy(copied_buffer + base_image._length, encoded_image._buffer,
+         encoded_image._length);
+
+  // Zero the trailing padding so that a decoder reading past the payload
+  // never sees uninitialized memory.
+  memset(copied_buffer + payload_size_bytes, 0,
+         buffer_size_bytes - payload_size_bytes);
+
+  // Copy all metadata from the source image, then point the copy at the
+  // newly allocated buffer.
+  EncodedImage copied_image = encoded_image;
+  copied_image._buffer = copied_buffer;
+  copied_image._length = payload_size_bytes;
+  copied_image._size = buffer_size_bytes;
+
+  last_encoded_frames_[simulcast_svc_idx] = copied_image;
+}
+
+// Computes PSNR and SSIM of |dec_frame| against |ref_frame| and stores them
+// in |frame_stat|. If the decoded frame has a different resolution than the
+// reference (e.g. a low-resolution simulcast/spatial layer), the reference is
+// first downscaled to the decoded resolution. The decoded frame must not be
+// larger than the reference in either dimension.
+void VideoProcessor::CalculateFrameQuality(const VideoFrame& ref_frame,
+                                           const VideoFrame& dec_frame,
+                                           FrameStatistic* frame_stat) {
+  // Compare directly only when BOTH dimensions match. A frame that matches
+  // in just one dimension still has to be scaled before comparison.
+  if (ref_frame.width() == dec_frame.width() &&
+      ref_frame.height() == dec_frame.height()) {
+    frame_stat->psnr = I420PSNR(&ref_frame, &dec_frame);
+    frame_stat->ssim = I420SSIM(&ref_frame, &dec_frame);
+    return;
+  }
+
+  RTC_CHECK_GE(ref_frame.width(), dec_frame.width());
+  RTC_CHECK_GE(ref_frame.height(), dec_frame.height());
+
+  // Hold the converted buffers by value so they stay alive for the whole
+  // computation, rather than referencing the pointee of a temporary.
+  const auto ref_buffer = ref_frame.video_frame_buffer()->ToI420();
+  const auto dec_buffer = dec_frame.video_frame_buffer()->ToI420();
+
+  // Downscale reference frame. Use bilinear interpolation since it is used
+  // to get lowres inputs for encoder at simulcasting.
+  // TODO(ssilkin): Sync with VP9 SVC which uses 8-taps polyphase.
+  rtc::scoped_refptr<I420Buffer> scaled_buffer =
+      I420Buffer::Create(dec_frame.width(), dec_frame.height());
+  libyuv::I420Scale(ref_buffer->DataY(), ref_buffer->StrideY(),
+                    ref_buffer->DataU(), ref_buffer->StrideU(),
+                    ref_buffer->DataV(), ref_buffer->StrideV(),
+                    ref_buffer->width(), ref_buffer->height(),
+                    scaled_buffer->MutableDataY(), scaled_buffer->StrideY(),
+                    scaled_buffer->MutableDataU(), scaled_buffer->StrideU(),
+                    scaled_buffer->MutableDataV(), scaled_buffer->StrideV(),
+                    scaled_buffer->width(), scaled_buffer->height(),
+                    libyuv::kFilterBilinear);
+
+  frame_stat->psnr = I420PSNR(*scaled_buffer, *dec_buffer);
+  frame_stat->ssim = I420SSIM(*scaled_buffer, *dec_buffer);
+}
+
+// Writes the frame held in |buffer| to the decoded-frame writer of layer
+// |simulcast_svc_idx|. Requires |decoded_frame_writers_| to be set and to
+// contain a writer for that layer. |buffer| is expected to be exactly one
+// frame long as reported by that writer (DCHECK-ed).
+void VideoProcessor::WriteDecodedFrameToFile(rtc::Buffer* buffer,
+                                             size_t simulcast_svc_idx) {
+  // Fail with a clear message instead of dereferencing a null writer list.
+  RTC_CHECK(decoded_frame_writers_);
+  RTC_CHECK_LT(simulcast_svc_idx, decoded_frame_writers_->size());
+
+  auto& frame_writer = decoded_frame_writers_->at(simulcast_svc_idx);
+  RTC_DCHECK_EQ(buffer->size(), frame_writer->FrameLength());
+  RTC_CHECK(frame_writer->WriteFrame(buffer->data()));
}
} // namespace test