blob: 54c04da466820757d108ad1689e79db5fd00134f [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
Henrik Kjellander0f59a882015-11-18 22:31:24 +010010#include "webrtc/modules/video_processing/content_analysis.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000011
12#include <math.h>
13#include <stdlib.h>
andrew@webrtc.orgc8d012f2012-01-13 19:43:09 +000014
Henrik Kjellander98f53512015-10-28 18:17:40 +010015#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
16#include "webrtc/system_wrappers/include/tick_util.h"
pbos@webrtc.org6f3d8fc2013-05-27 14:12:16 +000017
niklase@google.com470e71d2011-07-07 08:21:25 +000018namespace webrtc {
19
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000020VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
21 : orig_frame_(NULL),
22 prev_frame_(NULL),
23 width_(0),
24 height_(0),
25 skip_num_(1),
26 border_(8),
27 motion_magnitude_(0.0f),
28 spatial_pred_err_(0.0f),
29 spatial_pred_err_h_(0.0f),
30 spatial_pred_err_v_(0.0f),
31 first_frame_(true),
32 ca_Init_(false),
33 content_metrics_(NULL) {
34 ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;
35 TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;
frkoenig@google.com6d171c42011-08-15 15:56:23 +000036
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000037 if (runtime_cpu_detection) {
andrew@webrtc.orgc8d012f2012-01-13 19:43:09 +000038#if defined(WEBRTC_ARCH_X86_FAMILY)
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000039 if (WebRtc_GetCPUInfo(kSSE2)) {
40 ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
41 TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
42 }
andrew@webrtc.orgc8d012f2012-01-13 19:43:09 +000043#endif
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000044 }
45 Release();
niklase@google.com470e71d2011-07-07 08:21:25 +000046}
47
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000048VPMContentAnalysis::~VPMContentAnalysis() {
49 Release();
niklase@google.com470e71d2011-07-07 08:21:25 +000050}
51
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000052VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics(
Miguel Casas-Sanchez47650702015-05-29 17:21:40 -070053 const VideoFrame& inputFrame) {
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000054 if (inputFrame.IsZeroSize())
55 return NULL;
niklase@google.com470e71d2011-07-07 08:21:25 +000056
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000057 // Init if needed (native dimension change).
58 if (width_ != inputFrame.width() || height_ != inputFrame.height()) {
59 if (VPM_OK != Initialize(inputFrame.width(), inputFrame.height()))
60 return NULL;
61 }
62 // Only interested in the Y plane.
nisse5b3c4432016-04-29 02:39:24 -070063 orig_frame_ = inputFrame.buffer(kYPlane);
niklase@google.com470e71d2011-07-07 08:21:25 +000064
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000065 // Compute spatial metrics: 3 spatial prediction errors.
66 (this->*ComputeSpatialMetrics)();
niklase@google.com470e71d2011-07-07 08:21:25 +000067
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000068 // Compute motion metrics
69 if (first_frame_ == false)
70 ComputeMotionMetrics();
niklase@google.com470e71d2011-07-07 08:21:25 +000071
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000072 // Saving current frame as previous one: Y only.
73 memcpy(prev_frame_, orig_frame_, width_ * height_);
niklase@google.com470e71d2011-07-07 08:21:25 +000074
mflodman99ab9442015-12-07 22:54:50 -080075 first_frame_ = false;
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000076 ca_Init_ = true;
niklase@google.com470e71d2011-07-07 08:21:25 +000077
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000078 return ContentMetrics();
niklase@google.com470e71d2011-07-07 08:21:25 +000079}
80
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000081int32_t VPMContentAnalysis::Release() {
82 if (content_metrics_ != NULL) {
83 delete content_metrics_;
84 content_metrics_ = NULL;
85 }
niklase@google.com470e71d2011-07-07 08:21:25 +000086
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000087 if (prev_frame_ != NULL) {
mflodman99ab9442015-12-07 22:54:50 -080088 delete[] prev_frame_;
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000089 prev_frame_ = NULL;
90 }
niklase@google.com470e71d2011-07-07 08:21:25 +000091
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000092 width_ = 0;
93 height_ = 0;
94 first_frame_ = true;
niklase@google.com470e71d2011-07-07 08:21:25 +000095
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000096 return VPM_OK;
niklase@google.com470e71d2011-07-07 08:21:25 +000097}
98
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +000099int32_t VPMContentAnalysis::Initialize(int width, int height) {
100 width_ = width;
101 height_ = height;
102 first_frame_ = true;
niklase@google.com470e71d2011-07-07 08:21:25 +0000103
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000104 // skip parameter: # of skipped rows: for complexity reduction
105 // temporal also currently uses it for column reduction.
106 skip_num_ = 1;
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000107
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000108 // use skipNum = 2 for 4CIF, WHD
mflodman99ab9442015-12-07 22:54:50 -0800109 if ((height_ >= 576) && (width_ >= 704)) {
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000110 skip_num_ = 2;
111 }
112 // use skipNum = 4 for FULLL_HD images
mflodman99ab9442015-12-07 22:54:50 -0800113 if ((height_ >= 1080) && (width_ >= 1920)) {
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000114 skip_num_ = 4;
115 }
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000116
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000117 if (content_metrics_ != NULL) {
118 delete content_metrics_;
119 }
niklase@google.com470e71d2011-07-07 08:21:25 +0000120
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000121 if (prev_frame_ != NULL) {
mflodman99ab9442015-12-07 22:54:50 -0800122 delete[] prev_frame_;
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000123 }
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000124
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000125 // Spatial Metrics don't work on a border of 8. Minimum processing
126 // block size is 16 pixels. So make sure the width and height support this.
127 if (width_ <= 32 || height_ <= 32) {
128 ca_Init_ = false;
129 return VPM_PARAMETER_ERROR;
130 }
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000131
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000132 content_metrics_ = new VideoContentMetrics();
133 if (content_metrics_ == NULL) {
134 return VPM_MEMORY;
135 }
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000136
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000137 prev_frame_ = new uint8_t[width_ * height_]; // Y only.
mflodman99ab9442015-12-07 22:54:50 -0800138 if (prev_frame_ == NULL)
139 return VPM_MEMORY;
niklase@google.com470e71d2011-07-07 08:21:25 +0000140
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000141 return VPM_OK;
niklase@google.com470e71d2011-07-07 08:21:25 +0000142}
143
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000144// Compute motion metrics: magnitude over non-zero motion vectors,
145// and size of zero cluster
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000146int32_t VPMContentAnalysis::ComputeMotionMetrics() {
147 // Motion metrics: only one is derived from normalized
148 // (MAD) temporal difference
149 (this->*TemporalDiffMetric)();
150 return VPM_OK;
niklase@google.com470e71d2011-07-07 08:21:25 +0000151}
152
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000153// Normalized temporal difference (MAD): used as a motion level metric
154// Normalize MAD by spatial contrast: images with more contrast
155// (pixel variance) likely have larger temporal difference
156// To reduce complexity, we compute the metric for a reduced set of points.
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000157int32_t VPMContentAnalysis::TemporalDiffMetric_C() {
158 // size of original frame
159 int sizei = height_;
160 int sizej = width_;
161 uint32_t tempDiffSum = 0;
162 uint32_t pixelSum = 0;
163 uint64_t pixelSqSum = 0;
niklase@google.com470e71d2011-07-07 08:21:25 +0000164
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000165 uint32_t num_pixels = 0; // Counter for # of pixels.
mflodman99ab9442015-12-07 22:54:50 -0800166 const int width_end = ((width_ - 2 * border_) & -16) + border_;
niklase@google.com470e71d2011-07-07 08:21:25 +0000167
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000168 for (int i = border_; i < sizei - border_; i += skip_num_) {
169 for (int j = border_; j < width_end; j++) {
170 num_pixels += 1;
mflodman99ab9442015-12-07 22:54:50 -0800171 int ssn = i * sizej + j;
niklase@google.com470e71d2011-07-07 08:21:25 +0000172
mflodman99ab9442015-12-07 22:54:50 -0800173 uint8_t currPixel = orig_frame_[ssn];
174 uint8_t prevPixel = prev_frame_[ssn];
frkoenig@google.comff476c62011-08-17 23:24:57 +0000175
mflodman99ab9442015-12-07 22:54:50 -0800176 tempDiffSum +=
177 static_cast<uint32_t>(abs((int16_t)(currPixel - prevPixel)));
178 pixelSum += static_cast<uint32_t>(currPixel);
179 pixelSqSum += static_cast<uint64_t>(currPixel * currPixel);
niklase@google.com470e71d2011-07-07 08:21:25 +0000180 }
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000181 }
niklase@google.com470e71d2011-07-07 08:21:25 +0000182
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000183 // Default.
184 motion_magnitude_ = 0.0f;
niklase@google.com470e71d2011-07-07 08:21:25 +0000185
mflodman99ab9442015-12-07 22:54:50 -0800186 if (tempDiffSum == 0)
187 return VPM_OK;
niklase@google.com470e71d2011-07-07 08:21:25 +0000188
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000189 // Normalize over all pixels.
mflodman99ab9442015-12-07 22:54:50 -0800190 float const tempDiffAvg =
191 static_cast<float>(tempDiffSum) / static_cast<float>(num_pixels);
192 float const pixelSumAvg =
193 static_cast<float>(pixelSum) / static_cast<float>(num_pixels);
194 float const pixelSqSumAvg =
195 static_cast<float>(pixelSqSum) / static_cast<float>(num_pixels);
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000196 float contrast = pixelSqSumAvg - (pixelSumAvg * pixelSumAvg);
niklase@google.com470e71d2011-07-07 08:21:25 +0000197
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000198 if (contrast > 0.0) {
199 contrast = sqrt(contrast);
mflodman99ab9442015-12-07 22:54:50 -0800200 motion_magnitude_ = tempDiffAvg / contrast;
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000201 }
202 return VPM_OK;
niklase@google.com470e71d2011-07-07 08:21:25 +0000203}
204
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000205// Compute spatial metrics:
206// To reduce complexity, we compute the metric for a reduced set of points.
207// The spatial metrics are rough estimates of the prediction error cost for
208// each QM spatial mode: 2x2,1x2,2x1
209// The metrics are a simple estimate of the up-sampling prediction error,
210// estimated assuming sub-sampling for decimation (no filtering),
211// and up-sampling back up with simple bilinear interpolation.
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000212int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() {
213 const int sizei = height_;
214 const int sizej = width_;
niklase@google.com470e71d2011-07-07 08:21:25 +0000215
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000216 // Pixel mean square average: used to normalize the spatial metrics.
217 uint32_t pixelMSA = 0;
niklase@google.com470e71d2011-07-07 08:21:25 +0000218
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000219 uint32_t spatialErrSum = 0;
220 uint32_t spatialErrVSum = 0;
221 uint32_t spatialErrHSum = 0;
niklase@google.com470e71d2011-07-07 08:21:25 +0000222
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000223 // make sure work section is a multiple of 16
mflodman99ab9442015-12-07 22:54:50 -0800224 const int width_end = ((sizej - 2 * border_) & -16) + border_;
frkoenig@google.com6d171c42011-08-15 15:56:23 +0000225
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000226 for (int i = border_; i < sizei - border_; i += skip_num_) {
227 for (int j = border_; j < width_end; j++) {
mflodman99ab9442015-12-07 22:54:50 -0800228 int ssn1 = i * sizej + j;
229 int ssn2 = (i + 1) * sizej + j; // bottom
230 int ssn3 = (i - 1) * sizej + j; // top
231 int ssn4 = i * sizej + j + 1; // right
232 int ssn5 = i * sizej + j - 1; // left
niklase@google.com470e71d2011-07-07 08:21:25 +0000233
mflodman99ab9442015-12-07 22:54:50 -0800234 uint16_t refPixel1 = orig_frame_[ssn1] << 1;
235 uint16_t refPixel2 = orig_frame_[ssn1] << 2;
niklase@google.com470e71d2011-07-07 08:21:25 +0000236
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000237 uint8_t bottPixel = orig_frame_[ssn2];
238 uint8_t topPixel = orig_frame_[ssn3];
239 uint8_t rightPixel = orig_frame_[ssn4];
240 uint8_t leftPixel = orig_frame_[ssn5];
niklase@google.com470e71d2011-07-07 08:21:25 +0000241
mflodman99ab9442015-12-07 22:54:50 -0800242 spatialErrSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
243 refPixel2 - static_cast<uint16_t>(bottPixel + topPixel + leftPixel +
244 rightPixel))));
245 spatialErrVSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
246 refPixel1 - static_cast<uint16_t>(bottPixel + topPixel))));
247 spatialErrHSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
248 refPixel1 - static_cast<uint16_t>(leftPixel + rightPixel))));
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000249 pixelMSA += orig_frame_[ssn1];
niklase@google.com470e71d2011-07-07 08:21:25 +0000250 }
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000251 }
niklase@google.com470e71d2011-07-07 08:21:25 +0000252
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000253 // Normalize over all pixels.
mflodman99ab9442015-12-07 22:54:50 -0800254 const float spatialErr = static_cast<float>(spatialErrSum >> 2);
255 const float spatialErrH = static_cast<float>(spatialErrHSum >> 1);
256 const float spatialErrV = static_cast<float>(spatialErrVSum >> 1);
257 const float norm = static_cast<float>(pixelMSA);
niklase@google.com470e71d2011-07-07 08:21:25 +0000258
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000259 // 2X2:
260 spatial_pred_err_ = spatialErr / norm;
261 // 1X2:
262 spatial_pred_err_h_ = spatialErrH / norm;
263 // 2X1:
264 spatial_pred_err_v_ = spatialErrV / norm;
265 return VPM_OK;
niklase@google.com470e71d2011-07-07 08:21:25 +0000266}
267
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000268VideoContentMetrics* VPMContentAnalysis::ContentMetrics() {
mflodman99ab9442015-12-07 22:54:50 -0800269 if (ca_Init_ == false)
270 return NULL;
niklase@google.com470e71d2011-07-07 08:21:25 +0000271
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000272 content_metrics_->spatial_pred_err = spatial_pred_err_;
273 content_metrics_->spatial_pred_err_h = spatial_pred_err_h_;
274 content_metrics_->spatial_pred_err_v = spatial_pred_err_v_;
275 // Motion metric: normalized temporal difference (MAD).
276 content_metrics_->motion_magnitude = motion_magnitude_;
marpan@webrtc.orgbd5648f2012-02-17 23:16:58 +0000277
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000278 return content_metrics_;
niklase@google.com470e71d2011-07-07 08:21:25 +0000279}
280
mikhal@webrtc.orgb43d8072013-10-03 16:42:41 +0000281} // namespace webrtc