pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
#include "webrtc/modules/audio_processing/agc/loudness_histogram.h"

#include <cmath>
#include <cstring>
#include <limits>

#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/rtc_base/checks.h"
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 18 | |
| 19 | namespace webrtc { |
| 20 | |
// Centers of the loudness-histogram bins in the linear (RMS) domain, listed in
// ascending order. GetBinIndex() treats this grid as uniform in the log
// domain, so the number of entries must equal kHistSize (checked by a
// static_assert in the default constructor).
static const double kHistBinCenters[] = {
    7.59621091765857e-02, 9.02036021061016e-02,
    1.07115112009343e-01, 1.27197217770508e-01,
    1.51044347572047e-01, 1.79362373905283e-01,
    2.12989507320644e-01, 2.52921107370304e-01,
    3.00339145144454e-01, 3.56647189489147e-01,
    4.23511952494003e-01, 5.02912623991786e-01,
    5.97199455365749e-01, 7.09163326739184e-01,
    8.42118356728544e-01, 1.00000000000000e+00,
    1.18748153630660e+00, 1.41011239906908e+00,
    1.67448243801153e+00, 1.98841697800836e+00,
    2.36120844786349e+00, 2.80389143520905e+00,
    3.32956930911896e+00, 3.95380207843188e+00,
    4.69506696634852e+00, 5.57530533426190e+00,
    6.62057214370769e+00, 7.86180718043869e+00,
    9.33575086877358e+00, 1.10860317842269e+01,
    1.31644580546776e+01, 1.56325508754123e+01,
    1.85633655299256e+01, 2.20436538184971e+01,
    2.61764319021997e+01, 3.10840295702492e+01,
    3.69117111886792e+01, 4.38319755100383e+01,
    5.20496616180135e+01, 6.18080121423973e+01,
    7.33958732149108e+01, 8.71562442838066e+01,
    1.03496430860848e+02, 1.22900100720889e+02,
    1.45941600416277e+02, 1.73302955873365e+02,
    2.05794060286978e+02, 2.44376646872353e+02,
    2.90192756065437e+02, 3.44598539797631e+02,
    4.09204403447902e+02, 4.85922673669740e+02,
    5.77024203055553e+02, 6.85205587130498e+02,
    8.13668983291589e+02, 9.66216894324125e+02,
    1.14736472207740e+03, 1.36247442287647e+03,
    1.61791322085579e+03, 1.92124207711260e+03,
    2.28143949334655e+03, 2.70916727454970e+03,
    3.21708611729384e+03, 3.82023036499473e+03,
    4.53645302286906e+03, 5.38695420497926e+03,
    6.39690865534207e+03, 7.59621091765857e+03,
    9.02036021061016e+03, 1.07115112009343e+04,
    1.27197217770508e+04, 1.51044347572047e+04,
    1.79362373905283e+04, 2.12989507320644e+04,
    2.52921107370304e+04, 3.00339145144454e+04,
    3.56647189489147e+04};
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 48 | |
// Scale factor used to convert probabilities to the Q10 fixed-point domain.
constexpr double kProbQDomain = 1024.0;

// Natural log of the smallest bin center, i.e. a loudness of -15 dB (the
// smallest expected loudness). The original note gives the mapping as
// loudness_db = 13.5 * log10(rms).
// NOTE(review): the numeric values here look like they were derived with a
// factor of about 13.4 (1 / kLogDomainStepSizeInverse ~= ln(10) / 13.4) --
// confirm which factor is intended.
constexpr double kLogDomainMinBinCenter = -2.57752062648587;

// Inverse of the log-domain distance between neighboring bin centers (a
// loudness step of 1 dB).
constexpr double kLogDomainStepSizeInverse = 5.81954605750359;

// A run of high-activity entries no longer than this is considered a
// transient and is removed from the histogram once the activity drops.
constexpr int kTransientWidthThreshold = 7;

// Activity probabilities at or below this value are treated as zero when the
// circular buffer is in use.
constexpr double kLowProbabilityThreshold = 0.2;

// |kLowProbabilityThreshold| expressed in the Q10 domain (truncated).
constexpr int kLowProbThresholdQ10 =
    static_cast<int>(kLowProbabilityThreshold * kProbQDomain);
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 61 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 62 | LoudnessHistogram::LoudnessHistogram() |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 63 | : num_updates_(0), |
| 64 | audio_content_q10_(0), |
| 65 | bin_count_q10_(), |
| 66 | activity_probability_(), |
| 67 | hist_bin_index_(), |
| 68 | buffer_index_(0), |
| 69 | buffer_is_full_(false), |
| 70 | len_circular_buffer_(0), |
| 71 | len_high_activity_(0) { |
kwiberg@webrtc.org | 2ebfac5 | 2015-01-14 10:51:54 +0000 | [diff] [blame] | 72 | static_assert( |
| 73 | kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]), |
| 74 | "histogram bin centers incorrect size"); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 75 | } |
| 76 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 77 | LoudnessHistogram::LoudnessHistogram(int window_size) |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 78 | : num_updates_(0), |
| 79 | audio_content_q10_(0), |
| 80 | bin_count_q10_(), |
| 81 | activity_probability_(new int[window_size]), |
| 82 | hist_bin_index_(new int[window_size]), |
| 83 | buffer_index_(0), |
| 84 | buffer_is_full_(false), |
| 85 | len_circular_buffer_(window_size), |
| 86 | len_high_activity_(0) {} |
| 87 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 88 | LoudnessHistogram::~LoudnessHistogram() {} |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 89 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 90 | void LoudnessHistogram::Update(double rms, double activity_probaility) { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 91 | // If circular histogram is activated then remove the oldest entry. |
| 92 | if (len_circular_buffer_ > 0) |
| 93 | RemoveOldestEntryAndUpdate(); |
| 94 | |
| 95 | // Find the corresponding bin. |
| 96 | int hist_index = GetBinIndex(rms); |
| 97 | // To Q10 domain. |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 98 | int prob_q10 = |
| 99 | static_cast<int16_t>(floor(activity_probaility * kProbQDomain)); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 100 | InsertNewestEntryAndUpdate(prob_q10, hist_index); |
| 101 | } |
| 102 | |
| 103 | // Doing nothing if buffer is not full, yet. |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 104 | void LoudnessHistogram::RemoveOldestEntryAndUpdate() { |
kwiberg | 9e2be5f | 2016-09-14 05:23:22 -0700 | [diff] [blame] | 105 | RTC_DCHECK_GT(len_circular_buffer_, 0); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 106 | // Do nothing if circular buffer is not full. |
| 107 | if (!buffer_is_full_) |
| 108 | return; |
| 109 | |
| 110 | int oldest_prob = activity_probability_[buffer_index_]; |
| 111 | int oldest_hist_index = hist_bin_index_[buffer_index_]; |
| 112 | UpdateHist(-oldest_prob, oldest_hist_index); |
| 113 | } |
| 114 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 115 | void LoudnessHistogram::RemoveTransient() { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 116 | // Don't expect to be here if high-activity region is longer than |
| 117 | // |kTransientWidthThreshold| or there has not been any transient. |
kwiberg | 9e2be5f | 2016-09-14 05:23:22 -0700 | [diff] [blame] | 118 | RTC_DCHECK_LE(len_high_activity_, kTransientWidthThreshold); |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 119 | int index = |
| 120 | (buffer_index_ > 0) ? (buffer_index_ - 1) : len_circular_buffer_ - 1; |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 121 | while (len_high_activity_ > 0) { |
| 122 | UpdateHist(-activity_probability_[index], hist_bin_index_[index]); |
| 123 | activity_probability_[index] = 0; |
| 124 | index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1); |
| 125 | len_high_activity_--; |
| 126 | } |
| 127 | } |
| 128 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 129 | void LoudnessHistogram::InsertNewestEntryAndUpdate(int activity_prob_q10, |
| 130 | int hist_index) { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 131 | // Update the circular buffer if it is enabled. |
| 132 | if (len_circular_buffer_ > 0) { |
| 133 | // Removing transient. |
| 134 | if (activity_prob_q10 <= kLowProbThresholdQ10) { |
| 135 | // Lower than threshold probability, set it to zero. |
| 136 | activity_prob_q10 = 0; |
| 137 | // Check if this has been a transient. |
| 138 | if (len_high_activity_ <= kTransientWidthThreshold) |
| 139 | RemoveTransient(); // Remove this transient. |
| 140 | len_high_activity_ = 0; |
| 141 | } else if (len_high_activity_ <= kTransientWidthThreshold) { |
| 142 | len_high_activity_++; |
| 143 | } |
| 144 | // Updating the circular buffer. |
| 145 | activity_probability_[buffer_index_] = activity_prob_q10; |
| 146 | hist_bin_index_[buffer_index_] = hist_index; |
| 147 | // Increment the buffer index and check for wrap-around. |
| 148 | buffer_index_++; |
| 149 | if (buffer_index_ >= len_circular_buffer_) { |
| 150 | buffer_index_ = 0; |
| 151 | buffer_is_full_ = true; |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | num_updates_++; |
| 156 | if (num_updates_ < 0) |
| 157 | num_updates_--; |
| 158 | |
| 159 | UpdateHist(activity_prob_q10, hist_index); |
| 160 | } |
| 161 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 162 | void LoudnessHistogram::UpdateHist(int activity_prob_q10, int hist_index) { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 163 | bin_count_q10_[hist_index] += activity_prob_q10; |
| 164 | audio_content_q10_ += activity_prob_q10; |
| 165 | } |
| 166 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 167 | double LoudnessHistogram::AudioContent() const { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 168 | return audio_content_q10_ / kProbQDomain; |
| 169 | } |
| 170 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 171 | LoudnessHistogram* LoudnessHistogram::Create() { |
| 172 | return new LoudnessHistogram; |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 173 | } |
| 174 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 175 | LoudnessHistogram* LoudnessHistogram::Create(int window_size) { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 176 | if (window_size < 0) |
| 177 | return NULL; |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 178 | return new LoudnessHistogram(window_size); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 179 | } |
| 180 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 181 | void LoudnessHistogram::Reset() { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 182 | // Reset the histogram, audio-content and number of updates. |
| 183 | memset(bin_count_q10_, 0, sizeof(bin_count_q10_)); |
| 184 | audio_content_q10_ = 0; |
| 185 | num_updates_ = 0; |
| 186 | // Empty the circular buffer. |
| 187 | buffer_index_ = 0; |
| 188 | buffer_is_full_ = false; |
| 189 | len_high_activity_ = 0; |
| 190 | } |
| 191 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 192 | int LoudnessHistogram::GetBinIndex(double rms) { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 193 | // First exclude overload cases. |
| 194 | if (rms <= kHistBinCenters[0]) { |
| 195 | return 0; |
| 196 | } else if (rms >= kHistBinCenters[kHistSize - 1]) { |
| 197 | return kHistSize - 1; |
| 198 | } else { |
| 199 | // The quantizer is uniform in log domain. Alternatively we could do binary |
| 200 | // search in linear domain. |
| 201 | double rms_log = log(rms); |
| 202 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 203 | int index = static_cast<int>( |
| 204 | floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse)); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 205 | // The final decision is in linear domain. |
| 206 | double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]); |
| 207 | if (rms > b) { |
| 208 | return index + 1; |
| 209 | } |
| 210 | return index; |
| 211 | } |
| 212 | } |
| 213 | |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 214 | double LoudnessHistogram::CurrentRms() const { |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 215 | double p; |
| 216 | double mean_val = 0; |
| 217 | if (audio_content_q10_ > 0) { |
| 218 | double p_total_inverse = 1. / static_cast<double>(audio_content_q10_); |
| 219 | for (int n = 0; n < kHistSize; n++) { |
| 220 | p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse; |
| 221 | mean_val += p * kHistBinCenters[n]; |
| 222 | } |
| 223 | } else { |
| 224 | mean_val = kHistBinCenters[0]; |
| 225 | } |
| 226 | return mean_val; |
| 227 | } |
| 228 | |
| 229 | } // namespace webrtc |