peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #include "modules/audio_processing/level_controller/level_controller.h" |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 12 | |
| 13 | #include <math.h> |
| 14 | #include <algorithm> |
| 15 | #include <numeric> |
| 16 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 17 | #include "api/array_view.h" |
| 18 | #include "modules/audio_processing/audio_buffer.h" |
| 19 | #include "modules/audio_processing/level_controller/gain_applier.h" |
| 20 | #include "modules/audio_processing/level_controller/gain_selector.h" |
| 21 | #include "modules/audio_processing/level_controller/noise_level_estimator.h" |
| 22 | #include "modules/audio_processing/level_controller/peak_level_estimator.h" |
| 23 | #include "modules/audio_processing/level_controller/saturating_gain_estimator.h" |
| 24 | #include "modules/audio_processing/level_controller/signal_classifier.h" |
| 25 | #include "modules/audio_processing/logging/apm_data_dumper.h" |
| 26 | #include "rtc_base/arraysize.h" |
| 27 | #include "rtc_base/checks.h" |
| 28 | #include "rtc_base/logging.h" |
| 29 | #include "system_wrappers/include/metrics.h" |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 30 | |
| 31 | namespace webrtc { |
| 32 | namespace { |
| 33 | |
| 34 | void UpdateAndRemoveDcLevel(float forgetting_factor, |
| 35 | float* dc_level, |
| 36 | rtc::ArrayView<float> x) { |
| 37 | RTC_DCHECK(!x.empty()); |
| 38 | float mean = |
maxmorin | 3f746ea | 2016-08-25 04:00:20 -0700 | [diff] [blame] | 39 | std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size()); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 40 | *dc_level += forgetting_factor * (mean - *dc_level); |
| 41 | |
| 42 | for (float& v : x) { |
| 43 | v -= *dc_level; |
| 44 | } |
| 45 | } |
| 46 | |
| 47 | float FrameEnergy(const AudioBuffer& audio) { |
| 48 | float energy = 0.f; |
| 49 | for (size_t k = 0; k < audio.num_channels(); ++k) { |
| 50 | float channel_energy = |
| 51 | std::accumulate(audio.channels_const_f()[k], |
oprypin | 30431d5 | 2017-09-05 09:49:30 -0700 | [diff] [blame] | 52 | audio.channels_const_f()[k] + audio.num_frames(), 0.f, |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 53 | [](float a, float b) -> float { return a + b * b; }); |
| 54 | energy = std::max(channel_energy, energy); |
| 55 | } |
| 56 | return energy; |
| 57 | } |
| 58 | |
| 59 | float PeakLevel(const AudioBuffer& audio) { |
| 60 | float peak_level = 0.f; |
| 61 | for (size_t k = 0; k < audio.num_channels(); ++k) { |
kjellander | 7c85658 | 2017-02-26 19:53:40 -0800 | [diff] [blame] | 62 | auto* channel_peak_level = std::max_element( |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 63 | audio.channels_const_f()[k], |
| 64 | audio.channels_const_f()[k] + audio.num_frames(), |
| 65 | [](float a, float b) { return std::abs(a) < std::abs(b); }); |
| 66 | peak_level = std::max(*channel_peak_level, peak_level); |
| 67 | } |
| 68 | return peak_level; |
| 69 | } |
| 70 | |
// Number of processed frames that are accumulated before the metrics are
// reported and the accumulators are reset.
constexpr int kMetricsFrameInterval = 1000;
| 72 | |
| 73 | } // namespace |
| 74 | |
| 75 | int LevelController::instance_count_ = 0; |
| 76 | |
| 77 | void LevelController::Metrics::Initialize(int sample_rate_hz) { |
| 78 | RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || |
| 79 | sample_rate_hz == AudioProcessing::kSampleRate16kHz || |
| 80 | sample_rate_hz == AudioProcessing::kSampleRate32kHz || |
| 81 | sample_rate_hz == AudioProcessing::kSampleRate48kHz); |
| 82 | |
| 83 | Reset(); |
| 84 | frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); |
| 85 | } |
| 86 | |
| 87 | void LevelController::Metrics::Reset() { |
| 88 | metrics_frame_counter_ = 0; |
| 89 | gain_sum_ = 0.f; |
| 90 | peak_level_sum_ = 0.f; |
| 91 | noise_energy_sum_ = 0.f; |
| 92 | max_gain_ = 0.f; |
| 93 | max_peak_level_ = 0.f; |
| 94 | max_noise_energy_ = 0.f; |
| 95 | } |
| 96 | |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 97 | void LevelController::Metrics::Update(float long_term_peak_level, |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 98 | float noise_energy, |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 99 | float gain, |
| 100 | float frame_peak_level) { |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 101 | const float kdBFSOffset = 90.3090f; |
| 102 | gain_sum_ += gain; |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 103 | peak_level_sum_ += long_term_peak_level; |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 104 | noise_energy_sum_ += noise_energy; |
| 105 | max_gain_ = std::max(max_gain_, gain); |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 106 | max_peak_level_ = std::max(max_peak_level_, long_term_peak_level); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 107 | max_noise_energy_ = std::max(max_noise_energy_, noise_energy); |
| 108 | |
| 109 | ++metrics_frame_counter_; |
| 110 | if (metrics_frame_counter_ == kMetricsFrameInterval) { |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 111 | RTC_DCHECK_LT(0, frame_length_); |
| 112 | RTC_DCHECK_LT(0, kMetricsFrameInterval); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 113 | |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 114 | const int max_noise_power_dbfs = static_cast<int>( |
| 115 | 10 * log10(max_noise_energy_ / frame_length_ + 1e-10f) - kdBFSOffset); |
| 116 | RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxNoisePower", |
| 117 | max_noise_power_dbfs, -90, 0, 50); |
| 118 | |
| 119 | const int average_noise_power_dbfs = static_cast<int>( |
| 120 | 10 * log10(noise_energy_sum_ / (frame_length_ * kMetricsFrameInterval) + |
| 121 | 1e-10f) - |
| 122 | kdBFSOffset); |
| 123 | RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageNoisePower", |
| 124 | average_noise_power_dbfs, -90, 0, 50); |
| 125 | |
| 126 | const int max_peak_level_dbfs = static_cast<int>( |
| 127 | 10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f) - kdBFSOffset); |
| 128 | RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxPeakLevel", |
| 129 | max_peak_level_dbfs, -90, 0, 50); |
| 130 | |
| 131 | const int average_peak_level_dbfs = static_cast<int>( |
| 132 | 10 * log10(peak_level_sum_ * peak_level_sum_ / |
| 133 | (kMetricsFrameInterval * kMetricsFrameInterval) + |
| 134 | 1e-10f) - |
| 135 | kdBFSOffset); |
| 136 | RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AveragePeakLevel", |
| 137 | average_peak_level_dbfs, -90, 0, 50); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 138 | |
| 139 | RTC_DCHECK_LE(1.f, max_gain_); |
| 140 | RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval); |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 141 | |
| 142 | const int max_gain_db = static_cast<int>(10 * log10(max_gain_ * max_gain_)); |
| 143 | RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain", max_gain_db, 0, |
| 144 | 33, 30); |
| 145 | |
| 146 | const int average_gain_db = static_cast<int>( |
| 147 | 10 * log10(gain_sum_ * gain_sum_ / |
| 148 | (kMetricsFrameInterval * kMetricsFrameInterval))); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 149 | RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain", |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 150 | average_gain_db, 0, 33, 30); |
| 151 | |
| 152 | const int long_term_peak_level_dbfs = static_cast<int>( |
| 153 | 10 * log10(long_term_peak_level * long_term_peak_level + 1e-10f) - |
| 154 | kdBFSOffset); |
| 155 | |
| 156 | const int frame_peak_level_dbfs = static_cast<int>( |
| 157 | 10 * log10(frame_peak_level * frame_peak_level + 1e-10f) - kdBFSOffset); |
| 158 | |
Mirko Bonadei | 675513b | 2017-11-09 11:09:25 +0100 | [diff] [blame^] | 159 | RTC_LOG(LS_INFO) << "Level Controller metrics: {" |
| 160 | << "Max noise power: " << max_noise_power_dbfs << " dBFS, " |
| 161 | << "Average noise power: " << average_noise_power_dbfs |
| 162 | << " dBFS, " |
| 163 | << "Max long term peak level: " << max_peak_level_dbfs |
| 164 | << " dBFS, " |
| 165 | << "Average long term peak level: " |
| 166 | << average_peak_level_dbfs << " dBFS, " |
| 167 | << "Max gain: " << max_gain_db << " dB, " |
| 168 | << "Average gain: " << average_gain_db << " dB, " |
| 169 | << "Long term peak level: " << long_term_peak_level_dbfs |
| 170 | << " dBFS, " |
| 171 | << "Last frame peak level: " << frame_peak_level_dbfs |
| 172 | << " dBFS" |
| 173 | << "}"; |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 174 | |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 175 | Reset(); |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | LevelController::LevelController() |
| 180 | : data_dumper_(new ApmDataDumper(instance_count_)), |
| 181 | gain_applier_(data_dumper_.get()), |
peah | c19f312 | 2016-10-07 14:54:10 -0700 | [diff] [blame] | 182 | signal_classifier_(data_dumper_.get()), |
| 183 | peak_level_estimator_(kTargetLcPeakLeveldBFS) { |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 184 | Initialize(AudioProcessing::kSampleRate48kHz); |
| 185 | ++instance_count_; |
| 186 | } |
| 187 | |
| 188 | LevelController::~LevelController() {} |
| 189 | |
| 190 | void LevelController::Initialize(int sample_rate_hz) { |
| 191 | RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || |
| 192 | sample_rate_hz == AudioProcessing::kSampleRate16kHz || |
| 193 | sample_rate_hz == AudioProcessing::kSampleRate32kHz || |
| 194 | sample_rate_hz == AudioProcessing::kSampleRate48kHz); |
| 195 | data_dumper_->InitiateNewSetOfRecordings(); |
| 196 | gain_selector_.Initialize(sample_rate_hz); |
| 197 | gain_applier_.Initialize(sample_rate_hz); |
| 198 | signal_classifier_.Initialize(sample_rate_hz); |
| 199 | noise_level_estimator_.Initialize(sample_rate_hz); |
peah | c19f312 | 2016-10-07 14:54:10 -0700 | [diff] [blame] | 200 | peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 201 | saturating_gain_estimator_.Initialize(); |
| 202 | metrics_.Initialize(sample_rate_hz); |
| 203 | |
| 204 | last_gain_ = 1.0f; |
| 205 | sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz); |
| 206 | dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f; |
| 207 | std::fill(dc_level_, dc_level_ + arraysize(dc_level_), 0.f); |
| 208 | } |
| 209 | |
| 210 | void LevelController::Process(AudioBuffer* audio) { |
kwiberg | af476c7 | 2016-11-28 15:21:39 -0800 | [diff] [blame] | 211 | RTC_DCHECK_LT(0, audio->num_channels()); |
| 212 | RTC_DCHECK_GE(2, audio->num_channels()); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 213 | RTC_DCHECK_NE(0.f, dc_forgetting_factor_); |
| 214 | RTC_DCHECK(sample_rate_hz_); |
| 215 | data_dumper_->DumpWav("lc_input", audio->num_frames(), |
| 216 | audio->channels_const_f()[0], *sample_rate_hz_, 1); |
| 217 | |
| 218 | // Remove DC level. |
| 219 | for (size_t k = 0; k < audio->num_channels(); ++k) { |
| 220 | UpdateAndRemoveDcLevel( |
| 221 | dc_forgetting_factor_, &dc_level_[k], |
| 222 | rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames())); |
| 223 | } |
| 224 | |
| 225 | SignalClassifier::SignalType signal_type; |
| 226 | signal_classifier_.Analyze(*audio, &signal_type); |
| 227 | int tmp = static_cast<int>(signal_type); |
| 228 | data_dumper_->DumpRaw("lc_signal_type", 1, &tmp); |
| 229 | |
| 230 | // Estimate the noise energy. |
| 231 | float noise_energy = |
| 232 | noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio)); |
| 233 | |
| 234 | // Estimate the overall signal peak level. |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 235 | const float frame_peak_level = PeakLevel(*audio); |
| 236 | const float long_term_peak_level = |
| 237 | peak_level_estimator_.Analyze(signal_type, frame_peak_level); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 238 | |
| 239 | float saturating_gain = saturating_gain_estimator_.GetGain(); |
| 240 | |
| 241 | // Compute the new gain to apply. |
peah | c19f312 | 2016-10-07 14:54:10 -0700 | [diff] [blame] | 242 | last_gain_ = |
| 243 | gain_selector_.GetNewGain(long_term_peak_level, noise_energy, |
| 244 | saturating_gain, gain_jumpstart_, signal_type); |
| 245 | |
| 246 | // Unflag the jumpstart of the gain as it should only happen once. |
| 247 | gain_jumpstart_ = false; |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 248 | |
| 249 | // Apply the gain to the signal. |
| 250 | int num_saturations = gain_applier_.Process(last_gain_, audio); |
| 251 | |
| 252 | // Estimate the gain that saturates the overall signal. |
| 253 | saturating_gain_estimator_.Update(last_gain_, num_saturations); |
| 254 | |
| 255 | // Update the metrics. |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 256 | metrics_.Update(long_term_peak_level, noise_energy, last_gain_, |
| 257 | frame_peak_level); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 258 | |
| 259 | data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_); |
| 260 | data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy); |
peah | 3026ee8 | 2016-08-26 11:15:47 -0700 | [diff] [blame] | 261 | data_dumper_->DumpRaw("lc_peak_level", 1, &long_term_peak_level); |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 262 | data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain); |
| 263 | |
| 264 | data_dumper_->DumpWav("lc_output", audio->num_frames(), |
| 265 | audio->channels_f()[0], *sample_rate_hz_, 1); |
| 266 | } |
| 267 | |
peah | c19f312 | 2016-10-07 14:54:10 -0700 | [diff] [blame] | 268 | void LevelController::ApplyConfig( |
| 269 | const AudioProcessing::Config::LevelController& config) { |
| 270 | RTC_DCHECK(Validate(config)); |
| 271 | config_ = config; |
| 272 | peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs); |
| 273 | gain_jumpstart_ = true; |
| 274 | } |
| 275 | |
peah | 88ac853 | 2016-09-12 16:47:25 -0700 | [diff] [blame] | 276 | std::string LevelController::ToString( |
| 277 | const AudioProcessing::Config::LevelController& config) { |
| 278 | std::stringstream ss; |
| 279 | ss << "{" |
peah | c19f312 | 2016-10-07 14:54:10 -0700 | [diff] [blame] | 280 | << "enabled: " << (config.enabled ? "true" : "false") << ", " |
| 281 | << "initial_peak_level_dbfs: " << config.initial_peak_level_dbfs << "}"; |
peah | 88ac853 | 2016-09-12 16:47:25 -0700 | [diff] [blame] | 282 | return ss.str(); |
| 283 | } |
| 284 | |
| 285 | bool LevelController::Validate( |
| 286 | const AudioProcessing::Config::LevelController& config) { |
peah | c19f312 | 2016-10-07 14:54:10 -0700 | [diff] [blame] | 287 | return (config.initial_peak_level_dbfs < |
| 288 | std::numeric_limits<float>::epsilon() && |
| 289 | config.initial_peak_level_dbfs > |
| 290 | -(100.f + std::numeric_limits<float>::epsilon())); |
peah | 88ac853 | 2016-09-12 16:47:25 -0700 | [diff] [blame] | 291 | } |
| 292 | |
peah | ca4cac7 | 2016-06-29 15:26:12 -0700 | [diff] [blame] | 293 | } // namespace webrtc |