blob: a600ac7bf8d3042a831b199e2f8925cc6fd97433 [file] [log] [blame]
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_processing/level_controller/level_controller.h"
peahca4cac72016-06-29 15:26:12 -070012
13#include <math.h>
14#include <algorithm>
15#include <numeric>
16
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "api/array_view.h"
18#include "modules/audio_processing/audio_buffer.h"
19#include "modules/audio_processing/level_controller/gain_applier.h"
20#include "modules/audio_processing/level_controller/gain_selector.h"
21#include "modules/audio_processing/level_controller/noise_level_estimator.h"
22#include "modules/audio_processing/level_controller/peak_level_estimator.h"
23#include "modules/audio_processing/level_controller/saturating_gain_estimator.h"
24#include "modules/audio_processing/level_controller/signal_classifier.h"
25#include "modules/audio_processing/logging/apm_data_dumper.h"
26#include "rtc_base/arraysize.h"
27#include "rtc_base/checks.h"
28#include "rtc_base/logging.h"
29#include "system_wrappers/include/metrics.h"
peahca4cac72016-06-29 15:26:12 -070030
31namespace webrtc {
32namespace {
33
34void UpdateAndRemoveDcLevel(float forgetting_factor,
35 float* dc_level,
36 rtc::ArrayView<float> x) {
37 RTC_DCHECK(!x.empty());
38 float mean =
maxmorin3f746ea2016-08-25 04:00:20 -070039 std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size());
peahca4cac72016-06-29 15:26:12 -070040 *dc_level += forgetting_factor * (mean - *dc_level);
41
42 for (float& v : x) {
43 v -= *dc_level;
44 }
45}
46
47float FrameEnergy(const AudioBuffer& audio) {
48 float energy = 0.f;
49 for (size_t k = 0; k < audio.num_channels(); ++k) {
50 float channel_energy =
51 std::accumulate(audio.channels_const_f()[k],
oprypin30431d52017-09-05 09:49:30 -070052 audio.channels_const_f()[k] + audio.num_frames(), 0.f,
peahca4cac72016-06-29 15:26:12 -070053 [](float a, float b) -> float { return a + b * b; });
54 energy = std::max(channel_energy, energy);
55 }
56 return energy;
57}
58
59float PeakLevel(const AudioBuffer& audio) {
60 float peak_level = 0.f;
61 for (size_t k = 0; k < audio.num_channels(); ++k) {
kjellander7c856582017-02-26 19:53:40 -080062 auto* channel_peak_level = std::max_element(
peahca4cac72016-06-29 15:26:12 -070063 audio.channels_const_f()[k],
64 audio.channels_const_f()[k] + audio.num_frames(),
65 [](float a, float b) { return std::abs(a) < std::abs(b); });
66 peak_level = std::max(*channel_peak_level, peak_level);
67 }
68 return peak_level;
69}
70
// Number of LevelController::Metrics::Update() calls that are accumulated
// before the metrics are reported and the accumulators are reset.
constexpr int kMetricsFrameInterval = 1000;
72
73} // namespace
74
75int LevelController::instance_count_ = 0;
76
77void LevelController::Metrics::Initialize(int sample_rate_hz) {
78 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
79 sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
80 sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
81 sample_rate_hz == AudioProcessing::kSampleRate48kHz);
82
83 Reset();
84 frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);
85}
86
87void LevelController::Metrics::Reset() {
88 metrics_frame_counter_ = 0;
89 gain_sum_ = 0.f;
90 peak_level_sum_ = 0.f;
91 noise_energy_sum_ = 0.f;
92 max_gain_ = 0.f;
93 max_peak_level_ = 0.f;
94 max_noise_energy_ = 0.f;
95}
96
peah3026ee82016-08-26 11:15:47 -070097void LevelController::Metrics::Update(float long_term_peak_level,
peahca4cac72016-06-29 15:26:12 -070098 float noise_energy,
peah3026ee82016-08-26 11:15:47 -070099 float gain,
100 float frame_peak_level) {
peahca4cac72016-06-29 15:26:12 -0700101 const float kdBFSOffset = 90.3090f;
102 gain_sum_ += gain;
peah3026ee82016-08-26 11:15:47 -0700103 peak_level_sum_ += long_term_peak_level;
peahca4cac72016-06-29 15:26:12 -0700104 noise_energy_sum_ += noise_energy;
105 max_gain_ = std::max(max_gain_, gain);
peah3026ee82016-08-26 11:15:47 -0700106 max_peak_level_ = std::max(max_peak_level_, long_term_peak_level);
peahca4cac72016-06-29 15:26:12 -0700107 max_noise_energy_ = std::max(max_noise_energy_, noise_energy);
108
109 ++metrics_frame_counter_;
110 if (metrics_frame_counter_ == kMetricsFrameInterval) {
peah3026ee82016-08-26 11:15:47 -0700111 RTC_DCHECK_LT(0, frame_length_);
112 RTC_DCHECK_LT(0, kMetricsFrameInterval);
peahca4cac72016-06-29 15:26:12 -0700113
peah3026ee82016-08-26 11:15:47 -0700114 const int max_noise_power_dbfs = static_cast<int>(
115 10 * log10(max_noise_energy_ / frame_length_ + 1e-10f) - kdBFSOffset);
116 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxNoisePower",
117 max_noise_power_dbfs, -90, 0, 50);
118
119 const int average_noise_power_dbfs = static_cast<int>(
120 10 * log10(noise_energy_sum_ / (frame_length_ * kMetricsFrameInterval) +
121 1e-10f) -
122 kdBFSOffset);
123 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageNoisePower",
124 average_noise_power_dbfs, -90, 0, 50);
125
126 const int max_peak_level_dbfs = static_cast<int>(
127 10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f) - kdBFSOffset);
128 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxPeakLevel",
129 max_peak_level_dbfs, -90, 0, 50);
130
131 const int average_peak_level_dbfs = static_cast<int>(
132 10 * log10(peak_level_sum_ * peak_level_sum_ /
133 (kMetricsFrameInterval * kMetricsFrameInterval) +
134 1e-10f) -
135 kdBFSOffset);
136 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AveragePeakLevel",
137 average_peak_level_dbfs, -90, 0, 50);
peahca4cac72016-06-29 15:26:12 -0700138
139 RTC_DCHECK_LE(1.f, max_gain_);
140 RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);
peah3026ee82016-08-26 11:15:47 -0700141
142 const int max_gain_db = static_cast<int>(10 * log10(max_gain_ * max_gain_));
143 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain", max_gain_db, 0,
144 33, 30);
145
146 const int average_gain_db = static_cast<int>(
147 10 * log10(gain_sum_ * gain_sum_ /
148 (kMetricsFrameInterval * kMetricsFrameInterval)));
peahca4cac72016-06-29 15:26:12 -0700149 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain",
peah3026ee82016-08-26 11:15:47 -0700150 average_gain_db, 0, 33, 30);
151
152 const int long_term_peak_level_dbfs = static_cast<int>(
153 10 * log10(long_term_peak_level * long_term_peak_level + 1e-10f) -
154 kdBFSOffset);
155
156 const int frame_peak_level_dbfs = static_cast<int>(
157 10 * log10(frame_peak_level * frame_peak_level + 1e-10f) - kdBFSOffset);
158
peah8a9b0f82016-10-04 00:06:04 -0700159 LOG(LS_INFO) << "Level Controller metrics: {"
160 << "Max noise power: " << max_noise_power_dbfs << " dBFS, "
161 << "Average noise power: " << average_noise_power_dbfs
162 << " dBFS, "
163 << "Max long term peak level: " << max_peak_level_dbfs
164 << " dBFS, "
peah3026ee82016-08-26 11:15:47 -0700165 << "Average long term peak level: " << average_peak_level_dbfs
peah8a9b0f82016-10-04 00:06:04 -0700166 << " dBFS, "
167 << "Max gain: " << max_gain_db << " dB, "
168 << "Average gain: " << average_gain_db << " dB, "
169 << "Long term peak level: " << long_term_peak_level_dbfs
170 << " dBFS, "
171 << "Last frame peak level: " << frame_peak_level_dbfs
172 << " dBFS"
173 << "}";
peah3026ee82016-08-26 11:15:47 -0700174
peahca4cac72016-06-29 15:26:12 -0700175 Reset();
176 }
177}
178
179LevelController::LevelController()
180 : data_dumper_(new ApmDataDumper(instance_count_)),
181 gain_applier_(data_dumper_.get()),
peahc19f3122016-10-07 14:54:10 -0700182 signal_classifier_(data_dumper_.get()),
183 peak_level_estimator_(kTargetLcPeakLeveldBFS) {
peahca4cac72016-06-29 15:26:12 -0700184 Initialize(AudioProcessing::kSampleRate48kHz);
185 ++instance_count_;
186}
187
188LevelController::~LevelController() {}
189
190void LevelController::Initialize(int sample_rate_hz) {
191 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
192 sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
193 sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
194 sample_rate_hz == AudioProcessing::kSampleRate48kHz);
195 data_dumper_->InitiateNewSetOfRecordings();
196 gain_selector_.Initialize(sample_rate_hz);
197 gain_applier_.Initialize(sample_rate_hz);
198 signal_classifier_.Initialize(sample_rate_hz);
199 noise_level_estimator_.Initialize(sample_rate_hz);
peahc19f3122016-10-07 14:54:10 -0700200 peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs);
peahca4cac72016-06-29 15:26:12 -0700201 saturating_gain_estimator_.Initialize();
202 metrics_.Initialize(sample_rate_hz);
203
204 last_gain_ = 1.0f;
205 sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);
206 dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;
207 std::fill(dc_level_, dc_level_ + arraysize(dc_level_), 0.f);
208}
209
210void LevelController::Process(AudioBuffer* audio) {
kwibergaf476c72016-11-28 15:21:39 -0800211 RTC_DCHECK_LT(0, audio->num_channels());
212 RTC_DCHECK_GE(2, audio->num_channels());
peahca4cac72016-06-29 15:26:12 -0700213 RTC_DCHECK_NE(0.f, dc_forgetting_factor_);
214 RTC_DCHECK(sample_rate_hz_);
215 data_dumper_->DumpWav("lc_input", audio->num_frames(),
216 audio->channels_const_f()[0], *sample_rate_hz_, 1);
217
218 // Remove DC level.
219 for (size_t k = 0; k < audio->num_channels(); ++k) {
220 UpdateAndRemoveDcLevel(
221 dc_forgetting_factor_, &dc_level_[k],
222 rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));
223 }
224
225 SignalClassifier::SignalType signal_type;
226 signal_classifier_.Analyze(*audio, &signal_type);
227 int tmp = static_cast<int>(signal_type);
228 data_dumper_->DumpRaw("lc_signal_type", 1, &tmp);
229
230 // Estimate the noise energy.
231 float noise_energy =
232 noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio));
233
234 // Estimate the overall signal peak level.
peah3026ee82016-08-26 11:15:47 -0700235 const float frame_peak_level = PeakLevel(*audio);
236 const float long_term_peak_level =
237 peak_level_estimator_.Analyze(signal_type, frame_peak_level);
peahca4cac72016-06-29 15:26:12 -0700238
239 float saturating_gain = saturating_gain_estimator_.GetGain();
240
241 // Compute the new gain to apply.
peahc19f3122016-10-07 14:54:10 -0700242 last_gain_ =
243 gain_selector_.GetNewGain(long_term_peak_level, noise_energy,
244 saturating_gain, gain_jumpstart_, signal_type);
245
246 // Unflag the jumpstart of the gain as it should only happen once.
247 gain_jumpstart_ = false;
peahca4cac72016-06-29 15:26:12 -0700248
249 // Apply the gain to the signal.
250 int num_saturations = gain_applier_.Process(last_gain_, audio);
251
252 // Estimate the gain that saturates the overall signal.
253 saturating_gain_estimator_.Update(last_gain_, num_saturations);
254
255 // Update the metrics.
peah3026ee82016-08-26 11:15:47 -0700256 metrics_.Update(long_term_peak_level, noise_energy, last_gain_,
257 frame_peak_level);
peahca4cac72016-06-29 15:26:12 -0700258
259 data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);
260 data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);
peah3026ee82016-08-26 11:15:47 -0700261 data_dumper_->DumpRaw("lc_peak_level", 1, &long_term_peak_level);
peahca4cac72016-06-29 15:26:12 -0700262 data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);
263
264 data_dumper_->DumpWav("lc_output", audio->num_frames(),
265 audio->channels_f()[0], *sample_rate_hz_, 1);
266}
267
peahc19f3122016-10-07 14:54:10 -0700268void LevelController::ApplyConfig(
269 const AudioProcessing::Config::LevelController& config) {
270 RTC_DCHECK(Validate(config));
271 config_ = config;
272 peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs);
273 gain_jumpstart_ = true;
274}
275
peah88ac8532016-09-12 16:47:25 -0700276std::string LevelController::ToString(
277 const AudioProcessing::Config::LevelController& config) {
278 std::stringstream ss;
279 ss << "{"
peahc19f3122016-10-07 14:54:10 -0700280 << "enabled: " << (config.enabled ? "true" : "false") << ", "
281 << "initial_peak_level_dbfs: " << config.initial_peak_level_dbfs << "}";
peah88ac8532016-09-12 16:47:25 -0700282 return ss.str();
283}
284
285bool LevelController::Validate(
286 const AudioProcessing::Config::LevelController& config) {
peahc19f3122016-10-07 14:54:10 -0700287 return (config.initial_peak_level_dbfs <
288 std::numeric_limits<float>::epsilon() &&
289 config.initial_peak_level_dbfs >
290 -(100.f + std::numeric_limits<float>::epsilon()));
peah88ac8532016-09-12 16:47:25 -0700291}
292
peahca4cac72016-06-29 15:26:12 -0700293} // namespace webrtc