blob: 9beaf008239c6130d76a1300fdd8303050683fcd [file] [log] [blame]
alessiob3ec96df2017-05-22 06:57:06 -07001/*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Alex Loikoe36e8bb2018-02-16 11:54:07 +010011#include "modules/audio_processing/gain_controller2.h"
alessiob3ec96df2017-05-22 06:57:06 -070012
Alessio Bazzica38901042021-10-14 12:14:21 +020013#include <memory>
14#include <utility>
15
Alessio Bazzica3e4c77f2018-11-01 21:31:38 +010016#include "common_audio/include/audio_util.h"
Alessio Bazzicadfba28e2022-12-09 10:02:41 +010017#include "modules/audio_processing/agc2/agc2_common.h"
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +020018#include "modules/audio_processing/agc2/cpu_features.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_processing/audio_buffer.h"
Alex Loikoe36e8bb2018-02-16 11:54:07 +010020#include "modules/audio_processing/include/audio_frame_view.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020021#include "modules/audio_processing/logging/apm_data_dumper.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020022#include "rtc_base/checks.h"
Alessio Bazzica08d2a702020-11-20 16:26:24 +010023#include "rtc_base/logging.h"
Jonas Olsson366a50c2018-09-06 13:41:30 +020024#include "rtc_base/strings/string_builder.h"
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +020025#include "system_wrappers/include/field_trial.h"
alessiob3ec96df2017-05-22 06:57:06 -070026
27namespace webrtc {
Alessio Bazzica82ea4ee2021-10-07 09:21:02 +020028namespace {
Alessio Bazzica38901042021-10-14 12:14:21 +020029
30using Agc2Config = AudioProcessing::Config::GainController2;
Hanna Silena6574902022-11-30 16:59:05 +010031using InputVolumeControllerConfig = InputVolumeController::Config;
Alessio Bazzica38901042021-10-14 12:14:21 +020032
// Frame duration and limiter stats logging period, both in milliseconds,
// followed by the same logging period expressed as a number of frames.
constexpr int kFrameLengthMs = 10;
constexpr int kLogLimiterStatsPeriodMs = 30'000;
constexpr int kLogLimiterStatsPeriodNumFrames =
    kLogLimiterStatsPeriodMs / kFrameLengthMs;
Alessio Bazzica38901042021-10-14 12:14:21 +020037
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +020038// Detects the available CPU features and applies any kill-switches.
39AvailableCpuFeatures GetAllowedCpuFeatures() {
40 AvailableCpuFeatures features = GetAvailableCpuFeatures();
41 if (field_trial::IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) {
42 features.sse2 = false;
43 }
44 if (field_trial::IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) {
45 features.avx2 = false;
46 }
47 if (field_trial::IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) {
48 features.neon = false;
49 }
50 return features;
51}
52
// Audio levels of a frame, expressed in dBFS. The member order matters since
// the struct is initialized positionally.
struct AudioLevels {
  // Peak level.
  float peak_dbfs;
  // Root mean square level.
  float rms_dbfs;
};
Alessio Bazzica38901042021-10-14 12:14:21 +020058
// Speech level estimate paired with its confidence flag.
struct SpeechLevel {
  // True if the level estimate is considered reliable.
  bool is_confident;
  // Estimated speech RMS level in dBFS.
  float rms_dbfs;
};
64
65// Computes the audio levels for the first channel in `frame`.
66AudioLevels ComputeAudioLevels(AudioFrameView<float> frame,
67 ApmDataDumper& data_dumper) {
68 float peak = 0.0f;
69 float rms = 0.0f;
70 for (const auto& x : frame.channel(0)) {
71 peak = std::max(std::fabs(x), peak);
72 rms += x * x;
Hanna Silend7cfbe32022-11-02 19:12:20 +010073 }
Alessio Bazzica17e14fd2022-12-07 17:08:45 +010074 AudioLevels levels{
75 FloatS16ToDbfs(peak),
76 FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
77 data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
78 data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
79 return levels;
Hanna Silend7cfbe32022-11-02 19:12:20 +010080}
81
Alessio Bazzica82ea4ee2021-10-07 09:21:02 +020082} // namespace
alessiob3ec96df2017-05-22 06:57:06 -070083
Niels Möller7a669002022-06-27 09:47:02 +020084std::atomic<int> GainController2::instance_count_(0);
alessiob3ec96df2017-05-22 06:57:06 -070085
Hanna Silena6574902022-11-30 16:59:05 +010086GainController2::GainController2(
87 const Agc2Config& config,
88 const InputVolumeControllerConfig& input_volume_controller_config,
89 int sample_rate_hz,
90 int num_channels,
91 bool use_internal_vad)
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +020092 : cpu_features_(GetAllowedCpuFeatures()),
Niels Möller7a669002022-06-27 09:47:02 +020093 data_dumper_(instance_count_.fetch_add(1) + 1),
Alessio Bazzica60f675f2021-10-15 15:36:11 +020094 fixed_gain_applier_(
95 /*hard_clip_samples=*/false,
96 /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
Alessio Bazzica38901042021-10-14 12:14:21 +020097 limiter_(sample_rate_hz, &data_dumper_, /*histogram_name_prefix=*/"Agc2"),
Alessio Bazzicafcf1af32022-09-07 17:14:26 +020098 calls_since_last_limiter_log_(0) {
Alessio Bazzica38901042021-10-14 12:14:21 +020099 RTC_DCHECK(Validate(config));
100 data_dumper_.InitiateNewSetOfRecordings();
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100101
102 if (config.input_volume_controller.enabled ||
103 config.adaptive_digital.enabled) {
104 // Create dependencies.
105 speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
Alessio Bazzicadfba28e2022-12-09 10:02:41 +0100106 &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
107 if (use_internal_vad)
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100108 vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
Alessio Bazzicadfba28e2022-12-09 10:02:41 +0100109 kVadResetPeriodMs, cpu_features_, sample_rate_hz);
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200110 }
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100111
112 if (config.input_volume_controller.enabled) {
113 // Create controller.
114 input_volume_controller_ = std::make_unique<InputVolumeController>(
115 num_channels, input_volume_controller_config);
116 // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
Hanna Silend7cfbe32022-11-02 19:12:20 +0100117 input_volume_controller_->Initialize();
118 }
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100119
120 if (config.adaptive_digital.enabled) {
121 // Create dependencies.
122 noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
123 saturation_protector_ = CreateSaturationProtector(
Alessio Bazzicadfba28e2022-12-09 10:02:41 +0100124 kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100125 &data_dumper_);
126 // Create controller.
Alessio Bazzicaf72bc5f2022-12-09 08:46:06 +0100127 adaptive_digital_controller_ =
128 std::make_unique<AdaptiveDigitalGainController>(
Alessio Bazzicadfba28e2022-12-09 10:02:41 +0100129 &data_dumper_, config.adaptive_digital,
Alessio Bazzica2bfa7672022-12-09 14:16:30 +0100130 kAdjacentSpeechFramesThreshold);
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100131 }
Per Åhgren2bd85ab2020-01-03 10:36:34 +0100132}
alessiob3ec96df2017-05-22 06:57:06 -0700133
134GainController2::~GainController2() = default;
135
Hanna Silend7cfbe32022-11-02 19:12:20 +0100136// TODO(webrtc:7494): Pass the flag also to the other components.
137void GainController2::SetCaptureOutputUsed(bool capture_output_used) {
138 if (input_volume_controller_) {
139 input_volume_controller_->HandleCaptureOutputUsedChange(
140 capture_output_used);
141 }
142}
143
Alessio Bazzica38901042021-10-14 12:14:21 +0200144void GainController2::SetFixedGainDb(float gain_db) {
145 const float gain_factor = DbToRatio(gain_db);
146 if (fixed_gain_applier_.GetGainFactor() != gain_factor) {
147 // Reset the limiter to quickly react on abrupt level changes caused by
148 // large changes of the fixed gain.
149 limiter_.Reset();
150 }
151 fixed_gain_applier_.SetGainFactor(gain_factor);
Alessio Bazzica270f7b52017-10-13 11:05:17 +0200152}
153
Hanna Silend7cfbe32022-11-02 19:12:20 +0100154void GainController2::Analyze(int applied_input_volume,
155 const AudioBuffer& audio_buffer) {
Hanna Silen597a2ba2022-12-14 12:48:37 +0100156 recommended_input_volume_ = absl::nullopt;
157
Hanna Silend7cfbe32022-11-02 19:12:20 +0100158 RTC_DCHECK_GE(applied_input_volume, 0);
159 RTC_DCHECK_LE(applied_input_volume, 255);
160
161 if (input_volume_controller_) {
Hanna Silen597a2ba2022-12-14 12:48:37 +0100162 input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
163 audio_buffer);
Hanna Silend7cfbe32022-11-02 19:12:20 +0100164 }
165}
166
Hanna Silen0c1ad292022-06-16 16:35:45 +0200167void GainController2::Process(absl::optional<float> speech_probability,
Alessio Bazzicafcf1af32022-09-07 17:14:26 +0200168 bool input_volume_changed,
Hanna Silen0c1ad292022-06-16 16:35:45 +0200169 AudioBuffer* audio) {
Hanna Silen597a2ba2022-12-14 12:48:37 +0100170 recommended_input_volume_ = absl::nullopt;
171
Alessio Bazzicafcf1af32022-09-07 17:14:26 +0200172 data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
173 input_volume_changed);
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100174 if (input_volume_changed) {
175 // Handle input volume changes.
176 if (speech_level_estimator_)
177 speech_level_estimator_->Reset();
178 if (saturation_protector_)
179 saturation_protector_->Reset();
Alessio Bazzicafcf1af32022-09-07 17:14:26 +0200180 }
181
Per Åhgrend47941e2019-08-22 11:51:13 +0200182 AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
Alex Loikoe36e8bb2018-02-16 11:54:07 +0100183 audio->num_frames());
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100184 // Compute speech probability.
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200185 if (vad_) {
186 speech_probability = vad_->Analyze(float_frame);
Hanna Silen0c1ad292022-06-16 16:35:45 +0200187 } else if (speech_probability.has_value()) {
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100188 RTC_DCHECK_GE(*speech_probability, 0.0f);
189 RTC_DCHECK_LE(*speech_probability, 1.0f);
Hanna Silen0c1ad292022-06-16 16:35:45 +0200190 }
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100191 // The speech probability may not be defined at this step (e.g., when the
192 // fixed digital controller alone is enabled).
193 if (speech_probability.has_value())
194 data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);
195
196 // Compute audio, noise and speech levels.
197 AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
198 absl::optional<float> noise_rms_dbfs;
199 if (noise_level_estimator_) {
200 // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
201 // computation in `noise_level_estimator_`.
202 noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
203 }
204 absl::optional<SpeechLevel> speech_level;
205 if (speech_level_estimator_) {
206 RTC_DCHECK(speech_probability.has_value());
207 speech_level_estimator_->Update(
208 audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
209 speech_level =
210 SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
211 .rms_dbfs = speech_level_estimator_->level_dbfs()};
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200212 }
Hanna Silend7cfbe32022-11-02 19:12:20 +0100213
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100214 // Update the recommended input volume.
Hanna Silend7cfbe32022-11-02 19:12:20 +0100215 if (input_volume_controller_) {
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100216 RTC_DCHECK(speech_level.has_value());
Hanna Silen27fed452022-11-22 15:00:58 +0100217 RTC_DCHECK(speech_probability.has_value());
218 if (speech_probability.has_value()) {
Hanna Silen597a2ba2022-12-14 12:48:37 +0100219 recommended_input_volume_ =
220 input_volume_controller_->RecommendInputVolume(
221 *speech_probability,
222 speech_level->is_confident
223 ? absl::optional<float>(speech_level->rms_dbfs)
224 : absl::nullopt);
Hanna Silen27fed452022-11-22 15:00:58 +0100225 }
Hanna Silend7cfbe32022-11-02 19:12:20 +0100226 }
227
Alessio Bazzica82ea4ee2021-10-07 09:21:02 +0200228 if (adaptive_digital_controller_) {
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100229 RTC_DCHECK(saturation_protector_);
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200230 RTC_DCHECK(speech_probability.has_value());
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100231 RTC_DCHECK(speech_level.has_value());
232 saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
233 speech_level->rms_dbfs);
234 float headroom_db = saturation_protector_->HeadroomDb();
235 data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
236 float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
237 data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
238 RTC_DCHECK(noise_rms_dbfs.has_value());
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200239 adaptive_digital_controller_->Process(
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100240 /*info=*/{.speech_probability = *speech_probability,
241 .speech_level_dbfs = speech_level->rms_dbfs,
242 .speech_level_reliable = speech_level->is_confident,
243 .noise_rms_dbfs = *noise_rms_dbfs,
244 .headroom_db = headroom_db,
245 .limiter_envelope_dbfs = limiter_envelope_dbfs},
246 float_frame);
Alex Loikoe5831742018-08-24 11:28:36 +0200247 }
Alessio Bazzica4366c542022-12-05 16:31:16 +0100248
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100249 // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
250 // computation in `limiter_`.
Alessio Bazzica4366c542022-12-05 16:31:16 +0100251 fixed_gain_applier_.ApplyGain(float_frame);
252
Alessio Bazzica3e4c77f2018-11-01 21:31:38 +0100253 limiter_.Process(float_frame);
Alessio Bazzica08d2a702020-11-20 16:26:24 +0100254
Alessio Bazzica82ea4ee2021-10-07 09:21:02 +0200255 // Periodically log limiter stats.
256 if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
Alessio Bazzica08d2a702020-11-20 16:26:24 +0100257 calls_since_last_limiter_log_ = 0;
258 InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100259 RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
Alessio Bazzica08d2a702020-11-20 16:26:24 +0100260 << " | identity: " << stats.look_ups_identity_region
261 << " | knee: " << stats.look_ups_knee_region
262 << " | limiter: " << stats.look_ups_limiter_region
263 << " | saturation: " << stats.look_ups_saturation_region;
264 }
alessiob3ec96df2017-05-22 06:57:06 -0700265}
266
267bool GainController2::Validate(
268 const AudioProcessing::Config::GainController2& config) {
Alessio Bazzica0c83e152020-10-14 12:49:54 +0200269 const auto& fixed = config.fixed_digital;
270 const auto& adaptive = config.adaptive_digital;
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100271 return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&
Alessio Bazzicaa850e6c2021-10-04 13:35:55 +0200272 adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
273 adaptive.initial_gain_db >= 0.0f &&
Alessio Bazzica1ac4f2a2021-09-24 14:59:30 +0200274 adaptive.max_gain_change_db_per_second > 0.0f &&
275 adaptive.max_output_noise_level_dbfs <= 0.0f;
alessiob3ec96df2017-05-22 06:57:06 -0700276}
277
278} // namespace webrtc