blob: d25ce7a09446a5ddd2607428c11001b405d556bd [file] [log] [blame]
alessiob3ec96df2017-05-22 06:57:06 -07001/*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Alex Loikoe36e8bb2018-02-16 11:54:07 +010011#include "modules/audio_processing/gain_controller2.h"
alessiob3ec96df2017-05-22 06:57:06 -070012
#include <algorithm>
#include <cmath>
#include <memory>
#include <utility>
15
Alessio Bazzica3e4c77f2018-11-01 21:31:38 +010016#include "common_audio/include/audio_util.h"
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +020017#include "modules/audio_processing/agc2/cpu_features.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020018#include "modules/audio_processing/audio_buffer.h"
Alex Loikoe36e8bb2018-02-16 11:54:07 +010019#include "modules/audio_processing/include/audio_frame_view.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020020#include "modules/audio_processing/logging/apm_data_dumper.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020021#include "rtc_base/checks.h"
Alessio Bazzica08d2a702020-11-20 16:26:24 +010022#include "rtc_base/logging.h"
Jonas Olsson366a50c2018-09-06 13:41:30 +020023#include "rtc_base/strings/string_builder.h"
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +020024#include "system_wrappers/include/field_trial.h"
alessiob3ec96df2017-05-22 06:57:06 -070025
26namespace webrtc {
Alessio Bazzica82ea4ee2021-10-07 09:21:02 +020027namespace {
Alessio Bazzica38901042021-10-14 12:14:21 +020028
29using Agc2Config = AudioProcessing::Config::GainController2;
Hanna Silena6574902022-11-30 16:59:05 +010030using InputVolumeControllerConfig = InputVolumeController::Config;
Alessio Bazzica38901042021-10-14 12:14:21 +020031
// Length of one audio frame, in milliseconds.
constexpr int kFrameLengthMs = 10;
// Time between two consecutive limiter stats log lines, in milliseconds.
constexpr int kLogLimiterStatsPeriodMs = 30'000;
// The logging period above expressed as a number of frames.
constexpr int kLogLimiterStatsPeriodNumFrames =
    kLogLimiterStatsPeriodMs / kFrameLengthMs;
Alessio Bazzica38901042021-10-14 12:14:21 +020036
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +020037// Detects the available CPU features and applies any kill-switches.
38AvailableCpuFeatures GetAllowedCpuFeatures() {
39 AvailableCpuFeatures features = GetAvailableCpuFeatures();
40 if (field_trial::IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) {
41 features.sse2 = false;
42 }
43 if (field_trial::IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) {
44 features.avx2 = false;
45 }
46 if (field_trial::IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) {
47 features.neon = false;
48 }
49 return features;
50}
51
// Audio levels of a frame, both expressed in dBFS.
struct AudioLevels {
  // Peak level.
  float peak_dbfs;
  // RMS level.
  float rms_dbfs;
};
Alessio Bazzica38901042021-10-14 12:14:21 +020057
// Speech level estimate together with its confidence flag.
struct SpeechLevel {
  // Whether the estimate is considered confident by the estimator.
  bool is_confident;
  // Estimated speech RMS level in dBFS.
  float rms_dbfs;
};
63
64// Computes the audio levels for the first channel in `frame`.
65AudioLevels ComputeAudioLevels(AudioFrameView<float> frame,
66 ApmDataDumper& data_dumper) {
67 float peak = 0.0f;
68 float rms = 0.0f;
69 for (const auto& x : frame.channel(0)) {
70 peak = std::max(std::fabs(x), peak);
71 rms += x * x;
Hanna Silend7cfbe32022-11-02 19:12:20 +010072 }
Alessio Bazzica17e14fd2022-12-07 17:08:45 +010073 AudioLevels levels{
74 FloatS16ToDbfs(peak),
75 FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
76 data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
77 data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
78 return levels;
Hanna Silend7cfbe32022-11-02 19:12:20 +010079}
80
Alessio Bazzica82ea4ee2021-10-07 09:21:02 +020081} // namespace
alessiob3ec96df2017-05-22 06:57:06 -070082
Niels Möller7a669002022-06-27 09:47:02 +020083std::atomic<int> GainController2::instance_count_(0);
alessiob3ec96df2017-05-22 06:57:06 -070084
Hanna Silena6574902022-11-30 16:59:05 +010085GainController2::GainController2(
86 const Agc2Config& config,
87 const InputVolumeControllerConfig& input_volume_controller_config,
88 int sample_rate_hz,
89 int num_channels,
90 bool use_internal_vad)
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +020091 : cpu_features_(GetAllowedCpuFeatures()),
Niels Möller7a669002022-06-27 09:47:02 +020092 data_dumper_(instance_count_.fetch_add(1) + 1),
Alessio Bazzica60f675f2021-10-15 15:36:11 +020093 fixed_gain_applier_(
94 /*hard_clip_samples=*/false,
95 /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
Alessio Bazzica38901042021-10-14 12:14:21 +020096 limiter_(sample_rate_hz, &data_dumper_, /*histogram_name_prefix=*/"Agc2"),
Alessio Bazzicafcf1af32022-09-07 17:14:26 +020097 calls_since_last_limiter_log_(0) {
Alessio Bazzica38901042021-10-14 12:14:21 +020098 RTC_DCHECK(Validate(config));
99 data_dumper_.InitiateNewSetOfRecordings();
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100100
101 if (config.input_volume_controller.enabled ||
102 config.adaptive_digital.enabled) {
103 // Create dependencies.
104 speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
105 &data_dumper_, config.adaptive_digital);
106 if (use_internal_vad) {
107 // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive
108 // digital to gain controller 2 config.
109 vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
110 config.adaptive_digital.vad_reset_period_ms, cpu_features_,
111 sample_rate_hz);
112 }
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200113 }
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100114
115 if (config.input_volume_controller.enabled) {
116 // Create controller.
117 input_volume_controller_ = std::make_unique<InputVolumeController>(
118 num_channels, input_volume_controller_config);
119 // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
Hanna Silend7cfbe32022-11-02 19:12:20 +0100120 input_volume_controller_->Initialize();
121 }
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100122
123 if (config.adaptive_digital.enabled) {
124 // Create dependencies.
125 noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
126 saturation_protector_ = CreateSaturationProtector(
127 kSaturationProtectorInitialHeadroomDb,
128 config.adaptive_digital.adjacent_speech_frames_threshold,
129 &data_dumper_);
130 // Create controller.
Alessio Bazzicaf72bc5f2022-12-09 08:46:06 +0100131 adaptive_digital_controller_ =
132 std::make_unique<AdaptiveDigitalGainController>(
133 &data_dumper_, config.adaptive_digital, sample_rate_hz,
134 num_channels);
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100135 }
Per Åhgren2bd85ab2020-01-03 10:36:34 +0100136}
alessiob3ec96df2017-05-22 06:57:06 -0700137
138GainController2::~GainController2() = default;
139
Hanna Silend7cfbe32022-11-02 19:12:20 +0100140// TODO(webrtc:7494): Pass the flag also to the other components.
141void GainController2::SetCaptureOutputUsed(bool capture_output_used) {
142 if (input_volume_controller_) {
143 input_volume_controller_->HandleCaptureOutputUsedChange(
144 capture_output_used);
145 }
146}
147
Alessio Bazzica38901042021-10-14 12:14:21 +0200148void GainController2::SetFixedGainDb(float gain_db) {
149 const float gain_factor = DbToRatio(gain_db);
150 if (fixed_gain_applier_.GetGainFactor() != gain_factor) {
151 // Reset the limiter to quickly react on abrupt level changes caused by
152 // large changes of the fixed gain.
153 limiter_.Reset();
154 }
155 fixed_gain_applier_.SetGainFactor(gain_factor);
Alessio Bazzica270f7b52017-10-13 11:05:17 +0200156}
157
Hanna Silend7cfbe32022-11-02 19:12:20 +0100158void GainController2::Analyze(int applied_input_volume,
159 const AudioBuffer& audio_buffer) {
160 RTC_DCHECK_GE(applied_input_volume, 0);
161 RTC_DCHECK_LE(applied_input_volume, 255);
162
163 if (input_volume_controller_) {
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100164 // TODO(bugs.webrtc.org/7494): Pass applied volume to `AnalyzePreProcess()`.
Hanna Silend7cfbe32022-11-02 19:12:20 +0100165 input_volume_controller_->set_stream_analog_level(applied_input_volume);
166 input_volume_controller_->AnalyzePreProcess(audio_buffer);
167 }
168}
169
170absl::optional<int> GainController2::GetRecommendedInputVolume() const {
171 return input_volume_controller_
172 ? absl::optional<int>(
173 input_volume_controller_->recommended_analog_level())
174 : absl::nullopt;
175}
176
Hanna Silen0c1ad292022-06-16 16:35:45 +0200177void GainController2::Process(absl::optional<float> speech_probability,
Alessio Bazzicafcf1af32022-09-07 17:14:26 +0200178 bool input_volume_changed,
Hanna Silen0c1ad292022-06-16 16:35:45 +0200179 AudioBuffer* audio) {
Alessio Bazzicafcf1af32022-09-07 17:14:26 +0200180 data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
181 input_volume_changed);
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100182 if (input_volume_changed) {
183 // Handle input volume changes.
184 if (speech_level_estimator_)
185 speech_level_estimator_->Reset();
186 if (saturation_protector_)
187 saturation_protector_->Reset();
Alessio Bazzicafcf1af32022-09-07 17:14:26 +0200188 }
189
Per Åhgrend47941e2019-08-22 11:51:13 +0200190 AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
Alex Loikoe36e8bb2018-02-16 11:54:07 +0100191 audio->num_frames());
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100192 // Compute speech probability.
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200193 if (vad_) {
194 speech_probability = vad_->Analyze(float_frame);
Hanna Silen0c1ad292022-06-16 16:35:45 +0200195 } else if (speech_probability.has_value()) {
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100196 RTC_DCHECK_GE(*speech_probability, 0.0f);
197 RTC_DCHECK_LE(*speech_probability, 1.0f);
Hanna Silen0c1ad292022-06-16 16:35:45 +0200198 }
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100199 // The speech probability may not be defined at this step (e.g., when the
200 // fixed digital controller alone is enabled).
201 if (speech_probability.has_value())
202 data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);
203
204 // Compute audio, noise and speech levels.
205 AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
206 absl::optional<float> noise_rms_dbfs;
207 if (noise_level_estimator_) {
208 // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
209 // computation in `noise_level_estimator_`.
210 noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
211 }
212 absl::optional<SpeechLevel> speech_level;
213 if (speech_level_estimator_) {
214 RTC_DCHECK(speech_probability.has_value());
215 speech_level_estimator_->Update(
216 audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
217 speech_level =
218 SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
219 .rms_dbfs = speech_level_estimator_->level_dbfs()};
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200220 }
Hanna Silend7cfbe32022-11-02 19:12:20 +0100221
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100222 // Update the recommended input volume.
Hanna Silend7cfbe32022-11-02 19:12:20 +0100223 if (input_volume_controller_) {
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100224 RTC_DCHECK(speech_level.has_value());
Hanna Silen27fed452022-11-22 15:00:58 +0100225 RTC_DCHECK(speech_probability.has_value());
226 if (speech_probability.has_value()) {
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100227 // TODO(bugs.webrtc.org/7494): Rename `Process()` to `RecommendVolume()`
228 // and let it return the recommended input volume.
229 input_volume_controller_->Process(
230 *speech_probability,
231 speech_level->is_confident
232 ? absl::optional<float>(speech_level->rms_dbfs)
233 : absl::nullopt);
Hanna Silen27fed452022-11-22 15:00:58 +0100234 }
Hanna Silend7cfbe32022-11-02 19:12:20 +0100235 }
236
Alessio Bazzica82ea4ee2021-10-07 09:21:02 +0200237 if (adaptive_digital_controller_) {
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100238 RTC_DCHECK(saturation_protector_);
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200239 RTC_DCHECK(speech_probability.has_value());
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100240 RTC_DCHECK(speech_level.has_value());
241 saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
242 speech_level->rms_dbfs);
243 float headroom_db = saturation_protector_->HeadroomDb();
244 data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
245 float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
246 data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
247 RTC_DCHECK(noise_rms_dbfs.has_value());
Alessio Bazzicab4d4ae22021-10-15 13:57:56 +0200248 adaptive_digital_controller_->Process(
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100249 /*info=*/{.speech_probability = *speech_probability,
250 .speech_level_dbfs = speech_level->rms_dbfs,
251 .speech_level_reliable = speech_level->is_confident,
252 .noise_rms_dbfs = *noise_rms_dbfs,
253 .headroom_db = headroom_db,
254 .limiter_envelope_dbfs = limiter_envelope_dbfs},
255 float_frame);
Alex Loikoe5831742018-08-24 11:28:36 +0200256 }
Alessio Bazzica4366c542022-12-05 16:31:16 +0100257
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100258 // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
259 // computation in `limiter_`.
Alessio Bazzica4366c542022-12-05 16:31:16 +0100260 fixed_gain_applier_.ApplyGain(float_frame);
261
Alessio Bazzica3e4c77f2018-11-01 21:31:38 +0100262 limiter_.Process(float_frame);
Alessio Bazzica08d2a702020-11-20 16:26:24 +0100263
Alessio Bazzica82ea4ee2021-10-07 09:21:02 +0200264 // Periodically log limiter stats.
265 if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
Alessio Bazzica08d2a702020-11-20 16:26:24 +0100266 calls_since_last_limiter_log_ = 0;
267 InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100268 RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
Alessio Bazzica08d2a702020-11-20 16:26:24 +0100269 << " | identity: " << stats.look_ups_identity_region
270 << " | knee: " << stats.look_ups_knee_region
271 << " | limiter: " << stats.look_ups_limiter_region
272 << " | saturation: " << stats.look_ups_saturation_region;
273 }
alessiob3ec96df2017-05-22 06:57:06 -0700274}
275
276bool GainController2::Validate(
277 const AudioProcessing::Config::GainController2& config) {
Alessio Bazzica0c83e152020-10-14 12:49:54 +0200278 const auto& fixed = config.fixed_digital;
279 const auto& adaptive = config.adaptive_digital;
Alessio Bazzica17e14fd2022-12-07 17:08:45 +0100280 return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&
Alessio Bazzicaa850e6c2021-10-04 13:35:55 +0200281 adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
282 adaptive.initial_gain_db >= 0.0f &&
Alessio Bazzica1ac4f2a2021-09-24 14:59:30 +0200283 adaptive.max_gain_change_db_per_second > 0.0f &&
284 adaptive.max_output_noise_level_dbfs <= 0.0f;
alessiob3ec96df2017-05-22 06:57:06 -0700285}
286
287} // namespace webrtc