Blame - modules/audio_processing/gain_controller2.cc - webrtc.googlesource.com/src

blob: 9beaf008239c6130d76a1300fdd8303050683fcd [file] [log] [blame]

alessiob	3ec96df	2017-05-22 06:57:06 -0700	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Alex Loiko	e36e8bb	2018-02-16 11:54:07 +0100	[diff] [blame]	11	#include "modules/audio_processing/gain_controller2.h"
alessiob	3ec96df	2017-05-22 06:57:06 -0700	[diff] [blame]	12
Alessio Bazzica	3890104	2021-10-14 12:14:21 +0200	[diff] [blame]	13	#include <memory>
				14	#include <utility>
				15
Alessio Bazzica	3e4c77f	2018-11-01 21:31:38 +0100	[diff] [blame]	16	#include "common_audio/include/audio_util.h"
Alessio Bazzica	dfba28e	2022-12-09 10:02:41 +0100	[diff] [blame]	17	#include "modules/audio_processing/agc2/agc2_common.h"
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	18	#include "modules/audio_processing/agc2/cpu_features.h"
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	19	#include "modules/audio_processing/audio_buffer.h"
Alex Loiko	e36e8bb	2018-02-16 11:54:07 +0100	[diff] [blame]	20	#include "modules/audio_processing/include/audio_frame_view.h"
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	21	#include "modules/audio_processing/logging/apm_data_dumper.h"
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	22	#include "rtc_base/checks.h"
Alessio Bazzica	08d2a70	2020-11-20 16:26:24 +0100	[diff] [blame]	23	#include "rtc_base/logging.h"
Jonas Olsson	366a50c	2018-09-06 13:41:30 +0200	[diff] [blame]	24	#include "rtc_base/strings/string_builder.h"
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	25	#include "system_wrappers/include/field_trial.h"
alessiob	3ec96df	2017-05-22 06:57:06 -0700	[diff] [blame]	26
				27	namespace webrtc {
Alessio Bazzica	82ea4ee	2021-10-07 09:21:02 +0200	[diff] [blame]	28	namespace {
Alessio Bazzica	3890104	2021-10-14 12:14:21 +0200	[diff] [blame]	29
				30	using Agc2Config = AudioProcessing::Config::GainController2;
Hanna Silen	a657490	2022-11-30 16:59:05 +0100	[diff] [blame]	31	using InputVolumeControllerConfig = InputVolumeController::Config;
Alessio Bazzica	3890104	2021-10-14 12:14:21 +0200	[diff] [blame]	32
// Duration of one processed frame, in milliseconds.
constexpr int kFrameLengthMs = 10;
// Interval between two consecutive limiter stats log lines, in milliseconds.
constexpr int kLogLimiterStatsPeriodMs = 30'000;
// The same interval expressed as a number of frames.
constexpr int kLogLimiterStatsPeriodNumFrames =
    kLogLimiterStatsPeriodMs / kFrameLengthMs;
static_assert(kLogLimiterStatsPeriodMs % kFrameLengthMs == 0,
              "The log period must be a whole number of frames.");
Alessio Bazzica	3890104	2021-10-14 12:14:21 +0200	[diff] [blame]	37
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	38	// Detects the available CPU features and applies any kill-switches.
				39	AvailableCpuFeatures GetAllowedCpuFeatures() {
				40	AvailableCpuFeatures features = GetAvailableCpuFeatures();
				41	if (field_trial::IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) {
				42	features.sse2 = false;
				43	}
				44	if (field_trial::IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) {
				45	features.avx2 = false;
				46	}
				47	if (field_trial::IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) {
				48	features.neon = false;
				49	}
				50	return features;
				51	}
				52
// Levels measured on one audio frame, both expressed in dBFS.
struct AudioLevels {
  // Level of the sample with the largest absolute value.
  float peak_dbfs;
  // Root mean square level.
  float rms_dbfs;
};
Alessio Bazzica	3890104	2021-10-14 12:14:21 +0200	[diff] [blame]	58
// Speech level estimate together with the estimator's confidence flag.
struct SpeechLevel {
  // Whether the estimator reports the level as confident.
  bool is_confident;
  // Estimated speech level in dBFS.
  float rms_dbfs;
};
				64
				65	// Computes the audio levels for the first channel in `frame`.
				66	AudioLevels ComputeAudioLevels(AudioFrameView<float> frame,
				67	ApmDataDumper& data_dumper) {
				68	float peak = 0.0f;
				69	float rms = 0.0f;
				70	for (const auto& x : frame.channel(0)) {
				71	peak = std::max(std::fabs(x), peak);
				72	rms += x * x;
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	73	}
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	74	AudioLevels levels{
				75	FloatS16ToDbfs(peak),
				76	FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
				77	data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
				78	data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
				79	return levels;
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	80	}
				81
Alessio Bazzica	82ea4ee	2021-10-07 09:21:02 +0200	[diff] [blame]	82	} // namespace
alessiob	3ec96df	2017-05-22 06:57:06 -0700	[diff] [blame]	83
Niels Möller	7a66900	2022-06-27 09:47:02 +0200	[diff] [blame]	84	std::atomic<int> GainController2::instance_count_(0);
alessiob	3ec96df	2017-05-22 06:57:06 -0700	[diff] [blame]	85
Hanna Silen	a657490	2022-11-30 16:59:05 +0100	[diff] [blame]	86	GainController2::GainController2(
				87	const Agc2Config& config,
				88	const InputVolumeControllerConfig& input_volume_controller_config,
				89	int sample_rate_hz,
				90	int num_channels,
				91	bool use_internal_vad)
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	92	: cpu_features_(GetAllowedCpuFeatures()),
Niels Möller	7a66900	2022-06-27 09:47:02 +0200	[diff] [blame]	93	data_dumper_(instance_count_.fetch_add(1) + 1),
Alessio Bazzica	60f675f	2021-10-15 15:36:11 +0200	[diff] [blame]	94	fixed_gain_applier_(
				95	/hard_clip_samples=/false,
				96	/initial_gain_factor=/DbToRatio(config.fixed_digital.gain_db)),
Alessio Bazzica	3890104	2021-10-14 12:14:21 +0200	[diff] [blame]	97	limiter_(sample_rate_hz, &data_dumper_, /histogram_name_prefix=/"Agc2"),
Alessio Bazzica	fcf1af3	2022-09-07 17:14:26 +0200	[diff] [blame]	98	calls_since_last_limiter_log_(0) {
Alessio Bazzica	3890104	2021-10-14 12:14:21 +0200	[diff] [blame]	99	RTC_DCHECK(Validate(config));
				100	data_dumper_.InitiateNewSetOfRecordings();
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	101
				102	if (config.input_volume_controller.enabled \|\|
				103	config.adaptive_digital.enabled) {
				104	// Create dependencies.
				105	speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
Alessio Bazzica	dfba28e	2022-12-09 10:02:41 +0100	[diff] [blame]	106	&data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
				107	if (use_internal_vad)
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	108	vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
Alessio Bazzica	dfba28e	2022-12-09 10:02:41 +0100	[diff] [blame]	109	kVadResetPeriodMs, cpu_features_, sample_rate_hz);
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	110	}
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	111
				112	if (config.input_volume_controller.enabled) {
				113	// Create controller.
				114	input_volume_controller_ = std::make_unique<InputVolumeController>(
				115	num_channels, input_volume_controller_config);
				116	// TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	117	input_volume_controller_->Initialize();
				118	}
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	119
				120	if (config.adaptive_digital.enabled) {
				121	// Create dependencies.
				122	noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
				123	saturation_protector_ = CreateSaturationProtector(
Alessio Bazzica	dfba28e	2022-12-09 10:02:41 +0100	[diff] [blame]	124	kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	125	&data_dumper_);
				126	// Create controller.
Alessio Bazzica	f72bc5f	2022-12-09 08:46:06 +0100	[diff] [blame]	127	adaptive_digital_controller_ =
				128	std::make_unique<AdaptiveDigitalGainController>(
Alessio Bazzica	dfba28e	2022-12-09 10:02:41 +0100	[diff] [blame]	129	&data_dumper_, config.adaptive_digital,
Alessio Bazzica	2bfa767	2022-12-09 14:16:30 +0100	[diff] [blame]	130	kAdjacentSpeechFramesThreshold);
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	131	}
Per Åhgren	2bd85ab	2020-01-03 10:36:34 +0100	[diff] [blame]	132	}
alessiob	3ec96df	2017-05-22 06:57:06 -0700	[diff] [blame]	133
				134	GainController2::~GainController2() = default;
				135
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	136	// TODO(webrtc:7494): Pass the flag also to the other components.
				137	void GainController2::SetCaptureOutputUsed(bool capture_output_used) {
				138	if (input_volume_controller_) {
				139	input_volume_controller_->HandleCaptureOutputUsedChange(
				140	capture_output_used);
				141	}
				142	}
				143
Alessio Bazzica	3890104	2021-10-14 12:14:21 +0200	[diff] [blame]	144	void GainController2::SetFixedGainDb(float gain_db) {
				145	const float gain_factor = DbToRatio(gain_db);
				146	if (fixed_gain_applier_.GetGainFactor() != gain_factor) {
				147	// Reset the limiter to quickly react on abrupt level changes caused by
				148	// large changes of the fixed gain.
				149	limiter_.Reset();
				150	}
				151	fixed_gain_applier_.SetGainFactor(gain_factor);
Alessio Bazzica	270f7b5	2017-10-13 11:05:17 +0200	[diff] [blame]	152	}
				153
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	154	void GainController2::Analyze(int applied_input_volume,
				155	const AudioBuffer& audio_buffer) {
Hanna Silen	597a2ba	2022-12-14 12:48:37 +0100	[diff] [blame^]	156	recommended_input_volume_ = absl::nullopt;
				157
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	158	RTC_DCHECK_GE(applied_input_volume, 0);
				159	RTC_DCHECK_LE(applied_input_volume, 255);
				160
				161	if (input_volume_controller_) {
Hanna Silen	597a2ba	2022-12-14 12:48:37 +0100	[diff] [blame^]	162	input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
				163	audio_buffer);
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	164	}
				165	}
				166
Hanna Silen	0c1ad29	2022-06-16 16:35:45 +0200	[diff] [blame]	167	void GainController2::Process(absl::optional<float> speech_probability,
Alessio Bazzica	fcf1af3	2022-09-07 17:14:26 +0200	[diff] [blame]	168	bool input_volume_changed,
Hanna Silen	0c1ad29	2022-06-16 16:35:45 +0200	[diff] [blame]	169	AudioBuffer* audio) {
Hanna Silen	597a2ba	2022-12-14 12:48:37 +0100	[diff] [blame^]	170	recommended_input_volume_ = absl::nullopt;
				171
Alessio Bazzica	fcf1af3	2022-09-07 17:14:26 +0200	[diff] [blame]	172	data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
				173	input_volume_changed);
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	174	if (input_volume_changed) {
				175	// Handle input volume changes.
				176	if (speech_level_estimator_)
				177	speech_level_estimator_->Reset();
				178	if (saturation_protector_)
				179	saturation_protector_->Reset();
Alessio Bazzica	fcf1af3	2022-09-07 17:14:26 +0200	[diff] [blame]	180	}
				181
Per Åhgren	d47941e	2019-08-22 11:51:13 +0200	[diff] [blame]	182	AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
Alex Loiko	e36e8bb	2018-02-16 11:54:07 +0100	[diff] [blame]	183	audio->num_frames());
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	184	// Compute speech probability.
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	185	if (vad_) {
				186	speech_probability = vad_->Analyze(float_frame);
Hanna Silen	0c1ad29	2022-06-16 16:35:45 +0200	[diff] [blame]	187	} else if (speech_probability.has_value()) {
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	188	RTC_DCHECK_GE(*speech_probability, 0.0f);
				189	RTC_DCHECK_LE(*speech_probability, 1.0f);
Hanna Silen	0c1ad29	2022-06-16 16:35:45 +0200	[diff] [blame]	190	}
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	191	// The speech probability may not be defined at this step (e.g., when the
				192	// fixed digital controller alone is enabled).
				193	if (speech_probability.has_value())
				194	data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);
				195
				196	// Compute audio, noise and speech levels.
				197	AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
				198	absl::optional<float> noise_rms_dbfs;
				199	if (noise_level_estimator_) {
				200	// TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
				201	// computation in `noise_level_estimator_`.
				202	noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
				203	}
				204	absl::optional<SpeechLevel> speech_level;
				205	if (speech_level_estimator_) {
				206	RTC_DCHECK(speech_probability.has_value());
				207	speech_level_estimator_->Update(
				208	audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
				209	speech_level =
				210	SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
				211	.rms_dbfs = speech_level_estimator_->level_dbfs()};
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	212	}
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	213
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	214	// Update the recommended input volume.
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	215	if (input_volume_controller_) {
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	216	RTC_DCHECK(speech_level.has_value());
Hanna Silen	27fed45	2022-11-22 15:00:58 +0100	[diff] [blame]	217	RTC_DCHECK(speech_probability.has_value());
				218	if (speech_probability.has_value()) {
Hanna Silen	597a2ba	2022-12-14 12:48:37 +0100	[diff] [blame^]	219	recommended_input_volume_ =
				220	input_volume_controller_->RecommendInputVolume(
				221	*speech_probability,
				222	speech_level->is_confident
				223	? absl::optional<float>(speech_level->rms_dbfs)
				224	: absl::nullopt);
Hanna Silen	27fed45	2022-11-22 15:00:58 +0100	[diff] [blame]	225	}
Hanna Silen	d7cfbe3	2022-11-02 19:12:20 +0100	[diff] [blame]	226	}
				227
Alessio Bazzica	82ea4ee	2021-10-07 09:21:02 +0200	[diff] [blame]	228	if (adaptive_digital_controller_) {
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	229	RTC_DCHECK(saturation_protector_);
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	230	RTC_DCHECK(speech_probability.has_value());
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	231	RTC_DCHECK(speech_level.has_value());
				232	saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
				233	speech_level->rms_dbfs);
				234	float headroom_db = saturation_protector_->HeadroomDb();
				235	data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
				236	float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
				237	data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
				238	RTC_DCHECK(noise_rms_dbfs.has_value());
Alessio Bazzica	b4d4ae2	2021-10-15 13:57:56 +0200	[diff] [blame]	239	adaptive_digital_controller_->Process(
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	240	/info=/{.speech_probability = *speech_probability,
				241	.speech_level_dbfs = speech_level->rms_dbfs,
				242	.speech_level_reliable = speech_level->is_confident,
				243	.noise_rms_dbfs = *noise_rms_dbfs,
				244	.headroom_db = headroom_db,
				245	.limiter_envelope_dbfs = limiter_envelope_dbfs},
				246	float_frame);
Alex Loiko	e583174	2018-08-24 11:28:36 +0200	[diff] [blame]	247	}
Alessio Bazzica	4366c54	2022-12-05 16:31:16 +0100	[diff] [blame]	248
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	249	// TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
				250	// computation in `limiter_`.
Alessio Bazzica	4366c54	2022-12-05 16:31:16 +0100	[diff] [blame]	251	fixed_gain_applier_.ApplyGain(float_frame);
				252
Alessio Bazzica	3e4c77f	2018-11-01 21:31:38 +0100	[diff] [blame]	253	limiter_.Process(float_frame);
Alessio Bazzica	08d2a70	2020-11-20 16:26:24 +0100	[diff] [blame]	254
Alessio Bazzica	82ea4ee	2021-10-07 09:21:02 +0200	[diff] [blame]	255	// Periodically log limiter stats.
				256	if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
Alessio Bazzica	08d2a70	2020-11-20 16:26:24 +0100	[diff] [blame]	257	calls_since_last_limiter_log_ = 0;
				258	InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	259	RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
Alessio Bazzica	08d2a70	2020-11-20 16:26:24 +0100	[diff] [blame]	260	<< " \| identity: " << stats.look_ups_identity_region
				261	<< " \| knee: " << stats.look_ups_knee_region
				262	<< " \| limiter: " << stats.look_ups_limiter_region
				263	<< " \| saturation: " << stats.look_ups_saturation_region;
				264	}
alessiob	3ec96df	2017-05-22 06:57:06 -0700	[diff] [blame]	265	}
				266
				267	bool GainController2::Validate(
				268	const AudioProcessing::Config::GainController2& config) {
Alessio Bazzica	0c83e15	2020-10-14 12:49:54 +0200	[diff] [blame]	269	const auto& fixed = config.fixed_digital;
				270	const auto& adaptive = config.adaptive_digital;
Alessio Bazzica	17e14fd	2022-12-07 17:08:45 +0100	[diff] [blame]	271	return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&
Alessio Bazzica	a850e6c	2021-10-04 13:35:55 +0200	[diff] [blame]	272	adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
				273	adaptive.initial_gain_db >= 0.0f &&
Alessio Bazzica	1ac4f2a	2021-09-24 14:59:30 +0200	[diff] [blame]	274	adaptive.max_gain_change_db_per_second > 0.0f &&
				275	adaptive.max_output_noise_level_dbfs <= 0.0f;
alessiob	3ec96df	2017-05-22 06:57:06 -0700	[diff] [blame]	276	}
				277
				278	} // namespace webrtc