Blame - modules/audio_processing/level_controller/level_controller.cc - webrtc.googlesource.com/src

blob: 6343ef7a019da55987724d2a303603a721c85088 [file] [log] [blame]

peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	1	/*
				2	* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	11	#include "modules/audio_processing/level_controller/level_controller.h"
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	12
				13	#include <math.h>
				14	#include <algorithm>
				15	#include <numeric>
				16
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	17	#include "api/array_view.h"
				18	#include "modules/audio_processing/audio_buffer.h"
				19	#include "modules/audio_processing/level_controller/gain_applier.h"
				20	#include "modules/audio_processing/level_controller/gain_selector.h"
				21	#include "modules/audio_processing/level_controller/noise_level_estimator.h"
				22	#include "modules/audio_processing/level_controller/peak_level_estimator.h"
				23	#include "modules/audio_processing/level_controller/saturating_gain_estimator.h"
				24	#include "modules/audio_processing/level_controller/signal_classifier.h"
				25	#include "modules/audio_processing/logging/apm_data_dumper.h"
				26	#include "rtc_base/arraysize.h"
				27	#include "rtc_base/checks.h"
				28	#include "rtc_base/logging.h"
				29	#include "system_wrappers/include/metrics.h"
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	30
				31	namespace webrtc {
				32	namespace {
				33
				34	void UpdateAndRemoveDcLevel(float forgetting_factor,
				35	float* dc_level,
				36	rtc::ArrayView<float> x) {
				37	RTC_DCHECK(!x.empty());
				38	float mean =
maxmorin	3f746ea	2016-08-25 04:00:20 -0700	[diff] [blame]	39	std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size());
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	40	dc_level += forgetting_factor (mean - *dc_level);
				41
				42	for (float& v : x) {
				43	v -= *dc_level;
				44	}
				45	}
				46
				47	float FrameEnergy(const AudioBuffer& audio) {
				48	float energy = 0.f;
				49	for (size_t k = 0; k < audio.num_channels(); ++k) {
				50	float channel_energy =
				51	std::accumulate(audio.channels_const_f()[k],
oprypin	30431d5	2017-09-05 09:49:30 -0700	[diff] [blame]	52	audio.channels_const_f()[k] + audio.num_frames(), 0.f,
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	53	[](float a, float b) -> float { return a + b * b; });
				54	energy = std::max(channel_energy, energy);
				55	}
				56	return energy;
				57	}
				58
				59	float PeakLevel(const AudioBuffer& audio) {
				60	float peak_level = 0.f;
				61	for (size_t k = 0; k < audio.num_channels(); ++k) {
kjellander	7c85658	2017-02-26 19:53:40 -0800	[diff] [blame]	62	auto* channel_peak_level = std::max_element(
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	63	audio.channels_const_f()[k],
				64	audio.channels_const_f()[k] + audio.num_frames(),
				65	[](float a, float b) { return std::abs(a) < std::abs(b); });
				66	peak_level = std::max(*channel_peak_level, peak_level);
				67	}
				68	return peak_level;
				69	}
				70
				71	const int kMetricsFrameInterval = 1000;
				72
				73	} // namespace
				74
				75	int LevelController::instance_count_ = 0;
				76
				77	void LevelController::Metrics::Initialize(int sample_rate_hz) {
				78	RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz \|\|
				79	sample_rate_hz == AudioProcessing::kSampleRate16kHz \|\|
				80	sample_rate_hz == AudioProcessing::kSampleRate32kHz \|\|
				81	sample_rate_hz == AudioProcessing::kSampleRate48kHz);
				82
				83	Reset();
				84	frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);
				85	}
				86
				87	void LevelController::Metrics::Reset() {
				88	metrics_frame_counter_ = 0;
				89	gain_sum_ = 0.f;
				90	peak_level_sum_ = 0.f;
				91	noise_energy_sum_ = 0.f;
				92	max_gain_ = 0.f;
				93	max_peak_level_ = 0.f;
				94	max_noise_energy_ = 0.f;
				95	}
				96
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	97	void LevelController::Metrics::Update(float long_term_peak_level,
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	98	float noise_energy,
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	99	float gain,
				100	float frame_peak_level) {
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	101	const float kdBFSOffset = 90.3090f;
				102	gain_sum_ += gain;
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	103	peak_level_sum_ += long_term_peak_level;
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	104	noise_energy_sum_ += noise_energy;
				105	max_gain_ = std::max(max_gain_, gain);
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	106	max_peak_level_ = std::max(max_peak_level_, long_term_peak_level);
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	107	max_noise_energy_ = std::max(max_noise_energy_, noise_energy);
				108
				109	++metrics_frame_counter_;
				110	if (metrics_frame_counter_ == kMetricsFrameInterval) {
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	111	RTC_DCHECK_LT(0, frame_length_);
				112	RTC_DCHECK_LT(0, kMetricsFrameInterval);
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	113
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	114	const int max_noise_power_dbfs = static_cast<int>(
				115	10 * log10(max_noise_energy_ / frame_length_ + 1e-10f) - kdBFSOffset);
				116	RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxNoisePower",
				117	max_noise_power_dbfs, -90, 0, 50);
				118
				119	const int average_noise_power_dbfs = static_cast<int>(
				120	10 * log10(noise_energy_sum_ / (frame_length_ * kMetricsFrameInterval) +
				121	1e-10f) -
				122	kdBFSOffset);
				123	RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageNoisePower",
				124	average_noise_power_dbfs, -90, 0, 50);
				125
				126	const int max_peak_level_dbfs = static_cast<int>(
				127	10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f) - kdBFSOffset);
				128	RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxPeakLevel",
				129	max_peak_level_dbfs, -90, 0, 50);
				130
				131	const int average_peak_level_dbfs = static_cast<int>(
				132	10 * log10(peak_level_sum_ * peak_level_sum_ /
				133	(kMetricsFrameInterval * kMetricsFrameInterval) +
				134	1e-10f) -
				135	kdBFSOffset);
				136	RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AveragePeakLevel",
				137	average_peak_level_dbfs, -90, 0, 50);
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	138
				139	RTC_DCHECK_LE(1.f, max_gain_);
				140	RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	141
				142	const int max_gain_db = static_cast<int>(10 * log10(max_gain_ * max_gain_));
				143	RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain", max_gain_db, 0,
				144	33, 30);
				145
				146	const int average_gain_db = static_cast<int>(
				147	10 * log10(gain_sum_ * gain_sum_ /
				148	(kMetricsFrameInterval * kMetricsFrameInterval)));
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	149	RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain",
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	150	average_gain_db, 0, 33, 30);
				151
				152	const int long_term_peak_level_dbfs = static_cast<int>(
				153	10 * log10(long_term_peak_level * long_term_peak_level + 1e-10f) -
				154	kdBFSOffset);
				155
				156	const int frame_peak_level_dbfs = static_cast<int>(
				157	10 * log10(frame_peak_level * frame_peak_level + 1e-10f) - kdBFSOffset);
				158
Mirko Bonadei	675513b	2017-11-09 11:09:25 +0100	[diff] [blame^]	159	RTC_LOG(LS_INFO) << "Level Controller metrics: {"
				160	<< "Max noise power: " << max_noise_power_dbfs << " dBFS, "
				161	<< "Average noise power: " << average_noise_power_dbfs
				162	<< " dBFS, "
				163	<< "Max long term peak level: " << max_peak_level_dbfs
				164	<< " dBFS, "
				165	<< "Average long term peak level: "
				166	<< average_peak_level_dbfs << " dBFS, "
				167	<< "Max gain: " << max_gain_db << " dB, "
				168	<< "Average gain: " << average_gain_db << " dB, "
				169	<< "Long term peak level: " << long_term_peak_level_dbfs
				170	<< " dBFS, "
				171	<< "Last frame peak level: " << frame_peak_level_dbfs
				172	<< " dBFS"
				173	<< "}";
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	174
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	175	Reset();
				176	}
				177	}
				178
				179	LevelController::LevelController()
				180	: data_dumper_(new ApmDataDumper(instance_count_)),
				181	gain_applier_(data_dumper_.get()),
peah	c19f312	2016-10-07 14:54:10 -0700	[diff] [blame]	182	signal_classifier_(data_dumper_.get()),
				183	peak_level_estimator_(kTargetLcPeakLeveldBFS) {
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	184	Initialize(AudioProcessing::kSampleRate48kHz);
				185	++instance_count_;
				186	}
				187
				188	LevelController::~LevelController() {}
				189
				190	void LevelController::Initialize(int sample_rate_hz) {
				191	RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz \|\|
				192	sample_rate_hz == AudioProcessing::kSampleRate16kHz \|\|
				193	sample_rate_hz == AudioProcessing::kSampleRate32kHz \|\|
				194	sample_rate_hz == AudioProcessing::kSampleRate48kHz);
				195	data_dumper_->InitiateNewSetOfRecordings();
				196	gain_selector_.Initialize(sample_rate_hz);
				197	gain_applier_.Initialize(sample_rate_hz);
				198	signal_classifier_.Initialize(sample_rate_hz);
				199	noise_level_estimator_.Initialize(sample_rate_hz);
peah	c19f312	2016-10-07 14:54:10 -0700	[diff] [blame]	200	peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs);
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	201	saturating_gain_estimator_.Initialize();
				202	metrics_.Initialize(sample_rate_hz);
				203
				204	last_gain_ = 1.0f;
				205	sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);
				206	dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;
				207	std::fill(dc_level_, dc_level_ + arraysize(dc_level_), 0.f);
				208	}
				209
				210	void LevelController::Process(AudioBuffer* audio) {
kwiberg	af476c7	2016-11-28 15:21:39 -0800	[diff] [blame]	211	RTC_DCHECK_LT(0, audio->num_channels());
				212	RTC_DCHECK_GE(2, audio->num_channels());
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	213	RTC_DCHECK_NE(0.f, dc_forgetting_factor_);
				214	RTC_DCHECK(sample_rate_hz_);
				215	data_dumper_->DumpWav("lc_input", audio->num_frames(),
				216	audio->channels_const_f()[0], *sample_rate_hz_, 1);
				217
				218	// Remove DC level.
				219	for (size_t k = 0; k < audio->num_channels(); ++k) {
				220	UpdateAndRemoveDcLevel(
				221	dc_forgetting_factor_, &dc_level_[k],
				222	rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));
				223	}
				224
				225	SignalClassifier::SignalType signal_type;
				226	signal_classifier_.Analyze(*audio, &signal_type);
				227	int tmp = static_cast<int>(signal_type);
				228	data_dumper_->DumpRaw("lc_signal_type", 1, &tmp);
				229
				230	// Estimate the noise energy.
				231	float noise_energy =
				232	noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio));
				233
				234	// Estimate the overall signal peak level.
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	235	const float frame_peak_level = PeakLevel(*audio);
				236	const float long_term_peak_level =
				237	peak_level_estimator_.Analyze(signal_type, frame_peak_level);
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	238
				239	float saturating_gain = saturating_gain_estimator_.GetGain();
				240
				241	// Compute the new gain to apply.
peah	c19f312	2016-10-07 14:54:10 -0700	[diff] [blame]	242	last_gain_ =
				243	gain_selector_.GetNewGain(long_term_peak_level, noise_energy,
				244	saturating_gain, gain_jumpstart_, signal_type);
				245
				246	// Unflag the jumpstart of the gain as it should only happen once.
				247	gain_jumpstart_ = false;
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	248
				249	// Apply the gain to the signal.
				250	int num_saturations = gain_applier_.Process(last_gain_, audio);
				251
				252	// Estimate the gain that saturates the overall signal.
				253	saturating_gain_estimator_.Update(last_gain_, num_saturations);
				254
				255	// Update the metrics.
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	256	metrics_.Update(long_term_peak_level, noise_energy, last_gain_,
				257	frame_peak_level);
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	258
				259	data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);
				260	data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);
peah	3026ee8	2016-08-26 11:15:47 -0700	[diff] [blame]	261	data_dumper_->DumpRaw("lc_peak_level", 1, &long_term_peak_level);
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	262	data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);
				263
				264	data_dumper_->DumpWav("lc_output", audio->num_frames(),
				265	audio->channels_f()[0], *sample_rate_hz_, 1);
				266	}
				267
peah	c19f312	2016-10-07 14:54:10 -0700	[diff] [blame]	268	void LevelController::ApplyConfig(
				269	const AudioProcessing::Config::LevelController& config) {
				270	RTC_DCHECK(Validate(config));
				271	config_ = config;
				272	peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs);
				273	gain_jumpstart_ = true;
				274	}
				275
peah	88ac853	2016-09-12 16:47:25 -0700	[diff] [blame]	276	std::string LevelController::ToString(
				277	const AudioProcessing::Config::LevelController& config) {
				278	std::stringstream ss;
				279	ss << "{"
peah	c19f312	2016-10-07 14:54:10 -0700	[diff] [blame]	280	<< "enabled: " << (config.enabled ? "true" : "false") << ", "
				281	<< "initial_peak_level_dbfs: " << config.initial_peak_level_dbfs << "}";
peah	88ac853	2016-09-12 16:47:25 -0700	[diff] [blame]	282	return ss.str();
				283	}
				284
				285	bool LevelController::Validate(
				286	const AudioProcessing::Config::LevelController& config) {
peah	c19f312	2016-10-07 14:54:10 -0700	[diff] [blame]	287	return (config.initial_peak_level_dbfs <
				288	std::numeric_limits<float>::epsilon() &&
				289	config.initial_peak_level_dbfs >
				290	-(100.f + std::numeric_limits<float>::epsilon()));
peah	88ac853	2016-09-12 16:47:25 -0700	[diff] [blame]	291	}
				292
peah	ca4cac7	2016-06-29 15:26:12 -0700	[diff] [blame]	293	} // namespace webrtc