Blame - modules/audio_processing/agc/clipping_predictor.cc - webrtc.googlesource.com/src

blob: deb95f633e6cac6c9787c4aa64ac1527d2b70403 [file] [log] [blame]

Hanna Silen	4b3a061	2021-06-02 23:03:24 +0200	[diff] [blame^]	1	/*
				2	* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#include "modules/audio_processing/agc/clipping_predictor.h"
				12
				13	#include <algorithm>
				14	#include <memory>
				15
				16	#include "common_audio/include/audio_util.h"
				17	#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
				18	#include "modules/audio_processing/agc/gain_map_internal.h"
				19	#include "rtc_base/checks.h"
				20	#include "rtc_base/logging.h"
				21	#include "rtc_base/numerics/safe_minmax.h"
				22
				23	namespace webrtc {
				24	namespace {
				25
				26	constexpr int kClippingPredictorMaxGainChange = 15;
				27
				28	using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
				29	AnalogGainController::ClippingPredictor;
				30
				31	// Estimates the new level from the gain error; a copy of the function
				32	// `LevelFromGainError` in agc_manager_direct.cc.
				33	int LevelFromGainError(int gain_error,
				34	int level,
				35	int min_mic_level,
				36	int max_mic_level) {
				37	RTC_DCHECK_GE(level, 0);
				38	RTC_DCHECK_LE(level, max_mic_level);
				39	if (gain_error == 0) {
				40	return level;
				41	}
				42	int new_level = level;
				43	if (gain_error > 0) {
				44	while (kGainMap[new_level] - kGainMap[level] < gain_error &&
				45	new_level < max_mic_level) {
				46	++new_level;
				47	}
				48	} else {
				49	while (kGainMap[new_level] - kGainMap[level] > gain_error &&
				50	new_level > min_mic_level) {
				51	--new_level;
				52	}
				53	}
				54	return new_level;
				55	}
				56
				57	float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
				58	const float crest_factor =
				59	FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
				60	return crest_factor;
				61	}
				62
				63	// Crest factor-based clipping prediction and clipped level step estimation.
				64	class ClippingEventPredictor : public ClippingPredictor {
				65	public:
				66	// ClippingEventPredictor with `num_channels` channels (limited to values
				67	// higher than zero); window size `window_length` and reference window size
				68	// `reference_window_length` (both referring to the number of frames in the
				69	// respective sliding windows and limited to values higher than zero);
				70	// reference window delay `reference_window_delay` (delay in frames, limited
				71	// to values zero and higher with an additional requirement of
				72	// `window_length` < `reference_window_length` + reference_window_delay`);
				73	// and an estimation peak threshold `clipping_threshold` and a crest factor
				74	// drop threshold `crest_factor_margin` (both in dB).
				75	ClippingEventPredictor(int num_channels,
				76	int window_length,
				77	int reference_window_length,
				78	int reference_window_delay,
				79	float clipping_threshold,
				80	float crest_factor_margin)
				81	: window_length_(window_length),
				82	reference_window_length_(reference_window_length),
				83	reference_window_delay_(reference_window_delay),
				84	clipping_threshold_(clipping_threshold),
				85	crest_factor_margin_(crest_factor_margin) {
				86	RTC_DCHECK_GT(num_channels, 0);
				87	RTC_DCHECK_GT(window_length, 0);
				88	RTC_DCHECK_GT(reference_window_length, 0);
				89	RTC_DCHECK_GE(reference_window_delay, 0);
				90	RTC_DCHECK_GT(reference_window_length + reference_window_delay,
				91	window_length);
				92	const int buffer_length = GetMinFramesProcessed();
				93	RTC_DCHECK_GT(buffer_length, 0);
				94	for (int i = 0; i < num_channels; ++i) {
				95	ch_buffers_.push_back(
				96	std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
				97	}
				98	}
				99
				100	ClippingEventPredictor(const ClippingEventPredictor&) = delete;
				101	ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
				102	~ClippingEventPredictor() {}
				103
				104	void Reset() {
				105	const int num_channels = ch_buffers_.size();
				106	for (int i = 0; i < num_channels; ++i) {
				107	ch_buffers_[i]->Reset();
				108	}
				109	}
				110
				111	// Analyzes a frame of audio and stores the framewise metrics in
				112	// `ch_buffers_`.
				113	void Process(const AudioFrameView<const float>& frame) {
				114	const int num_channels = frame.num_channels();
				115	RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
				116	const int samples_per_channel = frame.samples_per_channel();
				117	RTC_DCHECK_GT(samples_per_channel, 0);
				118	for (int channel = 0; channel < num_channels; ++channel) {
				119	float sum_squares = 0.0f;
				120	float peak = 0.0f;
				121	for (const auto& sample : frame.channel(channel)) {
				122	sum_squares += sample * sample;
				123	peak = std::max(std::fabs(sample), peak);
				124	}
				125	ch_buffers_[channel]->Push(
				126	{sum_squares / static_cast<float>(samples_per_channel), peak});
				127	}
				128	}
				129
				130	// Estimates the analog gain adjustment for channel `channel` using a
				131	// sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
				132	// estimate for the clipped level step equal to `default_clipped_level_step_`
				133	// if at least `GetMinFramesProcessed()` frames have been processed since the
				134	// last reset and a clipping event is predicted. `level`, `min_mic_level`, and
				135	// `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
				136	absl::optional<int> EstimateClippedLevelStep(int channel,
				137	int level,
				138	int default_step,
				139	int min_mic_level,
				140	int max_mic_level) const {
				141	RTC_CHECK_GE(channel, 0);
				142	RTC_CHECK_LT(channel, ch_buffers_.size());
				143	RTC_DCHECK_GE(level, 0);
				144	RTC_DCHECK_LE(level, 255);
				145	RTC_DCHECK_GT(default_step, 0);
				146	RTC_DCHECK_LE(default_step, 255);
				147	RTC_DCHECK_GE(min_mic_level, 0);
				148	RTC_DCHECK_LE(min_mic_level, 255);
				149	RTC_DCHECK_GE(max_mic_level, 0);
				150	RTC_DCHECK_LE(max_mic_level, 255);
				151	if (level <= min_mic_level) {
				152	return absl::nullopt;
				153	}
				154	if (PredictClippingEvent(channel)) {
				155	const int new_level =
				156	rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
				157	const int step = level - new_level;
				158	if (step > 0) {
				159	return step;
				160	}
				161	}
				162	return absl::nullopt;
				163	}
				164
				165	private:
				166	int GetMinFramesProcessed() const {
				167	return reference_window_delay_ + reference_window_length_;
				168	}
				169
				170	// Predicts clipping events based on the processed audio frames. Returns
				171	// true if a clipping event is likely.
				172	bool PredictClippingEvent(int channel) const {
				173	const auto metrics =
				174	ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
				175	if (!metrics.has_value() \|\|
				176	!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
				177	return false;
				178	}
				179	const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
				180	reference_window_delay_, reference_window_length_);
				181	if (!reference_metrics.has_value()) {
				182	return false;
				183	}
				184	const float crest_factor = ComputeCrestFactor(metrics.value());
				185	const float reference_crest_factor =
				186	ComputeCrestFactor(reference_metrics.value());
				187	if (crest_factor < reference_crest_factor - crest_factor_margin_) {
				188	return true;
				189	}
				190	return false;
				191	}
				192
				193	std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
				194	const int window_length_;
				195	const int reference_window_length_;
				196	const int reference_window_delay_;
				197	const float clipping_threshold_;
				198	const float crest_factor_margin_;
				199	};
				200
				201	// Performs crest factor-based clipping peak prediction.
				202	class ClippingPeakPredictor : public ClippingPredictor {
				203	public:
				204	// Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
				205	// higher than zero); window size `window_length` and reference window size
				206	// `reference_window_length` (both referring to the number of frames in the
				207	// respective sliding windows and limited to values higher than zero);
				208	// reference window delay `reference_window_delay` (delay in frames, limited
				209	// to values zero and higher with an additional requirement of
				210	// `window_length` < `reference_window_length` + reference_window_delay`);
				211	// and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
				212	// clipped level step estimation is used if `adaptive_step_estimation` is
				213	// true.
				214	explicit ClippingPeakPredictor(int num_channels,
				215	int window_length,
				216	int reference_window_length,
				217	int reference_window_delay,
				218	int clipping_threshold,
				219	bool adaptive_step_estimation)
				220	: window_length_(window_length),
				221	reference_window_length_(reference_window_length),
				222	reference_window_delay_(reference_window_delay),
				223	clipping_threshold_(clipping_threshold),
				224	adaptive_step_estimation_(adaptive_step_estimation) {
				225	RTC_DCHECK_GT(num_channels, 0);
				226	RTC_DCHECK_GT(window_length, 0);
				227	RTC_DCHECK_GT(reference_window_length, 0);
				228	RTC_DCHECK_GE(reference_window_delay, 0);
				229	RTC_DCHECK_GT(reference_window_length + reference_window_delay,
				230	window_length);
				231	const int buffer_length = GetMinFramesProcessed();
				232	RTC_DCHECK_GT(buffer_length, 0);
				233	for (int i = 0; i < num_channels; ++i) {
				234	ch_buffers_.push_back(
				235	std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
				236	}
				237	}
				238
				239	ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
				240	ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
				241	~ClippingPeakPredictor() {}
				242
				243	void Reset() {
				244	const int num_channels = ch_buffers_.size();
				245	for (int i = 0; i < num_channels; ++i) {
				246	ch_buffers_[i]->Reset();
				247	}
				248	}
				249
				250	// Analyzes a frame of audio and stores the framewise metrics in
				251	// `ch_buffers_`.
				252	void Process(const AudioFrameView<const float>& frame) {
				253	const int num_channels = frame.num_channels();
				254	RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
				255	const int samples_per_channel = frame.samples_per_channel();
				256	RTC_DCHECK_GT(samples_per_channel, 0);
				257	for (int channel = 0; channel < num_channels; ++channel) {
				258	float sum_squares = 0.0f;
				259	float peak = 0.0f;
				260	for (const auto& sample : frame.channel(channel)) {
				261	sum_squares += sample * sample;
				262	peak = std::max(std::fabs(sample), peak);
				263	}
				264	ch_buffers_[channel]->Push(
				265	{sum_squares / static_cast<float>(samples_per_channel), peak});
				266	}
				267	}
				268
				269	// Estimates the analog gain adjustment for channel `channel` using a
				270	// sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
				271	// estimate for the clipped level step (equal to
				272	// `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
				273	// least `GetMinFramesProcessed()` frames have been processed since the last
				274	// reset and a clipping event is predicted. `level`, `min_mic_level`, and
				275	// `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
				276	absl::optional<int> EstimateClippedLevelStep(int channel,
				277	int level,
				278	int default_step,
				279	int min_mic_level,
				280	int max_mic_level) const {
				281	RTC_DCHECK_GE(channel, 0);
				282	RTC_DCHECK_LT(channel, ch_buffers_.size());
				283	RTC_DCHECK_GE(level, 0);
				284	RTC_DCHECK_LE(level, 255);
				285	RTC_DCHECK_GT(default_step, 0);
				286	RTC_DCHECK_LE(default_step, 255);
				287	RTC_DCHECK_GE(min_mic_level, 0);
				288	RTC_DCHECK_LE(min_mic_level, 255);
				289	RTC_DCHECK_GE(max_mic_level, 0);
				290	RTC_DCHECK_LE(max_mic_level, 255);
				291	if (level <= min_mic_level) {
				292	return absl::nullopt;
				293	}
				294	absl::optional<float> estimate_db = EstimatePeakValue(channel);
				295	if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
				296	int step = 0;
				297	if (!adaptive_step_estimation_) {
				298	step = default_step;
				299	} else {
				300	const int estimated_gain_change =
				301	rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
				302	-kClippingPredictorMaxGainChange, 0);
				303	step =
				304	std::max(level - LevelFromGainError(estimated_gain_change, level,
				305	min_mic_level, max_mic_level),
				306	default_step);
				307	}
				308	const int new_level =
				309	rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
				310	if (level > new_level) {
				311	return level - new_level;
				312	}
				313	}
				314	return absl::nullopt;
				315	}
				316
				317	private:
				318	int GetMinFramesProcessed() {
				319	return reference_window_delay_ + reference_window_length_;
				320	}
				321
				322	// Predicts clipping sample peaks based on the processed audio frames.
				323	// Returns the estimated peak value if clipping is predicted. Otherwise
				324	// returns absl::nullopt.
				325	absl::optional<float> EstimatePeakValue(int channel) const {
				326	const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
				327	reference_window_delay_, reference_window_length_);
				328	if (!reference_metrics.has_value()) {
				329	return absl::nullopt;
				330	}
				331	const auto metrics =
				332	ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
				333	if (!metrics.has_value() \|\|
				334	!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
				335	return absl::nullopt;
				336	}
				337	const float reference_crest_factor =
				338	ComputeCrestFactor(reference_metrics.value());
				339	const float& mean_squares = metrics.value().average;
				340	const float projected_peak =
				341	reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
				342	return projected_peak;
				343	}
				344
				345	std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
				346	const int window_length_;
				347	const int reference_window_length_;
				348	const int reference_window_delay_;
				349	const int clipping_threshold_;
				350	const bool adaptive_step_estimation_;
				351	};
				352
				353	} // namespace
				354
				355	std::unique_ptr<ClippingPredictor> CreateClippingEventPredictor(
				356	int num_channels,
				357	const ClippingPredictorConfig& config) {
				358	return std::make_unique<ClippingEventPredictor>(
				359	num_channels, config.window_length, config.reference_window_length,
				360	config.reference_window_delay, config.clipping_threshold,
				361	config.crest_factor_margin);
				362	}
				363
				364	std::unique_ptr<ClippingPredictor> CreateFixedStepClippingPeakPredictor(
				365	int num_channels,
				366	const ClippingPredictorConfig& config) {
				367	return std::make_unique<ClippingPeakPredictor>(
				368	num_channels, config.window_length, config.reference_window_length,
				369	config.reference_window_delay, config.clipping_threshold,
				370	/adaptive_step_estimation=/false);
				371	}
				372
				373	std::unique_ptr<ClippingPredictor> CreateAdaptiveStepClippingPeakPredictor(
				374	int num_channels,
				375	const ClippingPredictorConfig& config) {
				376	return std::make_unique<ClippingPeakPredictor>(
				377	num_channels, config.window_length, config.reference_window_length,
				378	config.reference_window_delay, config.clipping_threshold,
				379	/adaptive_step_estimation=/true);
				380	}
				381
				382	} // namespace webrtc