Blame - modules/audio_processing/agc/clipping_predictor.cc - webrtc.googlesource.com/src

blob: 58b3a2769cdaa72ff4045c7c23fd006237140198 [file] [log] [blame]

Hanna Silen	4b3a061	2021-06-02 23:03:24 +0200	[diff] [blame]	1	/*
				2	* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#include "modules/audio_processing/agc/clipping_predictor.h"
				12
				13	#include <algorithm>
				14	#include <memory>
				15
				16	#include "common_audio/include/audio_util.h"
				17	#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
				18	#include "modules/audio_processing/agc/gain_map_internal.h"
				19	#include "rtc_base/checks.h"
				20	#include "rtc_base/logging.h"
				21	#include "rtc_base/numerics/safe_minmax.h"
				22
				23	namespace webrtc {
				24	namespace {
				25
				26	constexpr int kClippingPredictorMaxGainChange = 15;
				27
Hanna Silen	4b3a061	2021-06-02 23:03:24 +0200	[diff] [blame]	28	// Estimates the new level from the gain error; a copy of the function
				29	// `LevelFromGainError` in agc_manager_direct.cc.
				30	int LevelFromGainError(int gain_error,
				31	int level,
				32	int min_mic_level,
				33	int max_mic_level) {
				34	RTC_DCHECK_GE(level, 0);
				35	RTC_DCHECK_LE(level, max_mic_level);
				36	if (gain_error == 0) {
				37	return level;
				38	}
				39	int new_level = level;
				40	if (gain_error > 0) {
				41	while (kGainMap[new_level] - kGainMap[level] < gain_error &&
				42	new_level < max_mic_level) {
				43	++new_level;
				44	}
				45	} else {
				46	while (kGainMap[new_level] - kGainMap[level] > gain_error &&
				47	new_level > min_mic_level) {
				48	--new_level;
				49	}
				50	}
				51	return new_level;
				52	}
				53
				54	float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
				55	const float crest_factor =
				56	FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
				57	return crest_factor;
				58	}
				59
				60	// Crest factor-based clipping prediction and clipped level step estimation.
				61	class ClippingEventPredictor : public ClippingPredictor {
				62	public:
				63	// ClippingEventPredictor with `num_channels` channels (limited to values
				64	// higher than zero); window size `window_length` and reference window size
				65	// `reference_window_length` (both referring to the number of frames in the
				66	// respective sliding windows and limited to values higher than zero);
				67	// reference window delay `reference_window_delay` (delay in frames, limited
				68	// to values zero and higher with an additional requirement of
				69	// `window_length` < `reference_window_length` + reference_window_delay`);
				70	// and an estimation peak threshold `clipping_threshold` and a crest factor
				71	// drop threshold `crest_factor_margin` (both in dB).
				72	ClippingEventPredictor(int num_channels,
				73	int window_length,
				74	int reference_window_length,
				75	int reference_window_delay,
				76	float clipping_threshold,
				77	float crest_factor_margin)
				78	: window_length_(window_length),
				79	reference_window_length_(reference_window_length),
				80	reference_window_delay_(reference_window_delay),
				81	clipping_threshold_(clipping_threshold),
				82	crest_factor_margin_(crest_factor_margin) {
				83	RTC_DCHECK_GT(num_channels, 0);
				84	RTC_DCHECK_GT(window_length, 0);
				85	RTC_DCHECK_GT(reference_window_length, 0);
				86	RTC_DCHECK_GE(reference_window_delay, 0);
				87	RTC_DCHECK_GT(reference_window_length + reference_window_delay,
				88	window_length);
				89	const int buffer_length = GetMinFramesProcessed();
				90	RTC_DCHECK_GT(buffer_length, 0);
				91	for (int i = 0; i < num_channels; ++i) {
				92	ch_buffers_.push_back(
				93	std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
				94	}
				95	}
				96
				97	ClippingEventPredictor(const ClippingEventPredictor&) = delete;
				98	ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
				99	~ClippingEventPredictor() {}
				100
				101	void Reset() {
				102	const int num_channels = ch_buffers_.size();
				103	for (int i = 0; i < num_channels; ++i) {
				104	ch_buffers_[i]->Reset();
				105	}
				106	}
				107
				108	// Analyzes a frame of audio and stores the framewise metrics in
				109	// `ch_buffers_`.
Alessio Bazzica	b237a87	2021-06-11 12:37:54 +0200	[diff] [blame]	110	void Analyze(const AudioFrameView<const float>& frame) {
Hanna Silen	4b3a061	2021-06-02 23:03:24 +0200	[diff] [blame]	111	const int num_channels = frame.num_channels();
				112	RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
				113	const int samples_per_channel = frame.samples_per_channel();
				114	RTC_DCHECK_GT(samples_per_channel, 0);
				115	for (int channel = 0; channel < num_channels; ++channel) {
				116	float sum_squares = 0.0f;
				117	float peak = 0.0f;
				118	for (const auto& sample : frame.channel(channel)) {
				119	sum_squares += sample * sample;
				120	peak = std::max(std::fabs(sample), peak);
				121	}
				122	ch_buffers_[channel]->Push(
				123	{sum_squares / static_cast<float>(samples_per_channel), peak});
				124	}
				125	}
				126
				127	// Estimates the analog gain adjustment for channel `channel` using a
				128	// sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
				129	// estimate for the clipped level step equal to `default_clipped_level_step_`
				130	// if at least `GetMinFramesProcessed()` frames have been processed since the
				131	// last reset and a clipping event is predicted. `level`, `min_mic_level`, and
				132	// `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
				133	absl::optional<int> EstimateClippedLevelStep(int channel,
				134	int level,
				135	int default_step,
				136	int min_mic_level,
				137	int max_mic_level) const {
				138	RTC_CHECK_GE(channel, 0);
				139	RTC_CHECK_LT(channel, ch_buffers_.size());
				140	RTC_DCHECK_GE(level, 0);
				141	RTC_DCHECK_LE(level, 255);
				142	RTC_DCHECK_GT(default_step, 0);
				143	RTC_DCHECK_LE(default_step, 255);
				144	RTC_DCHECK_GE(min_mic_level, 0);
				145	RTC_DCHECK_LE(min_mic_level, 255);
				146	RTC_DCHECK_GE(max_mic_level, 0);
				147	RTC_DCHECK_LE(max_mic_level, 255);
				148	if (level <= min_mic_level) {
				149	return absl::nullopt;
				150	}
				151	if (PredictClippingEvent(channel)) {
				152	const int new_level =
				153	rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
				154	const int step = level - new_level;
				155	if (step > 0) {
				156	return step;
				157	}
				158	}
				159	return absl::nullopt;
				160	}
				161
				162	private:
				163	int GetMinFramesProcessed() const {
				164	return reference_window_delay_ + reference_window_length_;
				165	}
				166
				167	// Predicts clipping events based on the processed audio frames. Returns
				168	// true if a clipping event is likely.
				169	bool PredictClippingEvent(int channel) const {
				170	const auto metrics =
				171	ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
				172	if (!metrics.has_value() \|\|
				173	!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
				174	return false;
				175	}
				176	const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
				177	reference_window_delay_, reference_window_length_);
				178	if (!reference_metrics.has_value()) {
				179	return false;
				180	}
				181	const float crest_factor = ComputeCrestFactor(metrics.value());
				182	const float reference_crest_factor =
				183	ComputeCrestFactor(reference_metrics.value());
				184	if (crest_factor < reference_crest_factor - crest_factor_margin_) {
				185	return true;
				186	}
				187	return false;
				188	}
				189
				190	std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
				191	const int window_length_;
				192	const int reference_window_length_;
				193	const int reference_window_delay_;
				194	const float clipping_threshold_;
				195	const float crest_factor_margin_;
				196	};
				197
				198	// Performs crest factor-based clipping peak prediction.
				199	class ClippingPeakPredictor : public ClippingPredictor {
				200	public:
				201	// Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
				202	// higher than zero); window size `window_length` and reference window size
				203	// `reference_window_length` (both referring to the number of frames in the
				204	// respective sliding windows and limited to values higher than zero);
				205	// reference window delay `reference_window_delay` (delay in frames, limited
				206	// to values zero and higher with an additional requirement of
				207	// `window_length` < `reference_window_length` + reference_window_delay`);
				208	// and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
				209	// clipped level step estimation is used if `adaptive_step_estimation` is
				210	// true.
				211	explicit ClippingPeakPredictor(int num_channels,
				212	int window_length,
				213	int reference_window_length,
				214	int reference_window_delay,
				215	int clipping_threshold,
				216	bool adaptive_step_estimation)
				217	: window_length_(window_length),
				218	reference_window_length_(reference_window_length),
				219	reference_window_delay_(reference_window_delay),
				220	clipping_threshold_(clipping_threshold),
				221	adaptive_step_estimation_(adaptive_step_estimation) {
				222	RTC_DCHECK_GT(num_channels, 0);
				223	RTC_DCHECK_GT(window_length, 0);
				224	RTC_DCHECK_GT(reference_window_length, 0);
				225	RTC_DCHECK_GE(reference_window_delay, 0);
				226	RTC_DCHECK_GT(reference_window_length + reference_window_delay,
				227	window_length);
				228	const int buffer_length = GetMinFramesProcessed();
				229	RTC_DCHECK_GT(buffer_length, 0);
				230	for (int i = 0; i < num_channels; ++i) {
				231	ch_buffers_.push_back(
				232	std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
				233	}
				234	}
				235
				236	ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
				237	ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
				238	~ClippingPeakPredictor() {}
				239
				240	void Reset() {
				241	const int num_channels = ch_buffers_.size();
				242	for (int i = 0; i < num_channels; ++i) {
				243	ch_buffers_[i]->Reset();
				244	}
				245	}
				246
				247	// Analyzes a frame of audio and stores the framewise metrics in
				248	// `ch_buffers_`.
Alessio Bazzica	b237a87	2021-06-11 12:37:54 +0200	[diff] [blame]	249	void Analyze(const AudioFrameView<const float>& frame) {
Hanna Silen	4b3a061	2021-06-02 23:03:24 +0200	[diff] [blame]	250	const int num_channels = frame.num_channels();
				251	RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
				252	const int samples_per_channel = frame.samples_per_channel();
				253	RTC_DCHECK_GT(samples_per_channel, 0);
				254	for (int channel = 0; channel < num_channels; ++channel) {
				255	float sum_squares = 0.0f;
				256	float peak = 0.0f;
				257	for (const auto& sample : frame.channel(channel)) {
				258	sum_squares += sample * sample;
				259	peak = std::max(std::fabs(sample), peak);
				260	}
				261	ch_buffers_[channel]->Push(
				262	{sum_squares / static_cast<float>(samples_per_channel), peak});
				263	}
				264	}
				265
				266	// Estimates the analog gain adjustment for channel `channel` using a
				267	// sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
				268	// estimate for the clipped level step (equal to
				269	// `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
				270	// least `GetMinFramesProcessed()` frames have been processed since the last
				271	// reset and a clipping event is predicted. `level`, `min_mic_level`, and
				272	// `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
				273	absl::optional<int> EstimateClippedLevelStep(int channel,
				274	int level,
				275	int default_step,
				276	int min_mic_level,
				277	int max_mic_level) const {
				278	RTC_DCHECK_GE(channel, 0);
				279	RTC_DCHECK_LT(channel, ch_buffers_.size());
				280	RTC_DCHECK_GE(level, 0);
				281	RTC_DCHECK_LE(level, 255);
				282	RTC_DCHECK_GT(default_step, 0);
				283	RTC_DCHECK_LE(default_step, 255);
				284	RTC_DCHECK_GE(min_mic_level, 0);
				285	RTC_DCHECK_LE(min_mic_level, 255);
				286	RTC_DCHECK_GE(max_mic_level, 0);
				287	RTC_DCHECK_LE(max_mic_level, 255);
				288	if (level <= min_mic_level) {
				289	return absl::nullopt;
				290	}
				291	absl::optional<float> estimate_db = EstimatePeakValue(channel);
				292	if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
				293	int step = 0;
				294	if (!adaptive_step_estimation_) {
				295	step = default_step;
				296	} else {
				297	const int estimated_gain_change =
				298	rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
				299	-kClippingPredictorMaxGainChange, 0);
				300	step =
				301	std::max(level - LevelFromGainError(estimated_gain_change, level,
				302	min_mic_level, max_mic_level),
				303	default_step);
				304	}
				305	const int new_level =
				306	rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
				307	if (level > new_level) {
				308	return level - new_level;
				309	}
				310	}
				311	return absl::nullopt;
				312	}
				313
				314	private:
				315	int GetMinFramesProcessed() {
				316	return reference_window_delay_ + reference_window_length_;
				317	}
				318
				319	// Predicts clipping sample peaks based on the processed audio frames.
				320	// Returns the estimated peak value if clipping is predicted. Otherwise
				321	// returns absl::nullopt.
				322	absl::optional<float> EstimatePeakValue(int channel) const {
				323	const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
				324	reference_window_delay_, reference_window_length_);
				325	if (!reference_metrics.has_value()) {
				326	return absl::nullopt;
				327	}
				328	const auto metrics =
				329	ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
				330	if (!metrics.has_value() \|\|
				331	!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
				332	return absl::nullopt;
				333	}
				334	const float reference_crest_factor =
				335	ComputeCrestFactor(reference_metrics.value());
				336	const float& mean_squares = metrics.value().average;
				337	const float projected_peak =
				338	reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
				339	return projected_peak;
				340	}
				341
				342	std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
				343	const int window_length_;
				344	const int reference_window_length_;
				345	const int reference_window_delay_;
				346	const int clipping_threshold_;
				347	const bool adaptive_step_estimation_;
				348	};
				349
				350	} // namespace
				351
Alessio Bazzica	b237a87	2021-06-11 12:37:54 +0200	[diff] [blame]	352	std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
Hanna Silen	4b3a061	2021-06-02 23:03:24 +0200	[diff] [blame]	353	int num_channels,
Alessio Bazzica	b237a87	2021-06-11 12:37:54 +0200	[diff] [blame]	354	const AudioProcessing::Config::GainController1::AnalogGainController::
				355	ClippingPredictor& config) {
				356	if (!config.enabled) {
				357	RTC_LOG(LS_INFO) << "[agc] Clipping prediction disabled.";
				358	return nullptr;
				359	}
				360	RTC_LOG(LS_INFO) << "[agc] Clipping prediction enabled.";
				361	using ClippingPredictorMode = AudioProcessing::Config::GainController1::
				362	AnalogGainController::ClippingPredictor::Mode;
				363	switch (config.mode) {
				364	case ClippingPredictorMode::kClippingEventPrediction:
				365	return std::make_unique<ClippingEventPredictor>(
				366	num_channels, config.window_length, config.reference_window_length,
				367	config.reference_window_delay, config.clipping_threshold,
				368	config.crest_factor_margin);
				369	case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
				370	return std::make_unique<ClippingPeakPredictor>(
				371	num_channels, config.window_length, config.reference_window_length,
				372	config.reference_window_delay, config.clipping_threshold,
				373	/adaptive_step_estimation=/true);
				374	case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
				375	return std::make_unique<ClippingPeakPredictor>(
				376	num_channels, config.window_length, config.reference_window_length,
				377	config.reference_window_delay, config.clipping_threshold,
				378	/adaptive_step_estimation=/false);
				379	}
Artem Titov	d325196	2021-11-15 16:57:07 +0100	[diff] [blame]	380	RTC_DCHECK_NOTREACHED();
Hanna Silen	4b3a061	2021-06-02 23:03:24 +0200	[diff] [blame]	381	}
				382
				383	} // namespace webrtc