Blame - modules/audio_processing/ns/noise_estimator.cc - webrtc.googlesource.com/src

blob: 5367545f25db5122610bcb1e6f4dc11ba8c42f26 [file] [log] [blame]

Per Åhgren	0cbb58e	2019-10-29 22:59:44 +0100	[diff] [blame^]	1	/*
				2	* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#include "modules/audio_processing/ns/noise_estimator.h"
				12
				13	#include <algorithm>
				14
				15	#include "modules/audio_processing/ns/fast_math.h"
				16	#include "rtc_base/checks.h"
				17
				18	namespace webrtc {
				19
				20	namespace {
				21
				22	// Log(i).
				23	constexpr std::array<float, 129> log_table = {
				24	0.f, 0.f, 0.f, 0.f, 0.f, 1.609438f, 1.791759f,
				25	1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f,
				26	2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f,
				27	3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f,
				28	3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f,
				29	3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f,
				30	3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f,
				31	3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f,
				32	4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f,
				33	4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f,
				34	4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f,
				35	4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f,
				36	4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f,
				37	4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f,
				38	4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f,
				39	4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f,
				40	4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f,
				41	4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f,
				42	4.836282f, 4.844187f, 4.852030f};
				43
				44	} // namespace
				45
				46	NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params)
				47	: suppression_params_(suppression_params) {
				48	noise_spectrum_.fill(0.f);
				49	prev_noise_spectrum_.fill(0.f);
				50	conservative_noise_spectrum_.fill(0.f);
				51	parametric_noise_spectrum_.fill(0.f);
				52	}
				53
				54	void NoiseEstimator::PrepareAnalysis() {
				55	std::copy(noise_spectrum_.begin(), noise_spectrum_.end(),
				56	prev_noise_spectrum_.begin());
				57	}
				58
				59	void NoiseEstimator::PreUpdate(
				60	int32_t num_analyzed_frames,
				61	rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
				62	float signal_spectral_sum) {
				63	quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_);
				64
				65	if (num_analyzed_frames < kShortStartupPhaseBlocks) {
				66	// Compute simplified noise model during startup.
				67	const size_t kStartBand = 5;
				68	float sum_log_i_log_magn = 0.f;
				69	float sum_log_i = 0.f;
				70	float sum_log_i_square = 0.f;
				71	float sum_log_magn = 0.f;
				72	for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) {
				73	float log_i = log_table[i];
				74	sum_log_i += log_i;
				75	sum_log_i_square += log_i * log_i;
				76	float log_signal = LogApproximation(signal_spectrum[i]);
				77	sum_log_magn += log_signal;
				78	sum_log_i_log_magn += log_i * log_signal;
				79	}
				80
				81	// Estimate the parameter for the level of the white noise.
				82	constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
				83	white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 *
				84	suppression_params_.over_subtraction_factor;
				85
				86	// Estimate pink noise parameters.
				87	float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) -
				88	sum_log_i * sum_log_i;
				89	float num =
				90	sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn;
				91	RTC_DCHECK_NE(denom, 0.f);
				92	float pink_noise_adjustment = num / denom;
				93
				94	// Constrain the estimated spectrum to be positive.
				95	pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f);
				96	pink_noise_numerator_ += pink_noise_adjustment;
				97	num = sum_log_i * sum_log_magn -
				98	(kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn;
				99	RTC_DCHECK_NE(denom, 0.f);
				100	pink_noise_adjustment = num / denom;
				101
				102	// Constrain the pink noise power to be in the interval [0, 1].
				103	pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f);
				104
				105	pink_noise_exp_ += pink_noise_adjustment;
				106
				107	const float one_by_num_analyzed_frames_plus_1 =
				108	1.f / (num_analyzed_frames + 1.f);
				109
				110	// Calculate the frequency-independent parts of parametric noise estimate.
				111	float parametric_exp = 0.f;
				112	float parametric_num = 0.f;
				113	if (pink_noise_exp_ > 0.f) {
				114	// Use pink noise estimate.
				115	parametric_num = ExpApproximation(pink_noise_numerator_ *
				116	one_by_num_analyzed_frames_plus_1);
				117	parametric_num *= num_analyzed_frames + 1.f;
				118	parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1;
				119	}
				120
				121	constexpr float kOneByShortStartupPhaseBlocks =
				122	1.f / kShortStartupPhaseBlocks;
				123	for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
				124	// Estimate the background noise using the white and pink noise
				125	// parameters.
				126	if (pink_noise_exp_ == 0.f) {
				127	// Use white noise estimate.
				128	parametric_noise_spectrum_[i] = white_noise_level_;
				129	} else {
				130	// Use pink noise estimate.
				131	float use_band = i < kStartBand ? kStartBand : i;
				132	float denom = PowApproximation(use_band, parametric_exp);
				133	RTC_DCHECK_NE(denom, 0.f);
				134	parametric_noise_spectrum_[i] = parametric_num / denom;
				135	}
				136	}
				137
				138	// Weight quantile noise with modeled noise.
				139	for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
				140	noise_spectrum_[i] *= num_analyzed_frames;
				141	float tmp = parametric_noise_spectrum_[i] *
				142	(kShortStartupPhaseBlocks - num_analyzed_frames);
				143	noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1;
				144	noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks;
				145	}
				146	}
				147	}
				148
				149	void NoiseEstimator::PostUpdate(
				150	rtc::ArrayView<const float> speech_probability,
				151	rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
				152	// Time-avg parameter for noise_spectrum update.
				153	constexpr float kNoiseUpdate = 0.9f;
				154
				155	float gamma = kNoiseUpdate;
				156	for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
				157	const float prob_speech = speech_probability[i];
				158	const float prob_non_speech = 1.f - prob_speech;
				159
				160	// Temporary noise update used for speech frames if update value is less
				161	// than previous.
				162	float noise_update_tmp =
				163	gamma * prev_noise_spectrum_[i] +
				164	(1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
				165	prob_speech * prev_noise_spectrum_[i]);
				166
				167	// Time-constant based on speech/noise_spectrum state.
				168	float gamma_old = gamma;
				169
				170	// Increase gamma for frame likely to be seech.
				171	constexpr float kProbRange = .2f;
				172	gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate;
				173
				174	// Conservative noise_spectrum update.
				175	if (prob_speech < kProbRange) {
				176	conservative_noise_spectrum_[i] +=
				177	0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]);
				178	}
				179
				180	// Noise_spectrum update.
				181	if (gamma == gamma_old) {
				182	noise_spectrum_[i] = noise_update_tmp;
				183	} else {
				184	noise_spectrum_[i] =
				185	gamma * prev_noise_spectrum_[i] +
				186	(1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
				187	prob_speech * prev_noise_spectrum_[i]);
				188	// Allow for noise_spectrum update downwards: If noise_spectrum update
				189	// decreases the noise_spectrum, it is safe, so allow it to happen.
				190	noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp);
				191	}
				192	}
				193	}
				194
				195	} // namespace webrtc