Per Ã…hgren | 0cbb58e | 2019-10-29 22:59:44 +0100 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include "modules/audio_processing/ns/noise_estimator.h" |
| 12 | |
| 13 | #include <algorithm> |
| 14 | |
| 15 | #include "modules/audio_processing/ns/fast_math.h" |
| 16 | #include "rtc_base/checks.h" |
| 17 | |
| 18 | namespace webrtc { |
| 19 | |
| 20 | namespace { |
| 21 | |
| 22 | // Log(i). |
| 23 | constexpr std::array<float, 129> log_table = { |
| 24 | 0.f, 0.f, 0.f, 0.f, 0.f, 1.609438f, 1.791759f, |
| 25 | 1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f, |
| 26 | 2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f, |
| 27 | 3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f, |
| 28 | 3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f, |
| 29 | 3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f, |
| 30 | 3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f, |
| 31 | 3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f, |
| 32 | 4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f, |
| 33 | 4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f, |
| 34 | 4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f, |
| 35 | 4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f, |
| 36 | 4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f, |
| 37 | 4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f, |
| 38 | 4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f, |
| 39 | 4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f, |
| 40 | 4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f, |
| 41 | 4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f, |
| 42 | 4.836282f, 4.844187f, 4.852030f}; |
| 43 | |
| 44 | } // namespace |
| 45 | |
| 46 | NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params) |
| 47 | : suppression_params_(suppression_params) { |
| 48 | noise_spectrum_.fill(0.f); |
| 49 | prev_noise_spectrum_.fill(0.f); |
| 50 | conservative_noise_spectrum_.fill(0.f); |
| 51 | parametric_noise_spectrum_.fill(0.f); |
| 52 | } |
| 53 | |
| 54 | void NoiseEstimator::PrepareAnalysis() { |
| 55 | std::copy(noise_spectrum_.begin(), noise_spectrum_.end(), |
| 56 | prev_noise_spectrum_.begin()); |
| 57 | } |
| 58 | |
| 59 | void NoiseEstimator::PreUpdate( |
| 60 | int32_t num_analyzed_frames, |
| 61 | rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum, |
| 62 | float signal_spectral_sum) { |
| 63 | quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_); |
| 64 | |
| 65 | if (num_analyzed_frames < kShortStartupPhaseBlocks) { |
| 66 | // Compute simplified noise model during startup. |
| 67 | const size_t kStartBand = 5; |
| 68 | float sum_log_i_log_magn = 0.f; |
| 69 | float sum_log_i = 0.f; |
| 70 | float sum_log_i_square = 0.f; |
| 71 | float sum_log_magn = 0.f; |
| 72 | for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) { |
| 73 | float log_i = log_table[i]; |
| 74 | sum_log_i += log_i; |
| 75 | sum_log_i_square += log_i * log_i; |
| 76 | float log_signal = LogApproximation(signal_spectrum[i]); |
| 77 | sum_log_magn += log_signal; |
| 78 | sum_log_i_log_magn += log_i * log_signal; |
| 79 | } |
| 80 | |
| 81 | // Estimate the parameter for the level of the white noise. |
| 82 | constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1; |
| 83 | white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 * |
| 84 | suppression_params_.over_subtraction_factor; |
| 85 | |
| 86 | // Estimate pink noise parameters. |
| 87 | float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) - |
| 88 | sum_log_i * sum_log_i; |
| 89 | float num = |
| 90 | sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn; |
| 91 | RTC_DCHECK_NE(denom, 0.f); |
| 92 | float pink_noise_adjustment = num / denom; |
| 93 | |
| 94 | // Constrain the estimated spectrum to be positive. |
| 95 | pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f); |
| 96 | pink_noise_numerator_ += pink_noise_adjustment; |
| 97 | num = sum_log_i * sum_log_magn - |
| 98 | (kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn; |
| 99 | RTC_DCHECK_NE(denom, 0.f); |
| 100 | pink_noise_adjustment = num / denom; |
| 101 | |
| 102 | // Constrain the pink noise power to be in the interval [0, 1]. |
| 103 | pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f); |
| 104 | |
| 105 | pink_noise_exp_ += pink_noise_adjustment; |
| 106 | |
| 107 | const float one_by_num_analyzed_frames_plus_1 = |
| 108 | 1.f / (num_analyzed_frames + 1.f); |
| 109 | |
| 110 | // Calculate the frequency-independent parts of parametric noise estimate. |
| 111 | float parametric_exp = 0.f; |
| 112 | float parametric_num = 0.f; |
| 113 | if (pink_noise_exp_ > 0.f) { |
| 114 | // Use pink noise estimate. |
| 115 | parametric_num = ExpApproximation(pink_noise_numerator_ * |
| 116 | one_by_num_analyzed_frames_plus_1); |
| 117 | parametric_num *= num_analyzed_frames + 1.f; |
| 118 | parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1; |
| 119 | } |
| 120 | |
| 121 | constexpr float kOneByShortStartupPhaseBlocks = |
| 122 | 1.f / kShortStartupPhaseBlocks; |
| 123 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { |
| 124 | // Estimate the background noise using the white and pink noise |
| 125 | // parameters. |
| 126 | if (pink_noise_exp_ == 0.f) { |
| 127 | // Use white noise estimate. |
| 128 | parametric_noise_spectrum_[i] = white_noise_level_; |
| 129 | } else { |
| 130 | // Use pink noise estimate. |
| 131 | float use_band = i < kStartBand ? kStartBand : i; |
| 132 | float denom = PowApproximation(use_band, parametric_exp); |
| 133 | RTC_DCHECK_NE(denom, 0.f); |
| 134 | parametric_noise_spectrum_[i] = parametric_num / denom; |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | // Weight quantile noise with modeled noise. |
| 139 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { |
| 140 | noise_spectrum_[i] *= num_analyzed_frames; |
| 141 | float tmp = parametric_noise_spectrum_[i] * |
| 142 | (kShortStartupPhaseBlocks - num_analyzed_frames); |
| 143 | noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1; |
| 144 | noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks; |
| 145 | } |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | void NoiseEstimator::PostUpdate( |
| 150 | rtc::ArrayView<const float> speech_probability, |
| 151 | rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) { |
| 152 | // Time-avg parameter for noise_spectrum update. |
| 153 | constexpr float kNoiseUpdate = 0.9f; |
| 154 | |
| 155 | float gamma = kNoiseUpdate; |
| 156 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { |
| 157 | const float prob_speech = speech_probability[i]; |
| 158 | const float prob_non_speech = 1.f - prob_speech; |
| 159 | |
| 160 | // Temporary noise update used for speech frames if update value is less |
| 161 | // than previous. |
| 162 | float noise_update_tmp = |
| 163 | gamma * prev_noise_spectrum_[i] + |
| 164 | (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + |
| 165 | prob_speech * prev_noise_spectrum_[i]); |
| 166 | |
| 167 | // Time-constant based on speech/noise_spectrum state. |
| 168 | float gamma_old = gamma; |
| 169 | |
| 170 | // Increase gamma for frame likely to be seech. |
| 171 | constexpr float kProbRange = .2f; |
| 172 | gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate; |
| 173 | |
| 174 | // Conservative noise_spectrum update. |
| 175 | if (prob_speech < kProbRange) { |
| 176 | conservative_noise_spectrum_[i] += |
| 177 | 0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]); |
| 178 | } |
| 179 | |
| 180 | // Noise_spectrum update. |
| 181 | if (gamma == gamma_old) { |
| 182 | noise_spectrum_[i] = noise_update_tmp; |
| 183 | } else { |
| 184 | noise_spectrum_[i] = |
| 185 | gamma * prev_noise_spectrum_[i] + |
| 186 | (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + |
| 187 | prob_speech * prev_noise_spectrum_[i]); |
| 188 | // Allow for noise_spectrum update downwards: If noise_spectrum update |
| 189 | // decreases the noise_spectrum, it is safe, so allow it to happen. |
| 190 | noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp); |
| 191 | } |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | } // namespace webrtc |