/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/ns/noise_suppressor.h"

#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>

#include "modules/audio_processing/ns/fast_math.h"
#include "rtc_base/checks.h"

namespace webrtc {

namespace {

// Maps sample rate to number of bands.
size_t NumBandsForRate(size_t sample_rate_hz) {
  RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000 ||
             sample_rate_hz == 48000);
  return sample_rate_hz / 16000;
}

// Maximum number of channels for which the channel data is stored on
// the stack. If the number of channels is larger than this, the channel data
// is stored using scratch memory that is pre-allocated on the heap. The
// reason for this partitioning is to avoid wasting heap space for the more
// common numbers of channels, while at the same time not limiting the support
// for higher numbers of channels by forcing the channel data to fit in a
// fixed-size stack allocation.
constexpr size_t kMaxNumChannelsOnStack = 2;

// Returns the number of channels to allocate on the heap, which is needed
// when the number of channels is larger than the pre-defined number of
// channels that are stored on the stack.
size_t NumChannelsOnHeap(size_t num_channels) {
  return num_channels > kMaxNumChannelsOnStack ? num_channels : 0;
}

// Hybrid Hanning and flat window for the filterbank.
constexpr std::array<float, 96> kBlocks160w256FirstHalf = {
    0.00000000f, 0.01636173f, 0.03271908f, 0.04906767f, 0.06540313f,
    0.08172107f, 0.09801714f, 0.11428696f, 0.13052619f, 0.14673047f,
    0.16289547f, 0.17901686f, 0.19509032f, 0.21111155f, 0.22707626f,
    0.24298018f, 0.25881905f, 0.27458862f, 0.29028468f, 0.30590302f,
    0.32143947f, 0.33688985f, 0.35225005f, 0.36751594f, 0.38268343f,
    0.39774847f, 0.41270703f, 0.42755509f, 0.44228869f, 0.45690388f,
    0.47139674f, 0.48576339f, 0.50000000f, 0.51410274f, 0.52806785f,
    0.54189158f, 0.55557023f, 0.56910015f, 0.58247770f, 0.59569930f,
    0.60876143f, 0.62166057f, 0.63439328f, 0.64695615f, 0.65934582f,
    0.67155895f, 0.68359230f, 0.69544264f, 0.70710678f, 0.71858162f,
    0.72986407f, 0.74095113f, 0.75183981f, 0.76252720f, 0.77301045f,
    0.78328675f, 0.79335334f, 0.80320753f, 0.81284668f, 0.82226822f,
    0.83146961f, 0.84044840f, 0.84920218f, 0.85772861f, 0.86602540f,
    0.87409034f, 0.88192126f, 0.88951608f, 0.89687274f, 0.90398929f,
    0.91086382f, 0.91749450f, 0.92387953f, 0.93001722f, 0.93590593f,
    0.94154407f, 0.94693013f, 0.95206268f, 0.95694034f, 0.96156180f,
    0.96592583f, 0.97003125f, 0.97387698f, 0.97746197f, 0.98078528f,
    0.98384601f, 0.98664333f, 0.98917651f, 0.99144486f, 0.99344778f,
    0.99518473f, 0.99665524f, 0.99785892f, 0.99879546f, 0.99946459f,
    0.99986614f};

// Applies the filterbank window to a buffer.
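// The window rises over the first 96 samples using the table above, is flat
// (unity) over the middle of the frame, and falls over the last 95 samples
// using the mirrored first half of the table.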
void ApplyFilterBankWindow(rtc::ArrayView<float, kFftSize> x) {
  for (size_t i = 0; i < 96; ++i) {
    x[i] = kBlocks160w256FirstHalf[i] * x[i];
  }

  for (size_t i = 161, k = 95; i < kFftSize; ++i, --k) {
    RTC_DCHECK_NE(0, k);
    x[i] = kBlocks160w256FirstHalf[k] * x[i];
  }
}

// Extends a frame with previous data.
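// The extended frame consists of the stored samples from the previous call
// followed by the current frame; the tail of the extended frame is then saved
// in old_data for use in the next call.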
void FormExtendedFrame(rtc::ArrayView<const float, kNsFrameSize> frame,
                       rtc::ArrayView<float, kFftSize - kNsFrameSize> old_data,
                       rtc::ArrayView<float, kFftSize> extended_frame) {
  std::copy(old_data.begin(), old_data.end(), extended_frame.begin());
  std::copy(frame.begin(), frame.end(),
            extended_frame.begin() + old_data.size());
  std::copy(extended_frame.end() - old_data.size(), extended_frame.end(),
            old_data.begin());
}

// Uses overlap-and-add to produce an output frame.
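// The first kOverlapSize output samples are the sum of the stored overlap and
// the corresponding samples of the extended frame, the remaining output
// samples are copied directly, and the tail of the extended frame beyond
// kNsFrameSize becomes the overlap memory for the next call.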
void OverlapAndAdd(rtc::ArrayView<const float, kFftSize> extended_frame,
                   rtc::ArrayView<float, kOverlapSize> overlap_memory,
                   rtc::ArrayView<float, kNsFrameSize> output_frame) {
  for (size_t i = 0; i < kOverlapSize; ++i) {
    output_frame[i] = overlap_memory[i] + extended_frame[i];
  }
  std::copy(extended_frame.begin() + kOverlapSize,
            extended_frame.begin() + kNsFrameSize,
            output_frame.begin() + kOverlapSize);
  std::copy(extended_frame.begin() + kNsFrameSize, extended_frame.end(),
            overlap_memory.begin());
}

// Produces a delayed frame.
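// The produced frame equals the input delayed by kFftSize - kNsFrameSize
// samples, with delay_buffer holding the samples that carry over between
// calls.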
void DelaySignal(rtc::ArrayView<const float, kNsFrameSize> frame,
                 rtc::ArrayView<float, kFftSize - kNsFrameSize> delay_buffer,
                 rtc::ArrayView<float, kNsFrameSize> delayed_frame) {
  constexpr size_t kSamplesFromFrame = kNsFrameSize - (kFftSize - kNsFrameSize);
  std::copy(delay_buffer.begin(), delay_buffer.end(), delayed_frame.begin());
  std::copy(frame.begin(), frame.begin() + kSamplesFromFrame,
            delayed_frame.begin() + delay_buffer.size());

  std::copy(frame.begin() + kSamplesFromFrame, frame.end(),
            delay_buffer.begin());
}

// Computes the energy of an extended frame.
float ComputeEnergyOfExtendedFrame(rtc::ArrayView<const float, kFftSize> x) {
  float energy = 0.f;
  for (float x_k : x) {
    energy += x_k * x_k;
  }

  return energy;
}

// Computes the energy of an extended frame based on its subcomponents.
float ComputeEnergyOfExtendedFrame(
    rtc::ArrayView<const float, kNsFrameSize> frame,
    rtc::ArrayView<float, kFftSize - kNsFrameSize> old_data) {
  float energy = 0.f;
  for (float v : old_data) {
    energy += v * v;
  }
  for (float v : frame) {
    energy += v * v;
  }

  return energy;
}

// Computes the magnitude spectrum based on an FFT output.
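// A bias of 1 is added to each bin, which keeps the spectrum strictly
// positive; the DCHECK in ComputeUpperBandsGain relies on this.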
void ComputeMagnitudeSpectrum(
    rtc::ArrayView<const float, kFftSize> real,
    rtc::ArrayView<const float, kFftSize> imag,
    rtc::ArrayView<float, kFftSizeBy2Plus1> signal_spectrum) {
  signal_spectrum[0] = fabsf(real[0]) + 1.f;
  signal_spectrum[kFftSizeBy2Plus1 - 1] =
      fabsf(real[kFftSizeBy2Plus1 - 1]) + 1.f;

  for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) {
    signal_spectrum[i] =
        SqrtFastApproximation(real[i] * real[i] + imag[i] * imag[i]) + 1.f;
  }
}

// Computes the prior and post SNRs.
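// The prior SNR is formed with the decision-directed approach: a weighted sum
// of the previous frame's SNR estimate (computed with the gain filter applied)
// and the current post SNR.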
void ComputeSnr(rtc::ArrayView<const float, kFftSizeBy2Plus1> filter,
                rtc::ArrayView<const float> prev_signal_spectrum,
                rtc::ArrayView<const float> signal_spectrum,
                rtc::ArrayView<const float> prev_noise_spectrum,
                rtc::ArrayView<const float> noise_spectrum,
                rtc::ArrayView<float> prior_snr,
                rtc::ArrayView<float> post_snr) {
  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
    // Previous post SNR.
    // Previous estimate: based on previous frame with gain filter.
    float prev_estimate = prev_signal_spectrum[i] /
                          (prev_noise_spectrum[i] + 0.0001f) * filter[i];
    // Post SNR.
    if (signal_spectrum[i] > noise_spectrum[i]) {
      post_snr[i] = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f;
    } else {
      post_snr[i] = 0.f;
    }
    // The decision-directed estimate of the prior SNR is a weighted sum of the
    // current and previous estimates.
    prior_snr[i] = 0.98f * prev_estimate + (1.f - 0.98f) * post_snr[i];
  }
}

// Computes the attenuating gain for the noise suppression of the upper bands.
float ComputeUpperBandsGain(
    float minimum_attenuating_gain,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> filter,
    rtc::ArrayView<const float> speech_probability,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_analysis_signal_spectrum,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
  // Average speech prob and filter gain for the end of the lowest band.
  constexpr int kNumAvgBins = 32;
  constexpr float kOneByNumAvgBins = 1.f / kNumAvgBins;

  float avg_prob_speech = 0.f;
  float avg_filter_gain = 0.f;
  for (size_t i = kFftSizeBy2Plus1 - kNumAvgBins - 1; i < kFftSizeBy2Plus1 - 1;
       i++) {
    avg_prob_speech += speech_probability[i];
    avg_filter_gain += filter[i];
  }
  avg_prob_speech = avg_prob_speech * kOneByNumAvgBins;
  avg_filter_gain = avg_filter_gain * kOneByNumAvgBins;

  // If the speech was suppressed by a component between Analyze and Process,
  // for example by an AEC, it should not be considered speech for the purpose
  // of high band suppression. To that end, the speech probability is scaled
  // accordingly.
  float sum_analysis_spectrum = 0.f;
  float sum_processing_spectrum = 0.f;
  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
    sum_analysis_spectrum += prev_analysis_signal_spectrum[i];
    sum_processing_spectrum += signal_spectrum[i];
  }

  // The magnitude spectrum computation ensures that the spectrum is strictly
  // positive.
  RTC_DCHECK_GT(sum_analysis_spectrum, 0.f);
  avg_prob_speech *= sum_processing_spectrum / sum_analysis_spectrum;

  // Compute gain based on speech probability.
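  // The tanh sigmoid maps the scaled average speech probability to a soft
  // gain that equals 0.5 for an average speech probability of 0.5.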
  float gain =
      0.5f * (1.f + static_cast<float>(tanh(2.f * avg_prob_speech - 1.f)));

  // Combine gain with low band gain.
  if (avg_prob_speech >= 0.5f) {
    gain = 0.25f * gain + 0.75f * avg_filter_gain;
  } else {
    gain = 0.5f * gain + 0.5f * avg_filter_gain;
  }

  // Make sure gain is within flooring range.
  return std::min(std::max(gain, minimum_attenuating_gain), 1.f);
}

}  // namespace

NoiseSuppressor::ChannelState::ChannelState(
    const SuppressionParams& suppression_params,
    size_t num_bands)
    : wiener_filter(suppression_params),
      noise_estimator(suppression_params),
      process_delay_memory(num_bands > 1 ? num_bands - 1 : 0) {
  analyze_analysis_memory.fill(0.f);
  prev_analysis_signal_spectrum.fill(1.f);
  process_analysis_memory.fill(0.f);
  process_synthesis_memory.fill(0.f);
  for (auto& d : process_delay_memory) {
    d.fill(0.f);
  }
}

NoiseSuppressor::NoiseSuppressor(const NsConfig& config,
                                 size_t sample_rate_hz,
                                 size_t num_channels)
    : num_bands_(NumBandsForRate(sample_rate_hz)),
      num_channels_(num_channels),
      suppression_params_(config.target_level),
      filter_bank_states_heap_(NumChannelsOnHeap(num_channels_)),
      upper_band_gains_heap_(NumChannelsOnHeap(num_channels_)),
      energies_before_filtering_heap_(NumChannelsOnHeap(num_channels_)),
      gain_adjustments_heap_(NumChannelsOnHeap(num_channels_)),
      channels_(num_channels_) {
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    channels_[ch] =
        std::make_unique<ChannelState>(suppression_params_, num_bands_);
  }
}

void NoiseSuppressor::AggregateWienerFilters(
    rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const {
  rtc::ArrayView<const float, kFftSizeBy2Plus1> filter0 =
      channels_[0]->wiener_filter.get_filter();
  std::copy(filter0.begin(), filter0.end(), filter.begin());

  for (size_t ch = 1; ch < num_channels_; ++ch) {
    rtc::ArrayView<const float, kFftSizeBy2Plus1> filter_ch =
        channels_[ch]->wiener_filter.get_filter();

    for (size_t k = 0; k < kFftSizeBy2Plus1; ++k) {
      filter[k] = std::min(filter[k], filter_ch[k]);
    }
  }
}

void NoiseSuppressor::Analyze(const AudioBuffer& audio) {
  // Prepare the noise estimator for the analysis stage.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    channels_[ch]->noise_estimator.PrepareAnalysis();
  }

  // Check for zero frames.
  bool zero_frame = true;
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    rtc::ArrayView<const float, kNsFrameSize> y_band0(
        &audio.split_bands_const(ch)[0][0], kNsFrameSize);
    float energy = ComputeEnergyOfExtendedFrame(
        y_band0, channels_[ch]->analyze_analysis_memory);
    if (energy > 0.f) {
      zero_frame = false;
      break;
    }
  }

  if (zero_frame) {
    // We want to avoid updating statistics in this case:
    // Updating feature statistics when we have zeros only will cause
    // thresholds to move towards zero signal situations. This in turn has the
    // effect that once the signal is "turned on" (non-zero values), everything
    // will be treated as speech and there is no noise suppression effect.
    // Depending on the duration of the inactive signal, it takes a
    // considerable amount of time for the system to learn what is noise and
    // what is speech.
    return;
  }

  // Only update the analysis counter for frames that are properly analyzed.
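  // The counter is reset to zero if the increment wraps around to a negative
  // value.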
  if (++num_analyzed_frames_ < 0) {
    num_analyzed_frames_ = 0;
  }

  // Analyze all channels.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    std::unique_ptr<ChannelState>& ch_p = channels_[ch];
    rtc::ArrayView<const float, kNsFrameSize> y_band0(
        &audio.split_bands_const(ch)[0][0], kNsFrameSize);

    // Form an extended frame and apply analysis filter bank windowing.
    std::array<float, kFftSize> extended_frame;
    FormExtendedFrame(y_band0, ch_p->analyze_analysis_memory, extended_frame);
    ApplyFilterBankWindow(extended_frame);

    // Compute the magnitude spectrum.
    std::array<float, kFftSize> real;
    std::array<float, kFftSize> imag;
    fft_.Fft(extended_frame, real, imag);

    std::array<float, kFftSizeBy2Plus1> signal_spectrum;
    ComputeMagnitudeSpectrum(real, imag, signal_spectrum);

    // Compute energies.
    float signal_energy = 0.f;
    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
      signal_energy += real[i] * real[i] + imag[i] * imag[i];
    }
    signal_energy /= kFftSizeBy2Plus1;

    float signal_spectral_sum = 0.f;
    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
      signal_spectral_sum += signal_spectrum[i];
    }

    // Estimate the noise spectra and the probability estimates of speech
    // presence.
    ch_p->noise_estimator.PreUpdate(num_analyzed_frames_, signal_spectrum,
                                    signal_spectral_sum);

    std::array<float, kFftSizeBy2Plus1> post_snr;
    std::array<float, kFftSizeBy2Plus1> prior_snr;
    ComputeSnr(ch_p->wiener_filter.get_filter(),
               ch_p->prev_analysis_signal_spectrum, signal_spectrum,
               ch_p->noise_estimator.get_prev_noise_spectrum(),
               ch_p->noise_estimator.get_noise_spectrum(), prior_snr, post_snr);

    ch_p->speech_probability_estimator.Update(
        num_analyzed_frames_, prior_snr, post_snr,
        ch_p->noise_estimator.get_conservative_noise_spectrum(),
        signal_spectrum, signal_spectral_sum, signal_energy);

    ch_p->noise_estimator.PostUpdate(
        ch_p->speech_probability_estimator.get_probability(), signal_spectrum);

    // Store the magnitude spectrum to make it available for the Process
    // method.
    std::copy(signal_spectrum.begin(), signal_spectrum.end(),
              ch_p->prev_analysis_signal_spectrum.begin());
  }
}

void NoiseSuppressor::Process(AudioBuffer* audio) {
  // Select the space for storing data during the processing.
  std::array<FilterBankState, kMaxNumChannelsOnStack> filter_bank_states_stack;
  rtc::ArrayView<FilterBankState> filter_bank_states(
      filter_bank_states_stack.data(), num_channels_);
  std::array<float, kMaxNumChannelsOnStack> upper_band_gains_stack;
  rtc::ArrayView<float> upper_band_gains(upper_band_gains_stack.data(),
                                         num_channels_);
  std::array<float, kMaxNumChannelsOnStack> energies_before_filtering_stack;
  rtc::ArrayView<float> energies_before_filtering(
      energies_before_filtering_stack.data(), num_channels_);
  std::array<float, kMaxNumChannelsOnStack> gain_adjustments_stack;
  rtc::ArrayView<float> gain_adjustments(gain_adjustments_stack.data(),
                                         num_channels_);
  if (NumChannelsOnHeap(num_channels_) > 0) {
    // If the stack-allocated space is too small, use the heap for storing the
    // data.
    filter_bank_states = rtc::ArrayView<FilterBankState>(
        filter_bank_states_heap_.data(), num_channels_);
    upper_band_gains =
        rtc::ArrayView<float>(upper_band_gains_heap_.data(), num_channels_);
    energies_before_filtering = rtc::ArrayView<float>(
        energies_before_filtering_heap_.data(), num_channels_);
    gain_adjustments =
        rtc::ArrayView<float>(gain_adjustments_heap_.data(), num_channels_);
  }

  // Compute the suppression filters for all channels.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    // Form an extended frame and apply analysis filter bank windowing.
    rtc::ArrayView<float, kNsFrameSize> y_band0(&audio->split_bands(ch)[0][0],
                                                kNsFrameSize);

    FormExtendedFrame(y_band0, channels_[ch]->process_analysis_memory,
                      filter_bank_states[ch].extended_frame);

    ApplyFilterBankWindow(filter_bank_states[ch].extended_frame);

    energies_before_filtering[ch] =
        ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame);

    // Perform filter bank analysis and compute the magnitude spectrum.
    fft_.Fft(filter_bank_states[ch].extended_frame, filter_bank_states[ch].real,
             filter_bank_states[ch].imag);

    std::array<float, kFftSizeBy2Plus1> signal_spectrum;
    ComputeMagnitudeSpectrum(filter_bank_states[ch].real,
                             filter_bank_states[ch].imag, signal_spectrum);

    // Compute the frequency domain gain filter for noise attenuation.
    channels_[ch]->wiener_filter.Update(
        num_analyzed_frames_,
        channels_[ch]->noise_estimator.get_noise_spectrum(),
        channels_[ch]->noise_estimator.get_prev_noise_spectrum(),
        channels_[ch]->noise_estimator.get_parametric_noise_spectrum(),
        signal_spectrum);

    if (num_bands_ > 1) {
      // Compute the time-domain gain for attenuating the noise in the upper
      // bands.

      upper_band_gains[ch] = ComputeUpperBandsGain(
          suppression_params_.minimum_attenuating_gain,
          channels_[ch]->wiener_filter.get_filter(),
          channels_[ch]->speech_probability_estimator.get_probability(),
          channels_[ch]->prev_analysis_signal_spectrum, signal_spectrum);
    }
  }

  // Aggregate the Wiener filters for all channels.
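  // With more than one channel, the per-bin minimum across the channel filters
  // is used so that the same (most suppressing) gain is applied to all
  // channels.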
  std::array<float, kFftSizeBy2Plus1> filter_data;
  rtc::ArrayView<const float, kFftSizeBy2Plus1> filter = filter_data;
  if (num_channels_ == 1) {
    filter = channels_[0]->wiener_filter.get_filter();
  } else {
    AggregateWienerFilters(filter_data);
  }

  for (size_t ch = 0; ch < num_channels_; ++ch) {
    // Apply the filter to the lower band.
    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
      filter_bank_states[ch].real[i] *= filter[i];
      filter_bank_states[ch].imag[i] *= filter[i];
    }
  }

  // Perform filter bank synthesis.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    fft_.Ifft(filter_bank_states[ch].real, filter_bank_states[ch].imag,
              filter_bank_states[ch].extended_frame);
  }

  for (size_t ch = 0; ch < num_channels_; ++ch) {
    const float energy_after_filtering =
        ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame);

    // Apply synthesis window.
    ApplyFilterBankWindow(filter_bank_states[ch].extended_frame);

    // Compute the adjustment of the noise attenuation filter based on the
    // effect of the attenuation.
    gain_adjustments[ch] =
        channels_[ch]->wiener_filter.ComputeOverallScalingFactor(
            num_analyzed_frames_,
            channels_[ch]->speech_probability_estimator.get_prior_probability(),
            energies_before_filtering[ch], energy_after_filtering);
  }

  // Select and apply the adjustment of the noise attenuation filter based on
  // the effect of the attenuation.
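  // The smallest per-channel adjustment is applied to all channels.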
  float gain_adjustment = gain_adjustments[0];
  for (size_t ch = 1; ch < num_channels_; ++ch) {
    gain_adjustment = std::min(gain_adjustment, gain_adjustments[ch]);
  }
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    for (size_t i = 0; i < kFftSize; ++i) {
      filter_bank_states[ch].extended_frame[i] =
          gain_adjustment * filter_bank_states[ch].extended_frame[i];
    }
  }

  // Use overlap-and-add to form the output frame of the lowest band.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    rtc::ArrayView<float, kNsFrameSize> y_band0(&audio->split_bands(ch)[0][0],
                                                kNsFrameSize);
    OverlapAndAdd(filter_bank_states[ch].extended_frame,
                  channels_[ch]->process_synthesis_memory, y_band0);
  }

  if (num_bands_ > 1) {
    // Select the noise attenuating gain to apply to the upper bands.
    float upper_band_gain = upper_band_gains[0];
    for (size_t ch = 1; ch < num_channels_; ++ch) {
      upper_band_gain = std::min(upper_band_gain, upper_band_gains[ch]);
    }

    // Process the upper bands.
    for (size_t ch = 0; ch < num_channels_; ++ch) {
      for (size_t b = 1; b < num_bands_; ++b) {
        // Delay the upper bands to match the delay of the filterbank applied
        // to the lowest band.
        rtc::ArrayView<float, kNsFrameSize> y_band(
            &audio->split_bands(ch)[b][0], kNsFrameSize);
        std::array<float, kNsFrameSize> delayed_frame;
        DelaySignal(y_band, channels_[ch]->process_delay_memory[b - 1],
                    delayed_frame);

        // Apply the time-domain noise-attenuating gain.
        for (size_t j = 0; j < kNsFrameSize; j++) {
          y_band[j] = upper_band_gain * delayed_frame[j];
        }
      }
    }
  }

  // Limit the output to the allowed range.
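  // The samples are clamped to the 16-bit signed integer range.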
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    for (size_t b = 0; b < num_bands_; ++b) {
      rtc::ArrayView<float, kNsFrameSize> y_band(&audio->split_bands(ch)[b][0],
                                                 kNsFrameSize);
      for (size_t j = 0; j < kNsFrameSize; j++) {
        y_band[j] = std::min(std::max(y_band[j], -32768.f), 32767.f);
      }
    }
  }
}

}  // namespace webrtc