/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
12
13#include <assert.h>
14
15#include <algorithm> // min, max
16#include <cstring> // memcpy
17
18#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
19#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
20#include "webrtc/modules/audio_coding/neteq4/post_decode_vad.h"
21
22namespace webrtc {
23
24void BackgroundNoise::Reset() {
25 initialized_ = false;
26 for (size_t channel = 0; channel < num_channels_; ++channel) {
27 channel_parameters_[channel].Reset();
28 }
29 // Keep _bgnMode as it is.
30}
31
32void BackgroundNoise::Update(const AudioMultiVector<int16_t>& input,
33 const PostDecodeVad& vad) {
34 if (vad.running() && vad.active_speech()) {
35 // Do not update the background noise parameters if we know that the signal
36 // is active speech.
37 return;
38 }
39
40 int32_t auto_correlation[kMaxLpcOrder + 1];
41 int16_t fiter_output[kMaxLpcOrder + kResidualLength];
42 int16_t reflection_coefficients[kMaxLpcOrder];
43 int16_t lpc_coefficients[kMaxLpcOrder + 1];
44
45 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
46 ChannelParameters& parameters = channel_parameters_[channel_ix];
47 int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
48 int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
49 memcpy(temp_signal,
50 &input[channel_ix][input.Size() - kVecLen],
51 sizeof(int16_t) * kVecLen);
52
53 int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen,
54 auto_correlation);
55
56 if ((!vad.running() &&
57 sample_energy < parameters.energy_update_threshold) ||
58 (vad.running() && !vad.active_speech())) {
59 // Generate LPC coefficients.
60 if (auto_correlation[0] > 0) {
61 // Regardless of whether the filter is actually updated or not,
62 // update energy threshold levels, since we have in fact observed
63 // a low energy signal.
64 if (sample_energy < parameters.energy_update_threshold) {
65 // Never go under 1.0 in average sample energy.
66 parameters.energy_update_threshold = std::max(sample_energy, 1);
67 parameters.low_energy_update_threshold = 0;
68 }
69
70 // Only update BGN if filter is stable, i.e., if return value from
71 // Levinson-Durbin function is 1.
72 if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients,
73 reflection_coefficients,
74 kMaxLpcOrder) != 1) {
75 return;
76 }
77 } else {
78 // Center value in auto-correlation is not positive. Do not update.
79 return;
80 }
81
82 // Generate the CNG gain factor by looking at the energy of the residual.
83 WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
84 fiter_output, lpc_coefficients,
85 kMaxLpcOrder + 1, kResidualLength);
86 int32_t residual_energy = WebRtcSpl_DotProductWithScale(fiter_output,
87 fiter_output,
88 kResidualLength,
89 0);
90
91 // Check spectral flatness.
92 // Comparing the residual variance with the input signal variance tells
93 // if the spectrum is flat or not.
94 // If 20 * residual_energy >= sample_energy << 6, the spectrum is flat
95 // enough. Also ensure that the energy is non-zero.
96 if ((residual_energy * 20 >= (sample_energy << 6)) &&
97 (sample_energy > 0)) {
98 // Spectrum is flat enough; save filter parameters.
99 // |temp_signal| + |kVecLen| - |kMaxLpcOrder| points at the first of the
100 // |kMaxLpcOrder| samples in the residual signal, which will form the
101 // filter state for the next noise generation.
102 SaveParameters(channel_ix, lpc_coefficients,
103 temp_signal + kVecLen - kMaxLpcOrder, sample_energy,
104 residual_energy);
105 }
106 } else {
107 // Will only happen if post-decode VAD is disabled and |sample_energy| is
108 // not low enough. Increase the threshold for update so that it increases
109 // by a factor 4 in 4 seconds.
110 IncrementEnergyThreshold(channel_ix, sample_energy);
111 }
112 }
113 return;
114}
115
116int32_t BackgroundNoise::Energy(size_t channel) const {
117 assert(channel < num_channels_);
118 return channel_parameters_[channel].energy;
119}
120
121void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) {
122 assert(channel < num_channels_);
123 channel_parameters_[channel].mute_factor = value;
124}
125
126int16_t BackgroundNoise::MuteFactor(size_t channel) const {
127 assert(channel < num_channels_);
128 return channel_parameters_[channel].mute_factor;
129}
130
131const int16_t* BackgroundNoise::Filter(size_t channel) const {
132 assert(channel < num_channels_);
133 return channel_parameters_[channel].filter;
134}
135
136const int16_t* BackgroundNoise::FilterState(size_t channel) const {
137 assert(channel < num_channels_);
138 return channel_parameters_[channel].filter_state;
139}
140
141void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input,
142 size_t length) {
143 assert(channel < num_channels_);
144 length = std::min(length, static_cast<size_t>(kMaxLpcOrder));
145 memcpy(channel_parameters_[channel].filter_state, input,
146 length * sizeof(int16_t));
147}
148
149int16_t BackgroundNoise::Scale(size_t channel) const {
150 assert(channel < num_channels_);
151 return channel_parameters_[channel].scale;
152}
153int16_t BackgroundNoise::ScaleShift(size_t channel) const {
154 assert(channel < num_channels_);
155 return channel_parameters_[channel].scale_shift;
156}
157
158int32_t BackgroundNoise::CalculateAutoCorrelation(
159 const int16_t* signal, size_t length, int32_t* auto_correlation) const {
160 int16_t signal_max = WebRtcSpl_MaxAbsValueW16(signal, length);
161 int correlation_scale = kLogVecLen -
162 WebRtcSpl_NormW32(signal_max * signal_max);
163 correlation_scale = std::max(0, correlation_scale);
164
165 static const int kCorrelationStep = -1;
166 WebRtcSpl_CrossCorrelation(auto_correlation, signal, signal,
167 length, kMaxLpcOrder + 1, correlation_scale,
168 kCorrelationStep);
169
170 // Number of shifts to normalize energy to energy/sample.
171 int energy_sample_shift = kLogVecLen - correlation_scale;
172 return auto_correlation[0] >> energy_sample_shift;
173}
174
175void BackgroundNoise::IncrementEnergyThreshold(size_t channel,
176 int32_t sample_energy) {
177 // TODO(hlundin): Simplify the below threshold update. What this code
178 // does is simply "threshold += (increment * threshold) >> 16", but due
179 // to the limited-width operations, it is not exactly the same. The
180 // difference should be inaudible, but bit-exactness would not be
181 // maintained.
182 assert(channel < num_channels_);
183 ChannelParameters& parameters = channel_parameters_[channel];
184 int32_t temp_energy =
185 WEBRTC_SPL_MUL_16_16_RSFT(kThresholdIncrement,
186 parameters.low_energy_update_threshold, 16);
187 temp_energy += kThresholdIncrement *
188 (parameters.energy_update_threshold & 0xFF);
189 temp_energy += (kThresholdIncrement *
190 ((parameters.energy_update_threshold>>8) & 0xFF)) << 8;
191 parameters.low_energy_update_threshold += temp_energy;
192
193 parameters.energy_update_threshold += kThresholdIncrement *
194 (parameters.energy_update_threshold>>16);
195 parameters.energy_update_threshold +=
196 parameters.low_energy_update_threshold >> 16;
197 parameters.low_energy_update_threshold =
198 parameters.low_energy_update_threshold & 0x0FFFF;
199
200 // Update maximum energy.
201 // Decrease by a factor 1/1024 each time.
202 parameters.max_energy = parameters.max_energy -
203 (parameters.max_energy >> 10);
204 if (sample_energy > parameters.max_energy) {
205 parameters.max_energy = sample_energy;
206 }
207
208 // Set |energy_update_threshold| to no less than 60 dB lower than
209 // |max_energy_|. Adding 524288 assures proper rounding.
210 int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20;
211 if (energy_update_threshold > parameters.energy_update_threshold) {
212 parameters.energy_update_threshold = energy_update_threshold;
213 }
214}
215
216void BackgroundNoise::SaveParameters(size_t channel,
217 const int16_t* lpc_coefficients,
218 const int16_t* filter_state,
219 int32_t sample_energy,
220 int32_t residual_energy) {
221 assert(channel < num_channels_);
222 ChannelParameters& parameters = channel_parameters_[channel];
223 memcpy(parameters.filter, lpc_coefficients,
224 (kMaxLpcOrder+1) * sizeof(int16_t));
225 memcpy(parameters.filter_state, filter_state,
226 kMaxLpcOrder * sizeof(int16_t));
227 // Save energy level and update energy threshold levels.
228 // Never get under 1.0 in average sample energy.
229 parameters.energy = std::max(sample_energy, 1);
230 parameters.energy_update_threshold = parameters.energy;
231 parameters.low_energy_update_threshold = 0;
232
233 // Normalize residual_energy to 29 or 30 bits before sqrt.
234 int norm_shift = WebRtcSpl_NormW32(residual_energy) - 1;
235 if (norm_shift & 0x1) {
236 norm_shift -= 1; // Even number of shifts required.
237 }
238 assert(norm_shift >= 0); // Should always be positive.
239 residual_energy = residual_energy << norm_shift;
240
241 // Calculate scale and shift factor.
242 parameters.scale = WebRtcSpl_SqrtFloor(residual_energy);
243 // Add 13 to the |scale_shift_|, since the random numbers table is in
244 // Q13.
245 // TODO(hlundin): Move the "13" to where the |scale_shift_| is used?
246 parameters.scale_shift = 13 + ((kLogResidualLength + norm_shift) / 2);
247
248 initialized_ = true;
249}
250
251} // namespace webrtc