henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
henrik.lundin@webrtc.org | 9c55f0f | 2014-06-09 08:10:28 +0000 | [diff] [blame] | 11 | #include "webrtc/modules/audio_coding/neteq/expand.h" |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 12 | |
| 13 | #include <assert.h> |
pbos@webrtc.org | 12dc1a3 | 2013-08-05 16:22:53 +0000 | [diff] [blame] | 14 | #include <string.h> // memset |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 15 | |
| 16 | #include <algorithm> // min, max |
turaj@webrtc.org | 7126b38 | 2013-07-31 16:05:09 +0000 | [diff] [blame] | 17 | #include <limits> // numeric_limits<T> |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 18 | |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 19 | #include "webrtc/base/safe_conversions.h" |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 20 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
henrik.lundin@webrtc.org | 9c55f0f | 2014-06-09 08:10:28 +0000 | [diff] [blame] | 21 | #include "webrtc/modules/audio_coding/neteq/background_noise.h" |
| 22 | #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" |
| 23 | #include "webrtc/modules/audio_coding/neteq/random_vector.h" |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 24 | #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h" |
henrik.lundin@webrtc.org | 9c55f0f | 2014-06-09 08:10:28 +0000 | [diff] [blame] | 25 | #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 26 | |
| 27 | namespace webrtc { |
| 28 | |
Karl Wiberg | 7f6c4d4 | 2015-04-09 15:44:22 +0200 | [diff] [blame] | 29 | Expand::Expand(BackgroundNoise* background_noise, |
| 30 | SyncBuffer* sync_buffer, |
| 31 | RandomVector* random_vector, |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 32 | StatisticsCalculator* statistics, |
Karl Wiberg | 7f6c4d4 | 2015-04-09 15:44:22 +0200 | [diff] [blame] | 33 | int fs, |
| 34 | size_t num_channels) |
| 35 | : random_vector_(random_vector), |
| 36 | sync_buffer_(sync_buffer), |
| 37 | first_expand_(true), |
| 38 | fs_hz_(fs), |
| 39 | num_channels_(num_channels), |
| 40 | consecutive_expands_(0), |
| 41 | background_noise_(background_noise), |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 42 | statistics_(statistics), |
Karl Wiberg | 7f6c4d4 | 2015-04-09 15:44:22 +0200 | [diff] [blame] | 43 | overlap_length_(5 * fs / 8000), |
| 44 | lag_index_direction_(0), |
| 45 | current_lag_index_(0), |
| 46 | stop_muting_(false), |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 47 | expand_duration_samples_(0), |
Karl Wiberg | 7f6c4d4 | 2015-04-09 15:44:22 +0200 | [diff] [blame] | 48 | channel_parameters_(new ChannelParameters[num_channels_]) { |
| 49 | assert(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000); |
| 50 | assert(fs <= kMaxSampleRate); // Should not be possible. |
| 51 | assert(num_channels_ > 0); |
| 52 | memset(expand_lags_, 0, sizeof(expand_lags_)); |
| 53 | Reset(); |
| 54 | } |
| 55 | |
// Defaulted destructor, defined out-of-line.
Expand::~Expand() = default;
| 57 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 58 | void Expand::Reset() { |
| 59 | first_expand_ = true; |
| 60 | consecutive_expands_ = 0; |
| 61 | max_lag_ = 0; |
| 62 | for (size_t ix = 0; ix < num_channels_; ++ix) { |
| 63 | channel_parameters_[ix].expand_vector0.Clear(); |
| 64 | channel_parameters_[ix].expand_vector1.Clear(); |
| 65 | } |
| 66 | } |
| 67 | |
henrik.lundin@webrtc.org | fd11bbf | 2013-09-30 20:38:44 +0000 | [diff] [blame] | 68 | int Expand::Process(AudioMultiVector* output) { |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 69 | int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30]; |
| 70 | int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125]; |
| 71 | static const int kTempDataSize = 3600; |
| 72 | int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this. |
| 73 | int16_t* voiced_vector_storage = temp_data; |
| 74 | int16_t* voiced_vector = &voiced_vector_storage[overlap_length_]; |
| 75 | static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; |
| 76 | int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125]; |
| 77 | int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder; |
| 78 | int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder; |
| 79 | |
| 80 | int fs_mult = fs_hz_ / 8000; |
| 81 | |
| 82 | if (first_expand_) { |
| 83 | // Perform initial setup if this is the first expansion since last reset. |
| 84 | AnalyzeSignal(random_vector); |
| 85 | first_expand_ = false; |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 86 | expand_duration_samples_ = 0; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 87 | } else { |
| 88 | // This is not the first expansion, parameters are already estimated. |
| 89 | // Extract a noise segment. |
| 90 | int16_t rand_length = max_lag_; |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 91 | // This only applies to SWB where length could be larger than 256. |
| 92 | assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30); |
| 93 | GenerateRandomVector(2, rand_length, random_vector); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 94 | } |
| 95 | |
| 96 | |
| 97 | // Generate signal. |
| 98 | UpdateLagIndex(); |
| 99 | |
| 100 | // Voiced part. |
| 101 | // Generate a weighted vector with the current lag. |
| 102 | size_t expansion_vector_length = max_lag_ + overlap_length_; |
| 103 | size_t current_lag = expand_lags_[current_lag_index_]; |
| 104 | // Copy lag+overlap data. |
| 105 | size_t expansion_vector_position = expansion_vector_length - current_lag - |
| 106 | overlap_length_; |
| 107 | size_t temp_length = current_lag + overlap_length_; |
| 108 | for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { |
| 109 | ChannelParameters& parameters = channel_parameters_[channel_ix]; |
| 110 | if (current_lag_index_ == 0) { |
| 111 | // Use only expand_vector0. |
| 112 | assert(expansion_vector_position + temp_length <= |
| 113 | parameters.expand_vector0.Size()); |
| 114 | memcpy(voiced_vector_storage, |
| 115 | ¶meters.expand_vector0[expansion_vector_position], |
| 116 | sizeof(int16_t) * temp_length); |
| 117 | } else if (current_lag_index_ == 1) { |
| 118 | // Mix 3/4 of expand_vector0 with 1/4 of expand_vector1. |
| 119 | WebRtcSpl_ScaleAndAddVectorsWithRound( |
| 120 | ¶meters.expand_vector0[expansion_vector_position], 3, |
| 121 | ¶meters.expand_vector1[expansion_vector_position], 1, 2, |
turaj@webrtc.org | 362a55e | 2013-09-20 16:25:28 +0000 | [diff] [blame] | 122 | voiced_vector_storage, static_cast<int>(temp_length)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 123 | } else if (current_lag_index_ == 2) { |
| 124 | // Mix 1/2 of expand_vector0 with 1/2 of expand_vector1. |
| 125 | assert(expansion_vector_position + temp_length <= |
| 126 | parameters.expand_vector0.Size()); |
| 127 | assert(expansion_vector_position + temp_length <= |
| 128 | parameters.expand_vector1.Size()); |
| 129 | WebRtcSpl_ScaleAndAddVectorsWithRound( |
| 130 | ¶meters.expand_vector0[expansion_vector_position], 1, |
| 131 | ¶meters.expand_vector1[expansion_vector_position], 1, 1, |
turaj@webrtc.org | 362a55e | 2013-09-20 16:25:28 +0000 | [diff] [blame] | 132 | voiced_vector_storage, static_cast<int>(temp_length)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 133 | } |
| 134 | |
| 135 | // Get tapering window parameters. Values are in Q15. |
| 136 | int16_t muting_window, muting_window_increment; |
| 137 | int16_t unmuting_window, unmuting_window_increment; |
| 138 | if (fs_hz_ == 8000) { |
| 139 | muting_window = DspHelper::kMuteFactorStart8kHz; |
| 140 | muting_window_increment = DspHelper::kMuteFactorIncrement8kHz; |
| 141 | unmuting_window = DspHelper::kUnmuteFactorStart8kHz; |
| 142 | unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz; |
| 143 | } else if (fs_hz_ == 16000) { |
| 144 | muting_window = DspHelper::kMuteFactorStart16kHz; |
| 145 | muting_window_increment = DspHelper::kMuteFactorIncrement16kHz; |
| 146 | unmuting_window = DspHelper::kUnmuteFactorStart16kHz; |
| 147 | unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz; |
| 148 | } else if (fs_hz_ == 32000) { |
| 149 | muting_window = DspHelper::kMuteFactorStart32kHz; |
| 150 | muting_window_increment = DspHelper::kMuteFactorIncrement32kHz; |
| 151 | unmuting_window = DspHelper::kUnmuteFactorStart32kHz; |
| 152 | unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz; |
| 153 | } else { // fs_ == 48000 |
| 154 | muting_window = DspHelper::kMuteFactorStart48kHz; |
| 155 | muting_window_increment = DspHelper::kMuteFactorIncrement48kHz; |
| 156 | unmuting_window = DspHelper::kUnmuteFactorStart48kHz; |
| 157 | unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz; |
| 158 | } |
| 159 | |
| 160 | // Smooth the expanded if it has not been muted to a low amplitude and |
| 161 | // |current_voice_mix_factor| is larger than 0.5. |
| 162 | if ((parameters.mute_factor > 819) && |
| 163 | (parameters.current_voice_mix_factor > 8192)) { |
| 164 | size_t start_ix = sync_buffer_->Size() - overlap_length_; |
| 165 | for (size_t i = 0; i < overlap_length_; i++) { |
| 166 | // Do overlap add between new vector and overlap. |
| 167 | (*sync_buffer_)[channel_ix][start_ix + i] = |
| 168 | (((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) + |
| 169 | (((parameters.mute_factor * voiced_vector_storage[i]) >> 14) * |
| 170 | unmuting_window) + 16384) >> 15; |
| 171 | muting_window += muting_window_increment; |
| 172 | unmuting_window += unmuting_window_increment; |
| 173 | } |
| 174 | } else if (parameters.mute_factor == 0) { |
| 175 | // The expanded signal will consist of only comfort noise if |
| 176 | // mute_factor = 0. Set the output length to 15 ms for best noise |
| 177 | // production. |
| 178 | // TODO(hlundin): This has been disabled since the length of |
| 179 | // parameters.expand_vector0 and parameters.expand_vector1 no longer |
| 180 | // match with expand_lags_, causing invalid reads and writes. Is it a good |
| 181 | // idea to enable this again, and solve the vector size problem? |
| 182 | // max_lag_ = fs_mult * 120; |
| 183 | // expand_lags_[0] = fs_mult * 120; |
| 184 | // expand_lags_[1] = fs_mult * 120; |
| 185 | // expand_lags_[2] = fs_mult * 120; |
| 186 | } |
| 187 | |
| 188 | // Unvoiced part. |
| 189 | // Filter |scaled_random_vector| through |ar_filter_|. |
| 190 | memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state, |
| 191 | sizeof(int16_t) * kUnvoicedLpcOrder); |
| 192 | int32_t add_constant = 0; |
| 193 | if (parameters.ar_gain_scale > 0) { |
| 194 | add_constant = 1 << (parameters.ar_gain_scale - 1); |
| 195 | } |
| 196 | WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector, |
| 197 | parameters.ar_gain, add_constant, |
turaj@webrtc.org | 362a55e | 2013-09-20 16:25:28 +0000 | [diff] [blame] | 198 | parameters.ar_gain_scale, |
| 199 | static_cast<int>(current_lag)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 200 | WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector, |
turaj@webrtc.org | 362a55e | 2013-09-20 16:25:28 +0000 | [diff] [blame] | 201 | parameters.ar_filter, kUnvoicedLpcOrder + 1, |
| 202 | static_cast<int>(current_lag)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 203 | memcpy(parameters.ar_filter_state, |
| 204 | &(unvoiced_vector[current_lag - kUnvoicedLpcOrder]), |
| 205 | sizeof(int16_t) * kUnvoicedLpcOrder); |
| 206 | |
| 207 | // Combine voiced and unvoiced contributions. |
| 208 | |
| 209 | // Set a suitable cross-fading slope. |
| 210 | // For lag = |
| 211 | // <= 31 * fs_mult => go from 1 to 0 in about 8 ms; |
| 212 | // (>= 31 .. <= 63) * fs_mult => go from 1 to 0 in about 16 ms; |
| 213 | // >= 64 * fs_mult => go from 1 to 0 in about 32 ms. |
| 214 | // temp_shift = getbits(max_lag_) - 5. |
| 215 | int temp_shift = (31 - WebRtcSpl_NormW32(max_lag_)) - 5; |
| 216 | int16_t mix_factor_increment = 256 >> temp_shift; |
| 217 | if (stop_muting_) { |
| 218 | mix_factor_increment = 0; |
| 219 | } |
| 220 | |
| 221 | // Create combined signal by shifting in more and more of unvoiced part. |
| 222 | temp_shift = 8 - temp_shift; // = getbits(mix_factor_increment). |
Peter Kasting | 728d903 | 2015-06-11 14:31:38 -0700 | [diff] [blame] | 223 | size_t temp_length = (parameters.current_voice_mix_factor - |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 224 | parameters.voice_mix_factor) >> temp_shift; |
Peter Kasting | 728d903 | 2015-06-11 14:31:38 -0700 | [diff] [blame] | 225 | temp_length = std::min(temp_length, current_lag); |
| 226 | DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_length, |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 227 | ¶meters.current_voice_mix_factor, |
| 228 | mix_factor_increment, temp_data); |
| 229 | |
| 230 | // End of cross-fading period was reached before end of expanded signal |
| 231 | // path. Mix the rest with a fixed mixing factor. |
Peter Kasting | 728d903 | 2015-06-11 14:31:38 -0700 | [diff] [blame] | 232 | if (temp_length < current_lag) { |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 233 | if (mix_factor_increment != 0) { |
| 234 | parameters.current_voice_mix_factor = parameters.voice_mix_factor; |
| 235 | } |
Peter Kasting | b7e5054 | 2015-06-11 12:55:50 -0700 | [diff] [blame] | 236 | int16_t temp_scale = 16384 - parameters.current_voice_mix_factor; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 237 | WebRtcSpl_ScaleAndAddVectorsWithRound( |
Peter Kasting | 728d903 | 2015-06-11 14:31:38 -0700 | [diff] [blame] | 238 | voiced_vector + temp_length, parameters.current_voice_mix_factor, |
| 239 | unvoiced_vector + temp_length, temp_scale, 14, |
| 240 | temp_data + temp_length, static_cast<int>(current_lag - temp_length)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 241 | } |
| 242 | |
| 243 | // Select muting slope depending on how many consecutive expands we have |
| 244 | // done. |
| 245 | if (consecutive_expands_ == 3) { |
| 246 | // Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms. |
| 247 | // mute_slope = 0.0010 / fs_mult in Q20. |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 248 | parameters.mute_slope = std::max(parameters.mute_slope, 1049 / fs_mult); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 249 | } |
| 250 | if (consecutive_expands_ == 7) { |
| 251 | // Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms. |
| 252 | // mute_slope = 0.0020 / fs_mult in Q20. |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 253 | parameters.mute_slope = std::max(parameters.mute_slope, 2097 / fs_mult); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 254 | } |
| 255 | |
| 256 | // Mute segment according to slope value. |
| 257 | if ((consecutive_expands_ != 0) || !parameters.onset) { |
| 258 | // Mute to the previous level, then continue with the muting. |
| 259 | WebRtcSpl_AffineTransformVector(temp_data, temp_data, |
| 260 | parameters.mute_factor, 8192, |
turaj@webrtc.org | 362a55e | 2013-09-20 16:25:28 +0000 | [diff] [blame] | 261 | 14, static_cast<int>(current_lag)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 262 | |
| 263 | if (!stop_muting_) { |
| 264 | DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag); |
| 265 | |
| 266 | // Shift by 6 to go from Q20 to Q14. |
| 267 | // TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong. |
| 268 | // Legacy. |
turaj@webrtc.org | 362a55e | 2013-09-20 16:25:28 +0000 | [diff] [blame] | 269 | int16_t gain = static_cast<int16_t>(16384 - |
| 270 | (((current_lag * parameters.mute_slope) + 8192) >> 6)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 271 | gain = ((gain * parameters.mute_factor) + 8192) >> 14; |
| 272 | |
| 273 | // Guard against getting stuck with very small (but sometimes audible) |
| 274 | // gain. |
| 275 | if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) { |
| 276 | parameters.mute_factor = 0; |
| 277 | } else { |
| 278 | parameters.mute_factor = gain; |
| 279 | } |
| 280 | } |
| 281 | } |
| 282 | |
| 283 | // Background noise part. |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 284 | GenerateBackgroundNoise(random_vector, |
| 285 | channel_ix, |
| 286 | channel_parameters_[channel_ix].mute_slope, |
| 287 | TooManyExpands(), |
| 288 | current_lag, |
| 289 | unvoiced_array_memory); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 290 | |
| 291 | // Add background noise to the combined voiced-unvoiced signal. |
| 292 | for (size_t i = 0; i < current_lag; i++) { |
| 293 | temp_data[i] = temp_data[i] + noise_vector[i]; |
| 294 | } |
| 295 | if (channel_ix == 0) { |
| 296 | output->AssertSize(current_lag); |
| 297 | } else { |
| 298 | assert(output->Size() == current_lag); |
| 299 | } |
| 300 | memcpy(&(*output)[channel_ix][0], temp_data, |
| 301 | sizeof(temp_data[0]) * current_lag); |
| 302 | } |
| 303 | |
| 304 | // Increase call number and cap it. |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 305 | consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands ? |
| 306 | kMaxConsecutiveExpands : consecutive_expands_ + 1; |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 307 | expand_duration_samples_ += output->Size(); |
| 308 | // Clamp the duration counter at 2 seconds. |
| 309 | expand_duration_samples_ = |
| 310 | std::min(expand_duration_samples_, rtc::checked_cast<size_t>(fs_hz_ * 2)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 311 | return 0; |
| 312 | } |
| 313 | |
| 314 | void Expand::SetParametersForNormalAfterExpand() { |
| 315 | current_lag_index_ = 0; |
| 316 | lag_index_direction_ = 0; |
| 317 | stop_muting_ = true; // Do not mute signal any more. |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 318 | statistics_->LogDelayedPacketOutageEvent( |
| 319 | rtc::checked_cast<int>(expand_duration_samples_) / (fs_hz_ / 1000)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 320 | } |
| 321 | |
| 322 | void Expand::SetParametersForMergeAfterExpand() { |
| 323 | current_lag_index_ = -1; /* out of the 3 possible ones */ |
| 324 | lag_index_direction_ = 1; /* make sure we get the "optimal" lag */ |
| 325 | stop_muting_ = true; |
| 326 | } |
| 327 | |
// Returns |overlap_length_|, which the constructor sets to 5 * fs / 8000.
size_t Expand::overlap_length() const {
  return overlap_length_;
}
| 331 | |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 332 | void Expand::InitializeForAnExpandPeriod() { |
| 333 | lag_index_direction_ = 1; |
| 334 | current_lag_index_ = -1; |
| 335 | stop_muting_ = false; |
| 336 | random_vector_->set_seed_increment(1); |
| 337 | consecutive_expands_ = 0; |
| 338 | for (size_t ix = 0; ix < num_channels_; ++ix) { |
| 339 | channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14. |
| 340 | channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14. |
| 341 | // Start with 0 gain for background noise. |
| 342 | background_noise_->SetMuteFactor(ix, 0); |
| 343 | } |
| 344 | } |
| 345 | |
| 346 | bool Expand::TooManyExpands() { |
| 347 | return consecutive_expands_ >= kMaxConsecutiveExpands; |
| 348 | } |
| 349 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 350 | void Expand::AnalyzeSignal(int16_t* random_vector) { |
| 351 | int32_t auto_correlation[kUnvoicedLpcOrder + 1]; |
| 352 | int16_t reflection_coeff[kUnvoicedLpcOrder]; |
| 353 | int16_t correlation_vector[kMaxSampleRate / 8000 * 102]; |
| 354 | int best_correlation_index[kNumCorrelationCandidates]; |
| 355 | int16_t best_correlation[kNumCorrelationCandidates]; |
| 356 | int16_t best_distortion_index[kNumCorrelationCandidates]; |
| 357 | int16_t best_distortion[kNumCorrelationCandidates]; |
| 358 | int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1]; |
| 359 | int32_t best_distortion_w32[kNumCorrelationCandidates]; |
| 360 | static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; |
| 361 | int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125]; |
| 362 | int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder; |
| 363 | |
| 364 | int fs_mult = fs_hz_ / 8000; |
| 365 | |
| 366 | // Pre-calculate common multiplications with fs_mult. |
| 367 | int fs_mult_4 = fs_mult * 4; |
| 368 | int fs_mult_20 = fs_mult * 20; |
| 369 | int fs_mult_120 = fs_mult * 120; |
| 370 | int fs_mult_dist_len = fs_mult * kDistortionLength; |
| 371 | int fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength; |
| 372 | |
| 373 | const size_t signal_length = 256 * fs_mult; |
| 374 | const int16_t* audio_history = |
| 375 | &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length]; |
| 376 | |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 377 | // Initialize. |
| 378 | InitializeForAnExpandPeriod(); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 379 | |
| 380 | // Calculate correlation in downsampled domain (4 kHz sample rate). |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 381 | int correlation_scale; |
turaj@webrtc.org | 58cd316 | 2013-10-31 15:15:55 +0000 | [diff] [blame] | 382 | int correlation_length = 51; // TODO(hlundin): Legacy bit-exactness. |
| 383 | // If it is decided to break bit-exactness |correlation_length| should be |
| 384 | // initialized to the return value of Correlation(). |
| 385 | Correlation(audio_history, signal_length, correlation_vector, |
| 386 | &correlation_scale); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 387 | |
| 388 | // Find peaks in correlation vector. |
| 389 | DspHelper::PeakDetection(correlation_vector, correlation_length, |
| 390 | kNumCorrelationCandidates, fs_mult, |
| 391 | best_correlation_index, best_correlation); |
| 392 | |
| 393 | // Adjust peak locations; cross-correlation lags start at 2.5 ms |
| 394 | // (20 * fs_mult samples). |
| 395 | best_correlation_index[0] += fs_mult_20; |
| 396 | best_correlation_index[1] += fs_mult_20; |
| 397 | best_correlation_index[2] += fs_mult_20; |
| 398 | |
| 399 | // Calculate distortion around the |kNumCorrelationCandidates| best lags. |
| 400 | int distortion_scale = 0; |
| 401 | for (int i = 0; i < kNumCorrelationCandidates; i++) { |
| 402 | int16_t min_index = std::max(fs_mult_20, |
| 403 | best_correlation_index[i] - fs_mult_4); |
| 404 | int16_t max_index = std::min(fs_mult_120 - 1, |
| 405 | best_correlation_index[i] + fs_mult_4); |
| 406 | best_distortion_index[i] = DspHelper::MinDistortion( |
| 407 | &(audio_history[signal_length - fs_mult_dist_len]), min_index, |
| 408 | max_index, fs_mult_dist_len, &best_distortion_w32[i]); |
| 409 | distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]), |
| 410 | distortion_scale); |
| 411 | } |
| 412 | // Shift the distortion values to fit in 16 bits. |
| 413 | WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates, |
| 414 | best_distortion_w32, distortion_scale); |
| 415 | |
| 416 | // Find the maximizing index |i| of the cost function |
| 417 | // f[i] = best_correlation[i] / best_distortion[i]. |
turaj@webrtc.org | 58cd316 | 2013-10-31 15:15:55 +0000 | [diff] [blame] | 418 | int32_t best_ratio = std::numeric_limits<int32_t>::min(); |
Peter Kasting | f045e4d | 2015-06-10 21:15:38 -0700 | [diff] [blame] | 419 | int best_index = std::numeric_limits<int>::max(); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 420 | for (int i = 0; i < kNumCorrelationCandidates; ++i) { |
| 421 | int32_t ratio; |
| 422 | if (best_distortion[i] > 0) { |
| 423 | ratio = (best_correlation[i] << 16) / best_distortion[i]; |
turaj@webrtc.org | 7126b38 | 2013-07-31 16:05:09 +0000 | [diff] [blame] | 424 | } else if (best_correlation[i] == 0) { |
| 425 | ratio = 0; // No correlation set result to zero. |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 426 | } else { |
turaj@webrtc.org | 7126b38 | 2013-07-31 16:05:09 +0000 | [diff] [blame] | 427 | ratio = std::numeric_limits<int32_t>::max(); // Denominator is zero. |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 428 | } |
| 429 | if (ratio > best_ratio) { |
| 430 | best_index = i; |
| 431 | best_ratio = ratio; |
| 432 | } |
| 433 | } |
| 434 | |
| 435 | int distortion_lag = best_distortion_index[best_index]; |
| 436 | int correlation_lag = best_correlation_index[best_index]; |
| 437 | max_lag_ = std::max(distortion_lag, correlation_lag); |
| 438 | |
| 439 | // Calculate the exact best correlation in the range between |
| 440 | // |correlation_lag| and |distortion_lag|. |
Peter Kasting | 728d903 | 2015-06-11 14:31:38 -0700 | [diff] [blame] | 441 | correlation_length = |
| 442 | std::max(std::min(distortion_lag + 10, fs_mult_120), 60 * fs_mult); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 443 | |
| 444 | int start_index = std::min(distortion_lag, correlation_lag); |
Peter Kasting | 728d903 | 2015-06-11 14:31:38 -0700 | [diff] [blame] | 445 | int correlation_lags = |
| 446 | WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 447 | assert(correlation_lags <= 99 * fs_mult + 1); // Cannot be larger. |
| 448 | |
| 449 | for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { |
| 450 | ChannelParameters& parameters = channel_parameters_[channel_ix]; |
| 451 | // Calculate suitable scaling. |
| 452 | int16_t signal_max = WebRtcSpl_MaxAbsValueW16( |
| 453 | &audio_history[signal_length - correlation_length - start_index |
| 454 | - correlation_lags], |
| 455 | correlation_length + start_index + correlation_lags - 1); |
pkasting | b297c5a | 2015-07-22 15:17:22 -0700 | [diff] [blame] | 456 | correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) + |
| 457 | (31 - WebRtcSpl_NormW32(correlation_length)) - 31; |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 458 | correlation_scale = std::max(0, correlation_scale); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 459 | |
| 460 | // Calculate the correlation, store in |correlation_vector2|. |
| 461 | WebRtcSpl_CrossCorrelation( |
| 462 | correlation_vector2, |
| 463 | &(audio_history[signal_length - correlation_length]), |
| 464 | &(audio_history[signal_length - correlation_length - start_index]), |
| 465 | correlation_length, correlation_lags, correlation_scale, -1); |
| 466 | |
| 467 | // Find maximizing index. |
| 468 | best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags); |
| 469 | int32_t max_correlation = correlation_vector2[best_index]; |
| 470 | // Compensate index with start offset. |
| 471 | best_index = best_index + start_index; |
| 472 | |
| 473 | // Calculate energies. |
| 474 | int32_t energy1 = WebRtcSpl_DotProductWithScale( |
| 475 | &(audio_history[signal_length - correlation_length]), |
| 476 | &(audio_history[signal_length - correlation_length]), |
| 477 | correlation_length, correlation_scale); |
| 478 | int32_t energy2 = WebRtcSpl_DotProductWithScale( |
| 479 | &(audio_history[signal_length - correlation_length - best_index]), |
| 480 | &(audio_history[signal_length - correlation_length - best_index]), |
| 481 | correlation_length, correlation_scale); |
| 482 | |
| 483 | // Calculate the correlation coefficient between the two portions of the |
| 484 | // signal. |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 485 | int32_t corr_coefficient; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 486 | if ((energy1 > 0) && (energy2 > 0)) { |
| 487 | int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0); |
| 488 | int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0); |
| 489 | // Make sure total scaling is even (to simplify scale factor after sqrt). |
| 490 | if ((energy1_scale + energy2_scale) & 1) { |
| 491 | // If sum is odd, add 1 to make it even. |
| 492 | energy1_scale += 1; |
| 493 | } |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 494 | int32_t scaled_energy1 = energy1 >> energy1_scale; |
| 495 | int32_t scaled_energy2 = energy2 >> energy2_scale; |
| 496 | int16_t sqrt_energy_product = static_cast<int16_t>( |
| 497 | WebRtcSpl_SqrtFloor(scaled_energy1 * scaled_energy2)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 498 | // Calculate max_correlation / sqrt(energy1 * energy2) in Q14. |
| 499 | int cc_shift = 14 - (energy1_scale + energy2_scale) / 2; |
| 500 | max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift); |
| 501 | corr_coefficient = WebRtcSpl_DivW32W16(max_correlation, |
| 502 | sqrt_energy_product); |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 503 | // Cap at 1.0 in Q14. |
| 504 | corr_coefficient = std::min(16384, corr_coefficient); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 505 | } else { |
| 506 | corr_coefficient = 0; |
| 507 | } |
| 508 | |
| 509 | // Extract the two vectors expand_vector0 and expand_vector1 from |
| 510 | // |audio_history|. |
turaj@webrtc.org | 362a55e | 2013-09-20 16:25:28 +0000 | [diff] [blame] | 511 | int16_t expansion_length = static_cast<int16_t>(max_lag_ + overlap_length_); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 512 | const int16_t* vector1 = &(audio_history[signal_length - expansion_length]); |
| 513 | const int16_t* vector2 = vector1 - distortion_lag; |
| 514 | // Normalize the second vector to the same energy as the first. |
| 515 | energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length, |
| 516 | correlation_scale); |
| 517 | energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length, |
| 518 | correlation_scale); |
| 519 | // Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 - 2.0, |
| 520 | // i.e., energy1 / energy1 is within 0.25 - 4. |
| 521 | int16_t amplitude_ratio; |
| 522 | if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) { |
| 523 | // Energy constraint fulfilled. Use both vectors and scale them |
| 524 | // accordingly. |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 525 | int32_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0); |
| 526 | int32_t scaled_energy1 = scaled_energy2 - 13; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 527 | // Calculate scaled_energy1 / scaled_energy2 in Q13. |
| 528 | int32_t energy_ratio = WebRtcSpl_DivW32W16( |
| 529 | WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1), |
bjornv@webrtc.org | a5ce7bb | 2014-10-20 08:24:54 +0000 | [diff] [blame] | 530 | energy2 >> scaled_energy2); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 531 | // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26). |
| 532 | amplitude_ratio = WebRtcSpl_SqrtFloor(energy_ratio << 13); |
| 533 | // Copy the two vectors and give them the same energy. |
| 534 | parameters.expand_vector0.Clear(); |
| 535 | parameters.expand_vector0.PushBack(vector1, expansion_length); |
| 536 | parameters.expand_vector1.Clear(); |
| 537 | if (parameters.expand_vector1.Size() < |
| 538 | static_cast<size_t>(expansion_length)) { |
| 539 | parameters.expand_vector1.Extend( |
| 540 | expansion_length - parameters.expand_vector1.Size()); |
| 541 | } |
| 542 | WebRtcSpl_AffineTransformVector(¶meters.expand_vector1[0], |
| 543 | const_cast<int16_t*>(vector2), |
| 544 | amplitude_ratio, |
| 545 | 4096, |
| 546 | 13, |
| 547 | expansion_length); |
| 548 | } else { |
| 549 | // Energy change constraint not fulfilled. Only use last vector. |
| 550 | parameters.expand_vector0.Clear(); |
| 551 | parameters.expand_vector0.PushBack(vector1, expansion_length); |
| 552 | // Copy from expand_vector0 to expand_vector1. |
henrik.lundin@webrtc.org | f6ab6f8 | 2014-09-04 10:58:43 +0000 | [diff] [blame] | 553 | parameters.expand_vector0.CopyTo(¶meters.expand_vector1); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 554 | // Set the amplitude_ratio since it is used by the muting slope. |
| 555 | if ((energy1 / 4 < energy2) || (energy2 == 0)) { |
| 556 | amplitude_ratio = 4096; // 0.5 in Q13. |
| 557 | } else { |
| 558 | amplitude_ratio = 16384; // 2.0 in Q13. |
| 559 | } |
| 560 | } |
| 561 | |
| 562 | // Set the 3 lag values. |
Peter Kasting | f045e4d | 2015-06-10 21:15:38 -0700 | [diff] [blame] | 563 | if (distortion_lag == correlation_lag) { |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 564 | expand_lags_[0] = distortion_lag; |
| 565 | expand_lags_[1] = distortion_lag; |
| 566 | expand_lags_[2] = distortion_lag; |
| 567 | } else { |
| 568 | // |distortion_lag| and |correlation_lag| are not equal; use different |
| 569 | // combinations of the two. |
| 570 | // First lag is |distortion_lag| only. |
| 571 | expand_lags_[0] = distortion_lag; |
| 572 | // Second lag is the average of the two. |
| 573 | expand_lags_[1] = (distortion_lag + correlation_lag) / 2; |
| 574 | // Third lag is the average again, but rounding towards |correlation_lag|. |
Peter Kasting | f045e4d | 2015-06-10 21:15:38 -0700 | [diff] [blame] | 575 | if (distortion_lag > correlation_lag) { |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 576 | expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2; |
| 577 | } else { |
| 578 | expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2; |
| 579 | } |
| 580 | } |
| 581 | |
| 582 | // Calculate the LPC and the gain of the filters. |
| 583 | // Calculate scale value needed for auto-correlation. |
| 584 | correlation_scale = WebRtcSpl_MaxAbsValueW16( |
| 585 | &(audio_history[signal_length - fs_mult_lpc_analysis_len]), |
| 586 | fs_mult_lpc_analysis_len); |
| 587 | |
| 588 | correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0); |
| 589 | correlation_scale = std::max(correlation_scale * 2 + 7, 0); |
| 590 | |
| 591 | // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function. |
| 592 | size_t temp_index = signal_length - fs_mult_lpc_analysis_len - |
| 593 | kUnvoicedLpcOrder; |
| 594 | // Copy signal to temporary vector to be able to pad with leading zeros. |
| 595 | int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len |
| 596 | + kUnvoicedLpcOrder]; |
| 597 | memset(temp_signal, 0, |
| 598 | sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder)); |
| 599 | memcpy(&temp_signal[kUnvoicedLpcOrder], |
| 600 | &audio_history[temp_index + kUnvoicedLpcOrder], |
| 601 | sizeof(int16_t) * fs_mult_lpc_analysis_len); |
| 602 | WebRtcSpl_CrossCorrelation(auto_correlation, |
| 603 | &temp_signal[kUnvoicedLpcOrder], |
| 604 | &temp_signal[kUnvoicedLpcOrder], |
| 605 | fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, |
| 606 | correlation_scale, -1); |
| 607 | delete [] temp_signal; |
| 608 | |
| 609 | // Verify that variance is positive. |
| 610 | if (auto_correlation[0] > 0) { |
| 611 | // Estimate AR filter parameters using Levinson-Durbin algorithm; |
| 612 | // kUnvoicedLpcOrder + 1 filter coefficients. |
| 613 | int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation, |
| 614 | parameters.ar_filter, |
| 615 | reflection_coeff, |
| 616 | kUnvoicedLpcOrder); |
| 617 | |
| 618 | // Keep filter parameters only if filter is stable. |
| 619 | if (stability != 1) { |
| 620 | // Set first coefficient to 4096 (1.0 in Q12). |
| 621 | parameters.ar_filter[0] = 4096; |
| 622 | // Set remaining |kUnvoicedLpcOrder| coefficients to zero. |
| 623 | WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder); |
| 624 | } |
| 625 | } |
| 626 | |
| 627 | if (channel_ix == 0) { |
| 628 | // Extract a noise segment. |
| 629 | int16_t noise_length; |
| 630 | if (distortion_lag < 40) { |
| 631 | noise_length = 2 * distortion_lag + 30; |
| 632 | } else { |
| 633 | noise_length = distortion_lag + 30; |
| 634 | } |
| 635 | if (noise_length <= RandomVector::kRandomTableSize) { |
| 636 | memcpy(random_vector, RandomVector::kRandomTable, |
| 637 | sizeof(int16_t) * noise_length); |
| 638 | } else { |
| 639 | // Only applies to SWB where length could be larger than |
| 640 | // |kRandomTableSize|. |
| 641 | memcpy(random_vector, RandomVector::kRandomTable, |
| 642 | sizeof(int16_t) * RandomVector::kRandomTableSize); |
| 643 | assert(noise_length <= kMaxSampleRate / 8000 * 120 + 30); |
| 644 | random_vector_->IncreaseSeedIncrement(2); |
| 645 | random_vector_->Generate( |
| 646 | noise_length - RandomVector::kRandomTableSize, |
| 647 | &random_vector[RandomVector::kRandomTableSize]); |
| 648 | } |
| 649 | } |
| 650 | |
| 651 | // Set up state vector and calculate scale factor for unvoiced filtering. |
| 652 | memcpy(parameters.ar_filter_state, |
| 653 | &(audio_history[signal_length - kUnvoicedLpcOrder]), |
| 654 | sizeof(int16_t) * kUnvoicedLpcOrder); |
| 655 | memcpy(unvoiced_vector - kUnvoicedLpcOrder, |
| 656 | &(audio_history[signal_length - 128 - kUnvoicedLpcOrder]), |
| 657 | sizeof(int16_t) * kUnvoicedLpcOrder); |
bjornv@webrtc.org | c14e357 | 2015-01-12 05:50:52 +0000 | [diff] [blame] | 658 | WebRtcSpl_FilterMAFastQ12(&audio_history[signal_length - 128], |
| 659 | unvoiced_vector, |
| 660 | parameters.ar_filter, |
| 661 | kUnvoicedLpcOrder + 1, |
| 662 | 128); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 663 | int16_t unvoiced_prescale; |
| 664 | if (WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128) > 4000) { |
| 665 | unvoiced_prescale = 4; |
| 666 | } else { |
| 667 | unvoiced_prescale = 0; |
| 668 | } |
| 669 | int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale(unvoiced_vector, |
| 670 | unvoiced_vector, |
| 671 | 128, |
| 672 | unvoiced_prescale); |
| 673 | |
| 674 | // Normalize |unvoiced_energy| to 28 or 29 bits to preserve sqrt() accuracy. |
| 675 | int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3; |
| 676 | // Make sure we do an odd number of shifts since we already have 7 shifts |
| 677 | // from dividing with 128 earlier. This will make the total scale factor |
| 678 | // even, which is suitable for the sqrt. |
| 679 | unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1); |
| 680 | unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale); |
Peter Kasting | b7e5054 | 2015-06-11 12:55:50 -0700 | [diff] [blame] | 681 | int16_t unvoiced_gain = |
| 682 | static_cast<int16_t>(WebRtcSpl_SqrtFloor(unvoiced_energy)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 683 | parameters.ar_gain_scale = 13 |
| 684 | + (unvoiced_scale + 7 - unvoiced_prescale) / 2; |
| 685 | parameters.ar_gain = unvoiced_gain; |
| 686 | |
| 687 | // Calculate voice_mix_factor from corr_coefficient. |
| 688 | // Let x = corr_coefficient. Then, we compute: |
| 689 | // if (x > 0.48) |
| 690 | // voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096; |
| 691 | // else |
| 692 | // voice_mix_factor = 0; |
| 693 | if (corr_coefficient > 7875) { |
| 694 | int16_t x1, x2, x3; |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 695 | // |corr_coefficient| is in Q14. |
| 696 | x1 = static_cast<int16_t>(corr_coefficient); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 697 | x2 = (x1 * x1) >> 14; // Shift 14 to keep result in Q14. |
| 698 | x3 = (x1 * x2) >> 14; |
| 699 | static const int kCoefficients[4] = { -5179, 19931, -16422, 5776 }; |
| 700 | int32_t temp_sum = kCoefficients[0] << 14; |
| 701 | temp_sum += kCoefficients[1] * x1; |
| 702 | temp_sum += kCoefficients[2] * x2; |
| 703 | temp_sum += kCoefficients[3] * x3; |
Peter Kasting | f045e4d | 2015-06-10 21:15:38 -0700 | [diff] [blame] | 704 | parameters.voice_mix_factor = |
| 705 | static_cast<int16_t>(std::min(temp_sum / 4096, 16384)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 706 | parameters.voice_mix_factor = std::max(parameters.voice_mix_factor, |
| 707 | static_cast<int16_t>(0)); |
| 708 | } else { |
| 709 | parameters.voice_mix_factor = 0; |
| 710 | } |
| 711 | |
| 712 | // Calculate muting slope. Reuse value from earlier scaling of |
| 713 | // |expand_vector0| and |expand_vector1|. |
| 714 | int16_t slope = amplitude_ratio; |
| 715 | if (slope > 12288) { |
| 716 | // slope > 1.5. |
| 717 | // Calculate (1 - (1 / slope)) / distortion_lag = |
| 718 | // (slope - 1) / (distortion_lag * slope). |
| 719 | // |slope| is in Q13, so 1 corresponds to 8192. Shift up to Q25 before |
| 720 | // the division. |
| 721 | // Shift the denominator from Q13 to Q5 before the division. The result of |
| 722 | // the division will then be in Q20. |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 723 | int temp_ratio = WebRtcSpl_DivW32W16( |
Peter Kasting | b7e5054 | 2015-06-11 12:55:50 -0700 | [diff] [blame] | 724 | (slope - 8192) << 12, |
| 725 | static_cast<int16_t>((distortion_lag * slope) >> 8)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 726 | if (slope > 14746) { |
| 727 | // slope > 1.8. |
| 728 | // Divide by 2, with proper rounding. |
| 729 | parameters.mute_slope = (temp_ratio + 1) / 2; |
| 730 | } else { |
| 731 | // Divide by 8, with proper rounding. |
| 732 | parameters.mute_slope = (temp_ratio + 4) / 8; |
| 733 | } |
| 734 | parameters.onset = true; |
| 735 | } else { |
| 736 | // Calculate (1 - slope) / distortion_lag. |
| 737 | // Shift |slope| by 7 to Q20 before the division. The result is in Q20. |
Peter Kasting | b7e5054 | 2015-06-11 12:55:50 -0700 | [diff] [blame] | 738 | parameters.mute_slope = WebRtcSpl_DivW32W16( |
| 739 | (8192 - slope) << 7, static_cast<int16_t>(distortion_lag)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 740 | if (parameters.voice_mix_factor <= 13107) { |
| 741 | // Make sure the mute factor decreases from 1.0 to 0.9 in no more than |
| 742 | // 6.25 ms. |
| 743 | // mute_slope >= 0.005 / fs_mult in Q20. |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 744 | parameters.mute_slope = std::max(5243 / fs_mult, parameters.mute_slope); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 745 | } else if (slope > 8028) { |
| 746 | parameters.mute_slope = 0; |
| 747 | } |
| 748 | parameters.onset = false; |
| 749 | } |
| 750 | } |
| 751 | } |
| 752 | |
Karl Wiberg | 7f6c4d4 | 2015-04-09 15:44:22 +0200 | [diff] [blame] | 753 | Expand::ChannelParameters::ChannelParameters() |
| 754 | : mute_factor(16384), |
| 755 | ar_gain(0), |
| 756 | ar_gain_scale(0), |
| 757 | voice_mix_factor(0), |
| 758 | current_voice_mix_factor(0), |
| 759 | onset(false), |
| 760 | mute_slope(0) { |
| 761 | memset(ar_filter, 0, sizeof(ar_filter)); |
| 762 | memset(ar_filter_state, 0, sizeof(ar_filter_state)); |
| 763 | } |
| 764 | |
Peter Kasting | 728d903 | 2015-06-11 14:31:38 -0700 | [diff] [blame] | 765 | void Expand::Correlation(const int16_t* input, |
| 766 | size_t input_length, |
| 767 | int16_t* output, |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 768 | int* output_scale) const { |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 769 | // Set parameters depending on sample rate. |
| 770 | const int16_t* filter_coefficients; |
| 771 | int16_t num_coefficients; |
| 772 | int16_t downsampling_factor; |
| 773 | if (fs_hz_ == 8000) { |
| 774 | num_coefficients = 3; |
| 775 | downsampling_factor = 2; |
| 776 | filter_coefficients = DspHelper::kDownsample8kHzTbl; |
| 777 | } else if (fs_hz_ == 16000) { |
| 778 | num_coefficients = 5; |
| 779 | downsampling_factor = 4; |
| 780 | filter_coefficients = DspHelper::kDownsample16kHzTbl; |
| 781 | } else if (fs_hz_ == 32000) { |
| 782 | num_coefficients = 7; |
| 783 | downsampling_factor = 8; |
| 784 | filter_coefficients = DspHelper::kDownsample32kHzTbl; |
| 785 | } else { // fs_hz_ == 48000. |
| 786 | num_coefficients = 7; |
| 787 | downsampling_factor = 12; |
| 788 | filter_coefficients = DspHelper::kDownsample48kHzTbl; |
| 789 | } |
| 790 | |
| 791 | // Correlate from lag 10 to lag 60 in downsampled domain. |
| 792 | // (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.) |
| 793 | static const int kCorrelationStartLag = 10; |
| 794 | static const int kNumCorrelationLags = 54; |
| 795 | static const int kCorrelationLength = 60; |
| 796 | // Downsample to 4 kHz sample rate. |
| 797 | static const int kDownsampledLength = kCorrelationStartLag |
| 798 | + kNumCorrelationLags + kCorrelationLength; |
| 799 | int16_t downsampled_input[kDownsampledLength]; |
| 800 | static const int kFilterDelay = 0; |
| 801 | WebRtcSpl_DownsampleFast( |
| 802 | input + input_length - kDownsampledLength * downsampling_factor, |
| 803 | kDownsampledLength * downsampling_factor, downsampled_input, |
| 804 | kDownsampledLength, filter_coefficients, num_coefficients, |
| 805 | downsampling_factor, kFilterDelay); |
| 806 | |
| 807 | // Normalize |downsampled_input| to using all 16 bits. |
| 808 | int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input, |
| 809 | kDownsampledLength); |
| 810 | int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value); |
| 811 | WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength, |
| 812 | downsampled_input, norm_shift); |
| 813 | |
| 814 | int32_t correlation[kNumCorrelationLags]; |
| 815 | static const int kCorrelationShift = 6; |
| 816 | WebRtcSpl_CrossCorrelation( |
| 817 | correlation, |
| 818 | &downsampled_input[kDownsampledLength - kCorrelationLength], |
| 819 | &downsampled_input[kDownsampledLength - kCorrelationLength |
| 820 | - kCorrelationStartLag], |
| 821 | kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1); |
| 822 | |
| 823 | // Normalize and move data from 32-bit to 16-bit vector. |
| 824 | int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation, |
| 825 | kNumCorrelationLags); |
Peter Kasting | b7e5054 | 2015-06-11 12:55:50 -0700 | [diff] [blame] | 826 | int16_t norm_shift2 = static_cast<int16_t>( |
| 827 | std::max(18 - WebRtcSpl_NormW32(max_correlation), 0)); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 828 | WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation, |
| 829 | norm_shift2); |
| 830 | // Total scale factor (right shifts) of correlation value. |
| 831 | *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 832 | } |
| 833 | |
| 834 | void Expand::UpdateLagIndex() { |
| 835 | current_lag_index_ = current_lag_index_ + lag_index_direction_; |
| 836 | // Change direction if needed. |
| 837 | if (current_lag_index_ <= 0) { |
| 838 | lag_index_direction_ = 1; |
| 839 | } |
| 840 | if (current_lag_index_ >= kNumLags - 1) { |
| 841 | lag_index_direction_ = -1; |
| 842 | } |
| 843 | } |
| 844 | |
henrik.lundin@webrtc.org | d9faa46 | 2014-01-14 10:18:45 +0000 | [diff] [blame] | 845 | Expand* ExpandFactory::Create(BackgroundNoise* background_noise, |
| 846 | SyncBuffer* sync_buffer, |
| 847 | RandomVector* random_vector, |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 848 | StatisticsCalculator* statistics, |
henrik.lundin@webrtc.org | d9faa46 | 2014-01-14 10:18:45 +0000 | [diff] [blame] | 849 | int fs, |
| 850 | size_t num_channels) const { |
Henrik Lundin | bef77e2 | 2015-08-18 14:58:09 +0200 | [diff] [blame^] | 851 | return new Expand(background_noise, sync_buffer, random_vector, statistics, |
| 852 | fs, num_channels); |
henrik.lundin@webrtc.org | d9faa46 | 2014-01-14 10:18:45 +0000 | [diff] [blame] | 853 | } |
| 854 | |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 855 | // TODO(turajs): This can be moved to BackgroundNoise class. |
| 856 | void Expand::GenerateBackgroundNoise(int16_t* random_vector, |
| 857 | size_t channel, |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 858 | int mute_slope, |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 859 | bool too_many_expands, |
| 860 | size_t num_noise_samples, |
| 861 | int16_t* buffer) { |
| 862 | static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; |
| 863 | int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125]; |
Peter Kasting | 728d903 | 2015-06-11 14:31:38 -0700 | [diff] [blame] | 864 | assert(num_noise_samples <= static_cast<size_t>(kMaxSampleRate / 8000 * 125)); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 865 | int16_t* noise_samples = &buffer[kNoiseLpcOrder]; |
| 866 | if (background_noise_->initialized()) { |
| 867 | // Use background noise parameters. |
| 868 | memcpy(noise_samples - kNoiseLpcOrder, |
| 869 | background_noise_->FilterState(channel), |
| 870 | sizeof(int16_t) * kNoiseLpcOrder); |
| 871 | |
| 872 | int dc_offset = 0; |
| 873 | if (background_noise_->ScaleShift(channel) > 1) { |
| 874 | dc_offset = 1 << (background_noise_->ScaleShift(channel) - 1); |
| 875 | } |
| 876 | |
| 877 | // Scale random vector to correct energy level. |
| 878 | WebRtcSpl_AffineTransformVector( |
| 879 | scaled_random_vector, random_vector, |
| 880 | background_noise_->Scale(channel), dc_offset, |
| 881 | background_noise_->ScaleShift(channel), |
| 882 | static_cast<int>(num_noise_samples)); |
| 883 | |
| 884 | WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples, |
| 885 | background_noise_->Filter(channel), |
| 886 | kNoiseLpcOrder + 1, |
| 887 | static_cast<int>(num_noise_samples)); |
| 888 | |
| 889 | background_noise_->SetFilterState( |
| 890 | channel, |
| 891 | &(noise_samples[num_noise_samples - kNoiseLpcOrder]), |
| 892 | kNoiseLpcOrder); |
| 893 | |
| 894 | // Unmute the background noise. |
| 895 | int16_t bgn_mute_factor = background_noise_->MuteFactor(channel); |
henrik.lundin@webrtc.org | ea25784 | 2014-08-07 12:27:37 +0000 | [diff] [blame] | 896 | NetEq::BackgroundNoiseMode bgn_mode = background_noise_->mode(); |
| 897 | if (bgn_mode == NetEq::kBgnFade && too_many_expands && |
| 898 | bgn_mute_factor > 0) { |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 899 | // Fade BGN to zero. |
| 900 | // Calculate muting slope, approximately -2^18 / fs_hz. |
Peter Kasting | 36b7cc3 | 2015-06-11 19:57:18 -0700 | [diff] [blame] | 901 | int mute_slope; |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 902 | if (fs_hz_ == 8000) { |
| 903 | mute_slope = -32; |
| 904 | } else if (fs_hz_ == 16000) { |
| 905 | mute_slope = -16; |
| 906 | } else if (fs_hz_ == 32000) { |
| 907 | mute_slope = -8; |
| 908 | } else { |
| 909 | mute_slope = -5; |
| 910 | } |
| 911 | // Use UnmuteSignal function with negative slope. |
| 912 | // |bgn_mute_factor| is in Q14. |mute_slope| is in Q20. |
| 913 | DspHelper::UnmuteSignal(noise_samples, |
| 914 | num_noise_samples, |
| 915 | &bgn_mute_factor, |
| 916 | mute_slope, |
| 917 | noise_samples); |
| 918 | } else if (bgn_mute_factor < 16384) { |
henrik.lundin@webrtc.org | 023f12f | 2014-08-13 09:45:40 +0000 | [diff] [blame] | 919 | // If mode is kBgnOn, or if kBgnFade has started fading, |
| 920 | // use regular |mute_slope|. |
henrik.lundin@webrtc.org | ea25784 | 2014-08-07 12:27:37 +0000 | [diff] [blame] | 921 | if (!stop_muting_ && bgn_mode != NetEq::kBgnOff && |
| 922 | !(bgn_mode == NetEq::kBgnFade && too_many_expands)) { |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 923 | DspHelper::UnmuteSignal(noise_samples, |
| 924 | static_cast<int>(num_noise_samples), |
| 925 | &bgn_mute_factor, |
| 926 | mute_slope, |
| 927 | noise_samples); |
| 928 | } else { |
| 929 | // kBgnOn and stop muting, or |
| 930 | // kBgnOff (mute factor is always 0), or |
| 931 | // kBgnFade has reached 0. |
| 932 | WebRtcSpl_AffineTransformVector(noise_samples, noise_samples, |
| 933 | bgn_mute_factor, 8192, 14, |
| 934 | static_cast<int>(num_noise_samples)); |
| 935 | } |
| 936 | } |
| 937 | // Update mute_factor in BackgroundNoise class. |
| 938 | background_noise_->SetMuteFactor(channel, bgn_mute_factor); |
| 939 | } else { |
| 940 | // BGN parameters have not been initialized; use zero noise. |
| 941 | memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples); |
| 942 | } |
| 943 | } |
| 944 | |
Peter Kasting | b7e5054 | 2015-06-11 12:55:50 -0700 | [diff] [blame] | 945 | void Expand::GenerateRandomVector(int16_t seed_increment, |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 946 | size_t length, |
| 947 | int16_t* random_vector) { |
| 948 | // TODO(turajs): According to hlundin The loop should not be needed. Should be |
| 949 | // just as good to generate all of the vector in one call. |
| 950 | size_t samples_generated = 0; |
| 951 | const size_t kMaxRandSamples = RandomVector::kRandomTableSize; |
henrik.lundin@webrtc.org | ea25784 | 2014-08-07 12:27:37 +0000 | [diff] [blame] | 952 | while (samples_generated < length) { |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 953 | size_t rand_length = std::min(length - samples_generated, kMaxRandSamples); |
| 954 | random_vector_->IncreaseSeedIncrement(seed_increment); |
| 955 | random_vector_->Generate(rand_length, &random_vector[samples_generated]); |
| 956 | samples_generated += rand_length; |
| 957 | } |
| 958 | } |
henrik.lundin@webrtc.org | d9faa46 | 2014-01-14 10:18:45 +0000 | [diff] [blame] | 959 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 960 | } // namespace webrtc |