/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
ossu97ba30e2016-04-25 07:55:58 -070012
13#include <algorithm>
14
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020015#include "common_audio/signal_processing/include/signal_processing_library.h"
Yves Gerey988cc082018-10-23 12:03:01 +020016#include "rtc_base/checks.h"
Karl Wiberge40468b2017-11-22 10:42:26 +010017#include "rtc_base/numerics/safe_conversions.h"
ossu97ba30e2016-04-25 07:55:58 -070018
19namespace webrtc {
20
21namespace {
22
23const size_t kCngMaxOutsizeOrder = 640;
24
25// TODO(ossu): Rename the left-over WebRtcCng according to style guide.
26void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a);
27
28const int32_t WebRtcCng_kDbov[94] = {
Yves Gerey665174f2018-06-19 15:03:05 +020029 1081109975, 858756178, 682134279, 541838517, 430397633, 341876992,
30 271562548, 215709799, 171344384, 136103682, 108110997, 85875618,
31 68213428, 54183852, 43039763, 34187699, 27156255, 21570980,
32 17134438, 13610368, 10811100, 8587562, 6821343, 5418385,
33 4303976, 3418770, 2715625, 2157098, 1713444, 1361037,
34 1081110, 858756, 682134, 541839, 430398, 341877,
35 271563, 215710, 171344, 136104, 108111, 85876,
36 68213, 54184, 43040, 34188, 27156, 21571,
37 17134, 13610, 10811, 8588, 6821, 5418,
38 4304, 3419, 2716, 2157, 1713, 1361,
39 1081, 859, 682, 542, 430, 342,
40 272, 216, 171, 136, 108, 86,
41 68, 54, 43, 34, 27, 22,
42 17, 14, 11, 9, 7, 5,
43 4, 3, 3, 2, 2, 1,
44 1, 1, 1, 1};
ossu97ba30e2016-04-25 07:55:58 -070045
46const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = {
Yves Gerey665174f2018-06-19 15:03:05 +020047 32702, 32636, 32570, 32505, 32439, 32374,
48 32309, 32244, 32179, 32114, 32049, 31985};
ossu97ba30e2016-04-25 07:55:58 -070049
50} // namespace
51
52ComfortNoiseDecoder::ComfortNoiseDecoder() {
53 /* Needed to get the right function pointers in SPLIB. */
ossu97ba30e2016-04-25 07:55:58 -070054 Reset();
55}
56
57void ComfortNoiseDecoder::Reset() {
Yves Gerey665174f2018-06-19 15:03:05 +020058 dec_seed_ = 7777; /* For debugging only. */
ossu97ba30e2016-04-25 07:55:58 -070059 dec_target_energy_ = 0;
60 dec_used_energy_ = 0;
61 for (auto& c : dec_target_reflCoefs_)
62 c = 0;
63 for (auto& c : dec_used_reflCoefs_)
64 c = 0;
65 for (auto& c : dec_filtstate_)
66 c = 0;
67 for (auto& c : dec_filtstateLow_)
68 c = 0;
69 dec_order_ = 5;
70 dec_target_scale_factor_ = 0;
71 dec_used_scale_factor_ = 0;
72}
73
74void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView<const uint8_t> sid) {
75 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER];
76 int32_t targetEnergy;
77 size_t length = sid.size();
78 /* Throw away reflection coefficients of higher order than we can handle. */
79 if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1))
80 length = WEBRTC_CNG_MAX_LPC_ORDER + 1;
81
82 dec_order_ = static_cast<uint16_t>(length - 1);
83
84 uint8_t sid0 = std::min<uint8_t>(sid[0], 93);
85 targetEnergy = WebRtcCng_kDbov[sid0];
86 /* Take down target energy to 75%. */
87 targetEnergy = targetEnergy >> 1;
88 targetEnergy += targetEnergy >> 2;
89
90 dec_target_energy_ = targetEnergy;
91
92 /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */
93 if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) {
94 for (size_t i = 0; i < (dec_order_); i++) {
95 refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/
96 dec_target_reflCoefs_[i] = refCs[i];
97 }
98 } else {
99 for (size_t i = 0; i < (dec_order_); i++) {
Sam Zackrisson32c6ae22017-12-11 11:44:25 +0100100 refCs[i] = (sid[i + 1] - 127) * (1 << 8); /* Q7 to Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700101 dec_target_reflCoefs_[i] = refCs[i];
102 }
103 }
104
105 for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
106 refCs[i] = 0;
107 dec_target_reflCoefs_[i] = refCs[i];
108 }
109}
110
111bool ComfortNoiseDecoder::Generate(rtc::ArrayView<int16_t> out_data,
112 bool new_period) {
113 int16_t excitation[kCngMaxOutsizeOrder];
114 int16_t low[kCngMaxOutsizeOrder];
115 int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1];
Yves Gerey665174f2018-06-19 15:03:05 +0200116 int16_t ReflBetaStd = 26214; /* 0.8 in q15. */
117 int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */
118 int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */
119 int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */
120 int16_t Beta, BetaC; /* These are in Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700121 int32_t targetEnergy;
122 int16_t En;
123 int16_t temp16;
124 const size_t num_samples = out_data.size();
125
126 if (num_samples > kCngMaxOutsizeOrder) {
127 return false;
128 }
129
130 if (new_period) {
131 dec_used_scale_factor_ = dec_target_scale_factor_;
132 Beta = ReflBetaNewP;
133 BetaC = ReflBetaCompNewP;
134 } else {
135 Beta = ReflBetaStd;
136 BetaC = ReflBetaCompStd;
137 }
138
ossu0eb19602016-11-22 05:15:21 -0800139 /* Calculate new scale factor in Q13 */
Yves Gerey665174f2018-06-19 15:03:05 +0200140 dec_used_scale_factor_ = rtc::checked_cast<int16_t>(
141 WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) +
142 WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13));
ossu97ba30e2016-04-25 07:55:58 -0700143
Yves Gerey665174f2018-06-19 15:03:05 +0200144 dec_used_energy_ = dec_used_energy_ >> 1;
ossu97ba30e2016-04-25 07:55:58 -0700145 dec_used_energy_ += dec_target_energy_ >> 1;
146
ossu0eb19602016-11-22 05:15:21 -0800147 /* Do the same for the reflection coeffs, albeit in Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700148 for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
Yves Gerey665174f2018-06-19 15:03:05 +0200149 dec_used_reflCoefs_[i] =
150 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], Beta, 15);
151 dec_used_reflCoefs_[i] +=
152 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_target_reflCoefs_[i], BetaC, 15);
ossu97ba30e2016-04-25 07:55:58 -0700153 }
154
155 /* Compute the polynomial coefficients. */
156 WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly);
157
ossu97ba30e2016-04-25 07:55:58 -0700158 targetEnergy = dec_used_energy_;
159
160 /* Calculate scaling factor based on filter energy. */
Yves Gerey665174f2018-06-19 15:03:05 +0200161 En = 8192; /* 1.0 in Q13. */
ossu97ba30e2016-04-25 07:55:58 -0700162 for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) {
163 /* Floating point value for reference.
164 E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) *
165 (dec_used_reflCoefs_[i] / 32768.0);
166 */
167
168 /* Same in fixed point. */
169 /* K(i).^2 in Q15. */
Yves Gerey665174f2018-06-19 15:03:05 +0200170 temp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i],
171 dec_used_reflCoefs_[i], 15);
ossu97ba30e2016-04-25 07:55:58 -0700172 /* 1 - K(i).^2 in Q15. */
173 temp16 = 0x7fff - temp16;
Yves Gerey665174f2018-06-19 15:03:05 +0200174 En = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15);
ossu97ba30e2016-04-25 07:55:58 -0700175 }
176
177 /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */
178
179 /* Calculate sqrt(En * target_energy / excitation energy) */
180 targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_);
181
Yves Gerey665174f2018-06-19 15:03:05 +0200182 En = (int16_t)WebRtcSpl_Sqrt(En) << 6;
183 En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */
ossu97ba30e2016-04-25 07:55:58 -0700184 dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12);
185
186 /* Generate excitation. */
187 /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */
188 for (size_t i = 0; i < num_samples; i++) {
189 excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1;
190 }
191
192 /* Scale to correct energy. */
193 WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_,
194 num_samples, 13);
195
196 /* |lpPoly| - Coefficients in Q12.
197 * |excitation| - Speech samples.
198 * |nst->dec_filtstate| - State preservation.
199 * |out_data| - Filtered speech samples. */
200 WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation,
201 num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER,
202 dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER,
203 out_data.data(), low, num_samples);
204
205 return true;
206}
207
208ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality)
209 : enc_nrOfCoefs_(quality),
210 enc_sampfreq_(fs),
211 enc_interval_(interval),
212 enc_msSinceSid_(0),
213 enc_Energy_(0),
214 enc_reflCoefs_{0},
215 enc_corrVector_{0},
Yves Gerey665174f2018-06-19 15:03:05 +0200216 enc_seed_(7777) /* For debugging only. */ {
kwibergee89e782017-08-09 17:22:01 -0700217 RTC_CHECK_GT(quality, 0);
218 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
ossu97ba30e2016-04-25 07:55:58 -0700219}
220
221void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) {
kwibergee89e782017-08-09 17:22:01 -0700222 RTC_CHECK_GT(quality, 0);
223 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
ossu97ba30e2016-04-25 07:55:58 -0700224 enc_nrOfCoefs_ = quality;
225 enc_sampfreq_ = fs;
226 enc_interval_ = interval;
227 enc_msSinceSid_ = 0;
228 enc_Energy_ = 0;
229 for (auto& c : enc_reflCoefs_)
230 c = 0;
231 for (auto& c : enc_corrVector_)
232 c = 0;
Yves Gerey665174f2018-06-19 15:03:05 +0200233 enc_seed_ = 7777; /* For debugging only. */
ossu97ba30e2016-04-25 07:55:58 -0700234}
235
236size_t ComfortNoiseEncoder::Encode(rtc::ArrayView<const int16_t> speech,
237 bool force_sid,
238 rtc::Buffer* output) {
239 int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
240 int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1];
241 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
242 int16_t hanningW[kCngMaxOutsizeOrder];
243 int16_t ReflBeta = 19661; /* 0.6 in q15. */
244 int16_t ReflBetaComp = 13107; /* 0.4 in q15. */
245 int32_t outEnergy;
246 int outShifts;
247 size_t i;
248 int stab;
249 int acorrScale;
250 size_t index;
251 size_t ind, factor;
252 int32_t* bptr;
253 int32_t blo, bhi;
254 int16_t negate;
255 const int16_t* aptr;
256 int16_t speechBuf[kCngMaxOutsizeOrder];
257
258 const size_t num_samples = speech.size();
kwiberg352444f2016-11-28 15:58:53 -0800259 RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder);
ossu97ba30e2016-04-25 07:55:58 -0700260
261 for (i = 0; i < num_samples; i++) {
262 speechBuf[i] = speech[i];
263 }
264
265 factor = num_samples;
266
267 /* Calculate energy and a coefficients. */
268 outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts);
269 while (outShifts > 0) {
270 /* We can only do 5 shifts without destroying accuracy in
271 * division factor. */
272 if (outShifts > 5) {
273 outEnergy <<= (outShifts - 5);
274 outShifts = 5;
275 } else {
276 factor /= 2;
277 outShifts--;
278 }
279 }
280 outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor);
281
282 if (outEnergy > 1) {
283 /* Create Hanning Window. */
284 WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2);
285 for (i = 0; i < (num_samples / 2); i++)
286 hanningW[num_samples - i - 1] = hanningW[i];
287
288 WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples,
289 14);
290
291 WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_,
292 corrVector, &acorrScale);
293
294 if (*corrVector == 0)
295 *corrVector = WEBRTC_SPL_WORD16_MAX;
296
297 /* Adds the bandwidth expansion. */
298 aptr = WebRtcCng_kCorrWindow;
299 bptr = corrVector;
300
301 /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */
302 for (ind = 0; ind < enc_nrOfCoefs_; ind++) {
303 /* The below code multiplies the 16 b corrWindow values (Q15) with
304 * the 32 b corrvector (Q0) and shifts the result down 15 steps. */
305 negate = *bptr < 0;
306 if (negate)
307 *bptr = -*bptr;
308
Yves Gerey665174f2018-06-19 15:03:05 +0200309 blo = (int32_t)*aptr * (*bptr & 0xffff);
310 bhi = ((blo >> 16) & 0xffff) +
311 ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff));
ossu97ba30e2016-04-25 07:55:58 -0700312 blo = (blo & 0xffff) | ((bhi & 0xffff) << 16);
313
Yves Gerey665174f2018-06-19 15:03:05 +0200314 *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t)blo >> 15);
ossu97ba30e2016-04-25 07:55:58 -0700315 if (negate)
316 *bptr = -*bptr;
317 bptr++;
318 }
319 /* End of bandwidth expansion. */
320
Yves Gerey665174f2018-06-19 15:03:05 +0200321 stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, enc_nrOfCoefs_);
ossu97ba30e2016-04-25 07:55:58 -0700322
323 if (!stab) {
324 /* Disregard from this frame */
325 return 0;
326 }
327
328 } else {
329 for (i = 0; i < enc_nrOfCoefs_; i++)
330 refCs[i] = 0;
331 }
332
333 if (force_sid) {
334 /* Read instantaneous values instead of averaged. */
335 for (i = 0; i < enc_nrOfCoefs_; i++)
336 enc_reflCoefs_[i] = refCs[i];
337 enc_Energy_ = outEnergy;
338 } else {
339 /* Average history with new values. */
340 for (i = 0; i < enc_nrOfCoefs_; i++) {
Yves Gerey665174f2018-06-19 15:03:05 +0200341 enc_reflCoefs_[i] =
342 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(enc_reflCoefs_[i], ReflBeta, 15);
ossu97ba30e2016-04-25 07:55:58 -0700343 enc_reflCoefs_[i] +=
Yves Gerey665174f2018-06-19 15:03:05 +0200344 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15);
ossu97ba30e2016-04-25 07:55:58 -0700345 }
Yves Gerey665174f2018-06-19 15:03:05 +0200346 enc_Energy_ = (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2);
ossu97ba30e2016-04-25 07:55:58 -0700347 }
348
349 if (enc_Energy_ < 1) {
350 enc_Energy_ = 1;
351 }
352
353 if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) {
354 /* Search for best dbov value. */
355 index = 0;
356 for (i = 1; i < 93; i++) {
357 /* Always round downwards. */
358 if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) {
359 index = i;
360 break;
361 }
362 }
363 if ((i == 93) && (index == 0))
364 index = 94;
365
366 const size_t output_coefs = enc_nrOfCoefs_ + 1;
Yves Gerey665174f2018-06-19 15:03:05 +0200367 output->AppendData(output_coefs, [&](rtc::ArrayView<uint8_t> output) {
368 output[0] = (uint8_t)index;
ossu97ba30e2016-04-25 07:55:58 -0700369
Yves Gerey665174f2018-06-19 15:03:05 +0200370 /* Quantize coefficients with tweak for WebRtc implementation of
371 * RFC3389. */
372 if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) {
373 for (i = 0; i < enc_nrOfCoefs_; i++) {
374 /* Q15 to Q7 with rounding. */
375 output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8);
ossu97ba30e2016-04-25 07:55:58 -0700376 }
Yves Gerey665174f2018-06-19 15:03:05 +0200377 } else {
378 for (i = 0; i < enc_nrOfCoefs_; i++) {
379 /* Q15 to Q7 with rounding. */
380 output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8));
381 }
382 }
ossu97ba30e2016-04-25 07:55:58 -0700383
Yves Gerey665174f2018-06-19 15:03:05 +0200384 return output_coefs;
385 });
ossu97ba30e2016-04-25 07:55:58 -0700386
387 enc_msSinceSid_ =
388 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
389 return output_coefs;
390 } else {
391 enc_msSinceSid_ +=
392 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
393 return 0;
394 }
395}
396
397namespace {
398/* Values in |k| are Q15, and |a| Q12. */
399void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) {
400 int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
401 int16_t* aptr;
402 int16_t* aptr2;
403 int16_t* anyptr;
404 const int16_t* kptr;
405 int m, i;
406
407 kptr = k;
408 *a = 4096; /* i.e., (Word16_MAX >> 3) + 1 */
409 *any = *a;
410 a[1] = (*k + 4) >> 3;
411 for (m = 1; m < useOrder; m++) {
412 kptr++;
413 aptr = a;
414 aptr++;
415 aptr2 = &a[m];
416 anyptr = any;
417 anyptr++;
418
419 any[m + 1] = (*kptr + 4) >> 3;
420 for (i = 0; i < m; i++) {
421 *anyptr++ =
422 (*aptr++) +
423 (int16_t)((((int32_t)(*aptr2--) * (int32_t)*kptr) + 16384) >> 15);
424 }
425
426 aptr = a;
427 anyptr = any;
428 for (i = 0; i < (m + 2); i++) {
429 *aptr++ = *anyptr++;
430 }
431 }
432}
433
434} // namespace
435
436} // namespace webrtc