/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
ossu97ba30e2016-04-25 07:55:58 -070012
13#include <algorithm>
14
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020015#include "common_audio/signal_processing/include/signal_processing_library.h"
Yves Gerey988cc082018-10-23 12:03:01 +020016#include "rtc_base/checks.h"
Karl Wiberge40468b2017-11-22 10:42:26 +010017#include "rtc_base/numerics/safe_conversions.h"
ossu97ba30e2016-04-25 07:55:58 -070018
19namespace webrtc {
20
21namespace {
22
23const size_t kCngMaxOutsizeOrder = 640;
24
25// TODO(ossu): Rename the left-over WebRtcCng according to style guide.
26void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a);
27
28const int32_t WebRtcCng_kDbov[94] = {
Yves Gerey665174f2018-06-19 15:03:05 +020029 1081109975, 858756178, 682134279, 541838517, 430397633, 341876992,
30 271562548, 215709799, 171344384, 136103682, 108110997, 85875618,
31 68213428, 54183852, 43039763, 34187699, 27156255, 21570980,
32 17134438, 13610368, 10811100, 8587562, 6821343, 5418385,
33 4303976, 3418770, 2715625, 2157098, 1713444, 1361037,
34 1081110, 858756, 682134, 541839, 430398, 341877,
35 271563, 215710, 171344, 136104, 108111, 85876,
36 68213, 54184, 43040, 34188, 27156, 21571,
37 17134, 13610, 10811, 8588, 6821, 5418,
38 4304, 3419, 2716, 2157, 1713, 1361,
39 1081, 859, 682, 542, 430, 342,
40 272, 216, 171, 136, 108, 86,
41 68, 54, 43, 34, 27, 22,
42 17, 14, 11, 9, 7, 5,
43 4, 3, 3, 2, 2, 1,
44 1, 1, 1, 1};
ossu97ba30e2016-04-25 07:55:58 -070045
46const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = {
Yves Gerey665174f2018-06-19 15:03:05 +020047 32702, 32636, 32570, 32505, 32439, 32374,
48 32309, 32244, 32179, 32114, 32049, 31985};
ossu97ba30e2016-04-25 07:55:58 -070049
50} // namespace
51
52ComfortNoiseDecoder::ComfortNoiseDecoder() {
53 /* Needed to get the right function pointers in SPLIB. */
54 WebRtcSpl_Init();
55 Reset();
56}
57
58void ComfortNoiseDecoder::Reset() {
Yves Gerey665174f2018-06-19 15:03:05 +020059 dec_seed_ = 7777; /* For debugging only. */
ossu97ba30e2016-04-25 07:55:58 -070060 dec_target_energy_ = 0;
61 dec_used_energy_ = 0;
62 for (auto& c : dec_target_reflCoefs_)
63 c = 0;
64 for (auto& c : dec_used_reflCoefs_)
65 c = 0;
66 for (auto& c : dec_filtstate_)
67 c = 0;
68 for (auto& c : dec_filtstateLow_)
69 c = 0;
70 dec_order_ = 5;
71 dec_target_scale_factor_ = 0;
72 dec_used_scale_factor_ = 0;
73}
74
75void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView<const uint8_t> sid) {
76 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER];
77 int32_t targetEnergy;
78 size_t length = sid.size();
79 /* Throw away reflection coefficients of higher order than we can handle. */
80 if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1))
81 length = WEBRTC_CNG_MAX_LPC_ORDER + 1;
82
83 dec_order_ = static_cast<uint16_t>(length - 1);
84
85 uint8_t sid0 = std::min<uint8_t>(sid[0], 93);
86 targetEnergy = WebRtcCng_kDbov[sid0];
87 /* Take down target energy to 75%. */
88 targetEnergy = targetEnergy >> 1;
89 targetEnergy += targetEnergy >> 2;
90
91 dec_target_energy_ = targetEnergy;
92
93 /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */
94 if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) {
95 for (size_t i = 0; i < (dec_order_); i++) {
96 refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/
97 dec_target_reflCoefs_[i] = refCs[i];
98 }
99 } else {
100 for (size_t i = 0; i < (dec_order_); i++) {
Sam Zackrisson32c6ae22017-12-11 11:44:25 +0100101 refCs[i] = (sid[i + 1] - 127) * (1 << 8); /* Q7 to Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700102 dec_target_reflCoefs_[i] = refCs[i];
103 }
104 }
105
106 for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
107 refCs[i] = 0;
108 dec_target_reflCoefs_[i] = refCs[i];
109 }
110}
111
112bool ComfortNoiseDecoder::Generate(rtc::ArrayView<int16_t> out_data,
113 bool new_period) {
114 int16_t excitation[kCngMaxOutsizeOrder];
115 int16_t low[kCngMaxOutsizeOrder];
116 int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1];
Yves Gerey665174f2018-06-19 15:03:05 +0200117 int16_t ReflBetaStd = 26214; /* 0.8 in q15. */
118 int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */
119 int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */
120 int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */
121 int16_t Beta, BetaC; /* These are in Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700122 int32_t targetEnergy;
123 int16_t En;
124 int16_t temp16;
125 const size_t num_samples = out_data.size();
126
127 if (num_samples > kCngMaxOutsizeOrder) {
128 return false;
129 }
130
131 if (new_period) {
132 dec_used_scale_factor_ = dec_target_scale_factor_;
133 Beta = ReflBetaNewP;
134 BetaC = ReflBetaCompNewP;
135 } else {
136 Beta = ReflBetaStd;
137 BetaC = ReflBetaCompStd;
138 }
139
ossu0eb19602016-11-22 05:15:21 -0800140 /* Calculate new scale factor in Q13 */
Yves Gerey665174f2018-06-19 15:03:05 +0200141 dec_used_scale_factor_ = rtc::checked_cast<int16_t>(
142 WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) +
143 WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13));
ossu97ba30e2016-04-25 07:55:58 -0700144
Yves Gerey665174f2018-06-19 15:03:05 +0200145 dec_used_energy_ = dec_used_energy_ >> 1;
ossu97ba30e2016-04-25 07:55:58 -0700146 dec_used_energy_ += dec_target_energy_ >> 1;
147
ossu0eb19602016-11-22 05:15:21 -0800148 /* Do the same for the reflection coeffs, albeit in Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700149 for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
Yves Gerey665174f2018-06-19 15:03:05 +0200150 dec_used_reflCoefs_[i] =
151 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], Beta, 15);
152 dec_used_reflCoefs_[i] +=
153 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_target_reflCoefs_[i], BetaC, 15);
ossu97ba30e2016-04-25 07:55:58 -0700154 }
155
156 /* Compute the polynomial coefficients. */
157 WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly);
158
ossu97ba30e2016-04-25 07:55:58 -0700159 targetEnergy = dec_used_energy_;
160
161 /* Calculate scaling factor based on filter energy. */
Yves Gerey665174f2018-06-19 15:03:05 +0200162 En = 8192; /* 1.0 in Q13. */
ossu97ba30e2016-04-25 07:55:58 -0700163 for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) {
164 /* Floating point value for reference.
165 E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) *
166 (dec_used_reflCoefs_[i] / 32768.0);
167 */
168
169 /* Same in fixed point. */
170 /* K(i).^2 in Q15. */
Yves Gerey665174f2018-06-19 15:03:05 +0200171 temp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i],
172 dec_used_reflCoefs_[i], 15);
ossu97ba30e2016-04-25 07:55:58 -0700173 /* 1 - K(i).^2 in Q15. */
174 temp16 = 0x7fff - temp16;
Yves Gerey665174f2018-06-19 15:03:05 +0200175 En = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15);
ossu97ba30e2016-04-25 07:55:58 -0700176 }
177
178 /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */
179
180 /* Calculate sqrt(En * target_energy / excitation energy) */
181 targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_);
182
Yves Gerey665174f2018-06-19 15:03:05 +0200183 En = (int16_t)WebRtcSpl_Sqrt(En) << 6;
184 En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */
ossu97ba30e2016-04-25 07:55:58 -0700185 dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12);
186
187 /* Generate excitation. */
188 /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */
189 for (size_t i = 0; i < num_samples; i++) {
190 excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1;
191 }
192
193 /* Scale to correct energy. */
194 WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_,
195 num_samples, 13);
196
197 /* |lpPoly| - Coefficients in Q12.
198 * |excitation| - Speech samples.
199 * |nst->dec_filtstate| - State preservation.
200 * |out_data| - Filtered speech samples. */
201 WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation,
202 num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER,
203 dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER,
204 out_data.data(), low, num_samples);
205
206 return true;
207}
208
209ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality)
210 : enc_nrOfCoefs_(quality),
211 enc_sampfreq_(fs),
212 enc_interval_(interval),
213 enc_msSinceSid_(0),
214 enc_Energy_(0),
215 enc_reflCoefs_{0},
216 enc_corrVector_{0},
Yves Gerey665174f2018-06-19 15:03:05 +0200217 enc_seed_(7777) /* For debugging only. */ {
kwibergee89e782017-08-09 17:22:01 -0700218 RTC_CHECK_GT(quality, 0);
219 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
ossu97ba30e2016-04-25 07:55:58 -0700220 /* Needed to get the right function pointers in SPLIB. */
221 WebRtcSpl_Init();
222}
223
224void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) {
kwibergee89e782017-08-09 17:22:01 -0700225 RTC_CHECK_GT(quality, 0);
226 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
ossu97ba30e2016-04-25 07:55:58 -0700227 enc_nrOfCoefs_ = quality;
228 enc_sampfreq_ = fs;
229 enc_interval_ = interval;
230 enc_msSinceSid_ = 0;
231 enc_Energy_ = 0;
232 for (auto& c : enc_reflCoefs_)
233 c = 0;
234 for (auto& c : enc_corrVector_)
235 c = 0;
Yves Gerey665174f2018-06-19 15:03:05 +0200236 enc_seed_ = 7777; /* For debugging only. */
ossu97ba30e2016-04-25 07:55:58 -0700237}
238
239size_t ComfortNoiseEncoder::Encode(rtc::ArrayView<const int16_t> speech,
240 bool force_sid,
241 rtc::Buffer* output) {
242 int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
243 int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1];
244 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
245 int16_t hanningW[kCngMaxOutsizeOrder];
246 int16_t ReflBeta = 19661; /* 0.6 in q15. */
247 int16_t ReflBetaComp = 13107; /* 0.4 in q15. */
248 int32_t outEnergy;
249 int outShifts;
250 size_t i;
251 int stab;
252 int acorrScale;
253 size_t index;
254 size_t ind, factor;
255 int32_t* bptr;
256 int32_t blo, bhi;
257 int16_t negate;
258 const int16_t* aptr;
259 int16_t speechBuf[kCngMaxOutsizeOrder];
260
261 const size_t num_samples = speech.size();
kwiberg352444f2016-11-28 15:58:53 -0800262 RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder);
ossu97ba30e2016-04-25 07:55:58 -0700263
264 for (i = 0; i < num_samples; i++) {
265 speechBuf[i] = speech[i];
266 }
267
268 factor = num_samples;
269
270 /* Calculate energy and a coefficients. */
271 outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts);
272 while (outShifts > 0) {
273 /* We can only do 5 shifts without destroying accuracy in
274 * division factor. */
275 if (outShifts > 5) {
276 outEnergy <<= (outShifts - 5);
277 outShifts = 5;
278 } else {
279 factor /= 2;
280 outShifts--;
281 }
282 }
283 outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor);
284
285 if (outEnergy > 1) {
286 /* Create Hanning Window. */
287 WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2);
288 for (i = 0; i < (num_samples / 2); i++)
289 hanningW[num_samples - i - 1] = hanningW[i];
290
291 WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples,
292 14);
293
294 WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_,
295 corrVector, &acorrScale);
296
297 if (*corrVector == 0)
298 *corrVector = WEBRTC_SPL_WORD16_MAX;
299
300 /* Adds the bandwidth expansion. */
301 aptr = WebRtcCng_kCorrWindow;
302 bptr = corrVector;
303
304 /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */
305 for (ind = 0; ind < enc_nrOfCoefs_; ind++) {
306 /* The below code multiplies the 16 b corrWindow values (Q15) with
307 * the 32 b corrvector (Q0) and shifts the result down 15 steps. */
308 negate = *bptr < 0;
309 if (negate)
310 *bptr = -*bptr;
311
Yves Gerey665174f2018-06-19 15:03:05 +0200312 blo = (int32_t)*aptr * (*bptr & 0xffff);
313 bhi = ((blo >> 16) & 0xffff) +
314 ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff));
ossu97ba30e2016-04-25 07:55:58 -0700315 blo = (blo & 0xffff) | ((bhi & 0xffff) << 16);
316
Yves Gerey665174f2018-06-19 15:03:05 +0200317 *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t)blo >> 15);
ossu97ba30e2016-04-25 07:55:58 -0700318 if (negate)
319 *bptr = -*bptr;
320 bptr++;
321 }
322 /* End of bandwidth expansion. */
323
Yves Gerey665174f2018-06-19 15:03:05 +0200324 stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, enc_nrOfCoefs_);
ossu97ba30e2016-04-25 07:55:58 -0700325
326 if (!stab) {
327 /* Disregard from this frame */
328 return 0;
329 }
330
331 } else {
332 for (i = 0; i < enc_nrOfCoefs_; i++)
333 refCs[i] = 0;
334 }
335
336 if (force_sid) {
337 /* Read instantaneous values instead of averaged. */
338 for (i = 0; i < enc_nrOfCoefs_; i++)
339 enc_reflCoefs_[i] = refCs[i];
340 enc_Energy_ = outEnergy;
341 } else {
342 /* Average history with new values. */
343 for (i = 0; i < enc_nrOfCoefs_; i++) {
Yves Gerey665174f2018-06-19 15:03:05 +0200344 enc_reflCoefs_[i] =
345 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(enc_reflCoefs_[i], ReflBeta, 15);
ossu97ba30e2016-04-25 07:55:58 -0700346 enc_reflCoefs_[i] +=
Yves Gerey665174f2018-06-19 15:03:05 +0200347 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15);
ossu97ba30e2016-04-25 07:55:58 -0700348 }
Yves Gerey665174f2018-06-19 15:03:05 +0200349 enc_Energy_ = (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2);
ossu97ba30e2016-04-25 07:55:58 -0700350 }
351
352 if (enc_Energy_ < 1) {
353 enc_Energy_ = 1;
354 }
355
356 if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) {
357 /* Search for best dbov value. */
358 index = 0;
359 for (i = 1; i < 93; i++) {
360 /* Always round downwards. */
361 if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) {
362 index = i;
363 break;
364 }
365 }
366 if ((i == 93) && (index == 0))
367 index = 94;
368
369 const size_t output_coefs = enc_nrOfCoefs_ + 1;
Yves Gerey665174f2018-06-19 15:03:05 +0200370 output->AppendData(output_coefs, [&](rtc::ArrayView<uint8_t> output) {
371 output[0] = (uint8_t)index;
ossu97ba30e2016-04-25 07:55:58 -0700372
Yves Gerey665174f2018-06-19 15:03:05 +0200373 /* Quantize coefficients with tweak for WebRtc implementation of
374 * RFC3389. */
375 if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) {
376 for (i = 0; i < enc_nrOfCoefs_; i++) {
377 /* Q15 to Q7 with rounding. */
378 output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8);
ossu97ba30e2016-04-25 07:55:58 -0700379 }
Yves Gerey665174f2018-06-19 15:03:05 +0200380 } else {
381 for (i = 0; i < enc_nrOfCoefs_; i++) {
382 /* Q15 to Q7 with rounding. */
383 output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8));
384 }
385 }
ossu97ba30e2016-04-25 07:55:58 -0700386
Yves Gerey665174f2018-06-19 15:03:05 +0200387 return output_coefs;
388 });
ossu97ba30e2016-04-25 07:55:58 -0700389
390 enc_msSinceSid_ =
391 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
392 return output_coefs;
393 } else {
394 enc_msSinceSid_ +=
395 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
396 return 0;
397 }
398}
399
400namespace {
401/* Values in |k| are Q15, and |a| Q12. */
402void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) {
403 int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
404 int16_t* aptr;
405 int16_t* aptr2;
406 int16_t* anyptr;
407 const int16_t* kptr;
408 int m, i;
409
410 kptr = k;
411 *a = 4096; /* i.e., (Word16_MAX >> 3) + 1 */
412 *any = *a;
413 a[1] = (*k + 4) >> 3;
414 for (m = 1; m < useOrder; m++) {
415 kptr++;
416 aptr = a;
417 aptr++;
418 aptr2 = &a[m];
419 anyptr = any;
420 anyptr++;
421
422 any[m + 1] = (*kptr + 4) >> 3;
423 for (i = 0; i < m; i++) {
424 *anyptr++ =
425 (*aptr++) +
426 (int16_t)((((int32_t)(*aptr2--) * (int32_t)*kptr) + 16384) >> 15);
427 }
428
429 aptr = a;
430 anyptr = any;
431 for (i = 0; i < (m + 2); i++) {
432 *aptr++ = *anyptr++;
433 }
434 }
435}
436
437} // namespace
438
439} // namespace webrtc