/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
ossu97ba30e2016-04-25 07:55:58 -070012
13#include <algorithm>
14
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020015#include "common_audio/signal_processing/include/signal_processing_library.h"
Karl Wiberge40468b2017-11-22 10:42:26 +010016#include "rtc_base/numerics/safe_conversions.h"
ossu97ba30e2016-04-25 07:55:58 -070017
18namespace webrtc {
19
20namespace {
21
/* Upper bound on the number of samples handled by one Generate() or Encode()
 * call; it also sizes the stack buffers those functions work in. */
constexpr size_t kCngMaxOutsizeOrder = 640;

// TODO(ossu): Rename the left-over WebRtcCng according to style guide.
/* Forward declaration; the definition lives further down in this file. */
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a);
26
/* Energy lookup table indexed by noise level. Each entry is roughly
 * 10^(-0.1) times the previous one, i.e. the table descends in 1 dB steps.
 * The decoder indexes it with the first SID byte (clamped to 93) and the
 * encoder searches it to quantize the measured energy. */
constexpr int32_t WebRtcCng_kDbov[94] = {
    /*  0 */ 1081109975, 858756178, 682134279, 541838517, 430397633,
    /*  5 */ 341876992,  271562548, 215709799, 171344384, 136103682,
    /* 10 */ 108110997,  85875618,  68213428,  54183852,  43039763,
    /* 15 */ 34187699,   27156255,  21570980,  17134438,  13610368,
    /* 20 */ 10811100,   8587562,   6821343,   5418385,   4303976,
    /* 25 */ 3418770,    2715625,   2157098,   1713444,   1361037,
    /* 30 */ 1081110,    858756,    682134,    541839,    430398,
    /* 35 */ 341877,     271563,    215710,    171344,    136104,
    /* 40 */ 108111,     85876,     68213,     54184,     43040,
    /* 45 */ 34188,      27156,     21571,     17134,     13610,
    /* 50 */ 10811,      8588,      6821,      5418,      4304,
    /* 55 */ 3419,       2716,      2157,      1713,      1361,
    /* 60 */ 1081,       859,       682,       542,       430,
    /* 65 */ 342,        272,       216,       171,       136,
    /* 70 */ 108,        86,        68,        54,        43,
    /* 75 */ 34,         27,        22,        17,        14,
    /* 80 */ 11,         9,         7,         5,         4,
    /* 85 */ 3,          3,         2,         2,         1,
    /* 90 */ 1,          1,         1,         1};
ossu97ba30e2016-04-25 07:55:58 -070044
45const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = {
Yves Gerey665174f2018-06-19 15:03:05 +020046 32702, 32636, 32570, 32505, 32439, 32374,
47 32309, 32244, 32179, 32114, 32049, 31985};
ossu97ba30e2016-04-25 07:55:58 -070048
49} // namespace
50
51ComfortNoiseDecoder::ComfortNoiseDecoder() {
52 /* Needed to get the right function pointers in SPLIB. */
53 WebRtcSpl_Init();
54 Reset();
55}
56
57void ComfortNoiseDecoder::Reset() {
Yves Gerey665174f2018-06-19 15:03:05 +020058 dec_seed_ = 7777; /* For debugging only. */
ossu97ba30e2016-04-25 07:55:58 -070059 dec_target_energy_ = 0;
60 dec_used_energy_ = 0;
61 for (auto& c : dec_target_reflCoefs_)
62 c = 0;
63 for (auto& c : dec_used_reflCoefs_)
64 c = 0;
65 for (auto& c : dec_filtstate_)
66 c = 0;
67 for (auto& c : dec_filtstateLow_)
68 c = 0;
69 dec_order_ = 5;
70 dec_target_scale_factor_ = 0;
71 dec_used_scale_factor_ = 0;
72}
73
74void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView<const uint8_t> sid) {
75 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER];
76 int32_t targetEnergy;
77 size_t length = sid.size();
78 /* Throw away reflection coefficients of higher order than we can handle. */
79 if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1))
80 length = WEBRTC_CNG_MAX_LPC_ORDER + 1;
81
82 dec_order_ = static_cast<uint16_t>(length - 1);
83
84 uint8_t sid0 = std::min<uint8_t>(sid[0], 93);
85 targetEnergy = WebRtcCng_kDbov[sid0];
86 /* Take down target energy to 75%. */
87 targetEnergy = targetEnergy >> 1;
88 targetEnergy += targetEnergy >> 2;
89
90 dec_target_energy_ = targetEnergy;
91
92 /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */
93 if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) {
94 for (size_t i = 0; i < (dec_order_); i++) {
95 refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/
96 dec_target_reflCoefs_[i] = refCs[i];
97 }
98 } else {
99 for (size_t i = 0; i < (dec_order_); i++) {
Sam Zackrisson32c6ae22017-12-11 11:44:25 +0100100 refCs[i] = (sid[i + 1] - 127) * (1 << 8); /* Q7 to Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700101 dec_target_reflCoefs_[i] = refCs[i];
102 }
103 }
104
105 for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
106 refCs[i] = 0;
107 dec_target_reflCoefs_[i] = refCs[i];
108 }
109}
110
111bool ComfortNoiseDecoder::Generate(rtc::ArrayView<int16_t> out_data,
112 bool new_period) {
113 int16_t excitation[kCngMaxOutsizeOrder];
114 int16_t low[kCngMaxOutsizeOrder];
115 int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1];
Yves Gerey665174f2018-06-19 15:03:05 +0200116 int16_t ReflBetaStd = 26214; /* 0.8 in q15. */
117 int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */
118 int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */
119 int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */
120 int16_t Beta, BetaC; /* These are in Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700121 int32_t targetEnergy;
122 int16_t En;
123 int16_t temp16;
124 const size_t num_samples = out_data.size();
125
126 if (num_samples > kCngMaxOutsizeOrder) {
127 return false;
128 }
129
130 if (new_period) {
131 dec_used_scale_factor_ = dec_target_scale_factor_;
132 Beta = ReflBetaNewP;
133 BetaC = ReflBetaCompNewP;
134 } else {
135 Beta = ReflBetaStd;
136 BetaC = ReflBetaCompStd;
137 }
138
ossu0eb19602016-11-22 05:15:21 -0800139 /* Calculate new scale factor in Q13 */
Yves Gerey665174f2018-06-19 15:03:05 +0200140 dec_used_scale_factor_ = rtc::checked_cast<int16_t>(
141 WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) +
142 WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13));
ossu97ba30e2016-04-25 07:55:58 -0700143
Yves Gerey665174f2018-06-19 15:03:05 +0200144 dec_used_energy_ = dec_used_energy_ >> 1;
ossu97ba30e2016-04-25 07:55:58 -0700145 dec_used_energy_ += dec_target_energy_ >> 1;
146
ossu0eb19602016-11-22 05:15:21 -0800147 /* Do the same for the reflection coeffs, albeit in Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700148 for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
Yves Gerey665174f2018-06-19 15:03:05 +0200149 dec_used_reflCoefs_[i] =
150 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], Beta, 15);
151 dec_used_reflCoefs_[i] +=
152 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_target_reflCoefs_[i], BetaC, 15);
ossu97ba30e2016-04-25 07:55:58 -0700153 }
154
155 /* Compute the polynomial coefficients. */
156 WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly);
157
ossu97ba30e2016-04-25 07:55:58 -0700158 targetEnergy = dec_used_energy_;
159
160 /* Calculate scaling factor based on filter energy. */
Yves Gerey665174f2018-06-19 15:03:05 +0200161 En = 8192; /* 1.0 in Q13. */
ossu97ba30e2016-04-25 07:55:58 -0700162 for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) {
163 /* Floating point value for reference.
164 E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) *
165 (dec_used_reflCoefs_[i] / 32768.0);
166 */
167
168 /* Same in fixed point. */
169 /* K(i).^2 in Q15. */
Yves Gerey665174f2018-06-19 15:03:05 +0200170 temp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i],
171 dec_used_reflCoefs_[i], 15);
ossu97ba30e2016-04-25 07:55:58 -0700172 /* 1 - K(i).^2 in Q15. */
173 temp16 = 0x7fff - temp16;
Yves Gerey665174f2018-06-19 15:03:05 +0200174 En = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15);
ossu97ba30e2016-04-25 07:55:58 -0700175 }
176
177 /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */
178
179 /* Calculate sqrt(En * target_energy / excitation energy) */
180 targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_);
181
Yves Gerey665174f2018-06-19 15:03:05 +0200182 En = (int16_t)WebRtcSpl_Sqrt(En) << 6;
183 En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */
ossu97ba30e2016-04-25 07:55:58 -0700184 dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12);
185
186 /* Generate excitation. */
187 /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */
188 for (size_t i = 0; i < num_samples; i++) {
189 excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1;
190 }
191
192 /* Scale to correct energy. */
193 WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_,
194 num_samples, 13);
195
196 /* |lpPoly| - Coefficients in Q12.
197 * |excitation| - Speech samples.
198 * |nst->dec_filtstate| - State preservation.
199 * |out_data| - Filtered speech samples. */
200 WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation,
201 num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER,
202 dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER,
203 out_data.data(), low, num_samples);
204
205 return true;
206}
207
208ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality)
209 : enc_nrOfCoefs_(quality),
210 enc_sampfreq_(fs),
211 enc_interval_(interval),
212 enc_msSinceSid_(0),
213 enc_Energy_(0),
214 enc_reflCoefs_{0},
215 enc_corrVector_{0},
Yves Gerey665174f2018-06-19 15:03:05 +0200216 enc_seed_(7777) /* For debugging only. */ {
kwibergee89e782017-08-09 17:22:01 -0700217 RTC_CHECK_GT(quality, 0);
218 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
ossu97ba30e2016-04-25 07:55:58 -0700219 /* Needed to get the right function pointers in SPLIB. */
220 WebRtcSpl_Init();
221}
222
223void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) {
kwibergee89e782017-08-09 17:22:01 -0700224 RTC_CHECK_GT(quality, 0);
225 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
ossu97ba30e2016-04-25 07:55:58 -0700226 enc_nrOfCoefs_ = quality;
227 enc_sampfreq_ = fs;
228 enc_interval_ = interval;
229 enc_msSinceSid_ = 0;
230 enc_Energy_ = 0;
231 for (auto& c : enc_reflCoefs_)
232 c = 0;
233 for (auto& c : enc_corrVector_)
234 c = 0;
Yves Gerey665174f2018-06-19 15:03:05 +0200235 enc_seed_ = 7777; /* For debugging only. */
ossu97ba30e2016-04-25 07:55:58 -0700236}
237
238size_t ComfortNoiseEncoder::Encode(rtc::ArrayView<const int16_t> speech,
239 bool force_sid,
240 rtc::Buffer* output) {
241 int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
242 int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1];
243 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
244 int16_t hanningW[kCngMaxOutsizeOrder];
245 int16_t ReflBeta = 19661; /* 0.6 in q15. */
246 int16_t ReflBetaComp = 13107; /* 0.4 in q15. */
247 int32_t outEnergy;
248 int outShifts;
249 size_t i;
250 int stab;
251 int acorrScale;
252 size_t index;
253 size_t ind, factor;
254 int32_t* bptr;
255 int32_t blo, bhi;
256 int16_t negate;
257 const int16_t* aptr;
258 int16_t speechBuf[kCngMaxOutsizeOrder];
259
260 const size_t num_samples = speech.size();
kwiberg352444f2016-11-28 15:58:53 -0800261 RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder);
ossu97ba30e2016-04-25 07:55:58 -0700262
263 for (i = 0; i < num_samples; i++) {
264 speechBuf[i] = speech[i];
265 }
266
267 factor = num_samples;
268
269 /* Calculate energy and a coefficients. */
270 outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts);
271 while (outShifts > 0) {
272 /* We can only do 5 shifts without destroying accuracy in
273 * division factor. */
274 if (outShifts > 5) {
275 outEnergy <<= (outShifts - 5);
276 outShifts = 5;
277 } else {
278 factor /= 2;
279 outShifts--;
280 }
281 }
282 outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor);
283
284 if (outEnergy > 1) {
285 /* Create Hanning Window. */
286 WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2);
287 for (i = 0; i < (num_samples / 2); i++)
288 hanningW[num_samples - i - 1] = hanningW[i];
289
290 WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples,
291 14);
292
293 WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_,
294 corrVector, &acorrScale);
295
296 if (*corrVector == 0)
297 *corrVector = WEBRTC_SPL_WORD16_MAX;
298
299 /* Adds the bandwidth expansion. */
300 aptr = WebRtcCng_kCorrWindow;
301 bptr = corrVector;
302
303 /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */
304 for (ind = 0; ind < enc_nrOfCoefs_; ind++) {
305 /* The below code multiplies the 16 b corrWindow values (Q15) with
306 * the 32 b corrvector (Q0) and shifts the result down 15 steps. */
307 negate = *bptr < 0;
308 if (negate)
309 *bptr = -*bptr;
310
Yves Gerey665174f2018-06-19 15:03:05 +0200311 blo = (int32_t)*aptr * (*bptr & 0xffff);
312 bhi = ((blo >> 16) & 0xffff) +
313 ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff));
ossu97ba30e2016-04-25 07:55:58 -0700314 blo = (blo & 0xffff) | ((bhi & 0xffff) << 16);
315
Yves Gerey665174f2018-06-19 15:03:05 +0200316 *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t)blo >> 15);
ossu97ba30e2016-04-25 07:55:58 -0700317 if (negate)
318 *bptr = -*bptr;
319 bptr++;
320 }
321 /* End of bandwidth expansion. */
322
Yves Gerey665174f2018-06-19 15:03:05 +0200323 stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, enc_nrOfCoefs_);
ossu97ba30e2016-04-25 07:55:58 -0700324
325 if (!stab) {
326 /* Disregard from this frame */
327 return 0;
328 }
329
330 } else {
331 for (i = 0; i < enc_nrOfCoefs_; i++)
332 refCs[i] = 0;
333 }
334
335 if (force_sid) {
336 /* Read instantaneous values instead of averaged. */
337 for (i = 0; i < enc_nrOfCoefs_; i++)
338 enc_reflCoefs_[i] = refCs[i];
339 enc_Energy_ = outEnergy;
340 } else {
341 /* Average history with new values. */
342 for (i = 0; i < enc_nrOfCoefs_; i++) {
Yves Gerey665174f2018-06-19 15:03:05 +0200343 enc_reflCoefs_[i] =
344 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(enc_reflCoefs_[i], ReflBeta, 15);
ossu97ba30e2016-04-25 07:55:58 -0700345 enc_reflCoefs_[i] +=
Yves Gerey665174f2018-06-19 15:03:05 +0200346 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15);
ossu97ba30e2016-04-25 07:55:58 -0700347 }
Yves Gerey665174f2018-06-19 15:03:05 +0200348 enc_Energy_ = (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2);
ossu97ba30e2016-04-25 07:55:58 -0700349 }
350
351 if (enc_Energy_ < 1) {
352 enc_Energy_ = 1;
353 }
354
355 if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) {
356 /* Search for best dbov value. */
357 index = 0;
358 for (i = 1; i < 93; i++) {
359 /* Always round downwards. */
360 if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) {
361 index = i;
362 break;
363 }
364 }
365 if ((i == 93) && (index == 0))
366 index = 94;
367
368 const size_t output_coefs = enc_nrOfCoefs_ + 1;
Yves Gerey665174f2018-06-19 15:03:05 +0200369 output->AppendData(output_coefs, [&](rtc::ArrayView<uint8_t> output) {
370 output[0] = (uint8_t)index;
ossu97ba30e2016-04-25 07:55:58 -0700371
Yves Gerey665174f2018-06-19 15:03:05 +0200372 /* Quantize coefficients with tweak for WebRtc implementation of
373 * RFC3389. */
374 if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) {
375 for (i = 0; i < enc_nrOfCoefs_; i++) {
376 /* Q15 to Q7 with rounding. */
377 output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8);
ossu97ba30e2016-04-25 07:55:58 -0700378 }
Yves Gerey665174f2018-06-19 15:03:05 +0200379 } else {
380 for (i = 0; i < enc_nrOfCoefs_; i++) {
381 /* Q15 to Q7 with rounding. */
382 output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8));
383 }
384 }
ossu97ba30e2016-04-25 07:55:58 -0700385
Yves Gerey665174f2018-06-19 15:03:05 +0200386 return output_coefs;
387 });
ossu97ba30e2016-04-25 07:55:58 -0700388
389 enc_msSinceSid_ =
390 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
391 return output_coefs;
392 } else {
393 enc_msSinceSid_ +=
394 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
395 return 0;
396 }
397}
398
399namespace {
/* Converts reflection coefficients to LPC polynomial coefficients with the
 * step-up recursion.
 *
 * |k|        - Reflection coefficients in Q15; entries 0..useOrder-1 are
 *              read (|k[0]| is read even when |useOrder| < 1).
 * |useOrder| - Number of reflection coefficients to use.
 * |a|        - Output polynomial in Q12; entries 0..useOrder are written
 *              (|a[0]| and |a[1]| are always written). |a[0]| is 1.0.
 *
 * The recursion runs in place instead of through a fixed-size scratch
 * array, so |useOrder| is limited only by the size of the caller's buffers.
 * Assumes |k| and |a| do not overlap. */
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) {
  /* Rounded Q12 x Q15 -> Q12 product, truncated to 16 bits. */
  const auto scaled_by_k = [](int16_t coef, int16_t refl) {
    return static_cast<int16_t>(
        ((static_cast<int32_t>(coef) * static_cast<int32_t>(refl)) + 16384) >>
        15);
  };

  a[0] = 4096; /* i.e., (Word16_MAX >> 3) + 1 */
  a[1] = static_cast<int16_t>((k[0] + 4) >> 3); /* Q15 to Q12, rounded. */

  for (int m = 1; m < useOrder; m++) {
    const int16_t km = k[m];

    /* Order update: a_new[j] = a[j] + k[m] * a[m + 1 - j] for j = 1..m.
     * Entries j and m + 1 - j depend only on each other's old values, so
     * each mirrored pair is updated together from saved copies. */
    for (int lo = 1, hi = m; lo <= hi; lo++, hi--) {
      const int16_t old_lo = a[lo];
      const int16_t old_hi = a[hi];
      a[lo] = static_cast<int16_t>(old_lo + scaled_by_k(old_hi, km));
      a[hi] = static_cast<int16_t>(old_hi + scaled_by_k(old_lo, km));
    }

    /* The new highest-order coefficient is k[m] itself, Q15 to Q12. */
    a[m + 1] = static_cast<int16_t>((km + 4) >> 3);
  }
}
435
436} // namespace
437
438} // namespace webrtc