blob: b891d8423d28bb3febe6121d9d7dd5ea5bc8a031 [file] [log] [blame]
ossu97ba30e2016-04-25 07:55:58 -07001/*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_coding/codecs/cng/webrtc_cng.h"
12
13#include <algorithm>
14
15#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
Edward Lemurc20978e2017-07-06 19:44:34 +020016#include "webrtc/rtc_base/safe_conversions.h"
ossu97ba30e2016-04-25 07:55:58 -070017
18namespace webrtc {
19
20namespace {
21
/* Upper bound on the number of samples handled by a single Generate() or
 * Encode() call; it is also the length of their on-stack work buffers. */
constexpr size_t kCngMaxOutsizeOrder = 640;

/* Forward declaration; the definition lives at the end of this file.
 * TODO(ossu): Rename the left-over WebRtcCng according to style guide. */
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a);
26
/* Energy table indexed by the noise level carried in the first SID byte.
 * Successive entries shrink by a factor of about 10^(-1/10), i.e. one dB per
 * index. The trailing comment on each row gives the indices it holds. */
constexpr int32_t WebRtcCng_kDbov[94] = {
    1081109975, 858756178, 682134279, 541838517, 430397633, 341876992, /* 0-5 */
    271562548, 215709799, 171344384, 136103682, 108110997, 85875618, /* 6-11 */
    68213428, 54183852, 43039763, 34187699, 27156255, 21570980, /* 12-17 */
    17134438, 13610368, 10811100, 8587562, 6821343, 5418385, /* 18-23 */
    4303976, 3418770, 2715625, 2157098, 1713444, 1361037, /* 24-29 */
    1081110, 858756, 682134, 541839, 430398, 341877, /* 30-35 */
    271563, 215710, 171344, 136104, 108111, 85876, /* 36-41 */
    68213, 54184, 43040, 34188, 27156, 21571, /* 42-47 */
    17134, 13610, 10811, 8588, 6821, 5418, /* 48-53 */
    4304, 3419, 2716, 2157, 1713, 1361, /* 54-59 */
    1081, 859, 682, 542, 430, 342, /* 60-65 */
    272, 216, 171, 136, 108, 86, /* 66-71 */
    68, 54, 43, 34, 27, 22, /* 72-77 */
    17, 14, 11, 9, 7, 5, /* 78-83 */
    4, 3, 3, 2, 2, 1, /* 84-89 */
    1, 1, 1, 1 /* 90-93 */
};
45
46const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = {
47 32702, 32636, 32570, 32505, 32439, 32374,
48 32309, 32244, 32179, 32114, 32049, 31985
49};
50
51} // namespace
52
53ComfortNoiseDecoder::ComfortNoiseDecoder() {
54 /* Needed to get the right function pointers in SPLIB. */
55 WebRtcSpl_Init();
56 Reset();
57}
58
59void ComfortNoiseDecoder::Reset() {
60 dec_seed_ = 7777; /* For debugging only. */
61 dec_target_energy_ = 0;
62 dec_used_energy_ = 0;
63 for (auto& c : dec_target_reflCoefs_)
64 c = 0;
65 for (auto& c : dec_used_reflCoefs_)
66 c = 0;
67 for (auto& c : dec_filtstate_)
68 c = 0;
69 for (auto& c : dec_filtstateLow_)
70 c = 0;
71 dec_order_ = 5;
72 dec_target_scale_factor_ = 0;
73 dec_used_scale_factor_ = 0;
74}
75
76void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView<const uint8_t> sid) {
77 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER];
78 int32_t targetEnergy;
79 size_t length = sid.size();
80 /* Throw away reflection coefficients of higher order than we can handle. */
81 if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1))
82 length = WEBRTC_CNG_MAX_LPC_ORDER + 1;
83
84 dec_order_ = static_cast<uint16_t>(length - 1);
85
86 uint8_t sid0 = std::min<uint8_t>(sid[0], 93);
87 targetEnergy = WebRtcCng_kDbov[sid0];
88 /* Take down target energy to 75%. */
89 targetEnergy = targetEnergy >> 1;
90 targetEnergy += targetEnergy >> 2;
91
92 dec_target_energy_ = targetEnergy;
93
94 /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */
95 if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) {
96 for (size_t i = 0; i < (dec_order_); i++) {
97 refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/
98 dec_target_reflCoefs_[i] = refCs[i];
99 }
100 } else {
101 for (size_t i = 0; i < (dec_order_); i++) {
102 refCs[i] = (sid[i + 1] - 127) << 8; /* Q7 to Q15. */
103 dec_target_reflCoefs_[i] = refCs[i];
104 }
105 }
106
107 for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
108 refCs[i] = 0;
109 dec_target_reflCoefs_[i] = refCs[i];
110 }
111}
112
113bool ComfortNoiseDecoder::Generate(rtc::ArrayView<int16_t> out_data,
114 bool new_period) {
115 int16_t excitation[kCngMaxOutsizeOrder];
116 int16_t low[kCngMaxOutsizeOrder];
117 int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1];
118 int16_t ReflBetaStd = 26214; /* 0.8 in q15. */
119 int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */
120 int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */
121 int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */
ossu0eb19602016-11-22 05:15:21 -0800122 int16_t Beta, BetaC; /* These are in Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700123 int32_t targetEnergy;
124 int16_t En;
125 int16_t temp16;
126 const size_t num_samples = out_data.size();
127
128 if (num_samples > kCngMaxOutsizeOrder) {
129 return false;
130 }
131
132 if (new_period) {
133 dec_used_scale_factor_ = dec_target_scale_factor_;
134 Beta = ReflBetaNewP;
135 BetaC = ReflBetaCompNewP;
136 } else {
137 Beta = ReflBetaStd;
138 BetaC = ReflBetaCompStd;
139 }
140
ossu0eb19602016-11-22 05:15:21 -0800141 /* Calculate new scale factor in Q13 */
142 dec_used_scale_factor_ =
143 rtc::checked_cast<int16_t>(
144 WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) +
145 WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13));
ossu97ba30e2016-04-25 07:55:58 -0700146
147 dec_used_energy_ = dec_used_energy_ >> 1;
148 dec_used_energy_ += dec_target_energy_ >> 1;
149
ossu0eb19602016-11-22 05:15:21 -0800150 /* Do the same for the reflection coeffs, albeit in Q15. */
ossu97ba30e2016-04-25 07:55:58 -0700151 for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
152 dec_used_reflCoefs_[i] = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(
153 dec_used_reflCoefs_[i], Beta, 15);
154 dec_used_reflCoefs_[i] += (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(
155 dec_target_reflCoefs_[i], BetaC, 15);
156 }
157
158 /* Compute the polynomial coefficients. */
159 WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly);
160
161
162 targetEnergy = dec_used_energy_;
163
164 /* Calculate scaling factor based on filter energy. */
165 En = 8192; /* 1.0 in Q13. */
166 for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) {
167 /* Floating point value for reference.
168 E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) *
169 (dec_used_reflCoefs_[i] / 32768.0);
170 */
171
172 /* Same in fixed point. */
173 /* K(i).^2 in Q15. */
174 temp16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(
175 dec_used_reflCoefs_[i], dec_used_reflCoefs_[i], 15);
176 /* 1 - K(i).^2 in Q15. */
177 temp16 = 0x7fff - temp16;
178 En = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15);
179 }
180
181 /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */
182
183 /* Calculate sqrt(En * target_energy / excitation energy) */
184 targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_);
185
186 En = (int16_t) WebRtcSpl_Sqrt(En) << 6;
187 En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */
188 dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12);
189
190 /* Generate excitation. */
191 /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */
192 for (size_t i = 0; i < num_samples; i++) {
193 excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1;
194 }
195
196 /* Scale to correct energy. */
197 WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_,
198 num_samples, 13);
199
200 /* |lpPoly| - Coefficients in Q12.
201 * |excitation| - Speech samples.
202 * |nst->dec_filtstate| - State preservation.
203 * |out_data| - Filtered speech samples. */
204 WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation,
205 num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER,
206 dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER,
207 out_data.data(), low, num_samples);
208
209 return true;
210}
211
212ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality)
213 : enc_nrOfCoefs_(quality),
214 enc_sampfreq_(fs),
215 enc_interval_(interval),
216 enc_msSinceSid_(0),
217 enc_Energy_(0),
218 enc_reflCoefs_{0},
219 enc_corrVector_{0},
220 enc_seed_(7777) /* For debugging only. */ {
kwibergee89e782017-08-09 17:22:01 -0700221 RTC_CHECK_GT(quality, 0);
222 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
ossu97ba30e2016-04-25 07:55:58 -0700223 /* Needed to get the right function pointers in SPLIB. */
224 WebRtcSpl_Init();
225}
226
227void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) {
kwibergee89e782017-08-09 17:22:01 -0700228 RTC_CHECK_GT(quality, 0);
229 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
ossu97ba30e2016-04-25 07:55:58 -0700230 enc_nrOfCoefs_ = quality;
231 enc_sampfreq_ = fs;
232 enc_interval_ = interval;
233 enc_msSinceSid_ = 0;
234 enc_Energy_ = 0;
235 for (auto& c : enc_reflCoefs_)
236 c = 0;
237 for (auto& c : enc_corrVector_)
238 c = 0;
239 enc_seed_ = 7777; /* For debugging only. */
240}
241
242size_t ComfortNoiseEncoder::Encode(rtc::ArrayView<const int16_t> speech,
243 bool force_sid,
244 rtc::Buffer* output) {
245 int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
246 int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1];
247 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
248 int16_t hanningW[kCngMaxOutsizeOrder];
249 int16_t ReflBeta = 19661; /* 0.6 in q15. */
250 int16_t ReflBetaComp = 13107; /* 0.4 in q15. */
251 int32_t outEnergy;
252 int outShifts;
253 size_t i;
254 int stab;
255 int acorrScale;
256 size_t index;
257 size_t ind, factor;
258 int32_t* bptr;
259 int32_t blo, bhi;
260 int16_t negate;
261 const int16_t* aptr;
262 int16_t speechBuf[kCngMaxOutsizeOrder];
263
264 const size_t num_samples = speech.size();
kwiberg352444f2016-11-28 15:58:53 -0800265 RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder);
ossu97ba30e2016-04-25 07:55:58 -0700266
267 for (i = 0; i < num_samples; i++) {
268 speechBuf[i] = speech[i];
269 }
270
271 factor = num_samples;
272
273 /* Calculate energy and a coefficients. */
274 outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts);
275 while (outShifts > 0) {
276 /* We can only do 5 shifts without destroying accuracy in
277 * division factor. */
278 if (outShifts > 5) {
279 outEnergy <<= (outShifts - 5);
280 outShifts = 5;
281 } else {
282 factor /= 2;
283 outShifts--;
284 }
285 }
286 outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor);
287
288 if (outEnergy > 1) {
289 /* Create Hanning Window. */
290 WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2);
291 for (i = 0; i < (num_samples / 2); i++)
292 hanningW[num_samples - i - 1] = hanningW[i];
293
294 WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples,
295 14);
296
297 WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_,
298 corrVector, &acorrScale);
299
300 if (*corrVector == 0)
301 *corrVector = WEBRTC_SPL_WORD16_MAX;
302
303 /* Adds the bandwidth expansion. */
304 aptr = WebRtcCng_kCorrWindow;
305 bptr = corrVector;
306
307 /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */
308 for (ind = 0; ind < enc_nrOfCoefs_; ind++) {
309 /* The below code multiplies the 16 b corrWindow values (Q15) with
310 * the 32 b corrvector (Q0) and shifts the result down 15 steps. */
311 negate = *bptr < 0;
312 if (negate)
313 *bptr = -*bptr;
314
315 blo = (int32_t) * aptr * (*bptr & 0xffff);
316 bhi = ((blo >> 16) & 0xffff)
317 + ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff));
318 blo = (blo & 0xffff) | ((bhi & 0xffff) << 16);
319
320 *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t) blo >> 15);
321 if (negate)
322 *bptr = -*bptr;
323 bptr++;
324 }
325 /* End of bandwidth expansion. */
326
327 stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs,
328 enc_nrOfCoefs_);
329
330 if (!stab) {
331 /* Disregard from this frame */
332 return 0;
333 }
334
335 } else {
336 for (i = 0; i < enc_nrOfCoefs_; i++)
337 refCs[i] = 0;
338 }
339
340 if (force_sid) {
341 /* Read instantaneous values instead of averaged. */
342 for (i = 0; i < enc_nrOfCoefs_; i++)
343 enc_reflCoefs_[i] = refCs[i];
344 enc_Energy_ = outEnergy;
345 } else {
346 /* Average history with new values. */
347 for (i = 0; i < enc_nrOfCoefs_; i++) {
348 enc_reflCoefs_[i] = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(
349 enc_reflCoefs_[i], ReflBeta, 15);
350 enc_reflCoefs_[i] +=
351 (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15);
352 }
353 enc_Energy_ =
354 (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2);
355 }
356
357 if (enc_Energy_ < 1) {
358 enc_Energy_ = 1;
359 }
360
361 if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) {
362 /* Search for best dbov value. */
363 index = 0;
364 for (i = 1; i < 93; i++) {
365 /* Always round downwards. */
366 if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) {
367 index = i;
368 break;
369 }
370 }
371 if ((i == 93) && (index == 0))
372 index = 94;
373
374 const size_t output_coefs = enc_nrOfCoefs_ + 1;
375 output->AppendData(output_coefs, [&] (rtc::ArrayView<uint8_t> output) {
376 output[0] = (uint8_t)index;
377
378 /* Quantize coefficients with tweak for WebRtc implementation of
379 * RFC3389. */
380 if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) {
381 for (i = 0; i < enc_nrOfCoefs_; i++) {
382 /* Q15 to Q7 with rounding. */
383 output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8);
384 }
385 } else {
386 for (i = 0; i < enc_nrOfCoefs_; i++) {
387 /* Q15 to Q7 with rounding. */
388 output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8));
389 }
390 }
391
392 return output_coefs;
393 });
394
395 enc_msSinceSid_ =
396 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
397 return output_coefs;
398 } else {
399 enc_msSinceSid_ +=
400 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
401 return 0;
402 }
403}
404
namespace {

/* Converts reflection coefficients into direct-form filter polynomial
 * coefficients with the step-up recursion.
 *
 * |k|        - Reflection coefficients in Q15; |k[0]| .. |k[useOrder - 1]|
 *              are read and nothing is written.
 * |useOrder| - Filter order. |k[0]| is always read and |a[0]|, |a[1]| are
 *              always written, so callers should pass at least 1.
 * |a|        - Receives |useOrder| + 1 coefficients in Q12; |a[0]| is always
 *              4096 (1.0).
 *
 * The update runs in place on |a|, so no scratch buffer is needed and the
 * order is limited only by the size of the caller's arrays. */
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) {
  a[0] = 4096;            /* 1.0 in Q12, i.e., (Word16_MAX >> 3) + 1. */
  a[1] = (k[0] + 4) >> 3; /* Q15 to Q12 with rounding. */

  for (int m = 1; m < useOrder; m++) {
    const int32_t km = k[m];

    /* Order update: a[j] += round(k[m] * a[m + 1 - j]) for j = 1..m, where
     * the right-hand side uses the values from before this step. Entries j
     * and m + 1 - j depend only on each other, so they are updated as a pair
     * from saved copies of both. */
    int lo = 1;
    int hi = m;
    while (lo < hi) {
      const int16_t a_lo = a[lo];
      const int16_t a_hi = a[hi];
      a[lo] = static_cast<int16_t>(
          a_lo + static_cast<int16_t>((a_hi * km + 16384) >> 15));
      a[hi] = static_cast<int16_t>(
          a_hi + static_cast<int16_t>((a_lo * km + 16384) >> 15));
      ++lo;
      --hi;
    }
    if (lo == hi) {
      /* Odd count: the middle entry pairs with itself. */
      const int16_t a_mid = a[lo];
      a[lo] = static_cast<int16_t>(
          a_mid + static_cast<int16_t>((a_mid * km + 16384) >> 15));
    }

    /* The new highest-order coefficient is k[m] itself, Q15 to Q12. */
    a[m + 1] = (k[m] + 4) >> 3;
  }
}

}  // namespace
443
444} // namespace webrtc