blob: 998d080714320dfb81d3cab78cf4781d28ab3110 [file] [log] [blame]
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
12#define MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000014#include <string.h> // memset, size_t
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000015
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020016#include "modules/audio_coding/neteq/audio_multi_vector.h"
Steve Anton10542f22019-01-11 09:11:00 -080017#include "rtc_base/constructor_magic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000018
19namespace webrtc {
20
21// Forward declarations.
22class BackgroundNoise;
23
24// This is the base class for Accelerate and PreemptiveExpand. This class
25// cannot be instantiated, but must be used through either of the derived
26// classes.
27class TimeStretch {
28 public:
29 enum ReturnCodes {
30 kSuccess = 0,
31 kSuccessLowEnergy = 1,
32 kNoStretch = 2,
33 kError = -1
34 };
35
Yves Gerey665174f2018-06-19 15:03:05 +020036 TimeStretch(int sample_rate_hz,
37 size_t num_channels,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000038 const BackgroundNoise& background_noise)
39 : sample_rate_hz_(sample_rate_hz),
40 fs_mult_(sample_rate_hz / 8000),
Peter Kastingdce40cf2015-08-24 14:52:23 -070041 num_channels_(num_channels),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000042 background_noise_(background_noise),
43 max_input_value_(0) {
Mirko Bonadei25ab3222021-07-08 20:08:20 +020044 RTC_DCHECK(sample_rate_hz_ == 8000 || sample_rate_hz_ == 16000 ||
45 sample_rate_hz_ == 32000 || sample_rate_hz_ == 48000);
46 RTC_DCHECK_GT(num_channels_, 0);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000047 memset(auto_correlation_, 0, sizeof(auto_correlation_));
48 }
49
50 virtual ~TimeStretch() {}
51
52 // This method performs the processing common to both Accelerate and
53 // PreemptiveExpand.
54 ReturnCodes Process(const int16_t* input,
55 size_t input_len,
Henrik Lundincf808d22015-05-27 14:33:29 +020056 bool fast_mode,
henrik.lundin@webrtc.orgfd11bbf2013-09-30 20:38:44 +000057 AudioMultiVector* output,
Peter Kastingdce40cf2015-08-24 14:52:23 -070058 size_t* length_change_samples);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000059
60 protected:
Artem Titovd00ce742021-07-28 20:00:17 +020061 // Sets the parameters `best_correlation` and `peak_index` to suitable
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000062 // values when the signal contains no active speech. This method must be
63 // implemented by the sub-classes.
turaj@webrtc.org362a55e2013-09-20 16:25:28 +000064 virtual void SetParametersForPassiveSpeech(size_t input_length,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000065 int16_t* best_correlation,
Peter Kastingdce40cf2015-08-24 14:52:23 -070066 size_t* peak_index) const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000067
68 // Checks the criteria for performing the time-stretching operation and,
69 // if possible, performs the time-stretching. This method must be implemented
70 // by the sub-classes.
71 virtual ReturnCodes CheckCriteriaAndStretch(
Henrik Lundincf808d22015-05-27 14:33:29 +020072 const int16_t* input,
73 size_t input_length,
74 size_t peak_index,
75 int16_t best_correlation,
76 bool active_speech,
77 bool fast_mode,
henrik.lundin@webrtc.orgfd11bbf2013-09-30 20:38:44 +000078 AudioMultiVector* output) const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000079
Peter Kastingdce40cf2015-08-24 14:52:23 -070080 static const size_t kCorrelationLen = 50;
81 static const size_t kLogCorrelationLen = 6; // >= log2(kCorrelationLen).
82 static const size_t kMinLag = 10;
83 static const size_t kMaxLag = 60;
84 static const size_t kDownsampledLen = kCorrelationLen + kMaxLag;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000085 static const int kCorrelationThreshold = 14746; // 0.9 in Q14.
Henrik Lundin11b6f682020-06-29 12:17:42 +020086 static constexpr size_t kRefChannel = 0; // First channel is reference.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000087
88 const int sample_rate_hz_;
89 const int fs_mult_; // Sample rate multiplier = sample_rate_hz_ / 8000.
Peter Kastingdce40cf2015-08-24 14:52:23 -070090 const size_t num_channels_;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000091 const BackgroundNoise& background_noise_;
92 int16_t max_input_value_;
93 int16_t downsampled_input_[kDownsampledLen];
Artem Titovd00ce742021-07-28 20:00:17 +020094 // Adding 1 to the size of `auto_correlation_` because of how it is used
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000095 // by the peak-detection algorithm.
96 int16_t auto_correlation_[kCorrelationLen + 1];
97
98 private:
Artem Titovd00ce742021-07-28 20:00:17 +020099 // Calculates the auto-correlation of `downsampled_input_` and writes the
100 // result to `auto_correlation_`.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000101 void AutoCorrelation();
102
103 // Performs a simple voice-activity detection based on the input parameters.
Yves Gerey665174f2018-06-19 15:03:05 +0200104 bool SpeechDetection(int32_t vec1_energy,
105 int32_t vec2_energy,
106 size_t peak_index,
107 int scaling) const;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000108
henrikg3c089d72015-09-16 05:37:44 -0700109 RTC_DISALLOW_COPY_AND_ASSIGN(TimeStretch);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000110};
111
112} // namespace webrtc
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200113#endif // MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_