blob: f0ddaebeca6a54642d8f6d6c1dfb59ea29b077ba [file] [log] [blame]
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
12#define MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000014#include <string.h> // memset, size_t
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000015
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020016#include "modules/audio_coding/neteq/audio_multi_vector.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000017
18namespace webrtc {
19
20// Forward declarations.
21class BackgroundNoise;
22
23// This is the base class for Accelerate and PreemptiveExpand. This class
24// cannot be instantiated, but must be used through either of the derived
25// classes.
26class TimeStretch {
27 public:
28 enum ReturnCodes {
29 kSuccess = 0,
30 kSuccessLowEnergy = 1,
31 kNoStretch = 2,
32 kError = -1
33 };
34
Yves Gerey665174f2018-06-19 15:03:05 +020035 TimeStretch(int sample_rate_hz,
36 size_t num_channels,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000037 const BackgroundNoise& background_noise)
38 : sample_rate_hz_(sample_rate_hz),
39 fs_mult_(sample_rate_hz / 8000),
Peter Kastingdce40cf2015-08-24 14:52:23 -070040 num_channels_(num_channels),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000041 background_noise_(background_noise),
42 max_input_value_(0) {
Mirko Bonadei25ab3222021-07-08 20:08:20 +020043 RTC_DCHECK(sample_rate_hz_ == 8000 || sample_rate_hz_ == 16000 ||
44 sample_rate_hz_ == 32000 || sample_rate_hz_ == 48000);
45 RTC_DCHECK_GT(num_channels_, 0);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000046 memset(auto_correlation_, 0, sizeof(auto_correlation_));
47 }
48
49 virtual ~TimeStretch() {}
50
Byoungchan Lee604fd2f2022-01-21 09:49:39 +090051 TimeStretch(const TimeStretch&) = delete;
52 TimeStretch& operator=(const TimeStretch&) = delete;
53
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000054 // This method performs the processing common to both Accelerate and
55 // PreemptiveExpand.
56 ReturnCodes Process(const int16_t* input,
57 size_t input_len,
Henrik Lundincf808d22015-05-27 14:33:29 +020058 bool fast_mode,
henrik.lundin@webrtc.orgfd11bbf2013-09-30 20:38:44 +000059 AudioMultiVector* output,
Peter Kastingdce40cf2015-08-24 14:52:23 -070060 size_t* length_change_samples);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000061
62 protected:
Artem Titovd00ce742021-07-28 20:00:17 +020063 // Sets the parameters `best_correlation` and `peak_index` to suitable
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000064 // values when the signal contains no active speech. This method must be
65 // implemented by the sub-classes.
turaj@webrtc.org362a55e2013-09-20 16:25:28 +000066 virtual void SetParametersForPassiveSpeech(size_t input_length,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000067 int16_t* best_correlation,
Peter Kastingdce40cf2015-08-24 14:52:23 -070068 size_t* peak_index) const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000069
70 // Checks the criteria for performing the time-stretching operation and,
71 // if possible, performs the time-stretching. This method must be implemented
72 // by the sub-classes.
73 virtual ReturnCodes CheckCriteriaAndStretch(
Henrik Lundincf808d22015-05-27 14:33:29 +020074 const int16_t* input,
75 size_t input_length,
76 size_t peak_index,
77 int16_t best_correlation,
78 bool active_speech,
79 bool fast_mode,
henrik.lundin@webrtc.orgfd11bbf2013-09-30 20:38:44 +000080 AudioMultiVector* output) const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000081
Peter Kastingdce40cf2015-08-24 14:52:23 -070082 static const size_t kCorrelationLen = 50;
83 static const size_t kLogCorrelationLen = 6; // >= log2(kCorrelationLen).
84 static const size_t kMinLag = 10;
85 static const size_t kMaxLag = 60;
86 static const size_t kDownsampledLen = kCorrelationLen + kMaxLag;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000087 static const int kCorrelationThreshold = 14746; // 0.9 in Q14.
Henrik Lundin11b6f682020-06-29 12:17:42 +020088 static constexpr size_t kRefChannel = 0; // First channel is reference.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000089
90 const int sample_rate_hz_;
91 const int fs_mult_; // Sample rate multiplier = sample_rate_hz_ / 8000.
Peter Kastingdce40cf2015-08-24 14:52:23 -070092 const size_t num_channels_;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000093 const BackgroundNoise& background_noise_;
94 int16_t max_input_value_;
95 int16_t downsampled_input_[kDownsampledLen];
Artem Titovd00ce742021-07-28 20:00:17 +020096 // Adding 1 to the size of `auto_correlation_` because of how it is used
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000097 // by the peak-detection algorithm.
98 int16_t auto_correlation_[kCorrelationLen + 1];
99
100 private:
Artem Titovd00ce742021-07-28 20:00:17 +0200101 // Calculates the auto-correlation of `downsampled_input_` and writes the
102 // result to `auto_correlation_`.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000103 void AutoCorrelation();
104
105 // Performs a simple voice-activity detection based on the input parameters.
Yves Gerey665174f2018-06-19 15:03:05 +0200106 bool SpeechDetection(int32_t vec1_energy,
107 int32_t vec2_energy,
108 size_t peak_index,
109 int scaling) const;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000110};
111
112} // namespace webrtc
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200113#endif // MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_