/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #ifndef MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |
| 12 | #define MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 13 | |
| 14 | #include <assert.h> |
| 15 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 16 | #include "modules/audio_coding/neteq/audio_multi_vector.h" |
| 17 | #include "rtc_base/constructormagic.h" |
Mirko Bonadei | 7120742 | 2017-09-15 13:58:09 +0200 | [diff] [blame] | 18 | #include "typedefs.h" // NOLINT(build/include) |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 19 | |
| 20 | namespace webrtc { |
| 21 | |
| 22 | // Forward declarations. |
| 23 | class Expand; |
| 24 | class SyncBuffer; |
| 25 | |
| 26 | // This class handles the transition from expansion to normal operation. |
| 27 | // When a packet is not available for decoding when needed, the expand operation |
| 28 | // is called to generate extrapolation data. If the missing packet arrives, |
| 29 | // i.e., it was just delayed, it can be decoded and appended directly to the |
| 30 | // end of the expanded data (thanks to how the Expand class operates). However, |
| 31 | // if a later packet arrives instead, the loss is a fact, and the new data must |
| 32 | // be stitched together with the end of the expanded data. This stitching is |
| 33 | // what the Merge class does. |
| 34 | class Merge { |
| 35 | public: |
Karl Wiberg | 7f6c4d4 | 2015-04-09 15:44:22 +0200 | [diff] [blame] | 36 | Merge(int fs_hz, |
| 37 | size_t num_channels, |
| 38 | Expand* expand, |
| 39 | SyncBuffer* sync_buffer); |
minyue | 5bd3397 | 2016-05-02 04:46:11 -0700 | [diff] [blame] | 40 | virtual ~Merge(); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 41 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 42 | // The main method to produce the audio data. The decoded data is supplied in |
| 43 | // |input|, having |input_length| samples in total for all channels |
| 44 | // (interleaved). The result is written to |output|. The number of channels |
| 45 | // allocated in |output| defines the number of channels that will be used when |
Henrik Lundin | 6dc82e8 | 2018-05-22 10:40:23 +0200 | [diff] [blame] | 46 | // de-interleaving |input|. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 47 | virtual size_t Process(int16_t* input, |
| 48 | size_t input_length, |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 49 | AudioMultiVector* output); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 50 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 51 | virtual size_t RequiredFutureSamples(); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 52 | |
| 53 | protected: |
| 54 | const int fs_hz_; |
| 55 | const size_t num_channels_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 56 | |
| 57 | private: |
| 58 | static const int kMaxSampleRate = 48000; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 59 | static const size_t kExpandDownsampLength = 100; |
| 60 | static const size_t kInputDownsampLength = 40; |
| 61 | static const size_t kMaxCorrelationLength = 60; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 62 | |
| 63 | // Calls |expand_| to get more expansion data to merge with. The data is |
| 64 | // written to |expanded_signal_|. Returns the length of the expanded data, |
| 65 | // while |expand_period| will be the number of samples in one expansion period |
| 66 | // (typically one pitch period). The value of |old_length| will be the number |
| 67 | // of samples that were taken from the |sync_buffer_|. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 68 | size_t GetExpandedSignal(size_t* old_length, size_t* expand_period); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 69 | |
minyue | 53ff70f | 2016-05-02 01:50:30 -0700 | [diff] [blame] | 70 | // Analyzes |input| and |expanded_signal| and returns muting factor (Q14) to |
| 71 | // be used on the new data. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 72 | int16_t SignalScaling(const int16_t* input, |
| 73 | size_t input_length, |
minyue | 53ff70f | 2016-05-02 01:50:30 -0700 | [diff] [blame] | 74 | const int16_t* expanded_signal) const; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 75 | |
| 76 | // Downsamples |input| (|input_length| samples) and |expanded_signal| to |
| 77 | // 4 kHz sample rate. The downsampled signals are written to |
| 78 | // |input_downsampled_| and |expanded_downsampled_|, respectively. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 79 | void Downsample(const int16_t* input, |
| 80 | size_t input_length, |
| 81 | const int16_t* expanded_signal, |
| 82 | size_t expanded_length); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 83 | |
| 84 | // Calculates cross-correlation between |input_downsampled_| and |
| 85 | // |expanded_downsampled_|, and finds the correlation maximum. The maximizing |
| 86 | // lag is returned. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 87 | size_t CorrelateAndPeakSearch(size_t start_position, |
| 88 | size_t input_length, |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 89 | size_t expand_period) const; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 90 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 91 | const int fs_mult_; // fs_hz_ / 8000. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 92 | const size_t timestamps_per_call_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 93 | Expand* expand_; |
| 94 | SyncBuffer* sync_buffer_; |
| 95 | int16_t expanded_downsampled_[kExpandDownsampLength]; |
| 96 | int16_t input_downsampled_[kInputDownsampLength]; |
henrik.lundin@webrtc.org | fd11bbf | 2013-09-30 20:38:44 +0000 | [diff] [blame] | 97 | AudioMultiVector expanded_; |
minyue | 5bd3397 | 2016-05-02 04:46:11 -0700 | [diff] [blame] | 98 | std::vector<int16_t> temp_data_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 99 | |
henrikg | 3c089d7 | 2015-09-16 05:37:44 -0700 | [diff] [blame] | 100 | RTC_DISALLOW_COPY_AND_ASSIGN(Merge); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 101 | }; |
| 102 | |
| 103 | } // namespace webrtc |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 104 | #endif // MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |