henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #ifndef MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |
| 12 | #define MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 13 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 14 | #include "modules/audio_coding/neteq/audio_multi_vector.h" |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 15 | |
| 16 | namespace webrtc { |
| 17 | |
| 18 | // Forward declarations. |
| 19 | class Expand; |
| 20 | class SyncBuffer; |
| 21 | |
| 22 | // This class handles the transition from expansion to normal operation. |
| 23 | // When a packet is not available for decoding when needed, the expand operation |
| 24 | // is called to generate extrapolation data. If the missing packet arrives, |
| 25 | // i.e., it was just delayed, it can be decoded and appended directly to the |
| 26 | // end of the expanded data (thanks to how the Expand class operates). However, |
| 27 | // if a later packet arrives instead, the loss is a fact, and the new data must |
| 28 | // be stitched together with the end of the expanded data. This stitching is |
| 29 | // what the Merge class does. |
| 30 | class Merge { |
| 31 | public: |
Karl Wiberg | 7f6c4d4 | 2015-04-09 15:44:22 +0200 | [diff] [blame] | 32 | Merge(int fs_hz, |
| 33 | size_t num_channels, |
| 34 | Expand* expand, |
| 35 | SyncBuffer* sync_buffer); |
minyue | 5bd3397 | 2016-05-02 04:46:11 -0700 | [diff] [blame] | 36 | virtual ~Merge(); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 37 | |
Byoungchan Lee | 604fd2f | 2022-01-21 09:49:39 +0900 | [diff] [blame^] | 38 | Merge(const Merge&) = delete; |
| 39 | Merge& operator=(const Merge&) = delete; |
| 40 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 41 | // The main method to produce the audio data. The decoded data is supplied in |
Artem Titov | d00ce74 | 2021-07-28 20:00:17 +0200 | [diff] [blame] | 42 | // `input`, having `input_length` samples in total for all channels |
| 43 | // (interleaved). The result is written to `output`. The number of channels |
| 44 | // allocated in `output` defines the number of channels that will be used when |
| 45 | // de-interleaving `input`. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 46 | virtual size_t Process(int16_t* input, |
| 47 | size_t input_length, |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 48 | AudioMultiVector* output); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 49 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 50 | virtual size_t RequiredFutureSamples(); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 51 | |
| 52 | protected: |
| 53 | const int fs_hz_; |
| 54 | const size_t num_channels_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 55 | |
| 56 | private: |
| 57 | static const int kMaxSampleRate = 48000; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 58 | static const size_t kExpandDownsampLength = 100; |
| 59 | static const size_t kInputDownsampLength = 40; |
| 60 | static const size_t kMaxCorrelationLength = 60; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 61 | |
Artem Titov | d00ce74 | 2021-07-28 20:00:17 +0200 | [diff] [blame] | 62 | // Calls `expand_` to get more expansion data to merge with. The data is |
| 63 | // written to `expanded_signal_`. Returns the length of the expanded data, |
| 64 | // while `expand_period` will be the number of samples in one expansion period |
| 65 | // (typically one pitch period). The value of `old_length` will be the number |
| 66 | // of samples that were taken from the `sync_buffer_`. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 67 | size_t GetExpandedSignal(size_t* old_length, size_t* expand_period); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 68 | |
Artem Titov | d00ce74 | 2021-07-28 20:00:17 +0200 | [diff] [blame] | 69 | // Analyzes `input` and `expanded_signal` and returns muting factor (Q14) to |
minyue | 53ff70f | 2016-05-02 01:50:30 -0700 | [diff] [blame] | 70 | // be used on the new data. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 71 | int16_t SignalScaling(const int16_t* input, |
| 72 | size_t input_length, |
minyue | 53ff70f | 2016-05-02 01:50:30 -0700 | [diff] [blame] | 73 | const int16_t* expanded_signal) const; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 74 | |
Artem Titov | d00ce74 | 2021-07-28 20:00:17 +0200 | [diff] [blame] | 75 | // Downsamples `input` (`input_length` samples) and `expanded_signal` to |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 76 | // 4 kHz sample rate. The downsampled signals are written to |
Artem Titov | d00ce74 | 2021-07-28 20:00:17 +0200 | [diff] [blame] | 77 | // `input_downsampled_` and `expanded_downsampled_`, respectively. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 78 | void Downsample(const int16_t* input, |
| 79 | size_t input_length, |
| 80 | const int16_t* expanded_signal, |
| 81 | size_t expanded_length); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 82 | |
Artem Titov | d00ce74 | 2021-07-28 20:00:17 +0200 | [diff] [blame] | 83 | // Calculates cross-correlation between `input_downsampled_` and |
| 84 | // `expanded_downsampled_`, and finds the correlation maximum. The maximizing |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 85 | // lag is returned. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 86 | size_t CorrelateAndPeakSearch(size_t start_position, |
| 87 | size_t input_length, |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 88 | size_t expand_period) const; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 89 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 90 | const int fs_mult_; // fs_hz_ / 8000. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 91 | const size_t timestamps_per_call_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 92 | Expand* expand_; |
| 93 | SyncBuffer* sync_buffer_; |
| 94 | int16_t expanded_downsampled_[kExpandDownsampLength]; |
| 95 | int16_t input_downsampled_[kInputDownsampLength]; |
henrik.lundin@webrtc.org | fd11bbf | 2013-09-30 20:38:44 +0000 | [diff] [blame] | 96 | AudioMultiVector expanded_; |
minyue | 5bd3397 | 2016-05-02 04:46:11 -0700 | [diff] [blame] | 97 | std::vector<int16_t> temp_data_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 98 | }; |
| 99 | |
| 100 | } // namespace webrtc |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 101 | #endif // MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |