/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame^] | 11 | #ifndef MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |
| 12 | #define MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 13 | |
| 14 | #include <assert.h> |
| 15 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame^] | 16 | #include "modules/audio_coding/neteq/audio_multi_vector.h" |
| 17 | #include "rtc_base/constructormagic.h" |
| 18 | #include "typedefs.h" |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 19 | |
| 20 | namespace webrtc { |
| 21 | |
| 22 | // Forward declarations. |
| 23 | class Expand; |
| 24 | class SyncBuffer; |
| 25 | |
| 26 | // This class handles the transition from expansion to normal operation. |
| 27 | // When a packet is not available for decoding when needed, the expand operation |
| 28 | // is called to generate extrapolation data. If the missing packet arrives, |
| 29 | // i.e., it was just delayed, it can be decoded and appended directly to the |
| 30 | // end of the expanded data (thanks to how the Expand class operates). However, |
| 31 | // if a later packet arrives instead, the loss is a fact, and the new data must |
| 32 | // be stitched together with the end of the expanded data. This stitching is |
| 33 | // what the Merge class does. |
| 34 | class Merge { |
| 35 | public: |
Karl Wiberg | 7f6c4d4 | 2015-04-09 15:44:22 +0200 | [diff] [blame] | 36 | Merge(int fs_hz, |
| 37 | size_t num_channels, |
| 38 | Expand* expand, |
| 39 | SyncBuffer* sync_buffer); |
minyue | 5bd3397 | 2016-05-02 04:46:11 -0700 | [diff] [blame] | 40 | virtual ~Merge(); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 41 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 42 | // The main method to produce the audio data. The decoded data is supplied in |
| 43 | // |input|, having |input_length| samples in total for all channels |
| 44 | // (interleaved). The result is written to |output|. The number of channels |
| 45 | // allocated in |output| defines the number of channels that will be used when |
| 46 | // de-interleaving |input|. The values in |external_mute_factor_array| (Q14) |
| 47 | // will be used to scale the audio, and is updated in the process. The array |
| 48 | // must have |num_channels_| elements. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 49 | virtual size_t Process(int16_t* input, size_t input_length, |
| 50 | int16_t* external_mute_factor_array, |
| 51 | AudioMultiVector* output); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 52 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 53 | virtual size_t RequiredFutureSamples(); |
turaj@webrtc.org | 8d1cdaa | 2014-04-11 18:47:55 +0000 | [diff] [blame] | 54 | |
| 55 | protected: |
| 56 | const int fs_hz_; |
| 57 | const size_t num_channels_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 58 | |
| 59 | private: |
| 60 | static const int kMaxSampleRate = 48000; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 61 | static const size_t kExpandDownsampLength = 100; |
| 62 | static const size_t kInputDownsampLength = 40; |
| 63 | static const size_t kMaxCorrelationLength = 60; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 64 | |
| 65 | // Calls |expand_| to get more expansion data to merge with. The data is |
| 66 | // written to |expanded_signal_|. Returns the length of the expanded data, |
| 67 | // while |expand_period| will be the number of samples in one expansion period |
| 68 | // (typically one pitch period). The value of |old_length| will be the number |
| 69 | // of samples that were taken from the |sync_buffer_|. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 70 | size_t GetExpandedSignal(size_t* old_length, size_t* expand_period); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 71 | |
minyue | 53ff70f | 2016-05-02 01:50:30 -0700 | [diff] [blame] | 72 | // Analyzes |input| and |expanded_signal| and returns muting factor (Q14) to |
| 73 | // be used on the new data. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 74 | int16_t SignalScaling(const int16_t* input, size_t input_length, |
minyue | 53ff70f | 2016-05-02 01:50:30 -0700 | [diff] [blame] | 75 | const int16_t* expanded_signal) const; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 76 | |
| 77 | // Downsamples |input| (|input_length| samples) and |expanded_signal| to |
| 78 | // 4 kHz sample rate. The downsampled signals are written to |
| 79 | // |input_downsampled_| and |expanded_downsampled_|, respectively. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 80 | void Downsample(const int16_t* input, size_t input_length, |
| 81 | const int16_t* expanded_signal, size_t expanded_length); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 82 | |
| 83 | // Calculates cross-correlation between |input_downsampled_| and |
| 84 | // |expanded_downsampled_|, and finds the correlation maximum. The maximizing |
| 85 | // lag is returned. |
minyue | 53ff70f | 2016-05-02 01:50:30 -0700 | [diff] [blame] | 86 | size_t CorrelateAndPeakSearch(size_t start_position, size_t input_length, |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 87 | size_t expand_period) const; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 88 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 89 | const int fs_mult_; // fs_hz_ / 8000. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 90 | const size_t timestamps_per_call_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 91 | Expand* expand_; |
| 92 | SyncBuffer* sync_buffer_; |
| 93 | int16_t expanded_downsampled_[kExpandDownsampLength]; |
| 94 | int16_t input_downsampled_[kInputDownsampLength]; |
henrik.lundin@webrtc.org | fd11bbf | 2013-09-30 20:38:44 +0000 | [diff] [blame] | 95 | AudioMultiVector expanded_; |
minyue | 5bd3397 | 2016-05-02 04:46:11 -0700 | [diff] [blame] | 96 | std::vector<int16_t> temp_data_; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 97 | |
henrikg | 3c089d7 | 2015-09-16 05:37:44 -0700 | [diff] [blame] | 98 | RTC_DISALLOW_COPY_AND_ASSIGN(Merge); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 99 | }; |
| 100 | |
| 101 | } // namespace webrtc |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame^] | 102 | #endif // MODULES_AUDIO_CODING_NETEQ_MERGE_H_ |