henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Henrik Kjellander | 7464089 | 2015-10-29 11:31:02 +0100 | [diff] [blame] | 11 | #ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_ |
| 12 | #define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_ |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 13 | |
ossu | 0d526d5 | 2016-09-21 01:57:31 -0700 | [diff] [blame^] | 14 | #include <memory> |
| 15 | #include <vector> |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 16 | |
ossu | 61a208b | 2016-09-20 01:38:00 -0700 | [diff] [blame] | 17 | #include <memory> |
| 18 | #include <vector> |
| 19 | |
| 20 | #include "webrtc/base/array_view.h" |
| 21 | #include "webrtc/base/buffer.h" |
henrike@webrtc.org | 88fbb2d | 2014-05-21 21:18:46 +0000 | [diff] [blame] | 22 | #include "webrtc/base/constructormagic.h" |
ossu | 61a208b | 2016-09-20 01:38:00 -0700 | [diff] [blame] | 23 | #include "webrtc/base/optional.h" |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 24 | #include "webrtc/typedefs.h" |
| 25 | |
| 26 | namespace webrtc { |
| 27 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 28 | // This is the interface class for decoders in NetEQ. Each codec type will have |
| 29 | // an implementation of this class. |
| 30 | class AudioDecoder { |
| 31 | public: |
| 32 | enum SpeechType { |
| 33 | kSpeech = 1, |
| 34 | kComfortNoise = 2 |
| 35 | }; |
| 36 | |
| 37 | // Used by PacketDuration below. The value -1 is reserved for errors. |
| 38 | enum { kNotImplemented = -2 }; |
| 39 | |
henrik.lundin@webrtc.org | 6dba1eb | 2015-03-18 09:47:08 +0000 | [diff] [blame] | 40 | AudioDecoder() = default; |
| 41 | virtual ~AudioDecoder() = default; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 42 | |
ossu | 61a208b | 2016-09-20 01:38:00 -0700 | [diff] [blame] | 43 | class EncodedAudioFrame { |
| 44 | public: |
| 45 | struct DecodeResult { |
| 46 | size_t num_decoded_samples; |
| 47 | SpeechType speech_type; |
| 48 | }; |
| 49 | |
| 50 | virtual ~EncodedAudioFrame() = default; |
| 51 | |
| 52 | // Returns the duration in samples-per-channel of this audio frame. |
| 53 | // If no duration can be ascertained, returns zero. |
| 54 | virtual size_t Duration() const = 0; |
| 55 | |
| 56 | // Decodes this frame of audio and writes the result in |decoded|. |
| 57 | // |decoded| must be large enough to store as many samples as indicated by a |
| 58 | // call to Duration(). On success, returns an rtc::Optional containing the |
| 59 | // total number of samples across all channels, as well as whether the |
| 60 | // decoder produced comfort noise or speech. On failure, returns an empty |
| 61 | // rtc::Optional. Decode may be called at most once per frame object. |
| 62 | virtual rtc::Optional<DecodeResult> Decode( |
| 63 | rtc::ArrayView<int16_t> decoded) const = 0; |
| 64 | }; |
| 65 | |
| 66 | struct ParseResult { |
| 67 | ParseResult(); |
| 68 | ParseResult(uint32_t timestamp, |
| 69 | bool primary, |
| 70 | std::unique_ptr<EncodedAudioFrame> frame); |
| 71 | ParseResult(ParseResult&& b); |
| 72 | ~ParseResult(); |
| 73 | |
| 74 | ParseResult& operator=(ParseResult&& b); |
| 75 | |
| 76 | // The timestamp of the frame is in samples per channel. |
| 77 | uint32_t timestamp; |
| 78 | bool primary; |
| 79 | std::unique_ptr<EncodedAudioFrame> frame; |
| 80 | }; |
| 81 | |
| 82 | // Let the decoder parse this payload and prepare zero or more decodable |
| 83 | // frames. Each frame must be between 10 ms and 120 ms long. The caller must |
| 84 | // ensure that the AudioDecoder object outlives any frame objects returned by |
| 85 | // this call. The decoder is free to swap or move the data from the |payload| |
| 86 | // buffer. |timestamp| is the input timestamp, in samples, corresponding to |
| 87 | // the start of the payload. |
| 88 | virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, |
| 89 | uint32_t timestamp, |
| 90 | bool is_primary); |
| 91 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 92 | // Decodes |encoded_len| bytes from |encoded| and writes the result in |
minyue@webrtc.org | 7f7d7e3 | 2015-03-16 12:30:37 +0000 | [diff] [blame] | 93 | // |decoded|. The maximum number of bytes allowed to be written into |decoded| is |
Minyue | 323b132 | 2015-05-25 13:49:37 +0200 | [diff] [blame] | 94 | // |max_decoded_bytes|. Returns the total number of samples across all |
| 95 | // channels. If the decoder produced comfort noise, |speech_type| |
henrik.lundin@webrtc.org | 1eda4e3 | 2015-02-25 10:02:29 +0000 | [diff] [blame] | 96 | // is set to kComfortNoise, otherwise it is kSpeech. The desired output |
| 97 | // sample rate is provided in |sample_rate_hz|, which must be valid for the |
| 98 | // codec at hand. |
Peter Boström | d7b7ae8 | 2015-12-08 13:41:35 +0100 | [diff] [blame] | 99 | int Decode(const uint8_t* encoded, |
| 100 | size_t encoded_len, |
| 101 | int sample_rate_hz, |
| 102 | size_t max_decoded_bytes, |
| 103 | int16_t* decoded, |
| 104 | SpeechType* speech_type); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 105 | |
| 106 | // Same as Decode(), but interfaces to the decoder's redundant decode function. |
| 107 | // The default implementation simply calls the regular Decode() method. |
Peter Boström | d7b7ae8 | 2015-12-08 13:41:35 +0100 | [diff] [blame] | 108 | int DecodeRedundant(const uint8_t* encoded, |
| 109 | size_t encoded_len, |
| 110 | int sample_rate_hz, |
| 111 | size_t max_decoded_bytes, |
| 112 | int16_t* decoded, |
| 113 | SpeechType* speech_type); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 114 | |
| 115 | // Indicates if the decoder implements the DecodePlc method. |
pbos@webrtc.org | 2d1a55c | 2013-07-31 15:54:00 +0000 | [diff] [blame] | 116 | virtual bool HasDecodePlc() const; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 117 | |
| 118 | // Calls the packet-loss concealment of the decoder to update the state after |
minyuel | 6d92bf5 | 2015-09-23 15:20:39 +0200 | [diff] [blame] | 119 | // one or several lost packets. The caller has to make sure that the |
| 120 | // memory allocated in |decoded| can accommodate |num_frames| frames. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 121 | virtual size_t DecodePlc(size_t num_frames, int16_t* decoded); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 122 | |
Karl Wiberg | 4376648 | 2015-08-27 15:22:11 +0200 | [diff] [blame] | 123 | // Resets the decoder state (empty buffers etc.). |
| 124 | virtual void Reset() = 0; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 125 | |
| 126 | // Notifies the decoder of an incoming packet to NetEQ. |
| 127 | virtual int IncomingPacket(const uint8_t* payload, |
| 128 | size_t payload_len, |
| 129 | uint16_t rtp_sequence_number, |
| 130 | uint32_t rtp_timestamp, |
pbos@webrtc.org | 2d1a55c | 2013-07-31 15:54:00 +0000 | [diff] [blame] | 131 | uint32_t arrival_timestamp); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 132 | |
| 133 | // Returns the last error code from the decoder. |
pbos@webrtc.org | 2d1a55c | 2013-07-31 15:54:00 +0000 | [diff] [blame] | 134 | virtual int ErrorCode(); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 135 | |
Minyue | 323b132 | 2015-05-25 13:49:37 +0200 | [diff] [blame] | 136 | // Returns the duration in samples-per-channel of the payload in |encoded| |
| 137 | // which is |encoded_len| bytes long. Returns kNotImplemented if no duration |
| 138 | // estimate is available, or -1 in case of an error. |
minyue@webrtc.org | a8cc344 | 2015-02-13 14:01:54 +0000 | [diff] [blame] | 139 | virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 140 | |
Minyue | 323b132 | 2015-05-25 13:49:37 +0200 | [diff] [blame] | 141 | // Returns the duration in samples-per-channel of the redundant payload in |
| 142 | // |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no |
| 143 | // duration estimate is available, or -1 in case of an error. |
minyue@webrtc.org | b28bfa7 | 2014-03-21 12:07:40 +0000 | [diff] [blame] | 144 | virtual int PacketDurationRedundant(const uint8_t* encoded, |
| 145 | size_t encoded_len) const; |
| 146 | |
| 147 | // Detects whether a packet has forward error correction. The packet is |
| 148 | // comprised of the samples in |encoded| which is |encoded_len| bytes long. |
| 149 | // Returns true if the packet has FEC and false otherwise. |
| 150 | virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const; |
| 151 | |
kwiberg | f882880 | 2016-06-02 03:19:23 -0700 | [diff] [blame] | 152 | // Returns the actual sample rate of the decoder's output. This value may not |
| 153 | // change during the lifetime of the decoder. |
kwiberg | 347d351 | 2016-06-16 01:59:09 -0700 | [diff] [blame] | 154 | virtual int SampleRateHz() const = 0; |
kwiberg | 6c2eab3 | 2016-05-31 02:46:20 -0700 | [diff] [blame] | 155 | |
kwiberg | f882880 | 2016-06-02 03:19:23 -0700 | [diff] [blame] | 156 | // The number of channels in the decoder's output. This value may not change |
| 157 | // during the lifetime of the decoder. |
henrik.lundin@webrtc.org | 6dba1eb | 2015-03-18 09:47:08 +0000 | [diff] [blame] | 158 | virtual size_t Channels() const = 0; |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 159 | |
| 160 | protected: |
| 161 | static SpeechType ConvertSpeechType(int16_t type); |
| 162 | |
minyue@webrtc.org | 7f7d7e3 | 2015-03-16 12:30:37 +0000 | [diff] [blame] | 163 | virtual int DecodeInternal(const uint8_t* encoded, |
| 164 | size_t encoded_len, |
| 165 | int sample_rate_hz, |
| 166 | int16_t* decoded, |
Peter Boström | d7b7ae8 | 2015-12-08 13:41:35 +0100 | [diff] [blame] | 167 | SpeechType* speech_type) = 0; |
minyue@webrtc.org | 7f7d7e3 | 2015-03-16 12:30:37 +0000 | [diff] [blame] | 168 | |
| 169 | virtual int DecodeRedundantInternal(const uint8_t* encoded, |
| 170 | size_t encoded_len, |
| 171 | int sample_rate_hz, |
| 172 | int16_t* decoded, |
| 173 | SpeechType* speech_type); |
| 174 | |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 175 | private: |
henrikg | 3c089d7 | 2015-09-16 05:37:44 -0700 | [diff] [blame] | 176 | RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder); |
henrik.lundin@webrtc.org | d94659d | 2013-01-29 12:09:21 +0000 | [diff] [blame] | 177 | }; |
| 178 | |
| 179 | } // namespace webrtc |
Henrik Kjellander | 7464089 | 2015-10-29 11:31:02 +0100 | [diff] [blame] | 180 | #endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_ |