kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_ |
| 12 | #define API_AUDIO_CODECS_AUDIO_DECODER_H_ |
kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 13 | |
Yves Gerey | 988cc08 | 2018-10-23 12:03:01 +0200 | [diff] [blame] | 14 | #include <stddef.h> |
| 15 | #include <stdint.h> |
kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 16 | #include <memory> |
| 17 | #include <vector> |
| 18 | |
Danil Chapovalov | 0bc58cf | 2018-06-21 13:32:56 +0200 | [diff] [blame] | 19 | #include "absl/types/optional.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 20 | #include "api/array_view.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 21 | #include "rtc_base/buffer.h" |
Steve Anton | 10542f2 | 2019-01-11 09:11:00 -0800 | [diff] [blame^] | 22 | #include "rtc_base/constructor_magic.h" |
kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 23 | |
| 24 | namespace webrtc { |
| 25 | |
| 26 | class AudioDecoder { |
| 27 | public: |
| 28 | enum SpeechType { |
| 29 | kSpeech = 1, |
| 30 | kComfortNoise = 2, |
| 31 | }; |
| 32 | |
| 33 | // Used by PacketDuration below. The value -1 is reserved for errors. |
| 34 | enum { kNotImplemented = -2 }; |
| 35 | |
| 36 | AudioDecoder() = default; |
| 37 | virtual ~AudioDecoder() = default; |
| 38 | |
| 39 | class EncodedAudioFrame { |
| 40 | public: |
| 41 | struct DecodeResult { |
| 42 | size_t num_decoded_samples; |
| 43 | SpeechType speech_type; |
| 44 | }; |
| 45 | |
| 46 | virtual ~EncodedAudioFrame() = default; |
| 47 | |
| 48 | // Returns the duration in samples-per-channel of this audio frame. |
| 49 | // If no duration can be ascertained, returns zero. |
| 50 | virtual size_t Duration() const = 0; |
| 51 | |
Ivo Creusen | c7f09ad | 2018-05-22 13:21:01 +0200 | [diff] [blame] | 52 | // Returns true if this packet contains DTX. |
| 53 | virtual bool IsDtxPacket() const; |
| 54 | |
kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 55 | // Decodes this frame of audio and writes the result in |decoded|. |
| 56 | // |decoded| must be large enough to store as many samples as indicated by a |
Danil Chapovalov | 0bc58cf | 2018-06-21 13:32:56 +0200 | [diff] [blame] | 57 | // call to Duration(). On success, returns an absl::optional containing the |
kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 58 | // total number of samples across all channels, as well as whether the |
| 59 | // decoder produced comfort noise or speech. On failure, returns an empty |
Danil Chapovalov | 0bc58cf | 2018-06-21 13:32:56 +0200 | [diff] [blame] | 60 | // absl::optional. Decode may be called at most once per frame object. |
| 61 | virtual absl::optional<DecodeResult> Decode( |
kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 62 | rtc::ArrayView<int16_t> decoded) const = 0; |
| 63 | }; |
| 64 | |
| 65 | struct ParseResult { |
| 66 | ParseResult(); |
| 67 | ParseResult(uint32_t timestamp, |
| 68 | int priority, |
| 69 | std::unique_ptr<EncodedAudioFrame> frame); |
| 70 | ParseResult(ParseResult&& b); |
| 71 | ~ParseResult(); |
| 72 | |
| 73 | ParseResult& operator=(ParseResult&& b); |
| 74 | |
| 75 | // The timestamp of the frame is in samples per channel. |
| 76 | uint32_t timestamp; |
| 77 | // The relative priority of the frame compared to other frames of the same |
| 78 | // payload and the same timeframe. A higher value means a lower priority. |
| 79 | // The highest priority is zero - negative values are not allowed. |
| 80 | int priority; |
| 81 | std::unique_ptr<EncodedAudioFrame> frame; |
| 82 | }; |
| 83 | |
| 84 | // Let the decoder parse this payload and prepare zero or more decodable |
| 85 | // frames. Each frame must be between 10 ms and 120 ms long. The caller must |
| 86 | // ensure that the AudioDecoder object outlives any frame objects returned by |
| 87 | // this call. The decoder is free to swap or move the data from the |payload| |
| 88 | // buffer. |timestamp| is the input timestamp, in samples, corresponding to |
| 89 | // the start of the payload. |
| 90 | virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, |
| 91 | uint32_t timestamp); |
| 92 | |
Niels Möller | b7180c0 | 2018-12-06 13:07:11 +0100 | [diff] [blame] | 93 | // TODO(bugs.webrtc.org/10098): The Decode and DecodeRedundant methods are |
| 94 | // obsolete; callers should call ParsePayload instead. For now, subclasses |
| 95 | // must still implement DecodeInternal. |
| 96 | |
kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 97 | // Decodes |encoded_len| bytes from |encoded| and writes the result in |
| 98 | // |decoded|. The maximum number of bytes allowed to be written into |decoded| is |
| 99 | // |max_decoded_bytes|. Returns the total number of samples across all |
| 100 | // channels. If the decoder produced comfort noise, |speech_type| |
| 101 | // is set to kComfortNoise, otherwise it is kSpeech. The desired output |
| 102 | // sample rate is provided in |sample_rate_hz|, which must be valid for the |
| 103 | // codec at hand. |
| 104 | int Decode(const uint8_t* encoded, |
| 105 | size_t encoded_len, |
| 106 | int sample_rate_hz, |
| 107 | size_t max_decoded_bytes, |
| 108 | int16_t* decoded, |
| 109 | SpeechType* speech_type); |
| 110 | |
| 111 | // Same as Decode(), but interfaces to the decoder's redundant decode function. |
| 112 | // The default implementation simply calls the regular Decode() method. |
| 113 | int DecodeRedundant(const uint8_t* encoded, |
| 114 | size_t encoded_len, |
| 115 | int sample_rate_hz, |
| 116 | size_t max_decoded_bytes, |
| 117 | int16_t* decoded, |
| 118 | SpeechType* speech_type); |
| 119 | |
| 120 | // Indicates if the decoder implements the DecodePlc method. |
| 121 | virtual bool HasDecodePlc() const; |
| 122 | |
| 123 | // Calls the packet-loss concealment of the decoder to update the state after |
| 124 | // one or several lost packets. The caller has to make sure that the |
| 125 | // memory allocated in |decoded| can accommodate |num_frames| frames. |
| 126 | virtual size_t DecodePlc(size_t num_frames, int16_t* decoded); |
| 127 | |
Henrik Lundin | 00eb12a | 2018-09-05 18:14:52 +0200 | [diff] [blame] | 128 | // Asks the decoder to generate packet-loss concealment and append it to the |
| 129 | // end of |concealment_audio|. The concealment audio should be in |
| 130 | // channel-interleaved format, with as many channels as the last decoded |
| 131 | // packet produced. The implementation must produce at least |
| 132 | // requested_samples_per_channel, or nothing at all; the latter signals the |
| 133 | // caller to conceal the loss with other means. If the implementation provides |
| 134 | // concealment samples, it is also responsible for "stitching" it together |
| 135 | // with the decoded audio on either side of the concealment. |
| 136 | // Note: The default implementation of GeneratePlc will be deleted soon. All |
| 137 | // implementations must provide their own, which can be as simple as a no-op. |
| 138 | // TODO(bugs.webrtc.org/9676): Remove default implementation. |
| 139 | virtual void GeneratePlc(size_t requested_samples_per_channel, |
| 140 | rtc::BufferT<int16_t>* concealment_audio); |
| 141 | |
kwiberg | 087bd34 | 2017-02-10 08:15:44 -0800 | [diff] [blame] | 142 | // Resets the decoder state (empty buffers etc.). |
| 143 | virtual void Reset() = 0; |
| 144 | |
| 145 | // Notifies the decoder of an incoming packet to NetEQ. |
| 146 | virtual int IncomingPacket(const uint8_t* payload, |
| 147 | size_t payload_len, |
| 148 | uint16_t rtp_sequence_number, |
| 149 | uint32_t rtp_timestamp, |
| 150 | uint32_t arrival_timestamp); |
| 151 | |
| 152 | // Returns the last error code from the decoder. |
| 153 | virtual int ErrorCode(); |
| 154 | |
| 155 | // Returns the duration in samples-per-channel of the payload in |encoded| |
| 156 | // which is |encoded_len| bytes long. Returns kNotImplemented if no duration |
| 157 | // estimate is available, or -1 in case of an error. |
| 158 | virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; |
| 159 | |
| 160 | // Returns the duration in samples-per-channel of the redundant payload in |
| 161 | // |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no |
| 162 | // duration estimate is available, or -1 in case of an error. |
| 163 | virtual int PacketDurationRedundant(const uint8_t* encoded, |
| 164 | size_t encoded_len) const; |
| 165 | |
| 166 | // Detects whether a packet has forward error correction. The packet is |
| 167 | // comprised of the samples in |encoded| which is |encoded_len| bytes long. |
| 168 | // Returns true if the packet has FEC and false otherwise. |
| 169 | virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const; |
| 170 | |
| 171 | // Returns the actual sample rate of the decoder's output. This value may not |
| 172 | // change during the lifetime of the decoder. |
| 173 | virtual int SampleRateHz() const = 0; |
| 174 | |
| 175 | // The number of channels in the decoder's output. This value may not change |
| 176 | // during the lifetime of the decoder. |
| 177 | virtual size_t Channels() const = 0; |
| 178 | |
| 179 | protected: |
| 180 | static SpeechType ConvertSpeechType(int16_t type); |
| 181 | |
| 182 | virtual int DecodeInternal(const uint8_t* encoded, |
| 183 | size_t encoded_len, |
| 184 | int sample_rate_hz, |
| 185 | int16_t* decoded, |
| 186 | SpeechType* speech_type) = 0; |
| 187 | |
| 188 | virtual int DecodeRedundantInternal(const uint8_t* encoded, |
| 189 | size_t encoded_len, |
| 190 | int sample_rate_hz, |
| 191 | int16_t* decoded, |
| 192 | SpeechType* speech_type); |
| 193 | |
| 194 | private: |
| 195 | RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder); |
| 196 | }; |
| 197 | |
| 198 | } // namespace webrtc |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 199 | #endif // API_AUDIO_CODECS_AUDIO_DECODER_H_ |