blob: e4e5e1eff47d866a4e27d422f7e356f878ba3b67 [file] [log] [blame]
kwiberg087bd342017-02-10 08:15:44 -08001/*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_
12#define API_AUDIO_CODECS_AUDIO_DECODER_H_
kwiberg087bd342017-02-10 08:15:44 -080013
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stddef.h>
15#include <stdint.h>
kwiberg087bd342017-02-10 08:15:44 -080016#include <memory>
17#include <vector>
18
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020019#include "absl/types/optional.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020020#include "api/array_view.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020021#include "rtc_base/buffer.h"
Steve Anton10542f22019-01-11 09:11:00 -080022#include "rtc_base/constructor_magic.h"
kwiberg087bd342017-02-10 08:15:44 -080023
24namespace webrtc {
25
26class AudioDecoder {
27 public:
28 enum SpeechType {
29 kSpeech = 1,
30 kComfortNoise = 2,
31 };
32
33 // Used by PacketDuration below. Save the value -1 for errors.
34 enum { kNotImplemented = -2 };
35
36 AudioDecoder() = default;
37 virtual ~AudioDecoder() = default;
38
39 class EncodedAudioFrame {
40 public:
41 struct DecodeResult {
42 size_t num_decoded_samples;
43 SpeechType speech_type;
44 };
45
46 virtual ~EncodedAudioFrame() = default;
47
48 // Returns the duration in samples-per-channel of this audio frame.
49 // If no duration can be ascertained, returns zero.
50 virtual size_t Duration() const = 0;
51
Ivo Creusenc7f09ad2018-05-22 13:21:01 +020052 // Returns true if this packet contains DTX.
53 virtual bool IsDtxPacket() const;
54
kwiberg087bd342017-02-10 08:15:44 -080055 // Decodes this frame of audio and writes the result in |decoded|.
56 // |decoded| must be large enough to store as many samples as indicated by a
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020057 // call to Duration() . On success, returns an absl::optional containing the
kwiberg087bd342017-02-10 08:15:44 -080058 // total number of samples across all channels, as well as whether the
59 // decoder produced comfort noise or speech. On failure, returns an empty
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020060 // absl::optional. Decode may be called at most once per frame object.
61 virtual absl::optional<DecodeResult> Decode(
kwiberg087bd342017-02-10 08:15:44 -080062 rtc::ArrayView<int16_t> decoded) const = 0;
63 };
64
65 struct ParseResult {
66 ParseResult();
67 ParseResult(uint32_t timestamp,
68 int priority,
69 std::unique_ptr<EncodedAudioFrame> frame);
70 ParseResult(ParseResult&& b);
71 ~ParseResult();
72
73 ParseResult& operator=(ParseResult&& b);
74
75 // The timestamp of the frame is in samples per channel.
76 uint32_t timestamp;
77 // The relative priority of the frame compared to other frames of the same
78 // payload and the same timeframe. A higher value means a lower priority.
79 // The highest priority is zero - negative values are not allowed.
80 int priority;
81 std::unique_ptr<EncodedAudioFrame> frame;
82 };
83
84 // Let the decoder parse this payload and prepare zero or more decodable
85 // frames. Each frame must be between 10 ms and 120 ms long. The caller must
86 // ensure that the AudioDecoder object outlives any frame objects returned by
87 // this call. The decoder is free to swap or move the data from the |payload|
88 // buffer. |timestamp| is the input timestamp, in samples, corresponding to
89 // the start of the payload.
90 virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
91 uint32_t timestamp);
92
Niels Möllerb7180c02018-12-06 13:07:11 +010093 // TODO(bugs.webrtc.org/10098): The Decode and DecodeRedundant methods are
94 // obsolete; callers should call ParsePayload instead. For now, subclasses
95 // must still implement DecodeInternal.
96
kwiberg087bd342017-02-10 08:15:44 -080097 // Decodes |encode_len| bytes from |encoded| and writes the result in
98 // |decoded|. The maximum bytes allowed to be written into |decoded| is
99 // |max_decoded_bytes|. Returns the total number of samples across all
100 // channels. If the decoder produced comfort noise, |speech_type|
101 // is set to kComfortNoise, otherwise it is kSpeech. The desired output
102 // sample rate is provided in |sample_rate_hz|, which must be valid for the
103 // codec at hand.
104 int Decode(const uint8_t* encoded,
105 size_t encoded_len,
106 int sample_rate_hz,
107 size_t max_decoded_bytes,
108 int16_t* decoded,
109 SpeechType* speech_type);
110
111 // Same as Decode(), but interfaces to the decoders redundant decode function.
112 // The default implementation simply calls the regular Decode() method.
113 int DecodeRedundant(const uint8_t* encoded,
114 size_t encoded_len,
115 int sample_rate_hz,
116 size_t max_decoded_bytes,
117 int16_t* decoded,
118 SpeechType* speech_type);
119
120 // Indicates if the decoder implements the DecodePlc method.
121 virtual bool HasDecodePlc() const;
122
123 // Calls the packet-loss concealment of the decoder to update the state after
124 // one or several lost packets. The caller has to make sure that the
125 // memory allocated in |decoded| should accommodate |num_frames| frames.
126 virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
127
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200128 // Asks the decoder to generate packet-loss concealment and append it to the
129 // end of |concealment_audio|. The concealment audio should be in
130 // channel-interleaved format, with as many channels as the last decoded
131 // packet produced. The implementation must produce at least
132 // requested_samples_per_channel, or nothing at all. This is a signal to the
133 // caller to conceal the loss with other means. If the implementation provides
134 // concealment samples, it is also responsible for "stitching" it together
135 // with the decoded audio on either side of the concealment.
136 // Note: The default implementation of GeneratePlc will be deleted soon. All
137 // implementations must provide their own, which can be a simple as a no-op.
138 // TODO(bugs.webrtc.org/9676): Remove default impementation.
139 virtual void GeneratePlc(size_t requested_samples_per_channel,
140 rtc::BufferT<int16_t>* concealment_audio);
141
kwiberg087bd342017-02-10 08:15:44 -0800142 // Resets the decoder state (empty buffers etc.).
143 virtual void Reset() = 0;
144
145 // Notifies the decoder of an incoming packet to NetEQ.
146 virtual int IncomingPacket(const uint8_t* payload,
147 size_t payload_len,
148 uint16_t rtp_sequence_number,
149 uint32_t rtp_timestamp,
150 uint32_t arrival_timestamp);
151
152 // Returns the last error code from the decoder.
153 virtual int ErrorCode();
154
155 // Returns the duration in samples-per-channel of the payload in |encoded|
156 // which is |encoded_len| bytes long. Returns kNotImplemented if no duration
157 // estimate is available, or -1 in case of an error.
158 virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
159
160 // Returns the duration in samples-per-channel of the redandant payload in
161 // |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no
162 // duration estimate is available, or -1 in case of an error.
163 virtual int PacketDurationRedundant(const uint8_t* encoded,
164 size_t encoded_len) const;
165
166 // Detects whether a packet has forward error correction. The packet is
167 // comprised of the samples in |encoded| which is |encoded_len| bytes long.
168 // Returns true if the packet has FEC and false otherwise.
169 virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
170
171 // Returns the actual sample rate of the decoder's output. This value may not
172 // change during the lifetime of the decoder.
173 virtual int SampleRateHz() const = 0;
174
175 // The number of channels in the decoder's output. This value may not change
176 // during the lifetime of the decoder.
177 virtual size_t Channels() const = 0;
178
179 protected:
180 static SpeechType ConvertSpeechType(int16_t type);
181
182 virtual int DecodeInternal(const uint8_t* encoded,
183 size_t encoded_len,
184 int sample_rate_hz,
185 int16_t* decoded,
186 SpeechType* speech_type) = 0;
187
188 virtual int DecodeRedundantInternal(const uint8_t* encoded,
189 size_t encoded_len,
190 int sample_rate_hz,
191 int16_t* decoded,
192 SpeechType* speech_type);
193
194 private:
195 RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder);
196};
197
198} // namespace webrtc
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200199#endif // API_AUDIO_CODECS_AUDIO_DECODER_H_