blob: 889e2c62d0ed89fa04879f8b7a8971ebb3a5fef4 [file] [log] [blame]
kwiberg087bd342017-02-10 08:15:44 -08001/*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_
12#define API_AUDIO_CODECS_AUDIO_DECODER_H_
kwiberg087bd342017-02-10 08:15:44 -080013
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stddef.h>
15#include <stdint.h>
kwiberg087bd342017-02-10 08:15:44 -080016#include <memory>
17#include <vector>
18
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020019#include "absl/types/optional.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020020#include "api/array_view.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020021#include "rtc_base/buffer.h"
22#include "rtc_base/constructormagic.h"
kwiberg087bd342017-02-10 08:15:44 -080023
24namespace webrtc {
25
// Abstract interface for an audio decoder. Concrete decoders must implement
// the pure virtual members declared below: Reset(), SampleRateHz(), Channels()
// and DecodeInternal().
26class AudioDecoder {
27 public:
 // Kind of audio produced by a decode call. Reported through the
 // |speech_type| output of Decode() and through DecodeResult::speech_type.
28 enum SpeechType {
29 kSpeech = 1,
30 kComfortNoise = 2,
31 };
32
33 // Returned by PacketDuration() and PacketDurationRedundant() below when no
 // duration estimate is available. The value -1 is reserved for errors.
34 enum { kNotImplemented = -2 };
35
36 AudioDecoder() = default;
37 virtual ~AudioDecoder() = default;
38
 // A single encoded audio frame that can be decoded on its own through
 // Decode(). Frames are handed out by ParsePayload(), wrapped in ParseResult.
39 class EncodedAudioFrame {
40 public:
 // Value carried by the absl::optional that Decode() returns on success:
 // the total number of decoded samples across all channels, and whether
 // the decoder produced speech or comfort noise.
41 struct DecodeResult {
42 size_t num_decoded_samples;
43 SpeechType speech_type;
44 };
45
46 virtual ~EncodedAudioFrame() = default;
47
48 // Returns the duration in samples-per-channel of this audio frame.
49 // If no duration can be ascertained, returns zero.
50 virtual size_t Duration() const = 0;
51
Ivo Creusenc7f09ad2018-05-22 13:21:01 +020052 // Returns true if this packet contains DTX.
 // Not pure virtual, so a default implementation exists outside this header.
53 virtual bool IsDtxPacket() const;
54
kwiberg087bd342017-02-10 08:15:44 -080055 // Decodes this frame of audio and writes the result in |decoded|.
56 // |decoded| must be large enough to store as many samples as indicated by a
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020057 // call to Duration(). On success, returns an absl::optional containing the
kwiberg087bd342017-02-10 08:15:44 -080058 // total number of samples across all channels, as well as whether the
59 // decoder produced comfort noise or speech. On failure, returns an empty
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020060 // absl::optional. Decode may be called at most once per frame object.
61 virtual absl::optional<DecodeResult> Decode(
kwiberg087bd342017-02-10 08:15:44 -080062 rtc::ArrayView<int16_t> decoded) const = 0;
63 };
64
 // One element of the vector returned by ParsePayload(): a decodable frame
 // together with its timestamp and priority. Declares move construction and
 // move assignment; |frame| is owned through a std::unique_ptr.
65 struct ParseResult {
66 ParseResult();
67 ParseResult(uint32_t timestamp,
68 int priority,
69 std::unique_ptr<EncodedAudioFrame> frame);
70 ParseResult(ParseResult&& b);
71 ~ParseResult();
72
73 ParseResult& operator=(ParseResult&& b);
74
75 // The timestamp of the frame is in samples per channel.
76 uint32_t timestamp;
77 // The relative priority of the frame compared to other frames of the same
78 // payload and the same timeframe. A higher value means a lower priority.
79 // The highest priority is zero - negative values are not allowed.
80 int priority;
81 std::unique_ptr<EncodedAudioFrame> frame;
82 };
83
84 // Let the decoder parse this payload and prepare zero or more decodable
85 // frames. Each frame must be between 10 ms and 120 ms long. The caller must
86 // ensure that the AudioDecoder object outlives any frame objects returned by
87 // this call. The decoder is free to swap or move the data from the |payload|
88 // buffer. |timestamp| is the input timestamp, in samples, corresponding to
89 // the start of the payload.
90 virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
91 uint32_t timestamp);
92
93 // Decodes |encoded_len| bytes from |encoded| and writes the result in
94 // |decoded|. The maximum bytes allowed to be written into |decoded| is
95 // |max_decoded_bytes|. Returns the total number of samples across all
96 // channels. If the decoder produced comfort noise, |speech_type|
97 // is set to kComfortNoise, otherwise it is kSpeech. The desired output
98 // sample rate is provided in |sample_rate_hz|, which must be valid for the
99 // codec at hand.
100 int Decode(const uint8_t* encoded,
101 size_t encoded_len,
102 int sample_rate_hz,
103 size_t max_decoded_bytes,
104 int16_t* decoded,
105 SpeechType* speech_type);
106
107 // Same as Decode(), but interfaces to the decoder's redundant decode
 // function.
108 // The default implementation simply calls the regular Decode() method.
109 int DecodeRedundant(const uint8_t* encoded,
110 size_t encoded_len,
111 int sample_rate_hz,
112 size_t max_decoded_bytes,
113 int16_t* decoded,
114 SpeechType* speech_type);
115
116 // Indicates if the decoder implements the DecodePlc method.
117 virtual bool HasDecodePlc() const;
118
119 // Calls the packet-loss concealment of the decoder to update the state after
120 // one or several lost packets. The caller must make sure that the
121 // memory allocated in |decoded| can accommodate |num_frames| frames.
122 virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
123
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200124 // Asks the decoder to generate packet-loss concealment and append it to the
125 // end of |concealment_audio|. The concealment audio should be in
126 // channel-interleaved format, with as many channels as the last decoded
127 // packet produced. The implementation must produce at least
128 // requested_samples_per_channel, or nothing at all. This is a signal to the
129 // caller to conceal the loss with other means. If the implementation provides
130 // concealment samples, it is also responsible for "stitching" it together
131 // with the decoded audio on either side of the concealment.
132 // Note: The default implementation of GeneratePlc will be deleted soon. All
133 // implementations must provide their own, which can be as simple as a no-op.
134 // TODO(bugs.webrtc.org/9676): Remove default implementation.
135 virtual void GeneratePlc(size_t requested_samples_per_channel,
136 rtc::BufferT<int16_t>* concealment_audio);
137
kwiberg087bd342017-02-10 08:15:44 -0800138 // Resets the decoder state (empty buffers etc.).
139 virtual void Reset() = 0;
140
141 // Notifies the decoder of an incoming packet to NetEQ.
142 virtual int IncomingPacket(const uint8_t* payload,
143 size_t payload_len,
144 uint16_t rtp_sequence_number,
145 uint32_t rtp_timestamp,
146 uint32_t arrival_timestamp);
147
148 // Returns the last error code from the decoder.
149 virtual int ErrorCode();
150
151 // Returns the duration in samples-per-channel of the payload in |encoded|
152 // which is |encoded_len| bytes long. Returns kNotImplemented if no duration
153 // estimate is available, or -1 in case of an error.
154 virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
155
156 // Returns the duration in samples-per-channel of the redundant payload in
157 // |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no
158 // duration estimate is available, or -1 in case of an error.
159 virtual int PacketDurationRedundant(const uint8_t* encoded,
160 size_t encoded_len) const;
161
162 // Detects whether a packet has forward error correction. The packet is
163 // comprised of the samples in |encoded| which is |encoded_len| bytes long.
164 // Returns true if the packet has FEC and false otherwise.
165 virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
166
167 // Returns the actual sample rate of the decoder's output. This value may not
168 // change during the lifetime of the decoder.
169 virtual int SampleRateHz() const = 0;
170
171 // The number of channels in the decoder's output. This value may not change
172 // during the lifetime of the decoder.
173 virtual size_t Channels() const = 0;
174
175 protected:
 // Converts the raw |type| value to a SpeechType.
 // NOTE(review): the mapping itself is defined in the implementation file,
 // not in this header.
176 static SpeechType ConvertSpeechType(int16_t type);
177
 // Codec-specific decode routine that every subclass must implement (pure
 // virtual). Unlike the public Decode(), it takes no |max_decoded_bytes|.
 // NOTE(review): presumably invoked by Decode() above; the call site is not
 // visible in this header - confirm in the implementation file.
178 virtual int DecodeInternal(const uint8_t* encoded,
179 size_t encoded_len,
180 int sample_rate_hz,
181 int16_t* decoded,
182 SpeechType* speech_type) = 0;
183
 // Counterpart of DecodeInternal() for redundant payloads. Not pure virtual,
 // so subclasses may leave it unimplemented.
 // NOTE(review): presumably invoked by DecodeRedundant() and falling back to
 // DecodeInternal() by default - confirm in the implementation file.
184 virtual int DecodeRedundantInternal(const uint8_t* encoded,
185 size_t encoded_len,
186 int sample_rate_hz,
187 int16_t* decoded,
188 SpeechType* speech_type);
189
190 private:
 // NOTE(review): macro presumably provided by rtc_base/constructormagic.h;
 // judging by its name it makes AudioDecoder non-copyable and non-assignable
 // - confirm against the macro definition.
191 RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder);
192};
193
194} // namespace webrtc
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200195#endif // API_AUDIO_CODECS_AUDIO_DECODER_H_