// blob: 4852ad71e6dc3b064b64238cb3d2003869a986b6
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_
#define API_AUDIO_CODECS_AUDIO_DECODER_H_

#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <vector>

#include "absl/types/optional.h"
#include "api/array_view.h"
#include "rtc_base/buffer.h"
#include "rtc_base/constructormagic.h"
#include "typedefs.h"  // NOLINT(build/include)

23namespace webrtc {
24
25class AudioDecoder {
26 public:
27 enum SpeechType {
28 kSpeech = 1,
29 kComfortNoise = 2,
30 };
31
32 // Used by PacketDuration below. Save the value -1 for errors.
33 enum { kNotImplemented = -2 };
34
35 AudioDecoder() = default;
36 virtual ~AudioDecoder() = default;
37
38 class EncodedAudioFrame {
39 public:
40 struct DecodeResult {
41 size_t num_decoded_samples;
42 SpeechType speech_type;
43 };
44
45 virtual ~EncodedAudioFrame() = default;
46
47 // Returns the duration in samples-per-channel of this audio frame.
48 // If no duration can be ascertained, returns zero.
49 virtual size_t Duration() const = 0;
50
Ivo Creusenc7f09ad2018-05-22 13:21:01 +020051 // Returns true if this packet contains DTX.
52 virtual bool IsDtxPacket() const;
53
kwiberg087bd342017-02-10 08:15:44 -080054 // Decodes this frame of audio and writes the result in |decoded|.
55 // |decoded| must be large enough to store as many samples as indicated by a
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020056 // call to Duration() . On success, returns an absl::optional containing the
kwiberg087bd342017-02-10 08:15:44 -080057 // total number of samples across all channels, as well as whether the
58 // decoder produced comfort noise or speech. On failure, returns an empty
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020059 // absl::optional. Decode may be called at most once per frame object.
60 virtual absl::optional<DecodeResult> Decode(
kwiberg087bd342017-02-10 08:15:44 -080061 rtc::ArrayView<int16_t> decoded) const = 0;
62 };
63
64 struct ParseResult {
65 ParseResult();
66 ParseResult(uint32_t timestamp,
67 int priority,
68 std::unique_ptr<EncodedAudioFrame> frame);
69 ParseResult(ParseResult&& b);
70 ~ParseResult();
71
72 ParseResult& operator=(ParseResult&& b);
73
74 // The timestamp of the frame is in samples per channel.
75 uint32_t timestamp;
76 // The relative priority of the frame compared to other frames of the same
77 // payload and the same timeframe. A higher value means a lower priority.
78 // The highest priority is zero - negative values are not allowed.
79 int priority;
80 std::unique_ptr<EncodedAudioFrame> frame;
81 };
82
83 // Let the decoder parse this payload and prepare zero or more decodable
84 // frames. Each frame must be between 10 ms and 120 ms long. The caller must
85 // ensure that the AudioDecoder object outlives any frame objects returned by
86 // this call. The decoder is free to swap or move the data from the |payload|
87 // buffer. |timestamp| is the input timestamp, in samples, corresponding to
88 // the start of the payload.
89 virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
90 uint32_t timestamp);
91
92 // Decodes |encode_len| bytes from |encoded| and writes the result in
93 // |decoded|. The maximum bytes allowed to be written into |decoded| is
94 // |max_decoded_bytes|. Returns the total number of samples across all
95 // channels. If the decoder produced comfort noise, |speech_type|
96 // is set to kComfortNoise, otherwise it is kSpeech. The desired output
97 // sample rate is provided in |sample_rate_hz|, which must be valid for the
98 // codec at hand.
99 int Decode(const uint8_t* encoded,
100 size_t encoded_len,
101 int sample_rate_hz,
102 size_t max_decoded_bytes,
103 int16_t* decoded,
104 SpeechType* speech_type);
105
106 // Same as Decode(), but interfaces to the decoders redundant decode function.
107 // The default implementation simply calls the regular Decode() method.
108 int DecodeRedundant(const uint8_t* encoded,
109 size_t encoded_len,
110 int sample_rate_hz,
111 size_t max_decoded_bytes,
112 int16_t* decoded,
113 SpeechType* speech_type);
114
115 // Indicates if the decoder implements the DecodePlc method.
116 virtual bool HasDecodePlc() const;
117
118 // Calls the packet-loss concealment of the decoder to update the state after
119 // one or several lost packets. The caller has to make sure that the
120 // memory allocated in |decoded| should accommodate |num_frames| frames.
121 virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
122
123 // Resets the decoder state (empty buffers etc.).
124 virtual void Reset() = 0;
125
126 // Notifies the decoder of an incoming packet to NetEQ.
127 virtual int IncomingPacket(const uint8_t* payload,
128 size_t payload_len,
129 uint16_t rtp_sequence_number,
130 uint32_t rtp_timestamp,
131 uint32_t arrival_timestamp);
132
133 // Returns the last error code from the decoder.
134 virtual int ErrorCode();
135
136 // Returns the duration in samples-per-channel of the payload in |encoded|
137 // which is |encoded_len| bytes long. Returns kNotImplemented if no duration
138 // estimate is available, or -1 in case of an error.
139 virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
140
141 // Returns the duration in samples-per-channel of the redandant payload in
142 // |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no
143 // duration estimate is available, or -1 in case of an error.
144 virtual int PacketDurationRedundant(const uint8_t* encoded,
145 size_t encoded_len) const;
146
147 // Detects whether a packet has forward error correction. The packet is
148 // comprised of the samples in |encoded| which is |encoded_len| bytes long.
149 // Returns true if the packet has FEC and false otherwise.
150 virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
151
152 // Returns the actual sample rate of the decoder's output. This value may not
153 // change during the lifetime of the decoder.
154 virtual int SampleRateHz() const = 0;
155
156 // The number of channels in the decoder's output. This value may not change
157 // during the lifetime of the decoder.
158 virtual size_t Channels() const = 0;
159
160 protected:
161 static SpeechType ConvertSpeechType(int16_t type);
162
163 virtual int DecodeInternal(const uint8_t* encoded,
164 size_t encoded_len,
165 int sample_rate_hz,
166 int16_t* decoded,
167 SpeechType* speech_type) = 0;
168
169 virtual int DecodeRedundantInternal(const uint8_t* encoded,
170 size_t encoded_len,
171 int sample_rate_hz,
172 int16_t* decoded,
173 SpeechType* speech_type);
174
175 private:
176 RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder);
177};
178
179} // namespace webrtc
#endif  // API_AUDIO_CODECS_AUDIO_DECODER_H_