Blame - api/audio_codecs/audio_decoder.h - webrtc.googlesource.com/src

blob: 889e2c62d0ed89fa04879f8b7a8971ebb3a5fef4 [file] [log] [blame]

kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	1	/*
				2	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	11	#ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_
				12	#define API_AUDIO_CODECS_AUDIO_DECODER_H_
kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	13
Yves Gerey	988cc08	2018-10-23 12:03:01 +0200	[diff] [blame^]	14	#include <stddef.h>
				15	#include <stdint.h>
kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	16	#include <memory>
				17	#include <vector>
				18
Danil Chapovalov	0bc58cf	2018-06-21 13:32:56 +0200	[diff] [blame]	19	#include "absl/types/optional.h"
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	20	#include "api/array_view.h"
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	21	#include "rtc_base/buffer.h"
				22	#include "rtc_base/constructormagic.h"
kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	23
				24	namespace webrtc {
				25
				26	class AudioDecoder {
				27	public:
				28	enum SpeechType {
				29	kSpeech = 1,
				30	kComfortNoise = 2,
				31	};
				32
				33	// Used by PacketDuration below. Save the value -1 for errors.
				34	enum { kNotImplemented = -2 };
				35
				36	AudioDecoder() = default;
				37	virtual ~AudioDecoder() = default;
				38
				39	class EncodedAudioFrame {
				40	public:
				41	struct DecodeResult {
				42	size_t num_decoded_samples;
				43	SpeechType speech_type;
				44	};
				45
				46	virtual ~EncodedAudioFrame() = default;
				47
				48	// Returns the duration in samples-per-channel of this audio frame.
				49	// If no duration can be ascertained, returns zero.
				50	virtual size_t Duration() const = 0;
				51
Ivo Creusen	c7f09ad	2018-05-22 13:21:01 +0200	[diff] [blame]	52	// Returns true if this packet contains DTX.
				53	virtual bool IsDtxPacket() const;
				54
kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	55	// Decodes this frame of audio and writes the result in |decoded|.
				56	// |decoded| must be large enough to store as many samples as indicated by a
Danil Chapovalov	0bc58cf	2018-06-21 13:32:56 +0200	[diff] [blame]	57	// call to Duration(). On success, returns an absl::optional containing the
kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	58	// total number of samples across all channels, as well as whether the
				59	// decoder produced comfort noise or speech. On failure, returns an empty
Danil Chapovalov	0bc58cf	2018-06-21 13:32:56 +0200	[diff] [blame]	60	// absl::optional. Decode may be called at most once per frame object.
				61	virtual absl::optional<DecodeResult> Decode(
kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	62	rtc::ArrayView<int16_t> decoded) const = 0;
				63	};
				64
				65	struct ParseResult {
				66	ParseResult();
				67	ParseResult(uint32_t timestamp,
				68	int priority,
				69	std::unique_ptr<EncodedAudioFrame> frame);
				70	ParseResult(ParseResult&& b);
				71	~ParseResult();
				72
				73	ParseResult& operator=(ParseResult&& b);
				74
				75	// The timestamp of the frame is in samples per channel.
				76	uint32_t timestamp;
				77	// The relative priority of the frame compared to other frames of the same
				78	// payload and the same timeframe. A higher value means a lower priority.
				79	// The highest priority is zero - negative values are not allowed.
				80	int priority;
				81	std::unique_ptr<EncodedAudioFrame> frame;
				82	};
				83
				84	// Let the decoder parse this payload and prepare zero or more decodable
				85	// frames. Each frame must be between 10 ms and 120 ms long. The caller must
				86	// ensure that the AudioDecoder object outlives any frame objects returned by
				87	// this call. The decoder is free to swap or move the data from the |payload|
				88	// buffer. |timestamp| is the input timestamp, in samples, corresponding to
				89	// the start of the payload.
				90	virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
				91	uint32_t timestamp);
				92
				93	// Decodes |encoded_len| bytes from |encoded| and writes the result in
				94	// |decoded|. The maximum number of bytes allowed to be written into |decoded|
				95	// is |max_decoded_bytes|. Returns the total number of samples across all
				96	// channels. If the decoder produced comfort noise, |speech_type|
				97	// is set to kComfortNoise, otherwise it is kSpeech. The desired output
				98	// sample rate is provided in |sample_rate_hz|, which must be valid for the
				99	// codec at hand.
				100	int Decode(const uint8_t* encoded,
				101	size_t encoded_len,
				102	int sample_rate_hz,
				103	size_t max_decoded_bytes,
				104	int16_t* decoded,
				105	SpeechType* speech_type);
				106
				107	// Same as Decode(), but interfaces to the decoder's redundant decode function.
				108	// The default implementation simply calls the regular Decode() method.
				109	int DecodeRedundant(const uint8_t* encoded,
				110	size_t encoded_len,
				111	int sample_rate_hz,
				112	size_t max_decoded_bytes,
				113	int16_t* decoded,
				114	SpeechType* speech_type);
				115
				116	// Indicates if the decoder implements the DecodePlc method.
				117	virtual bool HasDecodePlc() const;
				118
				119	// Calls the packet-loss concealment of the decoder to update the state after
				120	// one or several lost packets. The caller has to make sure that the
				121	// memory allocated in |decoded| can accommodate |num_frames| frames.
				122	virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
				123
Henrik Lundin	00eb12a	2018-09-05 18:14:52 +0200	[diff] [blame]	124	// Asks the decoder to generate packet-loss concealment and append it to the
				125	// end of |concealment_audio|. The concealment audio should be in
				126	// channel-interleaved format, with as many channels as the last decoded
				127	// packet produced. The implementation must produce at least
				128	// requested_samples_per_channel, or nothing at all; the latter signals the
				129	// caller to conceal the loss with other means. If the implementation provides
				130	// concealment samples, it is also responsible for "stitching" it together
				131	// with the decoded audio on either side of the concealment.
				132	// Note: The default implementation of GeneratePlc will be deleted soon. All
				133	// implementations must provide their own, which can be as simple as a no-op.
				134	// TODO(bugs.webrtc.org/9676): Remove default implementation.
				135	virtual void GeneratePlc(size_t requested_samples_per_channel,
				136	rtc::BufferT<int16_t>* concealment_audio);
				137
kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	138	// Resets the decoder state (empty buffers etc.).
				139	virtual void Reset() = 0;
				140
				141	// Notifies the decoder of an incoming packet to NetEQ.
				142	virtual int IncomingPacket(const uint8_t* payload,
				143	size_t payload_len,
				144	uint16_t rtp_sequence_number,
				145	uint32_t rtp_timestamp,
				146	uint32_t arrival_timestamp);
				147
				148	// Returns the last error code from the decoder.
				149	virtual int ErrorCode();
				150
				151	// Returns the duration in samples-per-channel of the payload in |encoded|
				152	// which is |encoded_len| bytes long. Returns kNotImplemented if no duration
				153	// estimate is available, or -1 in case of an error.
				154	virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
				155
				156	// Returns the duration in samples-per-channel of the redundant payload in
				157	// |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no
				158	// duration estimate is available, or -1 in case of an error.
				159	virtual int PacketDurationRedundant(const uint8_t* encoded,
				160	size_t encoded_len) const;
				161
				162	// Detects whether a packet has forward error correction. The packet is
				163	// comprised of the samples in |encoded| which is |encoded_len| bytes long.
				164	// Returns true if the packet has FEC and false otherwise.
				165	virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
				166
				167	// Returns the actual sample rate of the decoder's output. This value may not
				168	// change during the lifetime of the decoder.
				169	virtual int SampleRateHz() const = 0;
				170
				171	// The number of channels in the decoder's output. This value may not change
				172	// during the lifetime of the decoder.
				173	virtual size_t Channels() const = 0;
				174
				175	protected:
				176	static SpeechType ConvertSpeechType(int16_t type);
				177
				178	virtual int DecodeInternal(const uint8_t* encoded,
				179	size_t encoded_len,
				180	int sample_rate_hz,
				181	int16_t* decoded,
				182	SpeechType* speech_type) = 0;
				183
				184	virtual int DecodeRedundantInternal(const uint8_t* encoded,
				185	size_t encoded_len,
				186	int sample_rate_hz,
				187	int16_t* decoded,
				188	SpeechType* speech_type);
				189
				190	private:
				191	RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder);
				192	};
				193
				194	} // namespace webrtc
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	195	#endif // API_AUDIO_CODECS_AUDIO_DECODER_H_