Blame - webrtc/api/audio_codecs/audio_decoder.h - webrtc.googlesource.com/src

blob: d671839ba50654780a7a138a9eef97170c256344 [file] [log] [blame]

kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	1	/*
				2	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#ifndef WEBRTC_API_AUDIO_CODECS_AUDIO_DECODER_H_
				12	#define WEBRTC_API_AUDIO_CODECS_AUDIO_DECODER_H_
				13
				14	#include <memory>
				15	#include <vector>
				16
Edward Lemur	c20978e	2017-07-06 19:44:34 +0200	[diff] [blame^]	17	#include "webrtc/rtc_base/array_view.h"
				18	#include "webrtc/rtc_base/buffer.h"
				19	#include "webrtc/rtc_base/constructormagic.h"
				20	#include "webrtc/rtc_base/optional.h"
kwiberg	087bd34	2017-02-10 08:15:44 -0800	[diff] [blame]	21	#include "webrtc/typedefs.h"
				22
				23	namespace webrtc {
				24
				25	class AudioDecoder {
					// Interface for audio decoders. Reset(), SampleRateHz(), Channels() and
					// DecodeInternal() are pure virtual and must be supplied by subclasses.
				26	public:
					// Kind of audio produced by a decode call (see Decode() below).
				27	enum SpeechType {
				28	kSpeech = 1,
				29	kComfortNoise = 2,
				30	};
				31
				32	// Used by PacketDuration below. Save the value -1 for errors.
				33	enum { kNotImplemented = -2 };
				34
				35	AudioDecoder() = default;
				36	virtual ~AudioDecoder() = default;
				37
					// An encoded frame that can report its duration and be decoded. Frames
					// are handed out through ParseResult::frame (see ParsePayload() below).
				38	class EncodedAudioFrame {
				39	public:
					// Outcome of a successful Decode(): the total number of samples across
					// all channels and the kind of audio that was produced.
				40	struct DecodeResult {
				41	size_t num_decoded_samples;
				42	SpeechType speech_type;
				43	};
				44
				45	virtual ~EncodedAudioFrame() = default;
				46
				47	// Returns the duration in samples-per-channel of this audio frame.
				48	// If no duration can be ascertained, returns zero.
				49	virtual size_t Duration() const = 0;
				50
				51	// Decodes this frame of audio and writes the result in |decoded|.
				52	// |decoded| must be large enough to store as many samples as indicated by a
				53	// call to Duration(). On success, returns an rtc::Optional containing the
				54	// total number of samples across all channels, as well as whether the
				55	// decoder produced comfort noise or speech. On failure, returns an empty
				56	// rtc::Optional. Decode may be called at most once per frame object.
				57	virtual rtc::Optional<DecodeResult> Decode(
				58	rtc::ArrayView<int16_t> decoded) const = 0;
				59	};
				60
					// One frame produced by ParsePayload(), together with its timestamp and
					// priority. Move constructor and move assignment are declared; no copy
					// operations are declared here.
				61	struct ParseResult {
				62	ParseResult();
				63	ParseResult(uint32_t timestamp,
				64	int priority,
				65	std::unique_ptr<EncodedAudioFrame> frame);
				66	ParseResult(ParseResult&& b);
				67	~ParseResult();
				68
				69	ParseResult& operator=(ParseResult&& b);
				70
				71	// The timestamp of the frame is in samples per channel.
				72	uint32_t timestamp;
				73	// The relative priority of the frame compared to other frames of the same
				74	// payload and the same timeframe. A higher value means a lower priority.
				75	// The highest priority is zero - negative values are not allowed.
				76	int priority;
					// The encoded frame itself, owned by this ParseResult.
				77	std::unique_ptr<EncodedAudioFrame> frame;
				78	};
				79
				80	// Let the decoder parse this payload and prepare zero or more decodable
				81	// frames. Each frame must be between 10 ms and 120 ms long. The caller must
				82	// ensure that the AudioDecoder object outlives any frame objects returned by
				83	// this call. The decoder is free to swap or move the data from the |payload|
				84	// buffer. |timestamp| is the input timestamp, in samples, corresponding to
				85	// the start of the payload.
				86	virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
				87	uint32_t timestamp);
				88
				89	// Decodes |encoded_len| bytes from |encoded| and writes the result in
				90	// |decoded|. The maximum bytes allowed to be written into |decoded| is
				91	// |max_decoded_bytes|. Returns the total number of samples across all
				92	// channels. If the decoder produced comfort noise, |speech_type|
				93	// is set to kComfortNoise, otherwise it is kSpeech. The desired output
				94	// sample rate is provided in |sample_rate_hz|, which must be valid for the
				95	// codec at hand.
					// NOTE(review): the return value on failure is not documented in this
					// header - confirm against the implementation file.
				96	int Decode(const uint8_t* encoded,
				97	size_t encoded_len,
				98	int sample_rate_hz,
				99	size_t max_decoded_bytes,
				100	int16_t* decoded,
				101	SpeechType* speech_type);
				102
				103	// Same as Decode(), but interfaces to the decoder's redundant decode
					// function.
				104	// The default implementation simply calls the regular Decode() method.
				105	int DecodeRedundant(const uint8_t* encoded,
				106	size_t encoded_len,
				107	int sample_rate_hz,
				108	size_t max_decoded_bytes,
				109	int16_t* decoded,
				110	SpeechType* speech_type);
				111
				112	// Indicates if the decoder implements the DecodePlc method.
				113	virtual bool HasDecodePlc() const;
				114
				115	// Calls the packet-loss concealment of the decoder to update the state after
				116	// one or several lost packets. The caller must make sure that the memory
				117	// allocated in |decoded| can accommodate |num_frames| frames.
				118	virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
				119
				120	// Resets the decoder state (empty buffers etc.).
				121	virtual void Reset() = 0;
				122
				123	// Notifies the decoder of an incoming packet to NetEQ.
					// NOTE(review): the meaning of the return value is not documented in this
					// header - confirm against the implementation file.
				124	virtual int IncomingPacket(const uint8_t* payload,
				125	size_t payload_len,
				126	uint16_t rtp_sequence_number,
				127	uint32_t rtp_timestamp,
				128	uint32_t arrival_timestamp);
				129
				130	// Returns the last error code from the decoder.
				131	virtual int ErrorCode();
				132
				133	// Returns the duration in samples-per-channel of the payload in |encoded|
				134	// which is |encoded_len| bytes long. Returns kNotImplemented if no duration
				135	// estimate is available, or -1 in case of an error.
				136	virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
				137
				138	// Returns the duration in samples-per-channel of the redundant payload in
				139	// |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no
				140	// duration estimate is available, or -1 in case of an error.
				141	virtual int PacketDurationRedundant(const uint8_t* encoded,
				142	size_t encoded_len) const;
				143
				144	// Detects whether a packet has forward error correction. The packet is
				145	// comprised of the samples in |encoded| which is |encoded_len| bytes long.
				146	// Returns true if the packet has FEC and false otherwise.
				147	virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
				148
				149	// Returns the actual sample rate of the decoder's output. This value may not
				150	// change during the lifetime of the decoder.
				151	virtual int SampleRateHz() const = 0;
				152
				153	// The number of channels in the decoder's output. This value may not change
				154	// during the lifetime of the decoder.
				155	virtual size_t Channels() const = 0;
				156
				157	protected:
					// Converts an int16_t type code into a SpeechType.
					// NOTE(review): the mapping is defined outside this header - confirm
					// which input values correspond to kSpeech and kComfortNoise.
				158	static SpeechType ConvertSpeechType(int16_t type);
				159
					// Codec-specific decode hook that every subclass must implement (pure
					// virtual). Takes the same arguments as Decode() except
					// |max_decoded_bytes|. Presumably invoked by Decode() - confirm against
					// the implementation file.
				160	virtual int DecodeInternal(const uint8_t* encoded,
				161	size_t encoded_len,
				162	int sample_rate_hz,
				163	int16_t* decoded,
				164	SpeechType* speech_type) = 0;
				165
					// Overridable counterpart of DecodeInternal() for redundant payloads.
					// Presumably invoked by DecodeRedundant() - confirm against the
					// implementation file.
				166	virtual int DecodeRedundantInternal(const uint8_t* encoded,
				167	size_t encoded_len,
				168	int sample_rate_hz,
				169	int16_t* decoded,
				170	SpeechType* speech_type);
				171
				172	private:
					// NOTE(review): presumably disables copy construction and copy assignment
					// (macro from rtc_base/constructormagic.h) - confirm.
				173	RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder);
				174	};
				175
				176	} // namespace webrtc
				177	#endif // WEBRTC_API_AUDIO_CODECS_AUDIO_DECODER_H_