Blame - webrtc/modules/audio_coding/codecs/audio_decoder.h - webrtc.googlesource.com/src

blob: b6338d2102a490d7c7272def3a62691daebef6eb [file] [log] [blame]

henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Henrik Kjellander	7464089	2015-10-29 11:31:02 +0100	[diff] [blame]	11	#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_
				12	#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	13
ossu	0d526d5	2016-09-21 01:57:31 -0700	[diff] [blame^]	14	#include <memory>
				15	#include <vector>
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	16
ossu	61a208b	2016-09-20 01:38:00 -0700	[diff] [blame]	17	#include <memory>
				18	#include <vector>
				19
				20	#include "webrtc/base/array_view.h"
				21	#include "webrtc/base/buffer.h"
henrike@webrtc.org	88fbb2d	2014-05-21 21:18:46 +0000	[diff] [blame]	22	#include "webrtc/base/constructormagic.h"
ossu	61a208b	2016-09-20 01:38:00 -0700	[diff] [blame]	23	#include "webrtc/base/optional.h"
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	24	#include "webrtc/typedefs.h"
				25
				26	namespace webrtc {
				27
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	28	// This is the interface class for decoders in NetEQ. Each codec type will have
				29	// an implementation of this class.
				30	class AudioDecoder {
				31	public:
				32	enum SpeechType {
				33	kSpeech = 1,
				34	kComfortNoise = 2
				35	};
				36
				37	// Used by PacketDuration below. Save the value -1 for errors.
				38	enum { kNotImplemented = -2 };
				39
henrik.lundin@webrtc.org	6dba1eb	2015-03-18 09:47:08 +0000	[diff] [blame]	40	AudioDecoder() = default;
				41	virtual ~AudioDecoder() = default;
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	42
ossu	61a208b	2016-09-20 01:38:00 -0700	[diff] [blame]	43	class EncodedAudioFrame {
				44	public:
				45	struct DecodeResult {
				46	size_t num_decoded_samples;  // Total number of samples across all channels.
				47	SpeechType speech_type;  // Whether the decoder produced comfort noise or speech.
				48	};
				49
				50	virtual ~EncodedAudioFrame() = default;
				51
				52	// Returns the duration in samples-per-channel of this audio frame.
				53	// If no duration can be ascertained, returns zero.
				54	virtual size_t Duration() const = 0;
				55
				56	// Decodes this frame of audio and writes the result in |decoded|.
				57	// |decoded| must be large enough to store as many samples as indicated by a
				58	// call to Duration(). On success, returns an rtc::Optional containing the
				59	// total number of samples across all channels, as well as whether the
				60	// decoder produced comfort noise or speech. On failure, returns an empty
				61	// rtc::Optional. Decode may be called at most once per frame object.
				62	virtual rtc::Optional<DecodeResult> Decode(
				63	rtc::ArrayView<int16_t> decoded) const = 0;
				64	};
				65
				66	struct ParseResult {
				67	ParseResult();
				68	ParseResult(uint32_t timestamp,
				69	bool primary,
				70	std::unique_ptr<EncodedAudioFrame> frame);
				71	ParseResult(ParseResult&& b);
				72	~ParseResult();
				73
				74	ParseResult& operator=(ParseResult&& b);
				75
				76	// The timestamp of the frame is in samples per channel.
				77	uint32_t timestamp;
				78	bool primary;  // NOTE(review): presumably mirrors ParsePayload()'s |is_primary| - confirm.
				79	std::unique_ptr<EncodedAudioFrame> frame;  // The decodable frame; owned by this result.
				80	};
				81
				82	// Let the decoder parse this payload and prepare zero or more decodable
				83	// frames. Each frame must be between 10 ms and 120 ms long. The caller must
				84	// ensure that the AudioDecoder object outlives any frame objects returned by
				85	// this call. The decoder is free to swap or move the data from the |payload|
				86	// buffer. |timestamp| is the input timestamp, in samples, corresponding to
				87	// the start of the payload.
				88	virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
				89	uint32_t timestamp,
				90	bool is_primary);
				91
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	92	// Decodes |encoded_len| bytes from |encoded| and writes the result in
minyue@webrtc.org	7f7d7e3	2015-03-16 12:30:37 +0000	[diff] [blame]	93	// |decoded|. The maximum number of bytes allowed to be written into |decoded|
Minyue	323b132	2015-05-25 13:49:37 +0200	[diff] [blame]	94	// is |max_decoded_bytes|. Returns the total number of samples across all
				95	// channels. If the decoder produced comfort noise, |speech_type|
henrik.lundin@webrtc.org	1eda4e3	2015-02-25 10:02:29 +0000	[diff] [blame]	96	// is set to kComfortNoise, otherwise it is kSpeech. The desired output
				97	// sample rate is provided in |sample_rate_hz|, which must be valid for the
				98	// codec at hand.
Peter Boström	d7b7ae8	2015-12-08 13:41:35 +0100	[diff] [blame]	99	int Decode(const uint8_t* encoded,
				100	size_t encoded_len,
				101	int sample_rate_hz,
				102	size_t max_decoded_bytes,
				103	int16_t* decoded,
				104	SpeechType* speech_type);
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	105
				106	// Same as Decode(), but interfaces to the decoder's redundant decode function.
				107	// The default implementation simply calls the regular Decode() method.
Peter Boström	d7b7ae8	2015-12-08 13:41:35 +0100	[diff] [blame]	108	int DecodeRedundant(const uint8_t* encoded,
				109	size_t encoded_len,
				110	int sample_rate_hz,
				111	size_t max_decoded_bytes,
				112	int16_t* decoded,
				113	SpeechType* speech_type);
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	114
				115	// Indicates if the decoder implements the DecodePlc method.
pbos@webrtc.org	2d1a55c	2013-07-31 15:54:00 +0000	[diff] [blame]	116	virtual bool HasDecodePlc() const;
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	117
				118	// Calls the packet-loss concealment of the decoder to update the state after
minyuel	6d92bf5	2015-09-23 15:20:39 +0200	[diff] [blame]	119	// one or several lost packets. The caller has to make sure that the
				120	// memory allocated in |decoded| can accommodate |num_frames| frames.
Peter Kasting	dce40cf	2015-08-24 14:52:23 -0700	[diff] [blame]	121	virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	122
Karl Wiberg	4376648	2015-08-27 15:22:11 +0200	[diff] [blame]	123	// Resets the decoder state (empty buffers etc.).
				124	virtual void Reset() = 0;
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	125
				126	// Notifies the decoder of an incoming packet to NetEQ.
				127	virtual int IncomingPacket(const uint8_t* payload,
				128	size_t payload_len,
				129	uint16_t rtp_sequence_number,
				130	uint32_t rtp_timestamp,
pbos@webrtc.org	2d1a55c	2013-07-31 15:54:00 +0000	[diff] [blame]	131	uint32_t arrival_timestamp);
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	132
				133	// Returns the last error code from the decoder.
pbos@webrtc.org	2d1a55c	2013-07-31 15:54:00 +0000	[diff] [blame]	134	virtual int ErrorCode();
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	135
Minyue	323b132	2015-05-25 13:49:37 +0200	[diff] [blame]	136	// Returns the duration in samples-per-channel of the payload in |encoded|
				137	// which is |encoded_len| bytes long. Returns kNotImplemented if no duration
				138	// estimate is available, or -1 in case of an error.
minyue@webrtc.org	a8cc344	2015-02-13 14:01:54 +0000	[diff] [blame]	139	virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	140
Minyue	323b132	2015-05-25 13:49:37 +0200	[diff] [blame]	141	// Returns the duration in samples-per-channel of the redundant payload in
				142	// |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no
				143	// duration estimate is available, or -1 in case of an error.
minyue@webrtc.org	b28bfa7	2014-03-21 12:07:40 +0000	[diff] [blame]	144	virtual int PacketDurationRedundant(const uint8_t* encoded,
				145	size_t encoded_len) const;
				146
				147	// Detects whether a packet has forward error correction. The packet is
				148	// comprised of the samples in |encoded| which is |encoded_len| bytes long.
				149	// Returns true if the packet has FEC and false otherwise.
				150	virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
				151
kwiberg	f882880	2016-06-02 03:19:23 -0700	[diff] [blame]	152	// Returns the actual sample rate of the decoder's output. This value may not
				153	// change during the lifetime of the decoder.
kwiberg	347d351	2016-06-16 01:59:09 -0700	[diff] [blame]	154	virtual int SampleRateHz() const = 0;
kwiberg	6c2eab3	2016-05-31 02:46:20 -0700	[diff] [blame]	155
kwiberg	f882880	2016-06-02 03:19:23 -0700	[diff] [blame]	156	// The number of channels in the decoder's output. This value may not change
				157	// during the lifetime of the decoder.
henrik.lundin@webrtc.org	6dba1eb	2015-03-18 09:47:08 +0000	[diff] [blame]	158	virtual size_t Channels() const = 0;
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	159
				160	protected:
				161	static SpeechType ConvertSpeechType(int16_t type);  // NOTE(review): the |type| -> SpeechType mapping is not defined in this header; check the implementation.
				162
minyue@webrtc.org	7f7d7e3	2015-03-16 12:30:37 +0000	[diff] [blame]	163	virtual int DecodeInternal(const uint8_t* encoded,
				164	size_t encoded_len,
				165	int sample_rate_hz,
				166	int16_t* decoded,
Peter Boström	d7b7ae8	2015-12-08 13:41:35 +0100	[diff] [blame]	167	SpeechType* speech_type) = 0;  // NOTE(review): presumably the codec-specific hook behind Decode() - confirm in the implementation.
minyue@webrtc.org	7f7d7e3	2015-03-16 12:30:37 +0000	[diff] [blame]	168
				169	virtual int DecodeRedundantInternal(const uint8_t* encoded,
				170	size_t encoded_len,
				171	int sample_rate_hz,
				172	int16_t* decoded,
				173	SpeechType* speech_type);  // NOTE(review): presumably the hook behind DecodeRedundant() - confirm in the implementation.
				174
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	175	private:
henrikg	3c089d7	2015-09-16 05:37:44 -0700	[diff] [blame]	176	RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder);
henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	177	};
				178
				179	} // namespace webrtc
Henrik Kjellander	7464089	2015-10-29 11:31:02 +0100	[diff] [blame]	180	#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_