/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#ifndef WEBRTC_API_AUDIO_CODECS_AUDIO_ENCODER_H_
12#define WEBRTC_API_AUDIO_CODECS_AUDIO_ENCODER_H_
13
14#include <algorithm>
15#include <memory>
16#include <string>
17#include <vector>
18
kwiberg529662a2017-09-04 05:43:17 -070019#include "webrtc/api/array_view.h"
kwiberg84f6a3f2017-09-05 08:43:13 -070020#include "webrtc/api/optional.h"
Edward Lemurc20978e2017-07-06 19:44:34 +020021#include "webrtc/rtc_base/buffer.h"
22#include "webrtc/rtc_base/deprecation.h"
ossueb1fde42017-05-02 06:46:30 -070023#include "webrtc/typedefs.h"
24
25namespace webrtc {
26
27class Clock;
28class RtcEventLog;
29
ivoce1198e02017-09-08 08:13:19 -070030// Statistics related to Audio Network Adaptation.
31struct ANAStats {
32 ANAStats();
33 ANAStats(const ANAStats&);
34 ~ANAStats();
35 // Number of actions taken by the ANA bitrate controller since the start of
36 // the call. If this value is not set, it indicates that the bitrate
37 // controller is disabled.
38 rtc::Optional<uint32_t> bitrate_action_counter;
39 // Number of actions taken by the ANA channel controller since the start of
40 // the call. If this value is not set, it indicates that the channel
41 // controller is disabled.
42 rtc::Optional<uint32_t> channel_action_counter;
43 // Number of actions taken by the ANA DTX controller since the start of the
44 // call. If this value is not set, it indicates that the DTX controller is
45 // disabled.
46 rtc::Optional<uint32_t> dtx_action_counter;
47 // Number of actions taken by the ANA FEC controller since the start of the
48 // call. If this value is not set, it indicates that the FEC controller is
49 // disabled.
50 rtc::Optional<uint32_t> fec_action_counter;
51 // Number of actions taken by the ANA frame length controller since the start
52 // of the call. If this value is not set, it indicates that the frame length
53 // controller is disabled.
54 rtc::Optional<uint32_t> frame_length_action_counter;
55};
56
ossueb1fde42017-05-02 06:46:30 -070057// This is the interface class for encoders in AudioCoding module. Each codec
58// type must have an implementation of this class.
59class AudioEncoder {
60 public:
61 // Used for UMA logging of codec usage. The same codecs, with the
62 // same values, must be listed in
63 // src/tools/metrics/histograms/histograms.xml in chromium to log
64 // correct values.
65 enum class CodecType {
66 kOther = 0, // Codec not specified, and/or not listed in this enum
67 kOpus = 1,
68 kIsac = 2,
69 kPcmA = 3,
70 kPcmU = 4,
71 kG722 = 5,
72 kIlbc = 6,
73
74 // Number of histogram bins in the UMA logging of codec types. The
75 // total number of different codecs that are logged cannot exceed this
76 // number.
77 kMaxLoggedAudioCodecTypes
78 };
79
80 struct EncodedInfoLeaf {
81 size_t encoded_bytes = 0;
82 uint32_t encoded_timestamp = 0;
83 int payload_type = 0;
84 bool send_even_if_empty = false;
85 bool speech = true;
86 CodecType encoder_type = CodecType::kOther;
87 };
88
89 // This is the main struct for auxiliary encoding information. Each encoded
90 // packet should be accompanied by one EncodedInfo struct, containing the
91 // total number of |encoded_bytes|, the |encoded_timestamp| and the
92 // |payload_type|. If the packet contains redundant encodings, the |redundant|
93 // vector will be populated with EncodedInfoLeaf structs. Each struct in the
94 // vector represents one encoding; the order of structs in the vector is the
95 // same as the order in which the actual payloads are written to the byte
96 // stream. When EncoderInfoLeaf structs are present in the vector, the main
97 // struct's |encoded_bytes| will be the sum of all the |encoded_bytes| in the
98 // vector.
99 struct EncodedInfo : public EncodedInfoLeaf {
100 EncodedInfo();
101 EncodedInfo(const EncodedInfo&);
102 EncodedInfo(EncodedInfo&&);
103 ~EncodedInfo();
104 EncodedInfo& operator=(const EncodedInfo&);
105 EncodedInfo& operator=(EncodedInfo&&);
106
107 std::vector<EncodedInfoLeaf> redundant;
108 };
109
110 virtual ~AudioEncoder() = default;
111
112 // Returns the input sample rate in Hz and the number of input channels.
113 // These are constants set at instantiation time.
114 virtual int SampleRateHz() const = 0;
115 virtual size_t NumChannels() const = 0;
116
117 // Returns the rate at which the RTP timestamps are updated. The default
118 // implementation returns SampleRateHz().
119 virtual int RtpTimestampRateHz() const;
120
121 // Returns the number of 10 ms frames the encoder will put in the next
122 // packet. This value may only change when Encode() outputs a packet; i.e.,
123 // the encoder may vary the number of 10 ms frames from packet to packet, but
124 // it must decide the length of the next packet no later than when outputting
125 // the preceding packet.
126 virtual size_t Num10MsFramesInNextPacket() const = 0;
127
128 // Returns the maximum value that can be returned by
129 // Num10MsFramesInNextPacket().
130 virtual size_t Max10MsFramesInAPacket() const = 0;
131
132 // Returns the current target bitrate in bits/s. The value -1 means that the
133 // codec adapts the target automatically, and a current target cannot be
134 // provided.
135 virtual int GetTargetBitrate() const = 0;
136
137 // Accepts one 10 ms block of input audio (i.e., SampleRateHz() / 100 *
138 // NumChannels() samples). Multi-channel audio must be sample-interleaved.
139 // The encoder appends zero or more bytes of output to |encoded| and returns
140 // additional encoding information. Encode() checks some preconditions, calls
141 // EncodeImpl() which does the actual work, and then checks some
142 // postconditions.
143 EncodedInfo Encode(uint32_t rtp_timestamp,
144 rtc::ArrayView<const int16_t> audio,
145 rtc::Buffer* encoded);
146
147 // Resets the encoder to its starting state, discarding any input that has
148 // been fed to the encoder but not yet emitted in a packet.
149 virtual void Reset() = 0;
150
151 // Enables or disables codec-internal FEC (forward error correction). Returns
152 // true if the codec was able to comply. The default implementation returns
153 // true when asked to disable FEC and false when asked to enable it (meaning
154 // that FEC isn't supported).
155 virtual bool SetFec(bool enable);
156
157 // Enables or disables codec-internal VAD/DTX. Returns true if the codec was
158 // able to comply. The default implementation returns true when asked to
159 // disable DTX and false when asked to enable it (meaning that DTX isn't
160 // supported).
161 virtual bool SetDtx(bool enable);
162
163 // Returns the status of codec-internal DTX. The default implementation always
164 // returns false.
165 virtual bool GetDtx() const;
166
167 // Sets the application mode. Returns true if the codec was able to comply.
168 // The default implementation just returns false.
169 enum class Application { kSpeech, kAudio };
170 virtual bool SetApplication(Application application);
171
172 // Tells the encoder about the highest sample rate the decoder is expected to
173 // use when decoding the bitstream. The encoder would typically use this
174 // information to adjust the quality of the encoding. The default
175 // implementation does nothing.
176 virtual void SetMaxPlaybackRate(int frequency_hz);
177
178 // This is to be deprecated. Please use |OnReceivedTargetAudioBitrate|
179 // instead.
180 // Tells the encoder what average bitrate we'd like it to produce. The
181 // encoder is free to adjust or disregard the given bitrate (the default
182 // implementation does the latter).
183 RTC_DEPRECATED virtual void SetTargetBitrate(int target_bps);
184
185 // Causes this encoder to let go of any other encoders it contains, and
186 // returns a pointer to an array where they are stored (which is required to
187 // live as long as this encoder). Unless the returned array is empty, you may
188 // not call any methods on this encoder afterwards, except for the
189 // destructor. The default implementation just returns an empty array.
190 // NOTE: This method is subject to change. Do not call or override it.
191 virtual rtc::ArrayView<std::unique_ptr<AudioEncoder>>
192 ReclaimContainedEncoders();
193
194 // Enables audio network adaptor. Returns true if successful.
195 virtual bool EnableAudioNetworkAdaptor(const std::string& config_string,
196 RtcEventLog* event_log);
197
198 // Disables audio network adaptor.
199 virtual void DisableAudioNetworkAdaptor();
200
201 // Provides uplink packet loss fraction to this encoder to allow it to adapt.
202 // |uplink_packet_loss_fraction| is in the range [0.0, 1.0].
203 virtual void OnReceivedUplinkPacketLossFraction(
204 float uplink_packet_loss_fraction);
205
206 // Provides 1st-order-FEC-recoverable uplink packet loss rate to this encoder
207 // to allow it to adapt.
208 // |uplink_recoverable_packet_loss_fraction| is in the range [0.0, 1.0].
209 virtual void OnReceivedUplinkRecoverablePacketLossFraction(
210 float uplink_recoverable_packet_loss_fraction);
211
212 // Provides target audio bitrate to this encoder to allow it to adapt.
213 virtual void OnReceivedTargetAudioBitrate(int target_bps);
214
215 // Provides target audio bitrate and corresponding probing interval of
216 // the bandwidth estimator to this encoder to allow it to adapt.
217 virtual void OnReceivedUplinkBandwidth(
218 int target_audio_bitrate_bps,
minyue93e45222017-05-18 14:32:41 -0700219 rtc::Optional<int64_t> bwe_period_ms);
ossueb1fde42017-05-02 06:46:30 -0700220
221 // Provides RTT to this encoder to allow it to adapt.
222 virtual void OnReceivedRtt(int rtt_ms);
223
224 // Provides overhead to this encoder to adapt. The overhead is the number of
225 // bytes that will be added to each packet the encoder generates.
226 virtual void OnReceivedOverhead(size_t overhead_bytes_per_packet);
227
228 // To allow encoder to adapt its frame length, it must be provided the frame
229 // length range that receivers can accept.
230 virtual void SetReceiverFrameLengthRange(int min_frame_length_ms,
231 int max_frame_length_ms);
232
ivoce1198e02017-09-08 08:13:19 -0700233 // Get statistics related to audio network adaptation.
234 virtual ANAStats GetANAStats() const;
235
ossueb1fde42017-05-02 06:46:30 -0700236 protected:
237 // Subclasses implement this to perform the actual encoding. Called by
238 // Encode().
239 virtual EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
240 rtc::ArrayView<const int16_t> audio,
241 rtc::Buffer* encoded) = 0;
242};
243} // namespace webrtc
244#endif // WEBRTC_API_AUDIO_CODECS_AUDIO_ENCODER_H_