kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #ifndef MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_ |
| 12 | #define MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_ |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 13 | |
#include <stddef.h>
#include <stdint.h>

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/types/optional.h"
#include "api/audio_codecs/audio_decoder_factory.h"
#include "api/audio_codecs/audio_encoder.h"
#include "api/function_view.h"
#include "api/neteq/neteq.h"
#include "api/neteq/neteq_factory.h"
#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "system_wrappers/include/clock.h"
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 27 | |
| 28 | namespace webrtc { |
| 29 | |
| 30 | // forward declarations |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 31 | class AudioDecoder; |
| 32 | class AudioEncoder; |
| 33 | class AudioFrame; |
Niels Möller | afb5dbb | 2019-02-15 15:21:47 +0100 | [diff] [blame] | 34 | struct RTPHeader; |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 35 | |
// Size constant associated with 10 ms of 16-bit PCM audio at 48 kHz (super
// wideband).
// NOTE(review): this header does not state the unit of 960 (samples vs.
// bytes, mono vs. stereo) - confirm against the code that uses this macro.
#define WEBRTC_10MS_PCM_AUDIO 960  // 16 bits super wideband 48 kHz
| 37 | |
| 38 | // Callback class used for sending data ready to be packetized |
| 39 | class AudioPacketizationCallback { |
| 40 | public: |
| 41 | virtual ~AudioPacketizationCallback() {} |
| 42 | |
Niels Möller | 87e2d78 | 2019-03-07 10:18:23 +0100 | [diff] [blame] | 43 | virtual int32_t SendData(AudioFrameType frame_type, |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 44 | uint8_t payload_type, |
| 45 | uint32_t timestamp, |
| 46 | const uint8_t* payload_data, |
Niels Möller | 4babc68 | 2019-04-26 15:46:12 +0200 | [diff] [blame] | 47 | size_t payload_len_bytes) = 0; |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 48 | }; |
| 49 | |
| 50 | // Callback class used for reporting VAD decision |
| 51 | class ACMVADCallback { |
| 52 | public: |
| 53 | virtual ~ACMVADCallback() {} |
| 54 | |
Niels Möller | 87e2d78 | 2019-03-07 10:18:23 +0100 | [diff] [blame] | 55 | virtual int32_t InFrameType(AudioFrameType frame_type) = 0; |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 56 | }; |
| 57 | |
| 58 | class AudioCodingModule { |
| 59 | protected: |
| 60 | AudioCodingModule() {} |
| 61 | |
| 62 | public: |
| 63 | struct Config { |
Karl Wiberg | 5817d3d | 2018-04-06 10:06:42 +0200 | [diff] [blame] | 64 | explicit Config( |
| 65 | rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr); |
kwiberg | 36a4388 | 2016-08-29 05:33:32 -0700 | [diff] [blame] | 66 | Config(const Config&); |
| 67 | ~Config(); |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 68 | |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 69 | NetEq::Config neteq_config; |
| 70 | Clock* clock; |
ossu | e352578 | 2016-05-25 07:37:43 -0700 | [diff] [blame] | 71 | rtc::scoped_refptr<AudioDecoderFactory> decoder_factory; |
Ivo Creusen | c3d1f9b | 2019-11-01 11:47:51 +0100 | [diff] [blame] | 72 | NetEqFactory* neteq_factory = nullptr; |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 73 | }; |
| 74 | |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 75 | static AudioCodingModule* Create(const Config& config); |
| 76 | virtual ~AudioCodingModule() = default; |
| 77 | |
| 78 | /////////////////////////////////////////////////////////////////////////// |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 79 | // Sender |
| 80 | // |
| 81 | |
kwiberg | 4cdbd57 | 2016-03-30 03:10:05 -0700 | [diff] [blame] | 82 | // |modifier| is called exactly once with one argument: a pointer to the |
| 83 | // unique_ptr that holds the current encoder (which is null if there is no |
| 84 | // current encoder). For the duration of the call, |modifier| has exclusive |
| 85 | // access to the unique_ptr; it may call the encoder, steal the encoder and |
| 86 | // replace it with another encoder or with nullptr, etc. |
| 87 | virtual void ModifyEncoder( |
kwiberg | 24c7c12 | 2016-09-28 11:57:10 -0700 | [diff] [blame] | 88 | rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0; |
kwiberg | 4cdbd57 | 2016-03-30 03:10:05 -0700 | [diff] [blame] | 89 | |
| 90 | // Utility method for simply replacing the existing encoder with a new one. |
| 91 | void SetEncoder(std::unique_ptr<AudioEncoder> new_encoder) { |
| 92 | ModifyEncoder([&](std::unique_ptr<AudioEncoder>* encoder) { |
| 93 | *encoder = std::move(new_encoder); |
| 94 | }); |
| 95 | } |
| 96 | |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 97 | // int32_t RegisterTransportCallback() |
| 98 | // Register a transport callback which will be called to deliver |
| 99 | // the encoded buffers whenever Process() is called and a |
| 100 | // bit-stream is ready. |
| 101 | // |
| 102 | // Input: |
| 103 | // -transport : pointer to the callback class |
| 104 | // transport->SendData() is called whenever |
| 105 | // Process() is called and bit-stream is ready |
| 106 | // to deliver. |
| 107 | // |
| 108 | // Return value: |
| 109 | // -1 if the transport callback could not be registered |
| 110 | // 0 if registration is successful. |
| 111 | // |
| 112 | virtual int32_t RegisterTransportCallback( |
| 113 | AudioPacketizationCallback* transport) = 0; |
| 114 | |
| 115 | /////////////////////////////////////////////////////////////////////////// |
| 116 | // int32_t Add10MsData() |
| 117 | // Add 10MS of raw (PCM) audio data and encode it. If the sampling |
| 118 | // frequency of the audio does not match the sampling frequency of the |
| 119 | // current encoder ACM will resample the audio. If an encoded packet was |
| 120 | // produced, it will be delivered via the callback object registered using |
| 121 | // RegisterTransportCallback, and the return value from this function will |
| 122 | // be the number of bytes encoded. |
| 123 | // |
| 124 | // Input: |
| 125 | // -audio_frame : the input audio frame, containing raw audio |
Fredrik Solenberg | bbf21a3 | 2018-04-12 22:44:09 +0200 | [diff] [blame] | 126 | // sampling frequency etc. |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 127 | // |
| 128 | // Return value: |
| 129 | // >= 0 number of bytes encoded. |
| 130 | // -1 some error occurred. |
| 131 | // |
| 132 | virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0; |
| 133 | |
| 134 | /////////////////////////////////////////////////////////////////////////// |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 135 | // int SetPacketLossRate() |
| 136 | // Sets expected packet loss rate for encoding. Some encoders provide packet |
| 137 | // loss gnostic encoding to make stream less sensitive to packet losses, |
| 138 | // through e.g., FEC. No effects on codecs that do not provide such encoding. |
| 139 | // |
| 140 | // Input: |
| 141 | // -packet_loss_rate : expected packet loss rate (0 -- 100 inclusive). |
| 142 | // |
| 143 | // Return value |
| 144 | // -1 if failed to set packet loss rate, |
| 145 | // 0 if succeeded. |
| 146 | // |
minyue | 7e30432 | 2016-10-12 05:00:55 -0700 | [diff] [blame] | 147 | // This is only used in test code that rely on old ACM APIs. |
| 148 | // TODO(minyue): Remove it when possible. |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 149 | virtual int SetPacketLossRate(int packet_loss_rate) = 0; |
| 150 | |
| 151 | /////////////////////////////////////////////////////////////////////////// |
| 152 | // (VAD) Voice Activity Detection |
| 153 | // |
| 154 | |
| 155 | /////////////////////////////////////////////////////////////////////////// |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 156 | // int32_t RegisterVADCallback() |
| 157 | // Call this method to register a callback function which is called |
| 158 | // any time that ACM encounters an empty frame. That is a frame which is |
| 159 | // recognized inactive. Depending on the codec WebRtc VAD or internal codec |
| 160 | // VAD is employed to identify a frame as active/inactive. |
| 161 | // |
| 162 | // Input: |
| 163 | // -vad_callback : pointer to a callback function. |
| 164 | // |
| 165 | // Return value: |
| 166 | // -1 if failed to register the callback function. |
| 167 | // 0 if the callback function is registered successfully. |
| 168 | // |
| 169 | virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0; |
| 170 | |
| 171 | /////////////////////////////////////////////////////////////////////////// |
| 172 | // Receiver |
| 173 | // |
| 174 | |
| 175 | /////////////////////////////////////////////////////////////////////////// |
| 176 | // int32_t InitializeReceiver() |
| 177 | // Any decoder-related state of ACM will be initialized to the |
| 178 | // same state when ACM is created. This will not interrupt or |
| 179 | // effect encoding functionality of ACM. ACM would lose all the |
| 180 | // decoding-related settings by calling this function. |
| 181 | // For instance, all registered codecs are deleted and have to be |
| 182 | // registered again. |
| 183 | // |
| 184 | // Return value: |
| 185 | // -1 if failed to initialize, |
| 186 | // 0 if succeeded. |
| 187 | // |
| 188 | virtual int32_t InitializeReceiver() = 0; |
| 189 | |
kwiberg | 1c07c70 | 2017-03-27 07:15:49 -0700 | [diff] [blame] | 190 | // Replace any existing decoders with the given payload type -> decoder map. |
| 191 | virtual void SetReceiveCodecs( |
| 192 | const std::map<int, SdpAudioFormat>& codecs) = 0; |
| 193 | |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 194 | /////////////////////////////////////////////////////////////////////////// |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 195 | // int32_t IncomingPacket() |
| 196 | // Call this function to insert a parsed RTP packet into ACM. |
| 197 | // |
| 198 | // Inputs: |
| 199 | // -incoming_payload : received payload. |
| 200 | // -payload_len_bytes : the length of payload in bytes. |
| 201 | // -rtp_info : the relevant information retrieved from RTP |
| 202 | // header. |
| 203 | // |
| 204 | // Return value: |
| 205 | // -1 if failed to push in the payload |
| 206 | // 0 if payload is successfully pushed in. |
| 207 | // |
| 208 | virtual int32_t IncomingPacket(const uint8_t* incoming_payload, |
| 209 | const size_t payload_len_bytes, |
Niels Möller | afb5dbb | 2019-02-15 15:21:47 +0100 | [diff] [blame] | 210 | const RTPHeader& rtp_header) = 0; |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 211 | |
| 212 | /////////////////////////////////////////////////////////////////////////// |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 213 | // int32_t PlayoutData10Ms( |
| 214 | // Get 10 milliseconds of raw audio data for playout, at the given sampling |
| 215 | // frequency. ACM will perform a resampling if required. |
| 216 | // |
| 217 | // Input: |
| 218 | // -desired_freq_hz : the desired sampling frequency, in Hertz, of the |
| 219 | // output audio. If set to -1, the function returns |
| 220 | // the audio at the current sampling frequency. |
| 221 | // |
| 222 | // Output: |
| 223 | // -audio_frame : output audio frame which contains raw audio data |
Fredrik Solenberg | bbf21a3 | 2018-04-12 22:44:09 +0200 | [diff] [blame] | 224 | // and other relevant parameters. |
henrik.lundin | 834a6ea | 2016-05-13 03:45:24 -0700 | [diff] [blame] | 225 | // -muted : if true, the sample data in audio_frame is not |
| 226 | // populated, and must be interpreted as all zero. |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 227 | // |
| 228 | // Return value: |
| 229 | // -1 if the function fails, |
| 230 | // 0 if the function succeeds. |
| 231 | // |
| 232 | virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz, |
henrik.lundin | 834a6ea | 2016-05-13 03:45:24 -0700 | [diff] [blame] | 233 | AudioFrame* audio_frame, |
| 234 | bool* muted) = 0; |
| 235 | |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 236 | /////////////////////////////////////////////////////////////////////////// |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 237 | // statistics |
| 238 | // |
| 239 | |
| 240 | /////////////////////////////////////////////////////////////////////////// |
| 241 | // int32_t GetNetworkStatistics() |
| 242 | // Get network statistics. Note that the internal statistics of NetEq are |
| 243 | // reset by this call. |
| 244 | // |
| 245 | // Input: |
| 246 | // -network_statistics : a structure that contains network statistics. |
| 247 | // |
| 248 | // Return value: |
| 249 | // -1 if failed to set the network statistics, |
| 250 | // 0 if statistics are set successfully. |
| 251 | // |
| 252 | virtual int32_t GetNetworkStatistics( |
| 253 | NetworkStatistics* network_statistics) = 0; |
| 254 | |
ivoc | e1198e0 | 2017-09-08 08:13:19 -0700 | [diff] [blame] | 255 | virtual ANAStats GetANAStats() const = 0; |
kjellander | 3e6db23 | 2015-11-26 04:44:54 -0800 | [diff] [blame] | 256 | }; |
| 257 | |
| 258 | } // namespace webrtc |
| 259 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 260 | #endif // MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_ |