/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef API_VIDEO_CODECS_VIDEO_ENCODER_H_
12#define API_VIDEO_CODECS_VIDEO_ENCODER_H_
ilnikd60d06a2017-04-05 03:02:20 -070013
Erik Språngdbdd8392019-01-17 15:27:50 +010014#include <limits>
ilnikd60d06a2017-04-05 03:02:20 -070015#include <memory>
16#include <string>
17#include <vector>
18
Erik Språngdbdd8392019-01-17 15:27:50 +010019#include "absl/container/inlined_vector.h"
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +020020#include "absl/types/optional.h"
Elad Alon8f01c4e2019-06-28 15:19:43 +020021#include "api/fec_controller_override.h"
Erik Språng4d9df382019-03-27 15:00:43 +010022#include "api/units/data_rate.h"
Niels Möller4dc66c52018-10-05 14:17:58 +020023#include "api/video/encoded_image.h"
Erik Språngec475652018-05-15 15:12:55 +020024#include "api/video/video_bitrate_allocation.h"
Erik Språngf93eda12019-01-16 17:10:57 +010025#include "api/video/video_codec_constants.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020026#include "api/video/video_frame.h"
Niels Möller802506c2018-05-31 10:44:51 +020027#include "api/video_codecs/video_codec.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020028#include "rtc_base/checks.h"
Mirko Bonadei276827c2018-10-16 14:13:50 +020029#include "rtc_base/system/rtc_export.h"
ilnikd60d06a2017-04-05 03:02:20 -070030
31namespace webrtc {
32
ilnikd60d06a2017-04-05 03:02:20 -070033// TODO(pbos): Expose these through a public (root) header or change these APIs.
34struct CodecSpecificInfo;
ilnikd60d06a2017-04-05 03:02:20 -070035
// Default lower bound on the number of pixels per frame (320x180). Used as the
// initial value of VideoEncoder::ScalingSettings::min_pixels_per_frame.
constexpr int kDefaultMinPixelsPerFrame = 320 * 180;
37
Danil Chapovalov0490c372020-08-04 13:05:43 +020038class RTC_EXPORT EncodedImageCallback {
ilnikd60d06a2017-04-05 03:02:20 -070039 public:
40 virtual ~EncodedImageCallback() {}
41
42 struct Result {
43 enum Error {
44 OK,
45
46 // Failed to send the packet.
47 ERROR_SEND_FAILED,
48 };
49
mflodman351424e2017-08-10 02:43:14 -070050 explicit Result(Error error) : error(error) {}
ilnikd60d06a2017-04-05 03:02:20 -070051 Result(Error error, uint32_t frame_id) : error(error), frame_id(frame_id) {}
52
53 Error error;
54
55 // Frame ID assigned to the frame. The frame ID should be the same as the ID
56 // seen by the receiver for this frame. RTP timestamp of the frame is used
57 // as frame ID when RTP is used to send video. Must be used only when
58 // error=OK.
59 uint32_t frame_id = 0;
60
61 // Tells the encoder that the next frame is should be dropped.
62 bool drop_next_frame = false;
63 };
64
Ilya Nikolaevskiyd79314f2017-10-23 10:45:37 +020065 // Used to signal the encoder about reason a frame is dropped.
66 // kDroppedByMediaOptimizations - dropped by MediaOptimizations (for rate
67 // limiting purposes).
68 // kDroppedByEncoder - dropped by encoder's internal rate limiter.
69 enum class DropReason : uint8_t {
70 kDroppedByMediaOptimizations,
71 kDroppedByEncoder
72 };
73
ilnikd60d06a2017-04-05 03:02:20 -070074 // Callback function which is called when an image has been encoded.
Danil Chapovalov70b2cf8b2020-08-24 11:18:00 +020075 virtual Result OnEncodedImage(
76 const EncodedImage& encoded_image,
77 const CodecSpecificInfo* codec_specific_info) = 0;
ilnikd60d06a2017-04-05 03:02:20 -070078
Ilya Nikolaevskiyd79314f2017-10-23 10:45:37 +020079 virtual void OnDroppedFrame(DropReason reason) {}
ilnikd60d06a2017-04-05 03:02:20 -070080};
81
Mirko Bonadei276827c2018-10-16 14:13:50 +020082class RTC_EXPORT VideoEncoder {
ilnikd60d06a2017-04-05 03:02:20 -070083 public:
// Pair of low/high QP thresholds. A default-constructed instance holds -1 for
// both values.
struct QpThresholds {
  QpThresholds(int l, int h) : low(l), high(h) {}
  QpThresholds() : low(-1), high(-1) {}
  int low;
  int high;
};
Elad Alon370f93a2019-06-11 14:57:57 +020090
Niels Möller225c7872018-02-22 15:03:53 +010091 // Quality scaling is enabled if thresholds are provided.
Hirokazu Honda11549152019-12-11 18:25:45 +090092 struct RTC_EXPORT ScalingSettings {
Niels Möller225c7872018-02-22 15:03:53 +010093 private:
94 // Private magic type for kOff, implicitly convertible to
95 // ScalingSettings.
96 struct KOff {};
97
98 public:
99 // TODO(nisse): Would be nicer if kOff were a constant ScalingSettings
Danil Chapovalov0bc58cf2018-06-21 13:32:56 +0200100 // rather than a magic value. However, absl::optional is not trivially copy
Niels Möller225c7872018-02-22 15:03:53 +0100101 // constructible, and hence a constant ScalingSettings needs a static
102 // initializer, which is strongly discouraged in Chrome. We can hopefully
103 // fix this when we switch to absl::optional or std::optional.
104 static constexpr KOff kOff = {};
105
106 ScalingSettings(int low, int high);
107 ScalingSettings(int low, int high, int min_pixels);
mflodman351424e2017-08-10 02:43:14 -0700108 ScalingSettings(const ScalingSettings&);
Niels Möller225c7872018-02-22 15:03:53 +0100109 ScalingSettings(KOff); // NOLINT(runtime/explicit)
mflodman351424e2017-08-10 02:43:14 -0700110 ~ScalingSettings();
111
Erik Språnge2fd86a2018-10-24 11:32:39 +0200112 absl::optional<QpThresholds> thresholds;
asapersson142fcc92017-08-17 08:58:54 -0700113
114 // We will never ask for a resolution lower than this.
115 // TODO(kthelgason): Lower this limit when better testing
116 // on MediaCodec and fallback implementations are in place.
117 // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7206
Henrik Boström4bab2fc2020-01-21 11:18:06 +0100118 int min_pixels_per_frame = kDefaultMinPixelsPerFrame;
Niels Möller225c7872018-02-22 15:03:53 +0100119
120 private:
121 // Private constructor; to get an object without thresholds, use
122 // the magic constant ScalingSettings::kOff.
123 ScalingSettings();
ilnikd60d06a2017-04-05 03:02:20 -0700124 };
ilnikd60d06a2017-04-05 03:02:20 -0700125
// Bitrate limits for resolution.
struct ResolutionBitrateLimits {
  ResolutionBitrateLimits(int frame_size_pixels,
                          int min_start_bitrate_bps,
                          int min_bitrate_bps,
                          int max_bitrate_bps)
      : frame_size_pixels(frame_size_pixels),
        min_start_bitrate_bps(min_start_bitrate_bps),
        min_bitrate_bps(min_bitrate_bps),
        max_bitrate_bps(max_bitrate_bps) {}
  // Size of video frame, in pixels, the bitrate thresholds are intended for.
  int frame_size_pixels = 0;
  // Recommended minimum bitrate to start encoding.
  int min_start_bitrate_bps = 0;
  // Recommended minimum bitrate.
  int min_bitrate_bps = 0;
  // Recommended maximum bitrate.
  int max_bitrate_bps = 0;

  // Equality is defined out of line; inequality is its negation.
  bool operator==(const ResolutionBitrateLimits& rhs) const;
  bool operator!=(const ResolutionBitrateLimits& rhs) const {
    return !(*this == rhs);
  }
};
150
Erik Språnge2fd86a2018-10-24 11:32:39 +0200151 // Struct containing metadata about the encoder implementing this interface.
Mirko Bonadei54875d02019-11-06 20:16:12 +0100152 struct RTC_EXPORT EncoderInfo {
Erik Språngdbdd8392019-01-17 15:27:50 +0100153 static constexpr uint8_t kMaxFramerateFraction =
154 std::numeric_limits<uint8_t>::max();
155
Erik Språnge2fd86a2018-10-24 11:32:39 +0200156 EncoderInfo();
Mirta Dvornicic897a9912018-11-30 13:12:21 +0100157 EncoderInfo(const EncoderInfo&);
158
Erik Språnge2fd86a2018-10-24 11:32:39 +0200159 ~EncoderInfo();
160
Erik Språng79685302019-11-27 17:26:58 +0100161 std::string ToString() const;
162 bool operator==(const EncoderInfo& rhs) const;
163 bool operator!=(const EncoderInfo& rhs) const { return !(*this == rhs); }
164
Erik Språnge2fd86a2018-10-24 11:32:39 +0200165 // Any encoder implementation wishing to use the WebRTC provided
166 // quality scaler must populate this field.
167 ScalingSettings scaling_settings;
168
Rasmus Brandt5cad55b2019-12-19 09:47:11 +0100169 // The width and height of the incoming video frames should be divisible
170 // by |requested_resolution_alignment|. If they are not, the encoder may
171 // drop the incoming frame.
172 // For example: With I420, this value would be a multiple of 2.
173 // Note that this field is unrelated to any horizontal or vertical stride
174 // requirements the encoder has on the incoming video frame buffers.
175 int requested_resolution_alignment;
176
Åsa Perssonc5a74ff2020-09-20 17:50:00 +0200177 // Same as above but if true, each simulcast layer should also be divisible
178 // by |requested_resolution_alignment|.
179 // Note that scale factors |scale_resolution_down_by| may be adjusted so a
180 // common multiple is not too large to avoid largely cropped frames and
181 // possibly with an aspect ratio far from the original.
182 // Warning: large values of scale_resolution_down_by could be changed
183 // considerably, especially if |requested_resolution_alignment| is large.
184 bool apply_alignment_to_all_simulcast_layers;
185
Erik Språnge2fd86a2018-10-24 11:32:39 +0200186 // If true, encoder supports working with a native handle (e.g. texture
187 // handle for hw codecs) rather than requiring a raw I420 buffer.
188 bool supports_native_handle;
189
190 // The name of this particular encoder implementation, e.g. "libvpx".
191 std::string implementation_name;
Erik Språngd3438aa2018-11-08 16:56:43 +0100192
193 // If this field is true, the encoder rate controller must perform
194 // well even in difficult situations, and produce close to the specified
195 // target bitrate seen over a reasonable time window, drop frames if
196 // necessary in order to keep the rate correct, and react quickly to
197 // changing bitrate targets. If this method returns true, we disable the
198 // frame dropper in the media optimization module and rely entirely on the
199 // encoder to produce media at a bitrate that closely matches the target.
200 // Any overshooting may result in delay buildup. If this method returns
201 // false (default behavior), the media opt frame dropper will drop input
202 // frames if it suspect encoder misbehavior. Misbehavior is common,
203 // especially in hardware codecs. Disable media opt at your own risk.
204 bool has_trusted_rate_controller;
Mirta Dvornicic897a9912018-11-30 13:12:21 +0100205
206 // If this field is true, the encoder uses hardware support and different
207 // thresholds will be used in CPU adaptation.
208 bool is_hardware_accelerated;
209
210 // If this field is true, the encoder uses internal camera sources, meaning
211 // that it does not require/expect frames to be delivered via
212 // webrtc::VideoEncoder::Encode.
213 // Internal source encoders are deprecated and support for them will be
214 // phased out.
215 bool has_internal_source;
Erik Språngdbdd8392019-01-17 15:27:50 +0100216
217 // For each spatial layer (simulcast stream or SVC layer), represented as an
218 // element in |fps_allocation| a vector indicates how many temporal layers
219 // the encoder is using for that spatial layer.
220 // For each spatial/temporal layer pair, the frame rate fraction is given as
221 // an 8bit unsigned integer where 0 = 0% and 255 = 100%.
222 //
223 // If the vector is empty for a given spatial layer, it indicates that frame
224 // rates are not defined and we can't count on any specific frame rate to be
225 // generated. Likely this indicates Vp8TemporalLayersType::kBitrateDynamic.
226 //
227 // The encoder may update this on a per-frame basis in response to both
228 // internal and external signals.
229 //
230 // Spatial layers are treated independently, but temporal layers are
231 // cumulative. For instance, if:
232 // fps_allocation[0][0] = kFullFramerate / 2;
233 // fps_allocation[0][1] = kFullFramerate;
234 // Then half of the frames are in the base layer and half is in TL1, but
235 // since TL1 is assumed to depend on the base layer, the frame rate is
236 // indicated as the full 100% for the top layer.
237 //
238 // Defaults to a single spatial layer containing a single temporal layer
239 // with a 100% frame rate fraction.
240 absl::InlinedVector<uint8_t, kMaxTemporalStreams>
241 fps_allocation[kMaxSpatialLayers];
Sergey Silkinbe0adee2019-06-26 14:11:03 +0200242
Sergey Silkin3d642f82019-07-03 15:09:33 +0200243 // Recommended bitrate limits for different resolutions.
244 std::vector<ResolutionBitrateLimits> resolution_bitrate_limits;
Erik Språngf4e0c292019-10-01 18:50:03 +0200245
Henrik Boströmb0f2e0c2020-03-06 13:32:03 +0100246 // Obtains the limits from |resolution_bitrate_limits| that best matches the
247 // |frame_size_pixels|.
248 absl::optional<ResolutionBitrateLimits>
249 GetEncoderBitrateLimitsForResolution(int frame_size_pixels) const;
250
Erik Språngf4e0c292019-10-01 18:50:03 +0200251 // If true, this encoder has internal support for generating simulcast
252 // streams. Otherwise, an adapter class will be needed.
253 // Even if true, the config provided to InitEncode() might not be supported,
254 // in such case the encoder should return
255 // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED.
256 bool supports_simulcast;
Erik Språnge2fd86a2018-10-24 11:32:39 +0200257 };
258
Mirko Bonadei8fa616f2019-11-12 10:05:05 +0100259 struct RTC_EXPORT RateControlParameters {
Erik Språng4c6ca302019-04-08 15:14:01 +0200260 RateControlParameters();
261 RateControlParameters(const VideoBitrateAllocation& bitrate,
Erik Språng16cb8f52019-04-12 13:59:09 +0200262 double framerate_fps);
263 RateControlParameters(const VideoBitrateAllocation& bitrate,
Erik Språng4c6ca302019-04-08 15:14:01 +0200264 double framerate_fps,
265 DataRate bandwidth_allocation);
266 virtual ~RateControlParameters();
267
Erik Språng4d9df382019-03-27 15:00:43 +0100268 // Target bitrate, per spatial/temporal layer.
269 // A target bitrate of 0bps indicates a layer should not be encoded at all.
Per Kjellanderd0a8f512020-10-07 11:28:41 +0200270 VideoBitrateAllocation target_bitrate;
271 // Adjusted target bitrate, per spatial/temporal layer. May be lower or
272 // higher than the target depending on encoder behaviour.
Erik Språng4d9df382019-03-27 15:00:43 +0100273 VideoBitrateAllocation bitrate;
274 // Target framerate, in fps. A value <= 0.0 is invalid and should be
275 // interpreted as framerate target not available. In this case the encoder
276 // should fall back to the max framerate specified in |codec_settings| of
277 // the last InitEncode() call.
278 double framerate_fps;
279 // The network bandwidth available for video. This is at least
280 // |bitrate.get_sum_bps()|, but may be higher if the application is not
281 // network constrained.
282 DataRate bandwidth_allocation;
Evan Shrubsole7c079f62019-09-26 09:55:03 +0200283
284 bool operator==(const RateControlParameters& rhs) const;
285 bool operator!=(const RateControlParameters& rhs) const;
Erik Språng4d9df382019-03-27 15:00:43 +0100286 };
287
Elad Alon6c371ca2019-04-04 12:28:51 +0200288 struct LossNotification {
289 // The timestamp of the last decodable frame *prior* to the last received.
290 // (The last received - described below - might itself be decodable or not.)
291 uint32_t timestamp_of_last_decodable;
292 // The timestamp of the last received frame.
293 uint32_t timestamp_of_last_received;
294 // Describes whether the dependencies of the last received frame were
295 // all decodable.
296 // |false| if some dependencies were undecodable, |true| if all dependencies
297 // were decodable, and |nullopt| if the dependencies are unknown.
Elad Alon20789e42019-04-09 11:56:14 +0200298 absl::optional<bool> dependencies_of_last_received_decodable;
Elad Alon6c371ca2019-04-04 12:28:51 +0200299 // Describes whether the received frame was decodable.
300 // |false| if some dependency was undecodable or if some packet belonging
301 // to the last received frame was missed.
302 // |true| if all dependencies were decodable and all packets belonging
303 // to the last received frame were received.
304 // |nullopt| if no packet belonging to the last frame was missed, but the
305 // last packet in the frame was not yet received.
Elad Alon20789e42019-04-09 11:56:14 +0200306 absl::optional<bool> last_received_decodable;
Elad Alon6c371ca2019-04-04 12:28:51 +0200307 };
308
// Negotiated capabilities which the VideoEncoder may expect the other
// side to use.
struct Capabilities {
  explicit Capabilities(bool loss_notification)
      : loss_notification(loss_notification) {}
  // Stores the value passed to the constructor.
  bool loss_notification;
};
316
317 struct Settings {
318 Settings(const Capabilities& capabilities,
319 int number_of_cores,
320 size_t max_payload_size)
321 : capabilities(capabilities),
322 number_of_cores(number_of_cores),
323 max_payload_size(max_payload_size) {}
324
325 Capabilities capabilities;
326 int number_of_cores;
327 size_t max_payload_size;
328 };
329
ilnikd60d06a2017-04-05 03:02:20 -0700330 static VideoCodecVP8 GetDefaultVp8Settings();
331 static VideoCodecVP9 GetDefaultVp9Settings();
332 static VideoCodecH264 GetDefaultH264Settings();
333
334 virtual ~VideoEncoder() {}
335
Elad Alon8f01c4e2019-06-28 15:19:43 +0200336 // Set a FecControllerOverride, through which the encoder may override
337 // decisions made by FecController.
338 // TODO(bugs.webrtc.org/10769): Update downstream, then make pure-virtual.
339 virtual void SetFecControllerOverride(
340 FecControllerOverride* fec_controller_override);
341
ilnikd60d06a2017-04-05 03:02:20 -0700342 // Initialize the encoder with the information from the codecSettings
343 //
344 // Input:
345 // - codec_settings : Codec settings
Elad Alon370f93a2019-06-11 14:57:57 +0200346 // - settings : Settings affecting the encoding itself.
347 // Input for deprecated version:
ilnikd60d06a2017-04-05 03:02:20 -0700348 // - number_of_cores : Number of cores available for the encoder
349 // - max_payload_size : The maximum size each payload is allowed
350 // to have. Usually MTU - overhead.
351 //
352 // Return value : Set bit rate if OK
353 // <0 - Errors:
354 // WEBRTC_VIDEO_CODEC_ERR_PARAMETER
355 // WEBRTC_VIDEO_CODEC_ERR_SIZE
ilnikd60d06a2017-04-05 03:02:20 -0700356 // WEBRTC_VIDEO_CODEC_MEMORY
357 // WEBRTC_VIDEO_CODEC_ERROR
Elad Alon370f93a2019-06-11 14:57:57 +0200358 // TODO(bugs.webrtc.org/10720): After updating downstream projects and posting
359 // an announcement to discuss-webrtc, remove the three-parameters variant
360 // and make the two-parameters variant pure-virtual.
361 /* RTC_DEPRECATED */ virtual int32_t InitEncode(
362 const VideoCodec* codec_settings,
363 int32_t number_of_cores,
364 size_t max_payload_size);
365 virtual int InitEncode(const VideoCodec* codec_settings,
366 const VideoEncoder::Settings& settings);
ilnikd60d06a2017-04-05 03:02:20 -0700367
368 // Register an encode complete callback object.
369 //
370 // Input:
371 // - callback : Callback object which handles encoded images.
372 //
373 // Return value : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
374 virtual int32_t RegisterEncodeCompleteCallback(
375 EncodedImageCallback* callback) = 0;
376
377 // Free encoder memory.
378 // Return value : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
379 virtual int32_t Release() = 0;
380
Evan Shrubsole895556e2020-10-05 09:15:13 +0200381 // Encode an image (as a part of a video stream). The encoded image
ilnikd60d06a2017-04-05 03:02:20 -0700382 // will be returned to the user through the encode complete callback.
383 //
384 // Input:
385 // - frame : Image to be encoded
386 // - frame_types : Frame type to be generated by the encoder.
387 //
388 // Return value : WEBRTC_VIDEO_CODEC_OK if OK
389 // <0 - Errors:
390 // WEBRTC_VIDEO_CODEC_ERR_PARAMETER
391 // WEBRTC_VIDEO_CODEC_MEMORY
392 // WEBRTC_VIDEO_CODEC_ERROR
ilnikd60d06a2017-04-05 03:02:20 -0700393 virtual int32_t Encode(const VideoFrame& frame,
Niels Möller9d766b92019-03-28 09:19:35 +0100394 const std::vector<VideoFrameType>* frame_types) = 0;
ilnikd60d06a2017-04-05 03:02:20 -0700395
Erik Språng4d9df382019-03-27 15:00:43 +0100396 // Sets rate control parameters: bitrate, framerate, etc. These settings are
397 // instantaneous (i.e. not moving averages) and should apply from now until
398 // the next call to SetRates().
Erik Språng157b7812019-05-13 11:37:12 +0200399 virtual void SetRates(const RateControlParameters& parameters) = 0;
Erik Språng4d9df382019-03-27 15:00:43 +0100400
Elad Aloncde8ab22019-03-20 11:56:20 +0100401 // Inform the encoder when the packet loss rate changes.
402 //
403 // Input: - packet_loss_rate : The packet loss rate (0.0 to 1.0).
404 virtual void OnPacketLossRateUpdate(float packet_loss_rate);
405
406 // Inform the encoder when the round trip time changes.
407 //
408 // Input: - rtt_ms : The new RTT, in milliseconds.
409 virtual void OnRttUpdate(int64_t rtt_ms);
410
Elad Alon6c371ca2019-04-04 12:28:51 +0200411 // Called when a loss notification is received.
412 virtual void OnLossNotification(const LossNotification& loss_notification);
413
Erik Språngd3438aa2018-11-08 16:56:43 +0100414 // Returns meta-data about the encoder, such as implementation name.
415 // The output of this method may change during runtime. For instance if a
416 // hardware encoder fails, it may fall back to doing software encoding using
417 // an implementation with different characteristics.
Erik Språnge2fd86a2018-10-24 11:32:39 +0200418 virtual EncoderInfo GetEncoderInfo() const;
ilnikd60d06a2017-04-05 03:02:20 -0700419};
ilnikd60d06a2017-04-05 03:02:20 -0700420} // namespace webrtc
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200421#endif // API_VIDEO_CODECS_VIDEO_ENCODER_H_