blob: a80d139c518f3d00a9453b8d889c1886eaa74af5 [file] [log] [blame]
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
12#define MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
niklase@google.com470e71d2011-07-07 08:21:25 +000013
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stddef.h>
kwiberg88788ad2016-02-19 07:04:49 -080015#include <memory>
Yves Gerey988cc082018-10-23 12:03:01 +020016#include <string>
terelius85fa7d52016-03-24 01:51:52 -070017#include <vector>
kwiberg88788ad2016-02-19 07:04:49 -080018
Yves Gerey988cc082018-10-23 12:03:01 +020019#include "api/array_view.h"
Steve Anton10542f22019-01-11 09:11:00 -080020#include "rtc_base/constructor_magic.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000021
22namespace webrtc {
andrew@webrtc.org61e596f2013-07-25 18:28:29 +000023
niklase@google.com470e71d2011-07-07 08:21:25 +000024class AudioBuffer;
25
sazabe490b22018-10-03 17:03:13 +020026// The acoustic echo cancellation (AEC) component provides better performance
27// than AECM but also requires more processing power and is dependent on delay
28// stability and reporting accuracy. As such it is well-suited and recommended
29// for PC and IP phone applications.
30class EchoCancellationImpl {
niklase@google.com470e71d2011-07-07 08:21:25 +000031 public:
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +010032 explicit EchoCancellationImpl();
sazabe490b22018-10-03 17:03:13 +020033 ~EchoCancellationImpl();
niklase@google.com470e71d2011-07-07 08:21:25 +000034
peah764e3642016-10-22 05:04:30 -070035 void ProcessRenderAudio(rtc::ArrayView<const float> packed_render_audio);
peahb58a1582016-03-15 09:34:24 -070036 int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
niklase@google.com470e71d2011-07-07 08:21:25 +000037
sazabe490b22018-10-03 17:03:13 +020038
39 // Differences in clock speed on the primary and reverse streams can impact
40 // the AEC performance. On the client-side, this could be seen when different
41 // render and capture devices are used, particularly with webcams.
42 //
43 // This enables a compensation mechanism, and requires that
44 // set_stream_drift_samples() be called.
45 int enable_drift_compensation(bool enable);
46 bool is_drift_compensation_enabled() const;
47
48 // Sets the difference between the number of samples rendered and captured by
49 // the audio devices since the last call to |ProcessStream()|. Must be called
50 // if drift compensation is enabled, prior to |ProcessStream()|.
51 void set_stream_drift_samples(int drift);
52 int stream_drift_samples() const;
53
54 enum SuppressionLevel {
55 kLowSuppression,
56 kModerateSuppression,
57 kHighSuppression
58 };
59
60 // Sets the aggressiveness of the suppressor. A higher level trades off
61 // double-talk performance for increased echo suppression.
62 int set_suppression_level(SuppressionLevel level);
63 SuppressionLevel suppression_level() const;
64
65 // Returns false if the current frame almost certainly contains no echo
66 // and true if it _might_ contain echo.
67 bool stream_has_echo() const;
68
69 // Enables the computation of various echo metrics. These are obtained
70 // through |GetMetrics()|.
71 int enable_metrics(bool enable);
72 bool are_metrics_enabled() const;
73
74 // Each statistic is reported in dB.
75 // P_far: Far-end (render) signal power.
76 // P_echo: Near-end (capture) echo signal power.
77 // P_out: Signal power at the output of the AEC.
78 // P_a: Internal signal power at the point before the AEC's non-linear
79 // processor.
80 struct Metrics {
Sam Zackrisson28127632018-11-01 11:37:15 +010081 struct Statistic {
82 int instant = 0; // Instantaneous value.
83 int average = 0; // Long-term average.
84 int maximum = 0; // Long-term maximum.
85 int minimum = 0; // Long-term minimum.
86 };
sazabe490b22018-10-03 17:03:13 +020087 // RERL = ERL + ERLE
Sam Zackrisson28127632018-11-01 11:37:15 +010088 Statistic residual_echo_return_loss;
sazabe490b22018-10-03 17:03:13 +020089
90 // ERL = 10log_10(P_far / P_echo)
Sam Zackrisson28127632018-11-01 11:37:15 +010091 Statistic echo_return_loss;
sazabe490b22018-10-03 17:03:13 +020092
93 // ERLE = 10log_10(P_echo / P_out)
Sam Zackrisson28127632018-11-01 11:37:15 +010094 Statistic echo_return_loss_enhancement;
sazabe490b22018-10-03 17:03:13 +020095
96 // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a)
Sam Zackrisson28127632018-11-01 11:37:15 +010097 Statistic a_nlp;
sazabe490b22018-10-03 17:03:13 +020098
99 // Fraction of time that the AEC linear filter is divergent, in a 1-second
100 // non-overlapped aggregation window.
101 float divergent_filter_fraction;
102 };
103
Sam Zackrissoncdf0e6d2018-09-17 11:05:17 +0200104 // Provides various statistics about the AEC.
sazabe490b22018-10-03 17:03:13 +0200105 int GetMetrics(Metrics* metrics);
106
107 // Enables computation and logging of delay values. Statistics are obtained
108 // through |GetDelayMetrics()|.
109 int enable_delay_logging(bool enable);
110 bool is_delay_logging_enabled() const;
111
Sam Zackrissoncdf0e6d2018-09-17 11:05:17 +0200112 // Provides delay metrics.
sazabe490b22018-10-03 17:03:13 +0200113 // The delay metrics consists of the delay |median| and the delay standard
114 // deviation |std|. It also consists of the fraction of delay estimates
115 // |fraction_poor_delays| that can make the echo cancellation perform poorly.
116 // The values are aggregated until the first call to |GetDelayMetrics()| and
117 // afterwards aggregated and updated every second.
118 // Note that if there are several clients pulling metrics from
119 // |GetDelayMetrics()| during a session the first call from any of them will
120 // change to one second aggregation window for all.
121 int GetDelayMetrics(int* median, int* std);
122 int GetDelayMetrics(int* median, int* std, float* fraction_poor_delays);
123
124 // Returns a pointer to the low level AEC component. In case of multiple
125 // channels, the pointer to the first one is returned. A NULL pointer is
126 // returned when the AEC component is disabled or has not been initialized
127 // successfully.
128 struct AecCore* aec_core() const;
niklase@google.com470e71d2011-07-07 08:21:25 +0000129
peahb58a1582016-03-15 09:34:24 -0700130 void Initialize(int sample_rate_hz,
131 size_t num_reverse_channels_,
132 size_t num_output_channels_,
133 size_t num_proc_channels_);
Per Åhgrenf204faf2019-04-25 15:18:06 +0200134 void SetExtraOptions(bool use_extended_filter,
135 bool use_delay_agnostic,
136 bool use_refined_adaptive_filter);
Minyue13b96ba2015-10-03 00:39:14 +0200137 bool is_delay_agnostic_enabled() const;
138 bool is_extended_filter_enabled() const;
peah7789fe72016-04-15 01:19:44 -0700139 std::string GetExperimentsDescription();
peah0332c2d2016-04-15 11:23:33 -0700140 bool is_refined_adaptive_filter_enabled() const;
Minyue13b96ba2015-10-03 00:39:14 +0200141
peah20028c42016-03-04 11:50:54 -0800142 // Returns the system delay of the first AEC component.
143 int GetSystemDelayInSamples() const;
144
peah764e3642016-10-22 05:04:30 -0700145 static void PackRenderAudioBuffer(const AudioBuffer* audio,
146 size_t num_output_channels,
147 size_t num_channels,
148 std::vector<float>* packed_buffer);
149 static size_t NumCancellersRequired(size_t num_output_channels,
150 size_t num_reverse_channels);
151
niklase@google.com470e71d2011-07-07 08:21:25 +0000152 private:
peahb624d8c2016-03-05 03:01:14 -0800153 class Canceller;
peahb58a1582016-03-15 09:34:24 -0700154 struct StreamProperties;
peahb624d8c2016-03-05 03:01:14 -0800155
peahfa6228e2015-11-16 16:27:42 -0800156 void AllocateRenderQueue();
peahb624d8c2016-03-05 03:01:14 -0800157 int Configure();
peahfa6228e2015-11-16 16:27:42 -0800158
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +0100159 bool drift_compensation_enabled_;
160 bool metrics_enabled_;
161 SuppressionLevel suppression_level_;
162 int stream_drift_samples_;
163 bool was_stream_drift_set_;
164 bool stream_has_echo_;
165 bool delay_logging_enabled_;
166 bool extended_filter_enabled_;
167 bool delay_agnostic_enabled_;
168 bool refined_adaptive_filter_enabled_ = false;
peahdf3efa82015-11-28 12:35:15 -0800169
Per Åhgren11556462017-12-22 16:13:57 +0100170 // Only active on Chrome OS devices.
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +0100171 const bool enforce_zero_stream_delay_;
Per Åhgren11556462017-12-22 16:13:57 +0100172
peahb624d8c2016-03-05 03:01:14 -0800173 std::vector<std::unique_ptr<Canceller>> cancellers_;
peahb58a1582016-03-15 09:34:24 -0700174 std::unique_ptr<StreamProperties> stream_properties_;
niklase@google.com470e71d2011-07-07 08:21:25 +0000175};
andrew@webrtc.org61e596f2013-07-25 18:28:29 +0000176
niklase@google.com470e71d2011-07-07 08:21:25 +0000177} // namespace webrtc
178
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200179#endif // MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_