/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
#define MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_

#include <stddef.h>
#include <memory>
#include <string>
#include <vector>

#include "api/array_view.h"
#include "rtc_base/constructor_magic.h"

namespace webrtc {

class AudioBuffer;

// The acoustic echo cancellation (AEC) component provides better performance
// than AECM but also requires more processing power and is dependent on delay
// stability and reporting accuracy. As such it is well-suited and recommended
// for PC and IP phone applications.
30class EchoCancellationImpl {
niklase@google.com470e71d2011-07-07 08:21:25 +000031 public:
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +010032 explicit EchoCancellationImpl();
sazabe490b22018-10-03 17:03:13 +020033 ~EchoCancellationImpl();
niklase@google.com470e71d2011-07-07 08:21:25 +000034
peah764e3642016-10-22 05:04:30 -070035 void ProcessRenderAudio(rtc::ArrayView<const float> packed_render_audio);
peahb58a1582016-03-15 09:34:24 -070036 int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
niklase@google.com470e71d2011-07-07 08:21:25 +000037
sazabe490b22018-10-03 17:03:13 +020038 int Enable(bool enable);
39 bool is_enabled() const;
40
41 // Differences in clock speed on the primary and reverse streams can impact
42 // the AEC performance. On the client-side, this could be seen when different
43 // render and capture devices are used, particularly with webcams.
44 //
45 // This enables a compensation mechanism, and requires that
46 // set_stream_drift_samples() be called.
47 int enable_drift_compensation(bool enable);
48 bool is_drift_compensation_enabled() const;
49
50 // Sets the difference between the number of samples rendered and captured by
51 // the audio devices since the last call to |ProcessStream()|. Must be called
52 // if drift compensation is enabled, prior to |ProcessStream()|.
53 void set_stream_drift_samples(int drift);
54 int stream_drift_samples() const;
55
56 enum SuppressionLevel {
57 kLowSuppression,
58 kModerateSuppression,
59 kHighSuppression
60 };
61
62 // Sets the aggressiveness of the suppressor. A higher level trades off
63 // double-talk performance for increased echo suppression.
64 int set_suppression_level(SuppressionLevel level);
65 SuppressionLevel suppression_level() const;
66
67 // Returns false if the current frame almost certainly contains no echo
68 // and true if it _might_ contain echo.
69 bool stream_has_echo() const;
70
71 // Enables the computation of various echo metrics. These are obtained
72 // through |GetMetrics()|.
73 int enable_metrics(bool enable);
74 bool are_metrics_enabled() const;
75
76 // Each statistic is reported in dB.
77 // P_far: Far-end (render) signal power.
78 // P_echo: Near-end (capture) echo signal power.
79 // P_out: Signal power at the output of the AEC.
80 // P_a: Internal signal power at the point before the AEC's non-linear
81 // processor.
82 struct Metrics {
Sam Zackrisson28127632018-11-01 11:37:15 +010083 struct Statistic {
84 int instant = 0; // Instantaneous value.
85 int average = 0; // Long-term average.
86 int maximum = 0; // Long-term maximum.
87 int minimum = 0; // Long-term minimum.
88 };
sazabe490b22018-10-03 17:03:13 +020089 // RERL = ERL + ERLE
Sam Zackrisson28127632018-11-01 11:37:15 +010090 Statistic residual_echo_return_loss;
sazabe490b22018-10-03 17:03:13 +020091
92 // ERL = 10log_10(P_far / P_echo)
Sam Zackrisson28127632018-11-01 11:37:15 +010093 Statistic echo_return_loss;
sazabe490b22018-10-03 17:03:13 +020094
95 // ERLE = 10log_10(P_echo / P_out)
Sam Zackrisson28127632018-11-01 11:37:15 +010096 Statistic echo_return_loss_enhancement;
sazabe490b22018-10-03 17:03:13 +020097
98 // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a)
Sam Zackrisson28127632018-11-01 11:37:15 +010099 Statistic a_nlp;
sazabe490b22018-10-03 17:03:13 +0200100
101 // Fraction of time that the AEC linear filter is divergent, in a 1-second
102 // non-overlapped aggregation window.
103 float divergent_filter_fraction;
104 };
105
Sam Zackrissoncdf0e6d2018-09-17 11:05:17 +0200106 // Provides various statistics about the AEC.
sazabe490b22018-10-03 17:03:13 +0200107 int GetMetrics(Metrics* metrics);
108
109 // Enables computation and logging of delay values. Statistics are obtained
110 // through |GetDelayMetrics()|.
111 int enable_delay_logging(bool enable);
112 bool is_delay_logging_enabled() const;
113
Sam Zackrissoncdf0e6d2018-09-17 11:05:17 +0200114 // Provides delay metrics.
sazabe490b22018-10-03 17:03:13 +0200115 // The delay metrics consists of the delay |median| and the delay standard
116 // deviation |std|. It also consists of the fraction of delay estimates
117 // |fraction_poor_delays| that can make the echo cancellation perform poorly.
118 // The values are aggregated until the first call to |GetDelayMetrics()| and
119 // afterwards aggregated and updated every second.
120 // Note that if there are several clients pulling metrics from
121 // |GetDelayMetrics()| during a session the first call from any of them will
122 // change to one second aggregation window for all.
123 int GetDelayMetrics(int* median, int* std);
124 int GetDelayMetrics(int* median, int* std, float* fraction_poor_delays);
125
126 // Returns a pointer to the low level AEC component. In case of multiple
127 // channels, the pointer to the first one is returned. A NULL pointer is
128 // returned when the AEC component is disabled or has not been initialized
129 // successfully.
130 struct AecCore* aec_core() const;
niklase@google.com470e71d2011-07-07 08:21:25 +0000131
peahb58a1582016-03-15 09:34:24 -0700132 void Initialize(int sample_rate_hz,
133 size_t num_reverse_channels_,
134 size_t num_output_channels_,
135 size_t num_proc_channels_);
Per Åhgrenf204faf2019-04-25 15:18:06 +0200136 void SetExtraOptions(bool use_extended_filter,
137 bool use_delay_agnostic,
138 bool use_refined_adaptive_filter);
Minyue13b96ba2015-10-03 00:39:14 +0200139 bool is_delay_agnostic_enabled() const;
140 bool is_extended_filter_enabled() const;
peah7789fe72016-04-15 01:19:44 -0700141 std::string GetExperimentsDescription();
peah0332c2d2016-04-15 11:23:33 -0700142 bool is_refined_adaptive_filter_enabled() const;
Minyue13b96ba2015-10-03 00:39:14 +0200143
peah20028c42016-03-04 11:50:54 -0800144 // Returns the system delay of the first AEC component.
145 int GetSystemDelayInSamples() const;
146
peah764e3642016-10-22 05:04:30 -0700147 static void PackRenderAudioBuffer(const AudioBuffer* audio,
148 size_t num_output_channels,
149 size_t num_channels,
150 std::vector<float>* packed_buffer);
151 static size_t NumCancellersRequired(size_t num_output_channels,
152 size_t num_reverse_channels);
153
niklase@google.com470e71d2011-07-07 08:21:25 +0000154 private:
peahb624d8c2016-03-05 03:01:14 -0800155 class Canceller;
peahb58a1582016-03-15 09:34:24 -0700156 struct StreamProperties;
peahb624d8c2016-03-05 03:01:14 -0800157
peahfa6228e2015-11-16 16:27:42 -0800158 void AllocateRenderQueue();
peahb624d8c2016-03-05 03:01:14 -0800159 int Configure();
peahfa6228e2015-11-16 16:27:42 -0800160
peahb624d8c2016-03-05 03:01:14 -0800161 bool enabled_ = false;
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +0100162 bool drift_compensation_enabled_;
163 bool metrics_enabled_;
164 SuppressionLevel suppression_level_;
165 int stream_drift_samples_;
166 bool was_stream_drift_set_;
167 bool stream_has_echo_;
168 bool delay_logging_enabled_;
169 bool extended_filter_enabled_;
170 bool delay_agnostic_enabled_;
171 bool refined_adaptive_filter_enabled_ = false;
peahdf3efa82015-11-28 12:35:15 -0800172
Per Åhgren11556462017-12-22 16:13:57 +0100173 // Only active on Chrome OS devices.
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +0100174 const bool enforce_zero_stream_delay_;
Per Åhgren11556462017-12-22 16:13:57 +0100175
peahb624d8c2016-03-05 03:01:14 -0800176 std::vector<std::unique_ptr<Canceller>> cancellers_;
peahb58a1582016-03-15 09:34:24 -0700177 std::unique_ptr<StreamProperties> stream_properties_;
niklase@google.com470e71d2011-07-07 08:21:25 +0000178};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_