blob: 79be73bb6c6a5dd4b05b2071ee5720af926a3b05 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
12#define MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
niklase@google.com470e71d2011-07-07 08:21:25 +000013
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stddef.h>
kwiberg88788ad2016-02-19 07:04:49 -080015#include <memory>
Yves Gerey988cc082018-10-23 12:03:01 +020016#include <string>
terelius85fa7d52016-03-24 01:51:52 -070017#include <vector>
kwiberg88788ad2016-02-19 07:04:49 -080018
Yves Gerey988cc082018-10-23 12:03:01 +020019#include "api/array_view.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020020#include "modules/audio_processing/include/audio_processing.h"
Steve Anton10542f22019-01-11 09:11:00 -080021#include "rtc_base/constructor_magic.h"
22#include "rtc_base/critical_section.h"
Yves Gerey988cc082018-10-23 12:03:01 +020023#include "rtc_base/thread_annotations.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000024
25namespace webrtc {
andrew@webrtc.org61e596f2013-07-25 18:28:29 +000026
niklase@google.com470e71d2011-07-07 08:21:25 +000027class AudioBuffer;
28
// The acoustic echo cancellation (AEC) component provides better performance
// than AECM but also requires more processing power and is dependent on delay
// stability and reporting accuracy. As such it is well-suited and recommended
// for PC and IP phone applications.
33class EchoCancellationImpl {
niklase@google.com470e71d2011-07-07 08:21:25 +000034 public:
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +010035 explicit EchoCancellationImpl();
sazabe490b22018-10-03 17:03:13 +020036 ~EchoCancellationImpl();
niklase@google.com470e71d2011-07-07 08:21:25 +000037
peah764e3642016-10-22 05:04:30 -070038 void ProcessRenderAudio(rtc::ArrayView<const float> packed_render_audio);
peahb58a1582016-03-15 09:34:24 -070039 int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
niklase@google.com470e71d2011-07-07 08:21:25 +000040
sazabe490b22018-10-03 17:03:13 +020041 int Enable(bool enable);
42 bool is_enabled() const;
43
44 // Differences in clock speed on the primary and reverse streams can impact
45 // the AEC performance. On the client-side, this could be seen when different
46 // render and capture devices are used, particularly with webcams.
47 //
48 // This enables a compensation mechanism, and requires that
49 // set_stream_drift_samples() be called.
50 int enable_drift_compensation(bool enable);
51 bool is_drift_compensation_enabled() const;
52
53 // Sets the difference between the number of samples rendered and captured by
54 // the audio devices since the last call to |ProcessStream()|. Must be called
55 // if drift compensation is enabled, prior to |ProcessStream()|.
56 void set_stream_drift_samples(int drift);
57 int stream_drift_samples() const;
58
59 enum SuppressionLevel {
60 kLowSuppression,
61 kModerateSuppression,
62 kHighSuppression
63 };
64
65 // Sets the aggressiveness of the suppressor. A higher level trades off
66 // double-talk performance for increased echo suppression.
67 int set_suppression_level(SuppressionLevel level);
68 SuppressionLevel suppression_level() const;
69
70 // Returns false if the current frame almost certainly contains no echo
71 // and true if it _might_ contain echo.
72 bool stream_has_echo() const;
73
74 // Enables the computation of various echo metrics. These are obtained
75 // through |GetMetrics()|.
76 int enable_metrics(bool enable);
77 bool are_metrics_enabled() const;
78
79 // Each statistic is reported in dB.
80 // P_far: Far-end (render) signal power.
81 // P_echo: Near-end (capture) echo signal power.
82 // P_out: Signal power at the output of the AEC.
83 // P_a: Internal signal power at the point before the AEC's non-linear
84 // processor.
85 struct Metrics {
Sam Zackrisson28127632018-11-01 11:37:15 +010086 struct Statistic {
87 int instant = 0; // Instantaneous value.
88 int average = 0; // Long-term average.
89 int maximum = 0; // Long-term maximum.
90 int minimum = 0; // Long-term minimum.
91 };
sazabe490b22018-10-03 17:03:13 +020092 // RERL = ERL + ERLE
Sam Zackrisson28127632018-11-01 11:37:15 +010093 Statistic residual_echo_return_loss;
sazabe490b22018-10-03 17:03:13 +020094
95 // ERL = 10log_10(P_far / P_echo)
Sam Zackrisson28127632018-11-01 11:37:15 +010096 Statistic echo_return_loss;
sazabe490b22018-10-03 17:03:13 +020097
98 // ERLE = 10log_10(P_echo / P_out)
Sam Zackrisson28127632018-11-01 11:37:15 +010099 Statistic echo_return_loss_enhancement;
sazabe490b22018-10-03 17:03:13 +0200100
101 // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a)
Sam Zackrisson28127632018-11-01 11:37:15 +0100102 Statistic a_nlp;
sazabe490b22018-10-03 17:03:13 +0200103
104 // Fraction of time that the AEC linear filter is divergent, in a 1-second
105 // non-overlapped aggregation window.
106 float divergent_filter_fraction;
107 };
108
Sam Zackrissoncdf0e6d2018-09-17 11:05:17 +0200109 // Provides various statistics about the AEC.
sazabe490b22018-10-03 17:03:13 +0200110 int GetMetrics(Metrics* metrics);
111
112 // Enables computation and logging of delay values. Statistics are obtained
113 // through |GetDelayMetrics()|.
114 int enable_delay_logging(bool enable);
115 bool is_delay_logging_enabled() const;
116
Sam Zackrissoncdf0e6d2018-09-17 11:05:17 +0200117 // Provides delay metrics.
sazabe490b22018-10-03 17:03:13 +0200118 // The delay metrics consists of the delay |median| and the delay standard
119 // deviation |std|. It also consists of the fraction of delay estimates
120 // |fraction_poor_delays| that can make the echo cancellation perform poorly.
121 // The values are aggregated until the first call to |GetDelayMetrics()| and
122 // afterwards aggregated and updated every second.
123 // Note that if there are several clients pulling metrics from
124 // |GetDelayMetrics()| during a session the first call from any of them will
125 // change to one second aggregation window for all.
126 int GetDelayMetrics(int* median, int* std);
127 int GetDelayMetrics(int* median, int* std, float* fraction_poor_delays);
128
129 // Returns a pointer to the low level AEC component. In case of multiple
130 // channels, the pointer to the first one is returned. A NULL pointer is
131 // returned when the AEC component is disabled or has not been initialized
132 // successfully.
133 struct AecCore* aec_core() const;
niklase@google.com470e71d2011-07-07 08:21:25 +0000134
peahb58a1582016-03-15 09:34:24 -0700135 void Initialize(int sample_rate_hz,
136 size_t num_reverse_channels_,
137 size_t num_output_channels_,
138 size_t num_proc_channels_);
peah88ac8532016-09-12 16:47:25 -0700139 void SetExtraOptions(const webrtc::Config& config);
Minyue13b96ba2015-10-03 00:39:14 +0200140 bool is_delay_agnostic_enabled() const;
141 bool is_extended_filter_enabled() const;
peah7789fe72016-04-15 01:19:44 -0700142 std::string GetExperimentsDescription();
peah0332c2d2016-04-15 11:23:33 -0700143 bool is_refined_adaptive_filter_enabled() const;
Minyue13b96ba2015-10-03 00:39:14 +0200144
peah20028c42016-03-04 11:50:54 -0800145 // Returns the system delay of the first AEC component.
146 int GetSystemDelayInSamples() const;
147
peah764e3642016-10-22 05:04:30 -0700148 static void PackRenderAudioBuffer(const AudioBuffer* audio,
149 size_t num_output_channels,
150 size_t num_channels,
151 std::vector<float>* packed_buffer);
152 static size_t NumCancellersRequired(size_t num_output_channels,
153 size_t num_reverse_channels);
154
niklase@google.com470e71d2011-07-07 08:21:25 +0000155 private:
peahb624d8c2016-03-05 03:01:14 -0800156 class Canceller;
peahb58a1582016-03-15 09:34:24 -0700157 struct StreamProperties;
peahb624d8c2016-03-05 03:01:14 -0800158
peahfa6228e2015-11-16 16:27:42 -0800159 void AllocateRenderQueue();
peahb624d8c2016-03-05 03:01:14 -0800160 int Configure();
peahfa6228e2015-11-16 16:27:42 -0800161
peahb624d8c2016-03-05 03:01:14 -0800162 bool enabled_ = false;
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +0100163 bool drift_compensation_enabled_;
164 bool metrics_enabled_;
165 SuppressionLevel suppression_level_;
166 int stream_drift_samples_;
167 bool was_stream_drift_set_;
168 bool stream_has_echo_;
169 bool delay_logging_enabled_;
170 bool extended_filter_enabled_;
171 bool delay_agnostic_enabled_;
172 bool refined_adaptive_filter_enabled_ = false;
peahdf3efa82015-11-28 12:35:15 -0800173
Per Åhgren11556462017-12-22 16:13:57 +0100174 // Only active on Chrome OS devices.
Sam Zackrisson7f4dfa42018-11-01 08:59:29 +0100175 const bool enforce_zero_stream_delay_;
Per Åhgren11556462017-12-22 16:13:57 +0100176
peahb624d8c2016-03-05 03:01:14 -0800177 std::vector<std::unique_ptr<Canceller>> cancellers_;
peahb58a1582016-03-15 09:34:24 -0700178 std::unique_ptr<StreamProperties> stream_properties_;
niklase@google.com470e71d2011-07-07 08:21:25 +0000179};
andrew@webrtc.org61e596f2013-07-25 18:28:29 +0000180
niklase@google.com470e71d2011-07-07 08:21:25 +0000181} // namespace webrtc
182
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200183#endif // MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_