/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
#define MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_

#include <stddef.h>
#include <memory>
#include <string>
#include <vector>

#include "api/array_view.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "rtc_base/constructormagic.h"
#include "rtc_base/criticalsection.h"
#include "rtc_base/thread_annotations.h"

25namespace webrtc {
andrew@webrtc.org61e596f2013-07-25 18:28:29 +000026
niklase@google.com470e71d2011-07-07 08:21:25 +000027class AudioBuffer;
28
sazabe490b22018-10-03 17:03:13 +020029// The acoustic echo cancellation (AEC) component provides better performance
30// than AECM but also requires more processing power and is dependent on delay
31// stability and reporting accuracy. As such it is well-suited and recommended
32// for PC and IP phone applications.
33class EchoCancellationImpl {
niklase@google.com470e71d2011-07-07 08:21:25 +000034 public:
peahb58a1582016-03-15 09:34:24 -070035 EchoCancellationImpl(rtc::CriticalSection* crit_render,
peahdf3efa82015-11-28 12:35:15 -080036 rtc::CriticalSection* crit_capture);
sazabe490b22018-10-03 17:03:13 +020037 ~EchoCancellationImpl();
niklase@google.com470e71d2011-07-07 08:21:25 +000038
peah764e3642016-10-22 05:04:30 -070039 void ProcessRenderAudio(rtc::ArrayView<const float> packed_render_audio);
peahb58a1582016-03-15 09:34:24 -070040 int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
niklase@google.com470e71d2011-07-07 08:21:25 +000041
sazabe490b22018-10-03 17:03:13 +020042 int Enable(bool enable);
43 bool is_enabled() const;
44
45 // Differences in clock speed on the primary and reverse streams can impact
46 // the AEC performance. On the client-side, this could be seen when different
47 // render and capture devices are used, particularly with webcams.
48 //
49 // This enables a compensation mechanism, and requires that
50 // set_stream_drift_samples() be called.
51 int enable_drift_compensation(bool enable);
52 bool is_drift_compensation_enabled() const;
53
54 // Sets the difference between the number of samples rendered and captured by
55 // the audio devices since the last call to |ProcessStream()|. Must be called
56 // if drift compensation is enabled, prior to |ProcessStream()|.
57 void set_stream_drift_samples(int drift);
58 int stream_drift_samples() const;
59
60 enum SuppressionLevel {
61 kLowSuppression,
62 kModerateSuppression,
63 kHighSuppression
64 };
65
66 // Sets the aggressiveness of the suppressor. A higher level trades off
67 // double-talk performance for increased echo suppression.
68 int set_suppression_level(SuppressionLevel level);
69 SuppressionLevel suppression_level() const;
70
71 // Returns false if the current frame almost certainly contains no echo
72 // and true if it _might_ contain echo.
73 bool stream_has_echo() const;
74
75 // Enables the computation of various echo metrics. These are obtained
76 // through |GetMetrics()|.
77 int enable_metrics(bool enable);
78 bool are_metrics_enabled() const;
79
80 // Each statistic is reported in dB.
81 // P_far: Far-end (render) signal power.
82 // P_echo: Near-end (capture) echo signal power.
83 // P_out: Signal power at the output of the AEC.
84 // P_a: Internal signal power at the point before the AEC's non-linear
85 // processor.
86 struct Metrics {
87 // RERL = ERL + ERLE
88 AudioProcessing::Statistic residual_echo_return_loss;
89
90 // ERL = 10log_10(P_far / P_echo)
91 AudioProcessing::Statistic echo_return_loss;
92
93 // ERLE = 10log_10(P_echo / P_out)
94 AudioProcessing::Statistic echo_return_loss_enhancement;
95
96 // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a)
97 AudioProcessing::Statistic a_nlp;
98
99 // Fraction of time that the AEC linear filter is divergent, in a 1-second
100 // non-overlapped aggregation window.
101 float divergent_filter_fraction;
102 };
103
Sam Zackrissoncdf0e6d2018-09-17 11:05:17 +0200104 // Provides various statistics about the AEC.
sazabe490b22018-10-03 17:03:13 +0200105 int GetMetrics(Metrics* metrics);
106
107 // Enables computation and logging of delay values. Statistics are obtained
108 // through |GetDelayMetrics()|.
109 int enable_delay_logging(bool enable);
110 bool is_delay_logging_enabled() const;
111
Sam Zackrissoncdf0e6d2018-09-17 11:05:17 +0200112 // Provides delay metrics.
sazabe490b22018-10-03 17:03:13 +0200113 // The delay metrics consists of the delay |median| and the delay standard
114 // deviation |std|. It also consists of the fraction of delay estimates
115 // |fraction_poor_delays| that can make the echo cancellation perform poorly.
116 // The values are aggregated until the first call to |GetDelayMetrics()| and
117 // afterwards aggregated and updated every second.
118 // Note that if there are several clients pulling metrics from
119 // |GetDelayMetrics()| during a session the first call from any of them will
120 // change to one second aggregation window for all.
121 int GetDelayMetrics(int* median, int* std);
122 int GetDelayMetrics(int* median, int* std, float* fraction_poor_delays);
123
124 // Returns a pointer to the low level AEC component. In case of multiple
125 // channels, the pointer to the first one is returned. A NULL pointer is
126 // returned when the AEC component is disabled or has not been initialized
127 // successfully.
128 struct AecCore* aec_core() const;
niklase@google.com470e71d2011-07-07 08:21:25 +0000129
peahb58a1582016-03-15 09:34:24 -0700130 void Initialize(int sample_rate_hz,
131 size_t num_reverse_channels_,
132 size_t num_output_channels_,
133 size_t num_proc_channels_);
peah88ac8532016-09-12 16:47:25 -0700134 void SetExtraOptions(const webrtc::Config& config);
Minyue13b96ba2015-10-03 00:39:14 +0200135 bool is_delay_agnostic_enabled() const;
136 bool is_extended_filter_enabled() const;
peah7789fe72016-04-15 01:19:44 -0700137 std::string GetExperimentsDescription();
peah0332c2d2016-04-15 11:23:33 -0700138 bool is_refined_adaptive_filter_enabled() const;
Minyue13b96ba2015-10-03 00:39:14 +0200139
peah20028c42016-03-04 11:50:54 -0800140 // Returns the system delay of the first AEC component.
141 int GetSystemDelayInSamples() const;
142
peah764e3642016-10-22 05:04:30 -0700143 static void PackRenderAudioBuffer(const AudioBuffer* audio,
144 size_t num_output_channels,
145 size_t num_channels,
146 std::vector<float>* packed_buffer);
147 static size_t NumCancellersRequired(size_t num_output_channels,
148 size_t num_reverse_channels);
149
niklase@google.com470e71d2011-07-07 08:21:25 +0000150 private:
peahb624d8c2016-03-05 03:01:14 -0800151 class Canceller;
peahb58a1582016-03-15 09:34:24 -0700152 struct StreamProperties;
peahb624d8c2016-03-05 03:01:14 -0800153
peahfa6228e2015-11-16 16:27:42 -0800154 void AllocateRenderQueue();
peahb624d8c2016-03-05 03:01:14 -0800155 int Configure();
peahfa6228e2015-11-16 16:27:42 -0800156
danilchap56359be2017-09-07 07:53:45 -0700157 rtc::CriticalSection* const crit_render_ RTC_ACQUIRED_BEFORE(crit_capture_);
peahdf3efa82015-11-28 12:35:15 -0800158 rtc::CriticalSection* const crit_capture_;
peahfa6228e2015-11-16 16:27:42 -0800159
peahb624d8c2016-03-05 03:01:14 -0800160 bool enabled_ = false;
danilchap56359be2017-09-07 07:53:45 -0700161 bool drift_compensation_enabled_ RTC_GUARDED_BY(crit_capture_);
162 bool metrics_enabled_ RTC_GUARDED_BY(crit_capture_);
163 SuppressionLevel suppression_level_ RTC_GUARDED_BY(crit_capture_);
164 int stream_drift_samples_ RTC_GUARDED_BY(crit_capture_);
165 bool was_stream_drift_set_ RTC_GUARDED_BY(crit_capture_);
166 bool stream_has_echo_ RTC_GUARDED_BY(crit_capture_);
167 bool delay_logging_enabled_ RTC_GUARDED_BY(crit_capture_);
168 bool extended_filter_enabled_ RTC_GUARDED_BY(crit_capture_);
169 bool delay_agnostic_enabled_ RTC_GUARDED_BY(crit_capture_);
170 bool refined_adaptive_filter_enabled_ RTC_GUARDED_BY(crit_capture_) = false;
peahdf3efa82015-11-28 12:35:15 -0800171
Per Ã…hgren11556462017-12-22 16:13:57 +0100172 // Only active on Chrome OS devices.
173 const bool enforce_zero_stream_delay_ RTC_GUARDED_BY(crit_capture_);
174
peahb624d8c2016-03-05 03:01:14 -0800175 std::vector<std::unique_ptr<Canceller>> cancellers_;
peahb58a1582016-03-15 09:34:24 -0700176 std::unique_ptr<StreamProperties> stream_properties_;
177
peahb624d8c2016-03-05 03:01:14 -0800178 RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(EchoCancellationImpl);
niklase@google.com470e71d2011-07-07 08:21:25 +0000179};
andrew@webrtc.org61e596f2013-07-25 18:28:29 +0000180
niklase@google.com470e71d2011-07-07 08:21:25 +0000181} // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_