blob: 79fe13e431dc0b840ad1a372aaaaa0d7b48000c2 [file] [log] [blame]
/*
 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
12#define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
peah522d71b2017-02-23 05:16:26 -080013
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stddef.h>
Jonas Olssona4d87372019-07-05 19:08:33 +020015
Yves Gerey988cc082018-10-23 12:03:01 +020016#include <array>
peah522d71b2017-02-23 05:16:26 -080017#include <memory>
18#include <vector>
19
Danil Chapovalovdb9f7ab2018-06-19 10:50:11 +020020#include "absl/types/optional.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020021#include "api/array_view.h"
Gustaf Ullberg3646f972018-02-14 15:19:04 +010022#include "api/audio/echo_canceller3_config.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020023#include "modules/audio_processing/aec3/aec3_common.h"
Per Åhgren3ab308f2018-02-21 08:46:03 +010024#include "modules/audio_processing/aec3/delay_estimate.h"
Jesús de Vicente Peñad5cb4772018-04-25 13:58:45 +020025#include "modules/audio_processing/aec3/echo_audibility.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020026#include "modules/audio_processing/aec3/echo_path_variability.h"
27#include "modules/audio_processing/aec3/erl_estimator.h"
28#include "modules/audio_processing/aec3/erle_estimator.h"
Per Åhgren5c532d32018-03-22 00:29:25 +010029#include "modules/audio_processing/aec3/filter_analyzer.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020030#include "modules/audio_processing/aec3/render_buffer.h"
Jesús de Vicente Peña496cedf2018-07-04 11:02:09 +020031#include "modules/audio_processing/aec3/reverb_model_estimator.h"
Per Åhgrenb20b9372018-07-13 00:22:54 +020032#include "modules/audio_processing/aec3/subtractor_output.h"
33#include "modules/audio_processing/aec3/subtractor_output_analyzer.h"
peah522d71b2017-02-23 05:16:26 -080034
35namespace webrtc {
36
37class ApmDataDumper;
38
39// Handles the state and the conditions for the echo removal functionality.
40class AecState {
41 public:
Sam Zackrisson8f736c02019-10-01 12:47:53 +020042 AecState(const EchoCanceller3Config& config, size_t num_capture_channels);
peah522d71b2017-02-23 05:16:26 -080043 ~AecState();
44
Per Åhgren4b3bc0f2017-12-20 15:26:13 +010045 // Returns whether the echo subtractor can be used to determine the residual
46 // echo.
Per Åhgrenc5a38ad2018-10-04 15:37:54 +020047 bool UsableLinearEstimate() const {
Gustaf Ullberg52caa0e2019-04-11 14:43:17 +020048 return filter_quality_state_.LinearFilterUsable() &&
49 config_.filter.use_linear_filter;
Per Åhgrenc5a38ad2018-10-04 15:37:54 +020050 }
peah522d71b2017-02-23 05:16:26 -080051
Per Åhgren5c532d32018-03-22 00:29:25 +010052 // Returns whether the echo subtractor output should be used as output.
Per Åhgrenc5a38ad2018-10-04 15:37:54 +020053 bool UseLinearFilterOutput() const {
Gustaf Ullberg52caa0e2019-04-11 14:43:17 +020054 return filter_quality_state_.LinearFilterUsable() &&
55 config_.filter.use_linear_filter;
Per Åhgrenc5a38ad2018-10-04 15:37:54 +020056 }
Per Åhgren5c532d32018-03-22 00:29:25 +010057
peah522d71b2017-02-23 05:16:26 -080058 // Returns whether the render signal is currently active.
Per Åhgren4b3bc0f2017-12-20 15:26:13 +010059 bool ActiveRender() const { return blocks_with_active_render_ > 200; }
peahebe77782017-02-27 07:29:21 -080060
Jesús de Vicente Peñad5cb4772018-04-25 13:58:45 +020061 // Returns the appropriate scaling of the residual echo to match the
62 // audibility.
Per Åhgrenc5a38ad2018-10-04 15:37:54 +020063 void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const;
Jesús de Vicente Peñad5cb4772018-04-25 13:58:45 +020064
65 // Returns whether the stationary properties of the signals are used in the
66 // aec.
Per Åhgrenb4161d32019-10-08 12:35:47 +020067 bool UseStationarityProperties() const {
Jesús de Vicente Peña70a59632019-04-16 12:32:15 +020068 return config_.echo_audibility.use_stationarity_properties;
Per Åhgrenf4801a12018-09-27 13:14:02 +020069 }
Jesús de Vicente Peñad5cb4772018-04-25 13:58:45 +020070
peah522d71b2017-02-23 05:16:26 -080071 // Returns the ERLE.
Per Åhgrenb4161d32019-10-08 12:35:47 +020072 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
peah522d71b2017-02-23 05:16:26 -080073 return erle_estimator_.Erle();
74 }
75
Per Åhgrenc5a38ad2018-10-04 15:37:54 +020076 // Returns an offset to apply to the estimation of the residual echo
77 // computation. Returning nullopt means that no offset should be used, while
78 // any other value will be applied as a multiplier to the estimated residual
79 // echo.
80 absl::optional<float> ErleUncertainty() const;
Gustaf Ullberg6c618c72018-06-28 14:21:16 +020081
Jesús de Vicente Peñae9a7e902018-09-27 11:49:39 +020082 // Returns the fullband ERLE estimate in log2 units.
83 float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }
Gustaf Ullberg332150d2017-11-22 14:17:39 +010084
peah522d71b2017-02-23 05:16:26 -080085 // Returns the ERL.
86 const std::array<float, kFftLengthBy2Plus1>& Erl() const {
87 return erl_estimator_.Erl();
88 }
89
Gustaf Ullberg332150d2017-11-22 14:17:39 +010090 // Returns the time-domain ERL.
91 float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }
92
peah522d71b2017-02-23 05:16:26 -080093 // Returns the delay estimate based on the linear filter.
Per Åhgren8be669f2019-10-11 23:02:26 +020094 int FilterDelayBlocks() const {
95 return delay_state_.DirectPathFilterDelays()[0];
96 }
peah522d71b2017-02-23 05:16:26 -080097
peah522d71b2017-02-23 05:16:26 -080098 // Returns whether the capture signal is saturated.
99 bool SaturatedCapture() const { return capture_signal_saturation_; }
100
peah86afe9d2017-04-06 15:45:32 -0700101 // Returns whether the echo signal is saturated.
Gustaf Ullberg68d6d442019-01-29 10:08:15 +0100102 bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); }
peah86afe9d2017-04-06 15:45:32 -0700103
peah522d71b2017-02-23 05:16:26 -0800104 // Updates the capture signal saturation.
105 void UpdateCaptureSaturation(bool capture_signal_saturation) {
106 capture_signal_saturation_ = capture_signal_saturation;
107 }
108
Per Åhgren1b4059e2017-10-15 20:19:21 +0200109 // Returns whether the transparent mode is active
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200110 bool TransparentMode() const { return transparent_state_.Active(); }
peah522d71b2017-02-23 05:16:26 -0800111
peah86afe9d2017-04-06 15:45:32 -0700112 // Takes appropriate action at an echo path change.
113 void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
114
peah89420452017-04-07 06:13:39 -0700115 // Returns the decay factor for the echo reverberation.
Jesús de Vicente Peña496cedf2018-07-04 11:02:09 +0200116 float ReverbDecay() const { return reverb_model_estimator_.ReverbDecay(); }
peah89420452017-04-07 06:13:39 -0700117
Per Åhgrenef5d5af2018-07-31 00:03:46 +0200118 // Return the frequency response of the reverberant echo.
119 rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
120 return reverb_model_estimator_.GetReverbFrequencyResponse();
121 }
122
Jesús de Vicente Peña02e9e442018-08-29 13:34:07 +0200123 // Returns whether the transition for going out of the initial stated has
124 // been triggered.
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200125 bool TransitionTriggered() const {
126 return initial_state_.TransitionTriggered();
127 }
Per Åhgrena98c8072018-01-15 19:17:16 +0100128
peah522d71b2017-02-23 05:16:26 -0800129 // Updates the aec state.
Sam Zackrisson8f736c02019-10-01 12:47:53 +0200130 // TODO(bugs.webrtc.org/10913): Handle multi-channel adaptive filter response.
131 // TODO(bugs.webrtc.org/10913): Compute multi-channel ERL, ERLE, and reverb.
Sam Zackrisson46b01402019-10-08 16:17:48 +0200132 void Update(
133 const absl::optional<DelayEstimate>& external_delay,
134 rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
Per Åhgren8be669f2019-10-11 23:02:26 +0200135 adaptive_filter_frequency_responses,
136 rtc::ArrayView<const std::vector<float>>
137 adaptive_filter_impulse_responses,
Sam Zackrisson46b01402019-10-08 16:17:48 +0200138 const RenderBuffer& render_buffer,
Per Åhgrenf9807252019-10-09 13:57:07 +0200139 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_main,
140 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
Sam Zackrisson46b01402019-10-08 16:17:48 +0200141 rtc::ArrayView<const SubtractorOutput> subtractor_output);
peah522d71b2017-02-23 05:16:26 -0800142
Jesús de Vicente Peña075cb2b2018-06-13 15:13:55 +0200143 // Returns filter length in blocks.
144 int FilterLengthBlocks() const {
Sam Zackrisson46b01402019-10-08 16:17:48 +0200145 // All filters have the same length, so arbitrarily return channel 0 length.
Per Åhgren8be669f2019-10-11 23:02:26 +0200146 return filter_analyzer_.FilterLengthBlocks();
Jesús de Vicente Peña075cb2b2018-06-13 15:13:55 +0200147 }
148
peah522d71b2017-02-23 05:16:26 -0800149 private:
150 static int instance_count_;
151 std::unique_ptr<ApmDataDumper> data_dumper_;
Per Åhgren90e3fbd2018-05-16 15:25:04 +0200152 const EchoCanceller3Config config_;
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200153
154 // Class for controlling the transition from the intial state, which in turn
155 // controls when the filter parameters for the initial state should be used.
156 class InitialState {
157 public:
158 explicit InitialState(const EchoCanceller3Config& config);
159 // Resets the state to again begin in the initial state.
160 void Reset();
161
162 // Updates the state based on new data.
163 void Update(bool active_render, bool saturated_capture);
164
165 // Returns whether the initial state is active or not.
166 bool InitialStateActive() const { return initial_state_; }
167
168 // Returns that the transition from the initial state has was started.
169 bool TransitionTriggered() const { return transition_triggered_; }
170
171 private:
172 const bool conservative_initial_phase_;
173 const float initial_state_seconds_;
174 bool transition_triggered_ = false;
175 bool initial_state_ = true;
176 size_t strong_not_saturated_render_blocks_ = 0;
177 } initial_state_;
178
179 // Class for choosing the direct-path delay relative to the beginning of the
180 // filter, as well as any other data related to the delay used within
181 // AecState.
182 class FilterDelay {
183 public:
Per Åhgren8be669f2019-10-11 23:02:26 +0200184 FilterDelay(const EchoCanceller3Config& config,
185 size_t num_capture_channels);
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200186
187 // Returns whether an external delay has been reported to the AecState (from
188 // the delay estimator).
189 bool ExternalDelayReported() const { return external_delay_reported_; }
190
191 // Returns the delay in blocks relative to the beginning of the filter that
192 // corresponds to the direct path of the echo.
Per Åhgren8be669f2019-10-11 23:02:26 +0200193 rtc::ArrayView<const int> DirectPathFilterDelays() const {
194 return filter_delays_blocks_;
195 }
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200196
197 // Updates the delay estimates based on new data.
Sam Zackrisson46b01402019-10-08 16:17:48 +0200198 void Update(
Per Åhgren8be669f2019-10-11 23:02:26 +0200199 rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
Sam Zackrisson46b01402019-10-08 16:17:48 +0200200 const absl::optional<DelayEstimate>& external_delay,
201 size_t blocks_with_proper_filter_adaptation);
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200202
203 private:
Gustaf Ullberg9249fbf2019-03-14 11:24:54 +0100204 const int delay_headroom_samples_;
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200205 bool external_delay_reported_ = false;
Per Åhgren8be669f2019-10-11 23:02:26 +0200206 std::vector<int> filter_delays_blocks_;
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200207 absl::optional<DelayEstimate> external_delay_;
208 } delay_state_;
209
210 // Class for detecting and toggling the transparent mode which causes the
211 // suppressor to apply no suppression.
212 class TransparentMode {
213 public:
214 explicit TransparentMode(const EchoCanceller3Config& config);
215
216 // Returns whether the transparent mode should be active.
217 bool Active() const { return transparency_activated_; }
218
219 // Resets the state of the detector.
220 void Reset();
221
222 // Updates the detection deciscion based on new data.
223 void Update(int filter_delay_blocks,
Sam Zackrisson46b01402019-10-08 16:17:48 +0200224 bool any_filter_consistent,
225 bool any_filter_converged,
226 bool all_filters_diverged,
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200227 bool active_render,
228 bool saturated_capture);
229
230 private:
231 const bool bounded_erl_;
232 const bool linear_and_stable_echo_path_;
233 size_t capture_block_counter_ = 0;
234 bool transparency_activated_ = false;
235 size_t active_blocks_since_sane_filter_;
236 bool sane_filter_observed_ = false;
237 bool finite_erl_recently_detected_ = false;
238 size_t non_converged_sequence_size_;
239 size_t diverged_sequence_size_ = 0;
240 size_t active_non_converged_sequence_size_ = 0;
241 size_t num_converged_blocks_ = 0;
242 bool recent_convergence_during_activity_ = false;
243 size_t strong_not_saturated_render_blocks_ = 0;
244 } transparent_state_;
245
246 // Class for analyzing how well the linear filter is, and can be expected to,
247 // perform on the current signals. The purpose of this is for using to
248 // select the echo suppression functionality as well as the input to the echo
249 // suppressor.
250 class FilteringQualityAnalyzer {
251 public:
Per Åhgren8be669f2019-10-11 23:02:26 +0200252 FilteringQualityAnalyzer(const EchoCanceller3Config& config,
253 size_t num_capture_channels);
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200254
Per Åhgren8be669f2019-10-11 23:02:26 +0200255 // Returns whether the linear filter can be used for the echo
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200256 // canceller output.
Per Åhgren8be669f2019-10-11 23:02:26 +0200257 bool LinearFilterUsable() const { return overall_usable_linear_estimates_; }
258
259 // Returns whether an individual filter output can be used for the echo
260 // canceller output.
261 const std::vector<bool>& UsableLinearFilterOutputs() const {
262 return usable_linear_filter_estimates_;
263 }
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200264
265 // Resets the state of the analyzer.
266 void Reset();
267
268 // Updates the analysis based on new data.
269 void Update(bool active_render,
270 bool transparent_mode,
271 bool saturated_capture,
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200272 const absl::optional<DelayEstimate>& external_delay,
Sam Zackrisson46b01402019-10-08 16:17:48 +0200273 bool any_filter_converged);
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200274
275 private:
Per Åhgren8be669f2019-10-11 23:02:26 +0200276 const bool use_linear_filter_;
277 bool overall_usable_linear_estimates_ = false;
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200278 size_t filter_update_blocks_since_reset_ = 0;
279 size_t filter_update_blocks_since_start_ = 0;
280 bool convergence_seen_ = false;
Per Åhgren8be669f2019-10-11 23:02:26 +0200281 std::vector<bool> usable_linear_filter_estimates_;
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200282 } filter_quality_state_;
283
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200284 // Class for detecting whether the echo is to be considered to be
285 // saturated.
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200286 class SaturationDetector {
287 public:
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200288 // Returns whether the echo is to be considered saturated.
Nico Weber22f99252019-02-20 10:13:16 -0500289 bool SaturatedEcho() const { return saturated_echo_; }
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200290
291 // Updates the detection decision based on new data.
Sam Zackrisson8f736c02019-10-01 12:47:53 +0200292 void Update(rtc::ArrayView<const std::vector<float>> x,
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200293 bool saturated_capture,
294 bool usable_linear_estimate,
Sam Zackrisson8f736c02019-10-01 12:47:53 +0200295 rtc::ArrayView<const SubtractorOutput> subtractor_output,
Per Åhgren3e7b7b12018-10-16 14:38:10 +0200296 float echo_path_gain);
297
298 private:
299 bool saturated_echo_ = false;
300 } saturation_detector_;
301
peah522d71b2017-02-23 05:16:26 -0800302 ErlEstimator erl_estimator_;
303 ErleEstimator erle_estimator_;
Per Åhgrenc5a38ad2018-10-04 15:37:54 +0200304 size_t strong_not_saturated_render_blocks_ = 0;
Per Åhgren4b3bc0f2017-12-20 15:26:13 +0100305 size_t blocks_with_active_render_ = 0;
peah522d71b2017-02-23 05:16:26 -0800306 bool capture_signal_saturation_ = false;
Per Åhgren8be669f2019-10-11 23:02:26 +0200307 FilterAnalyzer filter_analyzer_;
Danil Chapovalovdb9f7ab2018-06-19 10:50:11 +0200308 absl::optional<DelayEstimate> external_delay_;
Jesús de Vicente Peñad5cb4772018-04-25 13:58:45 +0200309 EchoAudibility echo_audibility_;
Jesús de Vicente Peña496cedf2018-07-04 11:02:09 +0200310 ReverbModelEstimator reverb_model_estimator_;
Per Åhgren1d3008b2019-10-09 12:54:43 +0200311 ReverbModel reverb_model_;
Sam Zackrisson8f736c02019-10-01 12:47:53 +0200312 std::vector<SubtractorOutputAnalyzer> subtractor_output_analyzers_;
peah522d71b2017-02-23 05:16:26 -0800313};
314
315} // namespace webrtc
316
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200317#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_