blob: 3454214fd739d6dd36f0aafc0b8f2503ad5ba9a5 [file] [log] [blame]
/*
 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_processing/residual_echo_detector.h"
ivoc9f4a4a02016-10-28 05:39:16 -070012
ivocaf27ed02016-10-28 07:04:03 -070013#include <algorithm>
14#include <numeric>
15
Yves Gerey988cc082018-10-23 12:03:01 +020016#include "absl/types/optional.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_processing/audio_buffer.h"
18#include "modules/audio_processing/logging/apm_data_dumper.h"
19#include "rtc_base/atomicops.h"
Yves Gerey988cc082018-10-23 12:03:01 +020020#include "rtc_base/checks.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020021#include "rtc_base/logging.h"
22#include "system_wrappers/include/metrics.h"
ivoc9f4a4a02016-10-28 05:39:16 -070023
ivocaf27ed02016-10-28 07:04:03 -070024namespace {
25
26float Power(rtc::ArrayView<const float> input) {
Alex Loiko890988c2017-08-31 10:25:48 +020027 if (input.empty()) {
peah9e6a2902017-05-15 07:19:21 -070028 return 0.f;
29 }
30 return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) /
31 input.size();
ivocaf27ed02016-10-28 07:04:03 -070032}
33
// Length of the circular render-history buffers and number of covariance
// estimators, i.e. how many delays (in frames) are evaluated per capture
// frame.
constexpr size_t kLookbackFrames = 650;
// Size passed to the queue that hands render power values to the capture
// side; also the number of consecutive non-empty render calls after which one
// queued value is dropped.
// TODO(ivoc): Verify the size of this buffer.
constexpr size_t kRenderBufferSize = 30;
// Smoothing factor of the recursive reliability update.
constexpr float kAlpha = 0.001f;
// Number of echo likelihood values aggregated for the "recent max" metric:
// 10 seconds of data, updated every 10 ms.
constexpr size_t kAggregationBufferSize = 10 * 100;
ivocaf27ed02016-10-28 07:04:03 -070040
41} // namespace
42
ivoc9f4a4a02016-10-28 05:39:16 -070043namespace webrtc {
44
peah9e6a2902017-05-15 07:19:21 -070045int ResidualEchoDetector::instance_count_ = 0;
46
ivocaf27ed02016-10-28 07:04:03 -070047ResidualEchoDetector::ResidualEchoDetector()
peah9e6a2902017-05-15 07:19:21 -070048 : data_dumper_(
49 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
50 render_buffer_(kRenderBufferSize),
ivocaf27ed02016-10-28 07:04:03 -070051 render_power_(kLookbackFrames),
52 render_power_mean_(kLookbackFrames),
53 render_power_std_dev_(kLookbackFrames),
ivoc4e477a12017-01-15 08:29:46 -080054 covariances_(kLookbackFrames),
55 recent_likelihood_max_(kAggregationBufferSize) {}
ivoc9f4a4a02016-10-28 05:39:16 -070056
57ResidualEchoDetector::~ResidualEchoDetector() = default;
58
59void ResidualEchoDetector::AnalyzeRenderAudio(
ivocaf27ed02016-10-28 07:04:03 -070060 rtc::ArrayView<const float> render_audio) {
peah9e6a2902017-05-15 07:19:21 -070061 // Dump debug data assuming 48 kHz sample rate (if this assumption is not
62 // valid the dumped audio will need to be converted offline accordingly).
63 data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(),
64 48000, 1);
65
ivocaf27ed02016-10-28 07:04:03 -070066 if (render_buffer_.Size() == 0) {
67 frames_since_zero_buffer_size_ = 0;
68 } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) {
69 // This can happen in a few cases: at the start of a call, due to a glitch
70 // or due to clock drift. The excess capture value will be ignored.
71 // TODO(ivoc): Include how often this happens in APM stats.
72 render_buffer_.Pop();
73 frames_since_zero_buffer_size_ = 0;
74 }
75 ++frames_since_zero_buffer_size_;
76 float power = Power(render_audio);
77 render_buffer_.Push(power);
ivoc9f4a4a02016-10-28 05:39:16 -070078}
79
80void ResidualEchoDetector::AnalyzeCaptureAudio(
ivocaf27ed02016-10-28 07:04:03 -070081 rtc::ArrayView<const float> capture_audio) {
peah9e6a2902017-05-15 07:19:21 -070082 // Dump debug data assuming 48 kHz sample rate (if this assumption is not
83 // valid the dumped audio will need to be converted offline accordingly).
84 data_dumper_->DumpWav("ed_capture", capture_audio.size(),
85 capture_audio.data(), 48000, 1);
86
ivocaf27ed02016-10-28 07:04:03 -070087 if (first_process_call_) {
88 // On the first process call (so the start of a call), we must flush the
89 // render buffer, otherwise the render data will be delayed.
90 render_buffer_.Clear();
91 first_process_call_ = false;
92 }
93
94 // Get the next render value.
Danil Chapovalovdb9f7ab2018-06-19 10:50:11 +020095 const absl::optional<float> buffered_render_power = render_buffer_.Pop();
ivocaf27ed02016-10-28 07:04:03 -070096 if (!buffered_render_power) {
97 // This can happen in a few cases: at the start of a call, due to a glitch
98 // or due to clock drift. The excess capture value will be ignored.
99 // TODO(ivoc): Include how often this happens in APM stats.
100 return;
101 }
102 // Update the render statistics, and store the statistics in circular buffers.
103 render_statistics_.Update(*buffered_render_power);
104 RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames);
105 render_power_[next_insertion_index_] = *buffered_render_power;
106 render_power_mean_[next_insertion_index_] = render_statistics_.mean();
107 render_power_std_dev_[next_insertion_index_] =
108 render_statistics_.std_deviation();
109
110 // Get the next capture value, update capture statistics and add the relevant
111 // values to the buffers.
112 const float capture_power = Power(capture_audio);
113 capture_statistics_.Update(capture_power);
114 const float capture_mean = capture_statistics_.mean();
115 const float capture_std_deviation = capture_statistics_.std_deviation();
116
117 // Update the covariance values and determine the new echo likelihood.
118 echo_likelihood_ = 0.f;
peah94f6fa02017-05-16 07:25:06 -0700119 size_t read_index = next_insertion_index_;
ivoc1592c742017-05-17 09:53:02 -0700120
121 int best_delay = -1;
ivocaf27ed02016-10-28 07:04:03 -0700122 for (size_t delay = 0; delay < covariances_.size(); ++delay) {
ivocaf27ed02016-10-28 07:04:03 -0700123 RTC_DCHECK_LT(read_index, render_power_.size());
124 covariances_[delay].Update(capture_power, capture_mean,
125 capture_std_deviation, render_power_[read_index],
126 render_power_mean_[read_index],
127 render_power_std_dev_[read_index]);
peah94f6fa02017-05-16 07:25:06 -0700128 read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1;
ivoc1592c742017-05-17 09:53:02 -0700129
130 if (covariances_[delay].normalized_cross_correlation() > echo_likelihood_) {
131 echo_likelihood_ = covariances_[delay].normalized_cross_correlation();
132 best_delay = static_cast<int>(delay);
133 }
134 }
135 // This is a temporary log message to help find the underlying cause for echo
136 // likelihoods > 1.0.
137 // TODO(ivoc): Remove once the issue is resolved.
138 if (echo_likelihood_ > 1.1f) {
139 // Make sure we don't spam the log.
140 if (log_counter_ < 5 && best_delay != -1) {
141 size_t read_index = kLookbackFrames + next_insertion_index_ - best_delay;
142 if (read_index >= kLookbackFrames) {
143 read_index -= kLookbackFrames;
144 }
145 RTC_DCHECK_LT(read_index, render_power_.size());
Yves Gerey665174f2018-06-19 15:03:05 +0200146 RTC_LOG_F(LS_ERROR) << "Echo detector internal state: {"
147 "Echo likelihood: "
148 << echo_likelihood_ << ", Best Delay: " << best_delay
149 << ", Covariance: "
150 << covariances_[best_delay].covariance()
151 << ", Last capture power: " << capture_power
152 << ", Capture mean: " << capture_mean
153 << ", Capture_standard deviation: "
154 << capture_std_deviation << ", Last render power: "
155 << render_power_[read_index]
156 << ", Render mean: " << render_power_mean_[read_index]
157 << ", Render standard deviation: "
158 << render_power_std_dev_[read_index]
159 << ", Reliability: " << reliability_ << "}";
ivoc1592c742017-05-17 09:53:02 -0700160 log_counter_++;
161 }
ivocaf27ed02016-10-28 07:04:03 -0700162 }
ivoc860249e2017-05-16 06:50:11 -0700163 RTC_DCHECK_LT(echo_likelihood_, 1.1f);
ivoc1592c742017-05-17 09:53:02 -0700164
ivocfbb374d2016-11-17 06:19:47 -0800165 reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f;
166 echo_likelihood_ *= reliability_;
ivoc8f94cd32017-05-05 05:50:10 -0700167 // This is a temporary fix to prevent echo likelihood values > 1.0.
168 // TODO(ivoc): Find the root cause of this issue and fix it.
169 echo_likelihood_ = std::min(echo_likelihood_, 1.0f);
ivocef6cbae2016-11-10 08:21:04 -0800170 int echo_percentage = static_cast<int>(echo_likelihood_ * 100);
171 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood",
172 echo_percentage, 0, 100, 100 /* number of bins */);
ivocaf27ed02016-10-28 07:04:03 -0700173
ivoc4e477a12017-01-15 08:29:46 -0800174 // Update the buffer of recent likelihood values.
175 recent_likelihood_max_.Update(echo_likelihood_);
176
ivocaf27ed02016-10-28 07:04:03 -0700177 // Update the next insertion index.
peah94f6fa02017-05-16 07:25:06 -0700178 next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1)
179 ? next_insertion_index_ + 1
180 : 0;
ivoc9f4a4a02016-10-28 05:39:16 -0700181}
182
Ivo Creusen647ef092018-03-14 17:13:48 +0100183void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/,
184 int /*num_capture_channels*/,
185 int /*render_sample_rate_hz*/,
186 int /*num_render_channels*/) {
ivocaf27ed02016-10-28 07:04:03 -0700187 render_buffer_.Clear();
188 std::fill(render_power_.begin(), render_power_.end(), 0.f);
189 std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f);
190 std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f);
191 render_statistics_.Clear();
192 capture_statistics_.Clear();
ivoc4e477a12017-01-15 08:29:46 -0800193 recent_likelihood_max_.Clear();
ivocaf27ed02016-10-28 07:04:03 -0700194 for (auto& cov : covariances_) {
195 cov.Clear();
196 }
197 echo_likelihood_ = 0.f;
198 next_insertion_index_ = 0;
ivocfbb374d2016-11-17 06:19:47 -0800199 reliability_ = 0.f;
ivoc9f4a4a02016-10-28 05:39:16 -0700200}
201
Ivo Creusen09fa4b02018-01-11 16:08:54 +0100202void EchoDetector::PackRenderAudioBuffer(AudioBuffer* audio,
203 std::vector<float>* packed_buffer) {
ivoc9f4a4a02016-10-28 05:39:16 -0700204 packed_buffer->clear();
peah9e6a2902017-05-15 07:19:21 -0700205 packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0],
206 audio->channels_f()[0] + audio->num_frames());
ivoc9f4a4a02016-10-28 05:39:16 -0700207}
208
Ivo Creusen09fa4b02018-01-11 16:08:54 +0100209EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const {
210 EchoDetector::Metrics metrics;
211 metrics.echo_likelihood = echo_likelihood_;
212 metrics.echo_likelihood_recent_max = recent_likelihood_max_.max();
213 return metrics;
214}
ivoc9f4a4a02016-10-28 05:39:16 -0700215} // namespace webrtc