ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #include "modules/audio_processing/residual_echo_detector.h" |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 12 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 13 | #include <algorithm> |
| 14 | #include <numeric> |
| 15 | |
Yves Gerey | 988cc08 | 2018-10-23 12:03:01 +0200 | [diff] [blame^] | 16 | #include "absl/types/optional.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 17 | #include "modules/audio_processing/audio_buffer.h" |
| 18 | #include "modules/audio_processing/logging/apm_data_dumper.h" |
| 19 | #include "rtc_base/atomicops.h" |
Yves Gerey | 988cc08 | 2018-10-23 12:03:01 +0200 | [diff] [blame^] | 20 | #include "rtc_base/checks.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 21 | #include "rtc_base/logging.h" |
| 22 | #include "system_wrappers/include/metrics.h" |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 23 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 24 | namespace { |
| 25 | |
| 26 | float Power(rtc::ArrayView<const float> input) { |
Alex Loiko | 890988c | 2017-08-31 10:25:48 +0200 | [diff] [blame] | 27 | if (input.empty()) { |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 28 | return 0.f; |
| 29 | } |
| 30 | return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) / |
| 31 | input.size(); |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 32 | } |
| 33 | |
// Length of the render power/mean/std-dev histories, and the number of delay
// candidates for which a covariance estimate is maintained.
constexpr size_t kLookbackFrames = 650;
// Size used for the buffer that holds render powers until the capture side
// consumes them.
// TODO(ivoc): Verify the size of this buffer.
constexpr size_t kRenderBufferSize = 30;
// Smoothing factor applied when updating the reliability estimate.
constexpr float kAlpha = 0.001f;
// Size of the recent-likelihood buffer: 10 seconds of data, updated every
// 10 ms.
constexpr size_t kAggregationBufferSize = 10 * 100;
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 40 | |
| 41 | } // namespace |
| 42 | |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 43 | namespace webrtc { |
| 44 | |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 45 | int ResidualEchoDetector::instance_count_ = 0; |
| 46 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 47 | ResidualEchoDetector::ResidualEchoDetector() |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 48 | : data_dumper_( |
| 49 | new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), |
| 50 | render_buffer_(kRenderBufferSize), |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 51 | render_power_(kLookbackFrames), |
| 52 | render_power_mean_(kLookbackFrames), |
| 53 | render_power_std_dev_(kLookbackFrames), |
ivoc | 4e477a1 | 2017-01-15 08:29:46 -0800 | [diff] [blame] | 54 | covariances_(kLookbackFrames), |
| 55 | recent_likelihood_max_(kAggregationBufferSize) {} |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 56 | |
| 57 | ResidualEchoDetector::~ResidualEchoDetector() = default; |
| 58 | |
| 59 | void ResidualEchoDetector::AnalyzeRenderAudio( |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 60 | rtc::ArrayView<const float> render_audio) { |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 61 | // Dump debug data assuming 48 kHz sample rate (if this assumption is not |
| 62 | // valid the dumped audio will need to be converted offline accordingly). |
| 63 | data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(), |
| 64 | 48000, 1); |
| 65 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 66 | if (render_buffer_.Size() == 0) { |
| 67 | frames_since_zero_buffer_size_ = 0; |
| 68 | } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) { |
| 69 | // This can happen in a few cases: at the start of a call, due to a glitch |
| 70 | // or due to clock drift. The excess capture value will be ignored. |
| 71 | // TODO(ivoc): Include how often this happens in APM stats. |
| 72 | render_buffer_.Pop(); |
| 73 | frames_since_zero_buffer_size_ = 0; |
| 74 | } |
| 75 | ++frames_since_zero_buffer_size_; |
| 76 | float power = Power(render_audio); |
| 77 | render_buffer_.Push(power); |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 78 | } |
| 79 | |
| 80 | void ResidualEchoDetector::AnalyzeCaptureAudio( |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 81 | rtc::ArrayView<const float> capture_audio) { |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 82 | // Dump debug data assuming 48 kHz sample rate (if this assumption is not |
| 83 | // valid the dumped audio will need to be converted offline accordingly). |
| 84 | data_dumper_->DumpWav("ed_capture", capture_audio.size(), |
| 85 | capture_audio.data(), 48000, 1); |
| 86 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 87 | if (first_process_call_) { |
| 88 | // On the first process call (so the start of a call), we must flush the |
| 89 | // render buffer, otherwise the render data will be delayed. |
| 90 | render_buffer_.Clear(); |
| 91 | first_process_call_ = false; |
| 92 | } |
| 93 | |
| 94 | // Get the next render value. |
Danil Chapovalov | db9f7ab | 2018-06-19 10:50:11 +0200 | [diff] [blame] | 95 | const absl::optional<float> buffered_render_power = render_buffer_.Pop(); |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 96 | if (!buffered_render_power) { |
| 97 | // This can happen in a few cases: at the start of a call, due to a glitch |
| 98 | // or due to clock drift. The excess capture value will be ignored. |
| 99 | // TODO(ivoc): Include how often this happens in APM stats. |
| 100 | return; |
| 101 | } |
| 102 | // Update the render statistics, and store the statistics in circular buffers. |
| 103 | render_statistics_.Update(*buffered_render_power); |
| 104 | RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames); |
| 105 | render_power_[next_insertion_index_] = *buffered_render_power; |
| 106 | render_power_mean_[next_insertion_index_] = render_statistics_.mean(); |
| 107 | render_power_std_dev_[next_insertion_index_] = |
| 108 | render_statistics_.std_deviation(); |
| 109 | |
| 110 | // Get the next capture value, update capture statistics and add the relevant |
| 111 | // values to the buffers. |
| 112 | const float capture_power = Power(capture_audio); |
| 113 | capture_statistics_.Update(capture_power); |
| 114 | const float capture_mean = capture_statistics_.mean(); |
| 115 | const float capture_std_deviation = capture_statistics_.std_deviation(); |
| 116 | |
| 117 | // Update the covariance values and determine the new echo likelihood. |
| 118 | echo_likelihood_ = 0.f; |
peah | 94f6fa0 | 2017-05-16 07:25:06 -0700 | [diff] [blame] | 119 | size_t read_index = next_insertion_index_; |
ivoc | 1592c74 | 2017-05-17 09:53:02 -0700 | [diff] [blame] | 120 | |
| 121 | int best_delay = -1; |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 122 | for (size_t delay = 0; delay < covariances_.size(); ++delay) { |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 123 | RTC_DCHECK_LT(read_index, render_power_.size()); |
| 124 | covariances_[delay].Update(capture_power, capture_mean, |
| 125 | capture_std_deviation, render_power_[read_index], |
| 126 | render_power_mean_[read_index], |
| 127 | render_power_std_dev_[read_index]); |
peah | 94f6fa0 | 2017-05-16 07:25:06 -0700 | [diff] [blame] | 128 | read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1; |
ivoc | 1592c74 | 2017-05-17 09:53:02 -0700 | [diff] [blame] | 129 | |
| 130 | if (covariances_[delay].normalized_cross_correlation() > echo_likelihood_) { |
| 131 | echo_likelihood_ = covariances_[delay].normalized_cross_correlation(); |
| 132 | best_delay = static_cast<int>(delay); |
| 133 | } |
| 134 | } |
| 135 | // This is a temporary log message to help find the underlying cause for echo |
| 136 | // likelihoods > 1.0. |
| 137 | // TODO(ivoc): Remove once the issue is resolved. |
| 138 | if (echo_likelihood_ > 1.1f) { |
| 139 | // Make sure we don't spam the log. |
| 140 | if (log_counter_ < 5 && best_delay != -1) { |
| 141 | size_t read_index = kLookbackFrames + next_insertion_index_ - best_delay; |
| 142 | if (read_index >= kLookbackFrames) { |
| 143 | read_index -= kLookbackFrames; |
| 144 | } |
| 145 | RTC_DCHECK_LT(read_index, render_power_.size()); |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 146 | RTC_LOG_F(LS_ERROR) << "Echo detector internal state: {" |
| 147 | "Echo likelihood: " |
| 148 | << echo_likelihood_ << ", Best Delay: " << best_delay |
| 149 | << ", Covariance: " |
| 150 | << covariances_[best_delay].covariance() |
| 151 | << ", Last capture power: " << capture_power |
| 152 | << ", Capture mean: " << capture_mean |
| 153 | << ", Capture_standard deviation: " |
| 154 | << capture_std_deviation << ", Last render power: " |
| 155 | << render_power_[read_index] |
| 156 | << ", Render mean: " << render_power_mean_[read_index] |
| 157 | << ", Render standard deviation: " |
| 158 | << render_power_std_dev_[read_index] |
| 159 | << ", Reliability: " << reliability_ << "}"; |
ivoc | 1592c74 | 2017-05-17 09:53:02 -0700 | [diff] [blame] | 160 | log_counter_++; |
| 161 | } |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 162 | } |
ivoc | 860249e | 2017-05-16 06:50:11 -0700 | [diff] [blame] | 163 | RTC_DCHECK_LT(echo_likelihood_, 1.1f); |
ivoc | 1592c74 | 2017-05-17 09:53:02 -0700 | [diff] [blame] | 164 | |
ivoc | fbb374d | 2016-11-17 06:19:47 -0800 | [diff] [blame] | 165 | reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f; |
| 166 | echo_likelihood_ *= reliability_; |
ivoc | 8f94cd3 | 2017-05-05 05:50:10 -0700 | [diff] [blame] | 167 | // This is a temporary fix to prevent echo likelihood values > 1.0. |
| 168 | // TODO(ivoc): Find the root cause of this issue and fix it. |
| 169 | echo_likelihood_ = std::min(echo_likelihood_, 1.0f); |
ivoc | ef6cbae | 2016-11-10 08:21:04 -0800 | [diff] [blame] | 170 | int echo_percentage = static_cast<int>(echo_likelihood_ * 100); |
| 171 | RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood", |
| 172 | echo_percentage, 0, 100, 100 /* number of bins */); |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 173 | |
ivoc | 4e477a1 | 2017-01-15 08:29:46 -0800 | [diff] [blame] | 174 | // Update the buffer of recent likelihood values. |
| 175 | recent_likelihood_max_.Update(echo_likelihood_); |
| 176 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 177 | // Update the next insertion index. |
peah | 94f6fa0 | 2017-05-16 07:25:06 -0700 | [diff] [blame] | 178 | next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1) |
| 179 | ? next_insertion_index_ + 1 |
| 180 | : 0; |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 181 | } |
| 182 | |
Ivo Creusen | 647ef09 | 2018-03-14 17:13:48 +0100 | [diff] [blame] | 183 | void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/, |
| 184 | int /*num_capture_channels*/, |
| 185 | int /*render_sample_rate_hz*/, |
| 186 | int /*num_render_channels*/) { |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 187 | render_buffer_.Clear(); |
| 188 | std::fill(render_power_.begin(), render_power_.end(), 0.f); |
| 189 | std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f); |
| 190 | std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f); |
| 191 | render_statistics_.Clear(); |
| 192 | capture_statistics_.Clear(); |
ivoc | 4e477a1 | 2017-01-15 08:29:46 -0800 | [diff] [blame] | 193 | recent_likelihood_max_.Clear(); |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 194 | for (auto& cov : covariances_) { |
| 195 | cov.Clear(); |
| 196 | } |
| 197 | echo_likelihood_ = 0.f; |
| 198 | next_insertion_index_ = 0; |
ivoc | fbb374d | 2016-11-17 06:19:47 -0800 | [diff] [blame] | 199 | reliability_ = 0.f; |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 200 | } |
| 201 | |
Ivo Creusen | 09fa4b0 | 2018-01-11 16:08:54 +0100 | [diff] [blame] | 202 | void EchoDetector::PackRenderAudioBuffer(AudioBuffer* audio, |
| 203 | std::vector<float>* packed_buffer) { |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 204 | packed_buffer->clear(); |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 205 | packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0], |
| 206 | audio->channels_f()[0] + audio->num_frames()); |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 207 | } |
| 208 | |
Ivo Creusen | 09fa4b0 | 2018-01-11 16:08:54 +0100 | [diff] [blame] | 209 | EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const { |
| 210 | EchoDetector::Metrics metrics; |
| 211 | metrics.echo_likelihood = echo_likelihood_; |
| 212 | metrics.echo_likelihood_recent_max = recent_likelihood_max_.max(); |
| 213 | return metrics; |
| 214 | } |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 215 | } // namespace webrtc |