ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame^] | 11 | #include "modules/audio_processing/residual_echo_detector.h" |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 12 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 13 | #include <algorithm> |
| 14 | #include <numeric> |
| 15 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame^] | 16 | #include "modules/audio_processing/audio_buffer.h" |
| 17 | #include "modules/audio_processing/logging/apm_data_dumper.h" |
| 18 | #include "rtc_base/atomicops.h" |
| 19 | #include "rtc_base/logging.h" |
| 20 | #include "system_wrappers/include/metrics.h" |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 21 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 22 | namespace { |
| 23 | |
| 24 | float Power(rtc::ArrayView<const float> input) { |
Alex Loiko | 890988c | 2017-08-31 10:25:48 +0200 | [diff] [blame] | 25 | if (input.empty()) { |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 26 | return 0.f; |
| 27 | } |
| 28 | return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) / |
| 29 | input.size(); |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 30 | } |
| 31 | |
// Length of the circular render-power history buffers and number of covariance
// estimators; the capture path evaluates one estimator per candidate delay.
constexpr size_t kLookbackFrames = 650;
// Passed to the render_buffer_ constructor and also used as the number of
// render frames after which one queued render value is dropped.
// TODO(ivoc): Verify the size of this buffer.
constexpr size_t kRenderBufferSize = 30;
// Weight given to the new term when the reliability estimate is updated.
constexpr float kAlpha = 0.001f;
// 10 seconds of data, updated every 10 ms.
constexpr size_t kAggregationBufferSize = 10 * 100;
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 38 | |
| 39 | } // namespace |
| 40 | |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 41 | namespace webrtc { |
| 42 | |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 43 | int ResidualEchoDetector::instance_count_ = 0; |
| 44 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 45 | ResidualEchoDetector::ResidualEchoDetector() |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 46 | : data_dumper_( |
| 47 | new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), |
| 48 | render_buffer_(kRenderBufferSize), |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 49 | render_power_(kLookbackFrames), |
| 50 | render_power_mean_(kLookbackFrames), |
| 51 | render_power_std_dev_(kLookbackFrames), |
ivoc | 4e477a1 | 2017-01-15 08:29:46 -0800 | [diff] [blame] | 52 | covariances_(kLookbackFrames), |
| 53 | recent_likelihood_max_(kAggregationBufferSize) {} |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 54 | |
| 55 | ResidualEchoDetector::~ResidualEchoDetector() = default; |
| 56 | |
| 57 | void ResidualEchoDetector::AnalyzeRenderAudio( |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 58 | rtc::ArrayView<const float> render_audio) { |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 59 | // Dump debug data assuming 48 kHz sample rate (if this assumption is not |
| 60 | // valid the dumped audio will need to be converted offline accordingly). |
| 61 | data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(), |
| 62 | 48000, 1); |
| 63 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 64 | if (render_buffer_.Size() == 0) { |
| 65 | frames_since_zero_buffer_size_ = 0; |
| 66 | } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) { |
| 67 | // This can happen in a few cases: at the start of a call, due to a glitch |
| 68 | // or due to clock drift. The excess capture value will be ignored. |
| 69 | // TODO(ivoc): Include how often this happens in APM stats. |
| 70 | render_buffer_.Pop(); |
| 71 | frames_since_zero_buffer_size_ = 0; |
| 72 | } |
| 73 | ++frames_since_zero_buffer_size_; |
| 74 | float power = Power(render_audio); |
| 75 | render_buffer_.Push(power); |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 76 | } |
| 77 | |
| 78 | void ResidualEchoDetector::AnalyzeCaptureAudio( |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 79 | rtc::ArrayView<const float> capture_audio) { |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 80 | // Dump debug data assuming 48 kHz sample rate (if this assumption is not |
| 81 | // valid the dumped audio will need to be converted offline accordingly). |
| 82 | data_dumper_->DumpWav("ed_capture", capture_audio.size(), |
| 83 | capture_audio.data(), 48000, 1); |
| 84 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 85 | if (first_process_call_) { |
| 86 | // On the first process call (so the start of a call), we must flush the |
| 87 | // render buffer, otherwise the render data will be delayed. |
| 88 | render_buffer_.Clear(); |
| 89 | first_process_call_ = false; |
| 90 | } |
| 91 | |
| 92 | // Get the next render value. |
| 93 | const rtc::Optional<float> buffered_render_power = render_buffer_.Pop(); |
| 94 | if (!buffered_render_power) { |
| 95 | // This can happen in a few cases: at the start of a call, due to a glitch |
| 96 | // or due to clock drift. The excess capture value will be ignored. |
| 97 | // TODO(ivoc): Include how often this happens in APM stats. |
| 98 | return; |
| 99 | } |
| 100 | // Update the render statistics, and store the statistics in circular buffers. |
| 101 | render_statistics_.Update(*buffered_render_power); |
| 102 | RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames); |
| 103 | render_power_[next_insertion_index_] = *buffered_render_power; |
| 104 | render_power_mean_[next_insertion_index_] = render_statistics_.mean(); |
| 105 | render_power_std_dev_[next_insertion_index_] = |
| 106 | render_statistics_.std_deviation(); |
| 107 | |
| 108 | // Get the next capture value, update capture statistics and add the relevant |
| 109 | // values to the buffers. |
| 110 | const float capture_power = Power(capture_audio); |
| 111 | capture_statistics_.Update(capture_power); |
| 112 | const float capture_mean = capture_statistics_.mean(); |
| 113 | const float capture_std_deviation = capture_statistics_.std_deviation(); |
| 114 | |
| 115 | // Update the covariance values and determine the new echo likelihood. |
| 116 | echo_likelihood_ = 0.f; |
peah | 94f6fa0 | 2017-05-16 07:25:06 -0700 | [diff] [blame] | 117 | size_t read_index = next_insertion_index_; |
ivoc | 1592c74 | 2017-05-17 09:53:02 -0700 | [diff] [blame] | 118 | |
| 119 | int best_delay = -1; |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 120 | for (size_t delay = 0; delay < covariances_.size(); ++delay) { |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 121 | RTC_DCHECK_LT(read_index, render_power_.size()); |
| 122 | covariances_[delay].Update(capture_power, capture_mean, |
| 123 | capture_std_deviation, render_power_[read_index], |
| 124 | render_power_mean_[read_index], |
| 125 | render_power_std_dev_[read_index]); |
peah | 94f6fa0 | 2017-05-16 07:25:06 -0700 | [diff] [blame] | 126 | read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1; |
ivoc | 1592c74 | 2017-05-17 09:53:02 -0700 | [diff] [blame] | 127 | |
| 128 | if (covariances_[delay].normalized_cross_correlation() > echo_likelihood_) { |
| 129 | echo_likelihood_ = covariances_[delay].normalized_cross_correlation(); |
| 130 | best_delay = static_cast<int>(delay); |
| 131 | } |
| 132 | } |
| 133 | // This is a temporary log message to help find the underlying cause for echo |
| 134 | // likelihoods > 1.0. |
| 135 | // TODO(ivoc): Remove once the issue is resolved. |
| 136 | if (echo_likelihood_ > 1.1f) { |
| 137 | // Make sure we don't spam the log. |
| 138 | if (log_counter_ < 5 && best_delay != -1) { |
| 139 | size_t read_index = kLookbackFrames + next_insertion_index_ - best_delay; |
| 140 | if (read_index >= kLookbackFrames) { |
| 141 | read_index -= kLookbackFrames; |
| 142 | } |
| 143 | RTC_DCHECK_LT(read_index, render_power_.size()); |
| 144 | LOG_F(LS_ERROR) << "Echo detector internal state: {" |
| 145 | << "Echo likelihood: " << echo_likelihood_ |
| 146 | << ", Best Delay: " << best_delay << ", Covariance: " |
| 147 | << covariances_[best_delay].covariance() |
| 148 | << ", Last capture power: " << capture_power |
| 149 | << ", Capture mean: " << capture_mean |
| 150 | << ", Capture_standard deviation: " |
| 151 | << capture_std_deviation |
| 152 | << ", Last render power: " << render_power_[read_index] |
| 153 | << ", Render mean: " << render_power_mean_[read_index] |
| 154 | << ", Render standard deviation: " |
| 155 | << render_power_std_dev_[read_index] |
| 156 | << ", Reliability: " << reliability_ << "}"; |
| 157 | log_counter_++; |
| 158 | } |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 159 | } |
ivoc | 860249e | 2017-05-16 06:50:11 -0700 | [diff] [blame] | 160 | RTC_DCHECK_LT(echo_likelihood_, 1.1f); |
ivoc | 1592c74 | 2017-05-17 09:53:02 -0700 | [diff] [blame] | 161 | |
ivoc | fbb374d | 2016-11-17 06:19:47 -0800 | [diff] [blame] | 162 | reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f; |
| 163 | echo_likelihood_ *= reliability_; |
ivoc | 8f94cd3 | 2017-05-05 05:50:10 -0700 | [diff] [blame] | 164 | // This is a temporary fix to prevent echo likelihood values > 1.0. |
| 165 | // TODO(ivoc): Find the root cause of this issue and fix it. |
| 166 | echo_likelihood_ = std::min(echo_likelihood_, 1.0f); |
ivoc | ef6cbae | 2016-11-10 08:21:04 -0800 | [diff] [blame] | 167 | int echo_percentage = static_cast<int>(echo_likelihood_ * 100); |
| 168 | RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood", |
| 169 | echo_percentage, 0, 100, 100 /* number of bins */); |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 170 | |
ivoc | 4e477a1 | 2017-01-15 08:29:46 -0800 | [diff] [blame] | 171 | // Update the buffer of recent likelihood values. |
| 172 | recent_likelihood_max_.Update(echo_likelihood_); |
| 173 | |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 174 | // Update the next insertion index. |
peah | 94f6fa0 | 2017-05-16 07:25:06 -0700 | [diff] [blame] | 175 | next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1) |
| 176 | ? next_insertion_index_ + 1 |
| 177 | : 0; |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 178 | } |
| 179 | |
| 180 | void ResidualEchoDetector::Initialize() { |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 181 | render_buffer_.Clear(); |
| 182 | std::fill(render_power_.begin(), render_power_.end(), 0.f); |
| 183 | std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f); |
| 184 | std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f); |
| 185 | render_statistics_.Clear(); |
| 186 | capture_statistics_.Clear(); |
ivoc | 4e477a1 | 2017-01-15 08:29:46 -0800 | [diff] [blame] | 187 | recent_likelihood_max_.Clear(); |
ivoc | af27ed0 | 2016-10-28 07:04:03 -0700 | [diff] [blame] | 188 | for (auto& cov : covariances_) { |
| 189 | cov.Clear(); |
| 190 | } |
| 191 | echo_likelihood_ = 0.f; |
| 192 | next_insertion_index_ = 0; |
ivoc | fbb374d | 2016-11-17 06:19:47 -0800 | [diff] [blame] | 193 | reliability_ = 0.f; |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 194 | } |
| 195 | |
| 196 | void ResidualEchoDetector::PackRenderAudioBuffer( |
| 197 | AudioBuffer* audio, |
| 198 | std::vector<float>* packed_buffer) { |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 199 | packed_buffer->clear(); |
peah | 9e6a290 | 2017-05-15 07:19:21 -0700 | [diff] [blame] | 200 | packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0], |
| 201 | audio->channels_f()[0] + audio->num_frames()); |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 202 | } |
| 203 | |
ivoc | 9f4a4a0 | 2016-10-28 05:39:16 -0700 | [diff] [blame] | 204 | } // namespace webrtc |