blob: 7da9b957a422488957c00efd2f67b007dce617c0 [file] [log] [blame]
ekm030249d2015-06-15 13:02:24 -07001/*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
ekmdb4fecf2015-06-22 17:49:08 -070011//
12// Implements helper functions and classes for intelligibility enhancement.
13//
14
ekm030249d2015-06-15 13:02:24 -070015#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
16
ekm35b72fb2015-07-10 14:11:52 -070017#include <math.h>
ekmeyerson3ab2f142015-07-23 12:15:24 -070018#include <stdlib.h>
ekm35b72fb2015-07-10 14:11:52 -070019#include <string.h>
ekm030249d2015-06-15 13:02:24 -070020#include <algorithm>
ekm030249d2015-06-15 13:02:24 -070021
22using std::complex;
ekm35b72fb2015-07-10 14:11:52 -070023using std::min;
ekm030249d2015-06-15 13:02:24 -070024
ekm35b72fb2015-07-10 14:11:52 -070025namespace webrtc {
ekm030249d2015-06-15 13:02:24 -070026
ekm35b72fb2015-07-10 14:11:52 -070027namespace intelligibility {
28
29float UpdateFactor(float target, float current, float limit) {
ekm030249d2015-06-15 13:02:24 -070030 float delta = fabsf(target - current);
31 float sign = copysign(1.0f, target - current);
32 return current + sign * fminf(delta, limit);
33}
34
ekmeyerson3ab2f142015-07-23 12:15:24 -070035float AddDitherIfZero(float value) {
36 return value == 0.f ? std::rand() * 0.01f / RAND_MAX : value;
ekm030249d2015-06-15 13:02:24 -070037}
38
ekm35b72fb2015-07-10 14:11:52 -070039complex<float> zerofudge(complex<float> c) {
ekmeyerson3ab2f142015-07-23 12:15:24 -070040 return complex<float>(AddDitherIfZero(c.real()), AddDitherIfZero(c.imag()));
ekm030249d2015-06-15 13:02:24 -070041}
42
Peter Kastingdce40cf2015-08-24 14:52:23 -070043complex<float> NewMean(complex<float> mean, complex<float> data, size_t count) {
ekm030249d2015-06-15 13:02:24 -070044 return mean + (data - mean) / static_cast<float>(count);
45}
46
Peter Kastingdce40cf2015-08-24 14:52:23 -070047void AddToMean(complex<float> data, size_t count, complex<float>* mean) {
ekm030249d2015-06-15 13:02:24 -070048 (*mean) = NewMean(*mean, data, count);
49}
50
ekm030249d2015-06-15 13:02:24 -070051
Peter Kastingdce40cf2015-08-24 14:52:23 -070052static const size_t kWindowBlockSize = 10;
ekm030249d2015-06-15 13:02:24 -070053
Peter Kastingdce40cf2015-08-24 14:52:23 -070054VarianceArray::VarianceArray(size_t num_freqs,
ekmdb4fecf2015-06-22 17:49:08 -070055 StepType type,
Peter Kastingdce40cf2015-08-24 14:52:23 -070056 size_t window_size,
ekm030249d2015-06-15 13:02:24 -070057 float decay)
ekmeyerson60d9b332015-08-14 10:35:55 -070058 : running_mean_(new complex<float>[num_freqs]()),
59 running_mean_sq_(new complex<float>[num_freqs]()),
60 sub_running_mean_(new complex<float>[num_freqs]()),
61 sub_running_mean_sq_(new complex<float>[num_freqs]()),
62 variance_(new float[num_freqs]()),
63 conj_sum_(new float[num_freqs]()),
64 num_freqs_(num_freqs),
ekm030249d2015-06-15 13:02:24 -070065 window_size_(window_size),
66 decay_(decay),
67 history_cursor_(0),
68 count_(0),
ekm35b72fb2015-07-10 14:11:52 -070069 array_mean_(0.0f),
70 buffer_full_(false) {
ekmeyerson60d9b332015-08-14 10:35:55 -070071 history_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
Peter Kastingdce40cf2015-08-24 14:52:23 -070072 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -070073 history_[i].reset(new complex<float>[window_size_]());
74 }
ekmeyerson60d9b332015-08-14 10:35:55 -070075 subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
Peter Kastingdce40cf2015-08-24 14:52:23 -070076 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -070077 subhistory_[i].reset(new complex<float>[window_size_]());
78 }
ekmeyerson60d9b332015-08-14 10:35:55 -070079 subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
Peter Kastingdce40cf2015-08-24 14:52:23 -070080 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -070081 subhistory_sq_[i].reset(new complex<float>[window_size_]());
82 }
83 switch (type) {
84 case kStepInfinite:
85 step_func_ = &VarianceArray::InfiniteStep;
86 break;
87 case kStepDecaying:
88 step_func_ = &VarianceArray::DecayStep;
89 break;
90 case kStepWindowed:
91 step_func_ = &VarianceArray::WindowedStep;
92 break;
93 case kStepBlocked:
94 step_func_ = &VarianceArray::BlockedStep;
95 break;
ekm35b72fb2015-07-10 14:11:52 -070096 case kStepBlockBasedMovingAverage:
97 step_func_ = &VarianceArray::BlockBasedMovingAverage;
98 break;
ekm030249d2015-06-15 13:02:24 -070099 }
100}
101
102// Compute the variance with Welford's algorithm, adding some fudge to
103// the input in case of all-zeroes.
104void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {
105 array_mean_ = 0.0f;
106 ++count_;
Peter Kastingdce40cf2015-08-24 14:52:23 -0700107 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -0700108 complex<float> sample = data[i];
109 if (!skip_fudge) {
110 sample = zerofudge(sample);
111 }
112 if (count_ == 1) {
113 running_mean_[i] = sample;
114 variance_[i] = 0.0f;
115 } else {
116 float old_sum = conj_sum_[i];
117 complex<float> old_mean = running_mean_[i];
ekmdb4fecf2015-06-22 17:49:08 -0700118 running_mean_[i] =
119 old_mean + (sample - old_mean) / static_cast<float>(count_);
120 conj_sum_[i] =
121 (old_sum + std::conj(sample - old_mean) * (sample - running_mean_[i]))
122 .real();
123 variance_[i] =
ekmeyerson3ab2f142015-07-23 12:15:24 -0700124 conj_sum_[i] / (count_ - 1);
ekm030249d2015-06-15 13:02:24 -0700125 }
126 array_mean_ += (variance_[i] - array_mean_) / (i + 1);
127 }
128}
129
130// Compute the variance from the beginning, with exponential decaying of the
131// series data.
132void VarianceArray::DecayStep(const complex<float>* data, bool /*dummy*/) {
133 array_mean_ = 0.0f;
134 ++count_;
Peter Kastingdce40cf2015-08-24 14:52:23 -0700135 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -0700136 complex<float> sample = data[i];
137 sample = zerofudge(sample);
138
139 if (count_ == 1) {
140 running_mean_[i] = sample;
141 running_mean_sq_[i] = sample * std::conj(sample);
142 variance_[i] = 0.0f;
143 } else {
144 complex<float> prev = running_mean_[i];
145 complex<float> prev2 = running_mean_sq_[i];
146 running_mean_[i] = decay_ * prev + (1.0f - decay_) * sample;
ekmdb4fecf2015-06-22 17:49:08 -0700147 running_mean_sq_[i] =
148 decay_ * prev2 + (1.0f - decay_) * sample * std::conj(sample);
ekmdb4fecf2015-06-22 17:49:08 -0700149 variance_[i] = (running_mean_sq_[i] -
150 running_mean_[i] * std::conj(running_mean_[i])).real();
ekm030249d2015-06-15 13:02:24 -0700151 }
152
153 array_mean_ += (variance_[i] - array_mean_) / (i + 1);
154 }
155}
156
157// Windowed variance computation. On each step, the variances for the
158// window are recomputed from scratch, using Welford's algorithm.
159void VarianceArray::WindowedStep(const complex<float>* data, bool /*dummy*/) {
Peter Kastingdce40cf2015-08-24 14:52:23 -0700160 size_t num = min(count_ + 1, window_size_);
ekm030249d2015-06-15 13:02:24 -0700161 array_mean_ = 0.0f;
Peter Kastingdce40cf2015-08-24 14:52:23 -0700162 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -0700163 complex<float> mean;
164 float conj_sum = 0.0f;
165
166 history_[i][history_cursor_] = data[i];
167
168 mean = history_[i][history_cursor_];
169 variance_[i] = 0.0f;
Peter Kastingdce40cf2015-08-24 14:52:23 -0700170 for (size_t j = 1; j < num; ++j) {
ekmdb4fecf2015-06-22 17:49:08 -0700171 complex<float> sample =
172 zerofudge(history_[i][(history_cursor_ + j) % window_size_]);
ekm030249d2015-06-15 13:02:24 -0700173 sample = history_[i][(history_cursor_ + j) % window_size_];
174 float old_sum = conj_sum;
175 complex<float> old_mean = mean;
176
177 mean = old_mean + (sample - old_mean) / static_cast<float>(j + 1);
ekmdb4fecf2015-06-22 17:49:08 -0700178 conj_sum =
179 (old_sum + std::conj(sample - old_mean) * (sample - mean)).real();
ekm030249d2015-06-15 13:02:24 -0700180 variance_[i] = conj_sum / (j);
181 }
182 array_mean_ += (variance_[i] - array_mean_) / (i + 1);
183 }
184 history_cursor_ = (history_cursor_ + 1) % window_size_;
185 ++count_;
186}
187
188// Variance with a window of blocks. Within each block, the variances are
189// recomputed from scratch at every stp, using |Var(X) = E(X^2) - E^2(X)|.
190// Once a block is filled with kWindowBlockSize samples, it is added to the
191// history window and a new block is started. The variances for the window
192// are recomputed from scratch at each of these transitions.
193void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
Peter Kastingdce40cf2015-08-24 14:52:23 -0700194 size_t blocks = min(window_size_, history_cursor_ + 1);
195 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -0700196 AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);
197 AddToMean(data[i] * std::conj(data[i]), count_ + 1,
198 &sub_running_mean_sq_[i]);
199 subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];
200 subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];
201
ekmdb4fecf2015-06-22 17:49:08 -0700202 variance_[i] =
203 (NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -
204 NewMean(running_mean_[i], sub_running_mean_[i], blocks) *
205 std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))
206 .real();
ekm030249d2015-06-15 13:02:24 -0700207 if (count_ == kWindowBlockSize - 1) {
208 sub_running_mean_[i] = complex<float>(0.0f, 0.0f);
209 sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
210 running_mean_[i] = complex<float>(0.0f, 0.0f);
211 running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
Peter Kastingdce40cf2015-08-24 14:52:23 -0700212 for (size_t j = 0; j < min(window_size_, history_cursor_); ++j) {
ekm35b72fb2015-07-10 14:11:52 -0700213 AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);
214 AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);
ekm030249d2015-06-15 13:02:24 -0700215 }
216 ++history_cursor_;
217 }
218 }
219 ++count_;
220 if (count_ == kWindowBlockSize) {
221 count_ = 0;
222 }
223}
224
ekm35b72fb2015-07-10 14:11:52 -0700225// Recomputes variances for each window from scratch based on previous window.
226void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data,
227 bool /*dummy*/) {
228 // TODO(ekmeyerson) To mitigate potential divergence, add counter so that
229 // after every so often sums are computed scratch by summing over all
230 // elements instead of subtracting oldest and adding newest.
Peter Kastingdce40cf2015-08-24 14:52:23 -0700231 for (size_t i = 0; i < num_freqs_; ++i) {
ekm35b72fb2015-07-10 14:11:52 -0700232 sub_running_mean_[i] += data[i];
233 sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);
234 }
235 ++count_;
236
237 // TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow
238 // experimentation with different block size,window size pairs.
239 if (count_ >= kWindowBlockSize) {
240 count_ = 0;
241
Peter Kastingdce40cf2015-08-24 14:52:23 -0700242 for (size_t i = 0; i < num_freqs_; ++i) {
ekm35b72fb2015-07-10 14:11:52 -0700243 running_mean_[i] -= subhistory_[i][history_cursor_];
244 running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];
245
246 float scale = 1.f / kWindowBlockSize;
247 subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale;
248 subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale;
249
250 sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f);
251 sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f);
252
253 running_mean_[i] += subhistory_[i][history_cursor_];
254 running_mean_sq_[i] += subhistory_sq_[i][history_cursor_];
255
256 scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1);
257 variance_[i] = std::real(running_mean_sq_[i] * scale -
258 running_mean_[i] * scale *
259 std::conj(running_mean_[i]) * scale);
260 }
261
262 ++history_cursor_;
263 if (history_cursor_ >= window_size_) {
264 buffer_full_ = true;
265 history_cursor_ = 0;
266 }
267 }
268}
269
ekm030249d2015-06-15 13:02:24 -0700270void VarianceArray::Clear() {
ekmeyerson60d9b332015-08-14 10:35:55 -0700271 memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * num_freqs_);
272 memset(running_mean_sq_.get(), 0,
273 sizeof(*running_mean_sq_.get()) * num_freqs_);
274 memset(variance_.get(), 0, sizeof(*variance_.get()) * num_freqs_);
275 memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * num_freqs_);
ekm030249d2015-06-15 13:02:24 -0700276 history_cursor_ = 0;
277 count_ = 0;
278 array_mean_ = 0.0f;
279}
280
281void VarianceArray::ApplyScale(float scale) {
282 array_mean_ = 0.0f;
Peter Kastingdce40cf2015-08-24 14:52:23 -0700283 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -0700284 variance_[i] *= scale * scale;
285 array_mean_ += (variance_[i] - array_mean_) / (i + 1);
286 }
287}
288
Peter Kastingdce40cf2015-08-24 14:52:23 -0700289GainApplier::GainApplier(size_t freqs, float change_limit)
ekmeyerson60d9b332015-08-14 10:35:55 -0700290 : num_freqs_(freqs),
ekm030249d2015-06-15 13:02:24 -0700291 change_limit_(change_limit),
292 target_(new float[freqs]()),
293 current_(new float[freqs]()) {
Peter Kastingdce40cf2015-08-24 14:52:23 -0700294 for (size_t i = 0; i < freqs; ++i) {
ekm030249d2015-06-15 13:02:24 -0700295 target_[i] = 1.0f;
296 current_[i] = 1.0f;
297 }
298}
299
300void GainApplier::Apply(const complex<float>* in_block,
301 complex<float>* out_block) {
Peter Kastingdce40cf2015-08-24 14:52:23 -0700302 for (size_t i = 0; i < num_freqs_; ++i) {
ekm030249d2015-06-15 13:02:24 -0700303 float factor = sqrtf(fabsf(current_[i]));
304 if (!std::isnormal(factor)) {
305 factor = 1.0f;
306 }
307 out_block[i] = factor * in_block[i];
308 current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);
309 }
310}
311
312} // namespace intelligibility
313
314} // namespace webrtc